src/lj_trace.c - luajit-2.0-src

Data types defined

Functions defined

Macros defined

Source code

  1. /*
  2. ** Trace management.
  3. ** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
  4. */

  5. #define lj_trace_c
  6. #define LUA_CORE

  7. #include "lj_obj.h"

  8. #if LJ_HASJIT

  9. #include "lj_gc.h"
  10. #include "lj_err.h"
  11. #include "lj_debug.h"
  12. #include "lj_str.h"
  13. #include "lj_frame.h"
  14. #include "lj_state.h"
  15. #include "lj_bc.h"
  16. #include "lj_ir.h"
  17. #include "lj_jit.h"
  18. #include "lj_iropt.h"
  19. #include "lj_mcode.h"
  20. #include "lj_trace.h"
  21. #include "lj_snap.h"
  22. #include "lj_gdbjit.h"
  23. #include "lj_record.h"
  24. #include "lj_asm.h"
  25. #include "lj_dispatch.h"
  26. #include "lj_vm.h"
  27. #include "lj_vmevent.h"
  28. #include "lj_target.h"

  29. /* -- Error handling ------------------------------------------------------ */

  30. /* Synchronous abort with error message. */
  31. void lj_trace_err(jit_State *J, TraceError e)
  32. {
  33.   setnilV(&J->errinfo);  /* No error info. */
  34.   setintV(J->L->top++, (int32_t)e);
  35.   lj_err_throw(J->L, LUA_ERRRUN);
  36. }

  37. /* Synchronous abort with error message and error info. */
  38. void lj_trace_err_info(jit_State *J, TraceError e)
  39. {
  40.   setintV(J->L->top++, (int32_t)e);
  41.   lj_err_throw(J->L, LUA_ERRRUN);
  42. }

  43. /* -- Trace management ---------------------------------------------------- */

  44. /* The current trace is first assembled in J->cur. The variable length
  45. ** arrays point to shared, growable buffers (J->irbuf etc.). When trace
  46. ** recording ends successfully, the current trace and its data structures
  47. ** are copied to a new (compact) GCtrace object.
  48. */

  49. /* Find a free trace number. */
  50. static TraceNo trace_findfree(jit_State *J)
  51. {
  52.   MSize osz, lim;
  53.   if (J->freetrace == 0)
  54.     J->freetrace = 1;
  55.   for (; J->freetrace < J->sizetrace; J->freetrace++)
  56.     if (traceref(J, J->freetrace) == NULL)
  57.       return J->freetrace++;
  58.   /* Need to grow trace array. */
  59.   lim = (MSize)J->param[JIT_P_maxtrace] + 1;
  60.   if (lim < 2) lim = 2; else if (lim > 65535) lim = 65535;
  61.   osz = J->sizetrace;
  62.   if (osz >= lim)
  63.     return 0/* Too many traces. */
  64.   lj_mem_growvec(J->L, J->trace, J->sizetrace, lim, GCRef);
  65.   for (; osz < J->sizetrace; osz++)
  66.     setgcrefnull(J->trace[osz]);
  67.   return J->freetrace;
  68. }

/* Append a copy of the growable vector J->cur.field (length J->cur.szfield)
** to the compacted trace at write cursor p and advance p past it.
** T and p must be in scope (see trace_save()).
*/
#define TRACE_APPENDVEC(field, szfield, tp) \
  T->field = (tp *)p; \
  memcpy(p, J->cur.field, J->cur.szfield*sizeof(tp)); \
  p += J->cur.szfield*sizeof(tp);

  73. #ifdef LUAJIT_USE_PERFTOOLS
  74. /*
  75. ** Create symbol table of JIT-compiled code. For use with Linux perf tools.
  76. ** Example usage:
  77. **   perf record -f -e cycles luajit test.lua
  78. **   perf report -s symbol
  79. **   rm perf.data /tmp/perf-*.map
  80. */
  81. #include <stdio.h>
  82. #include <unistd.h>

  83. static void perftools_addtrace(GCtrace *T)
  84. {
  85.   static FILE *fp;
  86.   GCproto *pt = &gcref(T->startpt)->pt;
  87.   const BCIns *startpc = mref(T->startpc, const BCIns);
  88.   const char *name = proto_chunknamestr(pt);
  89.   BCLine lineno;
  90.   if (name[0] == '@' || name[0] == '=')
  91.     name++;
  92.   else
  93.     name = "(string)";
  94.   lua_assert(startpc >= proto_bc(pt) && startpc < proto_bc(pt) + pt->sizebc);
  95.   lineno = lj_debug_line(pt, proto_bcpos(pt, startpc));
  96.   if (!fp) {
  97.     char fname[40];
  98.     sprintf(fname, "/tmp/perf-%d.map", getpid());
  99.     if (!(fp = fopen(fname, "w"))) return;
  100.     setlinebuf(fp);
  101.   }
  102.   fprintf(fp, "%lx %x TRACE_%d::%s:%u\n",
  103.           (long)T->mcode, T->szmcode, T->traceno, name, lineno);
  104. }
  105. #endif

/* Save current trace by copying and compacting it. */
static void trace_save(jit_State *J)
{
  /* Compact layout: header, IR instructions, snapshots, snapshot map. */
  size_t sztr = ((sizeof(GCtrace)+7)&~7);  /* Header size, 8-byte aligned. */
  size_t szins = (J->cur.nins-J->cur.nk)*sizeof(IRIns);
  size_t sz = sztr + szins +
              J->cur.nsnap*sizeof(SnapShot) +
              J->cur.nsnapmap*sizeof(SnapEntry);
  GCtrace *T = lj_mem_newt(J->L, (MSize)sz, GCtrace);
  char *p = (char *)T + sztr;  /* Write cursor for the payload. */
  memcpy(T, &J->cur, sizeof(GCtrace));  /* Copy the trace header fields. */
  /* Link the new trace into the GC root list and mark it white. */
  setgcrefr(T->nextgc, J2G(J)->gc.root);
  setgcrefp(J2G(J)->gc.root, T);
  newwhite(J2G(J), T);
  T->gct = ~LJ_TTRACE;
  /* Bias the IR pointer by nk so T->ir can be indexed by IR reference. */
  T->ir = (IRIns *)p - J->cur.nk;
  memcpy(p, J->cur.ir+J->cur.nk, szins);
  p += szins;
  TRACE_APPENDVEC(snap, nsnap, SnapShot)
  TRACE_APPENDVEC(snapmap, nsnapmap, SnapEntry)
  J->cur.traceno = 0;  /* Current trace is no longer in-flight. */
  setgcrefp(J->trace[T->traceno], T);  /* Publish the compacted trace. */
  lj_gc_barriertrace(J2G(J), T->traceno);
  lj_gdbjit_addtrace(J, T);
#ifdef LUAJIT_USE_PERFTOOLS
  perftools_addtrace(T);
#endif
}

  134. void LJ_FASTCALL lj_trace_free(global_State *g, GCtrace *T)
  135. {
  136.   jit_State *J = G2J(g);
  137.   if (T->traceno) {
  138.     lj_gdbjit_deltrace(J, T);
  139.     if (T->traceno < J->freetrace)
  140.       J->freetrace = T->traceno;
  141.     setgcrefnull(J->trace[T->traceno]);
  142.   }
  143.   lj_mem_free(g, T,
  144.     ((sizeof(GCtrace)+7)&~7) + (T->nins-T->nk)*sizeof(IRIns) +
  145.     T->nsnap*sizeof(SnapShot) + T->nsnapmap*sizeof(SnapEntry));
  146. }

  147. /* Re-enable compiling a prototype by unpatching any modified bytecode. */
  148. void lj_trace_reenableproto(GCproto *pt)
  149. {
  150.   if ((pt->flags & PROTO_ILOOP)) {
  151.     BCIns *bc = proto_bc(pt);
  152.     BCPos i, sizebc = pt->sizebc;;
  153.     pt->flags &= ~PROTO_ILOOP;
  154.     if (bc_op(bc[0]) == BC_IFUNCF)
  155.       setbc_op(&bc[0], BC_FUNCF);
  156.     for (i = 1; i < sizebc; i++) {
  157.       BCOp op = bc_op(bc[i]);
  158.       if (op == BC_IFORL || op == BC_IITERL || op == BC_ILOOP)
  159.         setbc_op(&bc[i], (int)op+(int)BC_LOOP-(int)BC_ILOOP);
  160.     }
  161.   }
  162. }

  163. /* Unpatch the bytecode modified by a root trace. */
  164. static void trace_unpatch(jit_State *J, GCtrace *T)
  165. {
  166.   BCOp op = bc_op(T->startins);
  167.   BCIns *pc = mref(T->startpc, BCIns);
  168.   UNUSED(J);
  169.   if (op == BC_JMP)
  170.     return/* No need to unpatch branches in parent traces (yet). */
  171.   switch (bc_op(*pc)) {
  172.   case BC_JFORL:
  173.     lua_assert(traceref(J, bc_d(*pc)) == T);
  174.     *pc = T->startins;
  175.     pc += bc_j(T->startins);
  176.     lua_assert(bc_op(*pc) == BC_JFORI);
  177.     setbc_op(pc, BC_FORI);
  178.     break;
  179.   case BC_JITERL:
  180.   case BC_JLOOP:
  181.     lua_assert(op == BC_ITERL || op == BC_LOOP || bc_isret(op));
  182.     *pc = T->startins;
  183.     break;
  184.   case BC_JMP:
  185.     lua_assert(op == BC_ITERL);
  186.     pc += bc_j(*pc)+2;
  187.     if (bc_op(*pc) == BC_JITERL) {
  188.       lua_assert(traceref(J, bc_d(*pc)) == T);
  189.       *pc = T->startins;
  190.     }
  191.     break;
  192.   case BC_JFUNCF:
  193.     lua_assert(op == BC_FUNCF);
  194.     *pc = T->startins;
  195.     break;
  196.   default:  /* Already unpatched. */
  197.     break;
  198.   }
  199. }

  200. /* Flush a root trace. */
  201. static void trace_flushroot(jit_State *J, GCtrace *T)
  202. {
  203.   GCproto *pt = &gcref(T->startpt)->pt;
  204.   lua_assert(T->root == 0 && pt != NULL);
  205.   /* First unpatch any modified bytecode. */
  206.   trace_unpatch(J, T);
  207.   /* Unlink root trace from chain anchored in prototype. */
  208.   if (pt->trace == T->traceno) {  /* Trace is first in chain. Easy. */
  209.     pt->trace = T->nextroot;
  210.   } else if (pt->trace) {  /* Otherwise search in chain of root traces. */
  211.     GCtrace *T2 = traceref(J, pt->trace);
  212.     if (T2) {
  213.       for (; T2->nextroot; T2 = traceref(J, T2->nextroot))
  214.         if (T2->nextroot == T->traceno) {
  215.           T2->nextroot = T->nextroot;  /* Unlink from chain. */
  216.           break;
  217.         }
  218.     }
  219.   }
  220. }

  221. /* Flush a trace. Only root traces are considered. */
  222. void lj_trace_flush(jit_State *J, TraceNo traceno)
  223. {
  224.   if (traceno > 0 && traceno < J->sizetrace) {
  225.     GCtrace *T = traceref(J, traceno);
  226.     if (T && T->root == 0)
  227.       trace_flushroot(J, T);
  228.   }
  229. }

  230. /* Flush all traces associated with a prototype. */
  231. void lj_trace_flushproto(global_State *g, GCproto *pt)
  232. {
  233.   while (pt->trace != 0)
  234.     trace_flushroot(G2J(g), traceref(G2J(g), pt->trace));
  235. }

/* Flush all traces. Returns 1 if flushing had to be skipped. */
int lj_trace_flushall(lua_State *L)
{
  jit_State *J = L2J(L);
  ptrdiff_t i;
  if ((J2G(J)->hookmask & HOOK_GC))
    return 1;  /* Don't flush from inside a __gc call. */
  /* Walk the trace table backwards and drop every trace. */
  for (i = (ptrdiff_t)J->sizetrace-1; i > 0; i--) {
    GCtrace *T = traceref(J, i);
    if (T) {
      if (T->root == 0)
        trace_flushroot(J, T);  /* Unpatch bytecode of root traces. */
      lj_gdbjit_deltrace(J, T);
      T->traceno = 0;
      setgcrefnull(J->trace[i]);
    }
  }
  J->cur.traceno = 0;
  J->freetrace = 0;
  /* Clear penalty cache. */
  memset(J->penalty, 0, sizeof(J->penalty));
  /* Free the whole machine code and invalidate all exit stub groups. */
  lj_mcode_free(J);
  memset(J->exitstubgroup, 0, sizeof(J->exitstubgroup));
  /* Notify vmevent handlers that the JIT state was flushed. */
  lj_vmevent_send(L, TRACE,
    setstrV(L, L->top++, lj_str_newlit(L, "flush"));
  );
  return 0;
}

  265. /* Initialize JIT compiler state. */
  266. void lj_trace_initstate(global_State *g)
  267. {
  268.   jit_State *J = G2J(g);
  269.   TValue *tv;
  270.   /* Initialize SIMD constants. */
  271.   tv = LJ_KSIMD(J, LJ_KSIMD_ABS);
  272.   tv[0].u64 = U64x(7fffffff,ffffffff);
  273.   tv[1].u64 = U64x(7fffffff,ffffffff);
  274.   tv = LJ_KSIMD(J, LJ_KSIMD_NEG);
  275.   tv[0].u64 = U64x(80000000,00000000);
  276.   tv[1].u64 = U64x(80000000,00000000);
  277. }

/* Free everything associated with the JIT compiler state. */
void lj_trace_freestate(global_State *g)
{
  jit_State *J = G2J(g);
#ifdef LUA_USE_ASSERT
  {  /* This assumes all traces have already been freed. */
    ptrdiff_t i;
    for (i = 1; i < (ptrdiff_t)J->sizetrace; i++)
      lua_assert(i == (ptrdiff_t)J->cur.traceno || traceref(J, i) == NULL);
  }
#endif
  lj_mcode_free(J);  /* Release all machine code areas. */
  lj_ir_k64_freeall(J);
  /* Free the shared growable buffers used during recording. */
  lj_mem_freevec(g, J->snapmapbuf, J->sizesnapmap, SnapEntry);
  lj_mem_freevec(g, J->snapbuf, J->sizesnap, SnapShot);
  /* J->irbuf is biased by irbotlim, so rebase it before freeing. */
  lj_mem_freevec(g, J->irbuf + J->irbotlim, J->irtoplim - J->irbotlim, IRIns);
  lj_mem_freevec(g, J->trace, J->sizetrace, GCRef);
}

  296. /* -- Penalties and blacklisting ------------------------------------------ */

  297. /* Blacklist a bytecode instruction. */
  298. static void blacklist_pc(GCproto *pt, BCIns *pc)
  299. {
  300.   setbc_op(pc, (int)bc_op(*pc)+(int)BC_ILOOP-(int)BC_LOOP);
  301.   pt->flags |= PROTO_ILOOP;
  302. }

  303. /* Penalize a bytecode instruction. */
  304. static void penalty_pc(jit_State *J, GCproto *pt, BCIns *pc, TraceError e)
  305. {
  306.   uint32_t i, val = PENALTY_MIN;
  307.   for (i = 0; i < PENALTY_SLOTS; i++)
  308.     if (mref(J->penalty[i].pc, const BCIns) == pc) {  /* Cache slot found? */
  309.       /* First try to bump its hotcount several times. */
  310.       val = ((uint32_t)J->penalty[i].val << 1) +
  311.             LJ_PRNG_BITS(J, PENALTY_RNDBITS);
  312.       if (val > PENALTY_MAX) {
  313.         blacklist_pc(pt, pc);  /* Blacklist it, if that didn't help. */
  314.         return;
  315.       }
  316.       goto setpenalty;
  317.     }
  318.   /* Assign a new penalty cache slot. */
  319.   i = J->penaltyslot;
  320.   J->penaltyslot = (J->penaltyslot + 1) & (PENALTY_SLOTS-1);
  321.   setmref(J->penalty[i].pc, pc);
  322. setpenalty:
  323.   J->penalty[i].val = (uint16_t)val;
  324.   J->penalty[i].reason = e;
  325.   hotcount_set(J2GG(J), pc+1, val);
  326. }

  327. /* -- Trace compiler state machine ---------------------------------------- */

/* Start tracing. */
static void trace_start(jit_State *J)
{
  lua_State *L;
  TraceNo traceno;

  if ((J->pt->flags & PROTO_NOJIT)) {  /* JIT disabled for this proto? */
    if (J->parent == 0 && J->exitno == 0) {
      /* Lazy bytecode patching to disable hotcount events. */
      lua_assert(bc_op(*J->pc) == BC_FORL || bc_op(*J->pc) == BC_ITERL ||
                 bc_op(*J->pc) == BC_LOOP || bc_op(*J->pc) == BC_FUNCF);
      setbc_op(J->pc, (int)bc_op(*J->pc)+(int)BC_ILOOP-(int)BC_LOOP);
      J->pt->flags |= PROTO_ILOOP;
    }
    J->state = LJ_TRACE_IDLE;  /* Silently ignored. */
    return;
  }

  /* Get a new trace number. */
  traceno = trace_findfree(J);
  if (LJ_UNLIKELY(traceno == 0)) {  /* No free trace? */
    lua_assert((J2G(J)->hookmask & HOOK_GC) == 0);
    lj_trace_flushall(J->L);  /* Flush everything to make room next time. */
    J->state = LJ_TRACE_IDLE;  /* Silently ignored. */
    return;
  }
  /* Reserve the slot by pointing it at the in-progress trace. */
  setgcrefp(J->trace[traceno], &J->cur);

  /* Setup enough of the current trace to be able to send the vmevent. */
  memset(&J->cur, 0, sizeof(GCtrace));
  J->cur.traceno = traceno;
  J->cur.nins = J->cur.nk = REF_BASE;  /* IR grows up (ins) and down (consts). */
  J->cur.ir = J->irbuf;
  J->cur.snap = J->snapbuf;
  J->cur.snapmap = J->snapmapbuf;
  J->mergesnap = 0;
  J->needsnap = 0;
  J->bcskip = 0;
  J->guardemit.irt = 0;
  J->postproc = LJ_POST_NONE;
  lj_resetsplit(J);
  setgcref(J->cur.startpt, obj2gco(J->pt));

  L = J->L;
  /* Notify vmevent handlers that a trace is being started. */
  lj_vmevent_send(L, TRACE,
    setstrV(L, L->top++, lj_str_newlit(L, "start"));
    setintV(L->top++, traceno);
    setfuncV(L, L->top++, J->fn);
    setintV(L->top++, proto_bcpos(J->pt, J->pc));
    if (J->parent) {  /* Side traces also report parent and exit number. */
      setintV(L->top++, J->parent);
      setintV(L->top++, J->exitno);
    }
  );
  lj_record_setup(J);
}

/* Stop tracing. Patch bytecode, link the trace and commit its mcode. */
static void trace_stop(jit_State *J)
{
  BCIns *pc = mref(J->cur.startpc, BCIns);
  BCOp op = bc_op(J->cur.startins);
  GCproto *pt = &gcref(J->cur.startpt)->pt;
  TraceNo traceno = J->cur.traceno;
  lua_State *L;

  switch (op) {
  case BC_FORL:
    setbc_op(pc+bc_j(J->cur.startins), BC_JFORI);  /* Patch FORI, too. */
    /* fallthrough */
  case BC_LOOP:
  case BC_ITERL:
  case BC_FUNCF:
    /* Patch bytecode of starting instruction in root trace. */
    setbc_op(pc, (int)op+(int)BC_JLOOP-(int)BC_LOOP);
    setbc_d(pc, traceno);  /* Operand d links the bytecode to the trace. */
  addroot:
    /* Add to root trace chain in prototype. */
    J->cur.nextroot = pt->trace;
    pt->trace = (TraceNo1)traceno;
    break;
  case BC_RET:
  case BC_RET0:
  case BC_RET1:
    /* Return trace: replace the return with a JLOOP into the trace. */
    *pc = BCINS_AD(BC_JLOOP, J->cur.snap[0].nslots, traceno);
    goto addroot;
  case BC_JMP:
    /* Patch exit branch in parent to side trace entry. */
    lua_assert(J->parent != 0 && J->cur.root != 0);
    lj_asm_patchexit(J, traceref(J, J->parent), J->exitno, J->cur.mcode);
    /* Avoid compiling a side trace twice (stack resizing uses parent exit). */
    traceref(J, J->parent)->snap[J->exitno].count = SNAPCOUNT_DONE;
    /* Add to side trace chain in root trace. */
    {
      GCtrace *root = traceref(J, J->cur.root);
      root->nchild++;
      J->cur.nextside = root->nextside;
      root->nextside = (TraceNo1)traceno;
    }
    break;
  case BC_CALLM:
  case BC_CALL:
  case BC_ITERC:
    /* Trace stitching: patch link of previous trace. */
    traceref(J, J->exitno)->link = traceno;
    break;
  default:
    lua_assert(0);  /* Unexpected start instruction. */
    break;
  }

  /* Commit new mcode only after all patching is done. */
  lj_mcode_commit(J, J->cur.mcode);
  J->postproc = LJ_POST_NONE;
  trace_save(J);  /* Compact and publish the finished trace. */

  L = J->L;
  /* Notify vmevent handlers that the trace was finished. */
  lj_vmevent_send(L, TRACE,
    setstrV(L, L->top++, lj_str_newlit(L, "stop"));
    setintV(L->top++, traceno);
    setfuncV(L, L->top++, J->fn);
  );
}

  443. /* Start a new root trace for down-recursion. */
  444. static int trace_downrec(jit_State *J)
  445. {
  446.   /* Restart recording at the return instruction. */
  447.   lua_assert(J->pt != NULL);
  448.   lua_assert(bc_isret(bc_op(*J->pc)));
  449.   if (bc_op(*J->pc) == BC_RETM)
  450.     return 0/* NYI: down-recursion with RETM. */
  451.   J->parent = 0;
  452.   J->exitno = 0;
  453.   J->state = LJ_TRACE_RECORD;
  454.   trace_start(J);
  455.   return 1;
  456. }

  457. /* Abort tracing. */
  458. static int trace_abort(jit_State *J)
  459. {
  460.   lua_State *L = J->L;
  461.   TraceError e = LJ_TRERR_RECERR;
  462.   TraceNo traceno;

  463.   J->postproc = LJ_POST_NONE;
  464.   lj_mcode_abort(J);
  465.   if (tvisnumber(L->top-1))
  466.     e = (TraceError)numberVint(L->top-1);
  467.   if (e == LJ_TRERR_MCODELM) {
  468.     L->top--;  /* Remove error object */
  469.     J->state = LJ_TRACE_ASM;
  470.     return 1/* Retry ASM with new MCode area. */
  471.   }
  472.   /* Penalize or blacklist starting bytecode instruction. */
  473.   if (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))) {
  474.     if (J->exitno == 0)
  475.       penalty_pc(J, &gcref(J->cur.startpt)->pt, mref(J->cur.startpc, BCIns), e);
  476.     else
  477.       traceref(J, J->exitno)->link = J->exitno;  /* Self-link is blacklisted. */
  478.   }

  479.   /* Is there anything to abort? */
  480.   traceno = J->cur.traceno;
  481.   if (traceno) {
  482.     ptrdiff_t errobj = savestack(L, L->top-1);  /* Stack may be resized. */
  483.     J->cur.link = 0;
  484.     J->cur.linktype = LJ_TRLINK_NONE;
  485.     lj_vmevent_send(L, TRACE,
  486.       TValue *frame;
  487.       const BCIns *pc;
  488.       GCfunc *fn;
  489.       setstrV(L, L->top++, lj_str_newlit(L, "abort"));
  490.       setintV(L->top++, traceno);
  491.       /* Find original Lua function call to generate a better error message. */
  492.       frame = J->L->base-1;
  493.       pc = J->pc;
  494.       while (!isluafunc(frame_func(frame))) {
  495.         pc = (frame_iscont(frame) ? frame_contpc(frame) : frame_pc(frame)) - 1;
  496.         frame = frame_prev(frame);
  497.       }
  498.       fn = frame_func(frame);
  499.       setfuncV(L, L->top++, fn);
  500.       setintV(L->top++, proto_bcpos(funcproto(fn), pc));
  501.       copyTV(L, L->top++, restorestack(L, errobj));
  502.       copyTV(L, L->top++, &J->errinfo);
  503.     );
  504.     /* Drop aborted trace after the vmevent (which may still access it). */
  505.     setgcrefnull(J->trace[traceno]);
  506.     if (traceno < J->freetrace)
  507.       J->freetrace = traceno;
  508.     J->cur.traceno = 0;
  509.   }
  510.   L->top--;  /* Remove error object */
  511.   if (e == LJ_TRERR_DOWNREC)
  512.     return trace_downrec(J);
  513.   else if (e == LJ_TRERR_MCODEAL)
  514.     lj_trace_flushall(L);
  515.   return 0;
  516. }

  517. /* Perform pending re-patch of a bytecode instruction. */
  518. static LJ_AINLINE void trace_pendpatch(jit_State *J, int force)
  519. {
  520.   if (LJ_UNLIKELY(J->patchpc)) {
  521.     if (force || J->bcskip == 0) {
  522.       *J->patchpc = J->patchins;
  523.       J->patchpc = NULL;
  524.     } else {
  525.       J->bcskip = 0;
  526.     }
  527.   }
  528. }

  529. /* State machine for the trace compiler. Protected callback. */
  530. static TValue *trace_state(lua_State *L, lua_CFunction dummy, void *ud)
  531. {
  532.   jit_State *J = (jit_State *)ud;
  533.   UNUSED(dummy);
  534.   do {
  535.   retry:
  536.     switch (J->state) {
  537.     case LJ_TRACE_START:
  538.       J->state = LJ_TRACE_RECORD;  /* trace_start() may change state. */
  539.       trace_start(J);
  540.       lj_dispatch_update(J2G(J));
  541.       break;

  542.     case LJ_TRACE_RECORD:
  543.       trace_pendpatch(J, 0);
  544.       setvmstate(J2G(J), RECORD);
  545.       lj_vmevent_send_(L, RECORD,
  546.         /* Save/restore tmptv state for trace recorder. */
  547.         TValue savetv = J2G(J)->tmptv;
  548.         TValue savetv2 = J2G(J)->tmptv2;
  549.         setintV(L->top++, J->cur.traceno);
  550.         setfuncV(L, L->top++, J->fn);
  551.         setintV(L->top++, J->pt ? (int32_t)proto_bcpos(J->pt, J->pc) : -1);
  552.         setintV(L->top++, J->framedepth);
  553.       ,
  554.         J2G(J)->tmptv = savetv;
  555.         J2G(J)->tmptv2 = savetv2;
  556.       );
  557.       lj_record_ins(J);
  558.       break;

  559.     case LJ_TRACE_END:
  560.       trace_pendpatch(J, 1);
  561.       J->loopref = 0;
  562.       if ((J->flags & JIT_F_OPT_LOOP) &&
  563.           J->cur.link == J->cur.traceno && J->framedepth + J->retdepth == 0) {
  564.         setvmstate(J2G(J), OPT);
  565.         lj_opt_dce(J);
  566.         if (lj_opt_loop(J)) {  /* Loop optimization failed? */
  567.           J->cur.link = 0;
  568.           J->cur.linktype = LJ_TRLINK_NONE;
  569.           J->loopref = J->cur.nins;
  570.           J->state = LJ_TRACE_RECORD;  /* Try to continue recording. */
  571.           break;
  572.         }
  573.         J->loopref = J->chain[IR_LOOP];  /* Needed by assembler. */
  574.       }
  575.       lj_opt_split(J);
  576.       lj_opt_sink(J);
  577.       if (!J->loopref) J->cur.snap[J->cur.nsnap-1].count = SNAPCOUNT_DONE;
  578.       J->state = LJ_TRACE_ASM;
  579.       break;

  580.     case LJ_TRACE_ASM:
  581.       setvmstate(J2G(J), ASM);
  582.       lj_asm_trace(J, &J->cur);
  583.       trace_stop(J);
  584.       setvmstate(J2G(J), INTERP);
  585.       J->state = LJ_TRACE_IDLE;
  586.       lj_dispatch_update(J2G(J));
  587.       return NULL;

  588.     default:  /* Trace aborted asynchronously. */
  589.       setintV(L->top++, (int32_t)LJ_TRERR_RECERR);
  590.       /* fallthrough */
  591.     case LJ_TRACE_ERR:
  592.       trace_pendpatch(J, 1);
  593.       if (trace_abort(J))
  594.         goto retry;
  595.       setvmstate(J2G(J), INTERP);
  596.       J->state = LJ_TRACE_IDLE;
  597.       lj_dispatch_update(J2G(J));
  598.       return NULL;
  599.     }
  600.   } while (J->state > LJ_TRACE_RECORD);
  601.   return NULL;
  602. }

  603. /* -- Event handling ------------------------------------------------------ */

  604. /* A bytecode instruction is about to be executed. Record it. */
  605. void lj_trace_ins(jit_State *J, const BCIns *pc)
  606. {
  607.   /* Note: J->L must already be set. pc is the true bytecode PC here. */
  608.   J->pc = pc;
  609.   J->fn = curr_func(J->L);
  610.   J->pt = isluafunc(J->fn) ? funcproto(J->fn) : NULL;
  611.   while (lj_vm_cpcall(J->L, NULL, (void *)J, trace_state) != 0)
  612.     J->state = LJ_TRACE_ERR;
  613. }

  614. /* A hotcount triggered. Start recording a root trace. */
  615. void LJ_FASTCALL lj_trace_hot(jit_State *J, const BCIns *pc)
  616. {
  617.   /* Note: pc is the interpreter bytecode PC here. It's offset by 1. */
  618.   ERRNO_SAVE
  619.   /* Reset hotcount. */
  620.   hotcount_set(J2GG(J), pc, J->param[JIT_P_hotloop]*HOTCOUNT_LOOP);
  621.   /* Only start a new trace if not recording or inside __gc call or vmevent. */
  622.   if (J->state == LJ_TRACE_IDLE &&
  623.       !(J2G(J)->hookmask & (HOOK_GC|HOOK_VMEVENT))) {
  624.     J->parent = 0/* Root trace. */
  625.     J->exitno = 0;
  626.     J->state = LJ_TRACE_START;
  627.     lj_trace_ins(J, pc-1);
  628.   }
  629.   ERRNO_RESTORE
  630. }

  631. /* Check for a hot side exit. If yes, start recording a side trace. */
  632. static void trace_hotside(jit_State *J, const BCIns *pc)
  633. {
  634.   SnapShot *snap = &traceref(J, J->parent)->snap[J->exitno];
  635.   if (!(J2G(J)->hookmask & (HOOK_GC|HOOK_VMEVENT)) &&
  636.       isluafunc(curr_func(J->L)) &&
  637.       snap->count != SNAPCOUNT_DONE &&
  638.       ++snap->count >= J->param[JIT_P_hotexit]) {
  639.     lua_assert(J->state == LJ_TRACE_IDLE);
  640.     /* J->parent is non-zero for a side trace. */
  641.     J->state = LJ_TRACE_START;
  642.     lj_trace_ins(J, pc);
  643.   }
  644. }

  645. /* Stitch a new trace to the previous trace. */
  646. void LJ_FASTCALL lj_trace_stitch(jit_State *J, const BCIns *pc)
  647. {
  648.   /* Only start a new trace if not recording or inside __gc call or vmevent. */
  649.   if (J->state == LJ_TRACE_IDLE &&
  650.       !(J2G(J)->hookmask & (HOOK_GC|HOOK_VMEVENT))) {
  651.     J->parent = 0/* Have to treat it like a root trace. */
  652.     /* J->exitno is set to the invoking trace. */
  653.     J->state = LJ_TRACE_START;
  654.     lj_trace_ins(J, pc);
  655.   }
  656. }


/* Tiny struct to pass data to the protected call in lj_trace_exit(). */
typedef struct ExitDataCP {
  jit_State *J;                /* JIT compiler state. */
  void *exptr;                /* Pointer to exit state. */
  const BCIns *pc;        /* Restart interpreter at this PC (out param). */
} ExitDataCP;

  663. /* Need to protect lj_snap_restore because it may throw. */
  664. static TValue *trace_exit_cp(lua_State *L, lua_CFunction dummy, void *ud)
  665. {
  666.   ExitDataCP *exd = (ExitDataCP *)ud;
  667.   cframe_errfunc(L->cframe) = -1/* Inherit error function. */
  668.   exd->pc = lj_snap_restore(exd->J, exd->exptr);
  669.   UNUSED(dummy);
  670.   return NULL;
  671. }

  672. #ifndef LUAJIT_DISABLE_VMEVENT
  673. /* Push all registers from exit state. */
  674. static void trace_exit_regs(lua_State *L, ExitState *ex)
  675. {
  676.   int32_t i;
  677.   setintV(L->top++, RID_NUM_GPR);
  678.   setintV(L->top++, RID_NUM_FPR);
  679.   for (i = 0; i < RID_NUM_GPR; i++) {
  680.     if (sizeof(ex->gpr[i]) == sizeof(int32_t))
  681.       setintV(L->top++, (int32_t)ex->gpr[i]);
  682.     else
  683.       setnumV(L->top++, (lua_Number)ex->gpr[i]);
  684.   }
  685. #if !LJ_SOFTFP
  686.   for (i = 0; i < RID_NUM_FPR; i++) {
  687.     setnumV(L->top, ex->fpr[i]);
  688.     if (LJ_UNLIKELY(tvisnan(L->top)))
  689.       setnanV(L->top);
  690.     L->top++;
  691.   }
  692. #endif
  693. }
  694. #endif

  695. #ifdef EXITSTATE_PCREG
  696. /* Determine trace number from pc of exit instruction. */
  697. static TraceNo trace_exit_find(jit_State *J, MCode *pc)
  698. {
  699.   TraceNo traceno;
  700.   for (traceno = 1; traceno < J->sizetrace; traceno++) {
  701.     GCtrace *T = traceref(J, traceno);
  702.     if (T && pc >= T->mcode && pc < (MCode *)((char *)T->mcode + T->szmcode))
  703.       return traceno;
  704.   }
  705.   lua_assert(0);
  706.   return 0;
  707. }
  708. #endif

/* A trace exited. Restore interpreter state. Returns MULTRES, 0 or a
** negated error code if restoring the snapshot threw an error.
*/
int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr)
{
  ERRNO_SAVE
  lua_State *L = J->L;
  ExitState *ex = (ExitState *)exptr;
  ExitDataCP exd;
  int errcode;
  const BCIns *pc;
  void *cf;
  GCtrace *T;
#ifdef EXITSTATE_PCREG
  /* No explicit exit trace number: recover it from the exit pc register. */
  J->parent = trace_exit_find(J, (MCode *)(intptr_t)ex->gpr[EXITSTATE_PCREG]);
#endif
  T = traceref(J, J->parent); UNUSED(T);
#ifdef EXITSTATE_CHECKEXIT
  if (J->exitno == T->nsnap) {  /* Treat stack check like a parent exit. */
    lua_assert(T->root != 0);
    J->exitno = T->ir[REF_BASE].op2;
    J->parent = T->ir[REF_BASE].op1;
    T = traceref(J, J->parent);
  }
#endif
  lua_assert(T != NULL && J->exitno < T->nsnap);
  exd.J = J;
  exd.exptr = exptr;
  /* Restore the stack from the snapshot in a protected call (may throw). */
  errcode = lj_vm_cpcall(L, NULL, &exd, trace_exit_cp);
  if (errcode)
    return -errcode;  /* Return negated error code. */

  if (!(LJ_HASPROFILE && (G(L)->hookmask & HOOK_PROFILE)))
    lj_vmevent_send(L, TEXIT,
      lj_state_checkstack(L, 4+RID_NUM_GPR+RID_NUM_FPR+LUA_MINSTACK);
      setintV(L->top++, J->parent);
      setintV(L->top++, J->exitno);
      trace_exit_regs(L, ex);
    );

  pc = exd.pc;
  cf = cframe_raw(L->cframe);
  setcframe_pc(cf, pc);  /* Interpreter resumes here. */
  if (LJ_HASPROFILE && (G(L)->hookmask & HOOK_PROFILE)) {
    /* Just exit to interpreter. */
  } else if (G(L)->gc.state == GCSatomic || G(L)->gc.state == GCSfinalize) {
    if (!(G(L)->hookmask & HOOK_GC))
      lj_gc_step(L);  /* Exited because of GC: drive GC forward. */
  } else {
    trace_hotside(J, pc);  /* Maybe start recording a side trace. */
  }
  if (bc_op(*pc) == BC_JLOOP) {
    /* The trace at this JLOOP started at a return instruction. */
    BCIns *retpc = &traceref(J, bc_d(*pc))->startins;
    if (bc_isret(bc_op(*retpc))) {
      if (J->state == LJ_TRACE_RECORD) {
        /* Temporarily unpatch so the recorder sees the original return;
        ** trace_pendpatch() re-applies the JLOOP later.
        */
        J->patchins = *pc;
        J->patchpc = (BCIns *)pc;
        *J->patchpc = *retpc;
        J->bcskip = 1;
      } else {
        pc = retpc;
        setcframe_pc(cf, pc);
      }
    }
  }
  /* Return MULTRES or 0. */
  ERRNO_RESTORE
  switch (bc_op(*pc)) {
  case BC_CALLM: case BC_CALLMT:
    return (int)((BCReg)(L->top - L->base) - bc_a(*pc) - bc_c(*pc) + LJ_FR2);
  case BC_RETM:
    return (int)((BCReg)(L->top - L->base) + 1 - bc_a(*pc) - bc_d(*pc));
  case BC_TSETM:
    return (int)((BCReg)(L->top - L->base) + 1 - bc_a(*pc));
  default:
    if (bc_op(*pc) >= BC_FUNCF)
      return (int)((BCReg)(L->top - L->base) + 1);
    return 0;
  }
}

  785. #endif