src/lj_snap.c - luajit-2.0-src

Functions defined

Macros defined

Source code

  1. /*
  2. ** Snapshot handling.
  3. ** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
  4. */

  5. #define lj_snap_c
  6. #define LUA_CORE

  7. #include "lj_obj.h"

  8. #if LJ_HASJIT

  9. #include "lj_gc.h"
  10. #include "lj_tab.h"
  11. #include "lj_state.h"
  12. #include "lj_frame.h"
  13. #include "lj_bc.h"
  14. #include "lj_ir.h"
  15. #include "lj_jit.h"
  16. #include "lj_iropt.h"
  17. #include "lj_trace.h"
  18. #include "lj_snap.h"
  19. #include "lj_target.h"
  20. #if LJ_HASFFI
  21. #include "lj_ctype.h"
  22. #include "lj_cdata.h"
  23. #endif

  24. /* Some local macros to save typing. Undef'd at the end. */
  25. #define IR(ref)                (&J->cur.ir[(ref)])

  26. /* Pass IR on to next optimization in chain (FOLD). */
  27. #define emitir(ot, a, b)        (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))

  28. /* Emit raw IR without passing through optimizations. */
  29. #define emitir_raw(ot, a, b)        (lj_ir_set(J, (ot), (a), (b)), lj_ir_emit(J))

  30. /* -- Snapshot buffer allocation ------------------------------------------ */

  31. /* Grow snapshot buffer. */
  32. void lj_snap_grow_buf_(jit_State *J, MSize need)
  33. {
  34.   MSize maxsnap = (MSize)J->param[JIT_P_maxsnap];
  35.   if (need > maxsnap)
  36.     lj_trace_err(J, LJ_TRERR_SNAPOV);
  37.   lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot);
  38.   J->cur.snap = J->snapbuf;
  39. }

  40. /* Grow snapshot map buffer. */
  41. void lj_snap_grow_map_(jit_State *J, MSize need)
  42. {
  43.   if (need < 2*J->sizesnapmap)
  44.     need = 2*J->sizesnapmap;
  45.   else if (need < 64)
  46.     need = 64;
  47.   J->snapmapbuf = (SnapEntry *)lj_mem_realloc(J->L, J->snapmapbuf,
  48.                     J->sizesnapmap*sizeof(SnapEntry), need*sizeof(SnapEntry));
  49.   J->cur.snapmap = J->snapmapbuf;
  50.   J->sizesnapmap = need;
  51. }

  52. /* -- Snapshot generation ------------------------------------------------- */

  53. /* Add all modified slots to the snapshot. */
  54. static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots)
  55. {
  56.   IRRef retf = J->chain[IR_RETF];  /* Limits SLOAD restore elimination. */
  57.   BCReg s;
  58.   MSize n = 0;
  59.   for (s = 0; s < nslots; s++) {
  60.     TRef tr = J->slot[s];
  61.     IRRef ref = tref_ref(tr);
  62.     if (ref) {
  63.       SnapEntry sn = SNAP_TR(s, tr);
  64.       IRIns *ir = IR(ref);
  65.       if (!(sn & (SNAP_CONT|SNAP_FRAME)) &&
  66.           ir->o == IR_SLOAD && ir->op1 == s && ref > retf) {
  67.         /* No need to snapshot unmodified non-inherited slots. */
  68.         if (!(ir->op2 & IRSLOAD_INHERIT))
  69.           continue;
  70.         /* No need to restore readonly slots and unmodified non-parent slots. */
  71.         if (!(LJ_DUALNUM && (ir->op2 & IRSLOAD_CONVERT)) &&
  72.             (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT)
  73.           sn |= SNAP_NORESTORE;
  74.       }
  75.       if (LJ_SOFTFP && irt_isnum(ir->t))
  76.         sn |= SNAP_SOFTFPNUM;
  77.       map[n++] = sn;
  78.     }
  79.   }
  80.   return n;
  81. }

  82. /* Add frame links at the end of the snapshot. */
  83. static BCReg snapshot_framelinks(jit_State *J, SnapEntry *map)
  84. {
  85.   cTValue *frame = J->L->base - 1;
  86.   cTValue *lim = J->L->base - J->baseslot;
  87.   GCfunc *fn = frame_func(frame);
  88.   cTValue *ftop = isluafunc(fn) ? (frame+funcproto(fn)->framesize) : J->L->top;
  89.   MSize f = 0;
  90.   lua_assert(!LJ_FR2);  /* TODO_FR2: store 64 bit PCs. */
  91.   map[f++] = SNAP_MKPC(J->pc);  /* The current PC is always the first entry. */
  92.   while (frame > lim) {  /* Backwards traversal of all frames above base. */
  93.     if (frame_islua(frame)) {
  94.       map[f++] = SNAP_MKPC(frame_pc(frame));
  95.       frame = frame_prevl(frame);
  96.     } else if (frame_iscont(frame)) {
  97.       map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
  98.       map[f++] = SNAP_MKPC(frame_contpc(frame));
  99.       frame = frame_prevd(frame);
  100.     } else {
  101.       lua_assert(!frame_isc(frame));
  102.       map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
  103.       frame = frame_prevd(frame);
  104.       continue;
  105.     }
  106.     if (frame + funcproto(frame_func(frame))->framesize > ftop)
  107.       ftop = frame + funcproto(frame_func(frame))->framesize;
  108.   }
  109.   lua_assert(f == (MSize)(1 + J->framedepth));
  110.   return (BCReg)(ftop - lim);
  111. }

  112. /* Take a snapshot of the current stack. */
  113. static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap)
  114. {
  115.   BCReg nslots = J->baseslot + J->maxslot;
  116.   MSize nent;
  117.   SnapEntry *p;
  118.   /* Conservative estimate. */
  119.   lj_snap_grow_map(J, nsnapmap + nslots + (MSize)J->framedepth+1);
  120.   p = &J->cur.snapmap[nsnapmap];
  121.   nent = snapshot_slots(J, p, nslots);
  122.   snap->topslot = (uint8_t)snapshot_framelinks(J, p + nent);
  123.   snap->mapofs = (uint16_t)nsnapmap;
  124.   snap->ref = (IRRef1)J->cur.nins;
  125.   snap->nent = (uint8_t)nent;
  126.   snap->nslots = (uint8_t)nslots;
  127.   snap->count = 0;
  128.   J->cur.nsnapmap = (uint16_t)(nsnapmap + nent + 1 + J->framedepth);
  129. }

  130. /* Add or merge a snapshot. */
  131. void lj_snap_add(jit_State *J)
  132. {
  133.   MSize nsnap = J->cur.nsnap;
  134.   MSize nsnapmap = J->cur.nsnapmap;
  135.   /* Merge if no ins. inbetween or if requested and no guard inbetween. */
  136.   if (J->mergesnap ? !irt_isguard(J->guardemit) :
  137.       (nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins)) {
  138.     if (nsnap == 1) {  /* But preserve snap #0 PC. */
  139.       emitir_raw(IRT(IR_NOP, IRT_NIL), 0, 0);
  140.       goto nomerge;
  141.     }
  142.     nsnapmap = J->cur.snap[--nsnap].mapofs;
  143.   } else {
  144.   nomerge:
  145.     lj_snap_grow_buf(J, nsnap+1);
  146.     J->cur.nsnap = (uint16_t)(nsnap+1);
  147.   }
  148.   J->mergesnap = 0;
  149.   J->guardemit.irt = 0;
  150.   snapshot_stack(J, &J->cur.snap[nsnap], nsnapmap);
  151. }

  152. /* -- Snapshot modification ----------------------------------------------- */

  153. #define SNAP_USEDEF_SLOTS        (LJ_MAX_JSLOTS+LJ_STACK_EXTRA)

  154. /* Find unused slots with reaching-definitions bytecode data-flow analysis. */
  155. static BCReg snap_usedef(jit_State *J, uint8_t *udf,
  156.                          const BCIns *pc, BCReg maxslot)
  157. {
  158.   BCReg s;
  159.   GCobj *o;

  160.   if (maxslot == 0) return 0;
  161. #ifdef LUAJIT_USE_VALGRIND
  162.   /* Avoid errors for harmless reads beyond maxslot. */
  163.   memset(udf, 1, SNAP_USEDEF_SLOTS);
  164. #else
  165.   memset(udf, 1, maxslot);
  166. #endif

  167.   /* Treat open upvalues as used. */
  168.   o = gcref(J->L->openupval);
  169.   while (o) {
  170.     if (uvval(gco2uv(o)) < J->L->base) break;
  171.     udf[uvval(gco2uv(o)) - J->L->base] = 0;
  172.     o = gcref(o->gch.nextgc);
  173.   }

  174. #define USE_SLOT(s)                udf[(s)] &= ~1
  175. #define DEF_SLOT(s)                udf[(s)] *= 3

  176.   /* Scan through following bytecode and check for uses/defs. */
  177.   lua_assert(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc);
  178.   for (;;) {
  179.     BCIns ins = *pc++;
  180.     BCOp op = bc_op(ins);
  181.     switch (bcmode_b(op)) {
  182.     case BCMvar: USE_SLOT(bc_b(ins)); break;
  183.     default: break;
  184.     }
  185.     switch (bcmode_c(op)) {
  186.     case BCMvar: USE_SLOT(bc_c(ins)); break;
  187.     case BCMrbase:
  188.       lua_assert(op == BC_CAT);
  189.       for (s = bc_b(ins); s <= bc_c(ins); s++) USE_SLOT(s);
  190.       for (; s < maxslot; s++) DEF_SLOT(s);
  191.       break;
  192.     case BCMjump:
  193.     handle_jump: {
  194.       BCReg minslot = bc_a(ins);
  195.       if (op >= BC_FORI && op <= BC_JFORL) minslot += FORL_EXT;
  196.       else if (op >= BC_ITERL && op <= BC_JITERL) minslot += bc_b(pc[-2])-1;
  197.       else if (op == BC_UCLO) { pc += bc_j(ins); break; }
  198.       for (s = minslot; s < maxslot; s++) DEF_SLOT(s);
  199.       return minslot < maxslot ? minslot : maxslot;
  200.       }
  201.     case BCMlit:
  202.       if (op == BC_JFORL || op == BC_JITERL || op == BC_JLOOP) {
  203.         goto handle_jump;
  204.       } else if (bc_isret(op)) {
  205.         BCReg top = op == BC_RETM ? maxslot : (bc_a(ins) + bc_d(ins)-1);
  206.         for (s = 0; s < bc_a(ins); s++) DEF_SLOT(s);
  207.         for (; s < top; s++) USE_SLOT(s);
  208.         for (; s < maxslot; s++) DEF_SLOT(s);
  209.         return 0;
  210.       }
  211.       break;
  212.     case BCMfunc: return maxslot;  /* NYI: will abort, anyway. */
  213.     default: break;
  214.     }
  215.     switch (bcmode_a(op)) {
  216.     case BCMvar: USE_SLOT(bc_a(ins)); break;
  217.     case BCMdst:
  218.        if (!(op == BC_ISTC || op == BC_ISFC)) DEF_SLOT(bc_a(ins));
  219.        break;
  220.     case BCMbase:
  221.       if (op >= BC_CALLM && op <= BC_VARG) {
  222.         BCReg top = (op == BC_CALLM || op == BC_CALLMT || bc_c(ins) == 0) ?
  223.                     maxslot : (bc_a(ins) + bc_c(ins)+LJ_FR2);
  224.         if (LJ_FR2) DEF_SLOT(bc_a(ins)+1);
  225.         s = bc_a(ins) - ((op == BC_ITERC || op == BC_ITERN) ? 3 : 0);
  226.         for (; s < top; s++) USE_SLOT(s);
  227.         for (; s < maxslot; s++) DEF_SLOT(s);
  228.         if (op == BC_CALLT || op == BC_CALLMT) {
  229.           for (s = 0; s < bc_a(ins); s++) DEF_SLOT(s);
  230.           return 0;
  231.         }
  232.       } else if (op == BC_KNIL) {
  233.         for (s = bc_a(ins); s <= bc_d(ins); s++) DEF_SLOT(s);
  234.       } else if (op == BC_TSETM) {
  235.         for (s = bc_a(ins)-1; s < maxslot; s++) USE_SLOT(s);
  236.       }
  237.       break;
  238.     default: break;
  239.     }
  240.     lua_assert(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc);
  241.   }

  242. #undef USE_SLOT
  243. #undef DEF_SLOT

  244.   return 0/* unreachable */
  245. }

  246. /* Purge dead slots before the next snapshot. */
  247. void lj_snap_purge(jit_State *J)
  248. {
  249.   uint8_t udf[SNAP_USEDEF_SLOTS];
  250.   BCReg maxslot = J->maxslot;
  251.   BCReg s = snap_usedef(J, udf, J->pc, maxslot);
  252.   for (; s < maxslot; s++)
  253.     if (udf[s] != 0)
  254.       J->base[s] = 0/* Purge dead slots. */
  255. }

  256. /* Shrink last snapshot. */
  257. void lj_snap_shrink(jit_State *J)
  258. {
  259.   SnapShot *snap = &J->cur.snap[J->cur.nsnap-1];
  260.   SnapEntry *map = &J->cur.snapmap[snap->mapofs];
  261.   MSize n, m, nlim, nent = snap->nent;
  262.   uint8_t udf[SNAP_USEDEF_SLOTS];
  263.   BCReg maxslot = J->maxslot;
  264.   BCReg minslot = snap_usedef(J, udf, snap_pc(map[nent]), maxslot);
  265.   BCReg baseslot = J->baseslot;
  266.   maxslot += baseslot;
  267.   minslot += baseslot;
  268.   snap->nslots = (uint8_t)maxslot;
  269.   for (n = m = 0; n < nent; n++) {  /* Remove unused slots from snapshot. */
  270.     BCReg s = snap_slot(map[n]);
  271.     if (s < minslot || (s < maxslot && udf[s-baseslot] == 0))
  272.       map[m++] = map[n];  /* Only copy used slots. */
  273.   }
  274.   snap->nent = (uint8_t)m;
  275.   nlim = J->cur.nsnapmap - snap->mapofs - 1;
  276.   while (n <= nlim) map[m++] = map[n++];  /* Move PC + frame links down. */
  277.   J->cur.nsnapmap = (uint16_t)(snap->mapofs + m);  /* Free up space in map. */
  278. }

  279. /* -- Snapshot access ----------------------------------------------------- */

  280. /* Initialize a Bloom Filter with all renamed refs.
  281. ** There are very few renames (often none), so the filter has
  282. ** very few bits set. This makes it suitable for negative filtering.
  283. */
  284. static BloomFilter snap_renamefilter(GCtrace *T, SnapNo lim)
  285. {
  286.   BloomFilter rfilt = 0;
  287.   IRIns *ir;
  288.   for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--)
  289.     if (ir->op2 <= lim)
  290.       bloomset(rfilt, ir->op1);
  291.   return rfilt;
  292. }

  293. /* Process matching renames to find the original RegSP. */
  294. static RegSP snap_renameref(GCtrace *T, SnapNo lim, IRRef ref, RegSP rs)
  295. {
  296.   IRIns *ir;
  297.   for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--)
  298.     if (ir->op1 == ref && ir->op2 <= lim)
  299.       rs = ir->prev;
  300.   return rs;
  301. }

  302. /* Copy RegSP from parent snapshot to the parent links of the IR. */
  303. IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir)
  304. {
  305.   SnapShot *snap = &T->snap[snapno];
  306.   SnapEntry *map = &T->snapmap[snap->mapofs];
  307.   BloomFilter rfilt = snap_renamefilter(T, snapno);
  308.   MSize n = 0;
  309.   IRRef ref = 0;
  310.   for ( ; ; ir++) {
  311.     uint32_t rs;
  312.     if (ir->o == IR_SLOAD) {
  313.       if (!(ir->op2 & IRSLOAD_PARENT)) break;
  314.       for ( ; ; n++) {
  315.         lua_assert(n < snap->nent);
  316.         if (snap_slot(map[n]) == ir->op1) {
  317.           ref = snap_ref(map[n++]);
  318.           break;
  319.         }
  320.       }
  321.     } else if (LJ_SOFTFP && ir->o == IR_HIOP) {
  322.       ref++;
  323.     } else if (ir->o == IR_PVAL) {
  324.       ref = ir->op1 + REF_BIAS;
  325.     } else {
  326.       break;
  327.     }
  328.     rs = T->ir[ref].prev;
  329.     if (bloomtest(rfilt, ref))
  330.       rs = snap_renameref(T, snapno, ref, rs);
  331.     ir->prev = (uint16_t)rs;
  332.     lua_assert(regsp_used(rs));
  333.   }
  334.   return ir;
  335. }

  336. /* -- Snapshot replay ----------------------------------------------------- */

  337. /* Replay constant from parent trace. */
  338. static TRef snap_replay_const(jit_State *J, IRIns *ir)
  339. {
  340.   /* Only have to deal with constants that can occur in stack slots. */
  341.   switch ((IROp)ir->o) {
  342.   case IR_KPRI: return TREF_PRI(irt_type(ir->t));
  343.   case IR_KINT: return lj_ir_kint(J, ir->i);
  344.   case IR_KGC: return lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t));
  345.   case IR_KNUM: return lj_ir_k64(J, IR_KNUM, ir_knum(ir));
  346.   case IR_KINT64: return lj_ir_k64(J, IR_KINT64, ir_kint64(ir));
  347.   case IR_KPTR: return lj_ir_kptr(J, ir_kptr(ir));  /* Continuation. */
  348.   default: lua_assert(0); return TREF_NIL; break;
  349.   }
  350. }

  351. /* De-duplicate parent reference. */
  352. static TRef snap_dedup(jit_State *J, SnapEntry *map, MSize nmax, IRRef ref)
  353. {
  354.   MSize j;
  355.   for (j = 0; j < nmax; j++)
  356.     if (snap_ref(map[j]) == ref)
  357.       return J->slot[snap_slot(map[j])] & ~(SNAP_CONT|SNAP_FRAME);
  358.   return 0;
  359. }

  360. /* Emit parent reference with de-duplication. */
  361. static TRef snap_pref(jit_State *J, GCtrace *T, SnapEntry *map, MSize nmax,
  362.                       BloomFilter seen, IRRef ref)
  363. {
  364.   IRIns *ir = &T->ir[ref];
  365.   TRef tr;
  366.   if (irref_isk(ref))
  367.     tr = snap_replay_const(J, ir);
  368.   else if (!regsp_used(ir->prev))
  369.     tr = 0;
  370.   else if (!bloomtest(seen, ref) || (tr = snap_dedup(J, map, nmax, ref)) == 0)
  371.     tr = emitir(IRT(IR_PVAL, irt_type(ir->t)), ref - REF_BIAS, 0);
  372.   return tr;
  373. }

  374. /* Check whether a sunk store corresponds to an allocation. Slow path. */
  375. static int snap_sunk_store2(jit_State *J, IRIns *ira, IRIns *irs)
  376. {
  377.   if (irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
  378.       irs->o == IR_FSTORE || irs->o == IR_XSTORE) {
  379.     IRIns *irk = IR(irs->op1);
  380.     if (irk->o == IR_AREF || irk->o == IR_HREFK)
  381.       irk = IR(irk->op1);
  382.     return (IR(irk->op1) == ira);
  383.   }
  384.   return 0;
  385. }

  386. /* Check whether a sunk store corresponds to an allocation. Fast path. */
  387. static LJ_AINLINE int snap_sunk_store(jit_State *J, IRIns *ira, IRIns *irs)
  388. {
  389.   if (irs->s != 255)
  390.     return (ira + irs->s == irs);  /* Fast check. */
  391.   return snap_sunk_store2(J, ira, irs);
  392. }

  393. /* Replay snapshot state to setup side trace. */
  394. void lj_snap_replay(jit_State *J, GCtrace *T)
  395. {
  396.   SnapShot *snap = &T->snap[J->exitno];
  397.   SnapEntry *map = &T->snapmap[snap->mapofs];
  398.   MSize n, nent = snap->nent;
  399.   BloomFilter seen = 0;
  400.   int pass23 = 0;
  401.   J->framedepth = 0;
  402.   /* Emit IR for slots inherited from parent snapshot. */
  403.   for (n = 0; n < nent; n++) {
  404.     SnapEntry sn = map[n];
  405.     BCReg s = snap_slot(sn);
  406.     IRRef ref = snap_ref(sn);
  407.     IRIns *ir = &T->ir[ref];
  408.     TRef tr;
  409.     /* The bloom filter avoids O(nent^2) overhead for de-duping slots. */
  410.     if (bloomtest(seen, ref) && (tr = snap_dedup(J, map, n, ref)) != 0)
  411.       goto setslot;
  412.     bloomset(seen, ref);
  413.     if (irref_isk(ref)) {
  414.       tr = snap_replay_const(J, ir);
  415.     } else if (!regsp_used(ir->prev)) {
  416.       pass23 = 1;
  417.       lua_assert(s != 0);
  418.       tr = s;
  419.     } else {
  420.       IRType t = irt_type(ir->t);
  421.       uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT;
  422.       if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM;
  423.       if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY);
  424.       tr = emitir_raw(IRT(IR_SLOAD, t), s, mode);
  425.     }
  426.   setslot:
  427.     J->slot[s] = tr | (sn&(SNAP_CONT|SNAP_FRAME));  /* Same as TREF_* flags. */
  428.     J->framedepth += ((sn & (SNAP_CONT|SNAP_FRAME)) && s);
  429.     if ((sn & SNAP_FRAME))
  430.       J->baseslot = s+1;
  431.   }
  432.   if (pass23) {
  433.     IRIns *irlast = &T->ir[snap->ref];
  434.     pass23 = 0;
  435.     /* Emit dependent PVALs. */
  436.     for (n = 0; n < nent; n++) {
  437.       SnapEntry sn = map[n];
  438.       IRRef refp = snap_ref(sn);
  439.       IRIns *ir = &T->ir[refp];
  440.       if (regsp_reg(ir->r) == RID_SUNK) {
  441.         if (J->slot[snap_slot(sn)] != snap_slot(sn)) continue;
  442.         pass23 = 1;
  443.         lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP ||
  444.                    ir->o == IR_CNEW || ir->o == IR_CNEWI);
  445.         if (ir->op1 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op1);
  446.         if (ir->op2 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op2);
  447.         if (LJ_HASFFI && ir->o == IR_CNEWI) {
  448.           if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP)
  449.             snap_pref(J, T, map, nent, seen, (ir+1)->op2);
  450.         } else {
  451.           IRIns *irs;
  452.           for (irs = ir+1; irs < irlast; irs++)
  453.             if (irs->r == RID_SINK && snap_sunk_store(J, ir, irs)) {
  454.               if (snap_pref(J, T, map, nent, seen, irs->op2) == 0)
  455.                 snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1);
  456.               else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) &&
  457.                        irs+1 < irlast && (irs+1)->o == IR_HIOP)
  458.                 snap_pref(J, T, map, nent, seen, (irs+1)->op2);
  459.             }
  460.         }
  461.       } else if (!irref_isk(refp) && !regsp_used(ir->prev)) {
  462.         lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
  463.         J->slot[snap_slot(sn)] = snap_pref(J, T, map, nent, seen, ir->op1);
  464.       }
  465.     }
  466.     /* Replay sunk instructions. */
  467.     for (n = 0; pass23 && n < nent; n++) {
  468.       SnapEntry sn = map[n];
  469.       IRRef refp = snap_ref(sn);
  470.       IRIns *ir = &T->ir[refp];
  471.       if (regsp_reg(ir->r) == RID_SUNK) {
  472.         TRef op1, op2;
  473.         if (J->slot[snap_slot(sn)] != snap_slot(sn)) {  /* De-dup allocs. */
  474.           J->slot[snap_slot(sn)] = J->slot[J->slot[snap_slot(sn)]];
  475.           continue;
  476.         }
  477.         op1 = ir->op1;
  478.         if (op1 >= T->nk) op1 = snap_pref(J, T, map, nent, seen, op1);
  479.         op2 = ir->op2;
  480.         if (op2 >= T->nk) op2 = snap_pref(J, T, map, nent, seen, op2);
  481.         if (LJ_HASFFI && ir->o == IR_CNEWI) {
  482.           if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP) {
  483.             lj_needsplit(J);  /* Emit joining HIOP. */
  484.             op2 = emitir_raw(IRT(IR_HIOP, IRT_I64), op2,
  485.                              snap_pref(J, T, map, nent, seen, (ir+1)->op2));
  486.           }
  487.           J->slot[snap_slot(sn)] = emitir(ir->ot, op1, op2);
  488.         } else {
  489.           IRIns *irs;
  490.           TRef tr = emitir(ir->ot, op1, op2);
  491.           J->slot[snap_slot(sn)] = tr;
  492.           for (irs = ir+1; irs < irlast; irs++)
  493.             if (irs->r == RID_SINK && snap_sunk_store(J, ir, irs)) {
  494.               IRIns *irr = &T->ir[irs->op1];
  495.               TRef val, key = irr->op2, tmp = tr;
  496.               if (irr->o != IR_FREF) {
  497.                 IRIns *irk = &T->ir[key];
  498.                 if (irr->o == IR_HREFK)
  499.                   key = lj_ir_kslot(J, snap_replay_const(J, &T->ir[irk->op1]),
  500.                                     irk->op2);
  501.                 else
  502.                   key = snap_replay_const(J, irk);
  503.                 if (irr->o == IR_HREFK || irr->o == IR_AREF) {
  504.                   IRIns *irf = &T->ir[irr->op1];
  505.                   tmp = emitir(irf->ot, tmp, irf->op2);
  506.                 }
  507.               }
  508.               tmp = emitir(irr->ot, tmp, key);
  509.               val = snap_pref(J, T, map, nent, seen, irs->op2);
  510.               if (val == 0) {
  511.                 IRIns *irc = &T->ir[irs->op2];
  512.                 lua_assert(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT);
  513.                 val = snap_pref(J, T, map, nent, seen, irc->op1);
  514.                 val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT);
  515.               } else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) &&
  516.                          irs+1 < irlast && (irs+1)->o == IR_HIOP) {
  517.                 IRType t = IRT_I64;
  518.                 if (LJ_SOFTFP && irt_type((irs+1)->t) == IRT_SOFTFP)
  519.                   t = IRT_NUM;
  520.                 lj_needsplit(J);
  521.                 if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) {
  522.                   uint64_t k = (uint32_t)T->ir[irs->op2].i +
  523.                                ((uint64_t)T->ir[(irs+1)->op2].i << 32);
  524.                   val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM,
  525.                                   lj_ir_k64_find(J, k));
  526.                 } else {
  527.                   val = emitir_raw(IRT(IR_HIOP, t), val,
  528.                           snap_pref(J, T, map, nent, seen, (irs+1)->op2));
  529.                 }
  530.                 tmp = emitir(IRT(irs->o, t), tmp, val);
  531.                 continue;
  532.               }
  533.               tmp = emitir(irs->ot, tmp, val);
  534.             } else if (LJ_HASFFI && irs->o == IR_XBAR && ir->o == IR_CNEW) {
  535.               emitir(IRT(IR_XBAR, IRT_NIL), 0, 0);
  536.             }
  537.         }
  538.       }
  539.     }
  540.   }
  541.   J->base = J->slot + J->baseslot;
  542.   J->maxslot = snap->nslots - J->baseslot;
  543.   lj_snap_add(J);
  544.   if (pass23)  /* Need explicit GC step _after_ initial snapshot. */
  545.     emitir_raw(IRTG(IR_GCSTEP, IRT_NIL), 0, 0);
  546. }

  547. /* -- Snapshot restore ---------------------------------------------------- */

  548. static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
  549.                         SnapNo snapno, BloomFilter rfilt,
  550.                         IRIns *ir, TValue *o);

  551. /* Restore a value from the trace exit state. */
  552. static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
  553.                             SnapNo snapno, BloomFilter rfilt,
  554.                             IRRef ref, TValue *o)
  555. {
  556.   IRIns *ir = &T->ir[ref];
  557.   IRType1 t = ir->t;
  558.   RegSP rs = ir->prev;
  559.   if (irref_isk(ref)) {  /* Restore constant slot. */
  560.     lj_ir_kvalue(J->L, o, ir);
  561.     return;
  562.   }
  563.   if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
  564.     rs = snap_renameref(T, snapno, ref, rs);
  565.   lua_assert(!LJ_GC64);  /* TODO_GC64: handle 64 bit references. */
  566.   if (ra_hasspill(regsp_spill(rs))) {  /* Restore from spill slot. */
  567.     int32_t *sps = &ex->spill[regsp_spill(rs)];
  568.     if (irt_isinteger(t)) {
  569.       setintV(o, *sps);
  570. #if !LJ_SOFTFP
  571.     } else if (irt_isnum(t)) {
  572.       o->u64 = *(uint64_t *)sps;
  573. #endif
  574.     } else if (LJ_64 && irt_islightud(t)) {
  575.       /* 64 bit lightuserdata which may escape already has the tag bits. */
  576.       o->u64 = *(uint64_t *)sps;
  577.     } else {
  578.       lua_assert(!irt_ispri(t));  /* PRI refs never have a spill slot. */
  579.       setgcV(J->L, o, (GCobj *)(uintptr_t)*(GCSize *)sps, irt_toitype(t));
  580.     }
  581.   } else/* Restore from register. */
  582.     Reg r = regsp_reg(rs);
  583.     if (ra_noreg(r)) {
  584.       lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
  585.       snap_restoreval(J, T, ex, snapno, rfilt, ir->op1, o);
  586.       if (LJ_DUALNUM) setnumV(o, (lua_Number)intV(o));
  587.       return;
  588.     } else if (irt_isinteger(t)) {
  589.       setintV(o, (int32_t)ex->gpr[r-RID_MIN_GPR]);
  590. #if !LJ_SOFTFP
  591.     } else if (irt_isnum(t)) {
  592.       setnumV(o, ex->fpr[r-RID_MIN_FPR]);
  593. #endif
  594.     } else if (LJ_64 && irt_islightud(t)) {
  595.       /* 64 bit lightuserdata which may escape already has the tag bits. */
  596.       o->u64 = ex->gpr[r-RID_MIN_GPR];
  597.     } else if (irt_ispri(t)) {
  598.       setpriV(o, irt_toitype(t));
  599.     } else {
  600.       setgcV(J->L, o, (GCobj *)ex->gpr[r-RID_MIN_GPR], irt_toitype(t));
  601.     }
  602.   }
  603. }

  604. #if LJ_HASFFI
  605. /* Restore raw data from the trace exit state. */
  606. static void snap_restoredata(GCtrace *T, ExitState *ex,
  607.                              SnapNo snapno, BloomFilter rfilt,
  608.                              IRRef ref, void *dst, CTSize sz)
  609. {
  610.   IRIns *ir = &T->ir[ref];
  611.   RegSP rs = ir->prev;
  612.   int32_t *src;
  613.   uint64_t tmp;
  614.   if (irref_isk(ref)) {
  615.     if (ir->o == IR_KNUM || ir->o == IR_KINT64) {
  616.       src = mref(ir->ptr, int32_t);
  617.     } else if (sz == 8) {
  618.       tmp = (uint64_t)(uint32_t)ir->i;
  619.       src = (int32_t *)&tmp;
  620.     } else {
  621.       src = &ir->i;
  622.     }
  623.   } else {
  624.     if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
  625.       rs = snap_renameref(T, snapno, ref, rs);
  626.     if (ra_hasspill(regsp_spill(rs))) {
  627.       src = &ex->spill[regsp_spill(rs)];
  628.       if (sz == 8 && !irt_is64(ir->t)) {
  629.         tmp = (uint64_t)(uint32_t)*src;
  630.         src = (int32_t *)&tmp;
  631.       }
  632.     } else {
  633.       Reg r = regsp_reg(rs);
  634.       if (ra_noreg(r)) {
  635.         /* Note: this assumes CNEWI is never used for SOFTFP split numbers. */
  636.         lua_assert(sz == 8 && ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
  637.         snap_restoredata(T, ex, snapno, rfilt, ir->op1, dst, 4);
  638.         *(lua_Number *)dst = (lua_Number)*(int32_t *)dst;
  639.         return;
  640.       }
  641.       src = (int32_t *)&ex->gpr[r-RID_MIN_GPR];
  642. #if !LJ_SOFTFP
  643.       if (r >= RID_MAX_GPR) {
  644.         src = (int32_t *)&ex->fpr[r-RID_MIN_FPR];
  645. #if LJ_TARGET_PPC
  646.         if (sz == 4) {  /* PPC FPRs are always doubles. */
  647.           *(float *)dst = (float)*(double *)src;
  648.           return;
  649.         }
  650. #else
  651.         if (LJ_BE && sz == 4) src++;
  652. #endif
  653.       }
  654. #endif
  655.     }
  656.   }
  657.   lua_assert(sz == 1 || sz == 2 || sz == 4 || sz == 8);
  658.   if (sz == 4) *(int32_t *)dst = *src;
  659.   else if (sz == 8) *(int64_t *)dst = *(int64_t *)src;
  660.   else if (sz == 1) *(int8_t *)dst = (int8_t)*src;
  661.   else *(int16_t *)dst = (int16_t)*src;
  662. }
  663. #endif

  664. /* Unsink allocation from the trace exit state. Unsink sunk stores. */
  665. static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
  666.                         SnapNo snapno, BloomFilter rfilt,
  667.                         IRIns *ir, TValue *o)
  668. {
  669.   lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP ||
  670.              ir->o == IR_CNEW || ir->o == IR_CNEWI);
  671. #if LJ_HASFFI
  672.   if (ir->o == IR_CNEW || ir->o == IR_CNEWI) {
  673.     CTState *cts = ctype_cts(J->L);
  674.     CTypeID id = (CTypeID)T->ir[ir->op1].i;
  675.     CTSize sz = lj_ctype_size(cts, id);
  676.     GCcdata *cd = lj_cdata_new(cts, id, sz);
  677.     setcdataV(J->L, o, cd);
  678.     if (ir->o == IR_CNEWI) {
  679.       uint8_t *p = (uint8_t *)cdataptr(cd);
  680.       lua_assert(sz == 4 || sz == 8);
  681.       if (LJ_32 && sz == 8 && ir+1 < T->ir + T->nins && (ir+1)->o == IR_HIOP) {
  682.         snap_restoredata(T, ex, snapno, rfilt, (ir+1)->op2, LJ_LE?p+4:p, 4);
  683.         if (LJ_BE) p += 4;
  684.         sz = 4;
  685.       }
  686.       snap_restoredata(T, ex, snapno, rfilt, ir->op2, p, sz);
  687.     } else {
  688.       IRIns *irs, *irlast = &T->ir[T->snap[snapno].ref];
  689.       for (irs = ir+1; irs < irlast; irs++)
  690.         if (irs->r == RID_SINK && snap_sunk_store(J, ir, irs)) {
  691.           IRIns *iro = &T->ir[T->ir[irs->op1].op2];
  692.           uint8_t *p = (uint8_t *)cd;
  693.           CTSize szs;
  694.           lua_assert(irs->o == IR_XSTORE && T->ir[irs->op1].o == IR_ADD);
  695.           lua_assert(iro->o == IR_KINT || iro->o == IR_KINT64);
  696.           if (irt_is64(irs->t)) szs = 8;
  697.           else if (irt_isi8(irs->t) || irt_isu8(irs->t)) szs = 1;
  698.           else if (irt_isi16(irs->t) || irt_isu16(irs->t)) szs = 2;
  699.           else szs = 4;
  700.           if (LJ_64 && iro->o == IR_KINT64)
  701.             p += (int64_t)ir_k64(iro)->u64;
  702.           else
  703.             p += iro->i;
  704.           lua_assert(p >= (uint8_t *)cdataptr(cd) &&
  705.                      p + szs <= (uint8_t *)cdataptr(cd) + sz);
  706.           if (LJ_32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
  707.             lua_assert(szs == 4);
  708.             snap_restoredata(T, ex, snapno, rfilt, (irs+1)->op2, LJ_LE?p+4:p,4);
  709.             if (LJ_BE) p += 4;
  710.           }
  711.           snap_restoredata(T, ex, snapno, rfilt, irs->op2, p, szs);
  712.         }
  713.     }
  714.   } else
  715. #endif
  716.   {
  717.     IRIns *irs, *irlast;
  718.     GCtab *t = ir->o == IR_TNEW ? lj_tab_new(J->L, ir->op1, ir->op2) :
  719.                                   lj_tab_dup(J->L, ir_ktab(&T->ir[ir->op1]));
  720.     settabV(J->L, o, t);
  721.     irlast = &T->ir[T->snap[snapno].ref];
  722.     for (irs = ir+1; irs < irlast; irs++)
  723.       if (irs->r == RID_SINK && snap_sunk_store(J, ir, irs)) {
  724.         IRIns *irk = &T->ir[irs->op1];
  725.         TValue tmp, *val;
  726.         lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
  727.                    irs->o == IR_FSTORE);
  728.         if (irk->o == IR_FREF) {
  729.           lua_assert(irk->op2 == IRFL_TAB_META);
  730.           snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, &tmp);
  731.           /* NOBARRIER: The table is new (marked white). */
  732.           setgcref(t->metatable, obj2gco(tabV(&tmp)));
  733.         } else {
  734.           irk = &T->ir[irk->op2];
  735.           if (irk->o == IR_KSLOT) irk = &T->ir[irk->op1];
  736.           lj_ir_kvalue(J->L, &tmp, irk);
  737.           val = lj_tab_set(J->L, t, &tmp);
  738.           /* NOBARRIER: The table is new (marked white). */
  739.           snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, val);
  740.           if (LJ_SOFTFP && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
  741.             snap_restoreval(J, T, ex, snapno, rfilt, (irs+1)->op2, &tmp);
  742.             val->u32.hi = tmp.u32.lo;
  743.           }
  744.         }
  745.       }
  746.   }
  747. }

  748. /* Restore interpreter state from exit state with the help of a snapshot. */
  749. const BCIns *lj_snap_restore(jit_State *J, void *exptr)
  750. {
  751.   ExitState *ex = (ExitState *)exptr;
  752.   SnapNo snapno = J->exitno;  /* For now, snapno == exitno. */
  753.   GCtrace *T = traceref(J, J->parent);
  754.   SnapShot *snap = &T->snap[snapno];
  755.   MSize n, nent = snap->nent;
  756.   SnapEntry *map = &T->snapmap[snap->mapofs];
  757.   SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1];
  758.   ptrdiff_t ftsz0;
  759.   TValue *frame;
  760.   BloomFilter rfilt = snap_renamefilter(T, snapno);
  761.   const BCIns *pc = snap_pc(map[nent]);
  762.   lua_State *L = J->L;

  763.   /* Set interpreter PC to the next PC to get correct error messages. */
  764.   setcframe_pc(cframe_raw(L->cframe), pc+1);

  765.   /* Make sure the stack is big enough for the slots from the snapshot. */
  766.   if (LJ_UNLIKELY(L->base + snap->topslot >= tvref(L->maxstack))) {
  767.     L->top = curr_topL(L);
  768.     lj_state_growstack(L, snap->topslot - curr_proto(L)->framesize);
  769.   }

  770.   /* Fill stack slots with data from the registers and spill slots. */
  771.   frame = L->base-1;
  772.   ftsz0 = frame_ftsz(frame);  /* Preserve link to previous frame in slot #0. */
  773.   for (n = 0; n < nent; n++) {
  774.     SnapEntry sn = map[n];
  775.     if (!(sn & SNAP_NORESTORE)) {
  776.       TValue *o = &frame[snap_slot(sn)];
  777.       IRRef ref = snap_ref(sn);
  778.       IRIns *ir = &T->ir[ref];
  779.       if (ir->r == RID_SUNK) {
  780.         MSize j;
  781.         for (j = 0; j < n; j++)
  782.           if (snap_ref(map[j]) == ref) {  /* De-duplicate sunk allocations. */
  783.             copyTV(L, o, &frame[snap_slot(map[j])]);
  784.             goto dupslot;
  785.           }
  786.         snap_unsink(J, T, ex, snapno, rfilt, ir, o);
  787.       dupslot:
  788.         continue;
  789.       }
  790.       snap_restoreval(J, T, ex, snapno, rfilt, ref, o);
  791.       if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && tvisint(o)) {
  792.         TValue tmp;
  793.         snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp);
  794.         o->u32.hi = tmp.u32.lo;
  795.       } else if ((sn & (SNAP_CONT|SNAP_FRAME))) {
  796.         lua_assert(!LJ_FR2);  /* TODO_FR2: store 64 bit PCs. */
  797.         /* Overwrite tag with frame link. */
  798.         setframe_ftsz(o, snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0);
  799.         L->base = o+1;
  800.       }
  801.     }
  802.   }
  803.   lua_assert(map + nent == flinks);

  804.   /* Compute current stack top. */
  805.   switch (bc_op(*pc)) {
  806.   default:
  807.     if (bc_op(*pc) < BC_FUNCF) {
  808.       L->top = curr_topL(L);
  809.       break;
  810.     }
  811.     /* fallthrough */
  812.   case BC_CALLM: case BC_CALLMT: case BC_RETM: case BC_TSETM:
  813.     L->top = frame + snap->nslots;
  814.     break;
  815.   }
  816.   return pc;
  817. }

  818. #undef IR
  819. #undef emitir_raw
  820. #undef emitir

  821. #endif