src/lj_asm_mips.h - luajit-2.0-src


/*
** MIPS IR assembler (SSA IR -> machine code).
** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/

/* -- Register allocator extensions --------------------------------------- */

/* Allocate a register with a hint. */
static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow)
{
  Reg r = IR(ref)->r;
  if (ra_noreg(r)) {
    if (!ra_hashint(r) && !iscrossref(as, ref))
      ra_sethint(IR(ref)->r, hint);  /* Propagate register hint. */
    r = ra_allocref(as, ref, allow);
  }
  ra_noweak(as, r);
  return r;
}

/* Allocate a register or RID_ZERO. */
static Reg ra_alloc1z(ASMState *as, IRRef ref, RegSet allow)
{
  Reg r = IR(ref)->r;
  if (ra_noreg(r)) {
    if (!(allow & RSET_FPR) && irref_isk(ref) && IR(ref)->i == 0)
      return RID_ZERO;
    r = ra_allocref(as, ref, allow);
  } else {
    ra_noweak(as, r);
  }
  return r;
}

/* Allocate two source registers for three-operand instructions. */
static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow)
{
  IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
  Reg left = irl->r, right = irr->r;
  if (ra_hasreg(left)) {
    ra_noweak(as, left);
    if (ra_noreg(right))
      right = ra_alloc1z(as, ir->op2, rset_exclude(allow, left));
    else
      ra_noweak(as, right);
  } else if (ra_hasreg(right)) {
    ra_noweak(as, right);
    left = ra_alloc1z(as, ir->op1, rset_exclude(allow, right));
  } else if (ra_hashint(right)) {
    right = ra_alloc1z(as, ir->op2, allow);
    left = ra_alloc1z(as, ir->op1, rset_exclude(allow, right));
  } else {
    left = ra_alloc1z(as, ir->op1, allow);
    right = ra_alloc1z(as, ir->op2, rset_exclude(allow, left));
  }
  return left | (right << 8);
}
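
/* A usage sketch of the packed return value: ra_alloc2() returns the left
** register in bits 0-7 and the right register in bits 8-15, so callers
** throughout this file unpack it as:
**   Reg right, left = ra_alloc2(as, ir, RSET_GPR);
**   right = (left >> 8); left &= 255;
*/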

/* -- Guard handling ------------------------------------------------------ */

/* Need some spare long-range jump slots, for out-of-range branches. */
#define MIPS_SPAREJUMP		4

/* Setup spare long-range jump slots per mcarea. */
static void asm_sparejump_setup(ASMState *as)
{
  MCode *mxp = as->mcbot;
  /* Assumes sizeof(MCLink) == 8. */
  if (((uintptr_t)mxp & (LJ_PAGESIZE-1)) == 8) {
    lua_assert(MIPSI_NOP == 0);
    memset(mxp+2, 0, MIPS_SPAREJUMP*8);
    mxp += MIPS_SPAREJUMP*2;
    lua_assert(mxp < as->mctop);
    lj_mcode_sync(as->mcbot, mxp);
    lj_mcode_commitbot(as->J, mxp);
    as->mcbot = mxp;
    as->mclim = as->mcbot + MCLIM_REDZONE;
  }
}

/* Setup exit stub after the end of each trace. */
static void asm_exitstub_setup(ASMState *as)
{
  MCode *mxp = as->mctop;
  /* sw TMP, 0(sp); j ->vm_exit_handler; li TMP, traceno */
  *--mxp = MIPSI_LI|MIPSF_T(RID_TMP)|as->T->traceno;
  *--mxp = MIPSI_J|((((uintptr_t)(void *)lj_vm_exit_handler)>>2)&0x03ffffffu);
  lua_assert(((uintptr_t)mxp ^ (uintptr_t)(void *)lj_vm_exit_handler)>>28 == 0);
  *--mxp = MIPSI_SW|MIPSF_T(RID_TMP)|MIPSF_S(RID_SP)|0;
  as->mctop = mxp;
}
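
/* Note on the range check: the MIPS J instruction encodes a 26-bit word
** index, i.e. target = (PC & 0xf0000000) | (imm26 << 2), so the stub can
** only jump to lj_vm_exit_handler if both share the top 4 address bits
** (the same 256 MB region) -- exactly what the assert verifies. Machine
** code is emitted backwards here (*--mxp), so the three stores appear in
** reverse of their execution order.
*/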

/* Keep this in-sync with exitstub_trace_addr(). */
#define asm_exitstub_addr(as)	((as)->mctop)

/* Emit conditional branch to exit for guard. */
static void asm_guard(ASMState *as, MIPSIns mi, Reg rs, Reg rt)
{
  MCode *target = asm_exitstub_addr(as);
  MCode *p = as->mcp;
  if (LJ_UNLIKELY(p == as->invmcp)) {
    as->invmcp = NULL;
    as->loopinv = 1;
    as->mcp = p+1;
    mi = mi ^ ((mi>>28) == 1 ? 0x04000000u : 0x00010000u);  /* Invert cond. */
    target = p;  /* Patch target later in asm_loop_fixup. */
  }
  emit_ti(as, MIPSI_LI, RID_TMP, as->snapno);
  emit_branch(as, mi, rs, rt, target);
}
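
/* How the condition inversion works (per the MIPS opcode map): opcodes
** with top nibble 1 are the paired I-type branches, where BEQ/BNE and
** BLEZ/BGTZ differ only in bit 26, hence the XOR with 0x04000000u. The
** remaining guards (REGIMM BLTZ/BGEZ and COP1 BC1F/BC1T) encode their
** condition sense in bit 16, hence the XOR with 0x00010000u.
*/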

/* -- Operand fusion ------------------------------------------------------ */

/* Limit linear search to this distance. Avoids O(n^2) behavior. */
#define CONFLICT_SEARCH_LIM	31

/* Check if there's no conflicting instruction between curins and ref. */
static int noconflict(ASMState *as, IRRef ref, IROp conflict)
{
  IRIns *ir = as->ir;
  IRRef i = as->curins;
  if (i > ref + CONFLICT_SEARCH_LIM)
    return 0;  /* Give up, ref is too far away. */
  while (--i > ref)
    if (ir[i].o == conflict)
      return 0;  /* Conflict found. */
  return 1;  /* Ok, no conflict. */
}

/* Fuse the array base of colocated arrays. */
static int32_t asm_fuseabase(ASMState *as, IRRef ref)
{
  IRIns *ir = IR(ref);
  if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE &&
      !neverfuse(as) && noconflict(as, ref, IR_NEWREF))
    return (int32_t)sizeof(GCtab);
  return 0;
}

/* Fuse array/hash/upvalue reference into register+offset operand. */
static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow)
{
  IRIns *ir = IR(ref);
  if (ra_noreg(ir->r)) {
    if (ir->o == IR_AREF) {
      if (mayfuse(as, ref)) {
        if (irref_isk(ir->op2)) {
          IRRef tab = IR(ir->op1)->op1;
          int32_t ofs = asm_fuseabase(as, tab);
          IRRef refa = ofs ? tab : ir->op1;
          ofs += 8*IR(ir->op2)->i;
          if (checki16(ofs)) {
            *ofsp = ofs;
            return ra_alloc1(as, refa, allow);
          }
        }
      }
    } else if (ir->o == IR_HREFK) {
      if (mayfuse(as, ref)) {
        int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node));
        if (checki16(ofs)) {
          *ofsp = ofs;
          return ra_alloc1(as, ir->op1, allow);
        }
      }
    } else if (ir->o == IR_UREFC) {
      if (irref_isk(ir->op1)) {
        GCfunc *fn = ir_kfunc(IR(ir->op1));
        int32_t ofs = i32ptr(&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv);
        int32_t jgl = (intptr_t)J2G(as->J);
        if ((uint32_t)(ofs-jgl) < 65536) {
          *ofsp = ofs-jgl-32768;
          return RID_JGL;
        } else {
          *ofsp = (int16_t)ofs;
          return ra_allock(as, ofs-(int16_t)ofs, allow);
        }
      }
    }
  }
  *ofsp = 0;
  return ra_alloc1(as, ref, allow);
}

/* Fuse XLOAD/XSTORE reference into load/store operand. */
static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref,
			 RegSet allow, int32_t ofs)
{
  IRIns *ir = IR(ref);
  Reg base;
  if (ra_noreg(ir->r) && canfuse(as, ir)) {
    if (ir->o == IR_ADD) {
      int32_t ofs2;
      if (irref_isk(ir->op2) && (ofs2 = ofs + IR(ir->op2)->i, checki16(ofs2))) {
        ref = ir->op1;
        ofs = ofs2;
      }
    } else if (ir->o == IR_STRREF) {
      int32_t ofs2 = 65536;
      lua_assert(ofs == 0);
      ofs = (int32_t)sizeof(GCstr);
      if (irref_isk(ir->op2)) {
        ofs2 = ofs + IR(ir->op2)->i;
        ref = ir->op1;
      } else if (irref_isk(ir->op1)) {
        ofs2 = ofs + IR(ir->op1)->i;
        ref = ir->op2;
      }
      if (!checki16(ofs2)) {
        /* NYI: Fuse ADD with constant. */
        Reg right, left = ra_alloc2(as, ir, allow);
        right = (left >> 8); left &= 255;
        emit_hsi(as, mi, rt, RID_TMP, ofs);
        emit_dst(as, MIPSI_ADDU, RID_TMP, left, right);
        return;
      }
      ofs = ofs2;
    }
  }
  base = ra_alloc1(as, ref, allow);
  emit_hsi(as, mi, rt, base, ofs);
}

/* -- Calls --------------------------------------------------------------- */

/* Generate a call to a C function. */
static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
{
  uint32_t n, nargs = CCI_XNARGS(ci);
  int32_t ofs = 16;
  Reg gpr, fpr = REGARG_FIRSTFPR;
  if ((void *)ci->func)
    emit_call(as, (void *)ci->func);
  for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++)
    as->cost[gpr] = REGCOST(~0u, ASMREF_L);
  gpr = REGARG_FIRSTGPR;
  for (n = 0; n < nargs; n++) {  /* Setup args. */
    IRRef ref = args[n];
    if (ref) {
      IRIns *ir = IR(ref);
      if (irt_isfp(ir->t) && fpr <= REGARG_LASTFPR &&
          !(ci->flags & CCI_VARARG)) {
        lua_assert(rset_test(as->freeset, fpr));  /* Already evicted. */
        ra_leftov(as, fpr, ref);
        fpr += 2;
        gpr += irt_isnum(ir->t) ? 2 : 1;
      } else {
        fpr = REGARG_LASTFPR+1;
        if (irt_isnum(ir->t)) gpr = (gpr+1) & ~1;
        if (gpr <= REGARG_LASTGPR) {
          lua_assert(rset_test(as->freeset, gpr));  /* Already evicted. */
          if (irt_isfp(ir->t)) {
            RegSet of = as->freeset;
            Reg r;
            /* Workaround to protect argument GPRs from being used for remat. */
            as->freeset &= ~RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1);
            r = ra_alloc1(as, ref, RSET_FPR);
            as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1));
            if (irt_isnum(ir->t)) {
              emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?0:1), r+1);
              emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?1:0), r);
              lua_assert(rset_test(as->freeset, gpr+1));  /* Already evicted. */
              gpr += 2;
            } else if (irt_isfloat(ir->t)) {
              emit_tg(as, MIPSI_MFC1, gpr, r);
              gpr++;
            }
          } else {
            ra_leftov(as, gpr, ref);
            gpr++;
          }
        } else {
          Reg r = ra_alloc1z(as, ref, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
          if (irt_isnum(ir->t)) ofs = (ofs + 4) & ~4;
          emit_spstore(as, ir, r, ofs);
          ofs += irt_isnum(ir->t) ? 8 : 4;
        }
      }
    } else {
      fpr = REGARG_LASTFPR+1;
      if (gpr <= REGARG_LASTGPR)
        gpr++;
      else
        ofs += 4;
    }
    checkmclim(as);
  }
}

/* Setup result reg/sp for call. Evict scratch regs. */
static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
{
  RegSet drop = RSET_SCRATCH;
  int hiop = ((ir+1)->o == IR_HIOP);
  if ((ci->flags & CCI_NOFPRCLOBBER))
    drop &= ~RSET_FPR;
  if (ra_hasreg(ir->r))
    rset_clear(drop, ir->r);  /* Dest reg handled below. */
  if (hiop && ra_hasreg((ir+1)->r))
    rset_clear(drop, (ir+1)->r);  /* Dest reg handled below. */
  ra_evictset(as, drop);  /* Evictions must be performed first. */
  if (ra_used(ir)) {
    lua_assert(!irt_ispri(ir->t));
    if (irt_isfp(ir->t)) {
      if ((ci->flags & CCI_CASTU64)) {
        int32_t ofs = sps_scale(ir->s);
        Reg dest = ir->r;
        if (ra_hasreg(dest)) {
          ra_free(as, dest);
          ra_modified(as, dest);
          emit_tg(as, MIPSI_MTC1, RID_RETHI, dest+1);
          emit_tg(as, MIPSI_MTC1, RID_RETLO, dest);
        }
        if (ofs) {
          emit_tsi(as, MIPSI_SW, RID_RETLO, RID_SP, ofs+(LJ_BE?4:0));
          emit_tsi(as, MIPSI_SW, RID_RETHI, RID_SP, ofs+(LJ_BE?0:4));
        }
      } else {
        ra_destreg(as, ir, RID_FPRET);
      }
    } else if (hiop) {
      ra_destpair(as, ir);
    } else {
      ra_destreg(as, ir, RID_RET);
    }
  }
}

static void asm_callx(ASMState *as, IRIns *ir)
{
  IRRef args[CCI_NARGS_MAX*2];
  CCallInfo ci;
  IRRef func;
  IRIns *irf;
  ci.flags = asm_callx_flags(as, ir);
  asm_collectargs(as, ir, &ci, args);
  asm_setupresult(as, ir, &ci);
  func = ir->op2; irf = IR(func);
  if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
  if (irref_isk(func)) {  /* Call to constant address. */
    ci.func = (ASMFunction)(void *)(irf->i);
  } else {  /* Need specific register for indirect calls. */
    Reg r = ra_alloc1(as, func, RID2RSET(RID_CFUNCADDR));
    MCode *p = as->mcp;
    if (r == RID_CFUNCADDR)
      *--p = MIPSI_NOP;
    else
      *--p = MIPSI_MOVE | MIPSF_D(RID_CFUNCADDR) | MIPSF_S(r);
    *--p = MIPSI_JALR | MIPSF_S(r);
    as->mcp = p;
    ci.func = (ASMFunction)(void *)0;
  }
  asm_gencall(as, &ci, args);
}

static void asm_callround(ASMState *as, IRIns *ir, IRCallID id)
{
  /* The modified regs must match with the *.dasc implementation. */
  RegSet drop = RID2RSET(RID_R1)|RID2RSET(RID_R12)|RID2RSET(RID_FPRET)|
                RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(REGARG_FIRSTFPR);
  if (ra_hasreg(ir->r)) rset_clear(drop, ir->r);
  ra_evictset(as, drop);
  ra_destreg(as, ir, RID_FPRET);
  emit_call(as, (void *)lj_ir_callinfo[id].func);
  ra_leftov(as, REGARG_FIRSTFPR, ir->op1);
}

/* -- Returns ------------------------------------------------------------- */

/* Return to lower frame. Guard that it goes to the right spot. */
static void asm_retf(ASMState *as, IRIns *ir)
{
  Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
  void *pc = ir_kptr(IR(ir->op2));
  int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
  as->topslot -= (BCReg)delta;
  if ((int32_t)as->topslot < 0) as->topslot = 0;
  irt_setmark(IR(REF_BASE)->t);  /* Children must not coalesce with BASE reg. */
  emit_setgl(as, base, jit_base);
  emit_addptr(as, base, -8*delta);
  asm_guard(as, MIPSI_BNE, RID_TMP,
            ra_allock(as, i32ptr(pc), rset_exclude(RSET_GPR, base)));
  emit_tsi(as, MIPSI_LW, RID_TMP, base, -8);
}

/* -- Type conversions ---------------------------------------------------- */

static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
{
  Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
  Reg dest = ra_dest(as, ir, RSET_GPR);
  asm_guard(as, MIPSI_BC1F, 0, 0);
  emit_fgh(as, MIPSI_C_EQ_D, 0, tmp, left);
  emit_fg(as, MIPSI_CVT_D_W, tmp, tmp);
  emit_tg(as, MIPSI_MFC1, dest, tmp);
  emit_fg(as, MIPSI_CVT_W_D, tmp, left);
}

static void asm_tobit(ASMState *as, IRIns *ir)
{
  RegSet allow = RSET_FPR;
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg left = ra_alloc1(as, ir->op1, allow);
  Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left));
  Reg tmp = ra_scratch(as, rset_clear(allow, right));
  emit_tg(as, MIPSI_MFC1, dest, tmp);
  emit_fgh(as, MIPSI_ADD_D, tmp, left, right);
}
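
/* Background on asm_tobit(), assuming the usual IR_TOBIT contract: op2 is
** the bias constant 2^52 + 2^51. Adding it to a double shifts the integer
** part into the low 32 bits of the mantissa, so the MFC1 from the low half
** of the FP register pair yields the wrapped 32-bit integer directly,
** without an explicit FP-to-integer conversion.
*/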

static void asm_conv(ASMState *as, IRIns *ir)
{
  IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
  int stfp = (st == IRT_NUM || st == IRT_FLOAT);
  IRRef lref = ir->op1;
  lua_assert(irt_type(ir->t) != st);
  lua_assert(!(irt_isint64(ir->t) ||
               (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */
  if (irt_isfp(ir->t)) {
    Reg dest = ra_dest(as, ir, RSET_FPR);
    if (stfp) {  /* FP to FP conversion. */
      emit_fg(as, st == IRT_NUM ? MIPSI_CVT_S_D : MIPSI_CVT_D_S,
              dest, ra_alloc1(as, lref, RSET_FPR));
    } else if (st == IRT_U32) {  /* U32 to FP conversion. */
      /* y = (x ^ 0x80000000) + 2147483648.0 */
      Reg left = ra_alloc1(as, lref, RSET_GPR);
      Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, dest));
      emit_fgh(as, irt_isfloat(ir->t) ? MIPSI_ADD_S : MIPSI_ADD_D,
               dest, dest, tmp);
      emit_fg(as, irt_isfloat(ir->t) ? MIPSI_CVT_S_W : MIPSI_CVT_D_W,
              dest, dest);
      if (irt_isfloat(ir->t))
        emit_lsptr(as, MIPSI_LWC1, (tmp & 31),
                   (void *)lj_ir_k64_find(as->J, U64x(4f000000,4f000000)),
                   RSET_GPR);
      else
        emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
                   (void *)lj_ir_k64_find(as->J, U64x(41e00000,00000000)),
                   RSET_GPR);
      emit_tg(as, MIPSI_MTC1, RID_TMP, dest);
      emit_dst(as, MIPSI_XOR, RID_TMP, RID_TMP, left);
      emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000);
    } else {  /* Integer to FP conversion. */
      Reg left = ra_alloc1(as, lref, RSET_GPR);
      emit_fg(as, irt_isfloat(ir->t) ? MIPSI_CVT_S_W : MIPSI_CVT_D_W,
              dest, dest);
      emit_tg(as, MIPSI_MTC1, left, dest);
    }
  } else if (stfp) {  /* FP to integer conversion. */
    if (irt_isguard(ir->t)) {
      /* Checked conversions are only supported from number to int. */
      lua_assert(irt_isint(ir->t) && st == IRT_NUM);
      asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
    } else {
      Reg dest = ra_dest(as, ir, RSET_GPR);
      Reg left = ra_alloc1(as, lref, RSET_FPR);
      Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
      if (irt_isu32(ir->t)) {
        /* y = (int)floor(x - 2147483648.0) ^ 0x80000000 */
        emit_dst(as, MIPSI_XOR, dest, dest, RID_TMP);
        emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000);
        emit_tg(as, MIPSI_MFC1, dest, tmp);
        emit_fg(as, st == IRT_FLOAT ? MIPSI_FLOOR_W_S : MIPSI_FLOOR_W_D,
                tmp, tmp);
        emit_fgh(as, st == IRT_FLOAT ? MIPSI_SUB_S : MIPSI_SUB_D,
                 tmp, left, tmp);
        if (st == IRT_FLOAT)
          emit_lsptr(as, MIPSI_LWC1, (tmp & 31),
                     (void *)lj_ir_k64_find(as->J, U64x(4f000000,4f000000)),
                     RSET_GPR);
        else
          emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
                     (void *)lj_ir_k64_find(as->J, U64x(41e00000,00000000)),
                     RSET_GPR);
      } else {
        emit_tg(as, MIPSI_MFC1, dest, tmp);
        emit_fg(as, st == IRT_FLOAT ? MIPSI_TRUNC_W_S : MIPSI_TRUNC_W_D,
                tmp, left);
      }
    }
  } else {
    Reg dest = ra_dest(as, ir, RSET_GPR);
    if (st >= IRT_I8 && st <= IRT_U16) {  /* Extend to 32 bit integer. */
      Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
      lua_assert(irt_isint(ir->t) || irt_isu32(ir->t));
      if ((ir->op2 & IRCONV_SEXT)) {
        if ((as->flags & JIT_F_MIPS32R2)) {
          emit_dst(as, st == IRT_I8 ? MIPSI_SEB : MIPSI_SEH, dest, 0, left);
        } else {
          uint32_t shift = st == IRT_I8 ? 24 : 16;
          emit_dta(as, MIPSI_SRA, dest, dest, shift);
          emit_dta(as, MIPSI_SLL, dest, left, shift);
        }
      } else {
        emit_tsi(as, MIPSI_ANDI, dest, left,
                 (int32_t)(st == IRT_U8 ? 0xff : 0xffff));
      }
    } else {  /* 32/64 bit integer conversions. */
      /* Only need to handle 32/32 bit no-op (cast) on 32 bit archs. */
      ra_leftov(as, dest, lref);  /* Do nothing, but may need to move regs. */
    }
  }
}
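
/* A worked example of the unsigned conversion tricks above. U32 to FP with
** x = 0xffffffff: x ^ 0x80000000 = 0x7fffffff = 2147483647 as a signed
** int, and 2147483647 + 2147483648.0 = 4294967295.0, the correct unsigned
** value. FP to U32 inverts this: floor(x - 2147483648.0) lands in signed
** int range, and the final XOR with 0x80000000 adds the bias back in
** integer arithmetic.
*/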

static void asm_strto(ASMState *as, IRIns *ir)
{
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
  IRRef args[2];
  RegSet drop = RSET_SCRATCH;
  if (ra_hasreg(ir->r)) rset_set(drop, ir->r);  /* Spill dest reg (if any). */
  ra_evictset(as, drop);
  asm_guard(as, MIPSI_BEQ, RID_RET, RID_ZERO);  /* Test return status. */
  args[0] = ir->op1;      /* GCstr *str */
  args[1] = ASMREF_TMP1;  /* TValue *n  */
  asm_gencall(as, ci, args);
  /* Store the result to the spill slot or temp slots. */
  emit_tsi(as, MIPSI_ADDIU, ra_releasetmp(as, ASMREF_TMP1),
           RID_SP, sps_scale(ir->s));
}

/* -- Memory references --------------------------------------------------- */

/* Get pointer to TValue. */
static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
{
  IRIns *ir = IR(ref);
  if (irt_isnum(ir->t)) {
    if (irref_isk(ref))  /* Use the number constant itself as a TValue. */
      ra_allockreg(as, i32ptr(ir_knum(ir)), dest);
    else  /* Otherwise force a spill and use the spill slot. */
      emit_tsi(as, MIPSI_ADDIU, dest, RID_SP, ra_spill(as, ir));
  } else {
    /* Otherwise use g->tmptv to hold the TValue. */
    RegSet allow = rset_exclude(RSET_GPR, dest);
    Reg type;
    emit_tsi(as, MIPSI_ADDIU, dest, RID_JGL, offsetof(global_State, tmptv)-32768);
    if (!irt_ispri(ir->t)) {
      Reg src = ra_alloc1(as, ref, allow);
      emit_setgl(as, src, tmptv.gcr);
    }
    type = ra_allock(as, irt_toitype(ir->t), allow);
    emit_setgl(as, type, tmptv.it);
  }
}

static void asm_aref(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg idx, base;
  if (irref_isk(ir->op2)) {
    IRRef tab = IR(ir->op1)->op1;
    int32_t ofs = asm_fuseabase(as, tab);
    IRRef refa = ofs ? tab : ir->op1;
    ofs += 8*IR(ir->op2)->i;
    if (checki16(ofs)) {
      base = ra_alloc1(as, refa, RSET_GPR);
      emit_tsi(as, MIPSI_ADDIU, dest, base, ofs);
      return;
    }
  }
  base = ra_alloc1(as, ir->op1, RSET_GPR);
  idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base));
  emit_dst(as, MIPSI_ADDU, dest, RID_TMP, base);
  emit_dta(as, MIPSI_SLL, RID_TMP, idx, 3);
}

/* Inlined hash lookup. Specialized for key type and for const keys.
** The equivalent C code is:
**   Node *n = hashkey(t, key);
**   do {
**     if (lj_obj_equal(&n->key, key)) return &n->val;
**   } while ((n = nextnode(n)));
**   return niltv(L);
*/
static void asm_href(ASMState *as, IRIns *ir, IROp merge)
{
  RegSet allow = RSET_GPR;
  int destused = ra_used(ir);
  Reg dest = ra_dest(as, ir, allow);
  Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
  Reg key = RID_NONE, type = RID_NONE, tmpnum = RID_NONE, tmp1 = RID_TMP, tmp2;
  IRRef refkey = ir->op2;
  IRIns *irkey = IR(refkey);
  IRType1 kt = irkey->t;
  uint32_t khash;
  MCLabel l_end, l_loop, l_next;

  rset_clear(allow, tab);
  if (irt_isnum(kt)) {
    key = ra_alloc1(as, refkey, RSET_FPR);
    tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key));
  } else if (!irt_ispri(kt)) {
    key = ra_alloc1(as, refkey, allow);
    rset_clear(allow, key);
    type = ra_allock(as, irt_toitype(irkey->t), allow);
    rset_clear(allow, type);
  }
  tmp2 = ra_scratch(as, allow);
  rset_clear(allow, tmp2);

  /* Key not found in chain: jump to exit (if merged) or load niltv. */
  l_end = emit_label(as);
  as->invmcp = NULL;
  if (merge == IR_NE)
    asm_guard(as, MIPSI_B, RID_ZERO, RID_ZERO);
  else if (destused)
    emit_loada(as, dest, niltvg(J2G(as->J)));
  /* Follow hash chain until the end. */
  emit_move(as, dest, tmp2);
  l_loop = --as->mcp;
  emit_tsi(as, MIPSI_LW, tmp2, dest, (int32_t)offsetof(Node, next));
  l_next = emit_label(as);

  /* Type and value comparison. */
  if (merge == IR_EQ) {  /* Must match asm_guard(). */
    emit_ti(as, MIPSI_LI, RID_TMP, as->snapno);
    l_end = asm_exitstub_addr(as);
  }
  if (irt_isnum(kt)) {
    emit_branch(as, MIPSI_BC1T, 0, 0, l_end);
    emit_fgh(as, MIPSI_C_EQ_D, 0, tmpnum, key);
    *--as->mcp = MIPSI_NOP;  /* Avoid NaN comparison overhead. */
    emit_branch(as, MIPSI_BEQ, tmp2, RID_ZERO, l_next);
    emit_tsi(as, MIPSI_SLTIU, tmp2, tmp2, (int32_t)LJ_TISNUM);
    emit_hsi(as, MIPSI_LDC1, tmpnum, dest, (int32_t)offsetof(Node, key.n));
  } else {
    if (irt_ispri(kt)) {
      emit_branch(as, MIPSI_BEQ, tmp2, type, l_end);
    } else {
      emit_branch(as, MIPSI_BEQ, tmp1, key, l_end);
      emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, key.gcr));
      emit_branch(as, MIPSI_BNE, tmp2, type, l_next);
    }
  }
  emit_tsi(as, MIPSI_LW, tmp2, dest, (int32_t)offsetof(Node, key.it));
  *l_loop = MIPSI_BNE | MIPSF_S(tmp2) | ((as->mcp-l_loop-1) & 0xffffu);

  /* Load main position relative to tab->node into dest. */
  khash = irref_isk(refkey) ? ir_khash(irkey) : 1;
  if (khash == 0) {
    emit_tsi(as, MIPSI_LW, dest, tab, (int32_t)offsetof(GCtab, node));
  } else {
    Reg tmphash = tmp1;
    if (irref_isk(refkey))
      tmphash = ra_allock(as, khash, allow);
    emit_dst(as, MIPSI_ADDU, dest, dest, tmp1);
    lua_assert(sizeof(Node) == 24);
    emit_dst(as, MIPSI_SUBU, tmp1, tmp2, tmp1);
    emit_dta(as, MIPSI_SLL, tmp1, tmp1, 3);
    emit_dta(as, MIPSI_SLL, tmp2, tmp1, 5);
    emit_dst(as, MIPSI_AND, tmp1, tmp2, tmphash);
    emit_tsi(as, MIPSI_LW, dest, tab, (int32_t)offsetof(GCtab, node));
    emit_tsi(as, MIPSI_LW, tmp2, tab, (int32_t)offsetof(GCtab, hmask));
    if (irref_isk(refkey)) {
      /* Nothing to do. */
    } else if (irt_isstr(kt)) {
      emit_tsi(as, MIPSI_LW, tmp1, key, (int32_t)offsetof(GCstr, hash));
    } else {  /* Must match with hash*() in lj_tab.c. */
      emit_dst(as, MIPSI_SUBU, tmp1, tmp1, tmp2);
      emit_rotr(as, tmp2, tmp2, dest, (-HASH_ROT3)&31);
      emit_dst(as, MIPSI_XOR, tmp1, tmp1, tmp2);
      emit_rotr(as, tmp1, tmp1, dest, (-HASH_ROT2-HASH_ROT1)&31);
      emit_dst(as, MIPSI_SUBU, tmp2, tmp2, dest);
      if (irt_isnum(kt)) {
        emit_dst(as, MIPSI_XOR, tmp2, tmp2, tmp1);
        if ((as->flags & JIT_F_MIPS32R2)) {
          emit_dta(as, MIPSI_ROTR, dest, tmp1, (-HASH_ROT1)&31);
        } else {
          emit_dst(as, MIPSI_OR, dest, dest, tmp1);
          emit_dta(as, MIPSI_SLL, tmp1, tmp1, HASH_ROT1);
          emit_dta(as, MIPSI_SRL, dest, tmp1, (-HASH_ROT1)&31);
        }
        emit_dst(as, MIPSI_ADDU, tmp1, tmp1, tmp1);
        emit_tg(as, MIPSI_MFC1, tmp2, key);
        emit_tg(as, MIPSI_MFC1, tmp1, key+1);
      } else {
        emit_dst(as, MIPSI_XOR, tmp2, key, tmp1);
        emit_rotr(as, dest, tmp1, tmp2, (-HASH_ROT1)&31);
        emit_dst(as, MIPSI_ADDU, tmp1, key, ra_allock(as, HASH_BIAS, allow));
      }
    }
  }
}
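
/* The main-position arithmetic above computes node + (hash & hmask)*24
** without a multiply. Reading the emits in execution order (backwards):
** tmp1 = hash & hmask, then tmp1*24 = (tmp1 << 5) - (tmp1 << 3), then
** dest = node + tmp1*24 -- which is why sizeof(Node) == 24 is asserted.
*/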

static void asm_hrefk(ASMState *as, IRIns *ir)
{
  IRIns *kslot = IR(ir->op2);
  IRIns *irkey = IR(kslot->op1);
  int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node));
  int32_t kofs = ofs + (int32_t)offsetof(Node, key);
  Reg dest = (ra_used(ir)||ofs > 32736) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
  Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
  Reg key = RID_NONE, type = RID_TMP, idx = node;
  RegSet allow = rset_exclude(RSET_GPR, node);
  int32_t lo, hi;
  lua_assert(ofs % sizeof(Node) == 0);
  if (ofs > 32736) {
    idx = dest;
    rset_clear(allow, dest);
    kofs = (int32_t)offsetof(Node, key);
  } else if (ra_hasreg(dest)) {
    emit_tsi(as, MIPSI_ADDIU, dest, node, ofs);
  }
  if (!irt_ispri(irkey->t)) {
    key = ra_scratch(as, allow);
    rset_clear(allow, key);
  }
  if (irt_isnum(irkey->t)) {
    lo = (int32_t)ir_knum(irkey)->u32.lo;
    hi = (int32_t)ir_knum(irkey)->u32.hi;
  } else {
    lo = irkey->i;
    hi = irt_toitype(irkey->t);
    if (!ra_hasreg(key))
      goto nolo;
  }
  asm_guard(as, MIPSI_BNE, key, lo ? ra_allock(as, lo, allow) : RID_ZERO);
nolo:
  asm_guard(as, MIPSI_BNE, type, hi ? ra_allock(as, hi, allow) : RID_ZERO);
  if (ra_hasreg(key)) emit_tsi(as, MIPSI_LW, key, idx, kofs+(LJ_BE?4:0));
  emit_tsi(as, MIPSI_LW, type, idx, kofs+(LJ_BE?0:4));
  if (ofs > 32736)
    emit_tsi(as, MIPSI_ADDU, dest, node, ra_allock(as, ofs, allow));
}

static void asm_uref(ASMState *as, IRIns *ir)
{
  /* NYI: Check that UREFO is still open and not aliasing a slot. */
  Reg dest = ra_dest(as, ir, RSET_GPR);
  if (irref_isk(ir->op1)) {
    GCfunc *fn = ir_kfunc(IR(ir->op1));
    MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
    emit_lsptr(as, MIPSI_LW, dest, v, RSET_GPR);
  } else {
    Reg uv = ra_scratch(as, RSET_GPR);
    Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
    if (ir->o == IR_UREFC) {
      asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO);
      emit_tsi(as, MIPSI_ADDIU, dest, uv, (int32_t)offsetof(GCupval, tv));
      emit_tsi(as, MIPSI_LBU, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
    } else {
      emit_tsi(as, MIPSI_LW, dest, uv, (int32_t)offsetof(GCupval, v));
    }
    emit_tsi(as, MIPSI_LW, uv, func,
             (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
  }
}

static void asm_fref(ASMState *as, IRIns *ir)
{
  UNUSED(as); UNUSED(ir);
  lua_assert(!ra_used(ir));
}

static void asm_strref(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  IRRef ref = ir->op2, refk = ir->op1;
  int32_t ofs = (int32_t)sizeof(GCstr);
  Reg r;
  if (irref_isk(ref)) {
    IRRef tmp = refk; refk = ref; ref = tmp;
  } else if (!irref_isk(refk)) {
    Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR);
    IRIns *irr = IR(ir->op2);
    if (ra_hasreg(irr->r)) {
      ra_noweak(as, irr->r);
      right = irr->r;
    } else if (mayfuse(as, irr->op2) &&
               irr->o == IR_ADD && irref_isk(irr->op2) &&
               checki16(ofs + IR(irr->op2)->i)) {
      ofs += IR(irr->op2)->i;
      right = ra_alloc1(as, irr->op1, rset_exclude(RSET_GPR, left));
    } else {
      right = ra_allocref(as, ir->op2, rset_exclude(RSET_GPR, left));
    }
    emit_tsi(as, MIPSI_ADDIU, dest, dest, ofs);
    emit_dst(as, MIPSI_ADDU, dest, left, right);
    return;
  }
  r = ra_alloc1(as, ref, RSET_GPR);
  ofs += IR(refk)->i;
  if (checki16(ofs))
    emit_tsi(as, MIPSI_ADDIU, dest, r, ofs);
  else
    emit_dst(as, MIPSI_ADDU, dest, r,
             ra_allock(as, ofs, rset_exclude(RSET_GPR, r)));
}

/* -- Loads and stores ---------------------------------------------------- */

static MIPSIns asm_fxloadins(IRIns *ir)
{
  switch (irt_type(ir->t)) {
  case IRT_I8: return MIPSI_LB;
  case IRT_U8: return MIPSI_LBU;
  case IRT_I16: return MIPSI_LH;
  case IRT_U16: return MIPSI_LHU;
  case IRT_NUM: return MIPSI_LDC1;
  case IRT_FLOAT: return MIPSI_LWC1;
  default: return MIPSI_LW;
  }
}

static MIPSIns asm_fxstoreins(IRIns *ir)
{
  switch (irt_type(ir->t)) {
  case IRT_I8: case IRT_U8: return MIPSI_SB;
  case IRT_I16: case IRT_U16: return MIPSI_SH;
  case IRT_NUM: return MIPSI_SDC1;
  case IRT_FLOAT: return MIPSI_SWC1;
  default: return MIPSI_SW;
  }
}

static void asm_fload(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg idx = ra_alloc1(as, ir->op1, RSET_GPR);
  MIPSIns mi = asm_fxloadins(ir);
  int32_t ofs;
  if (ir->op2 == IRFL_TAB_ARRAY) {
    ofs = asm_fuseabase(as, ir->op1);
    if (ofs) {  /* Turn the t->array load into an add for colocated arrays. */
      emit_tsi(as, MIPSI_ADDIU, dest, idx, ofs);
      return;
    }
  }
  ofs = field_ofs[ir->op2];
  lua_assert(!irt_isfp(ir->t));
  emit_tsi(as, mi, dest, idx, ofs);
}

static void asm_fstore(ASMState *as, IRIns *ir)
{
  if (ir->r != RID_SINK) {
    Reg src = ra_alloc1z(as, ir->op2, RSET_GPR);
    IRIns *irf = IR(ir->op1);
    Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
    int32_t ofs = field_ofs[irf->op2];
    MIPSIns mi = asm_fxstoreins(ir);
    lua_assert(!irt_isfp(ir->t));
    emit_tsi(as, mi, src, idx, ofs);
  }
}

static void asm_xload(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
  lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));
  asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
}

static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
{
  if (ir->r != RID_SINK) {
    Reg src = ra_alloc1z(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
    asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
                 rset_exclude(RSET_GPR, src), ofs);
  }
}

#define asm_xstore(as, ir)	asm_xstore_(as, ir, 0)

static void asm_ahuvload(ASMState *as, IRIns *ir)
{
  IRType1 t = ir->t;
  Reg dest = RID_NONE, type = RID_TMP, idx;
  RegSet allow = RSET_GPR;
  int32_t ofs = 0;
  if (ra_used(ir)) {
    lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
    dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR);
    rset_clear(allow, dest);
  }
  idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
  rset_clear(allow, idx);
  if (irt_isnum(t)) {
    asm_guard(as, MIPSI_BEQ, type, RID_ZERO);
    emit_tsi(as, MIPSI_SLTIU, type, type, (int32_t)LJ_TISNUM);
    if (ra_hasreg(dest))
      emit_hsi(as, MIPSI_LDC1, dest, idx, ofs);
  } else {
    asm_guard(as, MIPSI_BNE, type, ra_allock(as, irt_toitype(t), allow));
    if (ra_hasreg(dest)) emit_tsi(as, MIPSI_LW, dest, idx, ofs+(LJ_BE?4:0));
  }
  emit_tsi(as, MIPSI_LW, type, idx, ofs+(LJ_BE?0:4));
}

static void asm_ahustore(ASMState *as, IRIns *ir)
{
  RegSet allow = RSET_GPR;
  Reg idx, src = RID_NONE, type = RID_NONE;
  int32_t ofs = 0;
  if (ir->r == RID_SINK)
    return;
  if (irt_isnum(ir->t)) {
    src = ra_alloc1(as, ir->op2, RSET_FPR);
  } else {
    if (!irt_ispri(ir->t)) {
      src = ra_alloc1(as, ir->op2, allow);
      rset_clear(allow, src);
    }
    type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
    rset_clear(allow, type);
  }
  idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
  if (irt_isnum(ir->t)) {
    emit_hsi(as, MIPSI_SDC1, src, idx, ofs);
  } else {
    if (ra_hasreg(src))
      emit_tsi(as, MIPSI_SW, src, idx, ofs+(LJ_BE?4:0));
    emit_tsi(as, MIPSI_SW, type, idx, ofs+(LJ_BE?0:4));
  }
}

static void asm_sload(ASMState *as, IRIns *ir)
{
  int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0);
  IRType1 t = ir->t;
  Reg dest = RID_NONE, type = RID_NONE, base;
  RegSet allow = RSET_GPR;
  lua_assert(!(ir->op2 & IRSLOAD_PARENT));  /* Handled by asm_head_side(). */
  lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK));
  lua_assert(!irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME)));
  if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
    dest = ra_scratch(as, RSET_FPR);
    asm_tointg(as, ir, dest);
    t.irt = IRT_NUM;  /* Continue with a regular number type check. */
  } else if (ra_used(ir)) {
    lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
    dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR);
    rset_clear(allow, dest);
    base = ra_alloc1(as, REF_BASE, allow);
    rset_clear(allow, base);
    if ((ir->op2 & IRSLOAD_CONVERT)) {
      if (irt_isint(t)) {
        Reg tmp = ra_scratch(as, RSET_FPR);
        emit_tg(as, MIPSI_MFC1, dest, tmp);
        emit_fg(as, MIPSI_TRUNC_W_D, tmp, tmp);
        dest = tmp;
        t.irt = IRT_NUM;  /* Check for original type. */
      } else {
        Reg tmp = ra_scratch(as, RSET_GPR);
        emit_fg(as, MIPSI_CVT_D_W, dest, dest);
        emit_tg(as, MIPSI_MTC1, tmp, dest);
        dest = tmp;
        t.irt = IRT_INT;  /* Check for original type. */
      }
    }
    goto dotypecheck;
  }
  base = ra_alloc1(as, REF_BASE, allow);
  rset_clear(allow, base);
dotypecheck:
  if (irt_isnum(t)) {
    if ((ir->op2 & IRSLOAD_TYPECHECK)) {
      asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO);
      emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)LJ_TISNUM);
      type = RID_TMP;
    }
    if (ra_hasreg(dest)) emit_hsi(as, MIPSI_LDC1, dest, base, ofs);
  } else {
    if ((ir->op2 & IRSLOAD_TYPECHECK)) {
      Reg ktype = ra_allock(as, irt_toitype(t), allow);
      asm_guard(as, MIPSI_BNE, RID_TMP, ktype);
      type = RID_TMP;
    }
    if (ra_hasreg(dest)) emit_tsi(as, MIPSI_LW, dest, base, ofs ^ (LJ_BE?4:0));
  }
  if (ra_hasreg(type)) emit_tsi(as, MIPSI_LW, type, base, ofs ^ (LJ_BE?0:4));
}

/* -- Allocations --------------------------------------------------------- */

#if LJ_HASFFI
static void asm_cnew(ASMState *as, IRIns *ir)
{
  CTState *cts = ctype_ctsG(J2G(as->J));
  CTypeID id = (CTypeID)IR(ir->op1)->i;
  CTSize sz;
  CTInfo info = lj_ctype_info(cts, id, &sz);
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
  IRRef args[4];
  RegSet drop = RSET_SCRATCH;
  lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));

  as->gcsteps++;
  if (ra_hasreg(ir->r))
    rset_clear(drop, ir->r);  /* Dest reg handled below. */
  ra_evictset(as, drop);
  if (ra_used(ir))
    ra_destreg(as, ir, RID_RET);  /* GCcdata * */

  /* Initialize immutable cdata object. */
  if (ir->o == IR_CNEWI) {
    RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
    int32_t ofs = sizeof(GCcdata);
    lua_assert(sz == 4 || sz == 8);
    if (sz == 8) {
      ofs += 4;
      lua_assert((ir+1)->o == IR_HIOP);
      if (LJ_LE) ir++;
    }
    for (;;) {
      Reg r = ra_alloc1z(as, ir->op2, allow);
      emit_tsi(as, MIPSI_SW, r, RID_RET, ofs);
      rset_clear(allow, r);
      if (ofs == sizeof(GCcdata)) break;
      ofs -= 4; if (LJ_BE) ir++; else ir--;
    }
  } else if (ir->op2 != REF_NIL) {  /* Create VLA/VLS/aligned cdata. */
    ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
    args[0] = ASMREF_L;     /* lua_State *L */
    args[1] = ir->op1;      /* CTypeID id   */
    args[2] = ir->op2;      /* CTSize sz    */
    args[3] = ASMREF_TMP1;  /* CTSize align */
    asm_gencall(as, ci, args);
    emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
    return;
  }

  /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
  emit_tsi(as, MIPSI_SB, RID_RET+1, RID_RET, offsetof(GCcdata, gct));
  emit_tsi(as, MIPSI_SH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid));
  emit_ti(as, MIPSI_LI, RID_RET+1, ~LJ_TCDATA);
  emit_ti(as, MIPSI_LI, RID_TMP, id); /* Lower 16 bit used. Sign-ext ok. */
  args[0] = ASMREF_L;     /* lua_State *L */
  args[1] = ASMREF_TMP1;  /* MSize size   */
  asm_gencall(as, ci, args);
  ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
               ra_releasetmp(as, ASMREF_TMP1));
}
#else
#define asm_cnew(as, ir)	((void)0)
#endif

/* -- Write barriers ------------------------------------------------------ */

static void asm_tbar(ASMState *as, IRIns *ir)
{
  Reg tab = ra_alloc1(as, ir->op1, RSET_GPR);
  Reg mark = ra_scratch(as, rset_exclude(RSET_GPR, tab));
  Reg link = RID_TMP;
  MCLabel l_end = emit_label(as);
  emit_tsi(as, MIPSI_SW, link, tab, (int32_t)offsetof(GCtab, gclist));
  emit_tsi(as, MIPSI_SB, mark, tab, (int32_t)offsetof(GCtab, marked));
  emit_setgl(as, tab, gc.grayagain);
  emit_getgl(as, link, gc.grayagain);
  emit_dst(as, MIPSI_XOR, mark, mark, RID_TMP);  /* Clear black bit. */
  emit_branch(as, MIPSI_BEQ, RID_TMP, RID_ZERO, l_end);
  emit_tsi(as, MIPSI_ANDI, RID_TMP, mark, LJ_GC_BLACK);
  emit_tsi(as, MIPSI_LBU, mark, tab, (int32_t)offsetof(GCtab, marked));
}

static void asm_obar(ASMState *as, IRIns *ir)
{
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv];
  IRRef args[2];
  MCLabel l_end;
  Reg obj, val, tmp;
  /* No need for other object barriers (yet). */
  lua_assert(IR(ir->op1)->o == IR_UREFC);
  ra_evictset(as, RSET_SCRATCH);
  l_end = emit_label(as);
  args[0] = ASMREF_TMP1;  /* global_State *g */
  args[1] = ir->op1;      /* TValue *tv      */
  asm_gencall(as, ci, args);
  emit_tsi(as, MIPSI_ADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768);
  obj = IR(ir->op1)->r;
  tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj));
  emit_branch(as, MIPSI_BEQ, RID_TMP, RID_ZERO, l_end);
  emit_tsi(as, MIPSI_ANDI, tmp, tmp, LJ_GC_BLACK);
  emit_branch(as, MIPSI_BEQ, RID_TMP, RID_ZERO, l_end);
  emit_tsi(as, MIPSI_ANDI, RID_TMP, RID_TMP, LJ_GC_WHITES);
  val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj));
  emit_tsi(as, MIPSI_LBU, tmp, obj,
           (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv));
  emit_tsi(as, MIPSI_LBU, RID_TMP, val, (int32_t)offsetof(GChead, marked));
}

/* -- Arithmetic and logic operations ------------------------------------- */

static void asm_fparith(ASMState *as, IRIns *ir, MIPSIns mi)
{
  Reg dest = ra_dest(as, ir, RSET_FPR);
  Reg right, left = ra_alloc2(as, ir, RSET_FPR);
  right = (left >> 8); left &= 255;
  emit_fgh(as, mi, dest, left, right);
}

static void asm_fpunary(ASMState *as, IRIns *ir, MIPSIns mi)
{
  Reg dest = ra_dest(as, ir, RSET_FPR);
  Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR);
  emit_fg(as, mi, dest, left);
}

static void asm_fpmath(ASMState *as, IRIns *ir)
{
  if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
    return;
  if (ir->op2 <= IRFPM_TRUNC)
    asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2);
  else if (ir->op2 == IRFPM_SQRT)
    asm_fpunary(as, ir, MIPSI_SQRT_D);
  else
    asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
}

static void asm_add(ASMState *as, IRIns *ir)
{
  if (irt_isnum(ir->t)) {
    asm_fparith(as, ir, MIPSI_ADD_D);
  } else {
    Reg dest = ra_dest(as, ir, RSET_GPR);
    Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
    if (irref_isk(ir->op2)) {
      int32_t k = IR(ir->op2)->i;
      if (checki16(k)) {
        emit_tsi(as, MIPSI_ADDIU, dest, left, k);
        return;
      }
    }
    right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
    emit_dst(as, MIPSI_ADDU, dest, left, right);
  }
}

static void asm_sub(ASMState *as, IRIns *ir)
{
  if (irt_isnum(ir->t)) {
    asm_fparith(as, ir, MIPSI_SUB_D);
  } else {
    Reg dest = ra_dest(as, ir, RSET_GPR);
    Reg right, left = ra_alloc2(as, ir, RSET_GPR);
    right = (left >> 8); left &= 255;
    emit_dst(as, MIPSI_SUBU, dest, left, right);
  }
}

static void asm_mul(ASMState *as, IRIns *ir)
{
  if (irt_isnum(ir->t)) {
    asm_fparith(as, ir, MIPSI_MUL_D);
  } else {
    Reg dest = ra_dest(as, ir, RSET_GPR);
    Reg right, left = ra_alloc2(as, ir, RSET_GPR);
    right = (left >> 8); left &= 255;
    emit_dst(as, MIPSI_MUL, dest, left, right);
  }
}

#define asm_div(as, ir)		asm_fparith(as, ir, MIPSI_DIV_D)
#define asm_mod(as, ir)		asm_callid(as, ir, IRCALL_lj_vm_modi)
#define asm_pow(as, ir)		asm_callid(as, ir, IRCALL_lj_vm_powi)

static void asm_neg(ASMState *as, IRIns *ir)
{
  if (irt_isnum(ir->t)) {
    asm_fpunary(as, ir, MIPSI_NEG_D);
  } else {
    Reg dest = ra_dest(as, ir, RSET_GPR);
    Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
    emit_dst(as, MIPSI_SUBU, dest, RID_ZERO, left);
  }
}

#define asm_abs(as, ir)		asm_fpunary(as, ir, MIPSI_ABS_D)
#define asm_atan2(as, ir)	asm_callid(as, ir, IRCALL_atan2)
#define asm_ldexp(as, ir)	asm_callid(as, ir, IRCALL_ldexp)

static void asm_arithov(ASMState *as, IRIns *ir)
{
  Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR);
  if (irref_isk(ir->op2)) {
    int k = IR(ir->op2)->i;
    if (ir->o == IR_SUBOV) k = -k;
    if (checki16(k)) {  /* (dest < left) == (k >= 0 ? 1 : 0) */
      left = ra_alloc1(as, ir->op1, RSET_GPR);
      asm_guard(as, k >= 0 ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO);
      emit_dst(as, MIPSI_SLT, RID_TMP, dest, dest == left ? RID_TMP : left);
      emit_tsi(as, MIPSI_ADDIU, dest, left, k);
      if (dest == left) emit_move(as, RID_TMP, left);
      return;
    }
  }
  left = ra_alloc2(as, ir, RSET_GPR);
  right = (left >> 8); left &= 255;
  tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR, left),
                                                 right), dest));
  asm_guard(as, MIPSI_BLTZ, RID_TMP, 0);
  emit_dst(as, MIPSI_AND, RID_TMP, RID_TMP, tmp);
  if (ir->o == IR_ADDOV) {  /* ((dest^left) & (dest^right)) < 0 */
    emit_dst(as, MIPSI_XOR, RID_TMP, dest, dest == right ? RID_TMP : right);
  } else {  /* ((dest^left) & (dest^~right)) < 0 */
    emit_dst(as, MIPSI_XOR, RID_TMP, RID_TMP, dest);
    emit_dst(as, MIPSI_NOR, RID_TMP, dest == right ? RID_TMP : right, RID_ZERO);
  }
  emit_dst(as, MIPSI_XOR, tmp, dest, dest == left ? RID_TMP : left);
  emit_dst(as, ir->o == IR_ADDOV ? MIPSI_ADDU : MIPSI_SUBU, dest, left, right);
  if (dest == left || dest == right)
    emit_move(as, RID_TMP, dest == left ? left : right);
}
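
/* Why the overflow test above is correct: signed addition overflows exactly
** when both operands have the same sign but the result differs in sign,
** i.e. (dest^left) and (dest^right) both have the sign bit set, so
** ((dest^left) & (dest^right)) < 0. Subtraction is the same test with the
** second operand's sign flipped, hence the NOR (bitwise-not) variant:
** ((dest^left) & (dest^~right)) < 0.
*/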

#define asm_addov(as, ir)	asm_arithov(as, ir)
#define asm_subov(as, ir)	asm_arithov(as, ir)

static void asm_mulov(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg tmp, right, left = ra_alloc2(as, ir, RSET_GPR);
  right = (left >> 8); left &= 255;
  tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR, left),
                                                 right), dest));
  asm_guard(as, MIPSI_BNE, RID_TMP, tmp);
  emit_dta(as, MIPSI_SRA, RID_TMP, dest, 31);
  emit_dst(as, MIPSI_MFHI, tmp, 0, 0);
  emit_dst(as, MIPSI_MFLO, dest, 0, 0);
  emit_dst(as, MIPSI_MULT, 0, left, right);
}

#if LJ_HASFFI
static void asm_add64(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR);
  if (irref_isk(ir->op2)) {
    int32_t k = IR(ir->op2)->i;
    if (k == 0) {
      emit_dst(as, MIPSI_ADDU, dest, left, RID_TMP);
      goto loarith;
    } else if (checki16(k)) {
      emit_dst(as, MIPSI_ADDU, dest, dest, RID_TMP);
      emit_tsi(as, MIPSI_ADDIU, dest, left, k);
      goto loarith;
    }
  }
  emit_dst(as, MIPSI_ADDU, dest, dest, RID_TMP);
  right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
  emit_dst(as, MIPSI_ADDU, dest, left, right);
loarith:
  ir--;
  dest = ra_dest(as, ir, RSET_GPR);
  left = ra_alloc1(as, ir->op1, RSET_GPR);
  if (irref_isk(ir->op2)) {
    int32_t k = IR(ir->op2)->i;
    if (k == 0) {
      if (dest != left)
        emit_move(as, dest, left);
      return;
    } else if (checki16(k)) {
      if (dest == left) {
        Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, left));
        emit_move(as, dest, tmp);
        dest = tmp;
      }
      emit_dst(as, MIPSI_SLTU, RID_TMP, dest, left);
      emit_tsi(as, MIPSI_ADDIU, dest, left, k);
      return;
    }
  }
  right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
  if (dest == left && dest == right) {
    Reg tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, left), right));
    emit_move(as, dest, tmp);
    dest = tmp;
  }
  emit_dst(as, MIPSI_SLTU, RID_TMP, dest, dest == left ? right : left);
  emit_dst(as, MIPSI_ADDU, dest, left, right);
}

static void asm_sub64(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg right, left = ra_alloc2(as, ir, RSET_GPR);
  right = (left >> 8); left &= 255;
  emit_dst(as, MIPSI_SUBU, dest, dest, RID_TMP);
  emit_dst(as, MIPSI_SUBU, dest, left, right);
  ir--;
  dest = ra_dest(as, ir, RSET_GPR);
  left = ra_alloc2(as, ir, RSET_GPR);
  right = (left >> 8); left &= 255;
  if (dest == left) {
    Reg tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, left), right));
    emit_move(as, dest, tmp);
    dest = tmp;
  }
  emit_dst(as, MIPSI_SLTU, RID_TMP, left, dest);
  emit_dst(as, MIPSI_SUBU, dest, left, right);
}

static void asm_neg64(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
  emit_dst(as, MIPSI_SUBU, dest, dest, RID_TMP);
  emit_dst(as, MIPSI_SUBU, dest, RID_ZERO, left);
  ir--;
  dest = ra_dest(as, ir, RSET_GPR);
  left = ra_alloc1(as, ir->op1, RSET_GPR);
  emit_dst(as, MIPSI_SLTU, RID_TMP, RID_ZERO, dest);
  emit_dst(as, MIPSI_SUBU, dest, RID_ZERO, left);
}
#endif

static void asm_bnot(ASMState *as, IRIns *ir)
{
  Reg left, right, dest = ra_dest(as, ir, RSET_GPR);
  IRIns *irl = IR(ir->op1);
  if (mayfuse(as, ir->op1) && irl->o == IR_BOR) {
    left = ra_alloc2(as, irl, RSET_GPR);
    right = (left >> 8); left &= 255;
  } else {
    left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
    right = RID_ZERO;
  }
  emit_dst(as, MIPSI_NOR, dest, left, right);
}

static void asm_bswap(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
  if ((as->flags & JIT_F_MIPS32R2)) {
    emit_dta(as, MIPSI_ROTR, dest, RID_TMP, 16);
    emit_dst(as, MIPSI_WSBH, RID_TMP, 0, left);
  } else {
    Reg tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, left), dest));
    emit_dst(as, MIPSI_OR, dest, dest, tmp);
    emit_dst(as, MIPSI_OR, dest, dest, RID_TMP);
    emit_tsi(as, MIPSI_ANDI, dest, dest, 0xff00);
    emit_dta(as, MIPSI_SLL, RID_TMP, RID_TMP, 8);
    emit_dta(as, MIPSI_SRL, dest, left, 8);
    emit_tsi(as, MIPSI_ANDI, RID_TMP, left, 0xff00);
    emit_dst(as, MIPSI_OR, tmp, tmp, RID_TMP);
    emit_dta(as, MIPSI_SRL, tmp, left, 24);
    emit_dta(as, MIPSI_SLL, RID_TMP, left, 24);
  }
}
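
/* Reading the pre-R2 byte swap in execution order (bottom emit first,
** since code is emitted backwards), with x = left:
**   tmp   = (x << 24) | (x >> 24);         bytes 0 and 3 swapped
**   dest  = (x >> 8) & 0xff00;             byte 2 moved down to byte 1
**   dest |= (x & 0xff00) << 8;             byte 1 moved up to byte 2
**   dest |= tmp;                           combine all four bytes
** On MIPS32R2, WSBH (swap bytes within halfwords) plus a 16-bit rotate
** achieves the same in two instructions.
*/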

static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
  if (irref_isk(ir->op2)) {
    int32_t k = IR(ir->op2)->i;
    if (checku16(k)) {
      emit_tsi(as, mik, dest, left, k);
      return;
    }
  }
  right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
  emit_dst(as, mi, dest, left, right);
}

#define asm_band(as, ir)	asm_bitop(as, ir, MIPSI_AND, MIPSI_ANDI)
#define asm_bor(as, ir)		asm_bitop(as, ir, MIPSI_OR, MIPSI_ORI)
#define asm_bxor(as, ir)	asm_bitop(as, ir, MIPSI_XOR, MIPSI_XORI)

static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  if (irref_isk(ir->op2)) {  /* Constant shifts. */
    uint32_t shift = (uint32_t)(IR(ir->op2)->i & 31);
    emit_dta(as, mik, dest, ra_hintalloc(as, ir->op1, dest, RSET_GPR), shift);
  } else {
    Reg right, left = ra_alloc2(as, ir, RSET_GPR);
    right = (left >> 8); left &= 255;
    emit_dst(as, mi, dest, right, left);  /* Shift amount is in rs. */
  }
}

#define asm_bshl(as, ir)	asm_bitshift(as, ir, MIPSI_SLLV, MIPSI_SLL)
#define asm_bshr(as, ir)	asm_bitshift(as, ir, MIPSI_SRLV, MIPSI_SRL)
#define asm_bsar(as, ir)	asm_bitshift(as, ir, MIPSI_SRAV, MIPSI_SRA)
#define asm_brol(as, ir)	lua_assert(0)

  1296. static void asm_bror(ASMState *as, IRIns *ir)
  1297. {
  1298.   if ((as->flags & JIT_F_MIPS32R2)) {
  1299.     asm_bitshift(as, ir, MIPSI_ROTRV, MIPSI_ROTR);
  1300.   } else {
  1301.     Reg dest = ra_dest(as, ir, RSET_GPR);
  1302.     if (irref_isk(ir->op2)) {  /* Constant shifts. */
  1303.       uint32_t shift = (uint32_t)(IR(ir->op2)->i & 31);
  1304.       Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
  1305.       emit_rotr(as, dest, left, RID_TMP, shift);
  1306.     } else {
  1307.       Reg right, left = ra_alloc2(as, ir, RSET_GPR);
  1308.       right = (left >> 8); left &= 255;
  1309.       emit_dst(as, MIPSI_OR, dest, dest, RID_TMP);
  1310.       emit_dst(as, MIPSI_SRLV, dest, right, left);
  1311.       emit_dst(as, MIPSI_SLLV, RID_TMP, RID_TMP, left);
  1312.       emit_dst(as, MIPSI_SUBU, RID_TMP, ra_allock(as, 32, RSET_GPR), right);
  1313.     }
  1314.   }
  1315. }
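/*
** Sketch (illustrative only): the pre-R2 fallback above synthesizes a
** rotate from two variable shifts and an OR. In execution order:
*/
static uint32_t ror_model(uint32_t x, uint32_t n)
{
  /* The hardware masks both shift amounts to 5 bits, so n == 0 is safe. */
  return (x >> (n & 31)) | (x << ((32 - n) & 31));
}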

  1316. static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
  1317. {
  1318.   if (irt_isnum(ir->t)) {
  1319.     Reg dest = ra_dest(as, ir, RSET_FPR);
  1320.     Reg right, left = ra_alloc2(as, ir, RSET_FPR);
  1321.     right = (left >> 8); left &= 255;
  1322.     if (dest == left) {
  1323.       emit_fg(as, MIPSI_MOVT_D, dest, right);
  1324.     } else {
  1325.       emit_fg(as, MIPSI_MOVF_D, dest, left);
  1326.       if (dest != right) emit_fg(as, MIPSI_MOV_D, dest, right);
  1327.     }
  1328.     emit_fgh(as, MIPSI_C_OLT_D, 0, ismax ? left : right, ismax ? right : left);
  1329.   } else {
  1330.     Reg dest = ra_dest(as, ir, RSET_GPR);
  1331.     Reg right, left = ra_alloc2(as, ir, RSET_GPR);
  1332.     right = (left >> 8); left &= 255;
  1333.     if (dest == left) {
  1334.       emit_dst(as, MIPSI_MOVN, dest, right, RID_TMP);
  1335.     } else {
  1336.       emit_dst(as, MIPSI_MOVZ, dest, left, RID_TMP);
  1337.       if (dest != right) emit_move(as, dest, right);
  1338.     }
  1339.     emit_dst(as, MIPSI_SLT, RID_TMP,
  1340.              ismax ? left : right, ismax ? right : left);
  1341.   }
  1342. }

  1343. #define asm_min(as, ir)                asm_min_max(as, ir, 0)
  1344. #define asm_max(as, ir)                asm_min_max(as, ir, 1)
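/*
** Sketch (illustrative only): both the FPR and GPR paths of asm_min_max
** reduce to one compare plus a conditional move, with the compare operands
** swapped for MAX (NaN handling follows the ordered C_OLT_D compare):
*/
static int32_t minmax_model(int32_t left, int32_t right, int ismax)
{
  int cc = ismax ? (left < right) : (right < left);  /* SLT / C_OLT_D. */
  return cc ? right : left;                          /* MOVN / MOVT_D. */
}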

  1345. /* -- Comparisons --------------------------------------------------------- */

  1346. static void asm_comp(ASMState *as, IRIns *ir)
  1347. {
  1348.   /* ORDER IR: LT GE LE GT  ULT UGE ULE UGT. */
  1349.   IROp op = ir->o;
  1350.   if (irt_isnum(ir->t)) {
  1351.     Reg right, left = ra_alloc2(as, ir, RSET_FPR);
  1352.     right = (left >> 8); left &= 255;
  1353.     asm_guard(as, (op&1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0);
  1354.     emit_fgh(as, MIPSI_C_OLT_D + ((op&3) ^ ((op>>2)&1)), 0, left, right);
  1355.   } else {
  1356.     Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR);
  1357.     if (op == IR_ABC) op = IR_UGT;
  1358.     if ((op&4) == 0 && irref_isk(ir->op2) && IR(ir->op2)->i == 0) {
  1359.       MIPSIns mi = (op&2) ? ((op&1) ? MIPSI_BLEZ : MIPSI_BGTZ) :
  1360.                             ((op&1) ? MIPSI_BLTZ : MIPSI_BGEZ);
  1361.       asm_guard(as, mi, left, 0);
  1362.     } else {
  1363.       if (irref_isk(ir->op2)) {
  1364.         int32_t k = IR(ir->op2)->i;
  1365.         if ((op&2)) k++;
  1366.         if (checki16(k)) {
  1367.           asm_guard(as, (op&1) ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO);
  1368.           emit_tsi(as, (op&4) ? MIPSI_SLTIU : MIPSI_SLTI,
  1369.                    RID_TMP, left, k);
  1370.           return;
  1371.         }
  1372.       }
  1373.       right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
  1374.       asm_guard(as, ((op^(op>>1))&1) ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO);
  1375.       emit_dst(as, (op&4) ? MIPSI_SLTU : MIPSI_SLT,
  1376.                RID_TMP, (op&2) ? right : left, (op&2) ? left : right);
  1377.     }
  1378.   }
  1379. }
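/*
** Sketch (illustrative only, op taken relative to IR_LT): the bit tricks
** above rely on the ORDER IR layout LT GE LE GT ULT UGE ULE UGT, i.e.
** bit 0 inverts the branch sense, bit 1 swaps the operands and bit 2
** selects the unsigned compare. The guarded predicate is equivalent to:
*/
static int comp_model(int32_t x, int32_t y, int op)
{
  int32_t l = (op & 2) ? y : x, r = (op & 2) ? x : y;  /* Swap for LE/GT. */
  int cc = (op & 4) ? ((uint32_t)l < (uint32_t)r) : (l < r);  /* SLT(U). */
  return cc ^ ((op ^ (op >> 1)) & 1);  /* Correct the branch sense. */
}
/* The constant case turns x <= k into x < k+1, hence the k++ when (op&2). */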

  1380. static void asm_equal(ASMState *as, IRIns *ir)
  1381. {
  1382.   Reg right, left = ra_alloc2(as, ir, irt_isnum(ir->t) ? RSET_FPR : RSET_GPR);
  1383.   right = (left >> 8); left &= 255;
  1384.   if (irt_isnum(ir->t)) {
  1385.     asm_guard(as, (ir->o & 1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0);
  1386.     emit_fgh(as, MIPSI_C_EQ_D, 0, left, right);
  1387.   } else {
  1388.     asm_guard(as, (ir->o & 1) ? MIPSI_BEQ : MIPSI_BNE, left, right);
  1389.   }
  1390. }

  1391. #if LJ_HASFFI
  1392. /* 64 bit integer comparisons. */
  1393. static void asm_comp64(ASMState *as, IRIns *ir)
  1394. {
  1395.   /* ORDER IR: LT GE LE GT  ULT UGE ULE UGT. */
  1396.   IROp op = (ir-1)->o;
  1397.   MCLabel l_end;
  1398.   Reg rightlo, leftlo, righthi, lefthi = ra_alloc2(as, ir, RSET_GPR);
  1399.   righthi = (lefthi >> 8); lefthi &= 255;
  1400.   leftlo = ra_alloc2(as, ir-1,
  1401.                      rset_exclude(rset_exclude(RSET_GPR, lefthi), righthi));
  1402.   rightlo = (leftlo >> 8); leftlo &= 255;
  1403.   asm_guard(as, ((op^(op>>1))&1) ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO);
  1404.   l_end = emit_label(as);
  1405.   if (lefthi != righthi)
  1406.     emit_dst(as, (op&4) ? MIPSI_SLTU : MIPSI_SLT, RID_TMP,
  1407.              (op&2) ? righthi : lefthi, (op&2) ? lefthi : righthi);
  1408.   emit_dst(as, MIPSI_SLTU, RID_TMP,
  1409.            (op&2) ? rightlo : leftlo, (op&2) ? leftlo : rightlo);
  1410.   if (lefthi != righthi)
  1411.     emit_branch(as, MIPSI_BEQ, lefthi, righthi, l_end);
  1412. }
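/*
** Sketch (illustrative only): the split compare above decides on the
** hiwords and only lets the unsigned loword compare through when they are
** equal (the loword SLTU sits in the BEQ delay slot, so it runs either way):
*/
static int comp64_model(int32_t xhi, uint32_t xlo,
                        int32_t yhi, uint32_t ylo, int op)
{
  int cc;
  if (xhi == yhi)
    cc = (op & 2) ? (ylo < xlo) : (xlo < ylo);        /* SLTU, lowords. */
  else if (op & 4)
    cc = (op & 2) ? ((uint32_t)yhi < (uint32_t)xhi)
                  : ((uint32_t)xhi < (uint32_t)yhi);  /* SLTU, hiwords. */
  else
    cc = (op & 2) ? (yhi < xhi) : (xhi < yhi);        /* SLT, hiwords. */
  return cc ^ ((op ^ (op >> 1)) & 1);  /* Same branch-sense fix as above. */
}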

  1413. static void asm_comp64eq(ASMState *as, IRIns *ir)
  1414. {
  1415.   Reg tmp, right, left = ra_alloc2(as, ir, RSET_GPR);
  1416.   right = (left >> 8); left &= 255;
  1417.   asm_guard(as, ((ir-1)->o & 1) ? MIPSI_BEQ : MIPSI_BNE, RID_TMP, RID_ZERO);
  1418.   tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, left), right));
  1419.   emit_dst(as, MIPSI_OR, RID_TMP, RID_TMP, tmp);
  1420.   emit_dst(as, MIPSI_XOR, tmp, left, right);
  1421.   left = ra_alloc2(as, ir-1, RSET_GPR);
  1422.   right = (left >> 8); left &= 255;
  1423.   emit_dst(as, MIPSI_XOR, RID_TMP, left, right);
  1424. }
  1425. #endif
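/*
** Sketch (illustrative only): asm_comp64eq ORs the XORs of both word
** pairs, so RID_TMP ends up zero iff the two 64 bit values are equal:
*/
static int eq64_model(uint32_t xhi, uint32_t xlo, uint32_t yhi, uint32_t ylo)
{
  return ((xhi ^ yhi) | (xlo ^ ylo)) == 0;
}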

  1426. /* -- Support for 64 bit ops in 32 bit mode ------------------------------- */

  1427. /* Hiword op of a split 64 bit op. Previous op must be the loword op. */
  1428. static void asm_hiop(ASMState *as, IRIns *ir)
  1429. {
  1430. #if LJ_HASFFI
  1431.   /* HIOP is marked as a store because it needs its own DCE logic. */
  1432.   int uselo = ra_used(ir-1), usehi = ra_used(ir);  /* Loword/hiword used? */
  1433.   if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
  1434.   if ((ir-1)->o == IR_CONV) {  /* Conversions to/from 64 bit. */
  1435.     as->curins--;  /* Always skip the CONV. */
  1436.     if (usehi || uselo)
  1437.       asm_conv64(as, ir);
  1438.     return;
  1439.   } else if ((ir-1)->o < IR_EQ) {  /* 64 bit integer comparisons. ORDER IR. */
  1440.     as->curins--;  /* Always skip the loword comparison. */
  1441.     asm_comp64(as, ir);
  1442.     return;
  1443.   } else if ((ir-1)->o <= IR_NE) {  /* 64 bit integer comparisons. ORDER IR. */
  1444.     as->curins--;  /* Always skip the loword comparison. */
  1445.     asm_comp64eq(as, ir);
  1446.     return;
  1447.   } else if ((ir-1)->o == IR_XSTORE) {
  1448.     as->curins--;  /* Handle both stores here. */
  1449.     if ((ir-1)->r != RID_SINK) {
  1450.       asm_xstore_(as, ir, LJ_LE ? 4 : 0);
  1451.       asm_xstore_(as, ir-1, LJ_LE ? 0 : 4);
  1452.     }
  1453.     return;
  1454.   }
  1455.   if (!usehi) return;  /* Skip unused hiword op for all remaining ops. */
  1456.   switch ((ir-1)->o) {
  1457.   case IR_ADD: as->curins--; asm_add64(as, ir); break;
  1458.   case IR_SUB: as->curins--; asm_sub64(as, ir); break;
  1459.   case IR_NEG: as->curins--; asm_neg64(as, ir); break;
  1460.   case IR_CALLN:
  1461.   case IR_CALLXS:
  1462.     if (!uselo)
  1463.       ra_allocref(as, ir->op1, RID2RSET(RID_RETLO));  /* Mark lo op as used. */
  1464.     break;
  1465.   case IR_CNEWI:
  1466.     /* Nothing to do here. Handled by lo op itself. */
  1467.     break;
  1468.   default: lua_assert(0); break;
  1469.   }
  1470. #else
  1471.   UNUSED(as); UNUSED(ir); lua_assert(0);  /* Unused without FFI. */
  1472. #endif
  1473. }

  1474. /* -- Profiling ----------------------------------------------------------- */

  1475. static void asm_prof(ASMState *as, IRIns *ir)
  1476. {
  1477.   UNUSED(ir);
  1478.   asm_guard(as, MIPSI_BNE, RID_TMP, RID_ZERO);
  1479.   emit_tsi(as, MIPSI_ANDI, RID_TMP, RID_TMP, HOOK_PROFILE);
  1480.   emit_lsglptr(as, MIPSI_LBU, RID_TMP,
  1481.                (int32_t)offsetof(global_State, hookmask));
  1482. }
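/*
** Note (illustrative only): in execution order the guard above amounts to
**   if (g->hookmask & HOOK_PROFILE) goto exit;
** i.e. the trace exits whenever the profiler hook bit is set.
*/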

  1483. /* -- Stack handling ------------------------------------------------------ */

  1484. /* Check Lua stack size for overflow. Use exit handler as fallback. */
  1485. static void asm_stack_check(ASMState *as, BCReg topslot,
  1486.                             IRIns *irp, RegSet allow, ExitNo exitno)
  1487. {
  1488.   /* Try to get an unused temp. register, otherwise spill/restore RID_RET*. */
  1489.   Reg tmp, pbase = irp ? (ra_hasreg(irp->r) ? irp->r : RID_TMP) : RID_BASE;
  1490.   ExitNo oldsnap = as->snapno;
  1491.   rset_clear(allow, pbase);
  1492.   tmp = allow ? rset_pickbot(allow) :
  1493.                 (pbase == RID_RETHI ? RID_RETLO : RID_RETHI);
  1494.   as->snapno = exitno;
  1495.   asm_guard(as, MIPSI_BNE, RID_TMP, RID_ZERO);
  1496.   as->snapno = oldsnap;
  1497.   if (allow == RSET_EMPTY)  /* Restore temp. register. */
  1498.     emit_tsi(as, MIPSI_LW, tmp, RID_SP, 0);
  1499.   else
  1500.     ra_modified(as, tmp);
  1501.   emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)(8*topslot));
  1502.   emit_dst(as, MIPSI_SUBU, RID_TMP, tmp, pbase);
  1503.   emit_tsi(as, MIPSI_LW, tmp, tmp, offsetof(lua_State, maxstack));
  1504.   if (pbase == RID_TMP)
  1505.     emit_getgl(as, RID_TMP, jit_base);
  1506.   emit_getgl(as, tmp, cur_L);
  1507.   if (allow == RSET_EMPTY)  /* Spill temp. register. */
  1508.     emit_tsi(as, MIPSI_SW, tmp, RID_SP, 0);
  1509. }
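/*
** Sketch (illustrative only): read bottom-to-top, the sequence above is an
** unsigned headroom test against the stack limit of the current lua_State:
*/
static int stack_check_model(uintptr_t maxstack, uintptr_t base,
                             uint32_t topslot)
{
  /* SUBU + SLTIU; a taken BNE guard exits to the fallback handler. */
  return (uint32_t)(maxstack - base) < 8*topslot;
}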

  1510. /* Restore Lua stack from on-trace state. */
  1511. static void asm_stack_restore(ASMState *as, SnapShot *snap)
  1512. {
  1513.   SnapEntry *map = &as->T->snapmap[snap->mapofs];
  1514.   SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1];
  1515.   MSize n, nent = snap->nent;
  1516.   /* Store the value of all modified slots to the Lua stack. */
  1517.   for (n = 0; n < nent; n++) {
  1518.     SnapEntry sn = map[n];
  1519.     BCReg s = snap_slot(sn);
  1520.     int32_t ofs = 8*((int32_t)s-1);
  1521.     IRRef ref = snap_ref(sn);
  1522.     IRIns *ir = IR(ref);
  1523.     if ((sn & SNAP_NORESTORE))
  1524.       continue;
  1525.     if (irt_isnum(ir->t)) {
  1526.       Reg src = ra_alloc1(as, ref, RSET_FPR);
  1527.       emit_hsi(as, MIPSI_SDC1, src, RID_BASE, ofs);
  1528.     } else {
  1529.       Reg type;
  1530.       RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
  1531.       lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t));
  1532.       if (!irt_ispri(ir->t)) {
  1533.         Reg src = ra_alloc1(as, ref, allow);
  1534.         rset_clear(allow, src);
  1535.         emit_tsi(as, MIPSI_SW, src, RID_BASE, ofs+(LJ_BE?4:0));
  1536.       }
  1537.       if ((sn & (SNAP_CONT|SNAP_FRAME))) {
  1538.         if (s == 0) continue;  /* Do not overwrite link to previous frame. */
  1539.         type = ra_allock(as, (int32_t)(*flinks--), allow);
  1540.       } else {
  1541.         type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
  1542.       }
  1543.       emit_tsi(as, MIPSI_SW, type, RID_BASE, ofs+(LJ_BE?0:4));
  1544.     }
  1545.     checkmclim(as);
  1546.   }
  1547.   lua_assert(map + nent == flinks);
  1548. }
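/*
** Sketch (illustrative only): each restored non-number slot is a value
** word plus a type-tag word; the LJ_BE?0:4 / LJ_BE?4:0 offsets above
** amount to:
*/
static void slot_store_model(int32_t *slot, int32_t value, int32_t itype)
{
  slot[LJ_BE ? 1 : 0] = value;  /* ofs + (LJ_BE?4:0) in asm_stack_restore. */
  slot[LJ_BE ? 0 : 1] = itype;  /* ofs + (LJ_BE?0:4) in asm_stack_restore. */
}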

  1549. /* -- GC handling --------------------------------------------------------- */

  1550. /* Check GC threshold and do one or more GC steps. */
  1551. static void asm_gc_check(ASMState *as)
  1552. {
  1553.   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit];
  1554.   IRRef args[2];
  1555.   MCLabel l_end;
  1556.   Reg tmp;
  1557.   ra_evictset(as, RSET_SCRATCH);
  1558.   l_end = emit_label(as);
  1559.   /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */
  1560.   /* Assumes asm_snap_prep() already done. */
  1561.   asm_guard(as, MIPSI_BNE, RID_RET, RID_ZERO);
  1562.   args[0] = ASMREF_TMP1;  /* global_State *g */
  1563.   args[1] = ASMREF_TMP2;  /* MSize steps     */
  1564.   asm_gencall(as, ci, args);
  1565.   emit_tsi(as, MIPSI_ADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768);
  1566.   tmp = ra_releasetmp(as, ASMREF_TMP2);
  1567.   emit_loadi(as, tmp, as->gcsteps);
  1568.   /* Jump around GC step if GC total < GC threshold. */
  1569.   emit_branch(as, MIPSI_BNE, RID_TMP, RID_ZERO, l_end);
  1570.   emit_dst(as, MIPSI_SLTU, RID_TMP, RID_TMP, tmp);
  1571.   emit_getgl(as, tmp, gc.threshold);
  1572.   emit_getgl(as, RID_TMP, gc.total);
  1573.   as->gcsteps = 0;
  1574.   checkmclim(as);
  1575. }
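/*
** Sketch (illustrative only): in execution order the code above performs,
** roughly, the check below; per the comment above, lj_gc_step_jit returns
** non-zero in GCSatomic/GCSfinalize, which makes the guard exit the trace:
*/
static void gc_check_model(global_State *g, MSize steps)
{
  if (g->gc.total >= g->gc.threshold) {  /* SLTU + BNE skip when below. */
    if (lj_gc_step_jit(g, steps)) {
      /* Guard fires: exit the trace. */
    }
  }
}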

  1576. /* -- Loop handling ------------------------------------------------------- */

  1577. /* Fixup the loop branch. */
  1578. static void asm_loop_fixup(ASMState *as)
  1579. {
  1580.   MCode *p = as->mctop;
  1581.   MCode *target = as->mcp;
  1582.   p[-1] = MIPSI_NOP;
  1583.   if (as->loopinv) {  /* Inverted loop branch? */
  1584.     /* asm_guard already inverted the cond branch. Only patch the target. */
  1585.     p[-3] |= ((target-p+2) & 0x0000ffffu);
  1586.   } else {
  1587.     p[-2] = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu);
  1588.   }
  1589. }
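/*
** Sketch (illustrative only) of the two patch forms above: a conditional
** branch at p[-3] encodes a signed 16 bit word displacement relative to
** the next instruction (hence target-p+2), while J encodes bits 27..2 of
** an absolute target within the same 256 MB region:
*/
static MCode branch_disp_model(MCode *branch, MCode *target)
{
  return (MCode)((target - (branch + 1)) & 0x0000ffffu);
}
static MCode jump_encode_model(MCode *target)
{
  return MIPSI_J | (MCode)(((uintptr_t)target >> 2) & 0x03ffffffu);
}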

  1590. /* -- Head of trace ------------------------------------------------------- */

  1591. /* Coalesce BASE register for a root trace. */
  1592. static void asm_head_root_base(ASMState *as)
  1593. {
  1594.   IRIns *ir = IR(REF_BASE);
  1595.   Reg r = ir->r;
  1596.   if (as->loopinv) as->mctop--;
  1597.   if (ra_hasreg(r)) {
  1598.     ra_free(as, r);
  1599.     if (rset_test(as->modset, r) || irt_ismarked(ir->t))
  1600.       ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
  1601.     if (r != RID_BASE)
  1602.       emit_move(as, r, RID_BASE);
  1603.   }
  1604. }

  1605. /* Coalesce BASE register for a side trace. */
  1606. static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
  1607. {
  1608.   IRIns *ir = IR(REF_BASE);
  1609.   Reg r = ir->r;
  1610.   if (as->loopinv) as->mctop--;
  1611.   if (ra_hasreg(r)) {
  1612.     ra_free(as, r);
  1613.     if (rset_test(as->modset, r) || irt_ismarked(ir->t))
  1614.       ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
  1615.     if (irp->r == r) {
  1616.       rset_clear(allow, r);  /* Mark same BASE register as coalesced. */
  1617.     } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
  1618.       rset_clear(allow, irp->r);
  1619.       emit_move(as, r, irp->r);  /* Move from coalesced parent reg. */
  1620.     } else {
  1621.       emit_getgl(as, r, jit_base);  /* Otherwise reload BASE. */
  1622.     }
  1623.   }
  1624.   return allow;
  1625. }

  1626. /* -- Tail of trace ------------------------------------------------------- */

  1627. /* Fixup the tail code. */
  1628. static void asm_tail_fixup(ASMState *as, TraceNo lnk)
  1629. {
  1630.   MCode *target = lnk ? traceref(as->J,lnk)->mcode : (MCode *)lj_vm_exit_interp;
  1631.   int32_t spadj = as->T->spadjust;
  1632.   MCode *p = as->mctop-1;
  1633.   *p = spadj ? (MIPSI_ADDIU|MIPSF_T(RID_SP)|MIPSF_S(RID_SP)|spadj) : MIPSI_NOP;
  1634.   p[-1] = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu);
  1635. }
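/*
** Note (illustrative only): the tail layout is J <target> followed by
** ADDIU sp,sp,spadj (or NOP). MIPS executes the instruction after a jump
** in its delay slot, so the stack adjustment still runs before the target.
*/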

  1636. /* Prepare tail of code. */
  1637. static void asm_tail_prep(ASMState *as)
  1638. {
  1639.   as->mcp = as->mctop-2;  /* Leave room for branch plus nop or stack adj. */
  1640.   as->invmcp = as->loopref ? as->mcp : NULL;
  1641. }

  1642. /* -- Trace setup --------------------------------------------------------- */

  1643. /* Ensure there are enough stack slots for call arguments. */
  1644. static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
  1645. {
  1646.   IRRef args[CCI_NARGS_MAX*2];
  1647.   uint32_t i, nargs = CCI_XNARGS(ci);
  1648.   int nslots = 4, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
  1649.   asm_collectargs(as, ir, ci, args);
  1650.   for (i = 0; i < nargs; i++) {
  1651.     if (args[i] && irt_isfp(IR(args[i])->t) &&
  1652.         nfpr > 0 && !(ci->flags & CCI_VARARG)) {
  1653.       nfpr--;
  1654.       ngpr -= irt_isnum(IR(args[i])->t) ? 2 : 1;
  1655.     } else if (args[i] && irt_isnum(IR(args[i])->t)) {
  1656.       nfpr = 0;
  1657.       ngpr = ngpr & ~1;
  1658.       if (ngpr > 0) ngpr -= 2; else nslots = (nslots+3) & ~1;
  1659.     } else {
  1660.       nfpr = 0;
  1661.       if (ngpr > 0) ngpr--; else nslots++;
  1662.     }
  1663.   }
  1664.   if (nslots > as->evenspill)  /* Leave room for args in stack slots. */
  1665.     as->evenspill = nslots;
  1666.   return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET);
  1667. }
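/*
** Worked example (illustrative only) of the o32 accounting above for a
** non-vararg call with signature (double, int, double):
**   arg0 double -> $f12    (nfpr 2->1, ngpr 4->2)
**   arg1 int    -> $a2     (ngpr 2->1)
**   arg2 double -> stack   (nfpr now 0; ngpr 1 & ~1 == 0;
**                           nslots = (4+3) & ~1 = 6, 8 byte aligned)
** The first 4 argument words always reserve their 16 byte stack save area,
** hence nslots starts at 4.
*/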

  1668. static void asm_setup_target(ASMState *as)
  1669. {
  1670.   asm_sparejump_setup(as);
  1671.   asm_exitstub_setup(as);
  1672. }

  1673. /* -- Trace patching ------------------------------------------------------ */

  1674. /* Patch exit jumps of existing machine code to a new target. */
  1675. void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
  1676. {
  1677.   MCode *p = T->mcode;
  1678.   MCode *pe = (MCode *)((char *)p + T->szmcode);
  1679.   MCode *px = exitstub_trace_addr(T, exitno);
  1680.   MCode *cstart = NULL, *cstop = NULL;
  1681.   MCode *mcarea = lj_mcode_patch(J, p, 0);
  1682.   MCode exitload = MIPSI_LI | MIPSF_T(RID_TMP) | exitno;
  1683.   MCode tjump = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu);
  1684.   for (p++; p < pe; p++) {
  1685.     if (*p == exitload) {  /* Look for load of exit number. */
  1686.       if (((p[-1] ^ (px-p)) & 0xffffu) == 0) {  /* Look for exitstub branch. */
  1687.         ptrdiff_t delta = target - p;
  1688.         if (((delta + 0x8000) >> 16) == 0) {  /* Patch in-range branch. */
  1689.         patchbranch:
  1690.           p[-1] = (p[-1] & 0xffff0000u) | (delta & 0xffffu);
  1691.           *p = MIPSI_NOP;  /* Replace the load of the exit number. */
  1692.           cstop = p;
  1693.           if (!cstart) cstart = p-1;
  1694.         } else {  /* Branch out of range. Use spare jump slot in mcarea. */
  1695.           int i;
  1696.           for (i = 2; i < 2+MIPS_SPAREJUMP*2; i += 2) {
  1697.             if (mcarea[i] == tjump) {
  1698.               delta = mcarea+i - p;
  1699.               goto patchbranch;
  1700.             } else if (mcarea[i] == MIPSI_NOP) {
  1701.               mcarea[i] = tjump;
  1702.               cstart = mcarea+i;
  1703.               delta = mcarea+i - p;
  1704.               goto patchbranch;
  1705.             }
  1706.           }
  1707.           /* Ignore jump slot overflow. Child trace is simply not attached. */
  1708.         }
  1709.       } else if (p+1 == pe) {
  1710.         /* Patch NOP after code for inverted loop branch. Use of J is ok. */
  1711.         lua_assert(p[1] == MIPSI_NOP);
  1712.         p[1] = tjump;
  1713.         *p = MIPSI_NOP;  /* Replace the load of the exit number. */
  1714.         cstop = p+2;
  1715.         if (!cstart) cstart = p+1;
  1716.       }
  1717.     }
  1718.   }
  1719.   if (cstart) lj_mcode_sync(cstart, cstop);
  1720.   lj_mcode_patch(J, mcarea, 1);
  1721. }
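/*
** Sketch (illustrative only): the in-range test above accepts exactly the
** displacements that fit the signed 16 bit branch field:
*/
static int branch_in_range_model(ptrdiff_t delta)
{
  return ((delta + 0x8000) >> 16) == 0;  /* -0x8000 <= delta <= 0x7fff. */
}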
  1721. }