src/lj_asm_arm.h - luajit-2.0-src

Source code

  1. /*
  2. ** ARM IR assembler (SSA IR -> machine code).
  3. ** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
  4. */

  5. /* -- Register allocator extensions --------------------------------------- */

  6. /* Allocate a register with a hint. */
  7. static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow)
  8. {
  9.   Reg r = IR(ref)->r;
  10.   if (ra_noreg(r)) {
  11.     if (!ra_hashint(r) && !iscrossref(as, ref))
  12.       ra_sethint(IR(ref)->r, hint);  /* Propagate register hint. */
  13.     r = ra_allocref(as, ref, allow);
  14.   }
  15.   ra_noweak(as, r);
  16.   return r;
  17. }

  18. /* Allocate a scratch register pair. */
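        /* Returns an even GPR r; r and r+1 form the pair (LDRD/STRD require an even/odd register pair). */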
  19. static Reg ra_scratchpair(ASMState *as, RegSet allow)
  20. {
  21.   RegSet pick1 = as->freeset & allow;
  22.   RegSet pick2 = pick1 & (pick1 >> 1) & RSET_GPREVEN;
  23.   Reg r;
  24.   if (pick2) {
  25.     r = rset_picktop(pick2);
  26.   } else {
  27.     RegSet pick = pick1 & (allow >> 1) & RSET_GPREVEN;
  28.     if (pick) {
  29.       r = rset_picktop(pick);
  30.       ra_restore(as, regcost_ref(as->cost[r+1]));
  31.     } else {
  32.       pick = pick1 & (allow << 1) & RSET_GPRODD;
  33.       if (pick) {
  34.         r = ra_restore(as, regcost_ref(as->cost[rset_picktop(pick)-1]));
  35.       } else {
  36.         r = ra_evict(as, allow & (allow >> 1) & RSET_GPREVEN);
  37.         ra_restore(as, regcost_ref(as->cost[r+1]));
  38.       }
  39.     }
  40.   }
  41.   lua_assert(rset_test(RSET_GPREVEN, r));
  42.   ra_modified(as, r);
  43.   ra_modified(as, r+1);
  44.   RA_DBGX((as, "scratchpair    $r $r", r, r+1));
  45.   return r;
  46. }

  47. #if !LJ_SOFTFP
  48. /* Allocate two source registers for three-operand instructions. */
  49. static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow)
  50. {
  51.   IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
  52.   Reg left = irl->r, right = irr->r;
  53.   if (ra_hasreg(left)) {
  54.     ra_noweak(as, left);
  55.     if (ra_noreg(right))
  56.       right = ra_allocref(as, ir->op2, rset_exclude(allow, left));
  57.     else
  58.       ra_noweak(as, right);
  59.   } else if (ra_hasreg(right)) {
  60.     ra_noweak(as, right);
  61.     left = ra_allocref(as, ir->op1, rset_exclude(allow, right));
  62.   } else if (ra_hashint(right)) {
  63.     right = ra_allocref(as, ir->op2, allow);
  64.     left = ra_alloc1(as, ir->op1, rset_exclude(allow, right));
  65.   } else {
  66.     left = ra_allocref(as, ir->op1, allow);
  67.     right = ra_alloc1(as, ir->op2, rset_exclude(allow, left));
  68.   }
  69.   return left | (right << 8);
  70. }
  71. #endif

  72. /* -- Guard handling ------------------------------------------------------ */

  73. /* Generate an exit stub group at the bottom of the reserved MCode memory. */
  74. static MCode *asm_exitstub_gen(ASMState *as, ExitNo group)
  75. {
  76.   MCode *mxp = as->mcbot;
  77.   int i;
  78.   if (mxp + 4*4+4*EXITSTUBS_PER_GROUP >= as->mctop)
  79.     asm_mclimit(as);
  80.   /* str lr, [sp]; bl ->vm_exit_handler; .long DISPATCH_address, group. */
  81.   *mxp++ = ARMI_STR|ARMI_LS_P|ARMI_LS_U|ARMF_D(RID_LR)|ARMF_N(RID_SP);
  82.   *mxp = ARMI_BL|((((MCode *)(void *)lj_vm_exit_handler-mxp)-2)&0x00ffffffu);
  83.   mxp++;
  84.   *mxp++ = (MCode)i32ptr(J2GG(as->J)->dispatch);  /* DISPATCH address */
  85.   *mxp++ = group*EXITSTUBS_PER_GROUP;
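        /* Each stub is a single branch back to the group head: -6-i = -(4+i)-2, i.e. the 4-word group header plus the 2-word PC bias. */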
  86.   for (i = 0; i < EXITSTUBS_PER_GROUP; i++)
  87.     *mxp++ = ARMI_B|((-6-i)&0x00ffffffu);
  88.   lj_mcode_sync(as->mcbot, mxp);
  89.   lj_mcode_commitbot(as->J, mxp);
  90.   as->mcbot = mxp;
  91.   as->mclim = as->mcbot + MCLIM_REDZONE;
  92.   return mxp - EXITSTUBS_PER_GROUP;
  93. }

  94. /* Setup all needed exit stubs. */
  95. static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
  96. {
  97.   ExitNo i;
  98.   if (nexits >= EXITSTUBS_PER_GROUP*LJ_MAX_EXITSTUBGR)
  99.     lj_trace_err(as->J, LJ_TRERR_SNAPOV);
  100.   for (i = 0; i < (nexits+EXITSTUBS_PER_GROUP-1)/EXITSTUBS_PER_GROUP; i++)
  101.     if (as->J->exitstubgroup[i] == NULL)
  102.       as->J->exitstubgroup[i] = asm_exitstub_gen(as, i);
  103. }

  104. /* Emit conditional branch to exit for guard. */
  105. static void asm_guardcc(ASMState *as, ARMCC cc)
  106. {
  107.   MCode *target = exitstub_addr(as->J, as->snapno);
  108.   MCode *p = as->mcp;
  109.   if (LJ_UNLIKELY(p == as->invmcp)) {
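        /* Guard lands in the loop branch slot: place the exit call there and jump over it with the inverted condition; asm_loop_fixup completes the loop branch. */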
  110.     as->loopinv = 1;
  111.     *p = ARMI_BL | ((target-p-2) & 0x00ffffffu);
  112.     emit_branch(as, ARMF_CC(ARMI_B, cc^1), p+1);
  113.     return;
  114.   }
  115.   emit_branch(as, ARMF_CC(ARMI_BL, cc), target);
  116. }

  117. /* -- Operand fusion ------------------------------------------------------ */

  118. /* Limit linear search to this distance. Avoids O(n^2) behavior. */
  119. #define CONFLICT_SEARCH_LIM        31

  120. /* Check if there's no conflicting instruction between curins and ref. */
  121. static int noconflict(ASMState *as, IRRef ref, IROp conflict)
  122. {
  123.   IRIns *ir = as->ir;
  124.   IRRef i = as->curins;
  125.   if (i > ref + CONFLICT_SEARCH_LIM)
  126.     return 0;  /* Give up, ref is too far away. */
  127.   while (--i > ref)
  128.     if (ir[i].o == conflict)
  129.       return 0;  /* Conflict found. */
  130.   return 1;  /* Ok, no conflict. */
  131. }

  132. /* Fuse the array base of colocated arrays. */
  133. static int32_t asm_fuseabase(ASMState *as, IRRef ref)
  134. {
  135.   IRIns *ir = IR(ref);
  136.   if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE &&
  137.       !neverfuse(as) && noconflict(as, ref, IR_NEWREF))
  138.     return (int32_t)sizeof(GCtab);
  139.   return 0;
  140. }

  141. /* Fuse array/hash/upvalue reference into register+offset operand. */
  142. static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow,
  143.                           int lim)
  144. {
  145.   IRIns *ir = IR(ref);
  146.   if (ra_noreg(ir->r)) {
  147.     if (ir->o == IR_AREF) {
  148.       if (mayfuse(as, ref)) {
  149.         if (irref_isk(ir->op2)) {
  150.           IRRef tab = IR(ir->op1)->op1;
  151.           int32_t ofs = asm_fuseabase(as, tab);
  152.           IRRef refa = ofs ? tab : ir->op1;
  153.           ofs += 8*IR(ir->op2)->i;
  154.           if (ofs > -lim && ofs < lim) {
  155.             *ofsp = ofs;
  156.             return ra_alloc1(as, refa, allow);
  157.           }
  158.         }
  159.       }
  160.     } else if (ir->o == IR_HREFK) {
  161.       if (mayfuse(as, ref)) {
  162.         int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node));
  163.         if (ofs < lim) {
  164.           *ofsp = ofs;
  165.           return ra_alloc1(as, ir->op1, allow);
  166.         }
  167.       }
  168.     } else if (ir->o == IR_UREFC) {
  169.       if (irref_isk(ir->op1)) {
  170.         GCfunc *fn = ir_kfunc(IR(ir->op1));
  171.         int32_t ofs = i32ptr(&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv);
  172.         *ofsp = (ofs & 255);  /* Mask out less bits to allow LDRD. */
  173.         return ra_allock(as, (ofs & ~255), allow);
  174.       }
  175.     }
  176.   }
  177.   *ofsp = 0;
  178.   return ra_alloc1(as, ref, allow);
  179. }

  180. /* Fuse m operand into arithmetic/logic instructions. */
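        /* The returned value is an ARM operand-2 encoding: a rotated 8-bit immediate, a plain register, or a shifted register. */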
  181. static uint32_t asm_fuseopm(ASMState *as, ARMIns ai, IRRef ref, RegSet allow)
  182. {
  183.   IRIns *ir = IR(ref);
  184.   if (ra_hasreg(ir->r)) {
  185.     ra_noweak(as, ir->r);
  186.     return ARMF_M(ir->r);
  187.   } else if (irref_isk(ref)) {
  188.     uint32_t k = emit_isk12(ai, ir->i);
  189.     if (k)
  190.       return k;
  191.   } else if (mayfuse(as, ref)) {
  192.     if (ir->o >= IR_BSHL && ir->o <= IR_BROR) {
  193.       Reg m = ra_alloc1(as, ir->op1, allow);
  194.       ARMShift sh = ir->o == IR_BSHL ? ARMSH_LSL :
  195.                     ir->o == IR_BSHR ? ARMSH_LSR :
  196.                     ir->o == IR_BSAR ? ARMSH_ASR : ARMSH_ROR;
  197.       if (irref_isk(ir->op2)) {
  198.         return m | ARMF_SH(sh, (IR(ir->op2)->i & 31));
  199.       } else {
  200.         Reg s = ra_alloc1(as, ir->op2, rset_exclude(allow, m));
  201.         return m | ARMF_RSH(sh, s);
  202.       }
  203.     } else if (ir->o == IR_ADD && ir->op1 == ir->op2) {
  204.       Reg m = ra_alloc1(as, ir->op1, allow);
  205.       return m | ARMF_SH(ARMSH_LSL, 1);
  206.     }
  207.   }
  208.   return ra_allocref(as, ref, allow);
  209. }

  210. /* Fuse shifts into loads/stores. Only bother with BSHL 2 => lsl #2. */
  211. static IRRef asm_fuselsl2(ASMState *as, IRRef ref)
  212. {
  213.   IRIns *ir = IR(ref);
  214.   if (ra_noreg(ir->r) && mayfuse(as, ref) && ir->o == IR_BSHL &&
  215.       irref_isk(ir->op2) && IR(ir->op2)->i == 2)
  216.     return ir->op1;
  217.   return 0;  /* No fusion. */
  218. }

  219. /* Fuse XLOAD/XSTORE reference into load/store operand. */
  220. static void asm_fusexref(ASMState *as, ARMIns ai, Reg rd, IRRef ref,
  221.                          RegSet allow, int32_t ofs)
  222. {
  223.   IRIns *ir = IR(ref);
  224.   Reg base;
  225.   if (ra_noreg(ir->r) && canfuse(as, ir)) {
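        /* Offset limits per addressing mode: VFP loads/stores (bit 27 of ai) take imm8*4, LDR/STR/LDRB (bit 26) take imm12, the halfword/doubleword forms take imm8. */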
  226.     int32_t lim = (!LJ_SOFTFP && (ai & 0x08000000)) ? 1024 :
  227.                    (ai & 0x04000000) ? 4096 : 256;
  228.     if (ir->o == IR_ADD) {
  229.       int32_t ofs2;
  230.       if (irref_isk(ir->op2) &&
  231.           (ofs2 = ofs + IR(ir->op2)->i) > -lim && ofs2 < lim &&
  232.           (!(!LJ_SOFTFP && (ai & 0x08000000)) || !(ofs2 & 3))) {
  233.         ofs = ofs2;
  234.         ref = ir->op1;
  235.       } else if (ofs == 0 && !(!LJ_SOFTFP && (ai & 0x08000000))) {
  236.         IRRef lref = ir->op1, rref = ir->op2;
  237.         Reg rn, rm;
  238.         if ((ai & 0x04000000)) {
  239.           IRRef sref = asm_fuselsl2(as, rref);
  240.           if (sref) {
  241.             rref = sref;
  242.             ai |= ARMF_SH(ARMSH_LSL, 2);
  243.           } else if ((sref = asm_fuselsl2(as, lref)) != 0) {
  244.             lref = rref;
  245.             rref = sref;
  246.             ai |= ARMF_SH(ARMSH_LSL, 2);
  247.           }
  248.         }
  249.         rn = ra_alloc1(as, lref, allow);
  250.         rm = ra_alloc1(as, rref, rset_exclude(allow, rn));
  251.         if ((ai & 0x04000000)) ai |= ARMI_LS_R;
  252.         emit_dnm(as, ai|ARMI_LS_P|ARMI_LS_U, rd, rn, rm);
  253.         return;
  254.       }
  255.     } else if (ir->o == IR_STRREF && !(!LJ_SOFTFP && (ai & 0x08000000))) {
  256.       lua_assert(ofs == 0);
  257.       ofs = (int32_t)sizeof(GCstr);
  258.       if (irref_isk(ir->op2)) {
  259.         ofs += IR(ir->op2)->i;
  260.         ref = ir->op1;
  261.       } else if (irref_isk(ir->op1)) {
  262.         ofs += IR(ir->op1)->i;
  263.         ref = ir->op2;
  264.       } else {
  265.         /* NYI: Fuse ADD with constant. */
  266.         Reg rn = ra_alloc1(as, ir->op1, allow);
  267.         uint32_t m = asm_fuseopm(as, 0, ir->op2, rset_exclude(allow, rn));
  268.         if ((ai & 0x04000000))
  269.           emit_lso(as, ai, rd, rd, ofs);
  270.         else
  271.           emit_lsox(as, ai, rd, rd, ofs);
  272.         emit_dn(as, ARMI_ADD^m, rd, rn);
  273.         return;
  274.       }
  275.       if (ofs <= -lim || ofs >= lim) {
  276.         Reg rn = ra_alloc1(as, ref, allow);
  277.         Reg rm = ra_allock(as, ofs, rset_exclude(allow, rn));
  278.         if ((ai & 0x04000000)) ai |= ARMI_LS_R;
  279.         emit_dnm(as, ai|ARMI_LS_P|ARMI_LS_U, rd, rn, rm);
  280.         return;
  281.       }
  282.     }
  283.   }
  284.   base = ra_alloc1(as, ref, allow);
  285. #if !LJ_SOFTFP
  286.   if ((ai & 0x08000000))
  287.     emit_vlso(as, ai, rd, base, ofs);
  288.   else
  289. #endif
  290.   if ((ai & 0x04000000))
  291.     emit_lso(as, ai, rd, base, ofs);
  292.   else
  293.     emit_lsox(as, ai, rd, base, ofs);
  294. }

  295. #if !LJ_SOFTFP
  296. /* Fuse to multiply-add/sub instruction. */
  297. static int asm_fusemadd(ASMState *as, IRIns *ir, ARMIns ai, ARMIns air)
  298. {
  299.   IRRef lref = ir->op1, rref = ir->op2;
  300.   IRIns *irm;
  301.   if (lref != rref &&
  302.       ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) &&
  303.         ra_noreg(irm->r)) ||
  304.        (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) &&
  305.         (rref = lref, ai = air, ra_noreg(irm->r))))) {
  306.     Reg dest = ra_dest(as, ir, RSET_FPR);
  307.     Reg add = ra_hintalloc(as, rref, dest, RSET_FPR);
  308.     Reg right, left = ra_alloc2(as, irm,
  309.                         rset_exclude(rset_exclude(RSET_FPR, dest), add));
  310.     right = (left >> 8); left &= 255;
  311.     emit_dnm(as, ai, (dest & 15), (left & 15), (right & 15));
  312.     if (dest != add) emit_dm(as, ARMI_VMOV_D, (dest & 15), (add & 15));
  313.     return 1;
  314.   }
  315.   return 0;
  316. }
  317. #endif

  318. /* -- Calls --------------------------------------------------------------- */

  319. /* Generate a call to a C function. */
  320. static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
  321. {
  322.   uint32_t n, nargs = CCI_XNARGS(ci);
  323.   int32_t ofs = 0;
  324. #if LJ_SOFTFP
  325.   Reg gpr = REGARG_FIRSTGPR;
  326. #else
  327.   Reg gpr, fpr = REGARG_FIRSTFPR, fprodd = 0;
  328. #endif
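        /* Hard-float calls pass FP args in the VFP argument registers (singles back-fill the second half of a partly used double via fprodd); the soft-float ABI and vararg calls pass them in GPR pairs or on the stack. */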
  329.   if ((void *)ci->func)
  330.     emit_call(as, (void *)ci->func);
  331. #if !LJ_SOFTFP
  332.   for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++)
  333.     as->cost[gpr] = REGCOST(~0u, ASMREF_L);
  334.   gpr = REGARG_FIRSTGPR;
  335. #endif
  336.   for (n = 0; n < nargs; n++) {  /* Setup args. */
  337.     IRRef ref = args[n];
  338.     IRIns *ir = IR(ref);
  339. #if !LJ_SOFTFP
  340.     if (ref && irt_isfp(ir->t)) {
  341.       RegSet of = as->freeset;
  342.       Reg src;
  343.       if (!LJ_ABI_SOFTFP && !(ci->flags & CCI_VARARG)) {
  344.         if (irt_isnum(ir->t)) {
  345.           if (fpr <= REGARG_LASTFPR) {
  346.             ra_leftov(as, fpr, ref);
  347.             fpr++;
  348.             continue;
  349.           }
  350.         } else if (fprodd) {  /* Ick. */
  351.           src = ra_alloc1(as, ref, RSET_FPR);
  352.           emit_dm(as, ARMI_VMOV_S, (fprodd & 15), (src & 15) | 0x00400000);
  353.           fprodd = 0;
  354.           continue;
  355.         } else if (fpr <= REGARG_LASTFPR) {
  356.           ra_leftov(as, fpr, ref);
  357.           fprodd = fpr++;
  358.           continue;
  359.         }
  360.         /* Workaround to protect argument GPRs from being used for remat. */
  361.         as->freeset &= ~RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1);
  362.         src = ra_alloc1(as, ref, RSET_FPR);  /* May alloc GPR to remat FPR. */
  363.         as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1));
  364.         fprodd = 0;
  365.         goto stackfp;
  366.       }
  367.       /* Workaround to protect argument GPRs from being used for remat. */
  368.       as->freeset &= ~RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1);
  369.       src = ra_alloc1(as, ref, RSET_FPR);  /* May alloc GPR to remat FPR. */
  370.       as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1));
  371.       if (irt_isnum(ir->t)) gpr = (gpr+1) & ~1u;
  372.       if (gpr <= REGARG_LASTGPR) {
  373.         lua_assert(rset_test(as->freeset, gpr));  /* Must have been evicted. */
  374.         if (irt_isnum(ir->t)) {
  375.           lua_assert(rset_test(as->freeset, gpr+1));  /* Ditto. */
  376.           emit_dnm(as, ARMI_VMOV_RR_D, gpr, gpr+1, (src & 15));
  377.           gpr += 2;
  378.         } else {
  379.           emit_dn(as, ARMI_VMOV_R_S, gpr, (src & 15));
  380.           gpr++;
  381.         }
  382.       } else {
  383.       stackfp:
  384.         if (irt_isnum(ir->t)) ofs = (ofs + 4) & ~4;
  385.         emit_spstore(as, ir, src, ofs);
  386.         ofs += irt_isnum(ir->t) ? 8 : 4;
  387.       }
  388.     } else
  389. #endif
  390.     {
  391.       if (gpr <= REGARG_LASTGPR) {
  392.         lua_assert(rset_test(as->freeset, gpr));  /* Must have been evicted. */
  393.         if (ref) ra_leftov(as, gpr, ref);
  394.         gpr++;
  395.       } else {
  396.         if (ref) {
  397.           Reg r = ra_alloc1(as, ref, RSET_GPR);
  398.           emit_spstore(as, ir, r, ofs);
  399.         }
  400.         ofs += 4;
  401.       }
  402.     }
  403.   }
  404. }

  405. /* Setup result reg/sp for call. Evict scratch regs. */
  406. static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
  407. {
  408.   RegSet drop = RSET_SCRATCH;
  409.   int hiop = ((ir+1)->o == IR_HIOP);
  410.   if (ra_hasreg(ir->r))
  411.     rset_clear(drop, ir->r);  /* Dest reg handled below. */
  412.   if (hiop && ra_hasreg((ir+1)->r))
  413.     rset_clear(drop, (ir+1)->r);  /* Dest reg handled below. */
  414.   ra_evictset(as, drop);  /* Evictions must be performed first. */
  415.   if (ra_used(ir)) {
  416.     lua_assert(!irt_ispri(ir->t));
  417.     if (!LJ_SOFTFP && irt_isfp(ir->t)) {
  418.       if (LJ_ABI_SOFTFP || (ci->flags & (CCI_CASTU64|CCI_VARARG))) {
  419.         Reg dest = (ra_dest(as, ir, RSET_FPR) & 15);
  420.         if (irt_isnum(ir->t))
  421.           emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, dest);
  422.         else
  423.           emit_dn(as, ARMI_VMOV_S_R, RID_RET, dest);
  424.       } else {
  425.         ra_destreg(as, ir, RID_FPRET);
  426.       }
  427.     } else if (hiop) {
  428.       ra_destpair(as, ir);
  429.     } else {
  430.       ra_destreg(as, ir, RID_RET);
  431.     }
  432.   }
  433.   UNUSED(ci);
  434. }

  435. static void asm_callx(ASMState *as, IRIns *ir)
  436. {
  437.   IRRef args[CCI_NARGS_MAX*2];
  438.   CCallInfo ci;
  439.   IRRef func;
  440.   IRIns *irf;
  441.   ci.flags = asm_callx_flags(as, ir);
  442.   asm_collectargs(as, ir, &ci, args);
  443.   asm_setupresult(as, ir, &ci);
  444.   func = ir->op2; irf = IR(func);
  445.   if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
  446.   if (irref_isk(func)) {  /* Call to constant address. */
  447.     ci.func = (ASMFunction)(void *)(irf->i);
  448.   } else {  /* Need a non-argument register for indirect calls. */
  449.     Reg freg = ra_alloc1(as, func, RSET_RANGE(RID_R4, RID_R12+1));
  450.     emit_m(as, ARMI_BLXr, freg);
  451.     ci.func = (ASMFunction)(void *)0;
  452.   }
  453.   asm_gencall(as, &ci, args);
  454. }

  455. /* -- Returns ------------------------------------------------------------- */

  456. /* Return to lower frame. Guard that it goes to the right spot. */
  457. static void asm_retf(ASMState *as, IRIns *ir)
  458. {
  459.   Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
  460.   void *pc = ir_kptr(IR(ir->op2));
  461.   int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
  462.   as->topslot -= (BCReg)delta;
  463.   if ((int32_t)as->topslot < 0) as->topslot = 0;
  464.   irt_setmark(IR(REF_BASE)->t);  /* Children must not coalesce with BASE reg. */
  465.   /* Need to force a spill on REF_BASE now to update the stack slot. */
  466.   emit_lso(as, ARMI_STR, base, RID_SP, ra_spill(as, IR(REF_BASE)));
  467.   emit_setgl(as, base, jit_base);
  468.   emit_addptr(as, base, -8*delta);
  469.   asm_guardcc(as, CC_NE);
  470.   emit_nm(as, ARMI_CMP, RID_TMP,
  471.           ra_allock(as, i32ptr(pc), rset_exclude(RSET_GPR, base)));
  472.   emit_lso(as, ARMI_LDR, RID_TMP, base, -4);
  473. }

  474. /* -- Type conversions ---------------------------------------------------- */

  475. #if !LJ_SOFTFP
  476. static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
  477. {
  478.   Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
  479.   Reg dest = ra_dest(as, ir, RSET_GPR);
  480.   asm_guardcc(as, CC_NE);
  481.   emit_d(as, ARMI_VMRS, 0);
  482.   emit_dm(as, ARMI_VCMP_D, (tmp & 15), (left & 15));
  483.   emit_dm(as, ARMI_VCVT_F64_S32, (tmp & 15), (tmp & 15));
  484.   emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15));
  485.   emit_dm(as, ARMI_VCVT_S32_F64, (tmp & 15), (left & 15));
  486. }

  487. static void asm_tobit(ASMState *as, IRIns *ir)
  488. {
  489.   RegSet allow = RSET_FPR;
  490.   Reg left = ra_alloc1(as, ir->op1, allow);
  491.   Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left));
  492.   Reg tmp = ra_scratch(as, rset_clear(allow, right));
  493.   Reg dest = ra_dest(as, ir, RSET_GPR);
  494.   emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15));
  495.   emit_dnm(as, ARMI_VADD_D, (tmp & 15), (left & 15), (right & 15));
  496. }
  497. #else
  498. #define asm_tobit(as, ir)        lua_assert(0)
  499. #endif

  500. static void asm_conv(ASMState *as, IRIns *ir)
  501. {
  502.   IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
  503. #if !LJ_SOFTFP
  504.   int stfp = (st == IRT_NUM || st == IRT_FLOAT);
  505. #endif
  506.   IRRef lref = ir->op1;
  507.   /* 64 bit integer conversions are handled by SPLIT. */
  508.   lua_assert(!irt_isint64(ir->t) && !(st == IRT_I64 || st == IRT_U64));
  509. #if LJ_SOFTFP
  510.   /* FP conversions are handled by SPLIT. */
  511.   lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT));
  512.   /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */
  513. #else
  514.   lua_assert(irt_type(ir->t) != st);
  515.   if (irt_isfp(ir->t)) {
  516.     Reg dest = ra_dest(as, ir, RSET_FPR);
  517.     if (stfp) {  /* FP to FP conversion. */
  518.       emit_dm(as, st == IRT_NUM ? ARMI_VCVT_F32_F64 : ARMI_VCVT_F64_F32,
  519.               (dest & 15), (ra_alloc1(as, lref, RSET_FPR) & 15));
  520.     } else {  /* Integer to FP conversion. */
  521.       Reg left = ra_alloc1(as, lref, RSET_GPR);
  522.       ARMIns ai = irt_isfloat(ir->t) ?
  523.         (st == IRT_INT ? ARMI_VCVT_F32_S32 : ARMI_VCVT_F32_U32) :
  524.         (st == IRT_INT ? ARMI_VCVT_F64_S32 : ARMI_VCVT_F64_U32);
  525.       emit_dm(as, ai, (dest & 15), (dest & 15));
  526.       emit_dn(as, ARMI_VMOV_S_R, left, (dest & 15));
  527.     }
  528.   } else if (stfp) {  /* FP to integer conversion. */
  529.     if (irt_isguard(ir->t)) {
  530.       /* Checked conversions are only supported from number to int. */
  531.       lua_assert(irt_isint(ir->t) && st == IRT_NUM);
  532.       asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
  533.     } else {
  534.       Reg left = ra_alloc1(as, lref, RSET_FPR);
  535.       Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
  536.       Reg dest = ra_dest(as, ir, RSET_GPR);
  537.       ARMIns ai;
  538.       emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15));
  539.       ai = irt_isint(ir->t) ?
  540.         (st == IRT_NUM ? ARMI_VCVT_S32_F64 : ARMI_VCVT_S32_F32) :
  541.         (st == IRT_NUM ? ARMI_VCVT_U32_F64 : ARMI_VCVT_U32_F32);
  542.       emit_dm(as, ai, (tmp & 15), (left & 15));
  543.     }
  544.   } else
  545. #endif
  546.   {
  547.     Reg dest = ra_dest(as, ir, RSET_GPR);
  548.     if (st >= IRT_I8 && st <= IRT_U16) {  /* Extend to 32 bit integer. */
  549.       Reg left = ra_alloc1(as, lref, RSET_GPR);
  550.       lua_assert(irt_isint(ir->t) || irt_isu32(ir->t));
  551.       if ((as->flags & JIT_F_ARMV6)) {
  552.         ARMIns ai = st == IRT_I8 ? ARMI_SXTB :
  553.                     st == IRT_U8 ? ARMI_UXTB :
  554.                     st == IRT_I16 ? ARMI_SXTH : ARMI_UXTH;
  555.         emit_dm(as, ai, dest, left);
  556.       } else if (st == IRT_U8) {
  557.         emit_dn(as, ARMI_AND|ARMI_K12|255, dest, left);
  558.       } else {
  559.         uint32_t shift = st == IRT_I8 ? 24 : 16;
  560.         ARMShift sh = st == IRT_U16 ? ARMSH_LSR : ARMSH_ASR;
  561.         emit_dm(as, ARMI_MOV|ARMF_SH(sh, shift), dest, RID_TMP);
  562.         emit_dm(as, ARMI_MOV|ARMF_SH(ARMSH_LSL, shift), RID_TMP, left);
  563.       }
  564.     } else {  /* Handle 32/32 bit no-op (cast). */
  565.       ra_leftov(as, dest, lref);  /* Do nothing, but may need to move regs. */
  566.     }
  567.   }
  568. }

  569. static void asm_strto(ASMState *as, IRIns *ir)
  570. {
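        /* Convert a string to a number via lj_strscan_num(): the result TValue is written through ASMREF_TMP1 ([sp] or a spill slot), and a zero return status exits the trace. */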
  571.   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
  572.   IRRef args[2];
  573.   Reg rlo = 0, rhi = 0, tmp;
  574.   int destused = ra_used(ir);
  575.   int32_t ofs = 0;
  576.   ra_evictset(as, RSET_SCRATCH);
  577. #if LJ_SOFTFP
  578.   if (destused) {
  579.     if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) &&
  580.         (ir->s & 1) == 0 && ir->s + 1 == (ir+1)->s) {
  581.       int i;
  582.       for (i = 0; i < 2; i++) {
  583.         Reg r = (ir+i)->r;
  584.         if (ra_hasreg(r)) {
  585.           ra_free(as, r);
  586.           ra_modified(as, r);
  587.           emit_spload(as, ir+i, r, sps_scale((ir+i)->s));
  588.         }
  589.       }
  590.       ofs = sps_scale(ir->s);
  591.       destused = 0;
  592.     } else {
  593.       rhi = ra_dest(as, ir+1, RSET_GPR);
  594.       rlo = ra_dest(as, ir, rset_exclude(RSET_GPR, rhi));
  595.     }
  596.   }
  597.   asm_guardcc(as, CC_EQ);
  598.   if (destused) {
  599.     emit_lso(as, ARMI_LDR, rhi, RID_SP, 4);
  600.     emit_lso(as, ARMI_LDR, rlo, RID_SP, 0);
  601.   }
  602. #else
  603.   UNUSED(rhi);
  604.   if (destused) {
  605.     if (ra_hasspill(ir->s)) {
  606.       ofs = sps_scale(ir->s);
  607.       destused = 0;
  608.       if (ra_hasreg(ir->r)) {
  609.         ra_free(as, ir->r);
  610.         ra_modified(as, ir->r);
  611.         emit_spload(as, ir, ir->r, ofs);
  612.       }
  613.     } else {
  614.       rlo = ra_dest(as, ir, RSET_FPR);
  615.     }
  616.   }
  617.   asm_guardcc(as, CC_EQ);
  618.   if (destused)
  619.     emit_vlso(as, ARMI_VLDR_D, rlo, RID_SP, 0);
  620. #endif
  621.   emit_n(as, ARMI_CMP|ARMI_K12|0, RID_RET);  /* Test return status. */
  622.   args[0] = ir->op1;      /* GCstr *str */
  623.   args[1] = ASMREF_TMP1;  /* TValue *n  */
  624.   asm_gencall(as, ci, args);
  625.   tmp = ra_releasetmp(as, ASMREF_TMP1);
  626.   if (ofs == 0)
  627.     emit_dm(as, ARMI_MOV, tmp, RID_SP);
  628.   else
  629.     emit_opk(as, ARMI_ADD, tmp, RID_SP, ofs, RSET_GPR);
  630. }

  631. /* -- Memory references --------------------------------------------------- */

  632. /* Get pointer to TValue. */
  633. static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
  634. {
  635.   IRIns *ir = IR(ref);
  636.   if (irt_isnum(ir->t)) {
  637.     if (irref_isk(ref)) {
  638.       /* Use the number constant itself as a TValue. */
  639.       ra_allockreg(as, i32ptr(ir_knum(ir)), dest);
  640.     } else {
  641. #if LJ_SOFTFP
  642.       lua_assert(0);
  643. #else
  644.       /* Otherwise force a spill and use the spill slot. */
  645.       emit_opk(as, ARMI_ADD, dest, RID_SP, ra_spill(as, ir), RSET_GPR);
  646. #endif
  647.     }
  648.   } else {
  649.     /* Otherwise use [sp] and [sp+4] to hold the TValue. */
  650.     RegSet allow = rset_exclude(RSET_GPR, dest);
  651.     Reg type;
  652.     emit_dm(as, ARMI_MOV, dest, RID_SP);
  653.     if (!irt_ispri(ir->t)) {
  654.       Reg src = ra_alloc1(as, ref, allow);
  655.       emit_lso(as, ARMI_STR, src, RID_SP, 0);
  656.     }
  657.     if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
  658.       type = ra_alloc1(as, ref+1, allow);
  659.     else
  660.       type = ra_allock(as, irt_toitype(ir->t), allow);
  661.     emit_lso(as, ARMI_STR, type, RID_SP, 4);
  662.   }
  663. }

  664. static void asm_aref(ASMState *as, IRIns *ir)
  665. {
  666.   Reg dest = ra_dest(as, ir, RSET_GPR);
  667.   Reg idx, base;
  668.   if (irref_isk(ir->op2)) {
  669.     IRRef tab = IR(ir->op1)->op1;
  670.     int32_t ofs = asm_fuseabase(as, tab);
  671.     IRRef refa = ofs ? tab : ir->op1;
  672.     uint32_t k = emit_isk12(ARMI_ADD, ofs + 8*IR(ir->op2)->i);
  673.     if (k) {
  674.       base = ra_alloc1(as, refa, RSET_GPR);
  675.       emit_dn(as, ARMI_ADD^k, dest, base);
  676.       return;
  677.     }
  678.   }
  679.   base = ra_alloc1(as, ir->op1, RSET_GPR);
  680.   idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base));
  681.   emit_dnm(as, ARMI_ADD|ARMF_SH(ARMSH_LSL, 3), dest, base, idx);
  682. }

  683. /* Inlined hash lookup. Specialized for key type and for const keys.
  684. ** The equivalent C code is:
  685. **   Node *n = hashkey(t, key);
  686. **   do {
  687. **     if (lj_obj_equal(&n->key, key)) return &n->val;
  688. **   } while ((n = nextnode(n)));
  689. **   return niltv(L);
  690. */
  691. static void asm_href(ASMState *as, IRIns *ir, IROp merge)
  692. {
  693.   RegSet allow = RSET_GPR;
  694.   int destused = ra_used(ir);
  695.   Reg dest = ra_dest(as, ir, allow);
  696.   Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
  697.   Reg key = 0, keyhi = 0, keynumhi = RID_NONE, tmp = RID_TMP;
  698.   IRRef refkey = ir->op2;
  699.   IRIns *irkey = IR(refkey);
  700.   IRType1 kt = irkey->t;
  701.   int32_t k = 0, khi = emit_isk12(ARMI_CMP, irt_toitype(kt));
  702.   uint32_t khash;
  703.   MCLabel l_end, l_loop;
  704.   rset_clear(allow, tab);
  705.   if (!irref_isk(refkey) || irt_isstr(kt)) {
  706. #if LJ_SOFTFP
  707.     key = ra_alloc1(as, refkey, allow);
  708.     rset_clear(allow, key);
  709.     if (irkey[1].o == IR_HIOP) {
  710.       if (ra_hasreg((irkey+1)->r)) {
  711.         keynumhi = (irkey+1)->r;
  712.         keyhi = RID_TMP;
  713.         ra_noweak(as, keynumhi);
  714.       } else {
  715.         keyhi = keynumhi = ra_allocref(as, refkey+1, allow);
  716.       }
  717.       rset_clear(allow, keynumhi);
  718.       khi = 0;
  719.     }
  720. #else
  721.     if (irt_isnum(kt)) {
  722.       key = ra_scratch(as, allow);
  723.       rset_clear(allow, key);
  724.       keyhi = keynumhi = ra_scratch(as, allow);
  725.       rset_clear(allow, keyhi);
  726.       khi = 0;
  727.     } else {
  728.       key = ra_alloc1(as, refkey, allow);
  729.       rset_clear(allow, key);
  730.     }
  731. #endif
  732.   } else if (irt_isnum(kt)) {
  733.     int32_t val = (int32_t)ir_knum(irkey)->u32.lo;
  734.     k = emit_isk12(ARMI_CMP, val);
  735.     if (!k) {
  736.       key = ra_allock(as, val, allow);
  737.       rset_clear(allow, key);
  738.     }
  739.     val = (int32_t)ir_knum(irkey)->u32.hi;
  740.     khi = emit_isk12(ARMI_CMP, val);
  741.     if (!khi) {
  742.       keyhi = ra_allock(as, val, allow);
  743.       rset_clear(allow, keyhi);
  744.     }
  745.   } else if (!irt_ispri(kt)) {
  746.     k = emit_isk12(ARMI_CMP, irkey->i);
  747.     if (!k) {
  748.       key = ra_alloc1(as, refkey, allow);
  749.       rset_clear(allow, key);
  750.     }
  751.   }
  752.   if (!irt_ispri(kt))
  753.     tmp = ra_scratchpair(as, allow);

  754.   /* Key not found in chain: jump to exit (if merged) or load niltv. */
  755.   l_end = emit_label(as);
  756.   as->invmcp = NULL;
  757.   if (merge == IR_NE)
  758.     asm_guardcc(as, CC_AL);
  759.   else if (destused)
  760.     emit_loada(as, dest, niltvg(J2G(as->J)));

  761.   /* Follow hash chain until the end. */
  762.   l_loop = --as->mcp;
  763.   emit_n(as, ARMI_CMP|ARMI_K12|0, dest);
  764.   emit_lso(as, ARMI_LDR, dest, dest, (int32_t)offsetof(Node, next));

  765.   /* Type and value comparison. */
  766.   if (merge == IR_EQ)
  767.     asm_guardcc(as, CC_EQ);
  768.   else
  769.     emit_branch(as, ARMF_CC(ARMI_B, CC_EQ), l_end);
  770.   if (!irt_ispri(kt)) {
  771.     emit_nm(as, ARMF_CC(ARMI_CMP, CC_EQ)^k, tmp, key);
  772.     emit_nm(as, ARMI_CMP^khi, tmp+1, keyhi);
  773.     emit_lsox(as, ARMI_LDRD, tmp, dest, (int32_t)offsetof(Node, key));
  774.   } else {
  775.     emit_n(as, ARMI_CMP^khi, tmp);
  776.     emit_lso(as, ARMI_LDR, tmp, dest, (int32_t)offsetof(Node, key.it));
  777.   }
  778.   *l_loop = ARMF_CC(ARMI_B, CC_NE) | ((as->mcp-l_loop-2) & 0x00ffffffu);

  779.   /* Load main position relative to tab->node into dest. */
  780.   khash = irref_isk(refkey) ? ir_khash(irkey) : 1;
  781.   if (khash == 0) {
  782.     emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node));
  783.   } else {
  784.     emit_dnm(as, ARMI_ADD|ARMF_SH(ARMSH_LSL, 3), dest, dest, tmp);
  785.     emit_dnm(as, ARMI_ADD|ARMF_SH(ARMSH_LSL, 1), tmp, tmp, tmp);
  786.     if (irt_isstr(kt)) {  /* Fetch of str->hash is cheaper than ra_allock. */
  787.       emit_dnm(as, ARMI_AND, tmp, tmp+1, RID_TMP);
  788.       emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node));
  789.       emit_lso(as, ARMI_LDR, tmp+1, key, (int32_t)offsetof(GCstr, hash));
  790.       emit_lso(as, ARMI_LDR, RID_TMP, tab, (int32_t)offsetof(GCtab, hmask));
  791.     } else if (irref_isk(refkey)) {
  792.       emit_opk(as, ARMI_AND, tmp, RID_TMP, (int32_t)khash,
  793.                rset_exclude(rset_exclude(RSET_GPR, tab), dest));
  794.       emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node));
  795.       emit_lso(as, ARMI_LDR, RID_TMP, tab, (int32_t)offsetof(GCtab, hmask));
  796.     } else {  /* Must match with hash*() in lj_tab.c. */
  797.       if (ra_hasreg(keynumhi)) {  /* Canonicalize +-0.0 to 0.0. */
  798.         if (keyhi == RID_TMP)
  799.           emit_dm(as, ARMF_CC(ARMI_MOV, CC_NE), keyhi, keynumhi);
  800.         emit_d(as, ARMF_CC(ARMI_MOV, CC_EQ)|ARMI_K12|0, keyhi);
  801.       }
  802.       emit_dnm(as, ARMI_AND, tmp, tmp, RID_TMP);
  803.       emit_dnm(as, ARMI_SUB|ARMF_SH(ARMSH_ROR, 32-HASH_ROT3), tmp, tmp, tmp+1);
  804.       emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node));
  805.       emit_dnm(as, ARMI_EOR|ARMF_SH(ARMSH_ROR, 32-((HASH_ROT2+HASH_ROT1)&31)),
  806.                tmp, tmp+1, tmp);
  807.       emit_lso(as, ARMI_LDR, RID_TMP, tab, (int32_t)offsetof(GCtab, hmask));
  808.       emit_dnm(as, ARMI_SUB|ARMF_SH(ARMSH_ROR, 32-HASH_ROT1), tmp+1, tmp+1, tmp);
  809.       if (ra_hasreg(keynumhi)) {
  810.         emit_dnm(as, ARMI_EOR, tmp+1, tmp, key);
  811.         emit_dnm(as, ARMI_ORR|ARMI_S, RID_TMP, tmp, key);  /* Test for +-0.0. */
  812.         emit_dnm(as, ARMI_ADD, tmp, keynumhi, keynumhi);
  813. #if !LJ_SOFTFP
  814.         emit_dnm(as, ARMI_VMOV_RR_D, key, keynumhi,
  815.                  (ra_alloc1(as, refkey, RSET_FPR) & 15));
  816. #endif
  817.       } else {
  818.         emit_dnm(as, ARMI_EOR, tmp+1, tmp, key);
  819.         emit_opk(as, ARMI_ADD, tmp, key, (int32_t)HASH_BIAS,
  820.                  rset_exclude(rset_exclude(RSET_GPR, tab), key));
  821.       }
  822.     }
  823.   }
  824. }

  825. static void asm_hrefk(ASMState *as, IRIns *ir)
  826. {
  827.   IRIns *kslot = IR(ir->op2);
  828.   IRIns *irkey = IR(kslot->op1);
  829.   int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node));
  830.   int32_t kofs = ofs + (int32_t)offsetof(Node, key);
  831.   Reg dest = (ra_used(ir) || ofs > 4095) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
  832.   Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
  833.   Reg key = RID_NONE, type = RID_TMP, idx = node;
  834.   RegSet allow = rset_exclude(RSET_GPR, node);
  835.   lua_assert(ofs % sizeof(Node) == 0);
  836.   if (ofs > 4095) {
  837.     idx = dest;
  838.     rset_clear(allow, dest);
  839.     kofs = (int32_t)offsetof(Node, key);
  840.   } else if (ra_hasreg(dest)) {
  841.     emit_opk(as, ARMI_ADD, dest, node, ofs, allow);
  842.   }
  843.   asm_guardcc(as, CC_NE);
  844.   if (!irt_ispri(irkey->t)) {
  845.     RegSet even = (as->freeset & allow);
  846.     even = even & (even >> 1) & RSET_GPREVEN;
  847.     if (even) {
  848.       key = ra_scratch(as, even);
  849.       if (rset_test(as->freeset, key+1)) {
  850.         type = key+1;
  851.         ra_modified(as, type);
  852.       }
  853.     } else {
  854.       key = ra_scratch(as, allow);
  855.     }
  856.     rset_clear(allow, key);
  857.   }
  858.   rset_clear(allow, type);
  859.   if (irt_isnum(irkey->t)) {
  860.     emit_opk(as, ARMF_CC(ARMI_CMP, CC_EQ), 0, type,
  861.              (int32_t)ir_knum(irkey)->u32.hi, allow);
  862.     emit_opk(as, ARMI_CMP, 0, key,
  863.              (int32_t)ir_knum(irkey)->u32.lo, allow);
  864.   } else {
  865.     if (ra_hasreg(key))
  866.       emit_opk(as, ARMF_CC(ARMI_CMP, CC_EQ), 0, key, irkey->i, allow);
  867.     emit_n(as, ARMI_CMN|ARMI_K12|-irt_toitype(irkey->t), type);
  868.   }
  869.   emit_lso(as, ARMI_LDR, type, idx, kofs+4);
  870.   if (ra_hasreg(key)) emit_lso(as, ARMI_LDR, key, idx, kofs);
  871.   if (ofs > 4095)
  872.     emit_opk(as, ARMI_ADD, dest, node, ofs, RSET_GPR);
  873. }

  874. static void asm_uref(ASMState *as, IRIns *ir)
  875. {
  876.   /* NYI: Check that UREFO is still open and not aliasing a slot. */
  877.   Reg dest = ra_dest(as, ir, RSET_GPR);
  878.   if (irref_isk(ir->op1)) {
  879.     GCfunc *fn = ir_kfunc(IR(ir->op1));
  880.     MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
  881.     emit_lsptr(as, ARMI_LDR, dest, v);
  882.   } else {
  883.     Reg uv = ra_scratch(as, RSET_GPR);
  884.     Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
  885.     if (ir->o == IR_UREFC) {
  886.       asm_guardcc(as, CC_NE);
  887.       emit_n(as, ARMI_CMP|ARMI_K12|1, RID_TMP);
  888.       emit_opk(as, ARMI_ADD, dest, uv,
  889.                (int32_t)offsetof(GCupval, tv), RSET_GPR);
  890.       emit_lso(as, ARMI_LDRB, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
  891.     } else {
  892.       emit_lso(as, ARMI_LDR, dest, uv, (int32_t)offsetof(GCupval, v));
  893.     }
  894.     emit_lso(as, ARMI_LDR, uv, func,
  895.              (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
  896.   }
  897. }

  898. static void asm_fref(ASMState *as, IRIns *ir)
  899. {
  900.   UNUSED(as); UNUSED(ir);
  901.   lua_assert(!ra_used(ir));
  902. }

  903. static void asm_strref(ASMState *as, IRIns *ir)
  904. {
  905.   Reg dest = ra_dest(as, ir, RSET_GPR);
  906.   IRRef ref = ir->op2, refk = ir->op1;
  907.   Reg r;
  908.   if (irref_isk(ref)) {
  909.     IRRef tmp = refk; refk = ref; ref = tmp;
  910.   } else if (!irref_isk(refk)) {
  911.     uint32_t k, m = ARMI_K12|sizeof(GCstr);
  912.     Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR);
  913.     IRIns *irr = IR(ir->op2);
  914.     if (ra_hasreg(irr->r)) {
  915.       ra_noweak(as, irr->r);
  916.       right = irr->r;
  917.     } else if (mayfuse(as, irr->op2) &&
  918.                irr->o == IR_ADD && irref_isk(irr->op2) &&
  919.                (k = emit_isk12(ARMI_ADD,
  920.                                (int32_t)sizeof(GCstr) + IR(irr->op2)->i))) {
  921.       m = k;
  922.       right = ra_alloc1(as, irr->op1, rset_exclude(RSET_GPR, left));
  923.     } else {
  924.       right = ra_allocref(as, ir->op2, rset_exclude(RSET_GPR, left));
  925.     }
  926.     emit_dn(as, ARMI_ADD^m, dest, dest);
  927.     emit_dnm(as, ARMI_ADD, dest, left, right);
  928.     return;
  929.   }
  930.   r = ra_alloc1(as, ref, RSET_GPR);
  931.   emit_opk(as, ARMI_ADD, dest, r,
  932.            sizeof(GCstr) + IR(refk)->i, rset_exclude(RSET_GPR, r));
  933. }

  934. /* -- Loads and stores ---------------------------------------------------- */

  935. static ARMIns asm_fxloadins(IRIns *ir)
  936. {
  937.   switch (irt_type(ir->t)) {
  938.   case IRT_I8: return ARMI_LDRSB;
  939.   case IRT_U8: return ARMI_LDRB;
  940.   case IRT_I16: return ARMI_LDRSH;
  941.   case IRT_U16: return ARMI_LDRH;
  942.   case IRT_NUM: lua_assert(!LJ_SOFTFP); return ARMI_VLDR_D;
  943.   case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VLDR_S;
  944.   default: return ARMI_LDR;
  945.   }
  946. }

  947. static ARMIns asm_fxstoreins(IRIns *ir)
  948. {
  949.   switch (irt_type(ir->t)) {
  950.   case IRT_I8: case IRT_U8: return ARMI_STRB;
  951.   case IRT_I16: case IRT_U16: return ARMI_STRH;
  952.   case IRT_NUM: lua_assert(!LJ_SOFTFP); return ARMI_VSTR_D;
  953.   case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VSTR_S;
  954.   default: return ARMI_STR;
  955.   }
  956. }

  957. static void asm_fload(ASMState *as, IRIns *ir)
  958. {
  959.   Reg dest = ra_dest(as, ir, RSET_GPR);
  960.   Reg idx = ra_alloc1(as, ir->op1, RSET_GPR);
  961.   ARMIns ai = asm_fxloadins(ir);
  962.   int32_t ofs;
  963.   if (ir->op2 == IRFL_TAB_ARRAY) {
  964.     ofs = asm_fuseabase(as, ir->op1);
  965.     if (ofs) {  /* Turn the t->array load into an add for colocated arrays. */
  966.       emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx);
  967.       return;
  968.     }
  969.   }
  970.   ofs = field_ofs[ir->op2];
  971.   if ((ai & 0x04000000))
  972.     emit_lso(as, ai, dest, idx, ofs);
  973.   else
  974.     emit_lsox(as, ai, dest, idx, ofs);
  975. }

  976. static void asm_fstore(ASMState *as, IRIns *ir)
  977. {
  978.   if (ir->r != RID_SINK) {
  979.     Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
  980.     IRIns *irf = IR(ir->op1);
  981.     Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
  982.     int32_t ofs = field_ofs[irf->op2];
  983.     ARMIns ai = asm_fxstoreins(ir);
  984.     if ((ai & 0x04000000))
  985.       emit_lso(as, ai, src, idx, ofs);
  986.     else
  987.       emit_lsox(as, ai, src, idx, ofs);
  988.   }
  989. }

  990. static void asm_xload(ASMState *as, IRIns *ir)
  991. {
  992.   Reg dest = ra_dest(as, ir,
  993.                      (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
  994.   lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));
  995.   asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
  996. }

  997. static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
  998. {
  999.   if (ir->r != RID_SINK) {
  1000.     Reg src = ra_alloc1(as, ir->op2,
  1001.                         (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
  1002.     asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
  1003.                  rset_exclude(RSET_GPR, src), ofs);
  1004.   }
  1005. }

  1006. #define asm_xstore(as, ir)        asm_xstore_(as, ir, 0)

  1007. static void asm_ahuvload(ASMState *as, IRIns *ir)
  1008. {
  1009.   int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP);
  1010.   IRType t = hiop ? IRT_NUM : irt_type(ir->t);
  1011.   Reg dest = RID_NONE, type = RID_NONE, idx;
  1012.   RegSet allow = RSET_GPR;
  1013.   int32_t ofs = 0;
  1014.   if (hiop && ra_used(ir+1)) {
  1015.     type = ra_dest(as, ir+1, allow);
  1016.     rset_clear(allow, type);
  1017.   }
  1018.   if (ra_used(ir)) {
  1019.     lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
  1020.                irt_isint(ir->t) || irt_isaddr(ir->t));
  1021.     dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow);
  1022.     rset_clear(allow, dest);
  1023.   }
  1024.   idx = asm_fuseahuref(as, ir->op1, &ofs, allow,
  1025.                        (!LJ_SOFTFP && t == IRT_NUM) ? 1024 : 4096);
  1026.   if (!hiop || type == RID_NONE) {
  1027.     rset_clear(allow, idx);
  1028.     if (ofs < 256 && ra_hasreg(dest) && (dest & 1) == 0 &&
  1029.         rset_test((as->freeset & allow), dest+1)) {
  1030.       type = dest+1;
  1031.       ra_modified(as, type);
  1032.     } else {
  1033.       type = RID_TMP;
  1034.     }
  1035.   }
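        /* Tag check on the hi word of the TValue: CMN adds the negated type tag and the guard exits on a mismatch (unsigned higher-or-same for numbers). */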
  1036.   asm_guardcc(as, t == IRT_NUM ? CC_HS : CC_NE);
  1037.   emit_n(as, ARMI_CMN|ARMI_K12|-irt_toitype_(t), type);
  1038.   if (ra_hasreg(dest)) {
  1039. #if !LJ_SOFTFP
  1040.     if (t == IRT_NUM)
  1041.       emit_vlso(as, ARMI_VLDR_D, dest, idx, ofs);
  1042.     else
  1043. #endif
  1044.       emit_lso(as, ARMI_LDR, dest, idx, ofs);
  1045.   }
  1046.   emit_lso(as, ARMI_LDR, type, idx, ofs+4);
  1047. }

  1048. static void asm_ahustore(ASMState *as, IRIns *ir)
  1049. {
  1050.   if (ir->r != RID_SINK) {
  1051.     RegSet allow = RSET_GPR;
  1052.     Reg idx, src = RID_NONE, type = RID_NONE;
  1053.     int32_t ofs = 0;
  1054. #if !LJ_SOFTFP
  1055.     if (irt_isnum(ir->t)) {
  1056.       src = ra_alloc1(as, ir->op2, RSET_FPR);
  1057.       idx = asm_fuseahuref(as, ir->op1, &ofs, allow, 1024);
  1058.       emit_vlso(as, ARMI_VSTR_D, src, idx, ofs);
  1059.     } else
  1060. #endif
  1061.     {
  1062.       int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP);
  1063.       if (!irt_ispri(ir->t)) {
  1064.         src = ra_alloc1(as, ir->op2, allow);
  1065.         rset_clear(allow, src);
  1066.       }
  1067.       if (hiop)
  1068.         type = ra_alloc1(as, (ir+1)->op2, allow);
  1069.       else
  1070.         type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
  1071.       idx = asm_fuseahuref(as, ir->op1, &ofs, rset_exclude(allow, type), 4096);
  1072.       if (ra_hasreg(src)) emit_lso(as, ARMI_STR, src, idx, ofs);
  1073.       emit_lso(as, ARMI_STR, type, idx, ofs+4);
  1074.     }
  1075.   }
  1076. }

  1077. static void asm_sload(ASMState *as, IRIns *ir)
  1078. {
  1079.   int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0);
  1080.   int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP);
  1081.   IRType t = hiop ? IRT_NUM : irt_type(ir->t);
  1082.   Reg dest = RID_NONE, type = RID_NONE, base;
  1083.   RegSet allow = RSET_GPR;
  1084.   lua_assert(!(ir->op2 & IRSLOAD_PARENT));  /* Handled by asm_head_side(). */
  1085.   lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK));
  1086. #if LJ_SOFTFP
  1087.   lua_assert(!(ir->op2 & IRSLOAD_CONVERT));  /* Handled by LJ_SOFTFP SPLIT. */
  1088.   if (hiop && ra_used(ir+1)) {
  1089.     type = ra_dest(as, ir+1, allow);
  1090.     rset_clear(allow, type);
  1091.   }
  1092. #else
  1093.   if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(ir->t) && t == IRT_INT) {
  1094.     dest = ra_scratch(as, RSET_FPR);
  1095.     asm_tointg(as, ir, dest);
  1096.     t = IRT_NUM;  /* Continue with a regular number type check. */
  1097.   } else
  1098. #endif
  1099.   if (ra_used(ir)) {
  1100.     Reg tmp = RID_NONE;
  1101.     if ((ir->op2 & IRSLOAD_CONVERT))
  1102.       tmp = ra_scratch(as, t == IRT_INT ? RSET_FPR : RSET_GPR);
  1103.     lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
  1104.                irt_isint(ir->t) || irt_isaddr(ir->t));
  1105.     dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow);
  1106.     rset_clear(allow, dest);
  1107.     base = ra_alloc1(as, REF_BASE, allow);
  1108.     if ((ir->op2 & IRSLOAD_CONVERT)) {
  1109.       if (t == IRT_INT) {
  1110.         emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15));
  1111.         emit_dm(as, ARMI_VCVT_S32_F64, (tmp & 15), (tmp & 15));
  1112.         t = IRT_NUM;  /* Check for original type. */
  1113.       } else {
  1114.         emit_dm(as, ARMI_VCVT_F64_S32, (dest & 15), (dest & 15));
  1115.         emit_dn(as, ARMI_VMOV_S_R, tmp, (dest & 15));
  1116.         t = IRT_INT;  /* Check for original type. */
  1117.       }
  1118.       dest = tmp;
  1119.     }
  1120.     goto dotypecheck;
  1121.   }
  1122.   base = ra_alloc1(as, REF_BASE, allow);
  1123. dotypecheck:
  1124.   rset_clear(allow, base);
  1125.   if ((ir->op2 & IRSLOAD_TYPECHECK)) {
  1126.     if (ra_noreg(type)) {
  1127.       if (ofs < 256 && ra_hasreg(dest) && (dest & 1) == 0 &&
  1128.           rset_test((as->freeset & allow), dest+1)) {
  1129.         type = dest+1;
  1130.         ra_modified(as, type);
  1131.       } else {
  1132.         type = RID_TMP;
  1133.       }
  1134.     }
  1135.     asm_guardcc(as, t == IRT_NUM ? CC_HS : CC_NE);
  1136.     emit_n(as, ARMI_CMN|ARMI_K12|-irt_toitype_(t), type);
  1137.   }
  1138.   if (ra_hasreg(dest)) {
  1139. #if !LJ_SOFTFP
  1140.     if (t == IRT_NUM) {
  1141.       if (ofs < 1024) {
  1142.         emit_vlso(as, ARMI_VLDR_D, dest, base, ofs);
  1143.       } else {
  1144.         if (ra_hasreg(type)) emit_lso(as, ARMI_LDR, type, base, ofs+4);
  1145.         emit_vlso(as, ARMI_VLDR_D, dest, RID_TMP, 0);
  1146.         emit_opk(as, ARMI_ADD, RID_TMP, base, ofs, allow);
  1147.         return;
  1148.       }
  1149.     } else
  1150. #endif
  1151.       emit_lso(as, ARMI_LDR, dest, base, ofs);
  1152.   }
  1153.   if (ra_hasreg(type)) emit_lso(as, ARMI_LDR, type, base, ofs+4);
  1154. }

  1155. /* -- Allocations --------------------------------------------------------- */

  1156. #if LJ_HASFFI
  1157. static void asm_cnew(ASMState *as, IRIns *ir)
  1158. {
  1159.   CTState *cts = ctype_ctsG(J2G(as->J));
  1160.   CTypeID id = (CTypeID)IR(ir->op1)->i;
  1161.   CTSize sz;
  1162.   CTInfo info = lj_ctype_info(cts, id, &sz);
  1163.   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
  1164.   IRRef args[4];
  1165.   RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
  1166.   RegSet drop = RSET_SCRATCH;
  1167.   lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));

  1168.   as->gcsteps++;
  1169.   if (ra_hasreg(ir->r))
  1170.     rset_clear(drop, ir->r);  /* Dest reg handled below. */
  1171.   ra_evictset(as, drop);
  1172.   if (ra_used(ir))
  1173.     ra_destreg(as, ir, RID_RET);  /* GCcdata * */

  1174.   /* Initialize immutable cdata object. */
  1175.   if (ir->o == IR_CNEWI) {
  1176.     int32_t ofs = sizeof(GCcdata);
  1177.     lua_assert(sz == 4 || sz == 8);
  1178.     if (sz == 8) {
  1179.       ofs += 4; ir++;
  1180.       lua_assert(ir->o == IR_HIOP);
  1181.     }
  1182.     for (;;) {
  1183.       Reg r = ra_alloc1(as, ir->op2, allow);
  1184.       emit_lso(as, ARMI_STR, r, RID_RET, ofs);
  1185.       rset_clear(allow, r);
  1186.       if (ofs == sizeof(GCcdata)) break;
  1187.       ofs -= 4; ir--;
  1188.     }
  1189.   } else if (ir->op2 != REF_NIL) {  /* Create VLA/VLS/aligned cdata. */
  1190.     ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
  1191.     args[0] = ASMREF_L;     /* lua_State *L */
  1192.     args[1] = ir->op1;      /* CTypeID id   */
  1193.     args[2] = ir->op2;      /* CTSize sz    */
  1194.     args[3] = ASMREF_TMP1;  /* CTSize align */
  1195.     asm_gencall(as, ci, args);
  1196.     emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
  1197.     return;
  1198.   }

  1199.   /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
  1200.   {
  1201.     uint32_t k = emit_isk12(ARMI_MOV, id);
  1202.     Reg r = k ? RID_R1 : ra_allock(as, id, allow);
  1203.     emit_lso(as, ARMI_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct));
  1204.     emit_lsox(as, ARMI_STRH, r, RID_RET, offsetof(GCcdata, ctypeid));
  1205.     emit_d(as, ARMI_MOV|ARMI_K12|~LJ_TCDATA, RID_TMP);
  1206.     if (k) emit_d(as, ARMI_MOV^k, RID_R1);
  1207.   }
  1208.   args[0] = ASMREF_L;     /* lua_State *L */
  1209.   args[1] = ASMREF_TMP1;  /* MSize size   */
  1210.   asm_gencall(as, ci, args);
  1211.   ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
  1212.                ra_releasetmp(as, ASMREF_TMP1));
  1213. }
  1214. #else
  1215. #define asm_cnew(as, ir)        ((void)0)
  1216. #endif

  1217. /* -- Write barriers ------------------------------------------------------ */

  1218. static void asm_tbar(ASMState *as, IRIns *ir)
  1219. {
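        /* Backward barrier for table stores: if the table is black, clear its black bit and link it onto gc.grayagain. */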
  1220.   Reg tab = ra_alloc1(as, ir->op1, RSET_GPR);
  1221.   Reg link = ra_scratch(as, rset_exclude(RSET_GPR, tab));
  1222.   Reg gr = ra_allock(as, i32ptr(J2G(as->J)),
  1223.                      rset_exclude(rset_exclude(RSET_GPR, tab), link));
  1224.   Reg mark = RID_TMP;
  1225.   MCLabel l_end = emit_label(as);
  1226.   emit_lso(as, ARMI_STR, link, tab, (int32_t)offsetof(GCtab, gclist));
  1227.   emit_lso(as, ARMI_STRB, mark, tab, (int32_t)offsetof(GCtab, marked));
  1228.   emit_lso(as, ARMI_STR, tab, gr,
  1229.            (int32_t)offsetof(global_State, gc.grayagain));
  1230.   emit_dn(as, ARMI_BIC|ARMI_K12|LJ_GC_BLACK, mark, mark);
  1231.   emit_lso(as, ARMI_LDR, link, gr,
  1232.            (int32_t)offsetof(global_State, gc.grayagain));
  1233.   emit_branch(as, ARMF_CC(ARMI_B, CC_EQ), l_end);
  1234.   emit_n(as, ARMI_TST|ARMI_K12|LJ_GC_BLACK, mark);
  1235.   emit_lso(as, ARMI_LDRB, mark, tab, (int32_t)offsetof(GCtab, marked));
  1236. }

  1237. static void asm_obar(ASMState *as, IRIns *ir)
  1238. {
  1239.   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv];
  1240.   IRRef args[2];
  1241.   MCLabel l_end;
  1242.   Reg obj, val, tmp;
  1243.   /* No need for other object barriers (yet). */
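        /* Upvalue store barrier: call lj_gc_barrieruv() only if the closed upvalue is black and the stored value is white. */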
  1244.   lua_assert(IR(ir->op1)->o == IR_UREFC);
  1245.   ra_evictset(as, RSET_SCRATCH);
  1246.   l_end = emit_label(as);
  1247.   args[0] = ASMREF_TMP1;  /* global_State *g */
  1248.   args[1] = ir->op1;      /* TValue *tv      */
  1249.   asm_gencall(as, ci, args);
  1250.   if ((l_end[-1] >> 28) == CC_AL)
  1251.     l_end[-1] = ARMF_CC(l_end[-1], CC_NE);
  1252.   else
  1253.     emit_branch(as, ARMF_CC(ARMI_B, CC_EQ), l_end);
  1254.   ra_allockreg(as, i32ptr(J2G(as->J)), ra_releasetmp(as, ASMREF_TMP1));
  1255.   obj = IR(ir->op1)->r;
  1256.   tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj));
  1257.   emit_n(as, ARMF_CC(ARMI_TST, CC_NE)|ARMI_K12|LJ_GC_BLACK, tmp);
  1258.   emit_n(as, ARMI_TST|ARMI_K12|LJ_GC_WHITES, RID_TMP);
  1259.   val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj));
  1260.   emit_lso(as, ARMI_LDRB, tmp, obj,
  1261.            (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv));
  1262.   emit_lso(as, ARMI_LDRB, RID_TMP, val, (int32_t)offsetof(GChead, marked));
  1263. }

  1264. /* -- Arithmetic and logic operations ------------------------------------- */

  1265. #if !LJ_SOFTFP
  1266. static void asm_fparith(ASMState *as, IRIns *ir, ARMIns ai)
  1267. {
  1268.   Reg dest = ra_dest(as, ir, RSET_FPR);
  1269.   Reg right, left = ra_alloc2(as, ir, RSET_FPR);
  1270.   right = (left >> 8); left &= 255;
  1271.   emit_dnm(as, ai, (dest & 15), (left & 15), (right & 15));
  1272. }

  1273. static void asm_fpunary(ASMState *as, IRIns *ir, ARMIns ai)
  1274. {
  1275.   Reg dest = ra_dest(as, ir, RSET_FPR);
  1276.   Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR);
  1277.   emit_dm(as, ai, (dest & 15), (left & 15));
  1278. }

  1279. static void asm_callround(ASMState *as, IRIns *ir, int id)
  1280. {
  1281.   /* The modified regs must match with the *.dasc implementation. */
  1282.   RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)|
  1283.                 RID2RSET(RID_R3)|RID2RSET(RID_R12);
  1284.   RegSet of;
  1285.   Reg dest, src;
  1286.   ra_evictset(as, drop);
  1287.   dest = ra_dest(as, ir, RSET_FPR);
  1288.   emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15));
  1289.   emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf :
  1290.                 id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf :
  1291.                                    (void *)lj_vm_trunc_sf);
  1292.   /* Workaround to protect argument GPRs from being used for remat. */
  1293.   of = as->freeset;
  1294.   as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1);
  1295.   as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L);
  1296.   src = ra_alloc1(as, ir->op1, RSET_FPR);  /* May alloc GPR to remat FPR. */
  1297.   as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1));
  1298.   emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15));
  1299. }

  1300. static void asm_fpmath(ASMState *as, IRIns *ir)
  1301. {
  1302.   if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
  1303.     return;
  1304.   if (ir->op2 <= IRFPM_TRUNC)
  1305.     asm_callround(as, ir, ir->op2);
  1306.   else if (ir->op2 == IRFPM_SQRT)
  1307.     asm_fpunary(as, ir, ARMI_VSQRT_D);
  1308.   else
  1309.     asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
  1310. }
  1311. #else
  1312. #define asm_fpmath(as, ir)        lua_assert(0)
  1313. #endif

  1314. static int asm_swapops(ASMState *as, IRRef lref, IRRef rref)
  1315. {
  1316.   IRIns *ir;
  1317.   if (irref_isk(rref))
  1318.     return 0;  /* Don't swap constants to the left. */
  1319.   if (irref_isk(lref))
  1320.     return 1;  /* But swap constants to the right. */
  1321.   ir = IR(rref);
  1322.   if ((ir->o >= IR_BSHL && ir->o <= IR_BROR) ||
  1323.       (ir->o == IR_ADD && ir->op1 == ir->op2))
  1324.     return 0;  /* Don't swap fusable operands to the left. */
  1325.   ir = IR(lref);
  1326.   if ((ir->o >= IR_BSHL && ir->o <= IR_BROR) ||
  1327.       (ir->o == IR_ADD && ir->op1 == ir->op2))
  1328.     return 1;  /* But swap fusable operands to the right. */
  1329.   return 0;  /* Otherwise don't swap. */
  1330. }

  1331. static void asm_intop(ASMState *as, IRIns *ir, ARMIns ai)
  1332. {
  1333.   IRRef lref = ir->op1, rref = ir->op2;
  1334.   Reg left, dest = ra_dest(as, ir, RSET_GPR);
  1335.   uint32_t m;
  1336.   if (asm_swapops(as, lref, rref)) {
  1337.     IRRef tmp = lref; lref = rref; rref = tmp;
  1338.     if ((ai & ~ARMI_S) == ARMI_SUB || (ai & ~ARMI_S) == ARMI_SBC)
  1339.       ai ^= (ARMI_SUB^ARMI_RSB);
  1340.   }
  1341.   left = ra_hintalloc(as, lref, dest, RSET_GPR);
  1342.   m = asm_fuseopm(as, ai, rref, rset_exclude(RSET_GPR, left));
  1343.   if (irt_isguard(ir->t)) {  /* For IR_ADDOV etc. */
  1344.     asm_guardcc(as, CC_VS);
  1345.     ai |= ARMI_S;
  1346.   }
  1347.   emit_dn(as, ai^m, dest, left);
  1348. }

  1349. static void asm_intop_s(ASMState *as, IRIns *ir, ARMIns ai)
  1350. {
  1351.   if (as->flagmcp == as->mcp) {  /* Drop cmp r, #0. */
  1352.     as->flagmcp = NULL;
  1353.     as->mcp++;
  1354.     ai |= ARMI_S;
  1355.   }
  1356.   asm_intop(as, ir, ai);
  1357. }

  1358. static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai)
  1359. {
  1360.   Reg dest = ra_dest(as, ir, RSET_GPR);
  1361.   Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
  1362.   emit_dn(as, ai|ARMI_K12|0, dest, left);
  1363. }

  1364. /* NYI: use add/shift for MUL(OV) with constants. FOLD only does 2^k. */
  1365. static void asm_intmul(ASMState *as, IRIns *ir)
  1366. {
  1367.   Reg dest = ra_dest(as, ir, RSET_GPR);
  1368.   Reg left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, dest));
  1369.   Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
  1370.   Reg tmp = RID_NONE;
  1371.   /* ARMv5 restriction: dest != left and dest_hi != left. */
  1372.   if (dest == left && left != right) { left = right; right = dest; }
  1373.   if (irt_isguard(ir->t)) {  /* IR_MULOV */
  1374.     if (!(as->flags & JIT_F_ARMV6) && dest == left)
  1375.       tmp = left = ra_scratch(as, rset_exclude(RSET_GPR, left));
  1376.     asm_guardcc(as, CC_NE);
  1377.     emit_nm(as, ARMI_TEQ|ARMF_SH(ARMSH_ASR, 31), RID_TMP, dest);
  1378.     emit_dnm(as, ARMI_SMULL|ARMF_S(right), dest, RID_TMP, left);
  1379.   } else {
  1380.     if (!(as->flags & JIT_F_ARMV6) && dest == left) tmp = left = RID_TMP;
  1381.     emit_nm(as, ARMI_MUL|ARMF_S(right), dest, left);
  1382.   }
  1383.   /* Only need this for the dest == left == right case. */
  1384.   if (ra_hasreg(tmp)) emit_dm(as, ARMI_MOV, tmp, right);
  1385. }

  1386. static void asm_add(ASMState *as, IRIns *ir)
  1387. {
  1388. #if !LJ_SOFTFP
  1389.   if (irt_isnum(ir->t)) {
  1390.     if (!asm_fusemadd(as, ir, ARMI_VMLA_D, ARMI_VMLA_D))
  1391.       asm_fparith(as, ir, ARMI_VADD_D);
  1392.     return;
  1393.   }
  1394. #endif
  1395.   asm_intop_s(as, ir, ARMI_ADD);
  1396. }

  1397. static void asm_sub(ASMState *as, IRIns *ir)
  1398. {
  1399. #if !LJ_SOFTFP
  1400.   if (irt_isnum(ir->t)) {
  1401.     if (!asm_fusemadd(as, ir, ARMI_VNMLS_D, ARMI_VMLS_D))
  1402.       asm_fparith(as, ir, ARMI_VSUB_D);
  1403.     return;
  1404.   }
  1405. #endif
  1406.   asm_intop_s(as, ir, ARMI_SUB);
  1407. }

  1408. static void asm_mul(ASMState *as, IRIns *ir)
  1409. {
  1410. #if !LJ_SOFTFP
  1411.   if (irt_isnum(ir->t)) {
  1412.     asm_fparith(as, ir, ARMI_VMUL_D);
  1413.     return;
  1414.   }
  1415. #endif
  1416.   asm_intmul(as, ir);
  1417. }

  1418. #define asm_addov(as, ir)        asm_add(as, ir)
  1419. #define asm_subov(as, ir)        asm_sub(as, ir)
  1420. #define asm_mulov(as, ir)        asm_mul(as, ir)

  1421. #if LJ_SOFTFP
  1422. #define asm_div(as, ir)                lua_assert(0)
  1423. #define asm_pow(as, ir)                lua_assert(0)
  1424. #define asm_abs(as, ir)                lua_assert(0)
  1425. #define asm_atan2(as, ir)        lua_assert(0)
  1426. #define asm_ldexp(as, ir)        lua_assert(0)
  1427. #else
  1428. #define asm_div(as, ir)                asm_fparith(as, ir, ARMI_VDIV_D)
  1429. #define asm_pow(as, ir)                asm_callid(as, ir, IRCALL_lj_vm_powi)
  1430. #define asm_abs(as, ir)                asm_fpunary(as, ir, ARMI_VABS_D)
  1431. #define asm_atan2(as, ir)        asm_callid(as, ir, IRCALL_atan2)
  1432. #define asm_ldexp(as, ir)        asm_callid(as, ir, IRCALL_ldexp)
  1433. #endif

  1434. #define asm_mod(as, ir)                asm_callid(as, ir, IRCALL_lj_vm_modi)

  1435. static void asm_neg(ASMState *as, IRIns *ir)
  1436. {
  1437. #if !LJ_SOFTFP
  1438.   if (irt_isnum(ir->t)) {
  1439.     asm_fpunary(as, ir, ARMI_VNEG_D);
  1440.     return;
  1441.   }
  1442. #endif
  1443.   asm_intneg(as, ir, ARMI_RSB);
  1444. }

  1445. static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai)
  1446. {
  1447.   if (as->flagmcp == as->mcp) {  /* Try to drop cmp r, #0. */
  1448.     uint32_t cc = (as->mcp[1] >> 28);
  1449.     as->flagmcp = NULL;
  1450.     if (cc <= CC_NE) {
  1451.       as->mcp++;
  1452.       ai |= ARMI_S;
  1453.     } else if (cc == CC_GE) {
  1454.       *++as->mcp ^= ((CC_GE^CC_PL) << 28);
  1455.       ai |= ARMI_S;
  1456.     } else if (cc == CC_LT) {
  1457.       *++as->mcp ^= ((CC_LT^CC_MI) << 28);
  1458.       ai |= ARMI_S;
  1459.     }  /* else: other conds don't work with bit ops. */
  1460.   }
  1461.   if (ir->op2 == 0) {
  1462.     Reg dest = ra_dest(as, ir, RSET_GPR);
  1463.     uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
  1464.     emit_d(as, ai^m, dest);
  1465.   } else {
  1466.     /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */
  1467.     asm_intop(as, ir, ai);
  1468.   }
  1469. }
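
/* Annotation: the flagmcp check above is a peephole on the backwards-emitted
** code: when the most recently emitted instruction is the "cmp r, #0" left
** behind by asm_intcomp(), it is dropped (as->mcp++) and the bit op sets the
** flags itself via ARMI_S.  For CC_GE/CC_LT the guard's condition field is
** rewritten to CC_PL/CC_MI, since logical ops only produce meaningful N and
** Z flags.
*/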

  1470. #define asm_bnot(as, ir)        asm_bitop(as, ir, ARMI_MVN)

  1471. static void asm_bswap(ASMState *as, IRIns *ir)
  1472. {
  1473.   Reg dest = ra_dest(as, ir, RSET_GPR);
  1474.   Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
  1475.   if ((as->flags & JIT_F_ARMV6)) {
  1476.     emit_dm(as, ARMI_REV, dest, left);
  1477.   } else {
  1478.     Reg tmp2 = dest;
  1479.     if (tmp2 == left)
  1480.       tmp2 = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, dest), left));
  1481.     emit_dnm(as, ARMI_EOR|ARMF_SH(ARMSH_LSR, 8), dest, tmp2, RID_TMP);
  1482.     emit_dm(as, ARMI_MOV|ARMF_SH(ARMSH_ROR, 8), tmp2, left);
  1483.     emit_dn(as, ARMI_BIC|ARMI_K12|256*8|255, RID_TMP, RID_TMP);
  1484.     emit_dnm(as, ARMI_EOR|ARMF_SH(ARMSH_ROR, 16), RID_TMP, left, left);
  1485.   }
  1486. }

  1487. #define asm_band(as, ir)        asm_bitop(as, ir, ARMI_AND)
  1488. #define asm_bor(as, ir)                asm_bitop(as, ir, ARMI_ORR)
  1489. #define asm_bxor(as, ir)        asm_bitop(as, ir, ARMI_EOR)

  1490. static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh)
  1491. {
  1492.   if (irref_isk(ir->op2)) {  /* Constant shifts. */
  1493.     /* NYI: Turn SHL+SHR or BAND+SHR into uxtb, uxth or ubfx. */
  1494.     /* NYI: Turn SHL+ASR into sxtb, sxth or sbfx. */
  1495.     Reg dest = ra_dest(as, ir, RSET_GPR);
  1496.     Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
  1497.     int32_t shift = (IR(ir->op2)->i & 31);
  1498.     emit_dm(as, ARMI_MOV|ARMF_SH(sh, shift), dest, left);
  1499.   } else {
  1500.     Reg dest = ra_dest(as, ir, RSET_GPR);
  1501.     Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
  1502.     Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
  1503.     emit_dm(as, ARMI_MOV|ARMF_RSH(sh, right), dest, left);
  1504.   }
  1505. }

  1506. #define asm_bshl(as, ir)        asm_bitshift(as, ir, ARMSH_LSL)
  1507. #define asm_bshr(as, ir)        asm_bitshift(as, ir, ARMSH_LSR)
  1508. #define asm_bsar(as, ir)        asm_bitshift(as, ir, ARMSH_ASR)
  1509. #define asm_bror(as, ir)        asm_bitshift(as, ir, ARMSH_ROR)
  1510. #define asm_brol(as, ir)        lua_assert(0)

  1511. static void asm_intmin_max(ASMState *as, IRIns *ir, int cc)
  1512. {
  1513.   uint32_t kcmp = 0, kmov = 0;
  1514.   Reg dest = ra_dest(as, ir, RSET_GPR);
  1515.   Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
  1516.   Reg right = 0;
  1517.   if (irref_isk(ir->op2)) {
  1518.     kcmp = emit_isk12(ARMI_CMP, IR(ir->op2)->i);
  1519.     if (kcmp) kmov = emit_isk12(ARMI_MOV, IR(ir->op2)->i);
  1520.   }
  1521.   if (!kmov) {
  1522.     kcmp = 0;
  1523.     right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
  1524.   }
  1525.   if (kmov || dest != right) {
  1526.     emit_dm(as, ARMF_CC(ARMI_MOV, cc)^kmov, dest, right);
  1527.     cc ^= 1;  /* Must use opposite conditions for paired moves. */
  1528.   } else {
  1529.     cc ^= (CC_LT^CC_GT);  /* Otherwise may swap CC_LT <-> CC_GT. */
  1530.   }
  1531.   if (dest != left) emit_dm(as, ARMF_CC(ARMI_MOV, cc), dest, left);
  1532.   emit_nm(as, ARMI_CMP^kcmp, left, right);
  1533. }
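
/* Annotation: asm_intmin_max() emits a compare followed by up to two
** conditional moves.  In the common case (distinct registers) the
** program-order sequence for MIN (cc = CC_GT) is, in effect:
**   cmp   left, right
**   movle dest, left
**   movgt dest, right
** MAX (cc = CC_LT) analogously uses movge/movlt.
*/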

  1534. #if LJ_SOFTFP
  1535. static void asm_sfpmin_max(ASMState *as, IRIns *ir, int cc)
  1536. {
  1537.   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp];
  1538.   RegSet drop = RSET_SCRATCH;
  1539.   Reg r;
  1540.   IRRef args[4];
  1541.   args[0] = ir->op1; args[1] = (ir+1)->op1;
  1542.   args[2] = ir->op2; args[3] = (ir+1)->op2;
  1543.   /* __aeabi_cdcmple preserves r0-r3. */
  1544.   if (ra_hasreg(ir->r)) rset_clear(drop, ir->r);
  1545.   if (ra_hasreg((ir+1)->r)) rset_clear(drop, (ir+1)->r);
  1546.   if (!rset_test(as->freeset, RID_R2) &&
  1547.       regcost_ref(as->cost[RID_R2]) == args[2]) rset_clear(drop, RID_R2);
  1548.   if (!rset_test(as->freeset, RID_R3) &&
  1549.       regcost_ref(as->cost[RID_R3]) == args[3]) rset_clear(drop, RID_R3);
  1550.   ra_evictset(as, drop);
  1551.   ra_destpair(as, ir);
  1552.   emit_dm(as, ARMF_CC(ARMI_MOV, cc), RID_RETHI, RID_R3);
  1553.   emit_dm(as, ARMF_CC(ARMI_MOV, cc), RID_RETLO, RID_R2);
  1554.   emit_call(as, (void *)ci->func);
  1555.   for (r = RID_R0; r <= RID_R3; r++)
  1556.     ra_leftov(as, r, args[r-RID_R0]);
  1557. }
  1558. #else
  1559. static void asm_fpmin_max(ASMState *as, IRIns *ir, int cc)
  1560. {
  1561.   Reg dest = (ra_dest(as, ir, RSET_FPR) & 15);
  1562.   Reg right, left = ra_alloc2(as, ir, RSET_FPR);
  1563.   right = ((left >> 8) & 15); left &= 15;
  1564.   if (dest != left) emit_dm(as, ARMF_CC(ARMI_VMOV_D, cc^1), dest, left);
  1565.   if (dest != right) emit_dm(as, ARMF_CC(ARMI_VMOV_D, cc), dest, right);
  1566.   emit_d(as, ARMI_VMRS, 0);
  1567.   emit_dm(as, ARMI_VCMP_D, left, right);
  1568. }
  1569. #endif

  1570. static void asm_min_max(ASMState *as, IRIns *ir, int cc, int fcc)
  1571. {
  1572. #if LJ_SOFTFP
  1573.   UNUSED(fcc);
  1574. #else
  1575.   if (irt_isnum(ir->t))
  1576.     asm_fpmin_max(as, ir, fcc);
  1577.   else
  1578. #endif
  1579.     asm_intmin_max(as, ir, cc);
  1580. }

  1581. #define asm_min(as, ir)                asm_min_max(as, ir, CC_GT, CC_HI)
  1582. #define asm_max(as, ir)                asm_min_max(as, ir, CC_LT, CC_LO)

  1583. /* -- Comparisons --------------------------------------------------------- */

  1584. /* Map of comparisons to flags. ORDER IR. */
  1585. static const uint8_t asm_compmap[IR_ABC+1] = {
  1586.   /* op  FP swp  int cc   FP cc */
  1587.   /* LT       */ CC_GE + (CC_HS << 4),
  1588.   /* GE    x  */ CC_LT + (CC_HI << 4),
  1589.   /* LE       */ CC_GT + (CC_HI << 4),
  1590.   /* GT    x  */ CC_LE + (CC_HS << 4),
  1591.   /* ULT   x  */ CC_HS + (CC_LS << 4),
  1592.   /* UGE      */ CC_LO + (CC_LO << 4),
  1593.   /* ULE   x  */ CC_HI + (CC_LO << 4),
  1594.   /* UGT      */ CC_LS + (CC_LS << 4),
  1595.   /* EQ       */ CC_NE + (CC_NE << 4),
  1596.   /* NE       */ CC_EQ + (CC_EQ << 4),
  1597.   /* ABC      */ CC_LS + (CC_LS << 4)  /* Same as UGT. */
  1598. };
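
/* Annotation: each asm_compmap entry packs two already-negated guard
** conditions: the low nibble is consumed by asm_intcomp() as
** (asm_compmap[ir->o] & 15), the high nibble by asm_fpcomp()/asm_sfpcomp()
** as (asm_compmap[ir->o] >> 4).  The guard branches to the exit stub when
** the comparison does NOT hold, e.g. IR_LT guards on CC_GE for integers and
** on CC_HS for (possibly unordered) FP operands.
*/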

  1599. #if LJ_SOFTFP
  1600. /* FP comparisons. */
  1601. static void asm_sfpcomp(ASMState *as, IRIns *ir)
  1602. {
  1603.   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp];
  1604.   RegSet drop = RSET_SCRATCH;
  1605.   Reg r;
  1606.   IRRef args[4];
  1607.   int swp = (((ir->o ^ (ir->o >> 2)) & ~(ir->o >> 3) & 1) << 1);
  1608.   args[swp^0] = ir->op1; args[swp^1] = (ir+1)->op1;
  1609.   args[swp^2] = ir->op2; args[swp^3] = (ir+1)->op2;
  1610.   /* __aeabi_cdcmple preserves r0-r3. This helps to reduce spills. */
  1611.   for (r = RID_R0; r <= RID_R3; r++)
  1612.     if (!rset_test(as->freeset, r) &&
  1613.         regcost_ref(as->cost[r]) == args[r-RID_R0]) rset_clear(drop, r);
  1614.   ra_evictset(as, drop);
  1615.   asm_guardcc(as, (asm_compmap[ir->o] >> 4));
  1616.   emit_call(as, (void *)ci->func);
  1617.   for (r = RID_R0; r <= RID_R3; r++)
  1618.     ra_leftov(as, r, args[r-RID_R0]);
  1619. }
  1620. #else
  1621. /* FP comparisons. */
  1622. static void asm_fpcomp(ASMState *as, IRIns *ir)
  1623. {
  1624.   Reg left, right;
  1625.   ARMIns ai;
  1626.   int swp = ((ir->o ^ (ir->o >> 2)) & ~(ir->o >> 3) & 1);
  1627.   if (!swp && irref_isk(ir->op2) && ir_knum(IR(ir->op2))->u64 == 0) {
  1628.     left = (ra_alloc1(as, ir->op1, RSET_FPR) & 15);
  1629.     right = 0;
  1630.     ai = ARMI_VCMPZ_D;
  1631.   } else {
  1632.     left = ra_alloc2(as, ir, RSET_FPR);
  1633.     if (swp) {
  1634.       right = (left & 15); left = ((left >> 8) & 15);
  1635.     } else {
  1636.       right = ((left >> 8) & 15); left &= 15;
  1637.     }
  1638.     ai = ARMI_VCMP_D;
  1639.   }
  1640.   asm_guardcc(as, (asm_compmap[ir->o] >> 4));
  1641.   emit_d(as, ARMI_VMRS, 0);
  1642.   emit_dm(as, ai, left, right);
  1643. }
  1644. #endif

  1645. /* Integer comparisons. */
  1646. static void asm_intcomp(ASMState *as, IRIns *ir)
  1647. {
  1648.   ARMCC cc = (asm_compmap[ir->o] & 15);
  1649.   IRRef lref = ir->op1, rref = ir->op2;
  1650.   Reg left;
  1651.   uint32_t m;
  1652.   int cmpprev0 = 0;
  1653.   lua_assert(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t));
  1654.   if (asm_swapops(as, lref, rref)) {
  1655.     Reg tmp = lref; lref = rref; rref = tmp;
  1656.     if (cc >= CC_GE) cc ^= 7;  /* LT <-> GT, LE <-> GE */
  1657.     else if (cc > CC_NE) cc ^= 11;  /* LO <-> HI, LS <-> HS */
  1658.   }
  1659.   if (irref_isk(rref) && IR(rref)->i == 0) {
  1660.     IRIns *irl = IR(lref);
  1661.     cmpprev0 = (irl+1 == ir);
  1662.     /* Combine comp(BAND(left, right), 0) into tst left, right. */
  1663.     if (cmpprev0 && irl->o == IR_BAND && !ra_used(irl)) {
  1664.       IRRef blref = irl->op1, brref = irl->op2;
  1665.       uint32_t m2 = 0;
  1666.       Reg bleft;
  1667.       if (asm_swapops(as, blref, brref)) {
  1668.         Reg tmp = blref; blref = brref; brref = tmp;
  1669.       }
  1670.       if (irref_isk(brref)) {
  1671.         m2 = emit_isk12(ARMI_AND, IR(brref)->i);
  1672.         if ((m2 & (ARMI_AND^ARMI_BIC)))
  1673.           goto notst;  /* Not beneficial if we miss a constant operand. */
  1674.       }
  1675.       if (cc == CC_GE) cc = CC_PL;
  1676.       else if (cc == CC_LT) cc = CC_MI;
  1677.       else if (cc > CC_NE) goto notst;  /* Other conds don't work with tst. */
  1678.       bleft = ra_alloc1(as, blref, RSET_GPR);
  1679.       if (!m2) m2 = asm_fuseopm(as, 0, brref, rset_exclude(RSET_GPR, bleft));
  1680.       asm_guardcc(as, cc);
  1681.       emit_n(as, ARMI_TST^m2, bleft);
  1682.       return;
  1683.     }
  1684.   }
  1685. notst:
  1686.   left = ra_alloc1(as, lref, RSET_GPR);
  1687.   m = asm_fuseopm(as, ARMI_CMP, rref, rset_exclude(RSET_GPR, left));
  1688.   asm_guardcc(as, cc);
  1689.   emit_n(as, ARMI_CMP^m, left);
  1690.   /* Signed comparison with zero and referencing previous ins? */
  1691.   if (cmpprev0 && (cc <= CC_NE || cc >= CC_GE))
  1692.     as->flagmcp = as->mcp;  /* Allow elimination of the compare. */
  1693. }

  1694. static void asm_comp(ASMState *as, IRIns *ir)
  1695. {
  1696. #if !LJ_SOFTFP
  1697.   if (irt_isnum(ir->t))
  1698.     asm_fpcomp(as, ir);
  1699.   else
  1700. #endif
  1701.     asm_intcomp(as, ir);
  1702. }

  1703. #define asm_equal(as, ir)        asm_comp(as, ir)

  1704. #if LJ_HASFFI
  1705. /* 64 bit integer comparisons. */
  1706. static void asm_int64comp(ASMState *as, IRIns *ir)
  1707. {
  1708.   int signedcomp = (ir->o <= IR_GT);
  1709.   ARMCC cclo, cchi;
  1710.   Reg leftlo, lefthi;
  1711.   uint32_t mlo, mhi;
  1712.   RegSet allow = RSET_GPR, oldfree;

  1713.   /* Always use unsigned comparison for loword. */
  1714.   cclo = asm_compmap[ir->o + (signedcomp ? 4 : 0)] & 15;
  1715.   leftlo = ra_alloc1(as, ir->op1, allow);
  1716.   oldfree = as->freeset;
  1717.   mlo = asm_fuseopm(as, ARMI_CMP, ir->op2, rset_clear(allow, leftlo));
  1718.   allow &= ~(oldfree & ~as->freeset);  /* Update for allocs of asm_fuseopm. */

  1719.   /* Use signed or unsigned comparison for hiword. */
  1720.   cchi = asm_compmap[ir->o] & 15;
  1721.   lefthi = ra_alloc1(as, (ir+1)->op1, allow);
  1722.   mhi = asm_fuseopm(as, ARMI_CMP, (ir+1)->op2, rset_clear(allow, lefthi));

  1723.   /* All register allocations must be performed _before_ this point. */
  1724.   if (signedcomp) {
  1725.     MCLabel l_around = emit_label(as);
  1726.     asm_guardcc(as, cclo);
  1727.     emit_n(as, ARMI_CMP^mlo, leftlo);
  1728.     emit_branch(as, ARMF_CC(ARMI_B, CC_NE), l_around);
  1729.     if (cchi == CC_GE || cchi == CC_LE) cchi ^= 6;  /* GE -> GT, LE -> LT */
  1730.     asm_guardcc(as, cchi);
  1731.   } else {
  1732.     asm_guardcc(as, cclo);
  1733.     emit_n(as, ARMF_CC(ARMI_CMP, CC_EQ)^mlo, leftlo);
  1734.   }
  1735.   emit_n(as, ARMI_CMP^mhi, lefthi);
  1736. }
  1737. #endif
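
/* Annotation: asm_int64comp() checks the hiwords first and only falls
** through to an unsigned loword compare when they are equal.  For a signed
** comparison the generated code reads roughly:
**   cmp   lefthi, <op2 hi>
**   b<hi-cond> ->exit        ; GE/LE already tightened to GT/LT above
**   bne   l_around           ; hiwords differ: result already decided
**   cmp   leftlo, <op2 lo>
**   b<lo-cond> ->exit        ; unsigned condition
** l_around:
*/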

  1738. /* -- Support for 64 bit ops in 32 bit mode ------------------------------- */

  1739. /* Hiword op of a split 64 bit op. Previous op must be the loword op. */
  1740. static void asm_hiop(ASMState *as, IRIns *ir)
  1741. {
  1742. #if LJ_HASFFI || LJ_SOFTFP
  1743.   /* HIOP is marked as a store because it needs its own DCE logic. */
  1744.   int uselo = ra_used(ir-1), usehi = ra_used(ir);  /* Loword/hiword used? */
  1745.   if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
  1746.   if ((ir-1)->o <= IR_NE) {  /* 64 bit integer or FP comparisons. ORDER IR. */
  1747.     as->curins--;  /* Always skip the loword comparison. */
  1748. #if LJ_SOFTFP
  1749.     if (!irt_isint(ir->t)) {
  1750.       asm_sfpcomp(as, ir-1);
  1751.       return;
  1752.     }
  1753. #endif
  1754. #if LJ_HASFFI
  1755.     asm_int64comp(as, ir-1);
  1756. #endif
  1757.     return;
  1758. #if LJ_SOFTFP
  1759.   } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) {
  1760.     as->curins--;  /* Always skip the loword min/max. */
  1761.     if (uselo || usehi)
  1762.       asm_sfpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_HI : CC_LO);
  1763.     return;
  1764. #elif LJ_HASFFI
  1765.   } else if ((ir-1)->o == IR_CONV) {
  1766.     as->curins--;  /* Always skip the CONV. */
  1767.     if (usehi || uselo)
  1768.       asm_conv64(as, ir);
  1769.     return;
  1770. #endif
  1771.   } else if ((ir-1)->o == IR_XSTORE) {
  1772.     if ((ir-1)->r != RID_SINK)
  1773.       asm_xstore_(as, ir, 4);
  1774.     return;
  1775.   }
  1776.   if (!usehi) return;  /* Skip unused hiword op for all remaining ops. */
  1777.   switch ((ir-1)->o) {
  1778. #if LJ_HASFFI
  1779.   case IR_ADD:
  1780.     as->curins--;
  1781.     asm_intop(as, ir, ARMI_ADC);
  1782.     asm_intop(as, ir-1, ARMI_ADD|ARMI_S);
  1783.     break;
  1784.   case IR_SUB:
  1785.     as->curins--;
  1786.     asm_intop(as, ir, ARMI_SBC);
  1787.     asm_intop(as, ir-1, ARMI_SUB|ARMI_S);
  1788.     break;
  1789.   case IR_NEG:
  1790.     as->curins--;
  1791.     asm_intneg(as, ir, ARMI_RSC);
  1792.     asm_intneg(as, ir-1, ARMI_RSB|ARMI_S);
  1793.     break;
  1794. #endif
  1795. #if LJ_SOFTFP
  1796.   case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
  1797.   case IR_STRTO:
  1798.     if (!uselo)
  1799.       ra_allocref(as, ir->op1, RSET_GPR);  /* Mark lo op as used. */
  1800.     break;
  1801. #endif
  1802.   case IR_CALLN:
  1803.   case IR_CALLS:
  1804.   case IR_CALLXS:
  1805.     if (!uselo)
  1806.       ra_allocref(as, ir->op1, RID2RSET(RID_RETLO));  /* Mark lo op as used. */
  1807.     break;
  1808. #if LJ_SOFTFP
  1809.   case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR:
  1810. #endif
  1811.   case IR_CNEWI:
  1812.     /* Nothing to do here. Handled by lo op itself. */
  1813.     break;
  1814.   default: lua_assert(0); break;
  1815.   }
  1816. #else
  1817.   UNUSED(as); UNUSED(ir); lua_assert(0);
  1818. #endif
  1819. }

  1820. /* -- Profiling ----------------------------------------------------------- */

  1821. static void asm_prof(ASMState *as, IRIns *ir)
  1822. {
  1823.   UNUSED(ir);
  1824.   asm_guardcc(as, CC_NE);
  1825.   emit_n(as, ARMI_TST|ARMI_K12|HOOK_PROFILE, RID_TMP);
  1826.   emit_lsptr(as, ARMI_LDRB, RID_TMP, (void *)&J2G(as->J)->hookmask);
  1827. }

  1828. /* -- Stack handling ------------------------------------------------------ */

  1829. /* Check Lua stack size for overflow. Use exit handler as fallback. */
  1830. static void asm_stack_check(ASMState *as, BCReg topslot,
  1831.                             IRIns *irp, RegSet allow, ExitNo exitno)
  1832. {
  1833.   Reg pbase;
  1834.   uint32_t k;
  1835.   if (irp) {
  1836.     if (!ra_hasspill(irp->s)) {
  1837.       pbase = irp->r;
  1838.       lua_assert(ra_hasreg(pbase));
  1839.     } else if (allow) {
  1840.       pbase = rset_pickbot(allow);
  1841.     } else {
  1842.       pbase = RID_RET;
  1843.       emit_lso(as, ARMI_LDR, RID_RET, RID_SP, 0);  /* Restore temp. register. */
  1844.     }
  1845.   } else {
  1846.     pbase = RID_BASE;
  1847.   }
  1848.   emit_branch(as, ARMF_CC(ARMI_BL, CC_LS), exitstub_addr(as->J, exitno));
  1849.   k = emit_isk12(0, (int32_t)(8*topslot));
  1850.   lua_assert(k);
  1851.   emit_n(as, ARMI_CMP^k, RID_TMP);
  1852.   emit_dnm(as, ARMI_SUB, RID_TMP, RID_TMP, pbase);
  1853.   emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP,
  1854.            (int32_t)offsetof(lua_State, maxstack));
  1855.   if (irp) {  /* Must not spill arbitrary registers in head of side trace. */
  1856.     int32_t i = i32ptr(&J2G(as->J)->cur_L);
  1857.     if (ra_hasspill(irp->s))
  1858.       emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s));
  1859.     emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095));
  1860.     if (ra_hasspill(irp->s) && !allow)
  1861.       emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0);  /* Save temp. register. */
  1862.     emit_loadi(as, RID_TMP, (i & ~4095));
  1863.   } else {
  1864.     emit_getgl(as, RID_TMP, cur_L);
  1865.   }
  1866. }
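
/* Annotation: for a root trace the stack check reads roughly, in program
** order:
**   ldr  tmp, [&g->cur_L]
**   ldr  tmp, [tmp, #offsetof(lua_State, maxstack)]
**   sub  tmp, tmp, pbase
**   cmp  tmp, #8*topslot
**   blls ->exit
** i.e. the trace exits when (maxstack - BASE) is at most 8*topslot bytes
** (unsigned).  The side-trace variant above additionally splits the cur_L
** address into page/offset and may save/restore RID_RET around the check.
*/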

  1867. /* Restore Lua stack from on-trace state. */
  1868. static void asm_stack_restore(ASMState *as, SnapShot *snap)
  1869. {
  1870.   SnapEntry *map = &as->T->snapmap[snap->mapofs];
  1871.   SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1];
  1872.   MSize n, nent = snap->nent;
  1873.   /* Store the value of all modified slots to the Lua stack. */
  1874.   for (n = 0; n < nent; n++) {
  1875.     SnapEntry sn = map[n];
  1876.     BCReg s = snap_slot(sn);
  1877.     int32_t ofs = 8*((int32_t)s-1);
  1878.     IRRef ref = snap_ref(sn);
  1879.     IRIns *ir = IR(ref);
  1880.     if ((sn & SNAP_NORESTORE))
  1881.       continue;
  1882.     if (irt_isnum(ir->t)) {
  1883. #if LJ_SOFTFP
  1884.       RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE);
  1885.       Reg tmp;
  1886.       lua_assert(irref_isk(ref));  /* LJ_SOFTFP: must be a number constant. */
  1887.       tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo,
  1888.                       rset_exclude(RSET_GPREVEN, RID_BASE));
  1889.       emit_lso(as, ARMI_STR, tmp, RID_BASE, ofs);
  1890.       if (rset_test(as->freeset, tmp+1)) odd = RID2RSET(tmp+1);
  1891.       tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, odd);
  1892.       emit_lso(as, ARMI_STR, tmp, RID_BASE, ofs+4);
  1893. #else
  1894.       Reg src = ra_alloc1(as, ref, RSET_FPR);
  1895.       emit_vlso(as, ARMI_VSTR_D, src, RID_BASE, ofs);
  1896. #endif
  1897.     } else {
  1898.       RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE);
  1899.       Reg type;
  1900.       lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t));
  1901.       if (!irt_ispri(ir->t)) {
  1902.         Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPREVEN, RID_BASE));
  1903.         emit_lso(as, ARMI_STR, src, RID_BASE, ofs);
  1904.         if (rset_test(as->freeset, src+1)) odd = RID2RSET(src+1);
  1905.       }
  1906.       if ((sn & (SNAP_CONT|SNAP_FRAME))) {
  1907.         if (s == 0) continue;  /* Do not overwrite link to previous frame. */
  1908.         type = ra_allock(as, (int32_t)(*flinks--), odd);
  1909. #if LJ_SOFTFP
  1910.       } else if ((sn & SNAP_SOFTFPNUM)) {
  1911.         type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPRODD, RID_BASE));
  1912. #endif
  1913.       } else {
  1914.         type = ra_allock(as, (int32_t)irt_toitype(ir->t), odd);
  1915.       }
  1916.       emit_lso(as, ARMI_STR, type, RID_BASE, ofs+4);
  1917.     }
  1918.     checkmclim(as);
  1919.   }
  1920.   lua_assert(map + nent == flinks);
  1921. }

  1922. /* -- GC handling --------------------------------------------------------- */

  1923. /* Check GC threshold and do one or more GC steps. */
  1924. static void asm_gc_check(ASMState *as)
  1925. {
  1926.   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit];
  1927.   IRRef args[2];
  1928.   MCLabel l_end;
  1929.   Reg tmp1, tmp2;
  1930.   ra_evictset(as, RSET_SCRATCH);
  1931.   l_end = emit_label(as);
  1932.   /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */
  1933.   asm_guardcc(as, CC_NE);  /* Assumes asm_snap_prep() already done. */
  1934.   emit_n(as, ARMI_CMP|ARMI_K12|0, RID_RET);
  1935.   args[0] = ASMREF_TMP1;  /* global_State *g */
  1936.   args[1] = ASMREF_TMP2;  /* MSize steps     */
  1937.   asm_gencall(as, ci, args);
  1938.   tmp1 = ra_releasetmp(as, ASMREF_TMP1);
  1939.   tmp2 = ra_releasetmp(as, ASMREF_TMP2);
  1940.   emit_loadi(as, tmp2, as->gcsteps);
  1941.   /* Jump around GC step if GC total < GC threshold. */
  1942.   emit_branch(as, ARMF_CC(ARMI_B, CC_LS), l_end);
  1943.   emit_nm(as, ARMI_CMP, RID_TMP, tmp2);
  1944.   emit_lso(as, ARMI_LDR, tmp2, tmp1,
  1945.            (int32_t)offsetof(global_State, gc.threshold));
  1946.   emit_lso(as, ARMI_LDR, RID_TMP, tmp1,
  1947.            (int32_t)offsetof(global_State, gc.total));
  1948.   ra_allockreg(as, i32ptr(J2G(as->J)), tmp1);
  1949.   as->gcsteps = 0;
  1950.   checkmclim(as);
  1951. }
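
/* Annotation: the generated GC check reads roughly, in program order:
**   <materialize &g in tmp1>
**   ldr  rtmp, [tmp1, #gc.total]
**   ldr  tmp2, [tmp1, #gc.threshold]
**   cmp  rtmp, tmp2
**   bls  l_end                  ; total <= threshold: skip the GC step
**   mov  tmp2, #gcsteps
**   bl   lj_gc_step_jit         ; args: g, steps
**   cmp  r0, #0
**   b<ne> ->exit                ; leave trace if in GCSatomic/GCSfinalize
** l_end:
*/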

  1952. /* -- Loop handling ------------------------------------------------------- */

  1953. /* Fixup the loop branch. */
  1954. static void asm_loop_fixup(ASMState *as)
  1955. {
  1956.   MCode *p = as->mctop;
  1957.   MCode *target = as->mcp;
  1958.   if (as->loopinv) {  /* Inverted loop branch? */
  1959.     /* asm_guardcc already inverted the bcc and patched the final bl. */
  1960.     p[-2] |= ((uint32_t)(target-p) & 0x00ffffffu);
  1961.   } else {
  1962.     p[-1] = ARMI_B | ((uint32_t)((target-p)-1) & 0x00ffffffu);
  1963.   }
  1964. }

  1965. /* -- Head of trace ------------------------------------------------------- */

  1966. /* Reload L register from g->cur_L. */
  1967. static void asm_head_lreg(ASMState *as)
  1968. {
  1969.   IRIns *ir = IR(ASMREF_L);
  1970.   if (ra_used(ir)) {
  1971.     Reg r = ra_dest(as, ir, RSET_GPR);
  1972.     emit_getgl(as, r, cur_L);
  1973.     ra_evictk(as);
  1974.   }
  1975. }

  1976. /* Coalesce BASE register for a root trace. */
  1977. static void asm_head_root_base(ASMState *as)
  1978. {
  1979.   IRIns *ir;
  1980.   asm_head_lreg(as);
  1981.   ir = IR(REF_BASE);
  1982.   if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
  1983.     ra_spill(as, ir);
  1984.   ra_destreg(as, ir, RID_BASE);
  1985. }

  1986. /* Coalesce BASE register for a side trace. */
  1987. static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
  1988. {
  1989.   IRIns *ir;
  1990.   asm_head_lreg(as);
  1991.   ir = IR(REF_BASE);
  1992.   if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
  1993.     ra_spill(as, ir);
  1994.   if (ra_hasspill(irp->s)) {
  1995.     rset_clear(allow, ra_dest(as, ir, allow));
  1996.   } else {
  1997.     Reg r = irp->r;
  1998.     lua_assert(ra_hasreg(r));
  1999.     rset_clear(allow, r);
  2000.     if (r != ir->r && !rset_test(as->freeset, r))
  2001.       ra_restore(as, regcost_ref(as->cost[r]));
  2002.     ra_destreg(as, ir, r);
  2003.   }
  2004.   return allow;
  2005. }

  2006. /* -- Tail of trace ------------------------------------------------------- */

  2007. /* Fixup the tail code. */
  2008. static void asm_tail_fixup(ASMState *as, TraceNo lnk)
  2009. {
  2010.   MCode *p = as->mctop;
  2011.   MCode *target;
  2012.   int32_t spadj = as->T->spadjust;
  2013.   if (spadj == 0) {
  2014.     as->mctop = --p;
  2015.   } else {
  2016.     /* Patch stack adjustment. */
  2017.     uint32_t k = emit_isk12(ARMI_ADD, spadj);
  2018.     lua_assert(k);
  2019.     p[-2] = (ARMI_ADD^k) | ARMF_D(RID_SP) | ARMF_N(RID_SP);
  2020.   }
  2021.   /* Patch exit branch. */
  2022.   target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp;
  2023.   p[-1] = ARMI_B|(((target-p)-1)&0x00ffffffu);
  2024. }

  2025. /* Prepare tail of code. */
  2026. static void asm_tail_prep(ASMState *as)
  2027. {
  2028.   MCode *p = as->mctop - 1;  /* Leave room for exit branch. */
  2029.   if (as->loopref) {
  2030.     as->invmcp = as->mcp = p;
  2031.   } else {
  2032.     as->mcp = p-1;  /* Leave room for stack pointer adjustment. */
  2033.     as->invmcp = NULL;
  2034.   }
  2035.   *p = 0;  /* Prevent load/store merging. */
  2036. }

  2037. /* -- Trace setup --------------------------------------------------------- */

  2038. /* Ensure there are enough stack slots for call arguments. */
  2039. static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
  2040. {
  2041.   IRRef args[CCI_NARGS_MAX*2];
  2042.   uint32_t i, nargs = CCI_XNARGS(ci);
  2043.   int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR, fprodd = 0;
  2044.   asm_collectargs(as, ir, ci, args);
  2045.   for (i = 0; i < nargs; i++) {
  2046.     if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t)) {
  2047.       if (!LJ_ABI_SOFTFP && !(ci->flags & CCI_VARARG)) {
  2048.         if (irt_isnum(IR(args[i])->t)) {
  2049.           if (nfpr > 0) nfpr--;
  2050.           else fprodd = 0, nslots = (nslots + 3) & ~1;
  2051.         } else {
  2052.           if (fprodd) fprodd--;
  2053.           else if (nfpr > 0) fprodd = 1, nfpr--;
  2054.           else nslots++;
  2055.         }
  2056.       } else if (irt_isnum(IR(args[i])->t)) {
  2057.         ngpr &= ~1;
  2058.         if (ngpr > 0) ngpr -= 2; else nslots += 2;
  2059.       } else {
  2060.         if (ngpr > 0) ngpr--; else nslots++;
  2061.       }
  2062.     } else {
  2063.       if (ngpr > 0) ngpr--; else nslots++;
  2064.     }
  2065.   }
  2066.   if (nslots > as->evenspill)  /* Leave room for args in stack slots. */
  2067.     as->evenspill = nslots;
  2068.   return REGSP_HINT(RID_RET);
  2069. }
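
/* Annotation: this mirrors the ARM calling conventions used elsewhere in
** this file: with the hard-float ABI, FP arguments consume the VFP argument
** registers (d0-d7 under AAPCS, with single-precision back-filling tracked
** by fprodd); with the soft-float ABI or varargs, doubles take an
** even-aligned GPR pair.  Whatever no longer fits in registers bumps
** nslots, and as->evenspill reserves that much outgoing stack space.
*/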

  2070. static void asm_setup_target(ASMState *as)
  2071. {
  2072.   /* May need extra exit for asm_stack_check on side traces. */
  2073.   asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0));
  2074. }

  2075. /* -- Trace patching ------------------------------------------------------ */

  2076. /* Patch exit jumps of existing machine code to a new target. */
  2077. void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
  2078. {
  2079.   MCode *p = T->mcode;
  2080.   MCode *pe = (MCode *)((char *)p + T->szmcode);
  2081.   MCode *cstart = NULL, *cend = p;
  2082.   MCode *mcarea = lj_mcode_patch(J, p, 0);
  2083.   MCode *px = exitstub_addr(J, exitno) - 2;
  2084.   for (; p < pe; p++) {
  2085.     /* Look for bl_cc exitstub, replace with b_cc target. */
  2086.     uint32_t ins = *p;
  2087.     if ((ins & 0x0f000000u) == 0x0b000000u && ins < 0xf0000000u &&
  2088.         ((ins ^ (px-p)) & 0x00ffffffu) == 0) {
  2089.       *p = (ins & 0xfe000000u) | (((target-p)-2) & 0x00ffffffu);
  2090.       cend = p+1;
  2091.       if (!cstart) cstart = p;
  2092.     }
  2093.   }
  2094.   lua_assert(cstart != NULL);
  2095.   lj_mcode_sync(cstart, cend);
  2096.   lj_mcode_patch(J, mcarea, 1);
  2097. }
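
/* Annotation: the scan above recognizes a conditional BL to this exit's
** stub by its opcode field ((ins & 0x0f000000u) == 0x0b000000u, with
** cond != 0xF) and by its 24-bit branch offset matching px (which already
** accounts for the 2-word PC offset).  The patch keeps the condition but
** clears bit 24 (the link bit), turning the BL into a plain B to the new
** target, and lj_mcode_sync() then flushes the modified range.
*/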