src/lj_opt_split.c - luajit-2.0-src


/*
** SPLIT: Split 64 bit IR instructions into 32 bit IR instructions.
** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/

#define lj_opt_split_c
#define LUA_CORE

#include "lj_obj.h"

#if LJ_HASJIT && (LJ_SOFTFP || (LJ_32 && LJ_HASFFI))

#include "lj_err.h"
#include "lj_buf.h"
#include "lj_ir.h"
#include "lj_jit.h"
#include "lj_ircall.h"
#include "lj_iropt.h"
#include "lj_vm.h"

/* SPLIT pass:
**
** This pass splits up 64 bit IR instructions into multiple 32 bit IR
** instructions. It's only active for soft-float targets or for 32 bit CPUs
** which lack native 64 bit integer operations (the FFI is currently the
** only emitter for 64 bit integer instructions).
**
** Splitting the IR in a separate pass keeps each 32 bit IR assembler
** backend simple. Only a small amount of extra functionality needs to be
** implemented. This is much easier than adding support for allocating
** register pairs to each backend (believe me, I tried). A few simple, but
** important optimizations can be performed by the SPLIT pass, which would
** be tedious to do in the backend.
**
** The basic idea is to replace each 64 bit IR instruction with its 32 bit
** equivalent plus an extra HIOP instruction. The split IR is not passed
** through FOLD or any other optimizations, so each HIOP is guaranteed to
** immediately follow its counterpart. The actual functionality of HIOP is
** inferred from the previous instruction.
**
** The operands of HIOP hold the hiword input references. The output of HIOP
** is the hiword output reference, which is also used to hold the hiword
** register or spill slot information. The register allocator treats this
** instruction independently of any other instruction, which improves code
** quality compared to using fixed register pairs.
**
** It's easier to split up some instructions into two regular 32 bit
** instructions. E.g. XLOAD is split up into two XLOADs with two different
** addresses. Obviously 64 bit constants need to be split up into two 32 bit
** constants, too. Some hiword instructions can be entirely omitted, e.g.
** when zero-extending a 32 bit value to 64 bits. 64 bit arguments for calls
** are split up into two 32 bit arguments each.
**
** On soft-float targets, floating-point instructions are directly converted
** to soft-float calls by the SPLIT pass (except for comparisons and MIN/MAX).
** HIOP for number results has the type IRT_SOFTFP ("sfp" in -jdump).
**
** Here's the IR and x64 machine code for 'x.b = x.a + 1' for a struct with
** two int64_t fields:
**
** 0100    p32 ADD    base  +8
** 0101    i64 XLOAD  0100
** 0102    i64 ADD    0101  +1
** 0103    p32 ADD    base  +16
** 0104    i64 XSTORE 0103  0102
**
**         mov rax, [esi+0x8]
**         add rax, +0x01
**         mov [esi+0x10], rax
**
** Here's the transformed IR and the x86 machine code after the SPLIT pass:
**
** 0100    p32 ADD    base  +8
** 0101    int XLOAD  0100
** 0102    p32 ADD    base  +12
** 0103    int XLOAD  0102
** 0104    int ADD    0101  +1
** 0105    int HIOP   0103  +0
** 0106    p32 ADD    base  +16
** 0107    int XSTORE 0106  0104
** 0108    int HIOP   0106  0105
**
**         mov eax, [esi+0x8]
**         mov ecx, [esi+0xc]
**         add eax, +0x01
**         adc ecx, +0x00
**         mov [esi+0x10], eax
**         mov [esi+0x14], ecx
**
** You may notice the reassociated hiword address computation, which is
** later fused into the mov operands by the assembler.
*/

/* Some local macros to save typing. Undef'd at the end. */
#define IR(ref)                (&J->cur.ir[(ref)])

/* Directly emit the transformed IR without updating chains etc. */
static IRRef split_emit(jit_State *J, uint16_t ot, IRRef1 op1, IRRef1 op2)
{
  IRRef nref = lj_ir_nextins(J);
  IRIns *ir = IR(nref);
  ir->ot = ot;
  ir->op1 = op1;
  ir->op2 = op2;
  return nref;
}
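
/* Illustrative sketch of the pairing convention (lo1/lo2/hi1/hi2 are
** hypothetical references, not variables from this file): a 64 bit
** integer ADD ends up as a loword ADD immediately followed by its HIOP
** twin, roughly:
**
**   split_emit(J, IRTI(IR_ADD), lo1, lo2);    loword add
**   split_emit(J, IRTI(IR_HIOP), hi1, hi2);   hiword add-with-carry
**
** The assembler later pairs these into add/adc, as in the header example.
*/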

#if LJ_SOFTFP
/* Emit a (checked) number to integer conversion. */
static IRRef split_num2int(jit_State *J, IRRef lo, IRRef hi, int check)
{
  IRRef tmp, res;
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), lo, hi);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hi, lo);
#endif
  res = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_softfp_d2i);
  if (check) {
    tmp = split_emit(J, IRTI(IR_CALLN), res, IRCALL_softfp_i2d);
    split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
    split_emit(J, IRTGI(IR_EQ), tmp, lo);
    split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP), tmp+1, hi);
  }
  return res;
}
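
/* Sketch of the IR emitted for a checked conversion (little-endian;
** reference numbers invented for illustration):
**
**   0001 nil CARG   lo    hi
**   0002 int CALLN  0001  softfp_d2i
**   0003 int CALLN  0002  softfp_i2d
**   0004 sfp HIOP   0003  0003
**   0005 int EQ     0003  lo            (guarded)
**   0006 sfp HIOP   0004  hi            (guarded)
**
** The result is converted back to a number and both words are compared
** against the originals; if the round-trip was inexact, the guards fail
** and the trace exits.
*/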

/* Emit a CALLN with one split 64 bit argument. */
static IRRef split_call_l(jit_State *J, IRRef1 *hisubst, IRIns *oir,
                          IRIns *ir, IRCallID id)
{
  IRRef tmp, op1 = ir->op1;
  J->cur.nins--;
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
#endif
  ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
  return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
}
#endif

/* Emit a CALLN with one split 64 bit argument and a 32 bit argument. */
static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir,
                           IRIns *ir, IRCallID id)
{
  IRRef tmp, op1 = ir->op1, op2 = ir->op2;
  J->cur.nins--;
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
#endif
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
  ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
  return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
}

/* Emit a CALLN with two split 64 bit arguments. */
static IRRef split_call_ll(jit_State *J, IRRef1 *hisubst, IRIns *oir,
                           IRIns *ir, IRCallID id)
{
  IRRef tmp, op1 = ir->op1, op2 = ir->op2;
  J->cur.nins--;
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, hisubst[op2]);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, hisubst[op2]);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
#endif
  ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
  return split_emit(J,
    IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT),
    tmp, tmp);
}
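
/* Illustrative CARG chain built by split_call_ll on a little-endian
** target, e.g. for a soft-float add (reference numbers invented):
**
**   0001 nil CARG   lo1   hi1
**   0002 nil CARG   0001  lo2
**   0003 nil CARG   0002  hi2
**   0004 int CALLN  0003  softfp_add
**   0005 sfp HIOP   0004  0004
**
** The CALLN result is the loword of the 64 bit return value; the
** trailing HIOP carries its hiword.
*/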

/* Get a pointer to the other 32 bit word (LE: hiword, BE: loword). */
static IRRef split_ptr(jit_State *J, IRIns *oir, IRRef ref)
{
  IRRef nref = oir[ref].prev;
  IRIns *ir = IR(nref);
  int32_t ofs = 4;
  if (ir->o == IR_KPTR)
    return lj_ir_kptr(J, (char *)ir_kptr(ir) + ofs);
  if (ir->o == IR_ADD && irref_isk(ir->op2) && !irt_isphi(oir[ref].t)) {
    /* Reassociate address. */
    ofs += IR(ir->op2)->i;
    nref = ir->op1;
    if (ofs == 0) return nref;
  }
  return split_emit(J, IRTI(IR_ADD), nref, lj_ir_kint(J, ofs));
}
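
/* Example of the reassociation above: for an XLOAD whose address is
** ADD(base, +8), the hiword access is emitted as ADD(base, +12) rather
** than ADD(ADD(base, +8), +4), so the backend can fuse the constant
** offset into the memory operand (see the example in the header comment).
*/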

#if LJ_HASFFI
static IRRef split_bitshift(jit_State *J, IRRef1 *hisubst,
                            IRIns *oir, IRIns *nir, IRIns *ir)
{
  IROp op = ir->o;
  IRRef kref = nir->op2;
  if (irref_isk(kref)) {  /* Optimize constant shifts. */
    int32_t k = (IR(kref)->i & 63);
    IRRef lo = nir->op1, hi = hisubst[ir->op1];
    if (op == IR_BROL || op == IR_BROR) {
      if (op == IR_BROR) k = (-k & 63);
      if (k >= 32) { IRRef t = lo; lo = hi; hi = t; k -= 32; }
      if (k == 0) {
      passthrough:
        J->cur.nins--;
        ir->prev = lo;
        return hi;
      } else {
        TRef k1, k2;
        IRRef t1, t2, t3, t4;
        J->cur.nins--;
        k1 = lj_ir_kint(J, k);
        k2 = lj_ir_kint(J, (-k & 31));
        t1 = split_emit(J, IRTI(IR_BSHL), lo, k1);
        t2 = split_emit(J, IRTI(IR_BSHL), hi, k1);
        t3 = split_emit(J, IRTI(IR_BSHR), lo, k2);
        t4 = split_emit(J, IRTI(IR_BSHR), hi, k2);
        ir->prev = split_emit(J, IRTI(IR_BOR), t1, t4);
        return split_emit(J, IRTI(IR_BOR), t2, t3);
      }
    } else if (k == 0) {
      goto passthrough;
    } else if (k < 32) {
      if (op == IR_BSHL) {
        IRRef t1 = split_emit(J, IRTI(IR_BSHL), hi, kref);
        IRRef t2 = split_emit(J, IRTI(IR_BSHR), lo, lj_ir_kint(J, (-k&31)));
        return split_emit(J, IRTI(IR_BOR), t1, t2);
      } else {
        IRRef t1 = ir->prev, t2;
        lua_assert(op == IR_BSHR || op == IR_BSAR);
        nir->o = IR_BSHR;
        t2 = split_emit(J, IRTI(IR_BSHL), hi, lj_ir_kint(J, (-k&31)));
        ir->prev = split_emit(J, IRTI(IR_BOR), t1, t2);
        return split_emit(J, IRTI(op), hi, kref);
      }
    } else {
      if (op == IR_BSHL) {
        if (k == 32)
          J->cur.nins--;
        else
          lo = ir->prev;
        ir->prev = lj_ir_kint(J, 0);
        return lo;
      } else {
        lua_assert(op == IR_BSHR || op == IR_BSAR);
        if (k == 32) {
          J->cur.nins--;
          ir->prev = hi;
        } else {
          nir->op1 = hi;
        }
        if (op == IR_BSHR)
          return lj_ir_kint(J, 0);
        else
          return split_emit(J, IRTI(IR_BSAR), hi, lj_ir_kint(J, 31));
      }
    }
  }
  return split_call_li(J, hisubst, oir, ir,
                       op - IR_BSHL + IRCALL_lj_carith_shl64);
}
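
/* Worked example for the constant-shift case above: for 0 < k < 32,
** a 64 bit shift left decomposes into
**
**   hi' = (hi << k) | (lo >> (32-k))
**   lo' = lo << k
**
** which is exactly the BSHL/BSHR/BOR combination emitted for IR_BSHL.
** Shifts by k >= 32 move whole words and fill with zeros (or sign copies
** for BSAR); non-constant shift amounts fall back to the lj_carith_*64
** helper calls.
*/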

static IRRef split_bitop(jit_State *J, IRRef1 *hisubst,
                         IRIns *nir, IRIns *ir)
{
  IROp op = ir->o;
  IRRef hi, kref = nir->op2;
  if (irref_isk(kref)) {  /* Optimize bit operations with lo constant. */
    int32_t k = IR(kref)->i;
    if (k == 0 || k == -1) {
      if (op == IR_BAND) k = ~k;
      if (k == 0) {
        J->cur.nins--;
        ir->prev = nir->op1;
      } else if (op == IR_BXOR) {
        nir->o = IR_BNOT;
        nir->op2 = 0;
      } else {
        J->cur.nins--;
        ir->prev = kref;
      }
    }
  }
  hi = hisubst[ir->op1];
  kref = hisubst[ir->op2];
  if (irref_isk(kref)) {  /* Optimize bit operations with hi constant. */
    int32_t k = IR(kref)->i;
    if (k == 0 || k == -1) {
      if (op == IR_BAND) k = ~k;
      if (k == 0) {
        return hi;
      } else if (op == IR_BXOR) {
        return split_emit(J, IRTI(IR_BNOT), hi, 0);
      } else {
        return kref;
      }
    }
  }
  return split_emit(J, IRTI(op), hi, kref);
}
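
/* The identities used above: x & 0 = 0, x & -1 = x, x | 0 = x,
** x | -1 = -1, x ^ 0 = x and x ^ -1 = ~x. Since the two words of a
** 64 bit bitwise op are fully independent, a constant lo or hi word
** can be simplified separately without touching the other half.
*/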
#endif

/* Substitute references of a snapshot. */
static void split_subst_snap(jit_State *J, SnapShot *snap, IRIns *oir)
{
  SnapEntry *map = &J->cur.snapmap[snap->mapofs];
  MSize n, nent = snap->nent;
  for (n = 0; n < nent; n++) {
    SnapEntry sn = map[n];
    IRIns *ir = &oir[snap_ref(sn)];
    if (!(LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && irref_isk(snap_ref(sn))))
      map[n] = ((sn & 0xffff0000) | ir->prev);
  }
}
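
/* Note on the substitution above: a non-constant soft-float number in a
** snapshot keeps only its loword ref; the exit handling relies on the
** "hi ref = lo ref + 1" invariant noted below to find the hiword.
** Constant numbers are left pointing at the original KNUM, since their
** two split KINT halves are not adjacent.
*/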

/* Transform the old IR to the new IR. */
static void split_ir(jit_State *J)
{
  IRRef nins = J->cur.nins, nk = J->cur.nk;
  MSize irlen = nins - nk;
  MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1));
  IRIns *oir = (IRIns *)lj_buf_tmp(J->L, need);
  IRRef1 *hisubst;
  IRRef ref, snref;
  SnapShot *snap;

  /* Copy old IR to buffer. */
  memcpy(oir, IR(nk), irlen*sizeof(IRIns));
  /* Bias hiword substitution table and old IR. Loword kept in field prev. */
  hisubst = (IRRef1 *)&oir[irlen] - nk;
  oir -= nk;

  /* Remove all IR instructions, but retain IR constants. */
  J->cur.nins = REF_FIRST;
  J->loopref = 0;

  /* Process constants and fixed references. */
  for (ref = nk; ref <= REF_BASE; ref++) {
    IRIns *ir = &oir[ref];
    if ((LJ_SOFTFP && ir->o == IR_KNUM) || ir->o == IR_KINT64) {
      /* Split up 64 bit constant. */
      TValue tv = *ir_k64(ir);
      ir->prev = lj_ir_kint(J, (int32_t)tv.u32.lo);
      hisubst[ref] = lj_ir_kint(J, (int32_t)tv.u32.hi);
    } else {
      ir->prev = ref;  /* Identity substitution for loword. */
      hisubst[ref] = 0;
    }
  }

  /* Process old IR instructions. */
  snap = J->cur.snap;
  snref = snap->ref;
  for (ref = REF_FIRST; ref < nins; ref++) {
    IRIns *ir = &oir[ref];
    IRRef nref = lj_ir_nextins(J);
    IRIns *nir = IR(nref);
    IRRef hi = 0;

    if (ref >= snref) {
      snap->ref = nref;
      split_subst_snap(J, snap++, oir);
      snref = snap < &J->cur.snap[J->cur.nsnap] ? snap->ref : ~(IRRef)0;
    }

    /* Copy-substitute old instruction to new instruction. */
    nir->op1 = ir->op1 < nk ? ir->op1 : oir[ir->op1].prev;
    nir->op2 = ir->op2 < nk ? ir->op2 : oir[ir->op2].prev;
    ir->prev = nref;  /* Loword substitution. */
    nir->o = ir->o;
    nir->t.irt = ir->t.irt & ~(IRT_MARK|IRT_ISPHI);
    hisubst[ref] = 0;

    /* Split 64 bit instructions. */
#if LJ_SOFTFP
    if (irt_isnum(ir->t)) {
      nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD);  /* Turn into INT op. */
      /* Note: hi ref = lo ref + 1! Required for SNAP_SOFTFPNUM logic. */
      switch (ir->o) {
      case IR_ADD:
        hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_add);
        break;
      case IR_SUB:
        hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_sub);
        break;
      case IR_MUL:
        hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_mul);
        break;
      case IR_DIV:
        hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_div);
        break;
      case IR_POW:
        hi = split_call_li(J, hisubst, oir, ir, IRCALL_lj_vm_powi);
        break;
      case IR_FPMATH:
        /* Try to rejoin pow from EXP2, MUL and LOG2. */
        if (nir->op2 == IRFPM_EXP2 && nir->op1 > J->loopref) {
          IRIns *irp = IR(nir->op1);
          if (irp->o == IR_CALLN && irp->op2 == IRCALL_softfp_mul) {
            IRIns *irm4 = IR(irp->op1);
            IRIns *irm3 = IR(irm4->op1);
            IRIns *irm12 = IR(irm3->op1);
            IRIns *irl1 = IR(irm12->op1);
            if (irm12->op1 > J->loopref && irl1->o == IR_CALLN &&
                irl1->op2 == IRCALL_lj_vm_log2) {
              IRRef tmp = irl1->op1;  /* Recycle first two args from LOG2. */
              IRRef arg3 = irm3->op2, arg4 = irm4->op2;
              J->cur.nins--;
              tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg3);
              tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg4);
              ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_pow);
              hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
              break;
            }
          }
        }
        hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2);
        break;
      case IR_ATAN2:
        hi = split_call_ll(J, hisubst, oir, ir, IRCALL_atan2);
        break;
      case IR_LDEXP:
        hi = split_call_li(J, hisubst, oir, ir, IRCALL_ldexp);
        break;
      case IR_NEG: case IR_ABS:
        nir->o = IR_CONV;  /* Pass through loword. */
        nir->op2 = (IRT_INT << 5) | IRT_INT;
        hi = split_emit(J, IRT(ir->o == IR_NEG ? IR_BXOR : IR_BAND, IRT_SOFTFP),
                        hisubst[ir->op1], hisubst[ir->op2]);
        break;
      case IR_SLOAD:
        if ((nir->op2 & IRSLOAD_CONVERT)) {  /* Convert from int to number. */
          nir->op2 &= ~IRSLOAD_CONVERT;
          ir->prev = nref = split_emit(J, IRTI(IR_CALLN), nref,
                                       IRCALL_softfp_i2d);
          hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
          break;
        }
        /* fallthrough */
      case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
      case IR_STRTO:
        hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
        break;
      case IR_XLOAD: {
        IRIns inslo = *nir;  /* Save/undo the emit of the lo XLOAD. */
        J->cur.nins--;
        hi = split_ptr(J, oir, ir->op1);  /* Insert the hiref ADD. */
        nref = lj_ir_nextins(J);
        nir = IR(nref);
        *nir = inslo;  /* Re-emit lo XLOAD immediately before hi XLOAD. */
        hi = split_emit(J, IRT(IR_XLOAD, IRT_SOFTFP), hi, ir->op2);
#if LJ_LE
        ir->prev = nref;
#else
        ir->prev = hi; hi = nref;
#endif
        break;
        }
      case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_XSTORE:
        split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nir->op1, hisubst[ir->op2]);
        break;
      case IR_CONV: {  /* Conversion to number. Others handled below. */
        IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
        UNUSED(st);
#if LJ_32 && LJ_HASFFI
        if (st == IRT_I64 || st == IRT_U64) {
          hi = split_call_l(J, hisubst, oir, ir,
                 st == IRT_I64 ? IRCALL_fp64_l2d : IRCALL_fp64_ul2d);
          break;
        }
#endif
        lua_assert(st == IRT_INT ||
                   (LJ_32 && LJ_HASFFI && (st == IRT_U32 || st == IRT_FLOAT)));
        nir->o = IR_CALLN;
#if LJ_32 && LJ_HASFFI
        nir->op2 = st == IRT_INT ? IRCALL_softfp_i2d :
                   st == IRT_FLOAT ? IRCALL_softfp_f2d :
                   IRCALL_softfp_ui2d;
#else
        nir->op2 = IRCALL_softfp_i2d;
#endif
        hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
        break;
        }
      case IR_CALLN:
      case IR_CALLL:
      case IR_CALLS:
      case IR_CALLXS:
        goto split_call;
      case IR_PHI:
        if (nir->op1 == nir->op2)
          J->cur.nins--;  /* Drop useless PHIs. */
        if (hisubst[ir->op1] != hisubst[ir->op2])
          split_emit(J, IRT(IR_PHI, IRT_SOFTFP),
                     hisubst[ir->op1], hisubst[ir->op2]);
        break;
      case IR_HIOP:
        J->cur.nins--;  /* Drop joining HIOP. */
        ir->prev = nir->op1;
        hi = nir->op2;
        break;
      default:
        lua_assert(ir->o <= IR_NE || ir->o == IR_MIN || ir->o == IR_MAX);
        hi = split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP),
                        hisubst[ir->op1], hisubst[ir->op2]);
        break;
      }
    } else
#endif
#if LJ_32 && LJ_HASFFI
    if (irt_isint64(ir->t)) {
      IRRef hiref = hisubst[ir->op1];
      nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD);  /* Turn into INT op. */
      switch (ir->o) {
      case IR_ADD:
      case IR_SUB:
        /* Use plain op for hiword if loword cannot produce a carry/borrow. */
        if (irref_isk(nir->op2) && IR(nir->op2)->i == 0) {
          ir->prev = nir->op1;  /* Pass through loword. */
          nir->op1 = hiref; nir->op2 = hisubst[ir->op2];
          hi = nref;
          break;
        }
        /* fallthrough */
      case IR_NEG:
        hi = split_emit(J, IRTI(IR_HIOP), hiref, hisubst[ir->op2]);
        break;
      case IR_MUL:
        hi = split_call_ll(J, hisubst, oir, ir, IRCALL_lj_carith_mul64);
        break;
      case IR_DIV:
        hi = split_call_ll(J, hisubst, oir, ir,
                           irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
                                              IRCALL_lj_carith_divu64);
        break;
      case IR_MOD:
        hi = split_call_ll(J, hisubst, oir, ir,
                           irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
                                              IRCALL_lj_carith_modu64);
        break;
      case IR_POW:
        hi = split_call_ll(J, hisubst, oir, ir,
                           irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
                                              IRCALL_lj_carith_powu64);
        break;
      case IR_BNOT:
        hi = split_emit(J, IRTI(IR_BNOT), hiref, 0);
        break;
      case IR_BSWAP:
        ir->prev = split_emit(J, IRTI(IR_BSWAP), hiref, 0);
        hi = nref;
        break;
      case IR_BAND: case IR_BOR: case IR_BXOR:
        hi = split_bitop(J, hisubst, nir, ir);
        break;
      case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR:
        hi = split_bitshift(J, hisubst, oir, nir, ir);
        break;
      case IR_FLOAD:
        lua_assert(ir->op2 == IRFL_CDATA_INT64);
        hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4);
#if LJ_BE
        ir->prev = hi; hi = nref;
#endif
        break;
      case IR_XLOAD:
        hi = split_emit(J, IRTI(IR_XLOAD), split_ptr(J, oir, ir->op1), ir->op2);
#if LJ_BE
        ir->prev = hi; hi = nref;
#endif
        break;
      case IR_XSTORE:
        split_emit(J, IRTI(IR_HIOP), nir->op1, hisubst[ir->op2]);
        break;
      case IR_CONV: {  /* Conversion to 64 bit integer. Others handled below. */
        IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
#if LJ_SOFTFP
        if (st == IRT_NUM) {  /* NUM to 64 bit int conv. */
          hi = split_call_l(J, hisubst, oir, ir,
                 irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul);
        } else if (st == IRT_FLOAT) {  /* FLOAT to 64 bit int conv. */
          nir->o = IR_CALLN;
          nir->op2 = irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul;
          hi = split_emit(J, IRTI(IR_HIOP), nref, nref);
        }
#else
        if (st == IRT_NUM || st == IRT_FLOAT) {  /* FP to 64 bit int conv. */
          hi = split_emit(J, IRTI(IR_HIOP), nir->op1, nref);
        }
#endif
        else if (st == IRT_I64 || st == IRT_U64) {  /* 64/64 bit cast. */
          /* Drop cast, since assembler doesn't care. */
          goto fwdlo;
        } else if ((ir->op2 & IRCONV_SEXT)) {  /* Sign-extend to 64 bit. */
          IRRef k31 = lj_ir_kint(J, 31);
          nir = IR(nref);  /* May have been reallocated. */
          ir->prev = nir->op1;  /* Pass through loword. */
          nir->o = IR_BSAR;  /* hi = bsar(lo, 31). */
          nir->op2 = k31;
          hi = nref;
        } else {  /* Zero-extend to 64 bit. */
          hi = lj_ir_kint(J, 0);
          goto fwdlo;
        }
        break;
        }
      case IR_CALLXS:
        goto split_call;
      case IR_PHI: {
        IRRef hiref2;
        if ((irref_isk(nir->op1) && irref_isk(nir->op2)) ||
            nir->op1 == nir->op2)
          J->cur.nins--;  /* Drop useless PHIs. */
        hiref2 = hisubst[ir->op2];
        if (!((irref_isk(hiref) && irref_isk(hiref2)) || hiref == hiref2))
          split_emit(J, IRTI(IR_PHI), hiref, hiref2);
        break;
        }
      case IR_HIOP:
        J->cur.nins--;  /* Drop joining HIOP. */
        ir->prev = nir->op1;
        hi = nir->op2;
        break;
      default:
        lua_assert(ir->o <= IR_NE);  /* Comparisons. */
        split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]);
        break;
      }
    } else
#endif
#if LJ_SOFTFP
    if (ir->o == IR_SLOAD) {
      if ((nir->op2 & IRSLOAD_CONVERT)) {  /* Convert from number to int. */
        nir->op2 &= ~IRSLOAD_CONVERT;
        if (!(nir->op2 & IRSLOAD_TYPECHECK))
          nir->t.irt = IRT_INT;  /* Drop guard. */
        split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
        ir->prev = split_num2int(J, nref, nref+1, irt_isguard(ir->t));
      }
    } else if (ir->o == IR_TOBIT) {
      IRRef tmp, op1 = ir->op1;
      J->cur.nins--;
#if LJ_LE
      tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
#else
      tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
#endif
      ir->prev = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_lj_vm_tobit);
    } else if (ir->o == IR_TOSTR) {
      if (hisubst[ir->op1]) {
        if (irref_isk(ir->op1))
          nir->op1 = ir->op1;
        else
          split_emit(J, IRT(IR_HIOP, IRT_NIL), hisubst[ir->op1], nref);
      }
    } else if (ir->o == IR_HREF || ir->o == IR_NEWREF) {
      if (irref_isk(ir->op2) && hisubst[ir->op2])
        nir->op2 = ir->op2;
    } else
#endif
    if (ir->o == IR_CONV) {  /* See above, too. */
      IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
#if LJ_32 && LJ_HASFFI
      if (st == IRT_I64 || st == IRT_U64) {  /* Conversion from 64 bit int. */
#if LJ_SOFTFP
        if (irt_isfloat(ir->t)) {
          split_call_l(J, hisubst, oir, ir,
                       st == IRT_I64 ? IRCALL_fp64_l2f : IRCALL_fp64_ul2f);
          J->cur.nins--;  /* Drop unused HIOP. */
        }
#else
        if (irt_isfp(ir->t)) {  /* 64 bit integer to FP conversion. */
          ir->prev = split_emit(J, IRT(IR_HIOP, irt_type(ir->t)),
                                hisubst[ir->op1], nref);
        }
#endif
        else {  /* Truncate to lower 32 bits. */
        fwdlo:
          ir->prev = nir->op1;  /* Forward loword. */
          /* Replace with NOP to avoid messing up the snapshot logic. */
          nir->ot = IRT(IR_NOP, IRT_NIL);
          nir->op1 = nir->op2 = 0;
        }
      }
#endif
#if LJ_SOFTFP && LJ_32 && LJ_HASFFI
      else if (irt_isfloat(ir->t)) {
        if (st == IRT_NUM) {
          split_call_l(J, hisubst, oir, ir, IRCALL_softfp_d2f);
          J->cur.nins--;  /* Drop unused HIOP. */
        } else {
          nir->o = IR_CALLN;
          nir->op2 = st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f;
        }
      } else if (st == IRT_FLOAT) {
        nir->o = IR_CALLN;
        nir->op2 = irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui;
      } else
#endif
#if LJ_SOFTFP
      if (st == IRT_NUM || (LJ_32 && LJ_HASFFI && st == IRT_FLOAT)) {
        if (irt_isguard(ir->t)) {
          lua_assert(st == IRT_NUM && irt_isint(ir->t));
          J->cur.nins--;
          ir->prev = split_num2int(J, nir->op1, hisubst[ir->op1], 1);
        } else {
          split_call_l(J, hisubst, oir, ir,
#if LJ_32 && LJ_HASFFI
            st == IRT_NUM ?
              (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) :
              (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui)
#else
            IRCALL_softfp_d2i
#endif
          );
          J->cur.nins--;  /* Drop unused HIOP. */
        }
      }
#endif
    } else if (ir->o == IR_CALLXS) {
      IRRef hiref;
    split_call:
      hiref = hisubst[ir->op1];
      if (hiref) {
        IROpT ot = nir->ot;
        IRRef op2 = nir->op2;
        nir->ot = IRT(IR_CARG, IRT_NIL);
#if LJ_LE
        nir->op2 = hiref;
#else
        nir->op2 = nir->op1; nir->op1 = hiref;
#endif
        ir->prev = nref = split_emit(J, ot, nref, op2);
      }
      if (LJ_SOFTFP ? irt_is64(ir->t) : irt_isint64(ir->t))
        hi = split_emit(J,
          IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT),
          nref, nref);
    } else if (ir->o == IR_CARG) {
      IRRef hiref = hisubst[ir->op1];
      if (hiref) {
        IRRef op2 = nir->op2;
#if LJ_LE
        nir->op2 = hiref;
#else
        nir->op2 = nir->op1; nir->op1 = hiref;
#endif
        ir->prev = nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2);
        nir = IR(nref);
      }
      hiref = hisubst[ir->op2];
      if (hiref) {
#if !LJ_TARGET_X86
        int carg = 0;
        IRIns *cir;
        for (cir = IR(nir->op1); cir->o == IR_CARG; cir = IR(cir->op1))
          carg++;
        if ((carg & 1) == 0) {  /* Align 64 bit arguments. */
          IRRef op2 = nir->op2;
          nir->op2 = REF_NIL;
          nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2);
          nir = IR(nref);
        }
#endif
#if LJ_BE
        { IRRef tmp = nir->op2; nir->op2 = hiref; hiref = tmp; }
#endif
        ir->prev = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, hiref);
      }
    } else if (ir->o == IR_CNEWI) {
      if (hisubst[ir->op2])
        split_emit(J, IRT(IR_HIOP, IRT_NIL), nref, hisubst[ir->op2]);
    } else if (ir->o == IR_LOOP) {
      J->loopref = nref;  /* Needed by assembler. */
    }
    hisubst[ref] = hi;  /* Store hiword substitution. */
  }
  if (snref == nins) {  /* Substitution for last snapshot. */
    snap->ref = J->cur.nins;
    split_subst_snap(J, snap, oir);
  }

  /* Add PHI marks. */
  for (ref = J->cur.nins-1; ref >= REF_FIRST; ref--) {
    IRIns *ir = IR(ref);
    if (ir->o != IR_PHI) break;
    if (!irref_isk(ir->op1)) irt_setphi(IR(ir->op1)->t);
    if (ir->op2 > J->loopref) irt_setphi(IR(ir->op2)->t);
  }
}
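
/* Layout note: oir and hisubst are biased by nk, so both can be indexed
** directly with IR references, which run from nk (constants) up to nins
** (instructions). For any old ref r, oir[r].prev holds the loword
** substitution and hisubst[r] the hiword one (0 if there is no hiword).
*/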

/* Protected callback for split pass. */
static TValue *cpsplit(lua_State *L, lua_CFunction dummy, void *ud)
{
  jit_State *J = (jit_State *)ud;
  split_ir(J);
  UNUSED(L); UNUSED(dummy);
  return NULL;
}

#if defined(LUA_USE_ASSERT) || LJ_SOFTFP
/* Slow, but sure way to check whether a SPLIT pass is needed. */
static int split_needsplit(jit_State *J)
{
  IRIns *ir, *irend;
  IRRef ref;
  for (ir = IR(REF_FIRST), irend = IR(J->cur.nins); ir < irend; ir++)
    if (LJ_SOFTFP ? irt_is64orfp(ir->t) : irt_isint64(ir->t))
      return 1;
  if (LJ_SOFTFP) {
    for (ref = J->chain[IR_SLOAD]; ref; ref = IR(ref)->prev)
      if ((IR(ref)->op2 & IRSLOAD_CONVERT))
        return 1;
    if (J->chain[IR_TOBIT])
      return 1;
  }
  for (ref = J->chain[IR_CONV]; ref; ref = IR(ref)->prev) {
    IRType st = (IR(ref)->op2 & IRCONV_SRCMASK);
    if ((LJ_SOFTFP && (st == IRT_NUM || st == IRT_FLOAT)) ||
        st == IRT_I64 || st == IRT_U64)
      return 1;
  }
  return 0;  /* Nope. */
}
#endif

/* SPLIT pass. */
void lj_opt_split(jit_State *J)
{
#if LJ_SOFTFP
  if (!J->needsplit)
    J->needsplit = split_needsplit(J);
#else
  lua_assert(J->needsplit >= split_needsplit(J));  /* Verify flag. */
#endif
  if (J->needsplit) {
    int errcode = lj_vm_cpcall(J->L, NULL, J, cpsplit);
    if (errcode) {
      /* Completely reset the trace to avoid inconsistent dump on abort. */
      J->cur.nins = J->cur.nk = REF_BASE;
      J->cur.nsnap = 0;
      lj_err_throw(J->L, errcode);  /* Propagate errors. */
    }
  }
}
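
/* Note: the SPLIT pass runs once per trace, after the other IR
** optimizations and before assembly, since the split IR is deliberately
** not passed through FOLD (see the header comment). The lj_vm_cpcall
** wrapper catches errors raised while emitting the new IR (e.g. an IR
** buffer overflow); the trace is then reset and the error rethrown.
*/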

#undef IR

#endif