src/lj_jit.h - luajit-2.0-src

Global variables defined

Data types defined

Functions defined

Macros defined

Source code

  1. /*
  2. ** Common definitions for the JIT compiler.
  3. ** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
  4. */

  5. #ifndef _LJ_JIT_H
  6. #define _LJ_JIT_H

  7. #include "lj_obj.h"
  8. #include "lj_ir.h"

  9. /* JIT engine flags (bits of jit_State.flags). */
  10. #define JIT_F_ON                0x00000001

  11. /* CPU-specific JIT engine flags. One bit per feature, starting at 0x10. */
  12. #if LJ_TARGET_X86ORX64
  13. #define JIT_F_SSE2                0x00000010
  14. #define JIT_F_SSE3                0x00000020
  15. #define JIT_F_SSE4_1                0x00000040
  16. #define JIT_F_PREFER_IMUL        0x00000080        /* Named "AMD" in JIT_F_CPUSTRING. */
  17. #define JIT_F_LEA_AGU                0x00000100        /* Named "ATOM" in JIT_F_CPUSTRING. */

  18. /* Names for the CPU-specific flags, each prefixed by its length as an octal escape. Must match the order above. */
  19. #define JIT_F_CPU_FIRST                JIT_F_SSE2
  20. #define JIT_F_CPUSTRING                "\4SSE2\4SSE3\6SSE4.1\3AMD\4ATOM"
  21. #elif LJ_TARGET_ARM
  22. #define JIT_F_ARMV6_                0x00000010
  23. #define JIT_F_ARMV6T2_                0x00000020
  24. #define JIT_F_ARMV7                0x00000040
  25. #define JIT_F_VFPV2                0x00000080
  26. #define JIT_F_VFPV3                0x00000100

  27. #define JIT_F_ARMV6                (JIT_F_ARMV6_|JIT_F_ARMV6T2_|JIT_F_ARMV7)        /* Mask: any of the three bits. */
  28. #define JIT_F_ARMV6T2                (JIT_F_ARMV6T2_|JIT_F_ARMV7)        /* Mask: either of the two bits. */
  29. #define JIT_F_VFP                (JIT_F_VFPV2|JIT_F_VFPV3)        /* Mask: either VFP bit. */

  30. /* Names for the CPU-specific flags (length-prefixed). Must match the order of the single-bit flags above. */
  31. #define JIT_F_CPU_FIRST                JIT_F_ARMV6_
  32. #define JIT_F_CPUSTRING                "\5ARMv6\7ARMv6T2\5ARMv7\5VFPv2\5VFPv3"
  33. #elif LJ_TARGET_PPC
  34. #define JIT_F_SQRT                0x00000010
  35. #define JIT_F_ROUND                0x00000020

  36. /* Names for the CPU-specific flags (length-prefixed). Must match the order above. */
  37. #define JIT_F_CPU_FIRST                JIT_F_SQRT
  38. #define JIT_F_CPUSTRING                "\4SQRT\5ROUND"
  39. #elif LJ_TARGET_MIPS
  40. #define JIT_F_MIPS32R2                0x00000010

  41. /* Names for the CPU-specific flags (length-prefixed, \010 is octal for 8). Must match the order above. */
  42. #define JIT_F_CPU_FIRST                JIT_F_MIPS32R2
  43. #define JIT_F_CPUSTRING                "\010MIPS32R2"
  44. #else
  45. #define JIT_F_CPU_FIRST                0        /* No CPU-specific flags defined for other targets. */
  46. #define JIT_F_CPUSTRING                ""
  47. #endif

  48. /* Optimization flags. Every JIT_F_OPT_* bit below lies within JIT_F_OPT_MASK. */
  49. #define JIT_F_OPT_MASK                0x0fff0000

  50. #define JIT_F_OPT_FOLD                0x00010000
  51. #define JIT_F_OPT_CSE                0x00020000
  52. #define JIT_F_OPT_DCE                0x00040000
  53. #define JIT_F_OPT_FWD                0x00080000
  54. #define JIT_F_OPT_DSE                0x00100000
  55. #define JIT_F_OPT_NARROW        0x00200000
  56. #define JIT_F_OPT_LOOP                0x00400000
  57. #define JIT_F_OPT_ABC                0x00800000
  58. #define JIT_F_OPT_SINK                0x01000000
  59. #define JIT_F_OPT_FUSE                0x02000000

  60. /* Optimization names for -O, each prefixed by its length as an octal escape. Must match the order above. */
  61. #define JIT_F_OPT_FIRST                JIT_F_OPT_FOLD
  62. #define JIT_F_OPTSTRING        \
  63.   "\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\3abc\4sink\4fuse"

  64. /* Optimization levels set a fixed combination of flags. Each level includes the previous one. */
  65. #define JIT_F_OPT_0        0
  66. #define JIT_F_OPT_1        (JIT_F_OPT_FOLD|JIT_F_OPT_CSE|JIT_F_OPT_DCE)
  67. #define JIT_F_OPT_2        (JIT_F_OPT_1|JIT_F_OPT_NARROW|JIT_F_OPT_LOOP)
  68. #define JIT_F_OPT_3        (JIT_F_OPT_2|\
  69.   JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_ABC|JIT_F_OPT_SINK|JIT_F_OPT_FUSE)
  70. #define JIT_F_OPT_DEFAULT        JIT_F_OPT_3        /* Level 3 enables all ten flags above. */

  71. #if LJ_TARGET_WINDOWS || LJ_64
  72. /* See: http://blogs.msdn.com/oldnewthing/archive/2003/10/08/55239.aspx */
  73. #define JIT_P_sizemcode_DEFAULT                64        /* In KBytes (default of the sizemcode parameter). */
  74. #else
  75. /* Could go as low as 4K, but the mmap() overhead would be rather high. */
  76. #define JIT_P_sizemcode_DEFAULT                32        /* In KBytes (default of the sizemcode parameter). */
  77. #endif

  78. /* Optimization parameters and their defaults: _(name length as an octal char, name, default value). */
  79. #define JIT_PARAMDEF(_) \
  80.   _(\010, maxtrace,        1000)        /* Max. # of traces in cache. */ \
  81.   _(\011, maxrecord,        4000)        /* Max. # of recorded IR instructions. */ \
  82.   _(\012, maxirconst,        500)        /* Max. # of IR constants of a trace. */ \
  83.   _(\007, maxside,        100)        /* Max. # of side traces of a root trace. */ \
  84.   _(\007, maxsnap,        500)        /* Max. # of snapshots for a trace. */ \
  85.   _(\011, minstitch,        0)        /* Min. # of IR ins for a stitched trace. */ \
  86.   \
  87.   _(\007, hotloop,        56)        /* # of iter. to detect a hot loop/call. */ \
  88.   _(\007, hotexit,        10)        /* # of taken exits to start a side trace. */ \
  89.   _(\007, tryside,        4)        /* # of attempts to compile a side trace. */ \
  90.   \
  91.   _(\012, instunroll,        4)        /* Max. unroll for instable loops. */ \
  92.   _(\012, loopunroll,        15)        /* Max. unroll for loop ops in side traces. */ \
  93.   _(\012, callunroll,        3)        /* Max. unroll for recursive calls. */ \
  94.   _(\011, recunroll,        2)        /* Min. unroll for true recursion. */ \
  95.   \
  96.   /* Size of each machine code area (in KBytes). */ \
  97.   _(\011, sizemcode,        JIT_P_sizemcode_DEFAULT) \
  98.   /* Max. total size of all machine code areas (in KBytes). */ \
  99.   _(\010, maxmcode,        512) \
  100.   /* End of list. */

  101. enum {        /* One JIT_P_<name> index per parameter, in list order. */
  102. #define JIT_PARAMENUM(len, name, value)        JIT_P_##name,
  103. JIT_PARAMDEF(JIT_PARAMENUM)
  104. #undef JIT_PARAMENUM
  105.   JIT_P__MAX        /* Number of parameters; sizes jit_State.param[]. */
  106. };

  107. #define JIT_PARAMSTR(len, name, value)        #len #name        /* Stringifies the length char and the name. */
  108. #define JIT_P_STRING        JIT_PARAMDEF(JIT_PARAMSTR)        /* All parameter names as one length-prefixed string. */

  109. /* Trace compiler state (type of jit_State.state). */
  110. typedef enum {
  111.   LJ_TRACE_IDLE,        /* Trace compiler idle. */
  112.   LJ_TRACE_ACTIVE = 0x10,        /* Base value: every state listed below is >= LJ_TRACE_ACTIVE. */
  113.   LJ_TRACE_RECORD,        /* Bytecode recording active. */
  114.   LJ_TRACE_START,        /* New trace started. */
  115.   LJ_TRACE_END,                /* End of trace. */
  116.   LJ_TRACE_ASM,                /* Assemble trace. */
  117.   LJ_TRACE_ERR                /* Trace aborted with error. */
  118. } TraceState;

  119. /* Post-processing action (type of jit_State.postproc). */
  120. typedef enum {
  121.   LJ_POST_NONE,                /* No action. */
  122.   LJ_POST_FIXCOMP,        /* Fixup comparison and emit pending guard. */
  123.   LJ_POST_FIXGUARD,        /* Fixup and emit pending guard. */
  124.   LJ_POST_FIXGUARDSNAP,        /* Fixup and emit pending guard and snapshot. */
  125.   LJ_POST_FIXBOOL,        /* Fixup boolean result. */
  126.   LJ_POST_FIXCONST,        /* Fixup constant results. */
  127.   LJ_POST_FFRETRY        /* Suppress recording of retried fast functions. */
  128. } PostProc;

  129. /* Machine code unit type: one byte on x86/x64, one 32 bit word on all other targets. */
  130. #if LJ_TARGET_X86ORX64
  131. typedef uint8_t MCode;
  132. #else
  133. typedef uint32_t MCode;
  134. #endif

  135. /* Stack snapshot header. Its entries live in the snapshot map, starting at mapofs. */
  136. typedef struct SnapShot {
  137.   uint16_t mapofs;        /* Offset into snapshot map. */
  138.   IRRef1 ref;                /* First IR ref for this snapshot. */
  139.   uint8_t nslots;        /* Number of valid slots. */
  140.   uint8_t topslot;        /* Maximum frame extent. */
  141.   uint8_t nent;                /* Number of compressed entries. */
  142.   uint8_t count;        /* Count of taken exits for this snapshot (see SNAPCOUNT_DONE). */
  143. } SnapShot;

  144. #define SNAPCOUNT_DONE        255        /* Already compiled and linked a side trace. */

  145. /* Compressed snapshot entry: slot in the top 8 bits, SNAP_* flags in bits 16-19, IR ref in the low 16 bits. */
  146. typedef uint32_t SnapEntry;

  147. #define SNAP_FRAME                0x010000        /* Frame slot. */
  148. #define SNAP_CONT                0x020000        /* Continuation slot. */
  149. #define SNAP_NORESTORE                0x040000        /* No need to restore slot. */
  150. #define SNAP_SOFTFPNUM                0x080000        /* Soft-float number. */
  151. LJ_STATIC_ASSERT(SNAP_FRAME == TREF_FRAME);        /* SNAP_TR() relies on these flags sharing TRef bit values. */
  152. LJ_STATIC_ASSERT(SNAP_CONT == TREF_CONT);

  153. #define SNAP(slot, flags, ref)        (((SnapEntry)(slot) << 24) + (flags) + (ref))
  154. #define SNAP_TR(slot, tr) \
  155.   (((SnapEntry)(slot) << 24) + ((tr) & (TREF_CONT|TREF_FRAME|TREF_REFMASK)))
  156. #define SNAP_MKPC(pc)                ((SnapEntry)u32ptr(pc))
  157. #define SNAP_MKFTSZ(ftsz)        ((SnapEntry)(ftsz))
  158. #define snap_ref(sn)                ((sn) & 0xffff)        /* IR ref: low 16 bits. */
  159. #define snap_slot(sn)                ((BCReg)((sn) >> 24))        /* Slot number: top 8 bits. */
  160. #define snap_isframe(sn)        ((sn) & SNAP_FRAME)
  161. #define snap_pc(sn)                ((const BCIns *)(uintptr_t)(sn))
  162. #define snap_setref(sn, ref)        (((sn) & (0xffff0000&~SNAP_NORESTORE)) | (ref))        /* Replaces the ref and clears SNAP_NORESTORE. */

  163. /* Snapshot and exit numbers. */
  164. typedef uint32_t SnapNo;
  165. typedef uint32_t ExitNo;

  166. /* Trace number. Two widths: 32 bit for passing around, 16 bit for storage. */
  167. typedef uint32_t TraceNo;        /* Used to pass around trace numbers. */
  168. typedef uint16_t TraceNo1;        /* Stored trace number (e.g. the GCtrace link fields). */

  169. /* Type of link (stored as uint8_t in GCtrace.linktype). ORDER LJ_TRLINK */
  170. typedef enum {
  171.   LJ_TRLINK_NONE,                /* Incomplete trace. No link, yet. */
  172.   LJ_TRLINK_ROOT,                /* Link to other root trace. */
  173.   LJ_TRLINK_LOOP,                /* Loop to same trace. */
  174.   LJ_TRLINK_TAILREC,                /* Tail-recursion. */
  175.   LJ_TRLINK_UPREC,                /* Up-recursion. */
  176.   LJ_TRLINK_DOWNREC,                /* Down-recursion. */
  177.   LJ_TRLINK_INTERP,                /* Fallback to interpreter. */
  178.   LJ_TRLINK_RETURN,                /* Return to interpreter. */
  179.   LJ_TRLINK_STITCH                /* Trace stitching. */
  180. } TraceLink;

  181. /* Trace object. The gclist field must stay at the same offset as in GChead (asserted below). */
  182. typedef struct GCtrace {
  183.   GCHeader;
  184.   uint8_t topslot;        /* Top stack slot already checked to be allocated. */
  185.   uint8_t linktype;        /* Type of link (see TraceLink). */
  186.   IRRef nins;                /* Next IR instruction. Biased with REF_BIAS. */
  187. #if LJ_GC64
  188.   uint32_t unused_gc64;        /* Unused; only present in LJ_GC64 builds. */
  189. #endif
  190.   GCRef gclist;                /* Offset must match GChead.gclist. */
  191.   IRIns *ir;                /* IR instructions/constants. Biased with REF_BIAS. */
  192.   IRRef nk;                /* Lowest IR constant. Biased with REF_BIAS. */
  193.   uint16_t nsnap;        /* Number of snapshots. */
  194.   uint16_t nsnapmap;        /* Number of snapshot map elements. */
  195.   SnapShot *snap;        /* Snapshot array. */
  196.   SnapEntry *snapmap;        /* Snapshot map. */
  197.   GCRef startpt;        /* Starting prototype. */
  198.   MRef startpc;                /* Bytecode PC of starting instruction. */
  199.   BCIns startins;        /* Original bytecode of starting instruction. */
  200.   MSize szmcode;        /* Size of machine code. */
  201.   MCode *mcode;                /* Start of machine code. */
  202.   MSize mcloop;                /* Offset of loop start in machine code. */
  203.   uint16_t nchild;        /* Number of child traces (root trace only). */
  204.   uint16_t spadjust;        /* Stack pointer adjustment (offset in bytes). */
  205.   TraceNo1 traceno;        /* Trace number. */
  206.   TraceNo1 link;        /* Linked trace (or self for loops). */
  207.   TraceNo1 root;        /* Root trace of side trace (or 0 for root traces). */
  208.   TraceNo1 nextroot;        /* Next root trace for same prototype. */
  209.   TraceNo1 nextside;        /* Next side trace of same root trace. */
  210.   uint8_t sinktags;        /* Trace has SINK tags. */
  211.   uint8_t unused1;        /* Unused. */
  212. #ifdef LUAJIT_USE_GDBJIT
  213.   void *gdbjit_entry;        /* GDB JIT entry. */
  214. #endif
  215. } GCtrace;

  216. #define gco2trace(o)        check_exp((o)->gch.gct == ~LJ_TTRACE, (GCtrace *)(o))
  217. #define traceref(J, n) \
  218.   check_exp((n)>0 && (MSize)(n)<J->sizetrace, (GCtrace *)gcref(J->trace[(n)]))

  219. LJ_STATIC_ASSERT(offsetof(GChead, gclist) == offsetof(GCtrace, gclist));

  220. static LJ_AINLINE MSize snap_nextofs(GCtrace *T, SnapShot *snap)
  221. {
  222.   SnapShot *next = snap+1;
  223.   /* The last snapshot has no successor: its entries end at T->nsnapmap. */
  224.   return (next == &T->snap[T->nsnap]) ? (MSize)T->nsnapmap :
  225.                                         (MSize)next->mapofs;
  226. }

  227. /* Round-robin penalty cache for bytecodes leading to aborted traces. */
  228. typedef struct HotPenalty {
  229.   MRef pc;                /* Starting bytecode PC. */
  230.   uint16_t val;                /* Penalty value, i.e. hotcount start. */
  231.   uint16_t reason;        /* Abort reason (really TraceErr). */
  232. } HotPenalty;

  233. #define PENALTY_SLOTS        64        /* # of penalty cache slots. Must be a power of 2. */
  234. #define PENALTY_MIN        (36*2)        /* Minimum penalty value. */
  235. #define PENALTY_MAX        60000        /* Maximum penalty value. */
  236. #define PENALTY_RNDBITS        4        /* # of random bits to add to penalty value. */

  237. /* Round-robin backpropagation cache for narrowing conversions (entry type of jit_State.bpropcache). */
  238. typedef struct BPropEntry {
  239.   IRRef1 key;                /* Key: original reference. */
  240.   IRRef1 val;                /* Value: reference after conversion. */
  241.   IRRef mode;                /* Mode for this entry (currently IRCONV_*). */
  242. } BPropEntry;

  243. /* Number of slots for the backpropagation cache (size of jit_State.bpropcache). Must be a power of 2. */
  244. #define BPROP_SLOTS        16

  245. /* Scalar evolution analysis cache entry (jit_State.scev holds a single one). */
  246. typedef struct ScEvEntry {
  247.   MRef pc;                /* Bytecode PC of FORI. */
  248.   IRRef1 idx;                /* Index reference. */
  249.   IRRef1 start;                /* Constant start reference. */
  250.   IRRef1 stop;                /* Constant stop reference. */
  251.   IRRef1 step;                /* Constant step reference. */
  252.   IRType1 t;                /* Scalar type. */
  253.   uint8_t dir;                /* Direction. 1: +, 0: -. */
  254. } ScEvEntry;

  255. /* 128 bit SIMD constants: index values for LJ_KSIMD(). */
  256. enum {
  257.   LJ_KSIMD_ABS,
  258.   LJ_KSIMD_NEG,
  259.   LJ_KSIMD__MAX        /* Number of SIMD constants; sizes jit_State.ksimd[]. */
  260. };

  261. /* Get 16 byte aligned pointer to SIMD constant n: rounds &J->ksimd[2*n] up to the next 16 byte boundary. */
  262. #define LJ_KSIMD(J, n) \
  263.   ((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15))

  264. /* Set/reset flag to activate the SPLIT pass for the current trace. Same condition as for jit_State.needsplit. */
  265. #if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
  266. #define lj_needsplit(J)                (J->needsplit = 1)
  267. #define lj_resetsplit(J)        (J->needsplit = 0)
  268. #else
  269. #define lj_needsplit(J)                UNUSED(J)        /* jit_State has no needsplit field in this configuration. */
  270. #define lj_resetsplit(J)        UNUSED(J)
  271. #endif

  272. /* Fold state is used to fold instructions on-the-fly (type of jit_State.fold). */
  273. typedef struct FoldState {
  274.   IRIns ins;                /* Currently emitted instruction. */
  275.   IRIns left;                /* Instruction referenced by left operand. */
  276.   IRIns right;                /* Instruction referenced by right operand. */
  277. } FoldState;

  278. /* JIT compiler state. */
  279. typedef struct jit_State {
  280.   GCtrace cur;                /* Current trace. */

  281.   lua_State *L;                /* Current Lua state. */
  282.   const BCIns *pc;        /* Current PC. */
  283.   GCfunc *fn;                /* Current function. */
  284.   GCproto *pt;                /* Current prototype. */
  285.   TRef *base;                /* Current frame base, points into J->slot. */

  286.   uint32_t flags;        /* JIT engine flags (JIT_F_*). */
  287.   BCReg maxslot;        /* Relative to baseslot. */
  288.   BCReg baseslot;        /* Current frame base, offset into J->slot. */

  289.   uint8_t mergesnap;        /* Allowed to merge with next snapshot. */
  290.   uint8_t needsnap;        /* Need snapshot before recording next bytecode. */
  291.   IRType1 guardemit;        /* Accumulated IRT_GUARD for emitted instructions. */
  292.   uint8_t bcskip;        /* Number of bytecode instructions to skip. */

  293.   FoldState fold;        /* Fold state. */

  294.   const BCIns *bc_min;        /* Start of allowed bytecode range for root trace. */
  295.   MSize bc_extent;        /* Extent of the range. */

  296.   TraceState state;        /* Trace compiler state. */

  297.   int32_t instunroll;        /* Unroll counter for instable loops. */
  298.   int32_t loopunroll;        /* Unroll counter for loop ops in side traces. */
  299.   int32_t tailcalled;        /* Number of successive tailcalls. */
  300.   int32_t framedepth;        /* Current frame depth. */
  301.   int32_t retdepth;        /* Return frame depth (count of RETF). */

  302.   MRef k64;                /* Pointer to chained array of 64 bit constants. */
  303.   TValue ksimd[LJ_KSIMD__MAX*2+1];  /* 16 byte aligned SIMD constants. Access via LJ_KSIMD(). */

  304.   IRIns *irbuf;                /* Temp. IR instruction buffer. Biased with REF_BIAS. */
  305.   IRRef irtoplim;        /* Upper limit of instruction buffer (biased). */
  306.   IRRef irbotlim;        /* Lower limit of instruction buffer (biased). */
  307.   IRRef loopref;        /* Last loop reference or ref of final LOOP (or 0). */

  308.   MSize sizesnap;        /* Size of temp. snapshot buffer. */
  309.   SnapShot *snapbuf;        /* Temp. snapshot buffer. */
  310.   SnapEntry *snapmapbuf;  /* Temp. snapshot map buffer. */
  311.   MSize sizesnapmap;        /* Size of temp. snapshot map buffer. */

  312.   PostProc postproc;        /* Required post-processing after execution. */
  313. #if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
  314.   int needsplit;        /* Need SPLIT pass. Set/reset via lj_needsplit()/lj_resetsplit(). */
  315. #endif

  316.   GCRef *trace;                /* Array of traces. */
  317.   TraceNo freetrace;        /* Start of scan for next free trace. */
  318.   MSize sizetrace;        /* Size of trace array. */

  319.   IRRef1 chain[IR__MAX];  /* IR instruction skip-list chain anchors. */
  320.   TRef slot[LJ_MAX_JSLOTS+LJ_STACK_EXTRA];  /* Stack slot map. */

  321.   int32_t param[JIT_P__MAX];  /* JIT engine parameters, indexed by JIT_P_*. */

  322.   MCode *exitstubgroup[LJ_MAX_EXITSTUBGR];  /* Exit stub group addresses. */

  323.   HotPenalty penalty[PENALTY_SLOTS];  /* Penalty slots. */
  324.   uint32_t penaltyslot;        /* Round-robin index into penalty slots. */
  325.   uint32_t prngstate;        /* PRNG state. */

  326.   BPropEntry bpropcache[BPROP_SLOTS];  /* Backpropagation cache slots. */
  327.   uint32_t bpropslot;        /* Round-robin index into bpropcache slots. */

  328.   ScEvEntry scev;        /* Scalar evolution analysis cache slots. */

  329.   const BCIns *startpc;        /* Bytecode PC of starting instruction. */
  330.   TraceNo parent;        /* Parent of current side trace (0 for root traces). */
  331.   ExitNo exitno;        /* Exit number in parent of current side trace. */

  332.   BCIns *patchpc;        /* PC for pending re-patch. */
  333.   BCIns patchins;        /* Instruction for pending re-patch. */

  334.   int mcprot;                /* Protection of current mcode area. */
  335.   MCode *mcarea;        /* Base of current mcode area. */
  336.   MCode *mctop;                /* Top of current mcode area. */
  337.   MCode *mcbot;                /* Bottom of current mcode area. */
  338.   size_t szmcarea;        /* Size of current mcode area. */
  339.   size_t szallmcarea;        /* Total size of all allocated mcode areas. */

  340.   TValue errinfo;        /* Additional info element for trace errors. */

  341. #if LJ_HASPROFILE
  342.   GCproto *prev_pt;        /* Previous prototype. */
  343.   BCLine prev_line;        /* Previous line. */
  344.   int prof_mode;        /* Profiling mode: 0, 'f', 'l'. */
  345. #endif
  346. }
  347. #if LJ_TARGET_ARM
  348. LJ_ALIGN(16)                /* For DISPATCH-relative addresses in assembler part. */
  349. #endif
  350. jit_State;

  351. /* Trivial PRNG, e.g. used for penalty randomization. Returns the top 'bits' bits of the new state. */
  352. static LJ_AINLINE uint32_t LJ_PRNG_BITS(jit_State *J, int bits)
  353. {
  354.   uint32_t next = J->prngstate * 1103515245 + 12345;  /* One LCG step. */
  355.   J->prngstate = next;  /* A very weak generator, but good enough for this use. */
  356.   return next >> (32-bits);
  357. }

  358. #endif