src/lj_target_x86.h - luajit-2.0-src

Data types defined

Macros defined

Source code

  1. /*
  2. ** Definitions for x86 and x64 CPUs.
  3. ** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
  4. */

  5. #ifndef _LJ_TARGET_X86_H
  6. #define _LJ_TARGET_X86_H

  7. /* -- Register IDs -------------------------------------------------------- */

  /*
  ** Register lists in X-macro form. Each list applies its argument once per
  ** register name, in register-ID order. The x64 lists extend the eight
  ** x86 GPRs/FPRs with R8D-R15D and XMM8-XMM15.
  */
  #if LJ_64
  #define GPRDEF(_) \
    _(EAX) _(ECX) _(EDX) _(EBX) \
    _(ESP) _(EBP) _(ESI) _(EDI) \
    _(R8D) _(R9D) _(R10D) _(R11D) \
    _(R12D) _(R13D) _(R14D) _(R15D)
  #define FPRDEF(_) \
    _(XMM0) _(XMM1) _(XMM2) _(XMM3) \
    _(XMM4) _(XMM5) _(XMM6) _(XMM7) \
    _(XMM8) _(XMM9) _(XMM10) _(XMM11) \
    _(XMM12) _(XMM13) _(XMM14) _(XMM15)
  #else
  #define GPRDEF(_) \
    _(EAX) _(ECX) _(EDX) _(EBX) \
    _(ESP) _(EBP) _(ESI) _(EDI)
  #define FPRDEF(_) \
    _(XMM0) _(XMM1) _(XMM2) _(XMM3) \
    _(XMM4) _(XMM5) _(XMM6) _(XMM7)
  #endif
  /* Virtual register IDs (only the ModRM pseudo-register). */
  #define VRIDDEF(_) \
    _(MRM)

  /* Turn a register name into its enumerator: EAX -> RID_EAX, */
  #define RIDENUM(name)        RID_##name,

  enum {
    /* IDs are assigned consecutively from 0: all GPRs first, then all FPRs. */
    GPRDEF(RIDENUM)
    FPRDEF(RIDENUM)
    RID_MAX,                        /* One past the last real register ID. */
    RID_MRM = RID_MAX,              /* Pseudo-id for ModRM operand. */

    /* Half-open register ranges [min, max) and the resulting counts. */
    RID_MIN_GPR = RID_EAX,
    RID_MIN_FPR = RID_XMM0,
    RID_MAX_GPR = RID_MIN_FPR,      /* GPRs end where the FPRs begin. */
    RID_MAX_FPR = RID_MAX,
    RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR,
    RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR,

    /* Calling conventions. */
    RID_SP = RID_ESP,
    RID_RET = RID_EAX,
  #if LJ_64
    RID_FPRET = RID_XMM0,
  #else
    RID_RETLO = RID_EAX,
    RID_RETHI = RID_EDX,
  #endif

    /* Interpreter registers. These must match the *.dasc file(s). */
    RID_BASE = RID_EDX,             /* Interpreter BASE. */
  #if LJ_64 && !LJ_ABI_WIN
    RID_LPC = RID_EBX,              /* Interpreter PC. */
    RID_DISPATCH = RID_R14D         /* Interpreter DISPATCH table. */
  #else
    RID_LPC = RID_ESI,              /* Interpreter PC. */
    RID_DISPATCH = RID_EBX          /* Interpreter DISPATCH table. */
  #endif
  };

  55. /* -- Register sets ------------------------------------------------------- */

  /*
  ** Allocatable register sets: every FPR, and every GPR except the stack
  ** pointer. The initial set handed out is simply the union of both.
  */
  #define RSET_FPR        (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR))
  #define RSET_GPR        (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR)-RID2RSET(RID_ESP))
  #define RSET_ALL        (RSET_GPR|RSET_FPR)
  #define RSET_INIT       RSET_ALL

  /* GPRs usable as 8 bit operands. */
  #if !LJ_64
  /* x86: only the IDs from RID_EAX up to and including RID_EBX. */
  #define RSET_GPR8       (RSET_RANGE(RID_EAX, RID_EBX+1))
  #else
  /* x64: any GPR. Note: this requires the use of FORCE_REX! */
  #define RSET_GPR8       RSET_GPR
  #endif

  /*
  ** ABI-specific register sets.
  **
  ** RSET_SCRATCH:   the scratch register set for the selected ABI.
  ** REGARG_GPRS:    GPR argument register IDs packed 5 bits apiece, with the
  **                 first listed register in the lowest 5 bits.
  ** REGARG_NUMGPR / REGARG_NUMFPR: number of GPR/FPR argument registers.
  ** REGARG_FIRSTFPR / REGARG_LASTFPR: inclusive range of FPR argument regs.
  ** STACKARG_OFS:   stack argument offset (4*8 on Windows x64, otherwise 0).
  */
  #define RSET_ACD \
    (RID2RSET(RID_EAX)|RID2RSET(RID_ECX)|RID2RSET(RID_EDX))

  #if LJ_64 && LJ_ABI_WIN

  /* Windows x64 ABI. */
  #define RSET_SCRATCH \
    (RSET_ACD|RSET_RANGE(RID_R8D, RID_R11D+1)|RSET_RANGE(RID_XMM0, RID_XMM5+1))
  #define REGARG_GPRS \
    (RID_ECX|(RID_EDX<<5)|(RID_R8D<<10)|(RID_R9D<<15))
  #define REGARG_NUMGPR        4
  #define REGARG_NUMFPR        4
  #define REGARG_FIRSTFPR      RID_XMM0
  #define REGARG_LASTFPR       RID_XMM3
  #define STACKARG_OFS         (4*8)

  #elif LJ_64

  /* The rest of the civilized x64 world has a common ABI. */
  #define RSET_SCRATCH \
    (RSET_ACD|RSET_RANGE(RID_ESI, RID_R11D+1)|RSET_FPR)
  #define REGARG_GPRS \
    (RID_EDI|(RID_ESI<<5)|(RID_EDX<<10)|(RID_ECX<<15)| \
     (RID_R8D<<20)|(RID_R9D<<25))
  #define REGARG_NUMGPR        6
  #define REGARG_NUMFPR        8
  #define REGARG_FIRSTFPR      RID_XMM0
  #define REGARG_LASTFPR       RID_XMM7
  #define STACKARG_OFS         0

  #else

  /* Common x86 ABI. */
  #define RSET_SCRATCH         (RSET_ACD|RSET_FPR)
  #define REGARG_GPRS          (RID_ECX|(RID_EDX<<5))  /* Fastcall only. */
  #define REGARG_NUMGPR        2  /* Fastcall only. */
  #define REGARG_NUMFPR        0
  #define STACKARG_OFS         0

  #endif

  #if LJ_64
  /*
  ** x64 override of rset_picktop: prefer the low 8 regs of each type to
  ** reduce REX prefixes.
  ** NOTE(review): this reads as "byte-swap the set, take the highest set bit,
  ** then flip bits 3-4 of that index (0x18) to undo the byte swap" -- it
  ** assumes lj_bswap is a 32 bit byte swap and lj_fls returns the index of
  ** the highest set bit; confirm against their definitions.
  */
  #undef rset_picktop
  #define rset_picktop(rs)        (lj_fls(lj_bswap(rs)) ^ 0x18)
  #endif

  107. /* -- Spill slots --------------------------------------------------------- */

  /*
  ** Spill slots are 32 bit wide. An even/odd pair is used for FPRs.
  **
  ** SPS_FIXED: number of fixed spill slots available in the interpreter
  **            frame. This definition must match with the *.dasc file(s).
  ** SPS_FIRST: first spill slot for general use. At least two 32 bit slots
  **            are reserved below it.
  */
  #if LJ_64 && LJ_ABI_WIN
  #define SPS_FIXED        (4*2)
  #define SPS_FIRST        (4*2)        /* Don't use callee register save area. */
  #elif LJ_64
  #define SPS_FIXED        4
  #define SPS_FIRST        2
  #else
  #define SPS_FIXED        6
  #define SPS_FIRST        2
  #endif

  /* Offset of the temporary spill area. */
  #define SPOFS_TMP        0

  /* Convert a slot number to a byte offset (4 bytes per slot). */
  #define sps_scale(slot)  (4 * (int32_t)(slot))
  /* Slots needed beyond the fixed ones, rounded up to a multiple of 4. */
  #define sps_align(slot)  (((slot) - SPS_FIXED + 3) & ~3)

  130. /* -- Exit state ---------------------------------------------------------- */

  131. /* This definition must match with the *.dasc file(s). */
  132. typedef struct {
  133.   lua_Number fpr[RID_NUM_FPR];        /* Floating-point registers. */
  134.   intptr_t gpr[RID_NUM_GPR];        /* General-purpose registers. */
  135.   int32_t spill[256];                /* Spill slots. */
  136. } ExitState;

  /*
  ** Exit stub layout. The group size is limited by the range of a short
  ** forward jump (127): (2+2)*(32-1)-2 = 122.
  */
  #define EXITSTUBS_PER_GROUP        32
  #define EXITSTUB_SPACING           (2+2)

  140. /* -- x86 ModRM operand encoding ------------------------------------------ */

  /*
  ** Values for the top two bits (XM_MASK) of an encoding byte. The XM_OFS*
  ** and XM_REG names and the XM_SCALE* names share the same four bit patterns.
  */
  typedef enum {
    /* Offset size / register mode. */
    XM_OFS0   = 0x00,
    XM_OFS8   = 0x40,
    XM_OFS32  = 0x80,
    XM_REG    = 0xc0,
    /* Index scale. */
    XM_SCALE1 = 0x00,
    XM_SCALE2 = 0x40,
    XM_SCALE4 = 0x80,
    XM_SCALE8 = 0xc0,
    /* Mask covering the two mode/scale bits. */
    XM_MASK   = 0xc0
  } x86Mode;

  /*
  ** Variable ModRM operand: an offset plus optional base and index registers,
  ** the index being scaled by one of the XM_SCALE* factors.
  */
  typedef struct {
    /* Offset. */
    int32_t ofs;
    /* Base register or RID_NONE. */
    uint8_t base;
    /* Index register or RID_NONE. */
    uint8_t idx;
    /* Index scale (XM_SCALE1 .. XM_SCALE8). */
    uint8_t scale;
  } x86ModRM;

  153. /* -- Opcodes ------------------------------------------------------------- */

  /*
  ** Macros to construct variable-length x86 opcodes. -(len+1) is in LSB:
  ** 0xfe for a one-byte opcode, 0xfd for two bytes, 0xfc for three bytes.
  ** The opcode bytes fill the upper bytes, with the final opcode byte in the
  ** most significant byte.
  **
  ** The opcode byte is converted to uint32_t before shifting: shifting a
  ** signed int constant such as 0x8b left by 24 would overflow int, which is
  ** undefined behavior in C. The resulting values are unchanged.
  */
  #define XO_(o) \
    ((uint32_t)(0x0000feu + ((uint32_t)0x##o<<24)))
  #define XO_FPU(a,b) \
    ((uint32_t)(0x00fdu + ((uint32_t)0x##a<<16)+((uint32_t)0x##b<<24)))
  #define XO_0f(o) \
    ((uint32_t)(0x0f00fdu + ((uint32_t)0x##o<<24)))
  #define XO_66(o) \
    ((uint32_t)(0x6600fdu + ((uint32_t)0x##o<<24)))
  #define XO_660f(o) \
    ((uint32_t)(0x0f66fcu + ((uint32_t)0x##o<<24)))
  #define XO_f20f(o) \
    ((uint32_t)(0x0ff2fcu + ((uint32_t)0x##o<<24)))
  #define XO_f30f(o) \
    ((uint32_t)(0x0ff3fcu + ((uint32_t)0x##o<<24)))

  /* This list of x86 opcodes is not intended to be complete. Opcodes are only
  ** included when needed. Take a look at DynASM or jit.dis_x86 to see the
  ** whole mess.
  */
  typedef enum {
    /* Fixed length opcodes. XI_* prefix. */
    XI_NOP =        0x90,
    XI_XCHGa =      0x90,
    XI_CALL =       0xe8,
    XI_JMP =        0xe9,
    XI_JMPs =       0xeb,
    XI_PUSH =       0x50, /* Really 50+r. */
    XI_JCCs =       0x70, /* Really 7x. */
    XI_JCCn =       0x80, /* Really 0f8x. */
    XI_LEA =        0x8d,
    XI_MOVrib =     0xb0, /* Really b0+r. */
    XI_MOVri =      0xb8, /* Really b8+r. */
    XI_ARITHib =    0x80,
    XI_ARITHi =     0x81,
    XI_ARITHi8 =    0x83,
    XI_PUSHi8 =     0x6a,
    XI_TESTb =      0x84,
    XI_TEST =       0x85,
    XI_MOVmi =      0xc7,
    XI_GROUP5 =     0xff,

    /* Two-byte FPU opcodes. Note: little-endian byte-order! */
    XI_FLDZ =       0xeed9,
    XI_FLD1 =       0xe8d9,
    XI_FLDLG2 =     0xecd9,
    XI_FLDLN2 =     0xedd9,
    XI_FDUP =       0xc0d9, /* Really fld st0. */
    XI_FPOP =       0xd8dd, /* Really fstp st0. */
    XI_FPOP1 =      0xd9dd, /* Really fstp st1. */
    XI_FRNDINT =    0xfcd9,
    XI_FSIN =       0xfed9,
    XI_FCOS =       0xffd9,
    XI_FPTAN =      0xf2d9,
    XI_FPATAN =     0xf3d9,
    XI_FSCALE =     0xfdd9,
    XI_FYL2X =      0xf1d9,

    /* Variable-length opcodes. XO_* prefix. */
    XO_MOV =        XO_(8b),
    XO_MOVto =      XO_(89),
    XO_MOVtow =     XO_66(89),
    XO_MOVtob =     XO_(88),
    XO_MOVmi =      XO_(c7),
    XO_MOVmib =     XO_(c6),
    XO_LEA =        XO_(8d),
    XO_ARITHib =    XO_(80),
    XO_ARITHi =     XO_(81),
    XO_ARITHi8 =    XO_(83),
    XO_ARITHiw8 =   XO_66(83),
    XO_SHIFTi =     XO_(c1),
    XO_SHIFT1 =     XO_(d1),
    XO_SHIFTcl =    XO_(d3),
    XO_IMUL =       XO_0f(af),
    XO_IMULi =      XO_(69),
    XO_IMULi8 =     XO_(6b),
    XO_CMP =        XO_(3b),
    XO_TESTb =      XO_(84),
    XO_TEST =       XO_(85),
    XO_GROUP3b =    XO_(f6),
    XO_GROUP3 =     XO_(f7),
    XO_GROUP5b =    XO_(fe),
    XO_GROUP5 =     XO_(ff),
    XO_MOVZXb =     XO_0f(b6),
    XO_MOVZXw =     XO_0f(b7),
    XO_MOVSXb =     XO_0f(be),
    XO_MOVSXw =     XO_0f(bf),
    XO_MOVSXd =     XO_(63),
    XO_BSWAP =      XO_0f(c8),
    XO_CMOV =       XO_0f(40),

    /* SSE/SSE2 opcodes. */
    XO_MOVSD =      XO_f20f(10),
    XO_MOVSDto =    XO_f20f(11),
    XO_MOVSS =      XO_f30f(10),
    XO_MOVSSto =    XO_f30f(11),
    XO_MOVLPD =     XO_660f(12),
    XO_MOVAPS =     XO_0f(28),
    XO_XORPS =      XO_0f(57),
    XO_ANDPS =      XO_0f(54),
    XO_ADDSD =      XO_f20f(58),
    XO_SUBSD =      XO_f20f(5c),
    XO_MULSD =      XO_f20f(59),
    XO_DIVSD =      XO_f20f(5e),
    XO_SQRTSD =     XO_f20f(51),
    XO_MINSD =      XO_f20f(5d),
    XO_MAXSD =      XO_f20f(5f),
    XO_ROUNDSD =    0x0b3a0ffc, /* Really 66 0f 3a 0b. See asm_fpmath. */
    XO_UCOMISD =    XO_660f(2e),
    XO_CVTSI2SD =   XO_f20f(2a),
    XO_CVTTSD2SI =  XO_f20f(2c),
    XO_CVTSI2SS =   XO_f30f(2a),
    XO_CVTTSS2SI =  XO_f30f(2c),
    XO_CVTSS2SD =   XO_f30f(5a),
    XO_CVTSD2SS =   XO_f20f(5a),
    XO_ADDSS =      XO_f30f(58),
    XO_MOVD =       XO_660f(6e),
    XO_MOVDto =     XO_660f(7e),

    /* x87 memory-operand opcodes, each paired with its XOg_* group number. */
    XO_FLDd =       XO_(d9), XOg_FLDd = 0,
    XO_FLDq =       XO_(dd), XOg_FLDq = 0,
    XO_FILDd =      XO_(db), XOg_FILDd = 0,
    XO_FILDq =      XO_(df), XOg_FILDq = 5,
    XO_FSTPd =      XO_(d9), XOg_FSTPd = 3,
    XO_FSTPq =      XO_(dd), XOg_FSTPq = 3,
    XO_FISTPq =     XO_(df), XOg_FISTPq = 7,
    XO_FISTTPq =    XO_(dd), XOg_FISTTPq = 1,
    XO_FADDq =      XO_(dc), XOg_FADDq = 0,
    XO_FLDCW =      XO_(d9), XOg_FLDCW = 5,
    XO_FNSTCW =     XO_(d9), XOg_FNSTCW = 7
  } x86Op;

  /*
  ** x86 opcode groups. A group value packs three fields: the group number g
  ** in the low byte, opcode byte i in bits 8-15 and opcode byte i8 in
  ** bits 16-23.
  */
  typedef uint32_t x86Group;

  #define XG_(i8, i, g) \
    ((x86Group)((g) + ((i) << 8) + ((i8) << 16)))
  /* Arithmetic group g, built from XI_ARITHi8 and XI_ARITHi. */
  #define XG_ARITHi(g)         XG_(XI_ARITHi8, XI_ARITHi, g)

  /* Rebuild a one-byte x86Op (0xfe length marker) from the i or i8 field. */
  #define XG_TOXOi(xg) \
    ((x86Op)(0x000000fe + (((xg)<<16) & 0xff000000)))
  #define XG_TOXOi8(xg) \
    ((x86Op)(0x000000fe + (((xg)<<8) & 0xff000000)))

  /*
  ** Opcode for arithmetic op a: opcode byte 0x03 + 8*a, as a one-byte opcode
  ** (XO_ARITH) or with a 0x66 byte in front of it (XO_ARITHw).
  */
  #define XO_ARITH(a)          ((x86Op)(0x030000fe + ((a)<<27)))
  #define XO_ARITHw(a)         ((x86Op)(0x036600fd + ((a)<<27)))

  /* Arithmetic group numbers, counted from 0 (XOg_ADD) to 7 (XOg_CMP). */
  typedef enum {
    XOg_ADD,
    XOg_OR,
    XOg_ADC,
    XOg_SBB,
    XOg_AND,
    XOg_SUB,
    XOg_XOR,
    XOg_CMP,
    /* Value 8, one past XOg_CMP. NOTE(review): presumably a pseudo-op that
    ** callers treat specially -- confirm at the use sites. */
    XOg_X_IMUL
  } x86Arith;

  /* Shift/rotate group numbers, counted from 0 (XOg_ROL) to 7 (XOg_SAR). */
  typedef enum {
    XOg_ROL,
    XOg_ROR,
    XOg_RCL,
    XOg_RCR,
    XOg_SHL,
    XOg_SHR,
    XOg_SAL,
    XOg_SAR
  } x86Shift;

  /* Group 3 numbers, counted from 0 (XOg_TEST) to 7 (XOg_IDIV). */
  typedef enum {
    XOg_TEST,
    XOg_TEST_,
    XOg_NOT,
    XOg_NEG,
    XOg_MUL,
    XOg_IMUL,
    XOg_DIV,
    XOg_IDIV
  } x86Group3;

  /* Group 5 numbers, counted from 0 (XOg_INC) to 6 (XOg_PUSH). */
  typedef enum {
    XOg_INC,
    XOg_DEC,
    XOg_CALL,
    XOg_CALLfar,
    XOg_JMP,
    XOg_JMPfar,
    XOg_PUSH
  } x86Group5;

  /*
  ** x86 condition codes. The sixteen primary codes are numbered 0x0..0xf;
  ** the alternative names on each row are aliases for the same value.
  */
  typedef enum {
    CC_O   = 0x0,
    CC_NO  = 0x1,
    CC_B   = 0x2,  CC_C  = CC_B,   CC_NAE = CC_C,
    CC_NB  = 0x3,  CC_NC = CC_NB,  CC_AE  = CC_NB,
    CC_E   = 0x4,  CC_Z  = CC_E,
    CC_NE  = 0x5,  CC_NZ = CC_NE,
    CC_BE  = 0x6,  CC_NA = CC_BE,
    CC_NBE = 0x7,  CC_A  = CC_NBE,
    CC_S   = 0x8,
    CC_NS  = 0x9,
    CC_P   = 0xa,  CC_PE = CC_P,
    CC_NP  = 0xb,  CC_PO = CC_NP,
    CC_L   = 0xc,  CC_NGE = CC_L,
    CC_NL  = 0xd,  CC_GE = CC_NL,
    CC_LE  = 0xe,  CC_NG = CC_LE,
    CC_NLE = 0xf,  CC_G  = CC_NLE
  } x86CC;

  302. #endif