src/lj_target.h - luajit-2.0-src

Data types defined

Functions defined

Macros defined

Source code

  1. /*
  2. ** Definitions for target CPU.
  3. ** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
  4. */

  5. #ifndef _LJ_TARGET_H
  6. #define _LJ_TARGET_H

  7. #include "lj_def.h"
  8. #include "lj_arch.h"

  9. /* -- Registers and spill slots ------------------------------------------- */

  10. /* Register type (uint8_t in ir->r). */
  11. typedef uint32_t Reg;

  12. /* The hi-bit is NOT set for an allocated register. This means the value
  13. ** can be directly used without masking. The hi-bit is set for a register
  14. ** allocation hint or for RID_INIT, RID_SINK or RID_SUNK.
  15. */
  16. #define RID_NONE                0x80
  17. #define RID_MASK                0x7f
  18. #define RID_INIT                (RID_NONE|RID_MASK)
  19. #define RID_SINK                (RID_INIT-1)
  20. #define RID_SUNK                (RID_INIT-2)

  21. #define ra_noreg(r)                ((r) & RID_NONE)
  22. #define ra_hasreg(r)                (!((r) & RID_NONE))

  23. /* The ra_hashint() macro assumes a previous test for ra_noreg(). */
  24. #define ra_hashint(r)                ((r) < RID_SUNK)
  25. #define ra_gethint(r)                ((Reg)((r) & RID_MASK))
  26. #define ra_sethint(rr, r)        rr = (uint8_t)((r)|RID_NONE)
  27. #define ra_samehint(r1, r2)        (ra_gethint((r1)^(r2)) == 0)

  28. /* Spill slot 0 means no spill slot has been allocated. */
  29. #define SPS_NONE                0

  30. #define ra_hasspill(s)                ((s) != SPS_NONE)

  31. /* Combined register and spill slot (uint16_t in ir->prev). */
  32. typedef uint32_t RegSP;

  33. #define REGSP(r, s)                ((r) + ((s) << 8))
  34. #define REGSP_HINT(r)                ((r)|RID_NONE)
  35. #define REGSP_INIT                REGSP(RID_INIT, 0)

  36. #define regsp_reg(rs)                ((rs) & 255)
  37. #define regsp_spill(rs)                ((rs) >> 8)
  38. #define regsp_used(rs) \
  39.   (((rs) & ~REGSP(RID_MASK, 0)) != REGSP(RID_NONE, 0))

  40. /* -- Register sets ------------------------------------------------------- */

  41. /* Bitset for registers. 32 registers suffice for most architectures.
  42. ** Note that one set holds bits for both GPRs and FPRs.
  43. */
  44. #if LJ_TARGET_PPC || LJ_TARGET_MIPS
  45. typedef uint64_t RegSet;
  46. #else
  47. typedef uint32_t RegSet;
  48. #endif

  49. #define RID2RSET(r)                (((RegSet)1) << (r))
  50. #define RSET_EMPTY                ((RegSet)0)
  51. #define RSET_RANGE(lo, hi)        ((RID2RSET((hi)-(lo))-1) << (lo))

  52. #define rset_test(rs, r)        ((int)((rs) >> (r)) & 1)
  53. #define rset_set(rs, r)                (rs |= RID2RSET(r))
  54. #define rset_clear(rs, r)        (rs &= ~RID2RSET(r))
  55. #define rset_exclude(rs, r)        (rs & ~RID2RSET(r))
  56. #if LJ_TARGET_PPC || LJ_TARGET_MIPS
  57. #define rset_picktop(rs)        ((Reg)(__builtin_clzll(rs)^63))
  58. #define rset_pickbot(rs)        ((Reg)__builtin_ctzll(rs))
  59. #else
  60. #define rset_picktop(rs)        ((Reg)lj_fls(rs))
  61. #define rset_pickbot(rs)        ((Reg)lj_ffs(rs))
  62. #endif

  63. /* -- Register allocation cost -------------------------------------------- */

  64. /* The register allocation heuristic keeps track of the cost for allocating
  65. ** a specific register:
  66. **
  67. ** A free register (obviously) has a cost of 0 and a 1-bit in the free mask.
  68. **
  69. ** An already allocated register has the (non-zero) IR reference in the lowest
  70. ** bits and the result of a blended cost-model in the higher bits.
  71. **
  72. ** The allocator first checks the free mask for a hit. Otherwise an (unrolled)
  73. ** linear search for the minimum cost is used. The search doesn't need to
  74. ** keep track of the position of the minimum, which makes it very fast.
  75. ** The lowest bits of the minimum cost show the desired IR reference whose
  76. ** register is the one to evict.
  77. **
  78. ** Without the cost-model this degenerates to the standard heuristics for
  79. ** (reverse) linear-scan register allocation. Since code generation is done
  80. ** in reverse, a live interval extends from the last use to the first def.
  81. ** For an SSA IR the IR reference is the first (and only) def and thus
  82. ** trivially marks the end of the interval. The LSRA heuristics say to pick
  83. ** the register whose live interval has the furthest extent, i.e. the lowest
  84. ** IR reference in our case.
  85. **
  86. ** A cost-model should take into account other factors, like spill-cost and
  87. ** restore- or rematerialization-cost, which depend on the kind of instruction.
  88. ** E.g. constants have zero spill costs, variant instructions have higher
  89. ** costs than invariants and PHIs should preferably never be spilled.
  90. **
  91. ** Here's a first cut at a simple, but effective blended cost-model for R-LSRA:
  92. ** - Due to careful design of the IR, constants already have lower IR
  93. **   references than invariants and invariants have lower IR references
  94. **   than variants.
  95. ** - The cost in the upper 16 bits is the sum of the IR reference and a
  96. **   weighted score. The score currently only takes into account whether
  97. **   the IRT_ISPHI bit is set in the instruction type.
  98. ** - The PHI weight is the minimum distance (in IR instructions) a PHI
  99. **   reference has to be further apart from a non-PHI reference to be spilled.
  100. ** - It should be a power of two (for speed) and must be between 2 and 32768.
  101. **   Good values for the PHI weight seem to be between 40 and 150.
  102. ** - Further study is required.
  103. */
  104. #define REGCOST_PHI_WEIGHT        64

  105. /* Cost for allocating a specific register. */
  106. typedef uint32_t RegCost;

  107. /* Note: assumes 16 bit IRRef1. */
  108. #define REGCOST(cost, ref)        ((RegCost)(ref) + ((RegCost)(cost) << 16))
  109. #define regcost_ref(rc)                ((IRRef1)(rc))

  110. #define REGCOST_T(t) \
  111.   ((RegCost)((t)&IRT_ISPHI) * (((RegCost)(REGCOST_PHI_WEIGHT)<<16)/IRT_ISPHI))
  112. #define REGCOST_REF_T(ref, t)        (REGCOST((ref), (ref)) + REGCOST_T((t)))

  113. /* -- Target-specific definitions ----------------------------------------- */

  114. #if LJ_TARGET_X86ORX64
  115. #include "lj_target_x86.h"
  116. #elif LJ_TARGET_ARM
  117. #include "lj_target_arm.h"
  118. #elif LJ_TARGET_ARM64
  119. #include "lj_target_arm64.h"
  120. #elif LJ_TARGET_PPC
  121. #include "lj_target_ppc.h"
  122. #elif LJ_TARGET_MIPS
  123. #include "lj_target_mips.h"
  124. #else
  125. #error "Missing include for target CPU"
  126. #endif

  127. #ifdef EXITSTUBS_PER_GROUP
  128. /* Return the address of an exit stub. */
  129. static LJ_AINLINE char *exitstub_addr_(char **group, uint32_t exitno)
  130. {
  131.   lua_assert(group[exitno / EXITSTUBS_PER_GROUP] != NULL);
  132.   return (char *)group[exitno / EXITSTUBS_PER_GROUP] +
  133.          EXITSTUB_SPACING*(exitno % EXITSTUBS_PER_GROUP);
  134. }
  135. /* Avoid dependence on lj_jit.h if only including lj_target.h. */
  136. #define exitstub_addr(J, exitno) \
  137.   ((MCode *)exitstub_addr_((char **)((J)->exitstubgroup), (exitno)))
  138. #endif

  139. #endif