src/lj_bcread.c - luajit-2.0-src

Functions defined

Macros defined

Source code

  1. /*
  2. ** Bytecode reader.
  3. ** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
  4. */

  5. #define lj_bcread_c
  6. #define LUA_CORE

  7. #include "lj_obj.h"
  8. #include "lj_gc.h"
  9. #include "lj_err.h"
  10. #include "lj_buf.h"
  11. #include "lj_str.h"
  12. #include "lj_tab.h"
  13. #include "lj_bc.h"
  14. #if LJ_HASFFI
  15. #include "lj_ctype.h"
  16. #include "lj_cdata.h"
  17. #include "lualib.h"
  18. #endif
  19. #include "lj_lex.h"
  20. #include "lj_bcdump.h"
  21. #include "lj_state.h"
  22. #include "lj_strfmt.h"

  23. /* Reuse some lexer fields for our own purposes. */
  24. #define bcread_flags(ls)        ls->level
  25. #define bcread_swap(ls) \
  26.   ((bcread_flags(ls) & BCDUMP_F_BE) != LJ_BE*BCDUMP_F_BE)
  27. #define bcread_oldtop(L, ls)        restorestack(L, ls->lastline)
  28. #define bcread_savetop(L, ls, top) \
  29.   ls->lastline = (BCLine)savestack(L, (top))

  30. /* -- Input buffer handling ----------------------------------------------- */

  31. /* Throw reader error. */
  32. static LJ_NOINLINE void bcread_error(LexState *ls, ErrMsg em)
  33. {
  34.   lua_State *L = ls->L;
  35.   const char *name = ls->chunkarg;
  36.   if (*name == BCDUMP_HEAD1) name = "(binary)";
  37.   else if (*name == '@' || *name == '=') name++;
  38.   lj_strfmt_pushf(L, "%s: %s", name, err2msg(em));
  39.   lj_err_throw(L, LUA_ERRSYNTAX);
  40. }

  41. /* Refill buffer. */
  42. static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need)
  43. {
  44.   lua_assert(len != 0);
  45.   if (len > LJ_MAX_BUF || ls->c < 0)
  46.     bcread_error(ls, LJ_ERR_BCBAD);
  47.   do {
  48.     const char *buf;
  49.     size_t sz;
  50.     char *p = sbufB(&ls->sb);
  51.     MSize n = (MSize)(ls->pe - ls->p);
  52.     if (n) {  /* Copy remainder to buffer. */
  53.       if (sbuflen(&ls->sb)) {  /* Move down in buffer. */
  54.         lua_assert(ls->pe == sbufP(&ls->sb));
  55.         if (ls->p != p) memmove(p, ls->p, n);
  56.       } else/* Copy from buffer provided by reader. */
  57.         p = lj_buf_need(&ls->sb, len);
  58.         memcpy(p, ls->p, n);
  59.       }
  60.       ls->p = p;
  61.       ls->pe = p + n;
  62.     }
  63.     setsbufP(&ls->sb, p + n);
  64.     buf = ls->rfunc(ls->L, ls->rdata, &sz);  /* Get more data from reader. */
  65.     if (buf == NULL || sz == 0) {  /* EOF? */
  66.       if (need) bcread_error(ls, LJ_ERR_BCBAD);
  67.       ls->c = -1/* Only bad if we get called again. */
  68.       break;
  69.     }
  70.     if (n) {  /* Append to buffer. */
  71.       n += (MSize)sz;
  72.       p = lj_buf_need(&ls->sb, n < len ? len : n);
  73.       memcpy(sbufP(&ls->sb), buf, sz);
  74.       setsbufP(&ls->sb, p + n);
  75.       ls->p = p;
  76.       ls->pe = p + n;
  77.     } else/* Return buffer provided by reader. */
  78.       ls->p = buf;
  79.       ls->pe = buf + sz;
  80.     }
  81.   } while (ls->p + len > ls->pe);
  82. }

  83. /* Need a certain number of bytes. */
  84. static LJ_AINLINE void bcread_need(LexState *ls, MSize len)
  85. {
  86.   if (LJ_UNLIKELY(ls->p + len > ls->pe))
  87.     bcread_fill(ls, len, 1);
  88. }

  89. /* Want to read up to a certain number of bytes, but may need less. */
  90. static LJ_AINLINE void bcread_want(LexState *ls, MSize len)
  91. {
  92.   if (LJ_UNLIKELY(ls->p + len > ls->pe))
  93.     bcread_fill(ls, len, 0);
  94. }

  95. /* Return memory block from buffer. */
  96. static LJ_AINLINE uint8_t *bcread_mem(LexState *ls, MSize len)
  97. {
  98.   uint8_t *p = (uint8_t *)ls->p;
  99.   ls->p += len;
  100.   lua_assert(ls->p <= ls->pe);
  101.   return p;
  102. }

  103. /* Copy memory block from buffer. */
  104. static void bcread_block(LexState *ls, void *q, MSize len)
  105. {
  106.   memcpy(q, bcread_mem(ls, len), len);
  107. }

  108. /* Read byte from buffer. */
  109. static LJ_AINLINE uint32_t bcread_byte(LexState *ls)
  110. {
  111.   lua_assert(ls->p < ls->pe);
  112.   return (uint32_t)(uint8_t)*ls->p++;
  113. }

  114. /* Read ULEB128 value from buffer. */
  115. static LJ_AINLINE uint32_t bcread_uleb128(LexState *ls)
  116. {
  117.   uint32_t v = lj_buf_ruleb128(&ls->p);
  118.   lua_assert(ls->p <= ls->pe);
  119.   return v;
  120. }

  121. /* Read top 32 bits of 33 bit ULEB128 value from buffer. */
  122. static uint32_t bcread_uleb128_33(LexState *ls)
  123. {
  124.   const uint8_t *p = (const uint8_t *)ls->p;
  125.   uint32_t v = (*p++ >> 1);
  126.   if (LJ_UNLIKELY(v >= 0x40)) {
  127.     int sh = -1;
  128.     v &= 0x3f;
  129.     do {
  130.      v |= ((*p & 0x7f) << (sh += 7));
  131.    } while (*p++ >= 0x80);
  132.   }
  133.   ls->p = (char *)p;
  134.   lua_assert(ls->p <= ls->pe);
  135.   return v;
  136. }

  137. /* -- Bytecode reader ----------------------------------------------------- */

  138. /* Read debug info of a prototype. */
  139. static void bcread_dbg(LexState *ls, GCproto *pt, MSize sizedbg)
  140. {
  141.   void *lineinfo = (void *)proto_lineinfo(pt);
  142.   bcread_block(ls, lineinfo, sizedbg);
  143.   /* Swap lineinfo if the endianess differs. */
  144.   if (bcread_swap(ls) && pt->numline >= 256) {
  145.     MSize i, n = pt->sizebc-1;
  146.     if (pt->numline < 65536) {
  147.       uint16_t *p = (uint16_t *)lineinfo;
  148.       for (i = 0; i < n; i++) p[i] = (uint16_t)((p[i] >> 8)|(p[i] << 8));
  149.     } else {
  150.       uint32_t *p = (uint32_t *)lineinfo;
  151.       for (i = 0; i < n; i++) p[i] = lj_bswap(p[i]);
  152.     }
  153.   }
  154. }

  155. /* Find pointer to varinfo. */
  156. static const void *bcread_varinfo(GCproto *pt)
  157. {
  158.   const uint8_t *p = proto_uvinfo(pt);
  159.   MSize n = pt->sizeuv;
  160.   if (n) while (*p++ || --n) ;
  161.   return p;
  162. }

  163. /* Read a single constant key/value of a template table. */
  164. static void bcread_ktabk(LexState *ls, TValue *o)
  165. {
  166.   MSize tp = bcread_uleb128(ls);
  167.   if (tp >= BCDUMP_KTAB_STR) {
  168.     MSize len = tp - BCDUMP_KTAB_STR;
  169.     const char *p = (const char *)bcread_mem(ls, len);
  170.     setstrV(ls->L, o, lj_str_new(ls->L, p, len));
  171.   } else if (tp == BCDUMP_KTAB_INT) {
  172.     setintV(o, (int32_t)bcread_uleb128(ls));
  173.   } else if (tp == BCDUMP_KTAB_NUM) {
  174.     o->u32.lo = bcread_uleb128(ls);
  175.     o->u32.hi = bcread_uleb128(ls);
  176.   } else {
  177.     lua_assert(tp <= BCDUMP_KTAB_TRUE);
  178.     setpriV(o, ~tp);
  179.   }
  180. }

  181. /* Read a template table. */
  182. static GCtab *bcread_ktab(LexState *ls)
  183. {
  184.   MSize narray = bcread_uleb128(ls);
  185.   MSize nhash = bcread_uleb128(ls);
  186.   GCtab *t = lj_tab_new(ls->L, narray, hsize2hbits(nhash));
  187.   if (narray) {  /* Read array entries. */
  188.     MSize i;
  189.     TValue *o = tvref(t->array);
  190.     for (i = 0; i < narray; i++, o++)
  191.       bcread_ktabk(ls, o);
  192.   }
  193.   if (nhash) {  /* Read hash entries. */
  194.     MSize i;
  195.     for (i = 0; i < nhash; i++) {
  196.       TValue key;
  197.       bcread_ktabk(ls, &key);
  198.       lua_assert(!tvisnil(&key));
  199.       bcread_ktabk(ls, lj_tab_set(ls->L, t, &key));
  200.     }
  201.   }
  202.   return t;
  203. }

  204. /* Read GC constants of a prototype. */
  205. static void bcread_kgc(LexState *ls, GCproto *pt, MSize sizekgc)
  206. {
  207.   MSize i;
  208.   GCRef *kr = mref(pt->k, GCRef) - (ptrdiff_t)sizekgc;
  209.   for (i = 0; i < sizekgc; i++, kr++) {
  210.     MSize tp = bcread_uleb128(ls);
  211.     if (tp >= BCDUMP_KGC_STR) {
  212.       MSize len = tp - BCDUMP_KGC_STR;
  213.       const char *p = (const char *)bcread_mem(ls, len);
  214.       setgcref(*kr, obj2gco(lj_str_new(ls->L, p, len)));
  215.     } else if (tp == BCDUMP_KGC_TAB) {
  216.       setgcref(*kr, obj2gco(bcread_ktab(ls)));
  217. #if LJ_HASFFI
  218.     } else if (tp != BCDUMP_KGC_CHILD) {
  219.       CTypeID id = tp == BCDUMP_KGC_COMPLEX ? CTID_COMPLEX_DOUBLE :
  220.                    tp == BCDUMP_KGC_I64 ? CTID_INT64 : CTID_UINT64;
  221.       CTSize sz = tp == BCDUMP_KGC_COMPLEX ? 16 : 8;
  222.       GCcdata *cd = lj_cdata_new_(ls->L, id, sz);
  223.       TValue *p = (TValue *)cdataptr(cd);
  224.       setgcref(*kr, obj2gco(cd));
  225.       p[0].u32.lo = bcread_uleb128(ls);
  226.       p[0].u32.hi = bcread_uleb128(ls);
  227.       if (tp == BCDUMP_KGC_COMPLEX) {
  228.         p[1].u32.lo = bcread_uleb128(ls);
  229.         p[1].u32.hi = bcread_uleb128(ls);
  230.       }
  231. #endif
  232.     } else {
  233.       lua_State *L = ls->L;
  234.       lua_assert(tp == BCDUMP_KGC_CHILD);
  235.       if (L->top <= bcread_oldtop(L, ls))  /* Stack underflow? */
  236.         bcread_error(ls, LJ_ERR_BCBAD);
  237.       L->top--;
  238.       setgcref(*kr, obj2gco(protoV(L->top)));
  239.     }
  240.   }
  241. }

  242. /* Read number constants of a prototype. */
  243. static void bcread_knum(LexState *ls, GCproto *pt, MSize sizekn)
  244. {
  245.   MSize i;
  246.   TValue *o = mref(pt->k, TValue);
  247.   for (i = 0; i < sizekn; i++, o++) {
  248.     int isnum = (ls->p[0] & 1);
  249.     uint32_t lo = bcread_uleb128_33(ls);
  250.     if (isnum) {
  251.       o->u32.lo = lo;
  252.       o->u32.hi = bcread_uleb128(ls);
  253.     } else {
  254.       setintV(o, lo);
  255.     }
  256.   }
  257. }

  258. /* Read bytecode instructions. */
  259. static void bcread_bytecode(LexState *ls, GCproto *pt, MSize sizebc)
  260. {
  261.   BCIns *bc = proto_bc(pt);
  262.   bc[0] = BCINS_AD((pt->flags & PROTO_VARARG) ? BC_FUNCV : BC_FUNCF,
  263.                    pt->framesize, 0);
  264.   bcread_block(ls, bc+1, (sizebc-1)*(MSize)sizeof(BCIns));
  265.   /* Swap bytecode instructions if the endianess differs. */
  266.   if (bcread_swap(ls)) {
  267.     MSize i;
  268.     for (i = 1; i < sizebc; i++) bc[i] = lj_bswap(bc[i]);
  269.   }
  270. }

  271. /* Read upvalue refs. */
  272. static void bcread_uv(LexState *ls, GCproto *pt, MSize sizeuv)
  273. {
  274.   if (sizeuv) {
  275.     uint16_t *uv = proto_uv(pt);
  276.     bcread_block(ls, uv, sizeuv*2);
  277.     /* Swap upvalue refs if the endianess differs. */
  278.     if (bcread_swap(ls)) {
  279.       MSize i;
  280.       for (i = 0; i < sizeuv; i++)
  281.         uv[i] = (uint16_t)((uv[i] >> 8)|(uv[i] << 8));
  282.     }
  283.   }
  284. }

  285. /* Read a prototype. */
  286. GCproto *lj_bcread_proto(LexState *ls)
  287. {
  288.   GCproto *pt;
  289.   MSize framesize, numparams, flags, sizeuv, sizekgc, sizekn, sizebc, sizept;
  290.   MSize ofsk, ofsuv, ofsdbg;
  291.   MSize sizedbg = 0;
  292.   BCLine firstline = 0, numline = 0;

  293.   /* Read prototype header. */
  294.   flags = bcread_byte(ls);
  295.   numparams = bcread_byte(ls);
  296.   framesize = bcread_byte(ls);
  297.   sizeuv = bcread_byte(ls);
  298.   sizekgc = bcread_uleb128(ls);
  299.   sizekn = bcread_uleb128(ls);
  300.   sizebc = bcread_uleb128(ls) + 1;
  301.   if (!(bcread_flags(ls) & BCDUMP_F_STRIP)) {
  302.     sizedbg = bcread_uleb128(ls);
  303.     if (sizedbg) {
  304.       firstline = bcread_uleb128(ls);
  305.       numline = bcread_uleb128(ls);
  306.     }
  307.   }

  308.   /* Calculate total size of prototype including all colocated arrays. */
  309.   sizept = (MSize)sizeof(GCproto) +
  310.            sizebc*(MSize)sizeof(BCIns) +
  311.            sizekgc*(MSize)sizeof(GCRef);
  312.   sizept = (sizept + (MSize)sizeof(TValue)-1) & ~((MSize)sizeof(TValue)-1);
  313.   ofsk = sizept; sizept += sizekn*(MSize)sizeof(TValue);
  314.   ofsuv = sizept; sizept += ((sizeuv+1)&~1)*2;
  315.   ofsdbg = sizept; sizept += sizedbg;

  316.   /* Allocate prototype object and initialize its fields. */
  317.   pt = (GCproto *)lj_mem_newgco(ls->L, (MSize)sizept);
  318.   pt->gct = ~LJ_TPROTO;
  319.   pt->numparams = (uint8_t)numparams;
  320.   pt->framesize = (uint8_t)framesize;
  321.   pt->sizebc = sizebc;
  322.   setmref(pt->k, (char *)pt + ofsk);
  323.   setmref(pt->uv, (char *)pt + ofsuv);
  324.   pt->sizekgc = 0/* Set to zero until fully initialized. */
  325.   pt->sizekn = sizekn;
  326.   pt->sizept = sizept;
  327.   pt->sizeuv = (uint8_t)sizeuv;
  328.   pt->flags = (uint8_t)flags;
  329.   pt->trace = 0;
  330.   setgcref(pt->chunkname, obj2gco(ls->chunkname));

  331.   /* Close potentially uninitialized gap between bc and kgc. */
  332.   *(uint32_t *)((char *)pt + ofsk - sizeof(GCRef)*(sizekgc+1)) = 0;

  333.   /* Read bytecode instructions and upvalue refs. */
  334.   bcread_bytecode(ls, pt, sizebc);
  335.   bcread_uv(ls, pt, sizeuv);

  336.   /* Read constants. */
  337.   bcread_kgc(ls, pt, sizekgc);
  338.   pt->sizekgc = sizekgc;
  339.   bcread_knum(ls, pt, sizekn);

  340.   /* Read and initialize debug info. */
  341.   pt->firstline = firstline;
  342.   pt->numline = numline;
  343.   if (sizedbg) {
  344.     MSize sizeli = (sizebc-1) << (numline < 256 ? 0 : numline < 65536 ? 1 : 2);
  345.     setmref(pt->lineinfo, (char *)pt + ofsdbg);
  346.     setmref(pt->uvinfo, (char *)pt + ofsdbg + sizeli);
  347.     bcread_dbg(ls, pt, sizedbg);
  348.     setmref(pt->varinfo, bcread_varinfo(pt));
  349.   } else {
  350.     setmref(pt->lineinfo, NULL);
  351.     setmref(pt->uvinfo, NULL);
  352.     setmref(pt->varinfo, NULL);
  353.   }
  354.   return pt;
  355. }

  356. /* Read and check header of bytecode dump. */
  357. static int bcread_header(LexState *ls)
  358. {
  359.   uint32_t flags;
  360.   bcread_want(ls, 3+5+5);
  361.   if (bcread_byte(ls) != BCDUMP_HEAD2 ||
  362.       bcread_byte(ls) != BCDUMP_HEAD3 ||
  363.       bcread_byte(ls) != BCDUMP_VERSION) return 0;
  364.   bcread_flags(ls) = flags = bcread_uleb128(ls);
  365.   if ((flags & ~(BCDUMP_F_KNOWN)) != 0) return 0;
  366.   if ((flags & BCDUMP_F_FR2) != LJ_FR2*BCDUMP_F_FR2) return 0;
  367.   if ((flags & BCDUMP_F_FFI)) {
  368. #if LJ_HASFFI
  369.     lua_State *L = ls->L;
  370.     if (!ctype_ctsG(G(L))) {
  371.       ptrdiff_t oldtop = savestack(L, L->top);
  372.       luaopen_ffi(L);  /* Load FFI library on-demand. */
  373.       L->top = restorestack(L, oldtop);
  374.     }
  375. #else
  376.     return 0;
  377. #endif
  378.   }
  379.   if ((flags & BCDUMP_F_STRIP)) {
  380.     ls->chunkname = lj_str_newz(ls->L, ls->chunkarg);
  381.   } else {
  382.     MSize len = bcread_uleb128(ls);
  383.     bcread_need(ls, len);
  384.     ls->chunkname = lj_str_new(ls->L, (const char *)bcread_mem(ls, len), len);
  385.   }
  386.   return 1/* Ok. */
  387. }

  388. /* Read a bytecode dump. */
  389. GCproto *lj_bcread(LexState *ls)
  390. {
  391.   lua_State *L = ls->L;
  392.   lua_assert(ls->c == BCDUMP_HEAD1);
  393.   bcread_savetop(L, ls, L->top);
  394.   lj_buf_reset(&ls->sb);
  395.   /* Check for a valid bytecode dump header. */
  396.   if (!bcread_header(ls))
  397.     bcread_error(ls, LJ_ERR_BCFMT);
  398.   for (;;) {  /* Process all prototypes in the bytecode dump. */
  399.     GCproto *pt;
  400.     MSize len;
  401.     const char *startp;
  402.     /* Read length. */
  403.     if (ls->p < ls->pe && ls->p[0] == 0) {  /* Shortcut EOF. */
  404.       ls->p++;
  405.       break;
  406.     }
  407.     bcread_want(ls, 5);
  408.     len = bcread_uleb128(ls);
  409.     if (!len) break/* EOF */
  410.     bcread_need(ls, len);
  411.     startp = ls->p;
  412.     pt = lj_bcread_proto(ls);
  413.     if (ls->p != startp + len)
  414.       bcread_error(ls, LJ_ERR_BCBAD);
  415.     setprotoV(L, L->top, pt);
  416.     incr_top(L);
  417.   }
  418.   if ((int32_t)(2*(uint32_t)(ls->pe - ls->p)) > 0 ||
  419.       L->top-1 != bcread_oldtop(L, ls))
  420.     bcread_error(ls, LJ_ERR_BCBAD);
  421.   /* Pop off last prototype. */
  422.   L->top--;
  423.   return protoV(L->top);
  424. }