src/lj_str.c - luajit-2.0-src

Functions defined

Macros defined

Source code

  1. /*
  2. ** String handling.
  3. ** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
  4. */

  5. #define lj_str_c
  6. #define LUA_CORE

  7. #include "lj_obj.h"
  8. #include "lj_gc.h"
  9. #include "lj_err.h"
  10. #include "lj_str.h"
  11. #include "lj_char.h"

  12. /* -- String helpers ------------------------------------------------------ */

  13. /* Ordered compare of strings. Assumes string data is 4-byte aligned. */
  14. int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b)
  15. {
  16.   MSize i, n = a->len > b->len ? b->len : a->len;
  17.   for (i = 0; i < n; i += 4) {
  18.     /* Note: innocuous access up to end of string + 3. */
  19.     uint32_t va = *(const uint32_t *)(strdata(a)+i);
  20.     uint32_t vb = *(const uint32_t *)(strdata(b)+i);
  21.     if (va != vb) {
  22. #if LJ_LE
  23.       va = lj_bswap(va); vb = lj_bswap(vb);
  24. #endif
  25.       i -= n;
  26.       if ((int32_t)i >= -3) {
  27.         va >>= 32+(i<<3); vb >>= 32+(i<<3);
  28.         if (va == vb) break;
  29.       }
  30.       return va < vb ? -1 : 1;
  31.     }
  32.   }
  33.   return (int32_t)(a->len - b->len);
  34. }

  35. /* Fast string data comparison. Caveat: unaligned access to 1st string! */
  36. static LJ_AINLINE int str_fastcmp(const char *a, const char *b, MSize len)
  37. {
  38.   MSize i = 0;
  39.   lua_assert(len > 0);
  40.   lua_assert((((uintptr_t)a+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4);
  41.   do/* Note: innocuous access up to end of string + 3. */
  42.     uint32_t v = lj_getu32(a+i) ^ *(const uint32_t *)(b+i);
  43.     if (v) {
  44.       i -= len;
  45. #if LJ_LE
  46.       return (int32_t)i >= -3 ? (v << (32+(i<<3))) : 1;
  47. #else
  48.       return (int32_t)i >= -3 ? (v >> (32+(i<<3))) : 1;
  49. #endif
  50.     }
  51.     i += 4;
  52.   } while (i < len);
  53.   return 0;
  54. }

  55. /* Find fixed string p inside string s. */
  56. const char *lj_str_find(const char *s, const char *p, MSize slen, MSize plen)
  57. {
  58.   if (plen <= slen) {
  59.     if (plen == 0) {
  60.       return s;
  61.     } else {
  62.       int c = *(const uint8_t *)p++;
  63.       plen--; slen -= plen;
  64.       while (slen) {
  65.         const char *q = (const char *)memchr(s, c, slen);
  66.         if (!q) break;
  67.         if (memcmp(q+1, p, plen) == 0) return q;
  68.         q++; slen -= (MSize)(q-s); s = q;
  69.       }
  70.     }
  71.   }
  72.   return NULL;
  73. }

  74. /* Check whether a string has a pattern matching character. */
  75. int lj_str_haspattern(GCstr *s)
  76. {
  77.   const char *p = strdata(s), *q = p + s->len;
  78.   while (p < q) {
  79.     int c = *(const uint8_t *)p++;
  80.     if (lj_char_ispunct(c) && strchr("^$*+?.([%-", c))
  81.       return 1/* Found a pattern matching char. */
  82.   }
  83.   return 0/* No pattern matching chars found. */
  84. }

  85. /* -- String interning ---------------------------------------------------- */

  86. /* Resize the string hash table (grow and shrink). */
  87. void lj_str_resize(lua_State *L, MSize newmask)
  88. {
  89.   global_State *g = G(L);
  90.   GCRef *newhash;
  91.   MSize i;
  92.   if (g->gc.state == GCSsweepstring || newmask >= LJ_MAX_STRTAB-1)
  93.     return/* No resizing during GC traversal or if already too big. */
  94.   newhash = lj_mem_newvec(L, newmask+1, GCRef);
  95.   memset(newhash, 0, (newmask+1)*sizeof(GCRef));
  96.   for (i = g->strmask; i != ~(MSize)0; i--) {  /* Rehash old table. */
  97.     GCobj *p = gcref(g->strhash[i]);
  98.     while (p) {  /* Follow each hash chain and reinsert all strings. */
  99.       MSize h = gco2str(p)->hash & newmask;
  100.       GCobj *next = gcnext(p);
  101.       /* NOBARRIER: The string table is a GC root. */
  102.       setgcrefr(p->gch.nextgc, newhash[h]);
  103.       setgcref(newhash[h], p);
  104.       p = next;
  105.     }
  106.   }
  107.   lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef);
  108.   g->strmask = newmask;
  109.   g->strhash = newhash;
  110. }

  111. /* Intern a string and return string object. */
  112. GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
  113. {
  114.   global_State *g;
  115.   GCstr *s;
  116.   GCobj *o;
  117.   MSize len = (MSize)lenx;
  118.   MSize a, b, h = len;
  119.   if (lenx >= LJ_MAX_STR)
  120.     lj_err_msg(L, LJ_ERR_STROV);
  121.   g = G(L);
  122.   /* Compute string hash. Constants taken from lookup3 hash by Bob Jenkins. */
  123.   if (len >= 4) {  /* Caveat: unaligned access! */
  124.     a = lj_getu32(str);
  125.     h ^= lj_getu32(str+len-4);
  126.     b = lj_getu32(str+(len>>1)-2);
  127.     h ^= b; h -= lj_rol(b, 14);
  128.     b += lj_getu32(str+(len>>2)-1);
  129.   } else if (len > 0) {
  130.     a = *(const uint8_t *)str;
  131.     h ^= *(const uint8_t *)(str+len-1);
  132.     b = *(const uint8_t *)(str+(len>>1));
  133.     h ^= b; h -= lj_rol(b, 14);
  134.   } else {
  135.     return &g->strempty;
  136.   }
  137.   a ^= h; a -= lj_rol(h, 11);
  138.   b ^= a; b -= lj_rol(a, 25);
  139.   h ^= b; h -= lj_rol(b, 16);
  140.   /* Check if the string has already been interned. */
  141.   o = gcref(g->strhash[h & g->strmask]);
  142.   if (LJ_LIKELY((((uintptr_t)str+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4)) {
  143.     while (o != NULL) {
  144.       GCstr *sx = gco2str(o);
  145.       if (sx->len == len && str_fastcmp(str, strdata(sx), len) == 0) {
  146.         /* Resurrect if dead. Can only happen with fixstring() (keywords). */
  147.         if (isdead(g, o)) flipwhite(o);
  148.         return sx;  /* Return existing string. */
  149.       }
  150.       o = gcnext(o);
  151.     }
  152.   } else/* Slow path: end of string is too close to a page boundary. */
  153.     while (o != NULL) {
  154.       GCstr *sx = gco2str(o);
  155.       if (sx->len == len && memcmp(str, strdata(sx), len) == 0) {
  156.         /* Resurrect if dead. Can only happen with fixstring() (keywords). */
  157.         if (isdead(g, o)) flipwhite(o);
  158.         return sx;  /* Return existing string. */
  159.       }
  160.       o = gcnext(o);
  161.     }
  162.   }
  163.   /* Nope, create a new string. */
  164.   s = lj_mem_newt(L, sizeof(GCstr)+len+1, GCstr);
  165.   newwhite(g, s);
  166.   s->gct = ~LJ_TSTR;
  167.   s->len = len;
  168.   s->hash = h;
  169.   s->reserved = 0;
  170.   memcpy(strdatawr(s), str, len);
  171.   strdatawr(s)[len] = '\0'/* Zero-terminate string. */
  172.   /* Add it to string hash table. */
  173.   h &= g->strmask;
  174.   s->nextgc = g->strhash[h];
  175.   /* NOBARRIER: The string table is a GC root. */
  176.   setgcref(g->strhash[h], obj2gco(s));
  177.   if (g->strnum++ > g->strmask)  /* Allow a 100% load factor. */
  178.     lj_str_resize(L, (g->strmask<<1)+1);  /* Grow string table. */
  179.   return s;  /* Return newly interned string. */
  180. }

  181. void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s)
  182. {
  183.   g->strnum--;
  184.   lj_mem_free(g, s, sizestring(s));
  185. }