src/lj_alloc.c - luajit-2.0-src

Global variables defined

Data types defined

Functions defined

Macros defined

Source code

  1. /*
  2. ** Bundled memory allocator.
  3. **
  4. ** Beware: this is a HEAVILY CUSTOMIZED version of dlmalloc.
  5. ** The original bears the following remark:
  6. **
  7. **   This is a version (aka dlmalloc) of malloc/free/realloc written by
  8. **   Doug Lea and released to the public domain, as explained at
  9. **   http://creativecommons.org/licenses/publicdomain.
  10. **
  11. **   * Version pre-2.8.4 Wed Mar 29 19:46:29 2006    (dl at gee)
  12. **
  13. ** No additional copyright is claimed over the customizations.
  14. ** Please do NOT bother the original author about this version here!
  15. **
  16. ** If you want to use dlmalloc in another project, you should get
  17. ** the original from: ftp://gee.cs.oswego.edu/pub/misc/
  18. ** For thread-safe derivatives, take a look at:
  19. ** - ptmalloc: http://www.malloc.de/
  20. ** - nedmalloc: http://www.nedprod.com/programs/portable/nedmalloc/
  21. */

  22. #define lj_alloc_c
  23. #define LUA_CORE

  24. /* To get the mremap prototype. Must be defined before any system includes. */
  25. #if defined(__linux__) && !defined(_GNU_SOURCE)
  26. #define _GNU_SOURCE
  27. #endif

  28. #include "lj_def.h"
  29. #include "lj_arch.h"
  30. #include "lj_alloc.h"

  31. #ifndef LUAJIT_USE_SYSMALLOC

  32. #define MAX_SIZE_T                (~(size_t)0)
  33. #define MALLOC_ALIGNMENT        ((size_t)8U)

  34. #define DEFAULT_GRANULARITY        ((size_t)128U * (size_t)1024U)
  35. #define DEFAULT_TRIM_THRESHOLD        ((size_t)2U * (size_t)1024U * (size_t)1024U)
  36. #define DEFAULT_MMAP_THRESHOLD        ((size_t)128U * (size_t)1024U)
  37. #define MAX_RELEASE_CHECK_RATE        255

  38. /* ------------------- size_t and alignment properties -------------------- */

  39. /* The byte and bit size of a size_t */
  40. #define SIZE_T_SIZE                (sizeof(size_t))
  41. #define SIZE_T_BITSIZE                (sizeof(size_t) << 3)

  42. /* Some constants coerced to size_t */
  43. /* Annoying but necessary to avoid errors on some platforms */
  44. #define SIZE_T_ZERO                ((size_t)0)
  45. #define SIZE_T_ONE                ((size_t)1)
  46. #define SIZE_T_TWO                ((size_t)2)
  47. #define TWO_SIZE_T_SIZES        (SIZE_T_SIZE<<1)
  48. #define FOUR_SIZE_T_SIZES        (SIZE_T_SIZE<<2)
  49. #define SIX_SIZE_T_SIZES        (FOUR_SIZE_T_SIZES+TWO_SIZE_T_SIZES)

  50. /* The bit mask value corresponding to MALLOC_ALIGNMENT */
  51. #define CHUNK_ALIGN_MASK        (MALLOC_ALIGNMENT - SIZE_T_ONE)

  52. /* the number of bytes to offset an address to align it */
  53. #define align_offset(A)\
  54. ((((size_t)(A) & CHUNK_ALIGN_MASK) == 0)? 0 :\
  55.   ((MALLOC_ALIGNMENT - ((size_t)(A) & CHUNK_ALIGN_MASK)) & CHUNK_ALIGN_MASK))

  56. /* -------------------------- MMAP support ------------------------------- */

  57. #define MFAIL                        ((void *)(MAX_SIZE_T))
  58. #define CMFAIL                        ((char *)(MFAIL)) /* defined for convenience */

  59. #define IS_DIRECT_BIT                (SIZE_T_ONE)

  60. #if LJ_TARGET_WINDOWS

  61. #define WIN32_LEAN_AND_MEAN
  62. #include <windows.h>

  63. #if LJ_64 && !LJ_GC64

  64. /* Undocumented, but hey, that's what we all love so much about Windows. */
  65. typedef long (*PNTAVM)(HANDLE handle, void **addr, ULONG zbits,
  66.                        size_t *size, ULONG alloctype, ULONG prot);
  67. static PNTAVM ntavm;

  68. /* Number of top bits of the lower 32 bits of an address that must be zero.
  69. ** Apparently 0 gives us full 64 bit addresses and 1 gives us the lower 2GB.
  70. */
  71. #define NTAVM_ZEROBITS                1

  72. static void INIT_MMAP(void)
  73. {
  74.   ntavm = (PNTAVM)GetProcAddress(GetModuleHandleA("ntdll.dll"),
  75.                                  "NtAllocateVirtualMemory");
  76. }

  77. /* Win64 32 bit MMAP via NtAllocateVirtualMemory. */
  78. static LJ_AINLINE void *CALL_MMAP(size_t size)
  79. {
  80.   DWORD olderr = GetLastError();
  81.   void *ptr = NULL;
  82.   long st = ntavm(INVALID_HANDLE_VALUE, &ptr, NTAVM_ZEROBITS, &size,
  83.                   MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
  84.   SetLastError(olderr);
  85.   return st == 0 ? ptr : MFAIL;
  86. }

  87. /* For direct MMAP, use MEM_TOP_DOWN to minimize interference */
  88. static LJ_AINLINE void *DIRECT_MMAP(size_t size)
  89. {
  90.   DWORD olderr = GetLastError();
  91.   void *ptr = NULL;
  92.   long st = ntavm(INVALID_HANDLE_VALUE, &ptr, NTAVM_ZEROBITS, &size,
  93.                   MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, PAGE_READWRITE);
  94.   SetLastError(olderr);
  95.   return st == 0 ? ptr : MFAIL;
  96. }

  97. #else

  98. #define INIT_MMAP()                ((void)0)

  99. /* Win32 MMAP via VirtualAlloc */
  100. static LJ_AINLINE void *CALL_MMAP(size_t size)
  101. {
  102.   DWORD olderr = GetLastError();
  103.   void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
  104.   SetLastError(olderr);
  105.   return ptr ? ptr : MFAIL;
  106. }

  107. /* For direct MMAP, use MEM_TOP_DOWN to minimize interference */
  108. static LJ_AINLINE void *DIRECT_MMAP(size_t size)
  109. {
  110.   DWORD olderr = GetLastError();
  111.   void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN,
  112.                            PAGE_READWRITE);
  113.   SetLastError(olderr);
  114.   return ptr ? ptr : MFAIL;
  115. }

  116. #endif

  117. /* This function supports releasing coalesed segments */
  118. static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
  119. {
  120.   DWORD olderr = GetLastError();
  121.   MEMORY_BASIC_INFORMATION minfo;
  122.   char *cptr = (char *)ptr;
  123.   while (size) {
  124.     if (VirtualQuery(cptr, &minfo, sizeof(minfo)) == 0)
  125.       return -1;
  126.     if (minfo.BaseAddress != cptr || minfo.AllocationBase != cptr ||
  127.         minfo.State != MEM_COMMIT || minfo.RegionSize > size)
  128.       return -1;
  129.     if (VirtualFree(cptr, 0, MEM_RELEASE) == 0)
  130.       return -1;
  131.     cptr += minfo.RegionSize;
  132.     size -= minfo.RegionSize;
  133.   }
  134.   SetLastError(olderr);
  135.   return 0;
  136. }

  137. #else

  138. #include <errno.h>
  139. #include <sys/mman.h>

  140. #define MMAP_PROT                (PROT_READ|PROT_WRITE)
  141. #if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
  142. #define MAP_ANONYMOUS                MAP_ANON
  143. #endif
  144. #define MMAP_FLAGS                (MAP_PRIVATE|MAP_ANONYMOUS)

  145. #if LJ_64 && !LJ_GC64
  146. /* 64 bit mode with 32 bit pointers needs special support for allocating
  147. ** memory in the lower 2GB.
  148. */

  149. #if defined(MAP_32BIT)

  150. #if defined(__sun__)
  151. #define MMAP_REGION_START        ((uintptr_t)0x1000)
  152. #else
  153. /* Actually this only gives us max. 1GB in current Linux kernels. */
  154. #define MMAP_REGION_START        ((uintptr_t)0)
  155. #endif

  156. static LJ_AINLINE void *CALL_MMAP(size_t size)
  157. {
  158.   int olderr = errno;
  159.   void *ptr = mmap((void *)MMAP_REGION_START, size, MMAP_PROT, MAP_32BIT|MMAP_FLAGS, -1, 0);
  160.   errno = olderr;
  161.   return ptr;
  162. }

  163. #elif LJ_TARGET_OSX || LJ_TARGET_PS4 || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__sun__)

  164. /* OSX and FreeBSD mmap() use a naive first-fit linear search.
  165. ** That's perfect for us. Except that -pagezero_size must be set for OSX,
  166. ** otherwise the lower 4GB are blocked. And the 32GB RLIMIT_DATA needs
  167. ** to be reduced to 250MB on FreeBSD.
  168. */
  169. #if LJ_TARGET_OSX || defined(__DragonFly__)
  170. #define MMAP_REGION_START        ((uintptr_t)0x10000)
  171. #elif LJ_TARGET_PS4
  172. #define MMAP_REGION_START        ((uintptr_t)0x4000)
  173. #else
  174. #define MMAP_REGION_START        ((uintptr_t)0x10000000)
  175. #endif
  176. #define MMAP_REGION_END                ((uintptr_t)0x80000000)

  177. #if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4
  178. #include <sys/resource.h>
  179. #endif

  180. static LJ_AINLINE void *CALL_MMAP(size_t size)
  181. {
  182.   int olderr = errno;
  183.   /* Hint for next allocation. Doesn't need to be thread-safe. */
  184.   static uintptr_t alloc_hint = MMAP_REGION_START;
  185.   int retry = 0;
  186. #if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4
  187.   static int rlimit_modified = 0;
  188.   if (LJ_UNLIKELY(rlimit_modified == 0)) {
  189.     struct rlimit rlim;
  190.     rlim.rlim_cur = rlim.rlim_max = MMAP_REGION_START;
  191.     setrlimit(RLIMIT_DATA, &rlim);  /* Ignore result. May fail below. */
  192.     rlimit_modified = 1;
  193.   }
  194. #endif
  195.   for (;;) {
  196.     void *p = mmap((void *)alloc_hint, size, MMAP_PROT, MMAP_FLAGS, -1, 0);
  197.     if ((uintptr_t)p >= MMAP_REGION_START &&
  198.         (uintptr_t)p + size < MMAP_REGION_END) {
  199.       alloc_hint = (uintptr_t)p + size;
  200.       errno = olderr;
  201.       return p;
  202.     }
  203.     if (p != CMFAIL) munmap(p, size);
  204. #if defined(__sun__) || defined(__DragonFly__)
  205.     alloc_hint += 0x1000000/* Need near-exhaustive linear scan. */
  206.     if (alloc_hint + size < MMAP_REGION_END) continue;
  207. #endif
  208.     if (retry) break;
  209.     retry = 1;
  210.     alloc_hint = MMAP_REGION_START;
  211.   }
  212.   errno = olderr;
  213.   return CMFAIL;
  214. }

  215. #else

  216. #error "NYI: need an equivalent of MAP_32BIT for this 64 bit OS"

  217. #endif

  218. #else

  219. /* 32 bit mode and GC64 mode is easy. */
  220. static LJ_AINLINE void *CALL_MMAP(size_t size)
  221. {
  222.   int olderr = errno;
  223.   void *ptr = mmap(NULL, size, MMAP_PROT, MMAP_FLAGS, -1, 0);
  224.   errno = olderr;
  225.   return ptr;
  226. }

  227. #endif

  228. #define INIT_MMAP()                ((void)0)
  229. #define DIRECT_MMAP(s)                CALL_MMAP(s)

  230. static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
  231. {
  232.   int olderr = errno;
  233.   int ret = munmap(ptr, size);
  234.   errno = olderr;
  235.   return ret;
  236. }

  237. #if LJ_TARGET_LINUX
  238. /* Need to define _GNU_SOURCE to get the mremap prototype. */
  239. static LJ_AINLINE void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz,
  240.                                      int flags)
  241. {
  242.   int olderr = errno;
  243.   ptr = mremap(ptr, osz, nsz, flags);
  244.   errno = olderr;
  245.   return ptr;
  246. }

  247. #define CALL_MREMAP(addr, osz, nsz, mv) CALL_MREMAP_((addr), (osz), (nsz), (mv))
  248. #define CALL_MREMAP_NOMOVE        0
  249. #define CALL_MREMAP_MAYMOVE        1
  250. #if LJ_64 && !LJ_GC64
  251. #define CALL_MREMAP_MV                CALL_MREMAP_NOMOVE
  252. #else
  253. #define CALL_MREMAP_MV                CALL_MREMAP_MAYMOVE
  254. #endif
  255. #endif

  256. #endif

  257. #ifndef CALL_MREMAP
  258. #define CALL_MREMAP(addr, osz, nsz, mv) ((void)osz, MFAIL)
  259. #endif

  260. /* -----------------------  Chunk representations ------------------------ */

  261. struct malloc_chunk {
  262.   size_t               prev_foot;  /* Size of previous chunk (if free).  */
  263.   size_t               head;       /* Size and inuse bits. */
  264.   struct malloc_chunk *fd;         /* double links -- used only if free. */
  265.   struct malloc_chunk *bk;
  266. };

  267. typedef struct malloc_chunk  mchunk;
  268. typedef struct malloc_chunk *mchunkptr;
  269. typedef struct malloc_chunk *sbinptr/* The type of bins of chunks */
  270. typedef size_t bindex_t;               /* Described below */
  271. typedef unsigned int binmap_t;         /* Described below */
  272. typedef unsigned int flag_t;           /* The type of various bit flag sets */

  273. /* ------------------- Chunks sizes and alignments ----------------------- */

  274. #define MCHUNK_SIZE                (sizeof(mchunk))

  275. #define CHUNK_OVERHEAD                (SIZE_T_SIZE)

  276. /* Direct chunks need a second word of overhead ... */
  277. #define DIRECT_CHUNK_OVERHEAD        (TWO_SIZE_T_SIZES)
  278. /* ... and additional padding for fake next-chunk at foot */
  279. #define DIRECT_FOOT_PAD                (FOUR_SIZE_T_SIZES)

  280. /* The smallest size we can malloc is an aligned minimal chunk */
  281. #define MIN_CHUNK_SIZE\
  282.   ((MCHUNK_SIZE + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK)

  283. /* conversion from malloc headers to user pointers, and back */
  284. #define chunk2mem(p)                ((void *)((char *)(p) + TWO_SIZE_T_SIZES))
  285. #define mem2chunk(mem)                ((mchunkptr)((char *)(mem) - TWO_SIZE_T_SIZES))
  286. /* chunk associated with aligned address A */
  287. #define align_as_chunk(A)        (mchunkptr)((A) + align_offset(chunk2mem(A)))

  288. /* Bounds on request (not chunk) sizes. */
  289. #define MAX_REQUEST                ((~MIN_CHUNK_SIZE+1) << 2)
  290. #define MIN_REQUEST                (MIN_CHUNK_SIZE - CHUNK_OVERHEAD - SIZE_T_ONE)

  291. /* pad request bytes into a usable size */
  292. #define pad_request(req) \
  293.    (((req) + CHUNK_OVERHEAD + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK)

  294. /* pad request, checking for minimum (but not maximum) */
  295. #define request2size(req) \
  296.   (((req) < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(req))

  297. /* ------------------ Operations on head and foot fields ----------------- */

  298. #define PINUSE_BIT                (SIZE_T_ONE)
  299. #define CINUSE_BIT                (SIZE_T_TWO)
  300. #define INUSE_BITS                (PINUSE_BIT|CINUSE_BIT)

  301. /* Head value for fenceposts */
  302. #define FENCEPOST_HEAD                (INUSE_BITS|SIZE_T_SIZE)

  303. /* extraction of fields from head words */
  304. #define cinuse(p)                ((p)->head & CINUSE_BIT)
  305. #define pinuse(p)                ((p)->head & PINUSE_BIT)
  306. #define chunksize(p)                ((p)->head & ~(INUSE_BITS))

  307. #define clear_pinuse(p)                ((p)->head &= ~PINUSE_BIT)
  308. #define clear_cinuse(p)                ((p)->head &= ~CINUSE_BIT)

  309. /* Treat space at ptr +/- offset as a chunk */
  310. #define chunk_plus_offset(p, s)                ((mchunkptr)(((char *)(p)) + (s)))
  311. #define chunk_minus_offset(p, s)        ((mchunkptr)(((char *)(p)) - (s)))

  312. /* Ptr to next or previous physical malloc_chunk. */
  313. #define next_chunk(p)        ((mchunkptr)(((char *)(p)) + ((p)->head & ~INUSE_BITS)))
  314. #define prev_chunk(p)        ((mchunkptr)(((char *)(p)) - ((p)->prev_foot) ))

  315. /* extract next chunk's pinuse bit */
  316. #define next_pinuse(p)        ((next_chunk(p)->head) & PINUSE_BIT)

  317. /* Get/set size at footer */
  318. #define get_foot(p, s)        (((mchunkptr)((char *)(p) + (s)))->prev_foot)
  319. #define set_foot(p, s)        (((mchunkptr)((char *)(p) + (s)))->prev_foot = (s))

  320. /* Set size, pinuse bit, and foot */
  321. #define set_size_and_pinuse_of_free_chunk(p, s)\
  322.   ((p)->head = (s|PINUSE_BIT), set_foot(p, s))

  323. /* Set size, pinuse bit, foot, and clear next pinuse */
  324. #define set_free_with_pinuse(p, s, n)\
  325.   (clear_pinuse(n), set_size_and_pinuse_of_free_chunk(p, s))

  326. #define is_direct(p)\
  327.   (!((p)->head & PINUSE_BIT) && ((p)->prev_foot & IS_DIRECT_BIT))

  328. /* Get the internal overhead associated with chunk p */
  329. #define overhead_for(p)\
  330. (is_direct(p)? DIRECT_CHUNK_OVERHEAD : CHUNK_OVERHEAD)

  331. /* ---------------------- Overlaid data structures ----------------------- */

  332. struct malloc_tree_chunk {
  333.   /* The first four fields must be compatible with malloc_chunk */
  334.   size_t                    prev_foot;
  335.   size_t                    head;
  336.   struct malloc_tree_chunk *fd;
  337.   struct malloc_tree_chunk *bk;

  338.   struct malloc_tree_chunk *child[2];
  339.   struct malloc_tree_chunk *parent;
  340.   bindex_t                  index;
  341. };

  342. typedef struct malloc_tree_chunk  tchunk;
  343. typedef struct malloc_tree_chunk *tchunkptr;
  344. typedef struct malloc_tree_chunk *tbinptr; /* The type of bins of trees */

  345. /* A little helper macro for trees */
  346. #define leftmost_child(t) ((t)->child[0] != 0? (t)->child[0] : (t)->child[1])

  347. /* ----------------------------- Segments -------------------------------- */

  348. struct malloc_segment {
  349.   char        *base;             /* base address */
  350.   size_t       size;             /* allocated size */
  351.   struct malloc_segment *next;   /* ptr to next segment */
  352. };

  353. typedef struct malloc_segment  msegment;
  354. typedef struct malloc_segment *msegmentptr;

  355. /* ---------------------------- malloc_state ----------------------------- */

  356. /* Bin types, widths and sizes */
  357. #define NSMALLBINS                (32U)
  358. #define NTREEBINS                (32U)
  359. #define SMALLBIN_SHIFT                (3U)
  360. #define SMALLBIN_WIDTH                (SIZE_T_ONE << SMALLBIN_SHIFT)
  361. #define TREEBIN_SHIFT                (8U)
  362. #define MIN_LARGE_SIZE                (SIZE_T_ONE << TREEBIN_SHIFT)
  363. #define MAX_SMALL_SIZE                (MIN_LARGE_SIZE - SIZE_T_ONE)
  364. #define MAX_SMALL_REQUEST  (MAX_SMALL_SIZE - CHUNK_ALIGN_MASK - CHUNK_OVERHEAD)

  365. struct malloc_state {
  366.   binmap_t   smallmap;
  367.   binmap_t   treemap;
  368.   size_t     dvsize;
  369.   size_t     topsize;
  370.   mchunkptr  dv;
  371.   mchunkptr  top;
  372.   size_t     trim_check;
  373.   size_t     release_checks;
  374.   mchunkptr  smallbins[(NSMALLBINS+1)*2];
  375.   tbinptr    treebins[NTREEBINS];
  376.   msegment   seg;
  377. };

  378. typedef struct malloc_state *mstate;

  379. #define is_initialized(M)        ((M)->top != 0)

  380. /* -------------------------- system alloc setup ------------------------- */

  381. /* page-align a size */
  382. #define page_align(S)\
  383. (((S) + (LJ_PAGESIZE - SIZE_T_ONE)) & ~(LJ_PAGESIZE - SIZE_T_ONE))

  384. /* granularity-align a size */
  385. #define granularity_align(S)\
  386.   (((S) + (DEFAULT_GRANULARITY - SIZE_T_ONE))\
  387.    & ~(DEFAULT_GRANULARITY - SIZE_T_ONE))

  388. #if LJ_TARGET_WINDOWS
  389. #define mmap_align(S)        granularity_align(S)
  390. #else
  391. #define mmap_align(S)        page_align(S)
  392. #endif

  393. /*  True if segment S holds address A */
  394. #define segment_holds(S, A)\
  395.   ((char *)(A) >= S->base && (char *)(A) < S->base + S->size)

  396. /* Return segment holding given address */
  397. static msegmentptr segment_holding(mstate m, char *addr)
  398. {
  399.   msegmentptr sp = &m->seg;
  400.   for (;;) {
  401.     if (addr >= sp->base && addr < sp->base + sp->size)
  402.       return sp;
  403.     if ((sp = sp->next) == 0)
  404.       return 0;
  405.   }
  406. }

  407. /* Return true if segment contains a segment link */
  408. static int has_segment_link(mstate m, msegmentptr ss)
  409. {
  410.   msegmentptr sp = &m->seg;
  411.   for (;;) {
  412.     if ((char *)sp >= ss->base && (char *)sp < ss->base + ss->size)
  413.       return 1;
  414.     if ((sp = sp->next) == 0)
  415.       return 0;
  416.   }
  417. }

  418. /*
  419.   TOP_FOOT_SIZE is padding at the end of a segment, including space
  420.   that may be needed to place segment records and fenceposts when new
  421.   noncontiguous segments are added.
  422. */
  423. #define TOP_FOOT_SIZE\
  424.   (align_offset(chunk2mem(0))+pad_request(sizeof(struct malloc_segment))+MIN_CHUNK_SIZE)

  425. /* ---------------------------- Indexing Bins ---------------------------- */

  426. #define is_small(s)                (((s) >> SMALLBIN_SHIFT) < NSMALLBINS)
  427. #define small_index(s)                ((s)  >> SMALLBIN_SHIFT)
  428. #define small_index2size(i)        ((i)  << SMALLBIN_SHIFT)
  429. #define MIN_SMALL_INDEX                (small_index(MIN_CHUNK_SIZE))

  430. /* addressing by index. See above about smallbin repositioning */
  431. #define smallbin_at(M, i)        ((sbinptr)((char *)&((M)->smallbins[(i)<<1])))
  432. #define treebin_at(M,i)                (&((M)->treebins[i]))

  433. /* assign tree index for size S to variable I */
  434. #define compute_tree_index(S, I)\
  435. {\
  436.   unsigned int X = (unsigned int)(S >> TREEBIN_SHIFT);\
  437.   if (X == 0) {\
  438.     I = 0;\
  439.   } else if (X > 0xFFFF) {\
  440.     I = NTREEBINS-1;\
  441.   } else {\
  442.     unsigned int K = lj_fls(X);\
  443.     I =  (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\
  444.   }\
  445. }

  446. /* Bit representing maximum resolved size in a treebin at i */
  447. #define bit_for_tree_index(i) \
  448.    (i == NTREEBINS-1)? (SIZE_T_BITSIZE-1) : (((i) >> 1) + TREEBIN_SHIFT - 2)

  449. /* Shift placing maximum resolved bit in a treebin at i as sign bit */
  450. #define leftshift_for_tree_index(i) \
  451.    ((i == NTREEBINS-1)? 0 : \
  452.     ((SIZE_T_BITSIZE-SIZE_T_ONE) - (((i) >> 1) + TREEBIN_SHIFT - 2)))

  453. /* The size of the smallest chunk held in bin with index i */
  454. #define minsize_for_tree_index(i) \
  455.    ((SIZE_T_ONE << (((i) >> 1) + TREEBIN_SHIFT)) |  \
  456.    (((size_t)((i) & SIZE_T_ONE)) << (((i) >> 1) + TREEBIN_SHIFT - 1)))

  457. /* ------------------------ Operations on bin maps ----------------------- */

  458. /* bit corresponding to given index */
  459. #define idx2bit(i)                ((binmap_t)(1) << (i))

  460. /* Mark/Clear bits with given index */
  461. #define mark_smallmap(M,i)        ((M)->smallmap |=  idx2bit(i))
  462. #define clear_smallmap(M,i)        ((M)->smallmap &= ~idx2bit(i))
  463. #define smallmap_is_marked(M,i)        ((M)->smallmap &   idx2bit(i))

  464. #define mark_treemap(M,i)        ((M)->treemap  |=  idx2bit(i))
  465. #define clear_treemap(M,i)        ((M)->treemap  &= ~idx2bit(i))
  466. #define treemap_is_marked(M,i)        ((M)->treemap  &   idx2bit(i))

  467. /* mask with all bits to left of least bit of x on */
  468. #define left_bits(x)                ((x<<1) | (~(x<<1)+1))

  469. /* Set cinuse bit and pinuse bit of next chunk */
  470. #define set_inuse(M,p,s)\
  471.   ((p)->head = (((p)->head & PINUSE_BIT)|s|CINUSE_BIT),\
  472.   ((mchunkptr)(((char *)(p)) + (s)))->head |= PINUSE_BIT)

  473. /* Set cinuse and pinuse of this chunk and pinuse of next chunk */
  474. #define set_inuse_and_pinuse(M,p,s)\
  475.   ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\
  476.   ((mchunkptr)(((char *)(p)) + (s)))->head |= PINUSE_BIT)

  477. /* Set size, cinuse and pinuse bit of this chunk */
  478. #define set_size_and_pinuse_of_inuse_chunk(M, p, s)\
  479.   ((p)->head = (s|PINUSE_BIT|CINUSE_BIT))

  480. /* ----------------------- Operations on smallbins ----------------------- */

  481. /* Link a free chunk into a smallbin  */
  482. #define insert_small_chunk(M, P, S) {\
  483.   bindex_t I = small_index(S);\
  484.   mchunkptr B = smallbin_at(M, I);\
  485.   mchunkptr F = B;\
  486.   if (!smallmap_is_marked(M, I))\
  487.     mark_smallmap(M, I);\
  488.   else\
  489.     F = B->fd;\
  490.   B->fd = P;\
  491.   F->bk = P;\
  492.   P->fd = F;\
  493.   P->bk = B;\
  494. }

  495. /* Unlink a chunk from a smallbin  */
  496. #define unlink_small_chunk(M, P, S) {\
  497.   mchunkptr F = P->fd;\
  498.   mchunkptr B = P->bk;\
  499.   bindex_t I = small_index(S);\
  500.   if (F == B) {\
  501.     clear_smallmap(M, I);\
  502.   } else {\
  503.     F->bk = B;\
  504.     B->fd = F;\
  505.   }\
  506. }

  507. /* Unlink the first chunk from a smallbin */
  508. #define unlink_first_small_chunk(M, B, P, I) {\
  509.   mchunkptr F = P->fd;\
  510.   if (B == F) {\
  511.     clear_smallmap(M, I);\
  512.   } else {\
  513.     B->fd = F;\
  514.     F->bk = B;\
  515.   }\
  516. }

  517. /* Replace dv node, binning the old one */
  518. /* Used only when dvsize known to be small */
  519. #define replace_dv(M, P, S) {\
  520.   size_t DVS = M->dvsize;\
  521.   if (DVS != 0) {\
  522.     mchunkptr DV = M->dv;\
  523.     insert_small_chunk(M, DV, DVS);\
  524.   }\
  525.   M->dvsize = S;\
  526.   M->dv = P;\
  527. }

  528. /* ------------------------- Operations on trees ------------------------- */

  529. /* Insert chunk into tree */
  530. #define insert_large_chunk(M, X, S) {\
  531.   tbinptr *H;\
  532.   bindex_t I;\
  533.   compute_tree_index(S, I);\
  534.   H = treebin_at(M, I);\
  535.   X->index = I;\
  536.   X->child[0] = X->child[1] = 0;\
  537.   if (!treemap_is_marked(M, I)) {\
  538.     mark_treemap(M, I);\
  539.     *H = X;\
  540.     X->parent = (tchunkptr)H;\
  541.     X->fd = X->bk = X;\
  542.   } else {\
  543.     tchunkptr T = *H;\
  544.     size_t K = S << leftshift_for_tree_index(I);\
  545.     for (;;) {\
  546.       if (chunksize(T) != S) {\
  547.         tchunkptr *C = &(T->child[(K >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]);\
  548.         K <<= 1;\
  549.         if (*C != 0) {\
  550.           T = *C;\
  551.         } else {\
  552.           *C = X;\
  553.           X->parent = T;\
  554.           X->fd = X->bk = X;\
  555.           break;\
  556.         }\
  557.       } else {\
  558.         tchunkptr F = T->fd;\
  559.         T->fd = F->bk = X;\
  560.         X->fd = F;\
  561.         X->bk = T;\
  562.         X->parent = 0;\
  563.         break;\
  564.       }\
  565.     }\
  566.   }\
  567. }

  568. #define unlink_large_chunk(M, X) {\
  569.   tchunkptr XP = X->parent;\
  570.   tchunkptr R;\
  571.   if (X->bk != X) {\
  572.     tchunkptr F = X->fd;\
  573.     R = X->bk;\
  574.     F->bk = R;\
  575.     R->fd = F;\
  576.   } else {\
  577.     tchunkptr *RP;\
  578.     if (((R = *(RP = &(X->child[1]))) != 0) ||\
  579.         ((R = *(RP = &(X->child[0]))) != 0)) {\
  580.       tchunkptr *CP;\
  581.       while ((*(CP = &(R->child[1])) != 0) ||\
  582.              (*(CP = &(R->child[0])) != 0)) {\
  583.         R = *(RP = CP);\
  584.       }\
  585.       *RP = 0;\
  586.     }\
  587.   }\
  588.   if (XP != 0) {\
  589.     tbinptr *H = treebin_at(M, X->index);\
  590.     if (X == *H) {\
  591.       if ((*H = R) == 0) \
  592.         clear_treemap(M, X->index);\
  593.     } else {\
  594.       if (XP->child[0] == X) \
  595.         XP->child[0] = R;\
  596.       else \
  597.         XP->child[1] = R;\
  598.     }\
  599.     if (R != 0) {\
  600.       tchunkptr C0, C1;\
  601.       R->parent = XP;\
  602.       if ((C0 = X->child[0]) != 0) {\
  603.         R->child[0] = C0;\
  604.         C0->parent = R;\
  605.       }\
  606.       if ((C1 = X->child[1]) != 0) {\
  607.         R->child[1] = C1;\
  608.         C1->parent = R;\
  609.       }\
  610.     }\
  611.   }\
  612. }

  613. /* Relays to large vs small bin operations */

  614. #define insert_chunk(M, P, S)\
  615.   if (is_small(S)) { insert_small_chunk(M, P, S)\
  616.   } else { tchunkptr TP = (tchunkptr)(P); insert_large_chunk(M, TP, S); }

  617. #define unlink_chunk(M, P, S)\
  618.   if (is_small(S)) { unlink_small_chunk(M, P, S)\
  619.   } else { tchunkptr TP = (tchunkptr)(P); unlink_large_chunk(M, TP); }

  620. /* -----------------------  Direct-mmapping chunks ----------------------- */

  621. static void *direct_alloc(size_t nb)
  622. {
  623.   size_t mmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK);
  624.   if (LJ_LIKELY(mmsize > nb)) {     /* Check for wrap around 0 */
  625.     char *mm = (char *)(DIRECT_MMAP(mmsize));
  626.     if (mm != CMFAIL) {
  627.       size_t offset = align_offset(chunk2mem(mm));
  628.       size_t psize = mmsize - offset - DIRECT_FOOT_PAD;
  629.       mchunkptr p = (mchunkptr)(mm + offset);
  630.       p->prev_foot = offset | IS_DIRECT_BIT;
  631.       p->head = psize|CINUSE_BIT;
  632.       chunk_plus_offset(p, psize)->head = FENCEPOST_HEAD;
  633.       chunk_plus_offset(p, psize+SIZE_T_SIZE)->head = 0;
  634.       return chunk2mem(p);
  635.     }
  636.   }
  637.   return NULL;
  638. }

  639. static mchunkptr direct_resize(mchunkptr oldp, size_t nb)
  640. {
  641.   size_t oldsize = chunksize(oldp);
  642.   if (is_small(nb)) /* Can't shrink direct regions below small size */
  643.     return NULL;
  644.   /* Keep old chunk if big enough but not too big */
  645.   if (oldsize >= nb + SIZE_T_SIZE &&
  646.       (oldsize - nb) <= (DEFAULT_GRANULARITY >> 1)) {
  647.     return oldp;
  648.   } else {
  649.     size_t offset = oldp->prev_foot & ~IS_DIRECT_BIT;
  650.     size_t oldmmsize = oldsize + offset + DIRECT_FOOT_PAD;
  651.     size_t newmmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK);
  652.     char *cp = (char *)CALL_MREMAP((char *)oldp - offset,
  653.                                    oldmmsize, newmmsize, CALL_MREMAP_MV);
  654.     if (cp != CMFAIL) {
  655.       mchunkptr newp = (mchunkptr)(cp + offset);
  656.       size_t psize = newmmsize - offset - DIRECT_FOOT_PAD;
  657.       newp->head = psize|CINUSE_BIT;
  658.       chunk_plus_offset(newp, psize)->head = FENCEPOST_HEAD;
  659.       chunk_plus_offset(newp, psize+SIZE_T_SIZE)->head = 0;
  660.       return newp;
  661.     }
  662.   }
  663.   return NULL;
  664. }

  665. /* -------------------------- mspace management -------------------------- */

  666. /* Initialize top chunk and its size */
  667. static void init_top(mstate m, mchunkptr p, size_t psize)
  668. {
  669.   /* Ensure alignment */
  670.   size_t offset = align_offset(chunk2mem(p));
  671.   p = (mchunkptr)((char *)p + offset);
  672.   psize -= offset;

  673.   m->top = p;
  674.   m->topsize = psize;
  675.   p->head = psize | PINUSE_BIT;
  676.   /* set size of fake trailing chunk holding overhead space only once */
  677.   chunk_plus_offset(p, psize)->head = TOP_FOOT_SIZE;
  678.   m->trim_check = DEFAULT_TRIM_THRESHOLD; /* reset on each update */
  679. }

  680. /* Initialize bins for a new mstate that is otherwise zeroed out */
  681. static void init_bins(mstate m)
  682. {
  683.   /* Establish circular links for smallbins */
  684.   bindex_t i;
  685.   for (i = 0; i < NSMALLBINS; i++) {
  686.     sbinptr bin = smallbin_at(m,i);
  687.     bin->fd = bin->bk = bin;
  688.   }
  689. }

  690. /* Allocate chunk and prepend remainder with chunk in successor base. */
  691. static void *prepend_alloc(mstate m, char *newbase, char *oldbase, size_t nb)
  692. {
  693.   mchunkptr p = align_as_chunk(newbase);
  694.   mchunkptr oldfirst = align_as_chunk(oldbase);
  695.   size_t psize = (size_t)((char *)oldfirst - (char *)p);
  696.   mchunkptr q = chunk_plus_offset(p, nb);
  697.   size_t qsize = psize - nb;
  698.   set_size_and_pinuse_of_inuse_chunk(m, p, nb);

  699.   /* consolidate remainder with first chunk of old base */
  700.   if (oldfirst == m->top) {
  701.     size_t tsize = m->topsize += qsize;
  702.     m->top = q;
  703.     q->head = tsize | PINUSE_BIT;
  704.   } else if (oldfirst == m->dv) {
  705.     size_t dsize = m->dvsize += qsize;
  706.     m->dv = q;
  707.     set_size_and_pinuse_of_free_chunk(q, dsize);
  708.   } else {
  709.     if (!cinuse(oldfirst)) {
  710.       size_t nsize = chunksize(oldfirst);
  711.       unlink_chunk(m, oldfirst, nsize);
  712.       oldfirst = chunk_plus_offset(oldfirst, nsize);
  713.       qsize += nsize;
  714.     }
  715.     set_free_with_pinuse(q, qsize, oldfirst);
  716.     insert_chunk(m, q, qsize);
  717.   }

  718.   return chunk2mem(p);
  719. }

  720. /* Add a segment to hold a new noncontiguous region */
  721. static void add_segment(mstate m, char *tbase, size_t tsize)
  722. {
  723.   /* Determine locations and sizes of segment, fenceposts, old top */
  724.   char *old_top = (char *)m->top;
  725.   msegmentptr oldsp = segment_holding(m, old_top);
  726.   char *old_end = oldsp->base + oldsp->size;
  727.   size_t ssize = pad_request(sizeof(struct malloc_segment));
  728.   char *rawsp = old_end - (ssize + FOUR_SIZE_T_SIZES + CHUNK_ALIGN_MASK);
  729.   size_t offset = align_offset(chunk2mem(rawsp));
  730.   char *asp = rawsp + offset;
  731.   char *csp = (asp < (old_top + MIN_CHUNK_SIZE))? old_top : asp;
  732.   mchunkptr sp = (mchunkptr)csp;
  733.   msegmentptr ss = (msegmentptr)(chunk2mem(sp));
  734.   mchunkptr tnext = chunk_plus_offset(sp, ssize);
  735.   mchunkptr p = tnext;

  736.   /* reset top to new space */
  737.   init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE);

  738.   /* Set up segment record */
  739.   set_size_and_pinuse_of_inuse_chunk(m, sp, ssize);
  740.   *ss = m->seg; /* Push current record */
  741.   m->seg.base = tbase;
  742.   m->seg.size = tsize;
  743.   m->seg.next = ss;

  744.   /* Insert trailing fenceposts */
  745.   for (;;) {
  746.     mchunkptr nextp = chunk_plus_offset(p, SIZE_T_SIZE);
  747.     p->head = FENCEPOST_HEAD;
  748.     if ((char *)(&(nextp->head)) < old_end)
  749.       p = nextp;
  750.     else
  751.       break;
  752.   }

  753.   /* Insert the rest of old top into a bin as an ordinary free chunk */
  754.   if (csp != old_top) {
  755.     mchunkptr q = (mchunkptr)old_top;
  756.     size_t psize = (size_t)(csp - old_top);
  757.     mchunkptr tn = chunk_plus_offset(q, psize);
  758.     set_free_with_pinuse(q, psize, tn);
  759.     insert_chunk(m, q, psize);
  760.   }
  761. }

  762. /* -------------------------- System allocation -------------------------- */

  763. static void *alloc_sys(mstate m, size_t nb)
  764. {
  765.   char *tbase = CMFAIL;
  766.   size_t tsize = 0;

  767.   /* Directly map large chunks */
  768.   if (LJ_UNLIKELY(nb >= DEFAULT_MMAP_THRESHOLD)) {
  769.     void *mem = direct_alloc(nb);
  770.     if (mem != 0)
  771.       return mem;
  772.   }

  773.   {
  774.     size_t req = nb + TOP_FOOT_SIZE + SIZE_T_ONE;
  775.     size_t rsize = granularity_align(req);
  776.     if (LJ_LIKELY(rsize > nb)) { /* Fail if wraps around zero */
  777.       char *mp = (char *)(CALL_MMAP(rsize));
  778.       if (mp != CMFAIL) {
  779.         tbase = mp;
  780.         tsize = rsize;
  781.       }
  782.     }
  783.   }

  784.   if (tbase != CMFAIL) {
  785.     msegmentptr sp = &m->seg;
  786.     /* Try to merge with an existing segment */
  787.     while (sp != 0 && tbase != sp->base + sp->size)
  788.       sp = sp->next;
  789.     if (sp != 0 && segment_holds(sp, m->top)) { /* append */
  790.       sp->size += tsize;
  791.       init_top(m, m->top, m->topsize + tsize);
  792.     } else {
  793.       sp = &m->seg;
  794.       while (sp != 0 && sp->base != tbase + tsize)
  795.         sp = sp->next;
  796.       if (sp != 0) {
  797.         char *oldbase = sp->base;
  798.         sp->base = tbase;
  799.         sp->size += tsize;
  800.         return prepend_alloc(m, tbase, oldbase, nb);
  801.       } else {
  802.         add_segment(m, tbase, tsize);
  803.       }
  804.     }

  805.     if (nb < m->topsize) { /* Allocate from new or extended top space */
  806.       size_t rsize = m->topsize -= nb;
  807.       mchunkptr p = m->top;
  808.       mchunkptr r = m->top = chunk_plus_offset(p, nb);
  809.       r->head = rsize | PINUSE_BIT;
  810.       set_size_and_pinuse_of_inuse_chunk(m, p, nb);
  811.       return chunk2mem(p);
  812.     }
  813.   }

  814.   return NULL;
  815. }

  816. /* -----------------------  system deallocation -------------------------- */

  817. /* Unmap and unlink any mmapped segments that don't contain used chunks */
  818. static size_t release_unused_segments(mstate m)
  819. {
  820.   size_t released = 0;
  821.   size_t nsegs = 0;
  822.   msegmentptr pred = &m->seg;
  823.   msegmentptr sp = pred->next;
  824.   while (sp != 0) {
  825.     char *base = sp->base;
  826.     size_t size = sp->size;
  827.     msegmentptr next = sp->next;
  828.     nsegs++;
  829.     {
  830.       mchunkptr p = align_as_chunk(base);
  831.       size_t psize = chunksize(p);
  832.       /* Can unmap if first chunk holds entire segment and not pinned */
  833.       if (!cinuse(p) && (char *)p + psize >= base + size - TOP_FOOT_SIZE) {
  834.         tchunkptr tp = (tchunkptr)p;
  835.         if (p == m->dv) {
  836.           m->dv = 0;
  837.           m->dvsize = 0;
  838.         } else {
  839.           unlink_large_chunk(m, tp);
  840.         }
  841.         if (CALL_MUNMAP(base, size) == 0) {
  842.           released += size;
  843.           /* unlink obsoleted record */
  844.           sp = pred;
  845.           sp->next = next;
  846.         } else { /* back out if cannot unmap */
  847.           insert_large_chunk(m, tp, psize);
  848.         }
  849.       }
  850.     }
  851.     pred = sp;
  852.     sp = next;
  853.   }
  854.   /* Reset check counter */
  855.   m->release_checks = nsegs > MAX_RELEASE_CHECK_RATE ?
  856.                       nsegs : MAX_RELEASE_CHECK_RATE;
  857.   return released;
  858. }

  859. static int alloc_trim(mstate m, size_t pad)
  860. {
  861.   size_t released = 0;
  862.   if (pad < MAX_REQUEST && is_initialized(m)) {
  863.     pad += TOP_FOOT_SIZE; /* ensure enough room for segment overhead */

  864.     if (m->topsize > pad) {
  865.       /* Shrink top space in granularity-size units, keeping at least one */
  866.       size_t unit = DEFAULT_GRANULARITY;
  867.       size_t extra = ((m->topsize - pad + (unit - SIZE_T_ONE)) / unit -
  868.                       SIZE_T_ONE) * unit;
  869.       msegmentptr sp = segment_holding(m, (char *)m->top);

  870.       if (sp->size >= extra &&
  871.           !has_segment_link(m, sp)) { /* can't shrink if pinned */
  872.         size_t newsize = sp->size - extra;
  873.         /* Prefer mremap, fall back to munmap */
  874.         if ((CALL_MREMAP(sp->base, sp->size, newsize, CALL_MREMAP_NOMOVE) != MFAIL) ||
  875.             (CALL_MUNMAP(sp->base + newsize, extra) == 0)) {
  876.           released = extra;
  877.         }
  878.       }

  879.       if (released != 0) {
  880.         sp->size -= released;
  881.         init_top(m, m->top, m->topsize - released);
  882.       }
  883.     }

  884.     /* Unmap any unused mmapped segments */
  885.     released += release_unused_segments(m);

  886.     /* On failure, disable autotrim to avoid repeated failed future calls */
  887.     if (released == 0 && m->topsize > m->trim_check)
  888.       m->trim_check = MAX_SIZE_T;
  889.   }

  890.   return (released != 0)? 1 : 0;
  891. }

  892. /* ---------------------------- malloc support --------------------------- */

  893. /* allocate a large request from the best fitting chunk in a treebin */
  894. static void *tmalloc_large(mstate m, size_t nb)
  895. {
  896.   tchunkptr v = 0;
  897.   size_t rsize = ~nb+1; /* Unsigned negation */
  898.   tchunkptr t;
  899.   bindex_t idx;
  900.   compute_tree_index(nb, idx);

  901.   if ((t = *treebin_at(m, idx)) != 0) {
  902.     /* Traverse tree for this bin looking for node with size == nb */
  903.     size_t sizebits = nb << leftshift_for_tree_index(idx);
  904.     tchunkptr rst = 0/* The deepest untaken right subtree */
  905.     for (;;) {
  906.       tchunkptr rt;
  907.       size_t trem = chunksize(t) - nb;
  908.       if (trem < rsize) {
  909.         v = t;
  910.         if ((rsize = trem) == 0)
  911.           break;
  912.       }
  913.       rt = t->child[1];
  914.       t = t->child[(sizebits >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1];
  915.       if (rt != 0 && rt != t)
  916.         rst = rt;
  917.       if (t == 0) {
  918.         t = rst; /* set t to least subtree holding sizes > nb */
  919.         break;
  920.       }
  921.       sizebits <<= 1;
  922.     }
  923.   }

  924.   if (t == 0 && v == 0) { /* set t to root of next non-empty treebin */
  925.     binmap_t leftbits = left_bits(idx2bit(idx)) & m->treemap;
  926.     if (leftbits != 0)
  927.       t = *treebin_at(m, lj_ffs(leftbits));
  928.   }

  929.   while (t != 0) { /* find smallest of tree or subtree */
  930.     size_t trem = chunksize(t) - nb;
  931.     if (trem < rsize) {
  932.       rsize = trem;
  933.       v = t;
  934.     }
  935.     t = leftmost_child(t);
  936.   }

  937.   /*  If dv is a better fit, return NULL so malloc will use it */
  938.   if (v != 0 && rsize < (size_t)(m->dvsize - nb)) {
  939.     mchunkptr r = chunk_plus_offset(v, nb);
  940.     unlink_large_chunk(m, v);
  941.     if (rsize < MIN_CHUNK_SIZE) {
  942.       set_inuse_and_pinuse(m, v, (rsize + nb));
  943.     } else {
  944.       set_size_and_pinuse_of_inuse_chunk(m, v, nb);
  945.       set_size_and_pinuse_of_free_chunk(r, rsize);
  946.       insert_chunk(m, r, rsize);
  947.     }
  948.     return chunk2mem(v);
  949.   }
  950.   return NULL;
  951. }

  952. /* allocate a small request from the best fitting chunk in a treebin */
  953. static void *tmalloc_small(mstate m, size_t nb)
  954. {
  955.   tchunkptr t, v;
  956.   mchunkptr r;
  957.   size_t rsize;
  958.   bindex_t i = lj_ffs(m->treemap);

  959.   v = t = *treebin_at(m, i);
  960.   rsize = chunksize(t) - nb;

  961.   while ((t = leftmost_child(t)) != 0) {
  962.     size_t trem = chunksize(t) - nb;
  963.     if (trem < rsize) {
  964.       rsize = trem;
  965.       v = t;
  966.     }
  967.   }

  968.   r = chunk_plus_offset(v, nb);
  969.   unlink_large_chunk(m, v);
  970.   if (rsize < MIN_CHUNK_SIZE) {
  971.     set_inuse_and_pinuse(m, v, (rsize + nb));
  972.   } else {
  973.     set_size_and_pinuse_of_inuse_chunk(m, v, nb);
  974.     set_size_and_pinuse_of_free_chunk(r, rsize);
  975.     replace_dv(m, r, rsize);
  976.   }
  977.   return chunk2mem(v);
  978. }

  979. /* ----------------------------------------------------------------------- */

  980. void *lj_alloc_create(void)
  981. {
  982.   size_t tsize = DEFAULT_GRANULARITY;
  983.   char *tbase;
  984.   INIT_MMAP();
  985.   tbase = (char *)(CALL_MMAP(tsize));
  986.   if (tbase != CMFAIL) {
  987.     size_t msize = pad_request(sizeof(struct malloc_state));
  988.     mchunkptr mn;
  989.     mchunkptr msp = align_as_chunk(tbase);
  990.     mstate m = (mstate)(chunk2mem(msp));
  991.     memset(m, 0, msize);
  992.     msp->head = (msize|PINUSE_BIT|CINUSE_BIT);
  993.     m->seg.base = tbase;
  994.     m->seg.size = tsize;
  995.     m->release_checks = MAX_RELEASE_CHECK_RATE;
  996.     init_bins(m);
  997.     mn = next_chunk(mem2chunk(m));
  998.     init_top(m, mn, (size_t)((tbase + tsize) - (char *)mn) - TOP_FOOT_SIZE);
  999.     return m;
  1000.   }
  1001.   return NULL;
  1002. }

  1003. void lj_alloc_destroy(void *msp)
  1004. {
  1005.   mstate ms = (mstate)msp;
  1006.   msegmentptr sp = &ms->seg;
  1007.   while (sp != 0) {
  1008.     char *base = sp->base;
  1009.     size_t size = sp->size;
  1010.     sp = sp->next;
  1011.     CALL_MUNMAP(base, size);
  1012.   }
  1013. }

  1014. static LJ_NOINLINE void *lj_alloc_malloc(void *msp, size_t nsize)
  1015. {
  1016.   mstate ms = (mstate)msp;
  1017.   void *mem;
  1018.   size_t nb;
  1019.   if (nsize <= MAX_SMALL_REQUEST) {
  1020.     bindex_t idx;
  1021.     binmap_t smallbits;
  1022.     nb = (nsize < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(nsize);
  1023.     idx = small_index(nb);
  1024.     smallbits = ms->smallmap >> idx;

  1025.     if ((smallbits & 0x3U) != 0) { /* Remainderless fit to a smallbin. */
  1026.       mchunkptr b, p;
  1027.       idx += ~smallbits & 1;       /* Uses next bin if idx empty */
  1028.       b = smallbin_at(ms, idx);
  1029.       p = b->fd;
  1030.       unlink_first_small_chunk(ms, b, p, idx);
  1031.       set_inuse_and_pinuse(ms, p, small_index2size(idx));
  1032.       mem = chunk2mem(p);
  1033.       return mem;
  1034.     } else if (nb > ms->dvsize) {
  1035.       if (smallbits != 0) { /* Use chunk in next nonempty smallbin */
  1036.         mchunkptr b, p, r;
  1037.         size_t rsize;
  1038.         binmap_t leftbits = (smallbits << idx) & left_bits(idx2bit(idx));
  1039.         bindex_t i = lj_ffs(leftbits);
  1040.         b = smallbin_at(ms, i);
  1041.         p = b->fd;
  1042.         unlink_first_small_chunk(ms, b, p, i);
  1043.         rsize = small_index2size(i) - nb;
  1044.         /* Fit here cannot be remainderless if 4byte sizes */
  1045.         if (SIZE_T_SIZE != 4 && rsize < MIN_CHUNK_SIZE) {
  1046.           set_inuse_and_pinuse(ms, p, small_index2size(i));
  1047.         } else {
  1048.           set_size_and_pinuse_of_inuse_chunk(ms, p, nb);
  1049.           r = chunk_plus_offset(p, nb);
  1050.           set_size_and_pinuse_of_free_chunk(r, rsize);
  1051.           replace_dv(ms, r, rsize);
  1052.         }
  1053.         mem = chunk2mem(p);
  1054.         return mem;
  1055.       } else if (ms->treemap != 0 && (mem = tmalloc_small(ms, nb)) != 0) {
  1056.         return mem;
  1057.       }
  1058.     }
  1059.   } else if (nsize >= MAX_REQUEST) {
  1060.     nb = MAX_SIZE_T; /* Too big to allocate. Force failure (in sys alloc) */
  1061.   } else {
  1062.     nb = pad_request(nsize);
  1063.     if (ms->treemap != 0 && (mem = tmalloc_large(ms, nb)) != 0) {
  1064.       return mem;
  1065.     }
  1066.   }

  1067.   if (nb <= ms->dvsize) {
  1068.     size_t rsize = ms->dvsize - nb;
  1069.     mchunkptr p = ms->dv;
  1070.     if (rsize >= MIN_CHUNK_SIZE) { /* split dv */
  1071.       mchunkptr r = ms->dv = chunk_plus_offset(p, nb);
  1072.       ms->dvsize = rsize;
  1073.       set_size_and_pinuse_of_free_chunk(r, rsize);
  1074.       set_size_and_pinuse_of_inuse_chunk(ms, p, nb);
  1075.     } else { /* exhaust dv */
  1076.       size_t dvs = ms->dvsize;
  1077.       ms->dvsize = 0;
  1078.       ms->dv = 0;
  1079.       set_inuse_and_pinuse(ms, p, dvs);
  1080.     }
  1081.     mem = chunk2mem(p);
  1082.     return mem;
  1083.   } else if (nb < ms->topsize) { /* Split top */
  1084.     size_t rsize = ms->topsize -= nb;
  1085.     mchunkptr p = ms->top;
  1086.     mchunkptr r = ms->top = chunk_plus_offset(p, nb);
  1087.     r->head = rsize | PINUSE_BIT;
  1088.     set_size_and_pinuse_of_inuse_chunk(ms, p, nb);
  1089.     mem = chunk2mem(p);
  1090.     return mem;
  1091.   }
  1092.   return alloc_sys(ms, nb);
  1093. }

  1094. static LJ_NOINLINE void *lj_alloc_free(void *msp, void *ptr)
  1095. {
  1096.   if (ptr != 0) {
  1097.     mchunkptr p = mem2chunk(ptr);
  1098.     mstate fm = (mstate)msp;
  1099.     size_t psize = chunksize(p);
  1100.     mchunkptr next = chunk_plus_offset(p, psize);
  1101.     if (!pinuse(p)) {
  1102.       size_t prevsize = p->prev_foot;
  1103.       if ((prevsize & IS_DIRECT_BIT) != 0) {
  1104.         prevsize &= ~IS_DIRECT_BIT;
  1105.         psize += prevsize + DIRECT_FOOT_PAD;
  1106.         CALL_MUNMAP((char *)p - prevsize, psize);
  1107.         return NULL;
  1108.       } else {
  1109.         mchunkptr prev = chunk_minus_offset(p, prevsize);
  1110.         psize += prevsize;
  1111.         p = prev;
  1112.         /* consolidate backward */
  1113.         if (p != fm->dv) {
  1114.           unlink_chunk(fm, p, prevsize);
  1115.         } else if ((next->head & INUSE_BITS) == INUSE_BITS) {
  1116.           fm->dvsize = psize;
  1117.           set_free_with_pinuse(p, psize, next);
  1118.           return NULL;
  1119.         }
  1120.       }
  1121.     }
  1122.     if (!cinuse(next)) {  /* consolidate forward */
  1123.       if (next == fm->top) {
  1124.         size_t tsize = fm->topsize += psize;
  1125.         fm->top = p;
  1126.         p->head = tsize | PINUSE_BIT;
  1127.         if (p == fm->dv) {
  1128.           fm->dv = 0;
  1129.           fm->dvsize = 0;
  1130.         }
  1131.         if (tsize > fm->trim_check)
  1132.           alloc_trim(fm, 0);
  1133.         return NULL;
  1134.       } else if (next == fm->dv) {
  1135.         size_t dsize = fm->dvsize += psize;
  1136.         fm->dv = p;
  1137.         set_size_and_pinuse_of_free_chunk(p, dsize);
  1138.         return NULL;
  1139.       } else {
  1140.         size_t nsize = chunksize(next);
  1141.         psize += nsize;
  1142.         unlink_chunk(fm, next, nsize);
  1143.         set_size_and_pinuse_of_free_chunk(p, psize);
  1144.         if (p == fm->dv) {
  1145.           fm->dvsize = psize;
  1146.           return NULL;
  1147.         }
  1148.       }
  1149.     } else {
  1150.       set_free_with_pinuse(p, psize, next);
  1151.     }

  1152.     if (is_small(psize)) {
  1153.       insert_small_chunk(fm, p, psize);
  1154.     } else {
  1155.       tchunkptr tp = (tchunkptr)p;
  1156.       insert_large_chunk(fm, tp, psize);
  1157.       if (--fm->release_checks == 0)
  1158.         release_unused_segments(fm);
  1159.     }
  1160.   }
  1161.   return NULL;
  1162. }

  1163. static LJ_NOINLINE void *lj_alloc_realloc(void *msp, void *ptr, size_t nsize)
  1164. {
  1165.   if (nsize >= MAX_REQUEST) {
  1166.     return NULL;
  1167.   } else {
  1168.     mstate m = (mstate)msp;
  1169.     mchunkptr oldp = mem2chunk(ptr);
  1170.     size_t oldsize = chunksize(oldp);
  1171.     mchunkptr next = chunk_plus_offset(oldp, oldsize);
  1172.     mchunkptr newp = 0;
  1173.     size_t nb = request2size(nsize);

  1174.     /* Try to either shrink or extend into top. Else malloc-copy-free */
  1175.     if (is_direct(oldp)) {
  1176.       newp = direct_resize(oldp, nb);  /* this may return NULL. */
  1177.     } else if (oldsize >= nb) { /* already big enough */
  1178.       size_t rsize = oldsize - nb;
  1179.       newp = oldp;
  1180.       if (rsize >= MIN_CHUNK_SIZE) {
  1181.         mchunkptr rem = chunk_plus_offset(newp, nb);
  1182.         set_inuse(m, newp, nb);
  1183.         set_inuse(m, rem, rsize);
  1184.         lj_alloc_free(m, chunk2mem(rem));
  1185.       }
  1186.     } else if (next == m->top && oldsize + m->topsize > nb) {
  1187.       /* Expand into top */
  1188.       size_t newsize = oldsize + m->topsize;
  1189.       size_t newtopsize = newsize - nb;
  1190.       mchunkptr newtop = chunk_plus_offset(oldp, nb);
  1191.       set_inuse(m, oldp, nb);
  1192.       newtop->head = newtopsize |PINUSE_BIT;
  1193.       m->top = newtop;
  1194.       m->topsize = newtopsize;
  1195.       newp = oldp;
  1196.     }

  1197.     if (newp != 0) {
  1198.       return chunk2mem(newp);
  1199.     } else {
  1200.       void *newmem = lj_alloc_malloc(m, nsize);
  1201.       if (newmem != 0) {
  1202.         size_t oc = oldsize - overhead_for(oldp);
  1203.         memcpy(newmem, ptr, oc < nsize ? oc : nsize);
  1204.         lj_alloc_free(m, ptr);
  1205.       }
  1206.       return newmem;
  1207.     }
  1208.   }
  1209. }

  1210. void *lj_alloc_f(void *msp, void *ptr, size_t osize, size_t nsize)
  1211. {
  1212.   (void)osize;
  1213.   if (nsize == 0) {
  1214.     return lj_alloc_free(msp, ptr);
  1215.   } else if (ptr == NULL) {
  1216.     return lj_alloc_malloc(msp, nsize);
  1217.   } else {
  1218.     return lj_alloc_realloc(msp, ptr, nsize);
  1219.   }
  1220. }

  1221. #endif