1 | // walloc.c: a small malloc implementation for use in WebAssembly targets |
2 | // Copyright (c) 2020 Igalia, S.L. |
3 | // |
4 | // Permission is hereby granted, free of charge, to any person obtaining a |
5 | // copy of this software and associated documentation files (the |
6 | // "Software"), to deal in the Software without restriction, including |
7 | // without limitation the rights to use, copy, modify, merge, publish, |
8 | // distribute, sublicense, and/or sell copies of the Software, and to |
9 | // permit persons to whom the Software is furnished to do so, subject to |
10 | // the following conditions: |
11 | // |
12 | // The above copyright notice and this permission notice shall be included |
13 | // in all copies or substantial portions of the Software. |
14 | // |
15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
16 | // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
17 | // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
18 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE |
19 | // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
20 | // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
21 | // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
22 | |
23 | typedef __SIZE_TYPE__ size_t; |
24 | typedef __UINTPTR_TYPE__ uintptr_t; |
25 | typedef __UINT8_TYPE__ uint8_t; |
26 | |
27 | #define NULL ((void *)0) |
28 | |
29 | #define STATIC_ASSERT_EQ(a, b) _Static_assert((a) == (b), "eq") |
30 | |
31 | #ifndef NDEBUG |
32 | #define ASSERT(x) \ |
33 | do \ |
34 | { \ |
35 | if (!(x)) \ |
36 | __builtin_trap(); \ |
37 | } while (0) |
38 | #else |
39 | #define ASSERT(x) \ |
40 | do \ |
41 | { \ |
42 | } while (0) |
43 | #endif |
44 | #define ASSERT_EQ(a, b) ASSERT((a) == (b)) |
45 | |
46 | static inline size_t max(size_t a, size_t b) |
47 | { |
48 | return a < b ? b : a; |
49 | } |
50 | static inline uintptr_t align(uintptr_t val, uintptr_t alignment) |
51 | { |
52 | return (val + alignment - 1) & ~(alignment - 1); |
53 | } |
54 | #define ASSERT_ALIGNED(x, y) ASSERT((x) == align((x), y)) |
55 | |
56 | #define CHUNK_SIZE 256 |
57 | #define CHUNK_SIZE_LOG_2 8 |
58 | #define CHUNK_MASK (CHUNK_SIZE - 1) |
59 | STATIC_ASSERT_EQ(CHUNK_SIZE, 1 << CHUNK_SIZE_LOG_2); |
60 | |
61 | #define PAGE_SIZE 65536 |
62 | #define PAGE_SIZE_LOG_2 16 |
63 | #define PAGE_MASK (PAGE_SIZE - 1) |
64 | STATIC_ASSERT_EQ(PAGE_SIZE, 1 << PAGE_SIZE_LOG_2); |
65 | |
66 | #define CHUNKS_PER_PAGE 256 |
STATIC_ASSERT_EQ(PAGE_SIZE, CHUNK_SIZE * CHUNKS_PER_PAGE);
68 | |
69 | #define GRANULE_SIZE 8 |
70 | #define GRANULE_SIZE_LOG_2 3 |
71 | #define LARGE_OBJECT_THRESHOLD 256 |
72 | #define LARGE_OBJECT_GRANULE_THRESHOLD 32 |
73 | |
74 | STATIC_ASSERT_EQ(GRANULE_SIZE, 1 << GRANULE_SIZE_LOG_2); |
75 | STATIC_ASSERT_EQ(LARGE_OBJECT_THRESHOLD, |
                 LARGE_OBJECT_GRANULE_THRESHOLD * GRANULE_SIZE);
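
// A page is therefore 64 kB split into 256 chunks of 256 bytes, and a chunk
// holds at most 32 granules of 8 bytes.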
77 | |
78 | struct chunk |
79 | { |
80 | char data[CHUNK_SIZE]; |
81 | }; |
82 | |
// Small objects are served from chunks dedicated to one of these size classes,
// given in granules.
84 | #define FOR_EACH_SMALL_OBJECT_GRANULES(M) \ |
85 | M(1) \ |
86 | M(2) M(3) M(4) M(5) M(6) M(8) M(10) M(16) M(32) |
87 | |
88 | enum chunk_kind |
89 | { |
90 | #define DEFINE_SMALL_OBJECT_CHUNK_KIND(i) GRANULES_##i, |
91 | FOR_EACH_SMALL_OBJECT_GRANULES(DEFINE_SMALL_OBJECT_CHUNK_KIND) |
92 | #undef DEFINE_SMALL_OBJECT_CHUNK_KIND |
93 | |
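    // The number of small-object size classes. The two values below are
    // sentinels stored in a page header for chunks that belong to (free)
    // large objects rather than to a small-object size class.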
94 | SMALL_OBJECT_CHUNK_KINDS, |
95 | FREE_LARGE_OBJECT = 254, |
96 | LARGE_OBJECT = 255 |
97 | }; |
98 | |
99 | static const uint8_t small_object_granule_sizes[] = |
100 | { |
101 | #define SMALL_OBJECT_GRANULE_SIZE(i) i, |
102 | FOR_EACH_SMALL_OBJECT_GRANULES(SMALL_OBJECT_GRANULE_SIZE) |
103 | #undef SMALL_OBJECT_GRANULE_SIZE |
104 | }; |
105 | |
106 | static enum chunk_kind granules_to_chunk_kind(unsigned granules) |
107 | { |
108 | #define TEST_GRANULE_SIZE(i) \ |
109 | if (granules <= i) \ |
110 | return GRANULES_##i; |
111 | FOR_EACH_SMALL_OBJECT_GRANULES(TEST_GRANULE_SIZE); |
112 | #undef TEST_GRANULE_SIZE |
113 | return LARGE_OBJECT; |
114 | } |
115 | |
116 | static unsigned chunk_kind_to_granules(enum chunk_kind kind) |
117 | { |
118 | switch (kind) |
119 | { |
120 | #define CHUNK_KIND_GRANULE_SIZE(i) \ |
121 | case GRANULES_##i: \ |
122 | return i; |
123 | FOR_EACH_SMALL_OBJECT_GRANULES(CHUNK_KIND_GRANULE_SIZE); |
124 | #undef CHUNK_KIND_GRANULE_SIZE |
125 | default: |
126 | return -1; |
127 | } |
128 | } |
129 | |
130 | // Given a pointer P returned by malloc(), we get a header pointer via |
// P&~PAGE_MASK, and a chunk index via (P&PAGE_MASK)/CHUNK_SIZE. If
// chunk_kinds[chunk_idx] is [FREE_]LARGE_OBJECT, then the pointer belongs to a
// large object; otherwise the kind indicates the size in granules of the
// objects in the chunk.
135 | struct page_header |
136 | { |
137 | uint8_t chunk_kinds[CHUNKS_PER_PAGE]; |
138 | }; |
139 | |
140 | struct page |
141 | { |
142 | union |
143 | { |
144 | struct page_header header; |
145 | struct chunk chunks[CHUNKS_PER_PAGE]; |
146 | }; |
147 | }; |
148 | |
149 | #define PAGE_HEADER_SIZE (sizeof(struct page_header)) |
150 | #define FIRST_ALLOCATABLE_CHUNK 1 |
STATIC_ASSERT_EQ(PAGE_HEADER_SIZE, FIRST_ALLOCATABLE_CHUNK * CHUNK_SIZE);
152 | |
153 | static struct page *get_page(void *ptr) |
154 | { |
155 | return (struct page *)(char *)(((uintptr_t)ptr) & ~PAGE_MASK); |
156 | } |
157 | static unsigned get_chunk_index(void *ptr) |
158 | { |
159 | return (((uintptr_t)ptr) & PAGE_MASK) / CHUNK_SIZE; |
160 | } |
161 | |
162 | struct freelist |
163 | { |
164 | struct freelist *next; |
165 | }; |
166 | |
167 | struct large_object |
168 | { |
169 | struct large_object *next; |
170 | size_t size; |
171 | }; |
172 | |
173 | #define LARGE_OBJECT_HEADER_SIZE (sizeof(struct large_object)) |
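// On wasm32 this is 8 bytes: a 4-byte next pointer plus a 4-byte size. A large
// object spanning a single chunk therefore has a 248-byte payload.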
174 | |
175 | static inline void *get_large_object_payload(struct large_object *obj) |
176 | { |
177 | return ((char *)obj) + LARGE_OBJECT_HEADER_SIZE; |
178 | } |
179 | static inline struct large_object *get_large_object(void *ptr) |
180 | { |
181 | return (struct large_object *)(((char *)ptr) - LARGE_OBJECT_HEADER_SIZE); |
182 | } |
183 | |
184 | static struct freelist *small_object_freelists[SMALL_OBJECT_CHUNK_KINDS]; |
185 | static struct large_object *large_objects; |
186 | |
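// __heap_base is a symbol provided by the wasm linker (wasm-ld); it marks the
// end of the static data, and the walloc heap begins at the next page boundary
// at or above it.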
187 | extern void __heap_base; |
188 | static size_t walloc_heap_size; |
189 | |
190 | static struct page * |
191 | allocate_pages(size_t payload_size, size_t *n_allocated) |
192 | { |
193 | size_t needed = payload_size + PAGE_HEADER_SIZE; |
194 | size_t heap_size = __builtin_wasm_memory_size(0) * PAGE_SIZE; |
195 | uintptr_t base = heap_size; |
196 | uintptr_t preallocated = 0, grow = 0; |
197 | |
198 | if (!walloc_heap_size) |
199 | { |
        // We are allocating the initial pages, if any. We skip everything
        // below the page-aligned __heap_base, then take any additional space
        // up to the current memory size.
        uintptr_t heap_base = align((uintptr_t)&__heap_base, PAGE_SIZE);
        preallocated = heap_size - heap_base; // Preallocated bytes, page-aligned.
204 | walloc_heap_size = preallocated; |
205 | base -= preallocated; |
206 | } |
207 | |
208 | if (preallocated < needed) |
209 | { |
210 | // Always grow the walloc heap at least by 50%. |
211 | grow = align(max(walloc_heap_size / 2, needed - preallocated), |
212 | PAGE_SIZE); |
213 | ASSERT(grow); |
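        // memory.grow is denominated in 64 kB wasm pages; it returns the
        // previous size in pages, or -1 on failure.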
214 | if (__builtin_wasm_memory_grow(0, grow >> PAGE_SIZE_LOG_2) == -1) |
215 | { |
216 | return NULL; |
217 | } |
218 | walloc_heap_size += grow; |
219 | } |
220 | |
221 | struct page *ret = (struct page *)base; |
222 | size_t size = grow + preallocated; |
223 | ASSERT(size); |
224 | ASSERT_ALIGNED(size, PAGE_SIZE); |
225 | *n_allocated = size / PAGE_SIZE; |
226 | return ret; |
227 | } |
228 | |
229 | static char * |
230 | allocate_chunk(struct page *page, unsigned idx, enum chunk_kind kind) |
231 | { |
232 | page->header.chunk_kinds[idx] = kind; |
233 | return page->chunks[idx].data; |
234 | } |
235 | |
236 | // It's possible for splitting to produce a large object of size 248 (256 minus |
237 | // the header size) -- i.e. spanning a single chunk. In that case, push the |
238 | // chunk back on the GRANULES_32 small object freelist. |
239 | static void maybe_repurpose_single_chunk_large_objects_head(void) |
240 | { |
241 | if (large_objects->size < CHUNK_SIZE) |
242 | { |
243 | unsigned idx = get_chunk_index(large_objects); |
244 | char *ptr = allocate_chunk(get_page(large_objects), idx, GRANULES_32); |
245 | large_objects = large_objects->next; |
246 | struct freelist *head = (struct freelist *)ptr; |
247 | head->next = small_object_freelists[GRANULES_32]; |
248 | small_object_freelists[GRANULES_32] = head; |
249 | } |
250 | } |
251 | |
252 | // If there have been any large-object frees since the last large object |
253 | // allocation, go through the freelist and merge any adjacent objects. |
254 | static int pending_large_object_compact = 0; |
255 | static struct large_object ** |
256 | maybe_merge_free_large_object(struct large_object **prev) |
257 | { |
258 | struct large_object *obj = *prev; |
259 | while (1) |
260 | { |
261 | char *end = get_large_object_payload(obj) + obj->size; |
262 | ASSERT_ALIGNED((uintptr_t)end, CHUNK_SIZE); |
263 | unsigned chunk = get_chunk_index(end); |
264 | if (chunk < FIRST_ALLOCATABLE_CHUNK) |
265 | { |
266 | // Merging can't create a large object that newly spans the header chunk. |
267 | // This check also catches the end-of-heap case. |
268 | return prev; |
269 | } |
270 | struct page *page = get_page(end); |
271 | if (page->header.chunk_kinds[chunk] != FREE_LARGE_OBJECT) |
272 | { |
273 | return prev; |
274 | } |
275 | struct large_object *next = (struct large_object *)end; |
276 | |
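        // NEXT is free and physically adjacent: find it in the freelist,
        // absorb its header and payload into OBJ, and unlink it. If PREV was
        // NEXT's own next field, repoint PREV at the link that replaced it.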
277 | struct large_object **prev_prev = &large_objects, *walk = large_objects; |
278 | while (1) |
279 | { |
280 | ASSERT(walk); |
281 | if (walk == next) |
282 | { |
283 | obj->size += LARGE_OBJECT_HEADER_SIZE + walk->size; |
284 | *prev_prev = walk->next; |
285 | if (prev == &walk->next) |
286 | { |
287 | prev = prev_prev; |
288 | } |
289 | break; |
290 | } |
291 | prev_prev = &walk->next; |
292 | walk = walk->next; |
293 | } |
294 | } |
295 | } |
296 | static void |
297 | maybe_compact_free_large_objects(void) |
298 | { |
299 | if (pending_large_object_compact) |
300 | { |
301 | pending_large_object_compact = 0; |
302 | struct large_object **prev = &large_objects; |
303 | while (*prev) |
304 | { |
305 | prev = &(*maybe_merge_free_large_object(prev))->next; |
306 | } |
307 | } |
308 | } |
309 | |
310 | // Allocate a large object with enough space for SIZE payload bytes. Returns a |
311 | // large object with a header, aligned on a chunk boundary, whose payload size |
312 | // may be larger than SIZE, and whose total size (header included) is |
313 | // chunk-aligned. Either a suitable allocation is found in the large object |
314 | // freelist, or we ask the OS for some more pages and treat those pages as a |
315 | // large object. If the allocation fits in that large object and there's more |
316 | // than an aligned chunk's worth of data free at the end, the large object is |
317 | // split. |
318 | // |
// The chunk corresponding to the returned object is marked in its page's
// header as starting a large object.
321 | static struct large_object * |
322 | allocate_large_object(size_t size) |
323 | { |
324 | maybe_compact_free_large_objects(); |
325 | struct large_object *best = NULL, **best_prev = &large_objects; |
326 | size_t best_size = -1; |
327 | for (struct large_object **prev = &large_objects, *walk = large_objects; |
328 | walk; |
329 | prev = &walk->next, walk = walk->next) |
330 | { |
331 | if (walk->size >= size && walk->size < best_size) |
332 | { |
333 | best_size = walk->size; |
334 | best = walk; |
335 | best_prev = prev; |
336 | if (best_size + LARGE_OBJECT_HEADER_SIZE == align(size + LARGE_OBJECT_HEADER_SIZE, CHUNK_SIZE)) |
337 | // Not going to do any better than this; just return it. |
338 | break; |
339 | } |
340 | } |
341 | |
342 | if (!best) |
343 | { |
344 | // The large object freelist doesn't have an object big enough for this |
345 | // allocation. Allocate one or more pages from the OS, and treat that new |
346 | // sequence of pages as a fresh large object. It will be split if |
347 | // necessary. |
348 | size_t size_with_header = size + sizeof(struct large_object); |
349 | size_t n_allocated = 0; |
350 | struct page *page = allocate_pages(size_with_header, &n_allocated); |
351 | if (!page) |
352 | { |
353 | return NULL; |
354 | } |
355 | char *ptr = allocate_chunk(page, FIRST_ALLOCATABLE_CHUNK, LARGE_OBJECT); |
356 | best = (struct large_object *)ptr; |
357 | size_t page_header = ptr - ((char *)page); |
358 | best->next = large_objects; |
359 | best->size = best_size = |
360 | n_allocated * PAGE_SIZE - page_header - LARGE_OBJECT_HEADER_SIZE; |
361 | ASSERT(best_size >= size_with_header); |
362 | } |
363 | |
364 | allocate_chunk(get_page(best), get_chunk_index(best), LARGE_OBJECT); |
365 | |
366 | struct large_object *next = best->next; |
367 | *best_prev = next; |
368 | |
369 | size_t tail_size = (best_size - size) & ~CHUNK_MASK; |
370 | if (tail_size) |
371 | { |
372 | // The best-fitting object has 1 or more aligned chunks free after the |
373 | // requested allocation; split the tail off into a fresh aligned object. |
374 | struct page *start_page = get_page(best); |
375 | char *start = get_large_object_payload(best); |
376 | char *end = start + best_size; |
377 | |
378 | if (start_page == get_page(end - tail_size - 1)) |
379 | { |
380 | // The allocation does not span a page boundary; yay. |
381 | ASSERT_ALIGNED((uintptr_t)end, CHUNK_SIZE); |
382 | } |
383 | else if (size < PAGE_SIZE - LARGE_OBJECT_HEADER_SIZE - CHUNK_SIZE) |
384 | { |
            // If the allocation itself is smaller than a page, split off the
            // head, then fall through to maybe split the tail.
387 | ASSERT_ALIGNED((uintptr_t)end, PAGE_SIZE); |
388 | size_t first_page_size = PAGE_SIZE - (((uintptr_t)start) & PAGE_MASK); |
389 | struct large_object *head = best; |
390 | allocate_chunk(start_page, get_chunk_index(start), FREE_LARGE_OBJECT); |
391 | head->size = first_page_size; |
392 | head->next = large_objects; |
393 | large_objects = head; |
394 | |
395 | maybe_repurpose_single_chunk_large_objects_head(); |
396 | |
397 | struct page *next_page = start_page + 1; |
398 | char *ptr = allocate_chunk(next_page, FIRST_ALLOCATABLE_CHUNK, LARGE_OBJECT); |
399 | best = (struct large_object *)ptr; |
400 | best->size = best_size = best_size - first_page_size - CHUNK_SIZE - LARGE_OBJECT_HEADER_SIZE; |
401 | ASSERT(best_size >= size); |
402 | start = get_large_object_payload(best); |
403 | tail_size = (best_size - size) & ~CHUNK_MASK; |
404 | } |
405 | else |
406 | { |
407 | // A large object that spans more than one page will consume all of its |
408 | // tail pages. Therefore if the split traverses a page boundary, round up |
409 | // to page size. |
410 | ASSERT_ALIGNED((uintptr_t)end, PAGE_SIZE); |
411 | size_t first_page_size = PAGE_SIZE - (((uintptr_t)start) & PAGE_MASK); |
412 | size_t tail_pages_size = align(size - first_page_size, PAGE_SIZE); |
413 | size = first_page_size + tail_pages_size; |
414 | tail_size = best_size - size; |
415 | } |
416 | best->size -= tail_size; |
417 | |
418 | unsigned tail_idx = get_chunk_index(end - tail_size); |
419 | while (tail_idx < FIRST_ALLOCATABLE_CHUNK && tail_size) |
420 | { |
421 | // We would be splitting in a page header; don't do that. |
422 | tail_size -= CHUNK_SIZE; |
423 | tail_idx++; |
424 | } |
425 | |
426 | if (tail_size) |
427 | { |
428 | struct page *page = get_page(end - tail_size); |
429 | char *tail_ptr = allocate_chunk(page, tail_idx, FREE_LARGE_OBJECT); |
430 | struct large_object *tail = (struct large_object *)tail_ptr; |
431 | tail->next = large_objects; |
432 | tail->size = tail_size - LARGE_OBJECT_HEADER_SIZE; |
433 | ASSERT_ALIGNED((uintptr_t)(get_large_object_payload(tail) + tail->size), CHUNK_SIZE); |
434 | large_objects = tail; |
435 | |
436 | maybe_repurpose_single_chunk_large_objects_head(); |
437 | } |
438 | } |
439 | |
440 | ASSERT_ALIGNED((uintptr_t)(get_large_object_payload(best) + best->size), CHUNK_SIZE); |
441 | return best; |
442 | } |
443 | |
444 | static struct freelist * |
445 | obtain_small_objects(enum chunk_kind kind) |
446 | { |
447 | struct freelist **whole_chunk_freelist = &small_object_freelists[GRANULES_32]; |
448 | void *chunk; |
449 | if (*whole_chunk_freelist) |
450 | { |
451 | chunk = *whole_chunk_freelist; |
452 | *whole_chunk_freelist = (*whole_chunk_freelist)->next; |
453 | } |
454 | else |
455 | { |
456 | chunk = allocate_large_object(0); |
457 | if (!chunk) |
458 | { |
459 | return NULL; |
460 | } |
461 | } |
462 | char *ptr = allocate_chunk(get_page(chunk), get_chunk_index(chunk), kind); |
463 | char *end = ptr + CHUNK_SIZE; |
464 | struct freelist *next = NULL; |
465 | size_t size = chunk_kind_to_granules(kind) * GRANULE_SIZE; |
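    // Carve the chunk into objects of this size class, threading a freelist
    // backwards from the end so objects are handed out in ascending address
    // order.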
466 | for (size_t i = size; i <= CHUNK_SIZE; i += size) |
467 | { |
468 | struct freelist *head = (struct freelist *)(end - i); |
469 | head->next = next; |
470 | next = head; |
471 | } |
472 | return next; |
473 | } |
474 | |
475 | static inline size_t size_to_granules(size_t size) |
476 | { |
477 | return (size + GRANULE_SIZE - 1) >> GRANULE_SIZE_LOG_2; |
478 | } |
479 | static struct freelist **get_small_object_freelist(enum chunk_kind kind) |
480 | { |
481 | ASSERT(kind < SMALL_OBJECT_CHUNK_KINDS); |
482 | return &small_object_freelists[kind]; |
483 | } |
484 | |
485 | static void * |
486 | allocate_small(enum chunk_kind kind) |
487 | { |
488 | struct freelist **loc = get_small_object_freelist(kind); |
489 | if (!*loc) |
490 | { |
491 | struct freelist *freelist = obtain_small_objects(kind); |
492 | if (!freelist) |
493 | { |
494 | return NULL; |
495 | } |
496 | *loc = freelist; |
497 | } |
498 | struct freelist *ret = *loc; |
499 | *loc = ret->next; |
500 | return (void *)ret; |
501 | } |
502 | |
503 | static void * |
504 | allocate_large(size_t size) |
505 | { |
506 | struct large_object *obj = allocate_large_object(size); |
507 | return obj ? get_large_object_payload(obj) : NULL; |
508 | } |
509 | |
510 | void * |
511 | malloc(size_t size) |
512 | { |
513 | size_t granules = size_to_granules(size); |
514 | enum chunk_kind kind = granules_to_chunk_kind(granules); |
515 | return (kind == LARGE_OBJECT) ? allocate_large(size) : allocate_small(kind); |
516 | } |
517 | |
518 | void free(void *ptr) |
519 | { |
520 | if (!ptr) |
521 | return; |
522 | struct page *page = get_page(ptr); |
523 | unsigned chunk = get_chunk_index(ptr); |
524 | uint8_t kind = page->header.chunk_kinds[chunk]; |
525 | if (kind == LARGE_OBJECT) |
526 | { |
527 | struct large_object *obj = get_large_object(ptr); |
528 | obj->next = large_objects; |
529 | large_objects = obj; |
530 | allocate_chunk(page, chunk, FREE_LARGE_OBJECT); |
531 | pending_large_object_compact = 1; |
532 | } |
533 | else |
534 | { |
        struct freelist **loc = get_small_object_freelist((enum chunk_kind)kind);
537 | struct freelist *obj = ptr; |
538 | obj->next = *loc; |
539 | *loc = obj; |
540 | } |
541 | } |