Skip to content

Commit c9b7088

Browse files
Introduce power-of-two size pools
Replace the RVALUE_SLOT_SIZE-multiplier based pool sizes with explicit power-of-two (and near-power-of-two) slot sizes. On 64-bit this gives 12 heaps (32, 40, 64, 80, 96, 128, 160, 256, 512, 640, 768, 1024) instead of 5, providing finer granularity and less internal fragmentation. On 32-bit the layout is 5 heaps (32, 64, 128, 256, 512).
1 parent 2567e76 commit c9b7088

5 files changed

Lines changed: 96 additions & 46 deletions

File tree

gc.rb

Lines changed: 22 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -269,43 +269,52 @@ def self.stat hash_or_key = nil
269269
# GC.stat_heap
270270
# # =>
271271
# {0 =>
272-
# {slot_size: 40,
272+
# {slot_size: 32,
273+
# heap_eden_pages: 24,
274+
# heap_eden_slots: 12288,
275+
# total_allocated_pages: 24,
276+
# force_major_gc_count: 0,
277+
# force_incremental_marking_finish_count: 0,
278+
# total_allocated_objects: 8450,
279+
# total_freed_objects: 3120},
280+
# 1 =>
281+
# {slot_size: 64,
273282
# heap_eden_pages: 246,
274283
# heap_eden_slots: 402802,
275284
# total_allocated_pages: 246,
276285
# force_major_gc_count: 2,
277286
# force_incremental_marking_finish_count: 1,
278287
# total_allocated_objects: 33867152,
279288
# total_freed_objects: 33520523},
280-
# 1 =>
281-
# {slot_size: 80,
289+
# 2 =>
290+
# {slot_size: 128,
282291
# heap_eden_pages: 84,
283292
# heap_eden_slots: 68746,
284293
# total_allocated_pages: 84,
285294
# force_major_gc_count: 1,
286295
# force_incremental_marking_finish_count: 4,
287296
# total_allocated_objects: 147491,
288297
# total_freed_objects: 90699},
289-
# 2 =>
290-
# {slot_size: 160,
298+
# 3 =>
299+
# {slot_size: 256,
291300
# heap_eden_pages: 157,
292301
# heap_eden_slots: 64182,
293302
# total_allocated_pages: 157,
294303
# force_major_gc_count: 0,
295304
# force_incremental_marking_finish_count: 0,
296305
# total_allocated_objects: 211460,
297306
# total_freed_objects: 190075},
298-
# 3 =>
299-
# {slot_size: 320,
307+
# 4 =>
308+
# {slot_size: 512,
300309
# heap_eden_pages: 8,
301310
# heap_eden_slots: 1631,
302311
# total_allocated_pages: 8,
303312
# force_major_gc_count: 0,
304313
# force_incremental_marking_finish_count: 0,
305314
# total_allocated_objects: 1422,
306315
# total_freed_objects: 700},
307-
# 4 =>
308-
# {slot_size: 640,
316+
# 5 =>
317+
# {slot_size: 1024,
309318
# heap_eden_pages: 16,
310319
# heap_eden_slots: 1628,
311320
# total_allocated_pages: 16,
@@ -316,17 +325,17 @@ def self.stat hash_or_key = nil
316325
#
317326
# In the example above, the keys in the outer hash are the heap identifiers:
318327
#
319-
# GC.stat_heap.keys # => [0, 1, 2, 3, 4]
328+
# GC.stat_heap.keys # => [0, 1, 2, 3, 4, 5]
320329
#
321330
# On CRuby, each heap identifier is an integer;
322331
# on other implementations, a heap identifier may be a string.
323332
#
324333
# With only argument +heap_id+ given,
325334
# returns statistics for the given heap identifier:
326335
#
327-
# GC.stat_heap(2)
336+
# GC.stat_heap(3)
328337
# # =>
329-
# {slot_size: 160,
338+
# {slot_size: 256,
330339
# heap_eden_pages: 157,
331340
# heap_eden_slots: 64182,
332341
# total_allocated_pages: 157,
@@ -338,7 +347,7 @@ def self.stat hash_or_key = nil
338347
# With arguments +heap_id+ and +key+ given,
339348
# returns the value for the given key in the given heap:
340349
#
341-
# GC.stat_heap(2, :slot_size) # => 160
350+
# GC.stat_heap(3, :slot_size) # => 256
342351
#
343352
# With arguments +nil+ and +hash+ given,
344353
# merges the statistics for all heaps into the given hash:

gc/default/default.c

Lines changed: 60 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -187,9 +187,41 @@ static RB_THREAD_LOCAL_SPECIFIER int malloc_increase_local;
187187
#define USE_TICK_T (PRINT_ENTER_EXIT_TICK || PRINT_ROOT_TICKS)
188188

189189
#ifndef HEAP_COUNT
190-
# define HEAP_COUNT 5
190+
# if SIZEOF_VALUE >= 8
191+
# define HEAP_COUNT 12
192+
# else
193+
# define HEAP_COUNT 5
194+
# endif
191195
#endif
192196

197+
/* Precomputed reciprocals for fast slot index calculation.
198+
* For slot size d: reciprocal = ceil(2^48 / d).
199+
* Then offset / d == (uint32_t)((offset * reciprocal) >> 48)
200+
* for all offset < HEAP_PAGE_SIZE. */
201+
#define SLOT_RECIPROCAL_SHIFT 48
202+
203+
static const uint64_t heap_slot_reciprocal_table[HEAP_COUNT] = {
204+
#if SIZEOF_VALUE >= 8
205+
/* 32 */ (1ULL << 48) / 32,
206+
/* 40 */ (1ULL << 48) / 40 + 1,
207+
/* 64 */ (1ULL << 48) / 64,
208+
/* 80 */ (1ULL << 48) / 80 + 1,
209+
/* 96 */ (1ULL << 48) / 96 + 1,
210+
/* 128 */ (1ULL << 48) / 128,
211+
/* 160 */ (1ULL << 48) / 160 + 1,
212+
/* 256 */ (1ULL << 48) / 256,
213+
/* 512 */ (1ULL << 48) / 512,
214+
/* 640 */ (1ULL << 48) / 640 + 1,
215+
/* 768 */ (1ULL << 48) / 768 + 1,
216+
/* 1024 */ (1ULL << 48) / 1024,
217+
#else
218+
/* 32 */ (1ULL << 48) / 32,
219+
/* 64 */ (1ULL << 48) / 64,
220+
/* 128 */ (1ULL << 48) / 128,
221+
/* 256 */ (1ULL << 48) / 256,
222+
/* 512 */ (1ULL << 48) / 512,
223+
#endif
224+
};
193225
typedef struct ractor_newobj_heap_cache {
194226
struct free_slot *freelist;
195227
struct heap_page *using_page;
@@ -689,15 +721,17 @@ size_t rb_gc_impl_obj_slot_size(VALUE obj);
689721

690722
#define RVALUE_SLOT_SIZE (sizeof(struct RBasic) + sizeof(VALUE[RBIMPL_RVALUE_EMBED_LEN_MAX]) + RVALUE_OVERHEAD)
691723

724+
#if SIZEOF_VALUE >= 8
692725
static const size_t pool_slot_sizes[HEAP_COUNT] = {
693-
RVALUE_SLOT_SIZE,
694-
RVALUE_SLOT_SIZE * 2,
695-
RVALUE_SLOT_SIZE * 4,
696-
RVALUE_SLOT_SIZE * 8,
697-
RVALUE_SLOT_SIZE * 16,
726+
32, 40, 64, 80, 96, 128, 160, 256, 512, 640, 768, 1024,
698727
};
699-
700-
static uint8_t size_to_heap_idx[RVALUE_SLOT_SIZE * (1 << (HEAP_COUNT - 1)) / 8 + 1];
728+
static uint8_t size_to_heap_idx[1024 / 8 + 1];
729+
#else
730+
static const size_t pool_slot_sizes[HEAP_COUNT] = {
731+
32, 64, 128, 256, 512,
732+
};
733+
static uint8_t size_to_heap_idx[512 / 8 + 1];
734+
#endif
701735

702736
#ifndef MAX
703737
# define MAX(a, b) (((a) > (b)) ? (a) : (b))
@@ -707,11 +741,12 @@ static uint8_t size_to_heap_idx[RVALUE_SLOT_SIZE * (1 << (HEAP_COUNT - 1)) / 8 +
707741
#endif
708742
#define roomof(x, y) (((x) + (y) - 1) / (y))
709743
#define CEILDIV(i, mod) roomof(i, mod)
744+
#define MIN_POOL_SLOT_SIZE 32
710745
enum {
711746
HEAP_PAGE_ALIGN = (1UL << HEAP_PAGE_ALIGN_LOG),
712747
HEAP_PAGE_ALIGN_MASK = (~(~0UL << HEAP_PAGE_ALIGN_LOG)),
713748
HEAP_PAGE_SIZE = HEAP_PAGE_ALIGN,
714-
HEAP_PAGE_BITMAP_LIMIT = CEILDIV(CEILDIV(HEAP_PAGE_SIZE, RVALUE_SLOT_SIZE), BITS_BITLENGTH),
749+
HEAP_PAGE_BITMAP_LIMIT = CEILDIV(CEILDIV(HEAP_PAGE_SIZE, MIN_POOL_SLOT_SIZE), BITS_BITLENGTH),
715750
HEAP_PAGE_BITMAP_SIZE = (BITS_SIZE * HEAP_PAGE_BITMAP_LIMIT),
716751
};
717752
#define HEAP_PAGE_ALIGN (1 << HEAP_PAGE_ALIGN_LOG)
@@ -773,8 +808,11 @@ struct free_slot {
773808
};
774809

775810
struct heap_page {
811+
/* Cache line 0: allocation fast path + SLOT_INDEX */
812+
struct free_slot *freelist;
813+
uintptr_t start;
814+
uint64_t slot_size_reciprocal;
776815
unsigned short slot_size;
777-
uint32_t slot_div_magic;
778816
unsigned short total_slots;
779817
unsigned short free_slots;
780818
unsigned short final_slots;
@@ -789,8 +827,6 @@ struct heap_page {
789827

790828
struct heap_page *free_next;
791829
struct heap_page_body *body;
792-
uintptr_t start;
793-
struct free_slot *freelist;
794830
struct ccan_list_node page_node;
795831

796832
bits_t wb_unprotected_bits[HEAP_PAGE_BITMAP_LIMIT];
@@ -851,15 +887,13 @@ heap_page_in_global_empty_pages_pool(rb_objspace_t *objspace, struct heap_page *
851887
#define GET_PAGE_HEADER(x) (&GET_PAGE_BODY(x)->header)
852888
#define GET_HEAP_PAGE(x) (GET_PAGE_HEADER(x)->page)
853889

854-
static uint32_t slot_div_magics[HEAP_COUNT];
855-
856890
static inline size_t
857-
slot_index_for_offset(size_t offset, uint32_t div_magic)
891+
slot_index_for_offset(size_t offset, uint64_t reciprocal)
858892
{
859-
return (size_t)(((uint64_t)offset * div_magic) >> 32);
893+
return (uint32_t)(((uint64_t)offset * reciprocal) >> SLOT_RECIPROCAL_SHIFT);
860894
}
861895

862-
#define SLOT_INDEX(page, p) slot_index_for_offset((uintptr_t)(p) - (page)->start, (page)->slot_div_magic)
896+
#define SLOT_INDEX(page, p) slot_index_for_offset((uintptr_t)(p) - (page)->start, (page)->slot_size_reciprocal)
863897
#define SLOT_BITMAP_INDEX(page, p) (SLOT_INDEX(page, p) / BITS_BITLENGTH)
864898
#define SLOT_BITMAP_OFFSET(page, p) (SLOT_INDEX(page, p) & (BITS_BITLENGTH - 1))
865899
#define SLOT_BITMAP_BIT(page, p) ((bits_t)1 << SLOT_BITMAP_OFFSET(page, p))
@@ -1990,19 +2024,17 @@ heap_add_page(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *page)
19902024
GC_ASSERT(!heap->sweeping_page);
19912025
GC_ASSERT(heap_page_in_global_empty_pages_pool(objspace, page));
19922026

1993-
/* Align start to the first slot_size boundary after the page header */
2027+
/* Align start up to the first slot_size boundary after the page header */
19942028
uintptr_t start = (uintptr_t)page->body + sizeof(struct heap_page_header);
1995-
size_t remainder = start % heap->slot_size;
1996-
if (remainder != 0) {
1997-
start += heap->slot_size - remainder;
1998-
}
2029+
uintptr_t rem = start % heap->slot_size;
2030+
if (rem) start += heap->slot_size - rem;
19992031

20002032
int slot_count = (int)((HEAP_PAGE_SIZE - (start - (uintptr_t)page->body))/heap->slot_size);
20012033

20022034
page->start = start;
20032035
page->total_slots = slot_count;
20042036
page->slot_size = heap->slot_size;
2005-
page->slot_div_magic = slot_div_magics[heap - heaps];
2037+
page->slot_size_reciprocal = heap_slot_reciprocal_table[heap - heaps];
20062038
page->heap = heap;
20072039

20082040
memset(&page->wb_unprotected_bits[0], 0, HEAP_PAGE_BITMAP_SIZE);
@@ -9521,11 +9553,15 @@ rb_gc_impl_objspace_init(void *objspace_ptr)
95219553
rb_bug("Could not preregister postponed job for GC");
95229554
}
95239555

9556+
/* A standard RVALUE (RBasic + embedded VALUEs + debug overhead) must fit
9557+
* in at least one pool. In debug builds RVALUE_OVERHEAD can push this
9558+
* beyond the 40-byte pool into the 64-byte pool, which is fine. */
9559+
GC_ASSERT(rb_gc_impl_size_allocatable_p(sizeof(struct RBasic) + sizeof(VALUE[RBIMPL_RVALUE_EMBED_LEN_MAX])));
9560+
95249561
for (int i = 0; i < HEAP_COUNT; i++) {
95259562
rb_heap_t *heap = &heaps[i];
95269563

95279564
heap->slot_size = pool_slot_sizes[i];
9528-
slot_div_magics[i] = (uint32_t)((uint64_t)UINT32_MAX / heap->slot_size + 1);
95299565

95309566
ccan_list_head_init(&heap->pages);
95319567
}

gc/mmtk/mmtk.c

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -635,12 +635,19 @@ void rb_gc_impl_set_params(void *objspace_ptr) { }
635635

636636
static VALUE gc_verify_internal_consistency(VALUE self) { return Qnil; }
637637

638-
#define MMTK_HEAP_COUNT 6
639-
#define MMTK_MAX_OBJ_SIZE 640
640-
638+
#if SIZEOF_VALUE >= 8
639+
#define MMTK_HEAP_COUNT 12
640+
#define MMTK_MAX_OBJ_SIZE 1024
641641
static size_t heap_sizes[MMTK_HEAP_COUNT + 1] = {
642-
32, 40, 80, 160, 320, MMTK_MAX_OBJ_SIZE, 0
642+
32, 40, 64, 80, 96, 128, 160, 256, 512, 640, 768, MMTK_MAX_OBJ_SIZE, 0
643643
};
644+
#else
645+
#define MMTK_HEAP_COUNT 5
646+
#define MMTK_MAX_OBJ_SIZE 512
647+
static size_t heap_sizes[MMTK_HEAP_COUNT + 1] = {
648+
32, 64, 128, 256, MMTK_MAX_OBJ_SIZE, 0
649+
};
650+
#endif
644651

645652
void
646653
rb_gc_impl_init(void)
@@ -649,8 +656,7 @@ rb_gc_impl_init(void)
649656
rb_hash_aset(gc_constants, ID2SYM(rb_intern("RBASIC_SIZE")), SIZET2NUM(sizeof(struct RBasic)));
650657
rb_hash_aset(gc_constants, ID2SYM(rb_intern("RVALUE_OVERHEAD")), INT2NUM(0));
651658
rb_hash_aset(gc_constants, ID2SYM(rb_intern("RVARGC_MAX_ALLOCATE_SIZE")), LONG2FIX(MMTK_MAX_OBJ_SIZE));
652-
// Pretend we have 5 size pools
653-
rb_hash_aset(gc_constants, ID2SYM(rb_intern("SIZE_POOL_COUNT")), LONG2FIX(MMTK_HEAP_COUNT));
659+
rb_hash_aset(gc_constants, ID2SYM(rb_intern("HEAP_COUNT")), LONG2FIX(MMTK_HEAP_COUNT));
654660
// TODO: correctly set RVALUE_OLD_AGE when we have generational GC support
655661
rb_hash_aset(gc_constants, ID2SYM(rb_intern("RVALUE_OLD_AGE")), INT2FIX(0));
656662
OBJ_FREEZE(gc_constants);

internal/class.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -111,9 +111,9 @@ struct RClass_and_rb_classext_t {
111111
};
112112

113113
#if SIZEOF_VALUE >= SIZEOF_LONG_LONG
114-
// Assert that classes can be embedded in heaps[2] (which has 160B slot size)
114+
// Assert that classes can be embedded in heaps[7] (256B slot size on 64-bit).
115115
// On 32bit platforms there is no variable width allocation so it doesn't matter.
116-
STATIC_ASSERT(sizeof_rb_classext_t, sizeof(struct RClass_and_rb_classext_t) <= 4 * RVALUE_SIZE);
116+
STATIC_ASSERT(sizeof_rb_classext_t, sizeof(struct RClass_and_rb_classext_t) <= 256);
117117
#endif
118118

119119
struct RClass_boxable {
Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
exclude(:test_dump_all_full, "testing behaviour specific to default GC")
22
exclude(:test_dump_flag_age, "testing behaviour specific to default GC")
33
exclude(:test_dump_flags, "testing behaviour specific to default GC")
4-
exclude(:test_dump_includes_slot_size, "can be removed when pool 0 slot size is 32 bytes")
54
exclude(:test_dump_objects_dumps_page_slot_sizes, "testing behaviour specific to default GC")

0 commit comments

Comments
 (0)