@@ -187,9 +187,41 @@ static RB_THREAD_LOCAL_SPECIFIER int malloc_increase_local;
187187#define USE_TICK_T (PRINT_ENTER_EXIT_TICK || PRINT_ROOT_TICKS)
188188
189189#ifndef HEAP_COUNT
190- # define HEAP_COUNT 5
190+ # if SIZEOF_VALUE >= 8
191+ # define HEAP_COUNT 12
192+ # else
193+ # define HEAP_COUNT 5
194+ # endif
191195#endif
192196
197+ /* Precomputed reciprocals for fast slot index calculation.
198+ * For slot size d: reciprocal = ceil(2^48 / d).
199+ * Then offset / d == (uint32_t)((offset * reciprocal) >> 48)
200+ * for all offset < HEAP_PAGE_SIZE. */
201+ #define SLOT_RECIPROCAL_SHIFT 48
202+
203+ static const uint64_t heap_slot_reciprocal_table [HEAP_COUNT ] = {
204+ #if SIZEOF_VALUE >= 8
205+ /* 32 */ (1ULL << 48 ) / 32 ,
206+ /* 40 */ (1ULL << 48 ) / 40 + 1 ,
207+ /* 64 */ (1ULL << 48 ) / 64 ,
208+ /* 80 */ (1ULL << 48 ) / 80 + 1 ,
209+ /* 96 */ (1ULL << 48 ) / 96 + 1 ,
210+ /* 128 */ (1ULL << 48 ) / 128 ,
211+ /* 160 */ (1ULL << 48 ) / 160 + 1 ,
212+ /* 256 */ (1ULL << 48 ) / 256 ,
213+ /* 512 */ (1ULL << 48 ) / 512 ,
214+ /* 640 */ (1ULL << 48 ) / 640 + 1 ,
215+ /* 768 */ (1ULL << 48 ) / 768 + 1 ,
216+ /* 1024 */ (1ULL << 48 ) / 1024 ,
217+ #else
218+ /* 32 */ (1ULL << 48 ) / 32 ,
219+ /* 64 */ (1ULL << 48 ) / 64 ,
220+ /* 128 */ (1ULL << 48 ) / 128 ,
221+ /* 256 */ (1ULL << 48 ) / 256 ,
222+ /* 512 */ (1ULL << 48 ) / 512 ,
223+ #endif
224+ };
193225typedef struct ractor_newobj_heap_cache {
194226 struct free_slot * freelist ;
195227 struct heap_page * using_page ;
@@ -689,15 +721,17 @@ size_t rb_gc_impl_obj_slot_size(VALUE obj);
689721
690722#define RVALUE_SLOT_SIZE (sizeof(struct RBasic) + sizeof(VALUE[RBIMPL_RVALUE_EMBED_LEN_MAX]) + RVALUE_OVERHEAD)
691723
724+ #if SIZEOF_VALUE >= 8
692725static const size_t pool_slot_sizes [HEAP_COUNT ] = {
693- RVALUE_SLOT_SIZE ,
694- RVALUE_SLOT_SIZE * 2 ,
695- RVALUE_SLOT_SIZE * 4 ,
696- RVALUE_SLOT_SIZE * 8 ,
697- RVALUE_SLOT_SIZE * 16 ,
726+ 32 , 40 , 64 , 80 , 96 , 128 , 160 , 256 , 512 , 640 , 768 , 1024 ,
698727};
699-
700- static uint8_t size_to_heap_idx [RVALUE_SLOT_SIZE * (1 << (HEAP_COUNT - 1 )) / 8 + 1 ];
728+ static uint8_t size_to_heap_idx [1024 / 8 + 1 ];
729+ #else
730+ static const size_t pool_slot_sizes [HEAP_COUNT ] = {
731+ 32 , 64 , 128 , 256 , 512 ,
732+ };
733+ static uint8_t size_to_heap_idx [512 / 8 + 1 ];
734+ #endif
701735
702736#ifndef MAX
703737# define MAX (a , b ) (((a) > (b)) ? (a) : (b))
@@ -707,11 +741,12 @@ static uint8_t size_to_heap_idx[RVALUE_SLOT_SIZE * (1 << (HEAP_COUNT - 1)) / 8 +
707741#endif
708742#define roomof (x , y ) (((x) + (y) - 1) / (y))
709743#define CEILDIV (i , mod ) roomof(i, mod)
744+ #define MIN_POOL_SLOT_SIZE 32
710745enum {
711746 HEAP_PAGE_ALIGN = (1UL << HEAP_PAGE_ALIGN_LOG ),
712747 HEAP_PAGE_ALIGN_MASK = (~(~0UL << HEAP_PAGE_ALIGN_LOG )),
713748 HEAP_PAGE_SIZE = HEAP_PAGE_ALIGN ,
714- HEAP_PAGE_BITMAP_LIMIT = CEILDIV (CEILDIV (HEAP_PAGE_SIZE , RVALUE_SLOT_SIZE ), BITS_BITLENGTH ),
749+ HEAP_PAGE_BITMAP_LIMIT = CEILDIV (CEILDIV (HEAP_PAGE_SIZE , MIN_POOL_SLOT_SIZE ), BITS_BITLENGTH ),
715750 HEAP_PAGE_BITMAP_SIZE = (BITS_SIZE * HEAP_PAGE_BITMAP_LIMIT ),
716751};
717752#define HEAP_PAGE_ALIGN (1 << HEAP_PAGE_ALIGN_LOG)
@@ -773,8 +808,11 @@ struct free_slot {
773808};
774809
775810struct heap_page {
811+ /* Cache line 0: allocation fast path + SLOT_INDEX */
812+ struct free_slot * freelist ;
813+ uintptr_t start ;
814+ uint64_t slot_size_reciprocal ;
776815 unsigned short slot_size ;
777- uint32_t slot_div_magic ;
778816 unsigned short total_slots ;
779817 unsigned short free_slots ;
780818 unsigned short final_slots ;
@@ -789,8 +827,6 @@ struct heap_page {
789827
790828 struct heap_page * free_next ;
791829 struct heap_page_body * body ;
792- uintptr_t start ;
793- struct free_slot * freelist ;
794830 struct ccan_list_node page_node ;
795831
796832 bits_t wb_unprotected_bits [HEAP_PAGE_BITMAP_LIMIT ];
@@ -851,15 +887,13 @@ heap_page_in_global_empty_pages_pool(rb_objspace_t *objspace, struct heap_page *
851887#define GET_PAGE_HEADER (x ) (&GET_PAGE_BODY(x)->header)
852888#define GET_HEAP_PAGE (x ) (GET_PAGE_HEADER(x)->page)
853889
854- static uint32_t slot_div_magics [HEAP_COUNT ];
855-
856890static inline size_t
857- slot_index_for_offset (size_t offset , uint32_t div_magic )
891+ slot_index_for_offset (size_t offset , uint64_t reciprocal )
858892{
859- return (size_t )(((uint64_t )offset * div_magic ) >> 32 );
893+ return (uint32_t )(((uint64_t )offset * reciprocal ) >> SLOT_RECIPROCAL_SHIFT );
860894}
861895
862- #define SLOT_INDEX (page , p ) slot_index_for_offset((uintptr_t)(p) - (page)->start, (page)->slot_div_magic )
896+ #define SLOT_INDEX (page , p ) slot_index_for_offset((uintptr_t)(p) - (page)->start, (page)->slot_size_reciprocal )
863897#define SLOT_BITMAP_INDEX (page , p ) (SLOT_INDEX(page, p) / BITS_BITLENGTH)
864898#define SLOT_BITMAP_OFFSET (page , p ) (SLOT_INDEX(page, p) & (BITS_BITLENGTH - 1))
865899#define SLOT_BITMAP_BIT (page , p ) ((bits_t)1 << SLOT_BITMAP_OFFSET(page, p))
@@ -1990,19 +2024,17 @@ heap_add_page(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *page)
19902024 GC_ASSERT (!heap -> sweeping_page );
19912025 GC_ASSERT (heap_page_in_global_empty_pages_pool (objspace , page ));
19922026
1993- /* Align start to the first slot_size boundary after the page header */
2027+ /* Align start to the first slot_size boundary after the page header */
19942028 uintptr_t start = (uintptr_t )page -> body + sizeof (struct heap_page_header );
1995- size_t remainder = start % heap -> slot_size ;
1996- if (remainder != 0 ) {
1997- start += heap -> slot_size - remainder ;
1998- }
2029+ uintptr_t rem = start % heap -> slot_size ;
2030+ if (rem ) start += heap -> slot_size - rem ;
19992031
20002032 int slot_count = (int )((HEAP_PAGE_SIZE - (start - (uintptr_t )page -> body ))/heap -> slot_size );
20012033
20022034 page -> start = start ;
20032035 page -> total_slots = slot_count ;
20042036 page -> slot_size = heap -> slot_size ;
2005- page -> slot_div_magic = slot_div_magics [heap - heaps ];
2037+ page -> slot_size_reciprocal = heap_slot_reciprocal_table [heap - heaps ];
20062038 page -> heap = heap ;
20072039
20082040 memset (& page -> wb_unprotected_bits [0 ], 0 , HEAP_PAGE_BITMAP_SIZE );
@@ -9521,11 +9553,15 @@ rb_gc_impl_objspace_init(void *objspace_ptr)
95219553 rb_bug ("Could not preregister postponed job for GC" );
95229554 }
95239555
9556+ /* A standard RVALUE (RBasic + embedded VALUEs + debug overhead) must fit
9557+ * in at least one pool. In debug builds RVALUE_OVERHEAD can push this
9558+ * beyond the 40-byte pool into the 64-byte pool, which is fine. */
9559+ GC_ASSERT (rb_gc_impl_size_allocatable_p (sizeof (struct RBasic ) + sizeof (VALUE [RBIMPL_RVALUE_EMBED_LEN_MAX ])));
9560+
95249561 for (int i = 0 ; i < HEAP_COUNT ; i ++ ) {
95259562 rb_heap_t * heap = & heaps [i ];
95269563
95279564 heap -> slot_size = pool_slot_sizes [i ];
9528- slot_div_magics [i ] = (uint32_t )((uint64_t )UINT32_MAX / heap -> slot_size + 1 );
95299565
95309566 ccan_list_head_init (& heap -> pages );
95319567 }
0 commit comments