diff --git a/error.c b/error.c index 140049fdd2cc2b..1a68f6dd3b29b8 100644 --- a/error.c +++ b/error.c @@ -4243,7 +4243,7 @@ rb_warn_unchilled_literal(VALUE obj) VALUE str = obj; if (STR_SHARED_P(str)) { - str = RSTRING(obj)->as.heap.aux.shared; + str = RSTRING(obj)->as.shared; } VALUE created = get_created_info(str, &line); if (NIL_P(created)) { diff --git a/ext/-test-/string/cstr.c b/ext/-test-/string/cstr.c index 931220b46bdc40..f502c30b98aa59 100644 --- a/ext/-test-/string/cstr.c +++ b/ext/-test-/string/cstr.c @@ -61,12 +61,12 @@ bug_str_unterminated_substring(VALUE str, VALUE vbeg, VALUE vlen) if (RSTRING_LEN(str) < beg) rb_raise(rb_eIndexError, "beg: %ld", beg); if (RSTRING_LEN(str) < beg + len) rb_raise(rb_eIndexError, "end: %ld", beg + len); str = rb_str_new_shared(str); - RSTRING(str)->len = len; + RSTRING(str)->len = (uint32_t)len; if (STR_EMBED_P(str)) { - memmove(RSTRING(str)->as.embed.ary, RSTRING(str)->as.embed.ary + beg, len); + memmove(RSTRING(str)->as.ary, RSTRING(str)->as.ary + beg, len); } else { - RSTRING(str)->as.heap.ptr += beg; + RSTRING(str)->capa += (uint32_t)beg; } return str; } @@ -111,9 +111,9 @@ bug_str_s_cstr_noembed(VALUE self, VALUE str) FL_SET((str2), STR_NOEMBED); memcpy(buf, RSTRING_PTR(str), capacity); RBASIC(str2)->flags &= ~(STR_SHARED | FL_USER5 | FL_USER6); - RSTRING(str2)->as.heap.aux.capa = RSTRING_LEN(str); - RSTRING(str2)->as.heap.ptr = buf; - RSTRING(str2)->len = RSTRING_LEN(str); + RSTRING(str2)->capa = (uint32_t)RSTRING_LEN(str); + RSTRING(str2)->as.ptr = buf; + RSTRING(str2)->len = (uint32_t)RSTRING_LEN(str); TERM_FILL(RSTRING_END(str2), TERM_LEN(str)); return str2; diff --git a/ext/-test-/string/rb_str_dup.c b/ext/-test-/string/rb_str_dup.c index a0bd65820fb55d..07e0ea4761af84 100644 --- a/ext/-test-/string/rb_str_dup.c +++ b/ext/-test-/string/rb_str_dup.c @@ -21,7 +21,7 @@ bug_sharing_with_shared_p(VALUE self, VALUE str) { rb_check_type(str, T_STRING); if (bug_shared_string_p(self, str)) { - return bug_shared_string_p(self, RSTRING(str)->as.heap.aux.shared); + return bug_shared_string_p(self, RSTRING(str)->as.shared); } return Qfalse; } diff --git a/gc.c b/gc.c index 2a4dec32e9119f..a34a09c68faed7 100644 --- a/gc.c +++ b/gc.c @@ -3376,15 +3376,16 @@ rb_gc_mark_children(void *objspace, VALUE obj) case T_STRING: if (STR_SHARED_P(obj)) { - if (STR_EMBED_P(RSTRING(obj)->as.heap.aux.shared)) { + VALUE shared_root = RSTRING(obj)->as.shared; + if (STR_EMBED_P(shared_root)) { /* Embedded shared strings cannot be moved because this string * points into the slot of the shared string. There may be code * using the RSTRING_PTR on the stack, which would pin this * string but not pin the shared string, causing it to move. */ - gc_mark_and_pin_internal(RSTRING(obj)->as.heap.aux.shared); + gc_mark_and_pin_internal(shared_root); } else { - gc_mark_internal(RSTRING(obj)->as.heap.aux.shared); + gc_mark_internal(shared_root); } } break; @@ -4366,7 +4367,7 @@ rb_gc_update_object_references(void *objspace, VALUE obj) case T_STRING: { if (STR_SHARED_P(obj)) { - UPDATE_IF_MOVED(objspace, RSTRING(obj)->as.heap.aux.shared); + UPDATE_IF_MOVED(objspace, RSTRING(obj)->as.shared); } /* If, after move the string is not embedded, and can fit in the diff --git a/gc.rb b/gc.rb index 59adcbc62f64d6..e1eda59c4d82ac 100644 --- a/gc.rb +++ b/gc.rb @@ -269,7 +269,7 @@ def self.stat hash_or_key = nil # GC.stat_heap # # => # {0 => - # {slot_size: 40, + # {slot_size: 64, # heap_eden_pages: 246, # heap_eden_slots: 402802, # total_allocated_pages: 246, @@ -278,7 +278,7 @@ def self.stat hash_or_key = nil # total_allocated_objects: 33867152, # total_freed_objects: 33520523}, # 1 => - # {slot_size: 80, + # {slot_size: 128, # heap_eden_pages: 84, # heap_eden_slots: 68746, # total_allocated_pages: 84, @@ -287,7 +287,7 @@ def self.stat hash_or_key = nil # total_allocated_objects: 147491, # total_freed_objects: 90699}, # 2 => - # {slot_size: 160, + # {slot_size: 256, # heap_eden_pages: 157, # heap_eden_slots: 64182, # total_allocated_pages: 157, @@ -296,7 +296,7 @@ def self.stat hash_or_key = nil # total_allocated_objects: 211460, # total_freed_objects: 190075}, # 3 => - # {slot_size: 320, + # {slot_size: 512, # heap_eden_pages: 8, # heap_eden_slots: 1631, # total_allocated_pages: 8, @@ -305,7 +305,7 @@ def self.stat hash_or_key = nil # total_allocated_objects: 1422, # total_freed_objects: 700}, # 4 => - # {slot_size: 640, + # {slot_size: 1024, # heap_eden_pages: 16, # heap_eden_slots: 1628, # total_allocated_pages: 16, @@ -326,7 +326,7 @@ def self.stat hash_or_key = nil # # GC.stat_heap(2) # # => - # {slot_size: 160, + # {slot_size: 256, # heap_eden_pages: 157, # heap_eden_slots: 64182, # total_allocated_pages: 157, @@ -338,7 +338,7 @@ def self.stat hash_or_key = nil # With arguments +heap_id+ and +key+ given, # returns the value for the given key in the given heap: # - # GC.stat_heap(2, :slot_size) # => 160 + # GC.stat_heap(2, :slot_size) # => 256 # # With arguments +nil+ and +hash+ given, # merges the statistics for all heaps into the given hash: diff --git a/gc/default/default.c b/gc/default/default.c index 046aa146f73055..7f51ca2acdbb8c 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -187,7 +187,7 @@ static RB_THREAD_LOCAL_SPECIFIER int malloc_increase_local; #define USE_TICK_T (PRINT_ENTER_EXIT_TICK || PRINT_ROOT_TICKS) #ifndef HEAP_COUNT -# define HEAP_COUNT 5 +# define HEAP_COUNT 6 #endif typedef struct ractor_newobj_heap_cache { @@ -687,7 +687,12 @@ size_t rb_gc_impl_obj_slot_size(VALUE obj); # endif #endif -#define BASE_SLOT_SIZE (sizeof(struct RBasic) + sizeof(VALUE[RBIMPL_RVALUE_EMBED_LEN_MAX]) + RVALUE_OVERHEAD) +#if SIZEOF_VALUE >= 8 +#define BASE_SLOT_SIZE_LOG2 5 +#else +#define BASE_SLOT_SIZE_LOG2 4 +#endif +#define BASE_SLOT_SIZE (1 << BASE_SLOT_SIZE_LOG2) #ifndef MAX # define MAX(a, b) (((a) > (b)) ? (a) : (b)) @@ -764,7 +769,7 @@ struct free_slot { struct heap_page { unsigned short slot_size; - uint32_t slot_div_magic; + unsigned char slot_size_log2; unsigned short total_slots; unsigned short free_slots; unsigned short final_slots; @@ -841,15 +846,13 @@ heap_page_in_global_empty_pages_pool(rb_objspace_t *objspace, struct heap_page * #define GET_PAGE_HEADER(x) (&GET_PAGE_BODY(x)->header) #define GET_HEAP_PAGE(x) (GET_PAGE_HEADER(x)->page) -static uint32_t slot_div_magics[HEAP_COUNT]; - static inline size_t -slot_index_for_offset(size_t offset, uint32_t div_magic) +slot_index_for_offset(size_t offset, unsigned char slot_size_log2) { - return (size_t)(((uint64_t)offset * div_magic) >> 32); + return offset >> slot_size_log2; } -#define SLOT_INDEX(page, p) slot_index_for_offset((uintptr_t)(p) - (page)->start, (page)->slot_div_magic) +#define SLOT_INDEX(page, p) slot_index_for_offset((uintptr_t)(p) - (page)->start, (page)->slot_size_log2) #define SLOT_BITMAP_INDEX(page, p) (SLOT_INDEX(page, p) / BITS_BITLENGTH) #define SLOT_BITMAP_OFFSET(page, p) (SLOT_INDEX(page, p) & (BITS_BITLENGTH - 1)) #define SLOT_BITMAP_BIT(page, p) ((bits_t)1 << SLOT_BITMAP_OFFSET(page, p)) @@ -1977,19 +1980,16 @@ heap_add_page(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *page) GC_ASSERT(!heap->sweeping_page); GC_ASSERT(heap_page_in_global_empty_pages_pool(objspace, page)); - /* Align start to the first slot_size boundary after the page header */ + /* Align start to slot_size boundary (both are powers of 2) */ uintptr_t start = (uintptr_t)page->body + sizeof(struct heap_page_header); - size_t remainder = start % heap->slot_size; - if (remainder != 0) { - start += heap->slot_size - remainder; - } + start = (start + heap->slot_size - 1) & ~((uintptr_t)heap->slot_size - 1); int slot_count = (int)((HEAP_PAGE_SIZE - (start - (uintptr_t)page->body))/heap->slot_size); page->start = start; page->total_slots = slot_count; page->slot_size = heap->slot_size; - page->slot_div_magic = slot_div_magics[heap - heaps]; + page->slot_size_log2 = BASE_SLOT_SIZE_LOG2 + (unsigned char)(heap - heaps); page->heap = heap; memset(&page->wb_unprotected_bits[0], 0, HEAP_PAGE_BITMAP_SIZE); @@ -2241,7 +2241,7 @@ heap_slot_size(unsigned char pool_id) { GC_ASSERT(pool_id < HEAP_COUNT); - size_t slot_size = (1 << pool_id) * BASE_SLOT_SIZE; + size_t slot_size = BASE_SLOT_SIZE << pool_id; #if RGENGC_CHECK_MODE rb_objspace_t *objspace = rb_gc_get_objspace(); @@ -2360,10 +2360,10 @@ heap_idx_for_size(size_t size) { size += RVALUE_OVERHEAD; - size_t slot_count = CEILDIV(size, BASE_SLOT_SIZE); + if (size <= BASE_SLOT_SIZE) return 0; - /* heap_idx is ceil(log2(slot_count)) */ - size_t heap_idx = 64 - nlz_int64(slot_count - 1); + /* ceil(log2(size)) - BASE_SLOT_SIZE_LOG2 */ + size_t heap_idx = 64 - nlz_int64(size - 1) - BASE_SLOT_SIZE_LOG2; if (heap_idx >= HEAP_COUNT) { rb_bug("heap_idx_for_size: allocation size too large " @@ -9511,11 +9511,12 @@ rb_gc_impl_objspace_init(void *objspace_ptr) rb_bug("Could not preregister postponed job for GC"); } + GC_ASSERT(sizeof(struct RBasic) + sizeof(VALUE[RBIMPL_RVALUE_EMBED_LEN_MAX]) + RVALUE_OVERHEAD <= (BASE_SLOT_SIZE << 1)); + for (int i = 0; i < HEAP_COUNT; i++) { rb_heap_t *heap = &heaps[i]; - heap->slot_size = (1 << i) * BASE_SLOT_SIZE; - slot_div_magics[i] = (uint32_t)((uint64_t)UINT32_MAX / heap->slot_size + 1); + heap->slot_size = BASE_SLOT_SIZE << i; ccan_list_head_init(&heap->pages); } @@ -9553,6 +9554,11 @@ rb_gc_impl_init(void) VALUE gc_constants = rb_hash_new(); rb_hash_aset(gc_constants, ID2SYM(rb_intern("DEBUG")), GC_DEBUG ? Qtrue : Qfalse); rb_hash_aset(gc_constants, ID2SYM(rb_intern("BASE_SLOT_SIZE")), SIZET2NUM(BASE_SLOT_SIZE - RVALUE_OVERHEAD)); + /* Minimum slot size for a standard RVALUE (RBasic + embedded VALUEs) */ + size_t rvalue_min = sizeof(struct RBasic) + sizeof(VALUE[RBIMPL_RVALUE_EMBED_LEN_MAX]) + RVALUE_OVERHEAD; + size_t rvalue_slot = BASE_SLOT_SIZE; + while (rvalue_slot < rvalue_min) rvalue_slot <<= 1; + rb_hash_aset(gc_constants, ID2SYM(rb_intern("RVALUE_SIZE")), SIZET2NUM(rvalue_slot - RVALUE_OVERHEAD)); rb_hash_aset(gc_constants, ID2SYM(rb_intern("RBASIC_SIZE")), SIZET2NUM(sizeof(struct RBasic))); rb_hash_aset(gc_constants, ID2SYM(rb_intern("RVALUE_OVERHEAD")), SIZET2NUM(RVALUE_OVERHEAD)); rb_hash_aset(gc_constants, ID2SYM(rb_intern("HEAP_PAGE_BITMAP_SIZE")), SIZET2NUM(HEAP_PAGE_BITMAP_SIZE)); diff --git a/gc/mmtk/mmtk.c b/gc/mmtk/mmtk.c index 4832916ce6ea2f..6b38fa9a681c33 100644 --- a/gc/mmtk/mmtk.c +++ b/gc/mmtk/mmtk.c @@ -618,17 +618,24 @@ void rb_gc_impl_set_params(void *objspace_ptr) { } static VALUE gc_verify_internal_consistency(VALUE self) { return Qnil; } #define MMTK_HEAP_COUNT 6 -#define MMTK_MAX_OBJ_SIZE 640 - +#if SIZEOF_VALUE >= 8 +#define MMTK_MAX_OBJ_SIZE 1024 static size_t heap_sizes[MMTK_HEAP_COUNT + 1] = { - 32, 40, 80, 160, 320, MMTK_MAX_OBJ_SIZE, 0 + 32, 64, 128, 256, 512, MMTK_MAX_OBJ_SIZE, 0 }; +#else +#define MMTK_MAX_OBJ_SIZE 512 +static size_t heap_sizes[MMTK_HEAP_COUNT + 1] = { + 16, 32, 64, 128, 256, MMTK_MAX_OBJ_SIZE, 0 +}; +#endif void rb_gc_impl_init(void) { VALUE gc_constants = rb_hash_new(); - rb_hash_aset(gc_constants, ID2SYM(rb_intern("BASE_SLOT_SIZE")), SIZET2NUM(sizeof(VALUE) * 5)); + rb_hash_aset(gc_constants, ID2SYM(rb_intern("BASE_SLOT_SIZE")), SIZET2NUM(SIZEOF_VALUE >= 8 ? 64 : 32)); + rb_hash_aset(gc_constants, ID2SYM(rb_intern("RVALUE_SIZE")), SIZET2NUM(SIZEOF_VALUE >= 8 ? 64 : 32)); rb_hash_aset(gc_constants, ID2SYM(rb_intern("RBASIC_SIZE")), SIZET2NUM(sizeof(struct RBasic))); rb_hash_aset(gc_constants, ID2SYM(rb_intern("RVALUE_OVERHEAD")), INT2NUM(0)); rb_hash_aset(gc_constants, ID2SYM(rb_intern("RVARGC_MAX_ALLOCATE_SIZE")), LONG2FIX(MMTK_MAX_OBJ_SIZE)); diff --git a/include/ruby/internal/abi.h b/include/ruby/internal/abi.h index e6d1fa7e8f3770..0c99d93bf9bd99 100644 --- a/include/ruby/internal/abi.h +++ b/include/ruby/internal/abi.h @@ -24,7 +24,7 @@ * In released versions of Ruby, this number is not defined since teeny * versions of Ruby should guarantee ABI compatibility. */ -#define RUBY_ABI_VERSION 1 +#define RUBY_ABI_VERSION 2 /* Windows does not support weak symbols so ruby_abi_version will not exist * in the shared library. */ diff --git a/include/ruby/internal/core/rstring.h b/include/ruby/internal/core/rstring.h index 35175ea94aca81..1721c4aa10d8b9 100644 --- a/include/ruby/internal/core/rstring.h +++ b/include/ruby/internal/core/rstring.h @@ -156,27 +156,16 @@ enum ruby_rstring_flags { */ RSTRING_NOEMBED = RUBY_FL_USER1, - /* Actually, string encodings are also encoded into the flags, using - * remaining bits.*/ + /** + * The string shares its buffer with another string (the "shared root"). + * When set, the `as.shared` union member holds the root VALUE and `capa` + * holds a byte offset into the root's buffer. + */ + RSTRING_SHARED = RUBY_FL_USER0, /** - * This flag has something to do with infamous "f"string. What is a - * fstring? Well it is a special subkind of strings that is immutable, - * deduped globally, and managed by our GC. It is much like a Symbol (in - * fact Symbols are dynamic these days and are backended using fstrings). - * This concept has been silently introduced at some point in 2.x era. - * Since then it gained wider acceptance in the core. But extension - * libraries could not know that until very recently. Strings of this flag - * live in a special Limbo deep inside of the interpreter. Never try to - * manipulate it by hand. - * - * @internal - * - * Fstrings are not the only variant strings that we implement today. - * Other things are behind-the-scene. This is the only one that is visible - * from extension library. There is no clear reason why it has to be. - * Given there are more "polite" ways to create fstrings, it seems this bit - * need not be exposed to extension libraries. Might better be hidden. + * This flag has something to do with infamous "fstring". Fstrings are + * immutable, globally deduped, and managed by our GC. */ RSTRING_FSTR = RUBY_FL_USER17 }; @@ -200,56 +189,34 @@ struct RString { /** * Length of the string, not including terminating NUL character. + * Limited to UINT32_MAX (~4GB). * * @note This is in bytes. */ - long len; + uint32_t len; + + /** + * Multi-purpose field, meaning depends on flags: + * - !NOEMBED (embedded): unused + * - NOEMBED && !SHARED: heap buffer capacity + * - NOEMBED && SHARED: byte offset into shared root's buffer + * - PRECOMPUTED_HASH: precomputed hash value (32-bit) + */ + uint32_t capa; /** String's specific fields. */ union { + /** Pointer to heap-allocated buffer (independent heap strings). */ + char *ptr; + + /** Shared root VALUE (shared strings; RSTRING_SHARED flag set). */ + VALUE shared; /** - * Strings that use separated memory region for contents use this - * pattern. + * Embedded contents (length 1 array for C compatibility). + * @see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102452 */ - struct { - /** - * Pointer to the contents of the string. In the old days each - * string had dedicated memory regions. That is no longer true - * today, but there still are strings of such properties. This - * field could be used to point such things. - */ - char *ptr; - - /** Auxiliary info. */ - union { - - /** - * Capacity of `*ptr`. A continuous memory region of at least - * `capa` bytes is expected to exist at `*ptr`. This can be - * bigger than `len`. - */ - long capa; - - /** - * Parent of the string. Nowadays strings can share their - * contents each other, constructing gigantic nest of objects. - * This situation is called "shared", and this is the field to - * control such properties. - */ - VALUE shared; - } aux; - } heap; - - /** Embedded contents. */ - struct { - /* This is a length 1 array because: - * 1. GCC has a bug that does not optimize C flexible array members - * (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102452) - * 2. Zero length arrays are not supported by all compilers - */ - char ary[1]; - } embed; + char ary[1]; } as; }; @@ -366,7 +333,7 @@ RBIMPL_ATTR_ARTIFICIAL() static inline long RSTRING_LEN(VALUE str) { - return RSTRING(str)->len; + return (long)RSTRING(str)->len; } RBIMPL_ATTR_ARTIFICIAL() @@ -380,17 +347,23 @@ RBIMPL_ATTR_ARTIFICIAL() static inline char * RSTRING_PTR(VALUE str) { - char *ptr = RB_FL_TEST_RAW(str, RSTRING_NOEMBED) ? - RSTRING(str)->as.heap.ptr : - RSTRING(str)->as.embed.ary; + char *ptr; + + if (!RB_FL_TEST_RAW(str, RSTRING_NOEMBED)) { + ptr = RSTRING(str)->as.ary; + } + else if (RB_UNLIKELY(RB_FL_TEST_RAW(str, RSTRING_SHARED))) { + VALUE root = RSTRING(str)->as.shared; + RUBY_ASSERT(RB_FL_TEST_RAW(root, RUBY_T_MASK) == RUBY_T_STRING); + ptr = (RB_FL_TEST_RAW(root, RSTRING_NOEMBED) ? + RSTRING(root)->as.ptr : RSTRING(root)->as.ary) + + RSTRING(str)->capa; + } + else { + ptr = RSTRING(str)->as.ptr; + } if (RUBY_DEBUG && RB_UNLIKELY(! ptr)) { - /* :BEWARE: @shyouhei thinks that currently, there are rooms for this - * function to return NULL. Better check here for maximum safety. - * - * Also, this is not rb_warn() because RSTRING_PTR() can be called - * during GC (see what obj_info() does). rb_warn() needs to allocate - * Ruby objects. That is not possible at this moment. */ rb_debug_rstring_null_ptr("RSTRING_PTR"); } @@ -408,17 +381,7 @@ RBIMPL_ATTR_ARTIFICIAL() static inline char * RSTRING_END(VALUE str) { - char *ptr = RB_FL_TEST_RAW(str, RSTRING_NOEMBED) ? - RSTRING(str)->as.heap.ptr : - RSTRING(str)->as.embed.ary; - long len = RSTRING_LEN(str); - - if (RUBY_DEBUG && RB_UNLIKELY(!ptr)) { - /* Ditto. */ - rb_debug_rstring_null_ptr("RSTRING_END"); - } - - return &ptr[len]; + return RSTRING_PTR(str) + RSTRING_LEN(str); } RBIMPL_ATTR_ARTIFICIAL() diff --git a/internal/class.h b/internal/class.h index ea68b07fc20968..08facfd00726e9 100644 --- a/internal/class.h +++ b/internal/class.h @@ -149,7 +149,7 @@ struct RClass_and_rb_classext_t { }; #if SIZEOF_VALUE >= SIZEOF_LONG_LONG -// Assert that classes can be embedded in heaps[2] (which has 160B slot size) +// Assert that classes can be embedded in heaps[2] (which has 256B slot size) // On 32bit platforms there is no variable width allocation so it doesn't matter. STATIC_ASSERT(sizeof_rb_classext_t, sizeof(struct RClass_and_rb_classext_t) <= 4 * RVALUE_SIZE); #endif diff --git a/object.c b/object.c index 4dcd5d615f85a9..d3036c52f0a7c0 100644 --- a/object.c +++ b/object.c @@ -93,6 +93,7 @@ static ID id_instance_variables_to_inspect; size_t rb_obj_embedded_size(uint32_t fields_count) { + if (fields_count < 1) fields_count = 1; return offsetof(struct RObject, as.ary) + (sizeof(VALUE) * fields_count); } diff --git a/shape.c b/shape.c index 90036722f10026..93ccd3eb595fdd 100644 --- a/shape.c +++ b/shape.c @@ -477,14 +477,14 @@ static attr_index_t shape_grow_capa(attr_index_t current_capa) { const attr_index_t *capacities = rb_shape_tree.capacities; + size_t heaps_count = rb_shape_tree.heaps_count; // First try to use the next size that will be embeddable in a larger object slot. - attr_index_t capa; - while ((capa = *capacities)) { + for (size_t i = 0; i < heaps_count; i++) { + attr_index_t capa = capacities[i]; if (capa > current_capa) { return capa; } - capacities++; } return (attr_index_t)rb_malloc_grow_capa(current_capa, sizeof(VALUE)); @@ -1543,8 +1543,14 @@ Init_default_shapes(void) capacities[heaps_count] = 0; size_t index; for (index = 0; index < heaps_count; index++) { - capacities[index] = (heap_sizes[index] - sizeof(struct RBasic)) / sizeof(VALUE); + if (heap_sizes[index] > sizeof(struct RBasic)) { + capacities[index] = (heap_sizes[index] - sizeof(struct RBasic)) / sizeof(VALUE); + } + else { + capacities[index] = 0; + } } + rb_shape_tree.heaps_count = heaps_count; rb_shape_tree.capacities = capacities; #ifdef HAVE_MMAP diff --git a/shape.h b/shape.h index 96c78f2bc1a356..1296e62a1a4d7f 100644 --- a/shape.h +++ b/shape.h @@ -115,6 +115,7 @@ typedef struct { rb_shape_t *shape_list; rb_shape_t *root_shape; const attr_index_t *capacities; + size_t heaps_count; rb_atomic_t next_shape_id; redblack_node_t *shape_cache; diff --git a/string.c b/string.c index 0fd3ef17b40e00..5ddd3985bbae2f 100644 --- a/string.c +++ b/string.c @@ -143,7 +143,8 @@ VALUE rb_cSymbol; #define STR_SET_EMBED(str) FL_UNSET((str), STR_NOEMBED | STR_SHARED | STR_NOFREE) #define STR_SET_LEN(str, n) do { \ - RSTRING(str)->len = (n); \ + RUBY_ASSERT((long)(n) >= 0 && (unsigned long)(n) <= UINT32_MAX); \ + RSTRING(str)->len = (uint32_t)(n); \ } while (0) #define TERM_LEN(str) (rb_str_enc_fastpath(str) ? 1 : rb_enc_mbminlen(rb_enc_from_index(ENCODING_GET(str)))) @@ -165,25 +166,28 @@ VALUE rb_cSymbol; char *const tmp = ALLOC_N(char, (size_t)(capacity) + (termlen));\ const long tlen = RSTRING_LEN(str);\ memcpy(tmp, RSTRING_PTR(str), str_embed_capa(str));\ - RSTRING(str)->as.heap.ptr = tmp;\ - RSTRING(str)->len = tlen;\ + RSTRING(str)->as.ptr = tmp;\ + STR_SET_LEN(str, tlen);\ STR_SET_NOEMBED(str);\ - RSTRING(str)->as.heap.aux.capa = (capacity);\ + RSTRING(str)->capa = (uint32_t)(capacity);\ }\ }\ else {\ RUBY_ASSERT(!FL_TEST((str), STR_SHARED)); \ - SIZED_REALLOC_N(RSTRING(str)->as.heap.ptr, char, \ + SIZED_REALLOC_N(RSTRING(str)->as.ptr, char, \ (size_t)(capacity) + (termlen), STR_HEAP_SIZE(str)); \ - RSTRING(str)->as.heap.aux.capa = (capacity);\ + RSTRING(str)->capa = (uint32_t)(capacity);\ }\ } while (0) #define STR_SET_SHARED(str, shared_str) do { \ if (!FL_TEST(str, STR_FAKESTR)) { \ - RUBY_ASSERT(RSTRING_PTR(shared_str) <= RSTRING_PTR(str)); \ - RUBY_ASSERT(RSTRING_PTR(str) <= RSTRING_PTR(shared_str) + RSTRING_LEN(shared_str)); \ - RB_OBJ_WRITE((str), &RSTRING(str)->as.heap.aux.shared, (shared_str)); \ + RUBY_ASSERT(RSTRING_PTR(shared_str) <= RSTRING(str)->as.ptr); \ + RUBY_ASSERT(RSTRING(str)->as.ptr <= RSTRING_PTR(shared_str) + RSTRING_LEN(shared_str)); \ + /* compute offset BEFORE overwriting as.ptr with shared VALUE */ \ + uint32_t _shared_off = (uint32_t)(RSTRING(str)->as.ptr - RSTRING_PTR(shared_str)); \ + RSTRING(str)->capa = _shared_off; \ + RB_OBJ_WRITE((str), &RSTRING(str)->as.shared, (shared_str)); \ FL_SET((str), STR_SHARED); \ rb_gc_register_pinning_obj(str); \ FL_SET((shared_str), STR_SHARED_ROOT); \ @@ -192,8 +196,8 @@ VALUE rb_cSymbol; } \ } while (0) -#define STR_HEAP_PTR(str) (RSTRING(str)->as.heap.ptr) -#define STR_HEAP_SIZE(str) ((size_t)RSTRING(str)->as.heap.aux.capa + TERM_LEN(str)) +#define STR_HEAP_PTR(str) (RSTRING(str)->as.ptr) +#define STR_HEAP_SIZE(str) (RUBY_ASSERT(!STR_SHARED_P(str)), (size_t)RSTRING(str)->capa + TERM_LEN(str)) /* TODO: include the terminator size in capa. */ #define STR_ENC_GET(str) get_encoding(str) @@ -211,7 +215,7 @@ VALUE rb_cSymbol; static inline long str_embed_capa(VALUE str) { - return rb_gc_obj_slot_size(str) - offsetof(struct RString, as.embed.ary); + return rb_gc_obj_slot_size(str) - offsetof(struct RString, as.ary); } bool @@ -223,7 +227,7 @@ rb_str_reembeddable_p(VALUE str) static inline size_t rb_str_embed_size(long capa, long termlen) { - size_t size = offsetof(struct RString, as.embed.ary) + capa + termlen; + size_t size = offsetof(struct RString, as.ary) + capa + termlen; if (size < sizeof(struct RString)) size = sizeof(struct RString); return size; } @@ -241,7 +245,7 @@ rb_str_size_as_embedded(VALUE str) /* if the string is not currently embedded, but it can be embedded, how * much space would it require */ else if (rb_str_reembeddable_p(str)) { - size_t capa = RSTRING(str)->as.heap.aux.capa; + size_t capa = RSTRING(str)->capa; if (FL_TEST_RAW(str, STR_PRECOMPUTED_HASH)) capa += sizeof(st_index_t); real_size = rb_str_embed_size(capa, TERM_LEN(str)); @@ -294,8 +298,8 @@ rb_str_make_embedded(VALUE str) RUBY_ASSERT(!STR_EMBED_P(str)); int termlen = TERM_LEN(str); - char *buf = RSTRING(str)->as.heap.ptr; - long old_capa = RSTRING(str)->as.heap.aux.capa + termlen; + char *buf = RSTRING(str)->as.ptr; + long old_capa = RSTRING(str)->capa + termlen; long len = RSTRING(str)->len; STR_SET_EMBED(str); @@ -306,7 +310,7 @@ rb_str_make_embedded(VALUE str) SIZED_FREE_N(buf, old_capa); } - TERM_FILL(RSTRING(str)->as.embed.ary + len, termlen); + TERM_FILL(RSTRING(str)->as.ary + len, termlen); } void @@ -434,7 +438,7 @@ fstring_concurrent_set_hash(VALUE str) st_index_t h; if (FL_TEST_RAW(str, STR_FAKESTR)) { // register_fstring precomputes the hash and stores it in capa for fake strings - h = (st_index_t)RSTRING(str)->as.heap.aux.capa; + h = (st_index_t)RSTRING(str)->capa; } else { h = rb_str_hash(str); @@ -492,18 +496,18 @@ fstring_concurrent_set_create(VALUE str, void *data) str_store_precomputed_hash(new_str, str_do_hash(str)); } else { - new_str = str_new(rb_cString, RSTRING(str)->as.heap.ptr, RSTRING(str)->len); + new_str = str_new(rb_cString, RSTRING(str)->as.ptr, RSTRING(str)->len); rb_enc_copy(new_str, str); #ifdef PRECOMPUTED_FAKESTR_HASH if (rb_str_capacity(new_str) >= RSTRING_LEN(str) + term_len + sizeof(st_index_t)) { - str_store_precomputed_hash(new_str, (st_index_t)RSTRING(str)->as.heap.aux.capa); + str_store_precomputed_hash(new_str, str_do_hash(new_str)); } #endif } str = new_str; } else { - str = str_new_static(rb_cString, RSTRING(str)->as.heap.ptr, + str = str_new_static(rb_cString, RSTRING(str)->as.ptr, RSTRING(str)->len, ENCODING_GET(str)); } @@ -565,7 +569,7 @@ register_fstring(VALUE str, bool copy, bool force_precompute_hash) if (FL_TEST_RAW(str, STR_FAKESTR)) { // if the string hasn't been interned, we'll need the hash twice, so we // compute it once and store it in capa - RSTRING(str)->as.heap.aux.capa = (long)str_do_hash(str); + RSTRING(str)->capa = (uint32_t)str_do_hash(str); } #endif @@ -628,9 +632,9 @@ setup_fake_str(struct RString *fake_str, const char *name, long len, int encidx) ENCODING_SET_INLINED((VALUE)fake_str, encidx); RBASIC_SET_CLASS_RAW((VALUE)fake_str, rb_cString); - fake_str->len = len; - fake_str->as.heap.ptr = (char *)name; - fake_str->as.heap.aux.capa = len; + fake_str->len = (uint32_t)len; + fake_str->as.ptr = (char *)name; + fake_str->capa = (uint32_t)len; return (VALUE)fake_str; } @@ -976,7 +980,7 @@ str_capacity(VALUE str, const int termlen) return RSTRING(str)->len; } else { - return RSTRING(str)->as.heap.aux.capa; + return RSTRING(str)->capa; } } @@ -1005,7 +1009,7 @@ str_alloc_embed(VALUE klass, size_t capa) T_STRING | (RGENGC_WB_PROTECTED_STRING ? FL_WB_PROTECTED : 0), size, 0); str->len = 0; - str->as.embed.ary[0] = 0; + str->as.ary[0] = 0; return (VALUE)str; } @@ -1017,8 +1021,8 @@ str_alloc_heap(VALUE klass) T_STRING | STR_NOEMBED | (RGENGC_WB_PROTECTED_STRING ? FL_WB_PROTECTED : 0), sizeof(struct RString), 0); str->len = 0; - str->as.heap.aux.capa = 0; - str->as.heap.ptr = NULL; + str->capa = 0; + str->as.ptr = NULL; return (VALUE)str; } @@ -1028,7 +1032,7 @@ empty_str_alloc(VALUE klass) { RUBY_DTRACE_CREATE_HOOK(STRING, 0); VALUE str = str_alloc_embed(klass, 0); - memset(RSTRING(str)->as.embed.ary, 0, str_embed_capa(str)); + memset(RSTRING(str)->as.ary, 0, str_embed_capa(str)); ENC_CODERANGE_SET(str, ENC_CODERANGE_7BIT); return str; } @@ -1058,11 +1062,11 @@ str_enc_new(VALUE klass, const char *ptr, long len, rb_encoding *enc) } else { str = str_alloc_heap(klass); - RSTRING(str)->as.heap.aux.capa = len; + RSTRING(str)->capa = (uint32_t)len; /* :FIXME: @shyouhei guesses `len + termlen` is guaranteed to never * integer overflow. If we can STATIC_ASSERT that, the following * mul_add_mul can be reverted to a simple ALLOC_N. */ - RSTRING(str)->as.heap.ptr = + RSTRING(str)->as.ptr = rb_xmalloc_mul_add_mul(sizeof(char), len, sizeof(char), termlen); } @@ -1159,9 +1163,9 @@ str_new_static(VALUE klass, const char *ptr, long len, int encindex) else { RUBY_DTRACE_CREATE_HOOK(STRING, len); str = str_alloc_heap(klass); - RSTRING(str)->len = len; - RSTRING(str)->as.heap.ptr = (char *)ptr; - RSTRING(str)->as.heap.aux.capa = len; + RSTRING(str)->len = (uint32_t)len; + RSTRING(str)->as.ptr = (char *)ptr; + RSTRING(str)->capa = (uint32_t)len; RBASIC(str)->flags |= STR_NOFREE; rb_enc_associate_index(str, encindex); } @@ -1441,7 +1445,7 @@ str_replace_shared_without_enc(VALUE str2, VALUE str) RSTRING_GETMEM(str, ptr, len); if (str_embed_capa(str2) >= len + termlen) { - char *ptr2 = RSTRING(str2)->as.embed.ary; + char *ptr2 = RSTRING(str2)->as.ary; STR_SET_EMBED(str2); memcpy(ptr2, RSTRING_PTR(str), len); TERM_FILL(ptr2+len, termlen); @@ -1449,7 +1453,7 @@ str_replace_shared_without_enc(VALUE str2, VALUE str) else { VALUE root; if (STR_SHARED_P(str)) { - root = RSTRING(str)->as.heap.aux.shared; + root = RSTRING(str)->as.shared; RSTRING_GETMEM(str, ptr, len); } else { @@ -1468,7 +1472,7 @@ str_replace_shared_without_enc(VALUE str2, VALUE str) } } FL_SET(str2, STR_NOEMBED); - RSTRING(str2)->as.heap.ptr = ptr; + RSTRING(str2)->as.ptr = ptr; STR_SET_SHARED(str2, root); } @@ -1530,7 +1534,7 @@ VALUE rb_str_tmp_frozen_no_embed_acquire(VALUE orig) { if (OBJ_FROZEN_RAW(orig) && !STR_EMBED_P(orig) && !rb_str_reembeddable_p(orig)) return orig; - if (STR_SHARED_P(orig) && !STR_EMBED_P(RSTRING(orig)->as.heap.aux.shared)) return rb_str_tmp_frozen_acquire(orig); + if (STR_SHARED_P(orig) && !STR_EMBED_P(RSTRING(orig)->as.shared)) return rb_str_tmp_frozen_acquire(orig); VALUE str = str_alloc_heap(0); OBJ_FREEZE(str); @@ -1544,13 +1548,13 @@ rb_str_tmp_frozen_no_embed_acquire(VALUE orig) * embedded, so we want to create a copy. If the string is a shared root * then it must be embedded, so we want to create a copy. */ if (STR_EMBED_P(orig) || FL_TEST_RAW(orig, STR_SHARED | STR_SHARED_ROOT | RSTRING_FSTR)) { - RSTRING(str)->as.heap.ptr = rb_xmalloc_mul_add_mul(sizeof(char), capa, sizeof(char), TERM_LEN(orig)); - memcpy(RSTRING(str)->as.heap.ptr, RSTRING_PTR(orig), capa); + RSTRING(str)->as.ptr = rb_xmalloc_mul_add_mul(sizeof(char), capa, sizeof(char), TERM_LEN(orig)); + memcpy(RSTRING(str)->as.ptr, RSTRING_PTR(orig), capa); } else { /* orig must be heap allocated and not shared, so we can safely transfer * the pointer to str. */ - RSTRING(str)->as.heap.ptr = RSTRING(orig)->as.heap.ptr; + RSTRING(str)->as.ptr = RSTRING(orig)->as.ptr; RBASIC(str)->flags |= RBASIC(orig)->flags & STR_NOFREE; RBASIC(orig)->flags &= ~STR_NOFREE; STR_SET_SHARED(orig, str); @@ -1561,7 +1565,7 @@ rb_str_tmp_frozen_no_embed_acquire(VALUE orig) } RSTRING(str)->len = RSTRING(orig)->len; - RSTRING(str)->as.heap.aux.capa = capa + (TERM_LEN(orig) - TERM_LEN(str)); + RSTRING(str)->capa = (uint32_t)(capa + (TERM_LEN(orig) - TERM_LEN(str))); return str; } @@ -1577,15 +1581,15 @@ rb_str_tmp_frozen_release(VALUE orig, VALUE tmp) } else if (FL_TEST_RAW(orig, STR_SHARED | STR_TMPLOCK) == STR_TMPLOCK && !OBJ_FROZEN_RAW(orig)) { - VALUE shared = RSTRING(orig)->as.heap.aux.shared; + VALUE shared = RSTRING(orig)->as.shared; if (shared == tmp && !FL_TEST_RAW(tmp, STR_BORROWED)) { - RUBY_ASSERT(RSTRING(orig)->as.heap.ptr == RSTRING(tmp)->as.heap.ptr); + RUBY_ASSERT(RSTRING_PTR(orig) == RSTRING(tmp)->as.ptr); RUBY_ASSERT(RSTRING_LEN(orig) == RSTRING_LEN(tmp)); /* Unshare orig since the root (tmp) only has this one child. */ FL_UNSET_RAW(orig, STR_SHARED); - RSTRING(orig)->as.heap.aux.capa = RSTRING(tmp)->as.heap.aux.capa; + RSTRING(orig)->capa = RSTRING(tmp)->capa; RBASIC(orig)->flags |= RBASIC(tmp)->flags & STR_NOFREE; RUBY_ASSERT(OBJ_FROZEN_RAW(tmp)); @@ -1611,8 +1615,8 @@ heap_str_make_shared(VALUE klass, VALUE orig) VALUE str = str_alloc_heap(klass); STR_SET_LEN(str, RSTRING_LEN(orig)); - RSTRING(str)->as.heap.ptr = RSTRING_PTR(orig); - RSTRING(str)->as.heap.aux.capa = RSTRING(orig)->as.heap.aux.capa; + RSTRING(str)->as.ptr = RSTRING_PTR(orig); + RSTRING(str)->capa = RSTRING(orig)->capa; RBASIC(str)->flags |= RBASIC(orig)->flags & STR_NOFREE; RBASIC(orig)->flags &= ~STR_NOFREE; STR_SET_SHARED(orig, str); @@ -1636,8 +1640,8 @@ str_new_frozen_buffer(VALUE klass, VALUE orig, int copy_encoding) } else { if (FL_TEST_RAW(orig, STR_SHARED)) { - VALUE shared = RSTRING(orig)->as.heap.aux.shared; - long ofs = RSTRING(orig)->as.heap.ptr - RSTRING_PTR(shared); + VALUE shared = RSTRING(orig)->as.shared; + long ofs = (long)RSTRING(orig)->capa; long rest = RSTRING_LEN(shared) - ofs - RSTRING_LEN(orig); RUBY_ASSERT(ofs >= 0); RUBY_ASSERT(rest >= 0); @@ -1649,7 +1653,7 @@ str_new_frozen_buffer(VALUE klass, VALUE orig, int copy_encoding) ENCODING_GET(shared) != ENCODING_GET(orig)) { str = str_new_shared(klass, shared); RUBY_ASSERT(!STR_EMBED_P(str)); - RSTRING(str)->as.heap.ptr += ofs; + RSTRING(str)->capa += (uint32_t)ofs; STR_SET_LEN(str, RSTRING_LEN(str) - (ofs + rest)); } else { @@ -1706,9 +1710,9 @@ rb_str_buf_new(long capa) VALUE str = str_alloc_heap(rb_cString); - RSTRING(str)->as.heap.aux.capa = capa; - RSTRING(str)->as.heap.ptr = ALLOC_N(char, (size_t)capa + 1); - RSTRING(str)->as.heap.ptr[0] = '\0'; + RSTRING(str)->capa = (uint32_t)capa; + RSTRING(str)->as.ptr = ALLOC_N(char, (size_t)capa + 1); + RSTRING(str)->as.ptr[0] = '\0'; return str; } @@ -1801,23 +1805,23 @@ str_shared_replace(VALUE str, VALUE str2) RUBY_ASSERT(len + termlen <= str_embed_capa(str2)); char *new_ptr = ALLOC_N(char, len + termlen); - memcpy(new_ptr, RSTRING(str2)->as.embed.ary, len + termlen); - RSTRING(str2)->as.heap.ptr = new_ptr; + memcpy(new_ptr, RSTRING(str2)->as.ary, len + termlen); + RSTRING(str2)->as.ptr = new_ptr; STR_SET_LEN(str2, len); - RSTRING(str2)->as.heap.aux.capa = len; + RSTRING(str2)->capa = (uint32_t)len; STR_SET_NOEMBED(str2); } STR_SET_NOEMBED(str); FL_UNSET(str, STR_SHARED); - RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2); + RSTRING(str)->as.ptr = RSTRING_PTR(str2); if (FL_TEST(str2, STR_SHARED)) { - VALUE shared = RSTRING(str2)->as.heap.aux.shared; + VALUE shared = RSTRING(str2)->as.shared; STR_SET_SHARED(str, shared); } else { - RSTRING(str)->as.heap.aux.capa = RSTRING(str2)->as.heap.aux.capa; + RSTRING(str)->capa = RSTRING(str2)->capa; } /* abandon str2 */ @@ -1856,11 +1860,11 @@ str_replace(VALUE str, VALUE str2) len = RSTRING_LEN(str2); if (STR_SHARED_P(str2)) { - VALUE shared = RSTRING(str2)->as.heap.aux.shared; + VALUE shared = RSTRING(str2)->as.shared; RUBY_ASSERT(OBJ_FROZEN(shared)); STR_SET_NOEMBED(str); STR_SET_LEN(str, len); - RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2); + RSTRING(str)->as.ptr = RSTRING_PTR(str2); STR_SET_SHARED(str, shared); rb_enc_cr_str_exact_copy(str, str2); } @@ -1892,8 +1896,8 @@ ec_str_alloc_heap(struct rb_execution_context_struct *ec, VALUE klass) NEWOBJ_OF(str, struct RString, klass, T_STRING | STR_NOEMBED | (RGENGC_WB_PROTECTED_STRING ? FL_WB_PROTECTED : 0), sizeof(struct RString), ec); - str->as.heap.aux.capa = 0; - str->as.heap.ptr = NULL; + str->capa = 0; + str->as.ptr = NULL; return (VALUE)str; } @@ -1921,7 +1925,7 @@ str_duplicate_setup_embed(VALUE klass, VALUE str, VALUE dup) RUBY_ASSERT(STR_EMBED_P(dup)); RUBY_ASSERT(str_embed_capa(dup) >= len + TERM_LEN(str)); - MEMCPY(RSTRING(dup)->as.embed.ary, RSTRING(str)->as.embed.ary, char, len + TERM_LEN(str)); + MEMCPY(RSTRING(dup)->as.ary, RSTRING(str)->as.ary, char, len + TERM_LEN(str)); STR_SET_LEN(dup, RSTRING_LEN(str)); return str_duplicate_setup_encoding(str, dup, flags); } @@ -1932,7 +1936,7 @@ str_duplicate_setup_heap(VALUE klass, VALUE str, VALUE dup) VALUE flags = FL_TEST_RAW(str, flag_mask); VALUE root = str; if (FL_TEST_RAW(str, STR_SHARED)) { - root = RSTRING(str)->as.heap.aux.shared; + root = RSTRING(str)->as.shared; } else if (UNLIKELY(!OBJ_FROZEN_RAW(str))) { root = str = str_new_frozen(klass, str); @@ -1941,7 +1945,7 @@ str_duplicate_setup_heap(VALUE klass, VALUE str, VALUE dup) RUBY_ASSERT(!STR_SHARED_P(root)); RUBY_ASSERT(RB_OBJ_FROZEN_RAW(root)); - RSTRING(dup)->as.heap.ptr = RSTRING_PTR(str); + RSTRING(dup)->as.ptr = RSTRING_PTR(str); FL_SET_RAW(dup, RSTRING_NOEMBED); STR_SET_SHARED(dup, root); flags |= RSTRING_NOEMBED | STR_SHARED; @@ -2092,20 +2096,20 @@ rb_str_init(int argc, VALUE *argv, VALUE str) if (STR_EMBED_P(str)) RUBY_ASSERT((long)osize <= str_embed_capa(str)); memcpy(new_ptr, old_ptr, osize < size ? osize : size); FL_UNSET_RAW(str, STR_SHARED|STR_NOFREE); - RSTRING(str)->as.heap.ptr = new_ptr; + RSTRING(str)->as.ptr = new_ptr; } else if (STR_HEAP_SIZE(str) != (size_t)capa + termlen) { - SIZED_REALLOC_N(RSTRING(str)->as.heap.ptr, char, + SIZED_REALLOC_N(RSTRING(str)->as.ptr, char, (size_t)capa + termlen, STR_HEAP_SIZE(str)); } STR_SET_LEN(str, len); - TERM_FILL(&RSTRING(str)->as.heap.ptr[len], termlen); + TERM_FILL(&RSTRING(str)->as.ptr[len], termlen); if (n == 1) { - memcpy(RSTRING(str)->as.heap.ptr, RSTRING_PTR(orig), len); + memcpy(RSTRING(str)->as.ptr, RSTRING_PTR(orig), len); rb_enc_cr_str_exact_copy(str, orig); } FL_SET(str, STR_NOEMBED); - RSTRING(str)->as.heap.aux.capa = capa; + RSTRING(str)->capa = (uint32_t)capa; } else if (n == 1) { rb_str_replace(str, orig); @@ -2566,8 +2570,8 @@ rb_str_times(VALUE str, VALUE times) } else { str2 = str_alloc_heap(rb_cString); - RSTRING(str2)->as.heap.aux.capa = len; - RSTRING(str2)->as.heap.ptr = ZALLOC_N(char, (size_t)len + 1); + RSTRING(str2)->capa = (uint32_t)len; + RSTRING(str2)->as.ptr = ZALLOC_N(char, (size_t)len + 1); } STR_SET_LEN(str2, len); rb_enc_copy(str2, str); @@ -2691,10 +2695,10 @@ str_make_independent_expand(VALUE str, long len, long expand, const int termlen) if (len > capa) len = capa; if (!STR_EMBED_P(str) && str_embed_capa(str) >= capa + termlen) { - ptr = RSTRING(str)->as.heap.ptr; + ptr = RSTRING_PTR(str); STR_SET_EMBED(str); - memcpy(RSTRING(str)->as.embed.ary, ptr, len); - TERM_FILL(RSTRING(str)->as.embed.ary + len, termlen); + memcpy(RSTRING(str)->as.ary, ptr, len); + TERM_FILL(RSTRING(str)->as.ary + len, termlen); STR_SET_LEN(str, len); return; } @@ -2710,9 +2714,9 @@ str_make_independent_expand(VALUE str, long len, long expand, const int termlen) STR_SET_NOEMBED(str); FL_UNSET(str, STR_SHARED|STR_NOFREE); TERM_FILL(ptr + len, termlen); - RSTRING(str)->as.heap.ptr = ptr; + RSTRING(str)->as.ptr = ptr; STR_SET_LEN(str, len); - RSTRING(str)->as.heap.aux.capa = capa; + RSTRING(str)->capa = (uint32_t)capa; } void @@ -2764,7 +2768,7 @@ str_discard(VALUE str) str_modifiable(str); if (!STR_EMBED_P(str) && !FL_TEST(str, STR_SHARED|STR_NOFREE)) { SIZED_FREE_N(STR_HEAP_PTR(str), STR_HEAP_SIZE(str)); - RSTRING(str)->as.heap.ptr = 0; + RSTRING(str)->as.ptr = 0; STR_SET_LEN(str, 0); } } @@ -2864,7 +2868,7 @@ rb_str_change_terminator_length(VALUE str, const int oldtermlen, const int terml if (!STR_EMBED_P(str)) { /* modify capa instead of realloc */ RUBY_ASSERT(!FL_TEST((str), STR_SHARED)); - RSTRING(str)->as.heap.aux.capa = capa - termlen; + RSTRING(str)->capa = (uint32_t)(capa - termlen); } if (termlen > oldtermlen) { TERM_FILL(RSTRING_PTR(str) + len, termlen); @@ -3144,7 +3148,7 @@ str_subseq(VALUE str, long beg, long len) str2 = str_alloc_heap(rb_cString); if (str_embed_capa(str2) >= len + termlen) { - char *ptr2 = RSTRING(str2)->as.embed.ary; + char *ptr2 = RSTRING(str2)->as.ary; STR_SET_EMBED(str2); memcpy(ptr2, RSTRING_PTR(str) + beg, len); TERM_FILL(ptr2+len, termlen); @@ -3156,7 +3160,7 @@ str_subseq(VALUE str, long beg, long len) str_replace_shared(str2, str); RUBY_ASSERT(!STR_EMBED_P(str2)); ENC_CODERANGE_CLEAR(str2); - RSTRING(str2)->as.heap.ptr += beg; + RSTRING(str2)->capa += (uint32_t)beg; if (RSTRING_LEN(str2) > len) { STR_SET_LEN(str2, len); } @@ -3470,18 +3474,18 @@ rb_str_resize(VALUE str, long len) if (len == slen) return str; if (str_embed_capa(str) >= len + termlen) { STR_SET_LEN(str, len); - TERM_FILL(RSTRING(str)->as.embed.ary + len, termlen); + TERM_FILL(RSTRING(str)->as.ary + len, termlen); return str; } str_make_independent_expand(str, slen, len - slen, termlen); } else if (str_embed_capa(str) >= len + termlen) { - capa = RSTRING(str)->as.heap.aux.capa; - char *ptr = STR_HEAP_PTR(str); + capa = RSTRING(str)->capa; + char *ptr = RSTRING_PTR(str); STR_SET_EMBED(str); if (slen > len) slen = len; - if (slen > 0) MEMCPY(RSTRING(str)->as.embed.ary, ptr, char, slen); - TERM_FILL(RSTRING(str)->as.embed.ary + len, termlen); + if (slen > 0) MEMCPY(RSTRING(str)->as.ary, ptr, char, slen); + TERM_FILL(RSTRING(str)->as.ary + len, termlen); STR_SET_LEN(str, len); if (independent) { SIZED_FREE_N(ptr, capa + termlen); @@ -3492,15 +3496,15 @@ rb_str_resize(VALUE str, long len) if (len == slen) return str; str_make_independent_expand(str, slen, len - slen, termlen); } - else if ((capa = RSTRING(str)->as.heap.aux.capa) < len || + else if ((capa = RSTRING(str)->capa) < len || (capa - len) > (len < 1024 ? len : 1024)) { - SIZED_REALLOC_N(RSTRING(str)->as.heap.ptr, char, + SIZED_REALLOC_N(RSTRING(str)->as.ptr, char, (size_t)len + termlen, STR_HEAP_SIZE(str)); - RSTRING(str)->as.heap.aux.capa = len; + RSTRING(str)->capa = (uint32_t)len; } else if (len == slen) return str; STR_SET_LEN(str, len); - TERM_FILL(RSTRING(str)->as.heap.ptr + len, termlen); /* sentinel */ + TERM_FILL(RSTRING(str)->as.ptr + len, termlen); /* sentinel */ } return str; } @@ -5770,10 +5774,10 @@ rb_str_drop_bytes(VALUE str, long len) nlen = olen - len; if (str_embed_capa(str) >= nlen + TERM_LEN(str)) { char *oldptr = ptr; - size_t old_capa = RSTRING(str)->as.heap.aux.capa + TERM_LEN(str); + size_t old_capa = RSTRING(str)->capa + TERM_LEN(str); int fl = (int)(RBASIC(str)->flags & (STR_NOEMBED|STR_SHARED|STR_NOFREE)); STR_SET_EMBED(str); - ptr = RSTRING(str)->as.embed.ary; + ptr = RSTRING(str)->as.ary; memmove(ptr, oldptr + len, nlen); if (fl == STR_NOEMBED) { SIZED_FREE_N(oldptr, old_capa); @@ -5785,7 +5789,8 @@ rb_str_drop_bytes(VALUE str, long len) rb_enc_cr_str_exact_copy(shared, str); OBJ_FREEZE(shared); } - ptr = RSTRING(str)->as.heap.ptr += len; + RSTRING(str)->capa += (uint32_t)len; + ptr = RSTRING_PTR(str); } STR_SET_LEN(str, nlen); @@ -8471,10 +8476,10 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag) SIZED_FREE_N(STR_HEAP_PTR(str), STR_HEAP_SIZE(str)); } TERM_FILL((char *)t, termlen); - RSTRING(str)->as.heap.ptr = (char *)buf; + RSTRING(str)->as.ptr = (char *)buf; STR_SET_LEN(str, t - buf); STR_SET_NOEMBED(str); - RSTRING(str)->as.heap.aux.capa = max; + RSTRING(str)->capa = (uint32_t)max; } else if (rb_enc_mbmaxlen(enc) == 1 || (singlebyte && !hash)) { while (s < send) { @@ -8555,10 +8560,10 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag) SIZED_FREE_N(STR_HEAP_PTR(str), STR_HEAP_SIZE(str)); } TERM_FILL((char *)t, termlen); - RSTRING(str)->as.heap.ptr = (char *)buf; + RSTRING(str)->as.ptr = (char *)buf; STR_SET_LEN(str, t - buf); STR_SET_NOEMBED(str); - RSTRING(str)->as.heap.aux.capa = max; + RSTRING(str)->capa = (uint32_t)max; } if (modify) { diff --git a/test/-ext-/string/test_capacity.rb b/test/-ext-/string/test_capacity.rb index df000f7cdb8103..80a2d5db91cd45 100644 --- a/test/-ext-/string/test_capacity.rb +++ b/test/-ext-/string/test_capacity.rb @@ -5,13 +5,13 @@ class Test_StringCapacity < Test::Unit::TestCase def test_capacity_embedded - assert_equal GC::INTERNAL_CONSTANTS[:BASE_SLOT_SIZE] - embed_header_size - 1, capa('foo') + assert_equal GC::INTERNAL_CONSTANTS[:RVALUE_SIZE] - embed_header_size - 1, capa('foo') assert_equal max_embed_len, capa('1' * max_embed_len) assert_equal max_embed_len, capa('1' * (max_embed_len - 1)) end def test_capacity_shared - sym = ("a" * GC::INTERNAL_CONSTANTS[:BASE_SLOT_SIZE]).to_sym + sym = ("a" * GC::INTERNAL_CONSTANTS[:RVALUE_SIZE]).to_sym assert_equal 0, capa(sym.to_s) end diff --git a/test/-ext-/string/test_set_len.rb b/test/-ext-/string/test_set_len.rb index 1531d76167c35c..a18bbbc70cd237 100644 --- a/test/-ext-/string/test_set_len.rb +++ b/test/-ext-/string/test_set_len.rb @@ -5,7 +5,7 @@ class Test_StrSetLen < Test::Unit::TestCase def setup # Make string long enough so that it is not embedded - @range_end = ("0".ord + GC::INTERNAL_CONSTANTS[:BASE_SLOT_SIZE]).chr + @range_end = ("0".ord + GC::INTERNAL_CONSTANTS[:RVALUE_SIZE]).chr @s0 = [*"0"..@range_end].join("").freeze @s1 = Bug::String.new(@s0) end diff --git a/test/objspace/test_objspace.rb b/test/objspace/test_objspace.rb index d631f97d1bcad8..a68d4298ddd9b5 100644 --- a/test/objspace/test_objspace.rb +++ b/test/objspace/test_objspace.rb @@ -33,8 +33,8 @@ def test_memsize_of_root_shared_string b = a.dup c = nil ObjectSpace.each_object(String) {|x| break c = x if a == x and x.frozen?} - rv_size = GC::INTERNAL_CONSTANTS[:BASE_SLOT_SIZE] - assert_equal([rv_size, rv_size, a.length + 1 + rv_size], [a, b, c].map {|x| ObjectSpace.memsize_of(x)}) + str_slot_size = ObjectSpace.memsize_of(b) + assert_equal([str_slot_size, str_slot_size, a.length + 1 + str_slot_size], [a, b, c].map {|x| ObjectSpace.memsize_of(x)}) end def test_argf_memsize @@ -473,12 +473,12 @@ def test_dump_object assert_include(info, '"embedded":true') assert_include(info, '"ivars":0') - # Non-embed object + # Non-embed object (needs > 6 ivars to exceed pool 0 embed capacity) obj = klass.new - 5.times { |i| obj.instance_variable_set("@ivar#{i}", 0) } + 7.times { |i| obj.instance_variable_set("@ivar#{i}", 0) } info = ObjectSpace.dump(obj) assert_not_include(info, '"embedded":true') - assert_include(info, '"ivars":5') + assert_include(info, '"ivars":7') end def test_dump_control_char diff --git a/test/ruby/test_file_exhaustive.rb b/test/ruby/test_file_exhaustive.rb index be9e6bd44e702d..3f9a71253331e7 100644 --- a/test/ruby/test_file_exhaustive.rb +++ b/test/ruby/test_file_exhaustive.rb @@ -897,10 +897,10 @@ def test_expand_path_memsize bug9934 = '[ruby-core:63114] [Bug #9934]' require "objspace" path = File.expand_path("/foo") - assert_operator(ObjectSpace.memsize_of(path), :<=, path.bytesize + GC::INTERNAL_CONSTANTS[:BASE_SLOT_SIZE], bug9934) + assert_operator(ObjectSpace.memsize_of(path), :<=, path.bytesize + GC::INTERNAL_CONSTANTS[:RVALUE_SIZE], bug9934) path = File.expand_path("/a"*25) assert_operator(ObjectSpace.memsize_of(path), :<=, - (path.bytesize + 1) * 2 + GC::INTERNAL_CONSTANTS[:BASE_SLOT_SIZE], bug9934) + (path.bytesize + 1) * 2 + GC::INTERNAL_CONSTANTS[:RVALUE_SIZE], bug9934) end def test_expand_path_encoding diff --git a/test/ruby/test_gc_compact.rb b/test/ruby/test_gc_compact.rb index f3da8e4e138432..2964a67710657e 100644 --- a/test/ruby/test_gc_compact.rb +++ b/test/ruby/test_gc_compact.rb @@ -315,7 +315,7 @@ def test_moving_arrays_up_heaps GC.verify_compaction_references(expand_heap: true, toward: :empty) Fiber.new { - ary = "hello".chars + ary = "hello world".chars # > 6 elements to exceed pool 0 embed capacity $arys = ARY_COUNT.times.map do x = [] ary.each { |e| x << e } diff --git a/test/ruby/test_object.rb b/test/ruby/test_object.rb index f4dfe2251b884f..e64789d3c410f6 100644 --- a/test/ruby/test_object.rb +++ b/test/ruby/test_object.rb @@ -371,12 +371,17 @@ def initialize o1 = c.new o2 = c.new - o1.instance_variable_set(:@foo, 5) + # Add enough ivars to exceed pool 0 embed capacity (6 on 64-bit) + o1.instance_variable_set(:@d, 3) + o1.instance_variable_set(:@e, 4) + o1.instance_variable_set(:@f, 5) + o1.instance_variable_set(:@foo, 6) o1.instance_variable_set(:@a, 0) o1.instance_variable_set(:@b, 1) o1.instance_variable_set(:@c, 2) refute_includes ObjectSpace.dump(o1), '"embedded":true' o1.remove_instance_variable(:@foo) + o1.remove_instance_variable(:@f) assert_includes ObjectSpace.dump(o1), '"embedded":true' o2.instance_variable_set(:@a, 0) diff --git a/test/ruby/test_optimization.rb b/test/ruby/test_optimization.rb index 5d16984eeff883..08c3531f635853 100644 --- a/test/ruby/test_optimization.rb +++ b/test/ruby/test_optimization.rb @@ -262,7 +262,7 @@ def test_string_freeze_saves_memory 'IO buffer NOT resized prematurely because will likely be reused' s.freeze - assert_equal ObjectSpace.memsize_of(data), ObjectSpace.memsize_of(s), + assert_operator ObjectSpace.memsize_of(s), :<=, ObjectSpace.memsize_of(data), 'buffer resized on freeze since it cannot be written to again' ensure r.close if r diff --git a/test/ruby/test_time.rb b/test/ruby/test_time.rb index 333edb80218a64..80b637d433ee56 100644 --- a/test/ruby/test_time.rb +++ b/test/ruby/test_time.rb @@ -1433,7 +1433,10 @@ def test_memsize RbConfig::SIZEOF["void*"] # Same size as VALUE end sizeof_vtm = RbConfig::SIZEOF["void*"] * 4 + 8 - expect = GC::INTERNAL_CONSTANTS[:BASE_SLOT_SIZE] + sizeof_timew + sizeof_vtm + data_size = GC::INTERNAL_CONSTANTS[:BASE_SLOT_SIZE] + sizeof_timew + sizeof_vtm + # Round up to the next slot size (pools are powers of 2) + expect = GC::INTERNAL_CONSTANTS[:BASE_SLOT_SIZE] + expect <<= 1 while expect < data_size assert_operator ObjectSpace.memsize_of(t), :<=, expect rescue LoadError => e omit "failed to load objspace: #{e.message}" diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 2b9d3ca2560f3f..3c1fd820de8138 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -5977,7 +5977,7 @@ fn jit_rb_str_bytesize( asm_comment!(asm, "get string length"); let str_len_opnd = Opnd::mem( - std::os::raw::c_long::BITS as u8, + 32, // uint32_t len asm.load(recv), RUBY_OFFSET_RSTRING_LEN as i32, ); @@ -6150,7 +6150,7 @@ fn jit_rb_str_getbyte( asm_comment!(asm, "get string length"); let recv = asm.load(recv); let str_len_opnd = Opnd::mem( - std::os::raw::c_long::BITS as u8, + 32, // uint32_t len asm.load(recv), RUBY_OFFSET_RSTRING_LEN as i32, ); @@ -6278,7 +6278,7 @@ fn jit_rb_str_empty_p( asm_comment!(asm, "get string length"); let str_len_opnd = Opnd::mem( - std::os::raw::c_long::BITS as u8, + 32, // uint32_t len asm.load(recv_opnd), RUBY_OFFSET_RSTRING_LEN as i32, ); @@ -7352,6 +7352,12 @@ fn get_string_ptr(asm: &mut Assembler, string_reg: Opnd) -> Opnd { asm_comment!(asm, "get string pointer for embedded or heap"); let flags_opnd = Opnd::mem(VALUE_BITS, string_reg, RUBY_OFFSET_RBASIC_FLAGS); + + // Guard: shared strings store a VALUE in as.ptr, not a data pointer. + // Deopt and let the interpreter handle them. + asm.test(flags_opnd, (RUBY_ELTS_SHARED as u64).into()); + asm.jnz(Target::side_exit(Counter::opt_str_shared)); + asm.test(flags_opnd, (RSTRING_NOEMBED as u64).into()); let heap_ptr_opnd = asm.load(Opnd::mem( usize::BITS as u8, diff --git a/yjit/src/stats.rs b/yjit/src/stats.rs index 0b0926262fdce7..5dc82c8e719ddf 100644 --- a/yjit/src/stats.rs +++ b/yjit/src/stats.rs @@ -525,6 +525,8 @@ make_counters! { getbyte_idx_negative, getbyte_idx_out_of_bounds, + opt_str_shared, + splatkw_not_hash, splatkw_not_nil, diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index 0d7c70d5971dae..41ece32f0e5524 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -2972,6 +2972,7 @@ fn gen_string_concat(jit: &mut JITState, asm: &mut Assembler, strings: Vec } // Generate RSTRING_PTR +// TODO: add shared string guard — shared strings store a VALUE in as.ptr, not a data pointer fn get_string_ptr(asm: &mut Assembler, string: Opnd) -> Opnd { asm_comment!(asm, "get string pointer for embedded or heap"); let string = asm.load(string); diff --git a/zjit/src/cruby_methods.rs b/zjit/src/cruby_methods.rs index f44c33845cf63b..fc9c4110d23577 100644 --- a/zjit/src/cruby_methods.rs +++ b/zjit/src/cruby_methods.rs @@ -433,7 +433,7 @@ fn inline_string_bytesize(fun: &mut hir::Function, block: hir::BlockId, recv: hi recv, id: ID!(len), offset: RUBY_OFFSET_RSTRING_LEN as i32, - return_type: types::CInt64, + return_type: types::CUInt32, }); let result = fun.push_insn(block, hir::Insn::BoxFixnum { @@ -457,7 +457,7 @@ fn inline_string_getbyte(fun: &mut hir::Function, block: hir::BlockId, recv: hir recv, id: ID!(len), offset: RUBY_OFFSET_RSTRING_LEN as i32, - return_type: types::CInt64, + return_type: types::CUInt32, }); // TODO(max): Find a way to mark these guards as not needed for correctness... as in, once // the data dependency is gone (say, the StringGetbyte is elided), they can also be elided. @@ -484,7 +484,7 @@ fn inline_string_setbyte(fun: &mut hir::Function, block: hir::BlockId, recv: hir recv, id: ID!(len), offset: RUBY_OFFSET_RSTRING_LEN as i32, - return_type: types::CInt64, + return_type: types::CUInt32, }); let unboxed_index = fun.push_insn(block, hir::Insn::GuardLess { left: unboxed_index, right: len, state }); let zero = fun.push_insn(block, hir::Insn::Const { val: hir::Const::CInt64(0) }); @@ -506,7 +506,7 @@ fn inline_string_empty_p(fun: &mut hir::Function, block: hir::BlockId, recv: hir recv, id: ID!(len), offset: RUBY_OFFSET_RSTRING_LEN as i32, - return_type: types::CInt64, + return_type: types::CUInt32, }); let zero = fun.push_insn(block, hir::Insn::Const { val: hir::Const::CInt64(0) }); let is_zero = fun.push_insn(block, hir::Insn::IsBitEqual { left: len, right: zero });