From f8bc6da8881cf2109e7eda6e1bec18fe9be90669 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Tue, 12 Nov 2024 17:34:57 +0100 Subject: [PATCH] Unpacker: `key_cache` option. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Heavily inspired from https://github.com/ruby/json/pull/675. When parsing documents with the same structure repeatedly, a lot of time can be saved by keeping a small cache of map keys encountered. Using the existing `bench/bench.rb`, comparing with and without the cache shows a 30% performance improvement: ``` Calculating ------------------------------------- unpack-pooled 960.380k (± 1.4%) i/s - 4.865M in 5.066600s unpack-key-cache 1.245M (± 1.6%) i/s - 6.232M in 5.009060s Comparison: unpack-pooled: 960379.8 i/s unpack-key-cache: 1244517.6 i/s - 1.30x (± 0.00) faster ``` However, on the same benchmark, but with the cache filled with other keys, the performance is notably degraded: ``` Calculating ------------------------------------- unpack-pooled 926.849k (± 2.1%) i/s - 4.639M in 5.007333s unpack-key-cache 822.266k (± 2.4%) i/s - 4.113M in 5.004645s Comparison: unpack-pooled: 926849.2 i/s unpack-key-cache: 822265.6 i/s - 1.13x (± 0.00) slower ``` So this feature is powerful but situational. --- doclib/msgpack/unpacker.rb | 2 + ext/msgpack/buffer.h | 127 +++++++++++++++++++++++++++++++++++ ext/msgpack/extconf.rb | 1 + ext/msgpack/unpacker.c | 45 +++++++++++-- ext/msgpack/unpacker.h | 16 +++-- ext/msgpack/unpacker_class.c | 5 ++ 6 files changed, 188 insertions(+), 8 deletions(-) diff --git a/doclib/msgpack/unpacker.rb b/doclib/msgpack/unpacker.rb index 06ef6ad2..ea4f0786 100644 --- a/doclib/msgpack/unpacker.rb +++ b/doclib/msgpack/unpacker.rb @@ -19,6 +19,8 @@ class Unpacker # Supported options: # # * *:symbolize_keys* deserialize keys of Hash objects as Symbol instead of String + # * *:freeze* freeze the deserialized objects. Can allow string deduplication and some allocation elision. + # * *:key_cache* Enable caching of map keys, this can improve performance significantly if the same map keys are frequently encountered, but also degrade performance if that's not the case. # * *:allow_unknown_ext* allow to deserialize ext type object with unknown type id as ExtensionValue instance. Otherwise (by default), unpacker throws UnknownExtTypeError. # # See also Buffer#initialize for other options. diff --git a/ext/msgpack/buffer.h b/ext/msgpack/buffer.h index 8340f0fb..c2ed46c4 100644 --- a/ext/msgpack/buffer.h +++ b/ext/msgpack/buffer.h @@ -473,4 +473,131 @@ static inline VALUE msgpack_buffer_read_top_as_symbol(msgpack_buffer_t* b, size_ return rb_str_intern(msgpack_buffer_read_top_as_string(b, length, true, utf8)); } +// Hash keys are likely to be repeated, and are frozen. +// As such we can re-use them if we keep a cache of the ones we've seen so far, +// and save much more expensive lookups into the global fstring table. +// This cache implementation is deliberately simple, as we're optimizing for compactness, +// to be able to fit easily embeded inside msgpack_unpacker_t. +// As such, binary search into a sorted array gives a good tradeoff between compactness and +// performance. +#define MSGPACK_KEY_CACHE_CAPACITY 63 + +typedef struct msgpack_key_cache_t msgpack_key_cache_t; +struct msgpack_key_cache_t { + int length; + VALUE entries[MSGPACK_KEY_CACHE_CAPACITY]; +}; + +static inline VALUE build_interned_string(const char *str, const long length) +{ +# ifdef HAVE_RB_ENC_INTERNED_STR + return rb_enc_interned_str(str, length, rb_utf8_encoding()); +# else + VALUE rstring = rb_utf8_str_new(str, length); + return rb_funcall(rb_str_freeze(rstring), s_uminus, 0); +# endif +} + +static inline VALUE build_symbol(const char *str, const long length) +{ + return rb_str_intern(build_interned_string(str, length)); +} + +static void rvalue_cache_insert_at(msgpack_key_cache_t *cache, int index, VALUE rstring) +{ + MEMMOVE(&cache->entries[index + 1], &cache->entries[index], VALUE, cache->length - index); + cache->length++; + cache->entries[index] = rstring; +} + +static inline int rstring_cache_cmp(const char *str, const long length, VALUE rstring) +{ + long rstring_length = RSTRING_LEN(rstring); + if (length == rstring_length) { + return memcmp(str, RSTRING_PTR(rstring), length); + } else { + return (int)(length - rstring_length); + } +} + +static VALUE rstring_cache_fetch(msgpack_key_cache_t *cache, const char *str, const long length) +{ + int low = 0; + int high = cache->length - 1; + int mid = 0; + int last_cmp = 0; + + while (low <= high) { + mid = (high + low) >> 1; + VALUE entry = cache->entries[mid]; + last_cmp = rstring_cache_cmp(str, length, entry); + + if (last_cmp == 0) { + return entry; + } else if (last_cmp > 0) { + low = mid + 1; + } else { + high = mid - 1; + } + } + + VALUE rstring = build_interned_string(str, length); + + if (cache->length < MSGPACK_KEY_CACHE_CAPACITY) { + if (last_cmp > 0) { + mid += 1; + } + + rvalue_cache_insert_at(cache, mid, rstring); + } + return rstring; +} + +static VALUE rsymbol_cache_fetch(msgpack_key_cache_t *cache, const char *str, const long length) +{ + int low = 0; + int high = cache->length - 1; + int mid = 0; + int last_cmp = 0; + + while (low <= high) { + mid = (high + low) >> 1; + VALUE entry = cache->entries[mid]; + last_cmp = rstring_cache_cmp(str, length, rb_sym2str(entry)); + + if (last_cmp == 0) { + return entry; + } else if (last_cmp > 0) { + low = mid + 1; + } else { + high = mid - 1; + } + } + + VALUE rsymbol = build_symbol(str, length); + + if (cache->length < MSGPACK_KEY_CACHE_CAPACITY) { + if (last_cmp > 0) { + mid += 1; + } + + rvalue_cache_insert_at(cache, mid, rsymbol); + } + return rsymbol; +} + +static inline VALUE msgpack_buffer_read_top_as_interned_symbol(msgpack_buffer_t* b, msgpack_key_cache_t *cache, size_t length) +{ + VALUE result = rsymbol_cache_fetch(cache, b->read_buffer, length); + _msgpack_buffer_consumed(b, length); + return result; +} + +static inline VALUE msgpack_buffer_read_top_as_interned_string(msgpack_buffer_t* b, msgpack_key_cache_t *cache, size_t length) +{ + VALUE result = rstring_cache_fetch(cache, b->read_buffer, length); + _msgpack_buffer_consumed(b, length); + return result; +} + #endif diff --git a/ext/msgpack/extconf.rb b/ext/msgpack/extconf.rb index 800896ea..f6ff2d47 100644 --- a/ext/msgpack/extconf.rb +++ b/ext/msgpack/extconf.rb @@ -3,6 +3,7 @@ have_func("rb_enc_interned_str", "ruby.h") # Ruby 3.0+ have_func("rb_hash_new_capa", "ruby.h") # Ruby 3.2+ have_func("rb_proc_call_with_block", "ruby.h") # CRuby (TruffleRuby doesn't have it) +have_func("rb_gc_mark_locations", "ruby.h") # Missing on TruffleRuby append_cflags([ "-fvisibility=hidden", diff --git a/ext/msgpack/unpacker.c b/ext/msgpack/unpacker.c index 32f7e845..b31f60a3 100644 --- a/ext/msgpack/unpacker.c +++ b/ext/msgpack/unpacker.c @@ -26,6 +26,19 @@ #define rb_proc_call_with_block(recv, argc, argv, block) rb_funcallv(recv, rb_intern("call"), argc, argv) #endif +#ifndef HAVE_RB_GC_MARK_LOCATIONS +// For TruffleRuby +void rb_gc_mark_locations(const VALUE *start, const VALUE *end) +{ + VALUE *value = start; + + while (value < end) { + rb_gc_mark(*value); + value++; + } +} +#endif + struct protected_proc_call_args { VALUE proc; int argc; @@ -130,11 +143,18 @@ void msgpack_unpacker_mark_stack(msgpack_unpacker_stack_t* stack) } } +void msgpack_unpacker_mark_key_cache(msgpack_key_cache_t *cache) +{ + const VALUE *entries = &cache->entries[0]; + rb_gc_mark_locations(entries, entries + cache->length); +} + void msgpack_unpacker_mark(msgpack_unpacker_t* uk) { rb_gc_mark(uk->last_object); rb_gc_mark(uk->reading_raw); msgpack_unpacker_mark_stack(&uk->stack); + msgpack_unpacker_mark_key_cache(&uk->key_cache); /* See MessagePack_Buffer_wrap */ /* msgpack_buffer_mark(UNPACKER_BUFFER_(uk)); */ rb_gc_mark(uk->buffer_ref); @@ -374,15 +394,32 @@ static inline int read_raw_body_begin(msgpack_unpacker_t* uk, int raw_type) size_t length = uk->reading_raw_remaining; if(length <= msgpack_buffer_top_readable_size(UNPACKER_BUFFER_(uk))) { int ret; - if ((uk->optimized_symbol_ext_type && uk->symbol_ext_type == raw_type) || (uk->symbolize_keys && is_reading_map_key(uk))) { + if ((uk->optimized_symbol_ext_type && uk->symbol_ext_type == raw_type)) { VALUE symbol = msgpack_buffer_read_top_as_symbol(UNPACKER_BUFFER_(uk), length, raw_type != RAW_TYPE_BINARY); ret = object_complete_symbol(uk, symbol); + } else if (is_reading_map_key(uk) && raw_type == RAW_TYPE_STRING) { + /* don't use zerocopy for hash keys but get a frozen string directly + * because rb_hash_aset freezes keys and it causes copying */ + VALUE key; + if (uk->symbolize_keys) { + if (uk->use_key_cache) { + key = msgpack_buffer_read_top_as_interned_symbol(UNPACKER_BUFFER_(uk), &uk->key_cache, length); + } else { + key = msgpack_buffer_read_top_as_symbol(UNPACKER_BUFFER_(uk), length, true); + } + ret = object_complete_symbol(uk, key); + } else { + if (uk->use_key_cache) { + key = msgpack_buffer_read_top_as_interned_string(UNPACKER_BUFFER_(uk), &uk->key_cache, length); + } else { + key = msgpack_buffer_read_top_as_string(UNPACKER_BUFFER_(uk), length, true, true); + } + + ret = object_complete(uk, key); + } } else { bool will_freeze = uk->freeze; if(raw_type == RAW_TYPE_STRING || raw_type == RAW_TYPE_BINARY) { - /* don't use zerocopy for hash keys but get a frozen string directly - * because rb_hash_aset freezes keys and it causes copying */ - will_freeze = will_freeze || is_reading_map_key(uk); VALUE string = msgpack_buffer_read_top_as_string(UNPACKER_BUFFER_(uk), length, will_freeze, raw_type == RAW_TYPE_STRING); ret = object_complete(uk, string); } else { diff --git a/ext/msgpack/unpacker.h b/ext/msgpack/unpacker.h index 400e6610..10e13268 100644 --- a/ext/msgpack/unpacker.h +++ b/ext/msgpack/unpacker.h @@ -50,6 +50,7 @@ struct msgpack_unpacker_stack_t { struct msgpack_unpacker_t { msgpack_buffer_t buffer; msgpack_unpacker_stack_t stack; + msgpack_key_cache_t key_cache; VALUE self; VALUE last_object; @@ -66,10 +67,12 @@ struct msgpack_unpacker_t { /* options */ int symbol_ext_type; - bool symbolize_keys; - bool freeze; - bool allow_unknown_ext; - bool optimized_symbol_ext_type; + + bool use_key_cache: 1; + bool symbolize_keys: 1; + bool freeze: 1; + bool allow_unknown_ext: 1; + bool optimized_symbol_ext_type: 1; }; #define UNPACKER_BUFFER_(uk) (&(uk)->buffer) @@ -101,6 +104,11 @@ static inline void msgpack_unpacker_set_symbolized_keys(msgpack_unpacker_t* uk, uk->symbolize_keys = enable; } +static inline void msgpack_unpacker_set_key_cache(msgpack_unpacker_t* uk, bool enable) +{ + uk->use_key_cache = enable; +} + static inline void msgpack_unpacker_set_freeze(msgpack_unpacker_t* uk, bool enable) { uk->freeze = enable; diff --git a/ext/msgpack/unpacker_class.c b/ext/msgpack/unpacker_class.c index 429fc1af..c4f84699 100644 --- a/ext/msgpack/unpacker_class.c +++ b/ext/msgpack/unpacker_class.c @@ -34,6 +34,7 @@ static VALUE eUnknownExtTypeError; static VALUE mTypeError; // obsoleted. only for backward compatibility. See #86. static VALUE sym_symbolize_keys; +static VALUE sym_key_cache; static VALUE sym_freeze; static VALUE sym_allow_unknown_ext; @@ -128,6 +129,9 @@ VALUE MessagePack_Unpacker_initialize(int argc, VALUE* argv, VALUE self) if(options != Qnil) { VALUE v; + v = rb_hash_aref(options, sym_key_cache); + msgpack_unpacker_set_key_cache(uk, RTEST(v)); + v = rb_hash_aref(options, sym_symbolize_keys); msgpack_unpacker_set_symbolized_keys(uk, RTEST(v)); @@ -413,6 +417,7 @@ void MessagePack_Unpacker_module_init(VALUE mMessagePack) eUnknownExtTypeError = rb_define_class_under(mMessagePack, "UnknownExtTypeError", eUnpackError); sym_symbolize_keys = ID2SYM(rb_intern("symbolize_keys")); + sym_key_cache = ID2SYM(rb_intern("key_cache")); sym_freeze = ID2SYM(rb_intern("freeze")); sym_allow_unknown_ext = ID2SYM(rb_intern("allow_unknown_ext"));