Skip to content

Commit 698918f

Browse files
committed
Add thread-safe bounded string cache
Replaces unbounded cache with fixed 512-entry array using offset-based indexing. Provides 15% performance improvement while preventing memory growth and ensuring thread safety for concurrent reader usage.
1 parent edec975 commit 698918f

File tree

4 files changed

+81
-7
lines changed

4 files changed

+81
-7
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@
2727
Pointer format. For example, errors may now show "at offset 1234, path
2828
/city/names/en" or "at offset 1234, path /list/0/name" instead of just the
2929
underlying error message.
30+
- **PERFORMANCE**: Added bounded string interning optimization that provides
31+
~15% performance improvement for City lookups while maintaining thread safety
32+
for concurrent reader usage. Uses a fixed 512-entry cache with offset-based
33+
indexing to prevent unbounded memory growth.
3034

3135
## 2.0.0-beta.3 - 2025-02-16
3236

internal/decoder/data_decoder.go

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,8 @@ func (k Kind) IsScalar() bool {
108108
// DataDecoder is a decoder for the MMDB data section.
109109
// This is exported so mmdbdata package can use it, but still internal.
110110
type DataDecoder struct {
111-
buffer []byte
111+
stringCache *StringCache
112+
buffer []byte
112113
}
113114

114115
const (
@@ -118,7 +119,10 @@ const (
118119

119120
// NewDataDecoder creates a [DataDecoder].
120121
func NewDataDecoder(buffer []byte) DataDecoder {
121-
return DataDecoder{buffer: buffer}
122+
return DataDecoder{
123+
buffer: buffer,
124+
stringCache: NewStringCache(),
125+
}
122126
}
123127

124128
// Buffer returns the underlying buffer for direct access.
@@ -239,7 +243,8 @@ func (d *DataDecoder) DecodeString(size, offset uint) (string, uint, error) {
239243
}
240244

241245
newOffset := offset + size
242-
return string(d.buffer[offset:newOffset]), newOffset, nil
246+
value := d.stringCache.InternAt(offset, size, d.buffer)
247+
return value, newOffset, nil
243248
}
244249

245250
// DecodeUint16 decodes a 16-bit unsigned integer from the given offset.

internal/decoder/reflection.go

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -672,9 +672,9 @@ func (d *ReflectionDecoder) decodeMap(
672672
elemType := mapType.Elem()
673673
var elemValue reflect.Value
674674
for range size {
675-
var key []byte
676675
var err error
677-
key, offset, err = d.DecodeKey(offset)
676+
677+
offset, err = d.decode(offset, keyValue, depth)
678678
if err != nil {
679679
return 0, err
680680
}
@@ -687,10 +687,9 @@ func (d *ReflectionDecoder) decodeMap(
687687

688688
offset, err = d.decode(offset, elemValue, depth)
689689
if err != nil {
690-
return 0, d.wrapErrorWithMapKey(err, string(key))
690+
return 0, d.wrapErrorWithMapKey(err, keyValue.String())
691691
}
692692

693-
keyValue.SetString(string(key)) // This uses the compiler optimization
694693
result.SetMapIndex(keyValue, elemValue)
695694
}
696695
return offset, nil

internal/decoder/string_cache.go

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
package decoder
2+
3+
import "sync"
4+
5+
// StringCache provides bounded string interning using offset-based indexing.
// Similar in spirit to encoding/json/v2's intern.go, but the slot is chosen
// from the data offset instead of a hash of the contents.
//
// The zero value is ready to use. All methods are safe for concurrent use.
// A StringCache must not be copied after first use because it contains a
// sync.RWMutex.
type StringCache struct {
	// cache is a fixed-size, direct-mapped table, so memory use is bounded.
	// Each entry holds a string header and an offset; on 64-bit platforms
	// that is 24 bytes per entry, roughly 12 KiB for the whole table (not
	// counting the bytes of the interned strings themselves).
	cache [512]cacheEntry
	// mu guards cache: shared lock for lookups, exclusive lock for stores.
	mu sync.RWMutex
}

// cacheEntry is one slot of the table: the interned string and the offset
// it was read from. The zero value never matches a cacheable request because
// cacheable strings are at least two bytes long.
type cacheEntry struct {
	str    string
	offset uint
}

// NewStringCache creates a new, empty bounded string cache.
func NewStringCache() *StringCache {
	return &StringCache{}
}

// InternAt returns a canonical string for data[offset:offset+size].
//
// The slot is offset modulo the table size. A slot is treated as a hit only
// when its recorded offset matches and its contents equal the requested
// bytes, so a call made with a different buffer (or after the bytes changed)
// can never return a stale string. Strings shorter than 2 bytes or longer
// than 100 bytes bypass the cache and are always freshly allocated.
//
// InternAt panics if data cannot be sliced as data[offset:offset+size]; this
// holds on every path, including what would otherwise be a cache hit.
// It is safe for concurrent use.
func (sc *StringCache) InternAt(offset, size uint, data []byte) string {
	const (
		minCachedLen = 2   // single byte strings not worth caching
		maxCachedLen = 100 // reasonable upper bound for geographic strings
	)

	// Slice once up front so the bounds check applies to hits and misses alike.
	raw := data[offset : offset+size]

	// Skip caching for very short or very long strings.
	if size < minCachedLen || size > maxCachedLen {
		return string(raw)
	}

	// Direct-mapped: the offset alone selects the slot.
	i := offset % uint(len(sc.cache))

	// Fast path: copy the slot under the read lock, validate outside it.
	sc.mu.RLock()
	entry := sc.cache[i]
	sc.mu.RUnlock()

	// Comparing against string(raw) does not allocate; the compiler
	// recognizes the conversion-in-comparison pattern.
	if entry.offset == offset && entry.str == string(raw) {
		return entry.str
	}

	// Miss: allocate outside the lock, then publish under the write lock.
	str := string(raw)

	sc.mu.Lock()
	if cur := sc.cache[i]; cur.offset == offset && cur.str == str {
		// Another goroutine stored the same string while we were
		// waiting; return its copy so callers share one allocation.
		str = cur.str
	} else {
		sc.cache[i] = cacheEntry{offset: offset, str: str}
	}
	sc.mu.Unlock()

	return str
}

0 commit comments

Comments
 (0)