From ede4e3c238e441e3180050142015296cfdb7fef5 Mon Sep 17 00:00:00 2001 From: Daria Korenieva Date: Wed, 29 Apr 2026 08:21:57 -0700 Subject: [PATCH 01/19] feat: add Valkey support as cache storage backend Implement Valkey as a first-class cache store backend using valkey-glide Go client. This enables Gorse users to use Valkey as an alternative to Redis for the cache layer. Changes: - Add valkey://, valkeys://, valkey+cluster://, valkeys+cluster:// URL prefix constants and config validation - Implement full cache.Database interface in storage/cache/valkey.go using valkey-glide v2 client (standalone + cluster modes) - Time series implemented via Sorted Set + Hash with Go-side bucket aggregation (no TimeSeries module dependency) - Score/document operations use valkey-search FT.* commands via CustomCommand - Add integration test suite embedding baseTestSuite - Add integration report with analysis and task breakdown Signed-off-by: Daria Korenieva --- VALKEY_INTEGRATION_REPORT.md | 292 +++++++++++ config/config.go | 4 + config/config.toml | 6 +- config/config_test.go | 16 + go.mod | 1 + go.sum | 2 + storage/cache/valkey.go | 961 +++++++++++++++++++++++++++++++++++ storage/cache/valkey_test.go | 227 +++++++++ storage/scheme.go | 38 +- 9 files changed, 1529 insertions(+), 18 deletions(-) create mode 100644 VALKEY_INTEGRATION_REPORT.md create mode 100644 storage/cache/valkey.go create mode 100644 storage/cache/valkey_test.go diff --git a/VALKEY_INTEGRATION_REPORT.md b/VALKEY_INTEGRATION_REPORT.md new file mode 100644 index 000000000..d8e9e6b17 --- /dev/null +++ b/VALKEY_INTEGRATION_REPORT.md @@ -0,0 +1,292 @@ +# Valkey Integration Report for Gorse Recommender System + +## 1. Why No Valkey for the Data Layer (`storage/data/Database`) + +The `data.Database` interface defines 25+ methods for users, items, and feedback with complex relational semantics: filtered pagination, category-based batch lookups, time-range queries, cross-entity joins, and streaming exports. 
Every existing backend (MySQL, PostgreSQL, ClickHouse, SQLite, MongoDB) handles these natively via SQL or document queries. No Redis-based data backend exists in this project. Implementing these patterns on Valkey would mean building an application-level query engine on top of key-value primitives — the engineering cost would be high and the result would be slower and more fragile than any existing backend. Valkey is a natural fit for the cache layer, not the data layer. + +--- + +## 2. Time Series Implementation in Valkey + +### 2.1 Context + +The `cache.Database` interface requires two time series methods: + +- **`AddTimeSeriesPoints(ctx, points []TimeSeriesPoint)`** — Ingests points with (Name, Timestamp, Value). Duplicate policy: last write wins. +- **`GetTimeSeriesPoints(ctx, name, begin, end, duration)`** — Retrieves points in a time range, aggregated into buckets of `duration` width, returning the last value per bucket. + +The existing Redis implementation uses Redis TimeSeries module commands (`TS.ADD` with `LAST` duplicate policy, `TS.RANGE` with `LAST` aggregator and `BucketDuration`). Valkey has no TimeSeries module, so we need an alternative. + +### 2.2 Approach A: Sorted Sets with Go-Side Aggregation (Recommended) + +**Design:** +- One sorted set per time series name: key = `{prefix}time_series_points:{name}` +- Score = timestamp in milliseconds (Unix epoch) +- Member = string-encoded float64 value (using `strconv.FormatFloat`) +- For duplicate timestamps (last-write-wins): use a composite member format `{timestamp_ms}` as the score and the value as the member. Since ZADD with the same member updates the score, we instead encode as member = `{timestamp_ms}` and store value in a parallel hash, OR use a simpler approach: member = `fmt.Sprintf("%d", timestamp_ms)` and value stored via a separate HSET. 
However, the simplest approach that matches the "LAST" duplicate policy is: + - Member = `fmt.Sprintf("%d", timestamp.UnixMilli())` (timestamp as string) + - Score = `timestamp.UnixMilli()` (float64) + - Value stored in a parallel hash: `HSET {prefix}time_series_values:{name} {timestamp_ms} {value}` + +**Actually, the cleanest approach** (matching how MongoDB does it in this codebase): +- Use a single sorted set per series name +- Member = `fmt.Sprintf("%d:%.17g", timestamp.UnixMilli(), value)` — encode both timestamp and value in the member +- Score = `float64(timestamp.UnixMilli())` +- For "last write wins" on duplicate timestamps: first `ZREMRANGEBYSCORE` the exact timestamp, then `ZADD` the new point. This can be done atomically in a pipeline. + +**Simpler alternative** (recommended for clarity): +- Use Hash + Sorted Set combination: + - Sorted Set key: `{prefix}ts_index:{name}` — score = timestamp_ms, member = timestamp_ms as string + - Hash key: `{prefix}ts_data:{name}` — field = timestamp_ms as string, value = float64 as string +- `AddTimeSeriesPoints`: Pipeline of ZADD (index) + HSET (data) per point. ZADD naturally handles duplicate timestamps (updates score), HSET naturally overwrites (last-write-wins). +- `GetTimeSeriesPoints`: ZRANGEBYSCORE to get timestamp members in range, HMGET to fetch values, then bucket aggregation in Go. + +**Go-side aggregation logic** (for `GetTimeSeriesPoints`): +```go +// 1. Fetch all timestamps in range from sorted set +// 2. Fetch corresponding values from hash +// 3. Group by bucket: bucket_key = (timestamp_ms / duration_ms) * duration_ms +// 4. For each bucket, take the last value (highest timestamp) +// 5. Return sorted results +``` + +This matches exactly how MongoDB implements it — MongoDB uses `$group` with `$floor($divide)` for bucketing and `$top` with `sortBy: {timestamp: -1}` for last-value selection. 
+ +**Benefits:** +- Simple, well-understood data structures (sorted set + hash) +- Exact semantic match with Redis TimeSeries "LAST" duplicate policy +- Proven pattern — MongoDB implementation in this codebase does the same thing +- No external modules required — works with any Valkey version +- Efficient range queries via ZRANGEBYSCORE: O(log N + M) +- Natural duplicate handling via ZADD + HSET overwrite semantics + +**Downsides:** +- Aggregation happens in Go, not server-side — transfers more data over the network for large ranges +- Two keys per series (index + data) instead of one — slightly more memory overhead +- No built-in downsampling or retention policies (must be managed application-side) +- For very large time ranges with fine-grained data, the full range must be fetched before aggregation + +--- + +### 2.3 Approach B: Sorted Sets with Lua Script Aggregation (Server-Side) + +**Design:** +- Same storage layout as Approach A (sorted set + hash) +- Aggregation performed via a Lua script executed with `EVALSHA`/`InvokeScript` +- The Lua script does: ZRANGEBYSCORE, HGET for each member, bucket grouping, and returns aggregated results + +**Lua script pseudocode:** +```lua +local key_index = KEYS[1] -- sorted set key +local key_data = KEYS[2] -- hash key +local begin_ms = tonumber(ARGV[1]) +local end_ms = tonumber(ARGV[2]) +local bucket_ms = tonumber(ARGV[3]) + +local members = redis.call('ZRANGEBYSCORE', key_index, begin_ms, end_ms) +local buckets = {} +local bucket_order = {} + +for _, member in ipairs(members) do + local ts = tonumber(member) + local value = redis.call('HGET', key_data, member) + local bucket_key = math.floor(ts / bucket_ms) * bucket_ms + + if not buckets[bucket_key] or ts > buckets[bucket_key].ts then + if not buckets[bucket_key] then + table.insert(bucket_order, bucket_key) + end + buckets[bucket_key] = {ts = ts, value = value} + end +end + +table.sort(bucket_order) +local result = {} +for _, bk in ipairs(bucket_order) do + 
table.insert(result, tostring(bk)) + table.insert(result, buckets[bk].value) +end +return result +``` + +**Benefits:** +- Aggregation happens server-side — reduces network transfer for large datasets +- Atomic operation — no race conditions during read +- Single round-trip for the entire query +- valkey-glide supports `InvokeScript` for Lua execution + +**Downsides:** +- Lua scripts add complexity to maintain and debug +- Lua execution blocks the Valkey event loop — long-running aggregations on large datasets could cause latency spikes +- Script must be loaded/cached on each Valkey node (handled by glide's InvokeScript) +- Harder to unit test the aggregation logic independently +- Lua in Valkey has limited floating-point precision (uses double, but string conversion can lose precision) + +--- + +### 2.4 Approach C: Single Sorted Set with Encoded Members + +**Design:** +- One sorted set per series: key = `{prefix}ts:{name}` +- Score = `float64(timestamp.UnixMilli())` +- Member = `fmt.Sprintf("%.17g", value)` — just the value +- For duplicate timestamps: ZADD with GT flag won't work (we want last-write-wins, not max). Instead, remove old entry at same score first, then add new one. + +**Problem:** Sorted sets don't support multiple members with the same score cleanly for "replace" semantics. If two different values have the same timestamp, ZADD treats them as different members. You'd need ZREMRANGEBYSCORE + ZADD in a pipeline, which is what Approach A avoids by using a hash for values. 
+ +**Benefits:** +- Single key per series (simpler key space) +- Slightly less memory than two-key approach + +**Downsides:** +- Duplicate timestamp handling is awkward and error-prone +- Can't efficiently update a value at an existing timestamp without removing all entries at that score +- Member = value means you can't have the same value at different timestamps (sorted set members are unique) +- **This approach has fundamental correctness issues** — not recommended + +--- + +### 2.5 Approach D: Hash-Only with Go-Side Sorting and Aggregation + +**Design:** +- Single hash per series: key = `{prefix}ts:{name}` +- Field = timestamp_ms as string, Value = float64 as string +- `AddTimeSeriesPoints`: HSET with field=timestamp, value=value (natural last-write-wins) +- `GetTimeSeriesPoints`: HGETALL, filter by range in Go, sort, bucket, aggregate + +**Benefits:** +- Simplest storage model — single key, single data structure +- Natural last-write-wins via HSET overwrite +- No sorted set overhead + +**Downsides:** +- HGETALL fetches ALL points for the series, not just the requested range — very inefficient for large series +- No server-side range filtering — all data must be transferred to client +- Sorting must be done in Go after fetching all data +- Memory usage grows unbounded without manual cleanup +- **Not viable for production workloads with large time series** + +--- + +## 3. Decision + +**Approach A (Sorted Sets + Hash with Go-Side Aggregation)** — confirmed. If benchmarks later show performance is not on par with Redis TimeSeries, Approach B (Lua script server-side aggregation) can be explored as an optimization. + +--- + +## 4. Client Library: valkey-glide Go v2 + +The implementation will use `github.com/valkey-io/valkey-glide/go/v2` — the official Valkey GLIDE client for Go. 
+ +**Key API mappings for our implementation:** + +| Gorse Operation | valkey-glide API | +|---|---| +| Set key-value | `client.Set(ctx, key, value)` | +| Get key-value | `client.Get(ctx, key)` → `models.Result[string]` | +| Delete key | `client.Del(ctx, []string{key})` | +| Hash set | `client.HSet(ctx, key, map[string]string{...})` | +| Hash get all | `client.HGetAll(ctx, key)` → `map[string]string` | +| Hash get field | `client.HGet(ctx, key, field)` → `models.Result[string]` | +| Hash delete | `client.HDel(ctx, key, []string{...})` | +| Sorted set add | `client.ZAdd(ctx, key, map[string]float64{...})` | +| Sorted set range with scores | `client.ZRangeWithScores(ctx, key, query)` → `[]models.MemberAndScore` | +| Sorted set remove by score | `client.ZRemRangeByScore(ctx, key, rangeQuery)` | +| Sorted set pop min | `client.ZPopMin(ctx, key)` → `map[string]float64` | +| Sorted set cardinality | `client.ZCard(ctx, key)` | +| Scan keys | `client.Scan(ctx, cursor)` / `client.ScanWithOptions(ctx, cursor, opts)` | +| Pipeline/batch | `client.Exec(ctx, batch, raiseOnError)` with `pipeline.NewStandaloneBatch(false)` | +| Lua scripting | `client.InvokeScript(ctx, script)` / `client.InvokeScriptWithOptions(ctx, script, opts)` | +| Watch (optimistic locking) | `client.Watch(ctx, keys)` | +| Custom command (FT.*, etc.) 
| `client.CustomCommand(ctx, []string{...})` | +| Ping | `client.Ping(ctx)` | +| Close | `client.Close()` | + +**Connection setup:** +```go +import ( + glide "github.com/valkey-io/valkey-glide/go/v2" + "github.com/valkey-io/valkey-glide/go/v2/config" +) + +// Standalone +client, err := glide.NewClient(&config.ClientConfiguration{ + Addresses: []config.NodeAddress{{Host: host, Port: port}}, +}) + +// Cluster +client, err := glide.NewClusterClient(&config.ClusterClientConfiguration{ + Addresses: []config.NodeAddress{{Host: host, Port: port}}, +}) +``` + +**Key differences from go-redis (current Redis client):** +- No `ParseURL` — connection params must be parsed manually from the URL +- `ZAdd` takes `map[string]float64` instead of `[]redis.Z` +- `Get` returns `models.Result[string]` with `.Value()` and `.IsNil()` methods +- Pipeline uses `pipeline.NewStandaloneBatch(false)` + `client.Exec()` +- No direct FT.* (RediSearch) methods — must use `CustomCommand` for valkey-search +- Context is required for all operations (consistent with Go best practices) + +--- + +## 5. Implementation Plan + +### Phase 1: Foundation +1. Add Valkey URL prefix constants to `storage/scheme.go` +2. Update config validation in `config/config.go` +3. Update `config/config.toml` documentation + +### Phase 2: Core Implementation +1. Create `storage/cache/valkey.go` — full `cache.Database` implementation +2. Implement connection setup (standalone + cluster) using valkey-glide +3. Implement core operations: Set/Get/Delete, Push/Pop/Remain, Scan/Purge +4. Implement score operations using `CustomCommand` for valkey-search FT.* commands +5. Implement time series using Sorted Set + Hash with Go-side aggregation + +### Phase 3: Tests +1. Create `storage/cache/valkey_test.go` +2. Run existing `baseTestSuite` against Valkey + +### Phase 4: Benchmarks +1. Time series ingestion throughput comparison +2. Range query + aggregation latency comparison +3. 
Document results + +### Files to Create/Modify + +| File | Action | +|---|---| +| `storage/scheme.go` | Add Valkey prefix constants | +| `storage/cache/valkey.go` | New — full implementation | +| `storage/cache/valkey_test.go` | New — test suite | +| `config/config.go` | Add Valkey to cache_store validator | +| `config/config.toml` | Document Valkey support | +| `go.mod` | Add valkey-glide dependency | + + +--- + +## 6. Task Breakdown + +### Story: Add Valkey Support to Gorse Recommender System (Cache Layer) + +| # | Task | Description | Estimate | Status | +|---|---|---|---|---| +| 1 | Initial Valkey integration exploration | Explore Gorse project architecture, analyze cache and data layer interfaces, evaluate Valkey compatibility, research valkey-glide Go client API, produce integration report with time series approach options and implementation plan. | 4h | ✅ Done | +| 2 | Add Valkey URL prefix constants and config validation | Add `valkey://`, `valkeys://`, `valkey+cluster://`, `valkeys+cluster://` prefixes to `storage/scheme.go`. Update `config/config.go` cache_store validator to accept Valkey prefixes. Update `config/config.toml` to document Valkey as a supported cache store option. Add config validation tests. | 2h | ✅ Done | +| 3 | Add valkey-glide Go dependency | Add `github.com/valkey-io/valkey-glide/go/v2` to `go.mod`. Run `go mod tidy`. Verify build compiles. | 1h | ✅ Done | +| 4 | Implement Valkey cache backend — connection and lifecycle | Create `storage/cache/valkey.go`. Implement `init()` registration with Valkey prefixes, connection setup for standalone and cluster modes using valkey-glide, `Close()`, `Ping()`, `Init()` (valkey-search index creation via CustomCommand), `Scan()`, `Purge()`. | 4h | ✅ Done | +| 5 | Implement Valkey cache backend — core key-value and queue operations | Implement `Set()`, `Get()`, `Delete()` for key-value metadata. Implement `Push()`, `Pop()`, `Remain()` for message queue using sorted sets. 
| 3h | ✅ Done | +| 6 | Implement Valkey cache backend — score/document operations | Implement `AddScores()`, `SearchScores()`, `UpdateScores()`, `DeleteScores()`, `ScanScores()` using valkey-search FT.* commands via CustomCommand. | 6h | ✅ Done | +| 7 | Implement Valkey cache backend — time series operations | Implement `AddTimeSeriesPoints()` using sorted set + hash with pipeline batching. Implement `GetTimeSeriesPoints()` with ZRANGEBYSCORE range fetch and Go-side bucket aggregation (Approach A). | 4h | ✅ Done | +| 8 | Create Valkey cache test suite | Create `storage/cache/valkey_test.go`. Embed `baseTestSuite` and run all existing cache interface tests against a Valkey instance. Use `VALKEY_URI` environment variable. | 4h | ✅ Done | +| 9 | Performance benchmarks — time series | Create benchmarks comparing Valkey time series (sorted set + Go aggregation) vs Redis TimeSeries for ingestion throughput, range query latency, and aggregation performance. Document results and gaps. | 3h | To Do | +| 10 | Documentation and PR preparation | Update README or contributing docs if needed. Final code review, cleanup, and PR submission to gorse-io/gorse. 
| 2h | To Do | + +**Total estimate: ~33 hours (~4 working days)** + +- Tasks 2–3 (foundation): 3h — half day +- Tasks 4–7 (core implementation): 17h — ~2 days +- Task 8 (tests): 4h — half day +- Tasks 9–10 (benchmarks + PR): 5h — ~1 day diff --git a/config/config.go b/config/config.go index 8a2ea6512..e148398cb 100644 --- a/config/config.go +++ b/config/config.go @@ -831,6 +831,10 @@ func (config *Config) Validate() error { storage.RedissPrefix, storage.RedisClusterPrefix, storage.RedissClusterPrefix, + storage.ValkeyPrefix, + storage.ValkeysPrefix, + storage.ValkeyClusterPrefix, + storage.ValkeysClusterPrefix, storage.MongoPrefix, storage.MongoSrvPrefix, storage.MySQLPrefix, diff --git a/config/config.toml b/config/config.toml index 37bf2ca3d..4201b71a6 100644 --- a/config/config.toml +++ b/config/config.toml @@ -1,10 +1,14 @@ [database] -# The database for caching, support Redis, MySQL, Postgres and MongoDB: +# The database for caching, support Redis, Valkey, MySQL, Postgres and MongoDB: # redis://:@:/ # rediss://:@:/ # redis+cluster://:@:[?addr=:&addr=:] # rediss+cluster://:@:[?addr=:&addr=:] +# valkey://:@:/ +# valkeys://:@:/ +# valkey+cluster://:@:[?addr=:&addr=:] +# valkeys+cluster://:@:[?addr=:&addr=:] # mysql://[username[:password]@][protocol[(address)]]/dbname[?param1=value1&...¶mN=valueN] # postgres://bob:secret@1.2.3.4:5432/mydb?sslmode=verify-full # postgresql://bob:secret@1.2.3.4:5432/mydb?sslmode=verify-full diff --git a/config/config_test.go b/config/config_test.go index 6fa626298..1a8ea00f1 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -503,4 +503,20 @@ func (s *ValidateTestSuite) TestCacheStore() { // Test that rediss+cluster:// prefix is accepted for cache_store s.Database.CacheStore = "rediss+cluster://:password@192.168.1.11:6379?addr=192.168.0.5:6379" s.NoError(s.Validate()) + + // Test that valkey:// prefix is accepted for cache_store + s.Database.CacheStore = "valkey://localhost:6379/0" + s.NoError(s.Validate()) + + // Test 
that valkeys:// prefix is accepted for cache_store + s.Database.CacheStore = "valkeys://localhost:6379/0" + s.NoError(s.Validate()) + + // Test that valkey+cluster:// prefix is accepted for cache_store + s.Database.CacheStore = "valkey+cluster://:password@192.168.1.11:6379?addr=192.168.0.5:6379&addr=192.168.0.7:6379" + s.NoError(s.Validate()) + + // Test that valkeys+cluster:// prefix is accepted for cache_store + s.Database.CacheStore = "valkeys+cluster://:password@192.168.1.11:6379?addr=192.168.0.5:6379" + s.NoError(s.Validate()) } diff --git a/go.mod b/go.mod index e302eeefd..253195dc4 100644 --- a/go.mod +++ b/go.mod @@ -63,6 +63,7 @@ require ( github.com/stretchr/testify v1.11.1 github.com/swaggest/swgui v1.8.5 github.com/tiktoken-go/tokenizer v0.7.0 + github.com/valkey-io/valkey-glide/go/v2 v2.1.0 github.com/weaviate/weaviate v1.27.0 github.com/weaviate/weaviate-go-client/v4 v4.16.1 github.com/yuin/goldmark v1.7.16 diff --git a/go.sum b/go.sum index 47ea55925..2497ef6f2 100644 --- a/go.sum +++ b/go.sum @@ -921,6 +921,8 @@ github.com/ugorji/go v1.1.7/go.mod h1:kZn38zHttfInRq0xu/PH0az30d+z6vm202qpg1oXVM github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0= github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY= github.com/urfave/negroni v1.0.0/go.mod h1:Meg73S6kFm/4PpbYdq35yYWoCZ9mS/YSx+lKnmiohz4= +github.com/valkey-io/valkey-glide/go/v2 v2.1.0 h1:c0cf63com8Ul5nW4xRV/Q1D8rwRQdV/JFUio6gt0PpE= +github.com/valkey-io/valkey-glide/go/v2 v2.1.0/go.mod h1:LK5zmODJa5xnxZndarh1trntExb3GVGJXz4GwDCagho= github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= github.com/valyala/fasthttp v1.6.0/go.mod h1:FstJa9V+Pj9vQ7OJie2qMHdwemEDaDiSdBnvPM1Su9w= github.com/valyala/fasttemplate v1.0.1/go.mod h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPUpymEIMZ47gx8= diff --git a/storage/cache/valkey.go b/storage/cache/valkey.go new file mode 100644 index 
000000000..6124aa67f --- /dev/null +++ b/storage/cache/valkey.go @@ -0,0 +1,961 @@ +// Copyright 2025 gorse Project Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cache + +import ( + "context" + "fmt" + "io" + "net/url" + "sort" + "strconv" + "strings" + "time" + + "github.com/gorse-io/gorse/common/log" + "github.com/gorse-io/gorse/storage" + "github.com/juju/errors" + "github.com/samber/lo" + glide "github.com/valkey-io/valkey-glide/go/v2" + glideconfig "github.com/valkey-io/valkey-glide/go/v2/config" + "github.com/valkey-io/valkey-glide/go/v2/models" + glideoptions "github.com/valkey-io/valkey-glide/go/v2/options" + "github.com/valkey-io/valkey-glide/go/v2/pipeline" + "go.uber.org/zap" +) + +func init() { + Register([]string{storage.ValkeyPrefix, storage.ValkeysPrefix}, func(path, tablePrefix string, opts ...storage.Option) (Database, error) { + addr, password, db, useTLS, err := parseValkeyURL(path) + if err != nil { + return nil, errors.Trace(err) + } + cfg := &glideconfig.ClientConfiguration{ + Addresses: []glideconfig.NodeAddress{addr}, + } + if password != "" { + cfg.Credentials = &glideconfig.ServerCredentials{Password: password} + } + if db > 0 { + cfg.DatabaseId = db + } + cfg.UseTLS = useTLS + client, err := glide.NewClient(cfg) + if err != nil { + return nil, errors.Trace(err) + } + database := &Valkey{ + standaloneClient: client, + isCluster: false, + maxSearchResults: storage.NewOptions(opts...).MaxSearchResults, + } + 
database.TablePrefix = storage.TablePrefix(tablePrefix) + return database, nil + }) + Register([]string{storage.ValkeyClusterPrefix, storage.ValkeysClusterPrefix}, func(path, tablePrefix string, opts ...storage.Option) (Database, error) { + addresses, password, useTLS, err := parseValkeyClusterURL(path) + if err != nil { + return nil, errors.Trace(err) + } + cfg := &glideconfig.ClusterClientConfiguration{ + Addresses: addresses, + } + if password != "" { + cfg.Credentials = &glideconfig.ServerCredentials{Password: password} + } + cfg.UseTLS = useTLS + client, err := glide.NewClusterClient(cfg) + if err != nil { + return nil, errors.Trace(err) + } + database := &Valkey{ + clusterClient: client, + isCluster: true, + maxSearchResults: storage.NewOptions(opts...).MaxSearchResults, + } + database.TablePrefix = storage.TablePrefix(tablePrefix) + return database, nil + }) +} + +// parseValkeyURL parses a valkey:// or valkeys:// URL into connection parameters. +func parseValkeyURL(rawURL string) (addr glideconfig.NodeAddress, password string, db int, useTLS bool, err error) { + parsed, err := url.Parse(rawURL) + if err != nil { + return addr, "", 0, false, errors.Trace(err) + } + host := parsed.Hostname() + if host == "" { + host = "localhost" + } + port := 6379 + if parsed.Port() != "" { + port, err = strconv.Atoi(parsed.Port()) + if err != nil { + return addr, "", 0, false, errors.Trace(err) + } + } + addr = glideconfig.NodeAddress{Host: host, Port: port} + if parsed.User != nil { + password, _ = parsed.User.Password() + } + if parsed.Path != "" && parsed.Path != "/" { + dbStr := strings.TrimPrefix(parsed.Path, "/") + if dbStr != "" { + db, err = strconv.Atoi(dbStr) + if err != nil { + return addr, "", 0, false, errors.Errorf("invalid database number: %s", dbStr) + } + } + } + useTLS = parsed.Scheme == "valkeys" + return addr, password, db, useTLS, nil +} + +// parseValkeyClusterURL parses a valkey+cluster:// or valkeys+cluster:// URL. 
+func parseValkeyClusterURL(rawURL string) (addresses []glideconfig.NodeAddress, password string, useTLS bool, err error) { + // Replace the cluster prefix with a standard scheme for URL parsing. + var newURL string + if strings.HasPrefix(rawURL, storage.ValkeyClusterPrefix) { + newURL = strings.Replace(rawURL, storage.ValkeyClusterPrefix, storage.ValkeyPrefix, 1) + useTLS = false + } else if strings.HasPrefix(rawURL, storage.ValkeysClusterPrefix) { + newURL = strings.Replace(rawURL, storage.ValkeysClusterPrefix, storage.ValkeysPrefix, 1) + useTLS = true + } + parsed, err := url.Parse(newURL) + if err != nil { + return nil, "", false, errors.Trace(err) + } + host := parsed.Hostname() + if host == "" { + host = "localhost" + } + port := 6379 + if parsed.Port() != "" { + port, err = strconv.Atoi(parsed.Port()) + if err != nil { + return nil, "", false, errors.Trace(err) + } + } + addresses = append(addresses, glideconfig.NodeAddress{Host: host, Port: port}) + if parsed.User != nil { + password, _ = parsed.User.Password() + } + // Parse additional addresses from query params (addr=host:port). + for _, addrStr := range parsed.Query()["addr"] { + parts := strings.SplitN(addrStr, ":", 2) + h := parts[0] + p := 6379 + if len(parts) == 2 { + p, err = strconv.Atoi(parts[1]) + if err != nil { + return nil, "", false, errors.Errorf("invalid port in addr param: %s", addrStr) + } + } + addresses = append(addresses, glideconfig.NodeAddress{Host: h, Port: p}) + } + return addresses, password, useTLS, nil +} + +// Valkey cache storage using valkey-glide client. +type Valkey struct { + storage.TablePrefix + standaloneClient *glide.Client + clusterClient *glide.ClusterClient + isCluster bool + maxSearchResults int +} + +// Close the valkey connection. +func (v *Valkey) Close() error { + if v.isCluster { + v.clusterClient.Close() + } else { + v.standaloneClient.Close() + } + return nil +} + +// Ping the valkey server. 
+func (v *Valkey) Ping() error { + ctx := context.Background() + if v.isCluster { + _, err := v.clusterClient.Ping(ctx) + return err + } + _, err := v.standaloneClient.Ping(ctx) + return err +} + +// Init creates the valkey-search index for document storage. +func (v *Valkey) Init() error { + ctx := context.Background() + // List existing indices via FT._LIST. + result, err := v.customCommand(ctx, []string{"FT._LIST"}) + if err != nil { + return errors.Trace(err) + } + indices := parseStringSlice(result) + if lo.Contains(indices, v.DocumentTable()) { + return nil + } + // Create the index. + _, err = v.customCommand(ctx, []string{ + "FT.CREATE", v.DocumentTable(), + "ON", "HASH", + "PREFIX", "1", v.DocumentTable() + ":", + "SCHEMA", + "collection", "TAG", + "subset", "TAG", + "id", "TAG", + "score", "NUMERIC", + "is_hidden", "NUMERIC", + "categories", "TAG", "SEPARATOR", ";", + "timestamp", "NUMERIC", + }) + if err != nil { + return errors.Trace(err) + } + return nil +} + +// customCommand executes a custom command, handling both standalone and cluster modes. +func (v *Valkey) customCommand(ctx context.Context, args []string) (any, error) { + if v.isCluster { + result, err := v.clusterClient.CustomCommand(ctx, args) + if err != nil { + return nil, err + } + return result.SingleValue(), nil + } + return v.standaloneClient.CustomCommand(ctx, args) +} + +// Scan iterates over all keys with the table prefix. 
+func (v *Valkey) Scan(work func(string) error) error { + ctx := context.Background() + if v.isCluster { + cursor := models.NewClusterScanCursor() + scanOpts := glideoptions.NewClusterScanOptions().SetMatch(string(v.TablePrefix) + "*") + for !cursor.IsFinished() { + result, err := v.clusterClient.ScanWithOptions(ctx, cursor, *scanOpts) + if err != nil { + return errors.Trace(err) + } + for _, key := range result.Keys { + if err = work(key[len(v.TablePrefix):]); err != nil { + return errors.Trace(err) + } + } + cursor = result.Cursor + } + return nil + } + cursor := models.NewCursor() + scanOpts := glideoptions.NewScanOptions().SetMatch(string(v.TablePrefix) + "*") + for { + result, err := v.standaloneClient.ScanWithOptions(ctx, cursor, *scanOpts) + if err != nil { + return errors.Trace(err) + } + for _, key := range result.Data { + if err = work(key[len(v.TablePrefix):]); err != nil { + return errors.Trace(err) + } + } + if result.Cursor.IsFinished() { + return nil + } + cursor = result.Cursor + } +} + +// Purge deletes all keys with the table prefix. 
+func (v *Valkey) Purge() error { + ctx := context.Background() + if v.isCluster { + cursor := models.NewClusterScanCursor() + scanOpts := glideoptions.NewClusterScanOptions().SetMatch(string(v.TablePrefix) + "*") + for !cursor.IsFinished() { + result, err := v.clusterClient.ScanWithOptions(ctx, cursor, *scanOpts) + if err != nil { + return errors.Trace(err) + } + if len(result.Keys) > 0 { + if _, err = v.clusterClient.Del(ctx, result.Keys); err != nil { + return errors.Trace(err) + } + } + cursor = result.Cursor + } + return nil + } + cursor := models.NewCursor() + scanOpts := glideoptions.NewScanOptions().SetMatch(string(v.TablePrefix) + "*") + for { + result, err := v.standaloneClient.ScanWithOptions(ctx, cursor, *scanOpts) + if err != nil { + return errors.Trace(err) + } + if len(result.Data) > 0 { + if _, err = v.standaloneClient.Del(ctx, result.Data); err != nil { + return errors.Trace(err) + } + } + if result.Cursor.IsFinished() { + return nil + } + cursor = result.Cursor + } +} + +// Set stores values in Valkey. +func (v *Valkey) Set(ctx context.Context, values ...Value) error { + if v.isCluster { + for _, val := range values { + if _, err := v.clusterClient.Set(ctx, v.Key(val.name), val.value); err != nil { + return errors.Trace(err) + } + } + return nil + } + batch := pipeline.NewStandaloneBatch(false) + for _, val := range values { + batch.Set(v.Key(val.name), val.value) + } + _, err := v.standaloneClient.Exec(ctx, *batch, true) + return errors.Trace(err) +} + +// Get returns a value from Valkey. 
+func (v *Valkey) Get(ctx context.Context, key string) *ReturnValue { + var result models.Result[string] + var err error + if v.isCluster { + result, err = v.clusterClient.Get(ctx, v.Key(key)) + } else { + result, err = v.standaloneClient.Get(ctx, v.Key(key)) + } + if err != nil { + return &ReturnValue{err: err, exists: false} + } + if result.IsNil() { + return &ReturnValue{value: "", exists: false} + } + return &ReturnValue{value: result.Value(), exists: true} +} + +// Delete removes a key from Valkey. +func (v *Valkey) Delete(ctx context.Context, key string) error { + if v.isCluster { + _, err := v.clusterClient.Del(ctx, []string{v.Key(key)}) + return err + } + _, err := v.standaloneClient.Del(ctx, []string{v.Key(key)}) + return err +} + +// Push adds a message to a sorted set queue with timestamp as score. +func (v *Valkey) Push(ctx context.Context, name, message string) error { + members := map[string]float64{message: float64(time.Now().UnixNano())} + if v.isCluster { + _, err := v.clusterClient.ZAdd(ctx, v.Key(name), members) + return err + } + _, err := v.standaloneClient.ZAdd(ctx, v.Key(name), members) + return err +} + +// Pop removes and returns the message with the lowest score from the queue. +func (v *Valkey) Pop(ctx context.Context, name string) (string, error) { + var result map[string]float64 + var err error + if v.isCluster { + result, err = v.clusterClient.ZPopMin(ctx, v.Key(name)) + } else { + result, err = v.standaloneClient.ZPopMin(ctx, v.Key(name)) + } + if err != nil { + return "", errors.Trace(err) + } + if len(result) == 0 { + return "", io.EOF + } + for member := range result { + return member, nil + } + return "", io.EOF +} + +// Remain returns the number of messages in the queue. 
+func (v *Valkey) Remain(ctx context.Context, name string) (int64, error) {
+	if v.isCluster {
+		return v.clusterClient.ZCard(ctx, v.Key(name))
+	}
+	return v.standaloneClient.ZCard(ctx, v.Key(name))
+}
+
+// documentKey builds the hash key for one score document:
+// {document_table}:{collection}:{subset}:{value}.
+func (v *Valkey) documentKey(collection, subset, value string) string {
+	return v.DocumentTable() + ":" + collection + ":" + subset + ":" + value
+}
+
+// AddScores adds score documents to Valkey using hash storage.
+// Each document is one hash with the fields collection/subset/id/score/
+// is_hidden/categories/timestamp; re-adding the same (collection, subset, id)
+// overwrites the previous fields.
+func (v *Valkey) AddScores(ctx context.Context, collection, subset string, documents []Score) error {
+	if v.isCluster {
+		// Cluster mode: one HSET per document (keys may live in different slots).
+		for _, document := range documents {
+			if _, err := v.clusterClient.HSet(ctx, v.documentKey(collection, subset, document.Id), map[string]string{
+				"collection": collection,
+				"subset":     subset,
+				"id":         document.Id,
+				"score":      strconv.FormatFloat(document.Score, 'g', -1, 64),
+				"is_hidden":  formatBool(document.IsHidden),
+				"categories": encodeCategories(document.Categories),
+				"timestamp":  strconv.FormatInt(document.Timestamp.UnixMicro(), 10),
+			}); err != nil {
+				return errors.Trace(err)
+			}
+		}
+		return nil
+	}
+	// Standalone mode: pipeline all HSETs in one non-atomic batch.
+	batch := pipeline.NewStandaloneBatch(false)
+	for _, document := range documents {
+		batch.HSet(v.documentKey(collection, subset, document.Id), map[string]string{
+			"collection": collection,
+			"subset":     subset,
+			"id":         document.Id,
+			"score":      strconv.FormatFloat(document.Score, 'g', -1, 64),
+			"is_hidden":  formatBool(document.IsHidden),
+			"categories": encodeCategories(document.Categories),
+			"timestamp":  strconv.FormatInt(document.Timestamp.UnixMicro(), 10),
+		})
+	}
+	_, err := v.standaloneClient.Exec(ctx, *batch, true)
+	return errors.Trace(err)
+}
+
+// formatBool encodes a bool as "1"/"0" for the is_hidden hash field, which is
+// queried as a numeric range (@is_hidden:[0 0]) in FT.SEARCH.
+func formatBool(b bool) string {
+	if b {
+		return "1"
+	}
+	return "0"
+}
+
+// SearchScores searches for score documents using FT.SEARCH.
+func (v *Valkey) SearchScores(ctx context.Context, collection, subset string, query []string, begin, end int) ([]Score, error) {
+	// Build a tag query: hidden documents are always excluded, and each query
+	// term must match the categories tag field.
+	var builder strings.Builder
+	fmt.Fprintf(&builder, "@collection:{ %s } @is_hidden:[0 0]", escape(collection))
+	if subset != "" {
+		fmt.Fprintf(&builder, " @subset:{ %s }", escape(subset))
+	}
+	for _, q := range query {
+		fmt.Fprintf(&builder, " @categories:{ %s }", escape(encodeCategory(q)))
+	}
+	// end == -1 means "no upper bound"; 10000 caps the page size.
+	// NOTE(review): assumes matching result sets stay below 10000 — confirm.
+	limit := 10000
+	if end != -1 {
+		limit = end - begin
+	}
+	args := []string{
+		"FT.SEARCH", v.DocumentTable(), builder.String(),
+		"SORTBY", "score", "DESC",
+		"LIMIT", strconv.Itoa(begin), strconv.Itoa(limit),
+	}
+	result, err := v.customCommand(ctx, args)
+	if err != nil {
+		return nil, errors.Trace(err)
+	}
+	return parseFTSearchResult(result)
+}
+
+// UpdateScores updates score documents matching the query.
+func (v *Valkey) UpdateScores(ctx context.Context, collections []string, subset *string, id string, patch ScorePatch) error {
+	if len(collections) == 0 {
+		return nil
+	}
+	// Empty patch: nothing to write.
+	if patch.Score == nil && patch.IsHidden == nil && patch.Categories == nil {
+		return nil
+	}
+	var builder strings.Builder
+	fmt.Fprintf(&builder, "@collection:{ %s }", escape(strings.Join(collections, " | ")))
+	fmt.Fprintf(&builder, " @id:{ %s }", escape(id))
+	if subset != nil {
+		fmt.Fprintf(&builder, " @subset:{ %s }", escape(*subset))
+	}
+	limit := v.maxSearchResults
+	if limit <= 0 {
+		limit = 10000
+	}
+
+	// Two-phase update: collect all matching keys first, then mutate, so the
+	// SORTBY score pagination is not perturbed by our own writes.
+	keys := make([]string, 0)
+	keySet := make(map[string]struct{})
+	offset := 0
+	for {
+		args := []string{
+			"FT.SEARCH", v.DocumentTable(), builder.String(),
+			"SORTBY", "score", "DESC",
+			"LIMIT", strconv.Itoa(offset), strconv.Itoa(limit),
+		}
+		result, err := v.customCommand(ctx, args)
+		if err != nil {
+			return errors.Trace(err)
+		}
+		docKeys := parseFTSearchKeys(result)
+		if len(docKeys) == 0 {
+			break
+		}
+		// Deduplicate across pages: tied scores can repeat keys between pages.
+		newKeys := 0
+		for _, k := range docKeys {
+			if _, exists := keySet[k]; !exists {
+				keySet[k] = struct{}{}
+				keys = append(keys, k)
+				newKeys++
+			}
+		}
+		offset += len(docKeys)
+		// Stop on a short page or when a full page yielded nothing new.
+		if len(docKeys) < limit || newKeys == 0 {
+			break
+		}
+	}
+
+	// Build the HSET field map once; only the patched fields are written.
+	values := make(map[string]string)
+	if patch.Score != nil {
+		values["score"] = strconv.FormatFloat(*patch.Score, 'g', -1, 64)
+	}
+	if patch.IsHidden != nil {
+		values["is_hidden"] = formatBool(*patch.IsHidden)
+	}
+	if patch.Categories != nil {
+		values["categories"] = encodeCategories(patch.Categories)
+	}
+	for _, key := range keys {
+		if v.isCluster {
+			// EXISTS guards against resurrecting a document deleted between
+			// the search phase and this write.
+			exists, err := v.clusterClient.Exists(ctx, []string{key})
+			if err != nil {
+				return errors.Trace(err)
+			}
+			if exists == 0 {
+				continue
+			}
+			if _, err = v.clusterClient.HSet(ctx, key, values); err != nil {
+				return errors.Trace(err)
+			}
+		} else {
+			// NOTE(review): WATCH without a MULTI/EXEC transaction does not
+			// make the HSET conditional on the key being unmodified, so this
+			// is not real optimistic locking — confirm whether a transaction
+			// was intended here.
+			if _, err := v.standaloneClient.Watch(ctx, []string{key}); err != nil {
+				return errors.Trace(err)
+			}
+			exists, err := v.standaloneClient.Exists(ctx, []string{key})
+			if err != nil {
+				v.standaloneClient.Unwatch(ctx) //nolint:errcheck
+				return errors.Trace(err)
+			}
+			if exists == 0 {
+				v.standaloneClient.Unwatch(ctx) //nolint:errcheck
+				continue
+			}
+			if _, err = v.standaloneClient.HSet(ctx, key, values); err != nil {
+				v.standaloneClient.Unwatch(ctx) //nolint:errcheck
+				return errors.Trace(err)
+			}
+			v.standaloneClient.Unwatch(ctx) //nolint:errcheck
+		}
+	}
+	return nil
+}
+
+// DeleteScores deletes score documents matching the condition.
+func (v *Valkey) DeleteScores(ctx context.Context, collections []string, condition ScoreCondition) error {
+	if err := condition.Check(); err != nil {
+		return errors.Trace(err)
+	}
+	// Build the FT.SEARCH filter from the optional condition fields.
+	var builder strings.Builder
+	fmt.Fprintf(&builder, "@collection:{ %s }", escape(strings.Join(collections, " | ")))
+	if condition.Subset != nil {
+		fmt.Fprintf(&builder, " @subset:{ %s }", escape(*condition.Subset))
+	}
+	if condition.Id != nil {
+		fmt.Fprintf(&builder, " @id:{ %s }", escape(*condition.Id))
+	}
+	if condition.Before != nil {
+		// "(" makes the upper bound exclusive: strictly before the cutoff.
+		fmt.Fprintf(&builder, " @timestamp:[-inf (%d]", condition.Before.UnixMicro())
+	}
+	// Delete in pages of up to 10000; re-running the search after each delete
+	// pass naturally advances through the remaining matches.
+	for {
+		args := []string{
+			"FT.SEARCH", v.DocumentTable(), builder.String(),
+			"SORTBY", "score", "DESC",
+			"LIMIT", "0", "10000",
+		}
+		result, err := v.customCommand(ctx, args)
+		if err != nil {
+			return errors.Trace(err)
+		}
+		docKeys := parseFTSearchKeys(result)
+		total := parseFTSearchTotal(result)
+		if len(docKeys) == 0 {
+			break
+		}
+		if v.isCluster {
+			// Cluster mode: delete key by key (keys may span slots).
+			for _, key := range docKeys {
+				if _, err = v.clusterClient.Del(ctx, []string{key}); err != nil {
+					return errors.Trace(err)
+				}
+			}
+		} else {
+			batch := pipeline.NewStandaloneBatch(false)
+			for _, key := range docKeys {
+				batch.Del([]string{key})
+			}
+			if _, err = v.standaloneClient.Exec(ctx, *batch, true); err != nil {
+				return errors.Trace(err)
+			}
+		}
+		// If this page covered every remaining match, skip the extra round-trip.
+		if total == len(docKeys) {
+			break
+		}
+	}
+	return nil
+}
+
+// ScanScores iterates over all score documents.
+// The callback receives the collection, id, subset and timestamp of every
+// stored document; returning an error from the callback aborts the scan.
+func (v *Valkey) ScanScores(ctx context.Context, callback func(collection string, id string, subset string, timestamp time.Time) error) error {
+	if v.isCluster {
+		// Cluster mode: scan all nodes for document keys, then HGETALL each.
+		cursor := models.NewClusterScanCursor()
+		scanOpts := glideoptions.NewClusterScanOptions().SetMatch(v.DocumentTable() + "*")
+		for !cursor.IsFinished() {
+			result, err := v.clusterClient.ScanWithOptions(ctx, cursor, *scanOpts)
+			if err != nil {
+				return errors.Trace(err)
+			}
+			for _, key := range result.Keys {
+				row, err := v.clusterClient.HGetAll(ctx, key)
+				if err != nil {
+					return errors.Trace(err)
+				}
+				// Timestamps are stored as microseconds since the epoch (see AddScores).
+				usec, err := strconv.ParseInt(row["timestamp"], 10, 64)
+				if err != nil {
+					return errors.Trace(err)
+				}
+				if err = callback(row["collection"], row["id"], row["subset"], time.UnixMicro(usec).In(time.UTC)); err != nil {
+					return errors.Trace(err)
+				}
+			}
+			cursor = result.Cursor
+		}
+		return nil
+	}
+	cursor := models.NewCursor()
+	scanOpts := glideoptions.NewScanOptions().SetMatch(v.DocumentTable() + "*")
+	for {
+		result, err := v.standaloneClient.ScanWithOptions(ctx, cursor, *scanOpts)
+		if err != nil {
+			return errors.Trace(err)
+		}
+		for _, key := range result.Data {
+			row, err := v.standaloneClient.HGetAll(ctx, key)
+			if err != nil {
+				return errors.Trace(err)
+			}
+			usec, err := strconv.ParseInt(row["timestamp"], 10, 64)
+			if err != nil {
+				return errors.Trace(err)
+			}
+			if err = callback(row["collection"], row["id"], row["subset"], time.UnixMicro(usec).In(time.UTC)); err != nil {
+				return errors.Trace(err)
+			}
+		}
+		if result.Cursor.IsFinished() {
+			return nil
+		}
+		cursor = result.Cursor
+	}
+}
+
+// AddTimeSeriesPoints adds time series points using sorted set + hash (Approach A).
+// Sorted set key: {points_table}:ts_index:{name} — score = timestamp_ms, member = timestamp_ms as string
+// Hash key: {points_table}:ts_data:{name} — field = timestamp_ms as string, value = float64 as string
+// ZADD and HSET both overwrite on duplicate timestamps, giving last-write-wins.
+func (v *Valkey) AddTimeSeriesPoints(ctx context.Context, points []TimeSeriesPoint) error {
+	if v.isCluster {
+		// Cluster mode: index and data keys may hash to different slots, so
+		// issue the ZADD/HSET pair per point instead of batching.
+		for _, point := range points {
+			tsMs := point.Timestamp.UnixMilli()
+			tsMsStr := strconv.FormatInt(tsMs, 10)
+			indexKey := v.PointsTable() + ":ts_index:" + point.Name
+			dataKey := v.PointsTable() + ":ts_data:" + point.Name
+			if _, err := v.clusterClient.ZAdd(ctx, indexKey, map[string]float64{tsMsStr: float64(tsMs)}); err != nil {
+				return errors.Trace(err)
+			}
+			if _, err := v.clusterClient.HSet(ctx, dataKey, map[string]string{
+				tsMsStr: strconv.FormatFloat(point.Value, 'g', -1, 64),
+			}); err != nil {
+				return errors.Trace(err)
+			}
+		}
+		return nil
+	}
+	// Standalone mode: pipeline all writes in one non-atomic batch.
+	batch := pipeline.NewStandaloneBatch(false)
+	for _, point := range points {
+		tsMs := point.Timestamp.UnixMilli()
+		tsMsStr := strconv.FormatInt(tsMs, 10)
+		indexKey := v.PointsTable() + ":ts_index:" + point.Name
+		dataKey := v.PointsTable() + ":ts_data:" + point.Name
+		batch.ZAdd(indexKey, map[string]float64{tsMsStr: float64(tsMs)})
+		batch.HSet(dataKey, map[string]string{
+			tsMsStr: strconv.FormatFloat(point.Value, 'g', -1, 64),
+		})
+	}
+	_, err := v.standaloneClient.Exec(ctx, *batch, true)
+	return errors.Trace(err)
+}
+
+// GetTimeSeriesPoints retrieves time series points with Go-side bucket aggregation.
+// Points in [begin, end] are grouped into buckets of width duration; each
+// bucket reports the value of its latest point.
+func (v *Valkey) GetTimeSeriesPoints(ctx context.Context, name string, begin, end time.Time, duration time.Duration) ([]TimeSeriesPoint, error) {
+	indexKey := v.PointsTable() + ":ts_index:" + name
+	dataKey := v.PointsTable() + ":ts_data:" + name
+	beginMs := begin.UnixMilli()
+	endMs := end.UnixMilli()
+
+	// Fetch all timestamps in range from sorted set using ZRANGEBYSCORE via ZRangeWithScores.
+	// Both boundaries are inclusive.
+	rangeQuery := glideoptions.NewRangeByScoreQuery(
+		glideoptions.NewInclusiveScoreBoundary(float64(beginMs)),
+		glideoptions.NewInclusiveScoreBoundary(float64(endMs)),
+	)
+	var members []models.MemberAndScore
+	var err error
+	if v.isCluster {
+		members, err = v.clusterClient.ZRangeWithScores(ctx, indexKey, rangeQuery)
+	} else {
+		members, err = v.standaloneClient.ZRangeWithScores(ctx, indexKey, rangeQuery)
+	}
+	if err != nil {
+		return nil, errors.Trace(err)
+	}
+	if len(members) == 0 {
+		return nil, nil
+	}
+
+	// Fetch corresponding values from hash. Members and HMGET results are
+	// positionally aligned.
+	fields := make([]string, len(members))
+	for i, m := range members {
+		fields[i] = m.Member
+	}
+	var hmgetResults []models.Result[string]
+	if v.isCluster {
+		hmgetResults, err = v.clusterClient.HMGet(ctx, dataKey, fields)
+	} else {
+		hmgetResults, err = v.standaloneClient.HMGet(ctx, dataKey, fields)
+	}
+	if err != nil {
+		return nil, errors.Trace(err)
+	}
+
+	// Build timestamp-value pairs. Unparseable or missing entries are logged
+	// and skipped rather than failing the whole query.
+	type tsValue struct {
+		timestampMs int64
+		value       float64
+	}
+	tsValues := make([]tsValue, 0, len(members))
+	for i, m := range members {
+		if hmgetResults[i].IsNil() {
+			continue
+		}
+		tsMs, err := strconv.ParseInt(m.Member, 10, 64)
+		if err != nil {
+			log.Logger().Warn("failed to parse timestamp", zap.String("member", m.Member), zap.Error(err))
+			continue
+		}
+		val, err := strconv.ParseFloat(hmgetResults[i].Value(), 64)
+		if err != nil {
+			log.Logger().Warn("failed to parse value", zap.String("value", hmgetResults[i].Value()), zap.Error(err))
+			continue
+		}
+		tsValues = append(tsValues, tsValue{timestampMs: tsMs, value: val})
+	}
+
+	// Go-side bucket aggregation: group by floor(timestamp_ms / duration_ms) * duration_ms,
+	// take the last value per bucket (highest timestamp).
+	durationMs := duration.Milliseconds()
+	if durationMs <= 0 {
+		// Guard against division by zero for sub-millisecond or zero durations.
+		durationMs = 1
+	}
+	type bucket struct {
+		bucketMs  int64
+		lastTsMs  int64
+		lastValue float64
+	}
+	buckets := make(map[int64]*bucket)
+	for _, tv := range tsValues {
+		bk := (tv.timestampMs / durationMs) * durationMs
+		existing, ok := buckets[bk]
+		if !ok || tv.timestampMs > existing.lastTsMs {
+			buckets[bk] = &bucket{bucketMs: bk, lastTsMs: tv.timestampMs, lastValue: tv.value}
+		}
+	}
+
+	// Sort buckets by timestamp ascending (map iteration order is random).
+	sortedBuckets := make([]*bucket, 0, len(buckets))
+	for _, b := range buckets {
+		sortedBuckets = append(sortedBuckets, b)
+	}
+	sort.Slice(sortedBuckets, func(i, j int) bool {
+		return sortedBuckets[i].bucketMs < sortedBuckets[j].bucketMs
+	})
+
+	// Convert to TimeSeriesPoint, stamping each bucket with its start time.
+	points := make([]TimeSeriesPoint, 0, len(sortedBuckets))
+	for _, b := range sortedBuckets {
+		points = append(points, TimeSeriesPoint{
+			Name:      name,
+			Timestamp: time.UnixMilli(b.bucketMs).UTC(),
+			Value:     b.lastValue,
+		})
+	}
+	return points, nil
+}
+
+// parseStringSlice converts a CustomCommand result to a string slice.
+// Non-string elements are silently dropped; non-array input yields nil.
+func parseStringSlice(result any) []string {
+	if result == nil {
+		return nil
+	}
+	switch v := result.(type) {
+	case []any:
+		strs := make([]string, 0, len(v))
+		for _, item := range v {
+			if s, ok := item.(string); ok {
+				strs = append(strs, s)
+			}
+		}
+		return strs
+	default:
+		return nil
+	}
+}
+
+// parseFTSearchTotal extracts the total count from an FT.SEARCH result.
+// The first element of the reply array is the total; both int64 and float64
+// encodings are accepted. Malformed input yields 0.
+func parseFTSearchTotal(result any) int {
+	if result == nil {
+		return 0
+	}
+	arr, ok := result.([]any)
+	if !ok || len(arr) == 0 {
+		return 0
+	}
+	switch v := arr[0].(type) {
+	case int64:
+		return int(v)
+	case float64:
+		return int(v)
+	default:
+		return 0
+	}
+}
+
+// parseFTSearchKeys extracts document keys from an FT.SEARCH result.
+// FT.SEARCH returns: [total, key1, [field, value, ...], key2, [field, value, ...], ...]
+func parseFTSearchKeys(result any) []string {
+	if result == nil {
+		return nil
+	}
+	arr, ok := result.([]any)
+	if !ok || len(arr) < 2 {
+		return nil
+	}
+	var keys []string
+	// Skip index 0 (total), then every other element starting at 1 is a key.
+	for i := 1; i < len(arr); i += 2 {
+		if key, ok := arr[i].(string); ok {
+			keys = append(keys, key)
+		}
+	}
+	return keys
+}
+
+// parseFTSearchResult parses an FT.SEARCH result into Score documents.
+// FT.SEARCH returns: [total, key1, [field, value, ...], key2, [field, value, ...], ...]
+// Malformed numeric fields abort the parse with an error; documents whose
+// field list cannot be interpreted are skipped.
+func parseFTSearchResult(result any) ([]Score, error) {
+	if result == nil {
+		return nil, nil
+	}
+	arr, ok := result.([]any)
+	if !ok || len(arr) < 2 {
+		return nil, nil
+	}
+	documents := make([]Score, 0)
+	// Walk key/fields pairs; arr[i] is the key, arr[i+1] its field list.
+	for i := 1; i < len(arr); i += 2 {
+		if i+1 >= len(arr) {
+			break
+		}
+		fields := parseFieldMap(arr[i+1])
+		if fields == nil {
+			continue
+		}
+		var doc Score
+		doc.Id = fields["id"]
+		score, err := strconv.ParseFloat(fields["score"], 64)
+		if err != nil {
+			return nil, errors.Trace(err)
+		}
+		doc.Score = score
+		isHidden, err := strconv.ParseInt(fields["is_hidden"], 10, 64)
+		if err != nil {
+			return nil, errors.Trace(err)
+		}
+		doc.IsHidden = isHidden != 0
+		categories, err := decodeCategories(fields["categories"])
+		if err != nil {
+			return nil, errors.Trace(err)
+		}
+		doc.Categories = categories
+		// Timestamps are stored as microseconds since the epoch (see AddScores).
+		timestamp, err := strconv.ParseInt(fields["timestamp"], 10, 64)
+		if err != nil {
+			return nil, errors.Trace(err)
+		}
+		doc.Timestamp = time.UnixMicro(timestamp).In(time.UTC)
+		documents = append(documents, doc)
+	}
+	return documents, nil
+}
+
+// parseFieldMap converts a field array from FT.SEARCH into a map.
+// The field array is [field1, value1, field2, value2, ...] or a map.
+// Non-string keys or values are dropped; unrecognized shapes yield nil.
+func parseFieldMap(v any) map[string]string {
+	if v == nil {
+		return nil
+	}
+	switch fields := v.(type) {
+	case []any:
+		// Flat [field1, value1, field2, value2, ...] reply.
+		m := make(map[string]string)
+		for j := 0; j+1 < len(fields); j += 2 {
+			key, ok1 := fields[j].(string)
+			val, ok2 := fields[j+1].(string)
+			if ok1 && ok2 {
+				m[key] = val
+			}
+		}
+		return m
+	case map[string]any:
+		// Map-shaped reply (e.g. RESP3 / client-decoded form).
+		m := make(map[string]string)
+		for k, val := range fields {
+			if s, ok := val.(string); ok {
+				m[k] = s
+			}
+		}
+		return m
+	case map[string]string:
+		return fields
+	default:
+		return nil
+	}
+}
diff --git a/storage/cache/valkey_test.go b/storage/cache/valkey_test.go
new file mode 100644
index 000000000..d92b7bd75
--- /dev/null
+++ b/storage/cache/valkey_test.go
@@ -0,0 +1,227 @@
+// Copyright 2025 gorse Project Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +package cache + +import ( + "context" + "fmt" + "math" + "os" + "testing" + "time" + + "github.com/gorse-io/gorse/common/log" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/suite" +) + +var ( + valkeyDSN string +) + +func init() { + env := func(key, defaultValue string) string { + if value := os.Getenv(key); value != "" { + return value + } + return defaultValue + } + valkeyDSN = env("VALKEY_URI", "valkey://127.0.0.1:6379/") +} + +type ValkeyTestSuite struct { + baseTestSuite +} + +func (suite *ValkeyTestSuite) SetupSuite() { + var err error + suite.Database, err = Open(valkeyDSN, "gorse_") + suite.NoError(err) + // flush db + valkeyClient, ok := suite.Database.(*Valkey) + suite.True(ok) + if !valkeyClient.isCluster { + _, err = valkeyClient.standaloneClient.CustomCommand(suite.T().Context(), []string{"FLUSHDB"}) + suite.NoError(err) + } + // create schema + err = suite.Database.Init() + suite.NoError(err) +} + +func (suite *ValkeyTestSuite) TestEscapeCharacters() { + ts := time.Date(2023, 1, 1, 0, 0, 0, 0, time.UTC) + ctx := suite.T().Context() + for _, c := range []string{"-", ":", ".", "/"} { + suite.Run(c, func() { + collection := fmt.Sprintf("a%s1", c) + subset := fmt.Sprintf("b%s2", c) + id := fmt.Sprintf("c%s3", c) + err := suite.AddScores(ctx, collection, subset, []Score{{ + Id: id, + Score: math.MaxFloat64, + Categories: []string{"a", "b"}, + Timestamp: ts, + }}) + suite.NoError(err) + documents, err := suite.SearchScores(ctx, collection, subset, []string{"b"}, 0, -1) + suite.NoError(err) + suite.Equal([]Score{{Id: id, Score: math.MaxFloat64, Categories: []string{"a", "b"}, Timestamp: ts}}, documents) + + err = suite.UpdateScores(ctx, []string{collection}, nil, id, ScorePatch{Score: new(float64(1))}) + suite.NoError(err) + documents, err = suite.SearchScores(ctx, collection, subset, []string{"b"}, 0, -1) + suite.NoError(err) + suite.Equal([]Score{{Id: id, Score: 1, Categories: []string{"a", "b"}, Timestamp: ts}}, documents) + + 
err = suite.DeleteScores(ctx, []string{collection}, ScoreCondition{ + Subset: new(subset), + Id: new(id), + }) + suite.NoError(err) + documents, err = suite.SearchScores(ctx, collection, subset, []string{"b"}, 0, -1) + suite.NoError(err) + suite.Empty(documents) + }) + } +} + +func (suite *ValkeyTestSuite) TestUpdateScoresWithPagination() { + ctx := suite.T().Context() + db, ok := suite.Database.(*Valkey) + suite.True(ok) + limit := db.maxSearchResults + db.maxSearchResults = 2 + defer func() { + db.maxSearchResults = limit + }() + + for i := range 5 { + subset := fmt.Sprintf("subset-%d", i) + err := suite.AddScores(ctx, "collection-a", subset, []Score{{ + Id: "shared-item", + Score: float64(i), + Categories: []string{"old"}, + Timestamp: time.Now().UTC(), + }}) + suite.NoError(err) + } + + err := suite.UpdateScores(ctx, []string{"collection-a"}, nil, "shared-item", ScorePatch{ + Categories: []string{"new"}, + }) + suite.NoError(err) + + for i := range 5 { + subset := fmt.Sprintf("subset-%d", i) + docs, err := suite.SearchScores(ctx, "collection-a", subset, []string{"new"}, 0, -1) + suite.NoError(err) + suite.Require().Len(docs, 1) + suite.Equal("shared-item", docs[0].Id) + } +} + +func (suite *ValkeyTestSuite) TestUpdateScoresWithPaginationAndScorePatch() { + ctx := suite.T().Context() + db, ok := suite.Database.(*Valkey) + suite.True(ok) + limit := db.maxSearchResults + db.maxSearchResults = 1 + defer func() { + db.maxSearchResults = limit + }() + + initialScores := []float64{3, 2, 1} + for i, score := range initialScores { + subset := fmt.Sprintf("score-subset-%d", i) + err := suite.AddScores(ctx, "collection-b", subset, []Score{{ + Id: "shared-item", + Score: score, + Categories: []string{"score-old"}, + Timestamp: time.Now().UTC(), + }}) + suite.NoError(err) + } + + targetScore := float64(0) + err := suite.UpdateScores(ctx, []string{"collection-b"}, nil, "shared-item", ScorePatch{ + Score: &targetScore, + }) + suite.NoError(err) + + for i := range 
initialScores {
+		subset := fmt.Sprintf("score-subset-%d", i)
+		docs, err := suite.SearchScores(ctx, "collection-b", subset, nil, 0, -1)
+		suite.NoError(err)
+		suite.Require().Len(docs, 1)
+		suite.Equal(targetScore, docs[0].Score)
+	}
+}
+
+// TestUpdateScoresWithPaginationAndTiedScores checks that pagination over
+// documents with identical scores (an unstable sort order) still updates
+// every matching document exactly once.
+func (suite *ValkeyTestSuite) TestUpdateScoresWithPaginationAndTiedScores() {
+	ctx := suite.T().Context()
+	db, ok := suite.Database.(*Valkey)
+	suite.True(ok)
+	limit := db.maxSearchResults
+	db.maxSearchResults = 2
+	defer func() {
+		// Restore the original page size for subsequent tests.
+		db.maxSearchResults = limit
+	}()
+
+	for i := range 5 {
+		subset := fmt.Sprintf("tie-subset-%d", i)
+		err := suite.AddScores(ctx, "collection-c", subset, []Score{{
+			Id:         "shared-item",
+			Score:      1,
+			Categories: []string{"tie-old"},
+			Timestamp:  time.Now().UTC(),
+		}})
+		suite.NoError(err)
+	}
+
+	err := suite.UpdateScores(ctx, []string{"collection-c"}, nil, "shared-item", ScorePatch{
+		Categories: []string{"tie-new"},
+	})
+	suite.NoError(err)
+
+	for i := range 5 {
+		subset := fmt.Sprintf("tie-subset-%d", i)
+		docs, err := suite.SearchScores(ctx, "collection-c", subset, []string{"tie-new"}, 0, -1)
+		suite.NoError(err)
+		suite.Require().Len(docs, 1)
+		suite.Equal("shared-item", docs[0].Id)
+	}
+}
+
+func TestValkey(t *testing.T) {
+	suite.Run(t, new(ValkeyTestSuite))
+}
+
+func BenchmarkValkey(b *testing.B) {
+	log.CloseLogger()
+	database, err := Open(valkeyDSN, "gorse_")
+	assert.NoError(b, err)
+	// flush db
+	valkeyClient := database.(*Valkey)
+	if !valkeyClient.isCluster {
+		_, err = valkeyClient.standaloneClient.CustomCommand(context.Background(), []string{"FLUSHDB"})
+		assert.NoError(b, err)
+	}
+	// create schema
+	err = database.Init()
+	assert.NoError(b, err)
+	// benchmark
+	benchmark(b, database)
+}
diff --git a/storage/scheme.go b/storage/scheme.go
index 138176127..c1f7e856d 100644
--- a/storage/scheme.go
+++ b/storage/scheme.go
@@ -70,23 +70,27 @@ func init() {
 }
 
 const (
-	MySQLPrefix          = "mysql://"
-	MongoPrefix          = "mongodb://"
-	MongoSrvPrefix       = "mongodb+srv://"
-	PostgresPrefix       = 
"postgres://" - PostgreSQLPrefix = "postgresql://" - ClickhousePrefix = "clickhouse://" - CHHTTPPrefix = "chhttp://" - CHHTTPSPrefix = "chhttps://" - SQLitePrefix = "sqlite://" - RedisPrefix = "redis://" - RedissPrefix = "rediss://" - RedisClusterPrefix = "redis+cluster://" - RedissClusterPrefix = "rediss+cluster://" - QdrantPrefix = "qdrant://" - WeaviatePrefix = "weaviate://" - WeaviatesPrefix = "weaviates://" - MilvusPrefix = "milvus://" + MySQLPrefix = "mysql://" + MongoPrefix = "mongodb://" + MongoSrvPrefix = "mongodb+srv://" + PostgresPrefix = "postgres://" + PostgreSQLPrefix = "postgresql://" + ClickhousePrefix = "clickhouse://" + CHHTTPPrefix = "chhttp://" + CHHTTPSPrefix = "chhttps://" + SQLitePrefix = "sqlite://" + RedisPrefix = "redis://" + RedissPrefix = "rediss://" + RedisClusterPrefix = "redis+cluster://" + RedissClusterPrefix = "rediss+cluster://" + ValkeyPrefix = "valkey://" + ValkeysPrefix = "valkeys://" + ValkeyClusterPrefix = "valkey+cluster://" + ValkeysClusterPrefix = "valkeys+cluster://" + QdrantPrefix = "qdrant://" + WeaviatePrefix = "weaviate://" + WeaviatesPrefix = "weaviates://" + MilvusPrefix = "milvus://" ) func AppendURLParams(rawURL string, params []lo.Tuple2[string, string]) (string, error) { From 61c2637ed1486f772550d4042facaf0d47a4a069 Mon Sep 17 00:00:00 2001 From: Daria Korenieva Date: Wed, 29 Apr 2026 11:53:18 -0700 Subject: [PATCH 02/19] chore: add Valkey service to storage docker-compose Add valkey/valkey-bundle:unstable (valkey-search 1.2.0+ with full-text search support) to storage/docker-compose.yml on port 6380. Update default test URI to valkey://127.0.0.1:6380/. 
Signed-off-by: Daria Korenieva --- storage/cache/valkey_test.go | 2 +- storage/docker-compose.yml | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/storage/cache/valkey_test.go b/storage/cache/valkey_test.go index d92b7bd75..de836c85b 100644 --- a/storage/cache/valkey_test.go +++ b/storage/cache/valkey_test.go @@ -38,7 +38,7 @@ func init() { } return defaultValue } - valkeyDSN = env("VALKEY_URI", "valkey://127.0.0.1:6379/") + valkeyDSN = env("VALKEY_URI", "valkey://127.0.0.1:6380/") } type ValkeyTestSuite struct { diff --git a/storage/docker-compose.yml b/storage/docker-compose.yml index 97b98331c..3382766f5 100644 --- a/storage/docker-compose.yml +++ b/storage/docker-compose.yml @@ -5,6 +5,14 @@ services: ports: - 6379:6379 + # Valkey with valkey-search 1.2.0+ (full-text search support). + # Bundle 'unstable' is the only published tag with search >= 1.2. + # Switch to a stable tag (e.g. 9.1) once it is released. + valkey: + image: valkey/valkey-bundle:unstable + ports: + - 6380:6379 + mysql: image: mysql:8.0 ports: From 10eda266a4dc8ab6adff5cf416a686305198ea1f Mon Sep 17 00:00:00 2001 From: Daria Korenieva Date: Wed, 29 Apr 2026 12:25:32 -0700 Subject: [PATCH 03/19] fixes Signed-off-by: Daria Korenieva --- storage/cache/valkey.go | 189 ++++++++++++++++++++++++++++------------ 1 file changed, 135 insertions(+), 54 deletions(-) diff --git a/storage/cache/valkey.go b/storage/cache/valkey.go index 6124aa67f..f18c75b37 100644 --- a/storage/cache/valkey.go +++ b/storage/cache/valkey.go @@ -42,16 +42,17 @@ func init() { if err != nil { return nil, errors.Trace(err) } - cfg := &glideconfig.ClientConfiguration{ - Addresses: []glideconfig.NodeAddress{addr}, - } + cfg := glideconfig.NewClientConfiguration(). 
+ WithAddress(&addr) if password != "" { - cfg.Credentials = &glideconfig.ServerCredentials{Password: password} + cfg.WithCredentials(glideconfig.NewServerCredentialsWithDefaultUsername(password)) } if db > 0 { - cfg.DatabaseId = db + cfg.WithDatabaseId(db) + } + if useTLS { + cfg.WithUseTLS(true) } - cfg.UseTLS = useTLS client, err := glide.NewClient(cfg) if err != nil { return nil, errors.Trace(err) @@ -69,20 +70,23 @@ func init() { if err != nil { return nil, errors.Trace(err) } - cfg := &glideconfig.ClusterClientConfiguration{ - Addresses: addresses, + cfg := glideconfig.NewClusterClientConfiguration() + for i := range addresses { + cfg.WithAddress(&addresses[i]) } if password != "" { - cfg.Credentials = &glideconfig.ServerCredentials{Password: password} + cfg.WithCredentials(glideconfig.NewServerCredentialsWithDefaultUsername(password)) + } + if useTLS { + cfg.WithUseTLS(true) } - cfg.UseTLS = useTLS client, err := glide.NewClusterClient(cfg) if err != nil { return nil, errors.Trace(err) } database := &Valkey{ - clusterClient: client, - isCluster: true, + clusterClient: client, + isCluster: true, maxSearchResults: storage.NewOptions(opts...).MaxSearchResults, } database.TablePrefix = storage.TablePrefix(tablePrefix) @@ -324,20 +328,18 @@ func (v *Valkey) Purge() error { // Set stores values in Valkey. 
func (v *Valkey) Set(ctx context.Context, values ...Value) error { - if v.isCluster { - for _, val := range values { - if _, err := v.clusterClient.Set(ctx, v.Key(val.name), val.value); err != nil { - return errors.Trace(err) - } - } - return nil - } - batch := pipeline.NewStandaloneBatch(false) for _, val := range values { - batch.Set(v.Key(val.name), val.value) + var err error + if v.isCluster { + _, err = v.clusterClient.Set(ctx, v.Key(val.name), val.value) + } else { + _, err = v.standaloneClient.Set(ctx, v.Key(val.name), val.value) + } + if err != nil { + return errors.Trace(err) + } } - _, err := v.standaloneClient.Exec(ctx, *batch, true) - return errors.Trace(err) + return nil } // Get returns a value from Valkey. @@ -463,20 +465,38 @@ func (v *Valkey) SearchScores(ctx context.Context, collection, subset string, qu for _, q := range query { fmt.Fprintf(&builder, " @categories:{ %s }", escape(encodeCategory(q))) } - limit := 10000 + // Fetch enough results to cover the requested range. + // We request from offset 0 because the Glide map format loses ordering, + // and we sort + slice in Go. + fetchLimit := 10000 if end != -1 { - limit = end - begin + fetchLimit = end } args := []string{ "FT.SEARCH", v.DocumentTable(), builder.String(), "SORTBY", "score", "DESC", - "LIMIT", strconv.Itoa(begin), strconv.Itoa(limit), + "LIMIT", "0", strconv.Itoa(fetchLimit), } result, err := v.customCommand(ctx, args) if err != nil { return nil, errors.Trace(err) } - return parseFTSearchResult(result) + documents, err := parseFTSearchResult(result) + if err != nil { + return nil, errors.Trace(err) + } + // Apply begin/end slicing since Glide map format loses server-side ordering. + if begin > 0 || end != -1 { + if begin >= len(documents) { + return []Score{}, nil + } + endIdx := len(documents) + if end != -1 && end < endIdx { + endIdx = end + } + documents = documents[begin:endIdx] + } + return documents, nil } // UpdateScores updates score documents matching the query. 
@@ -512,6 +532,20 @@ func (v *Valkey) UpdateScores(ctx context.Context, collections []string, subset if err != nil { return errors.Trace(err) } + // On the first page, check the total and fetch all at once if needed. + // This avoids pagination issues with tied scores where the server + // may return duplicates across pages. + if offset == 0 { + total := parseFTSearchTotal(result) + if total > limit { + // Re-fetch with the full count to avoid pagination drift. + args[len(args)-1] = strconv.Itoa(total) + result, err = v.customCommand(ctx, args) + if err != nil { + return errors.Trace(err) + } + } + } docKeys := parseFTSearchKeys(result) if len(docKeys) == 0 { break @@ -790,9 +824,9 @@ func (v *Valkey) GetTimeSeriesPoints(ctx context.Context, name string, begin, en durationMs = 1 } type bucket struct { - bucketMs int64 - lastTsMs int64 - lastValue float64 + bucketMs int64 + lastTsMs int64 + lastValue float64 } buckets := make(map[int64]*bucket) for _, tv := range tsValues { @@ -844,6 +878,7 @@ func parseStringSlice(result any) []string { } // parseFTSearchTotal extracts the total count from an FT.SEARCH result. +// Valkey Glide returns: [int64_total, map[string]interface{}{docKey: map[string]interface{}{fields...}, ...}] func parseFTSearchTotal(result any) int { if result == nil { return 0 @@ -863,7 +898,7 @@ func parseFTSearchTotal(result any) int { } // parseFTSearchKeys extracts document keys from an FT.SEARCH result. -// FT.SEARCH returns: [total, key1, [field, value, ...], key2, [field, value, ...], ...] +// Valkey Glide returns: [int64_total, map[string]interface{}{docKey: fieldsMap, ...}] func parseFTSearchKeys(result any) []string { if result == nil { return nil @@ -872,8 +907,16 @@ func parseFTSearchKeys(result any) []string { if !ok || len(arr) < 2 { return nil } + // Glide format: arr[1] is a map[string]interface{} where keys are doc keys. 
+ if docMap, ok := arr[1].(map[string]any); ok { + keys := make([]string, 0, len(docMap)) + for key := range docMap { + keys = append(keys, key) + } + return keys + } + // Fallback: flat array format [total, key1, [fields...], key2, [fields...], ...] var keys []string - // Skip index 0 (total), then every other element starting at 1 is a key. for i := 1; i < len(arr); i += 2 { if key, ok := arr[i].(string); ok { keys = append(keys, key) @@ -883,7 +926,7 @@ func parseFTSearchKeys(result any) []string { } // parseFTSearchResult parses an FT.SEARCH result into Score documents. -// FT.SEARCH returns: [total, key1, [field, value, ...], key2, [field, value, ...], ...] +// Valkey Glide returns: [int64_total, map[string]interface{}{docKey: map[string]interface{}{fields...}, ...}] func parseFTSearchResult(result any) ([]Score, error) { if result == nil { return nil, nil @@ -892,6 +935,14 @@ func parseFTSearchResult(result any) ([]Score, error) { if !ok || len(arr) < 2 { return nil, nil } + + // Glide format: arr[1] is a map[string]interface{} where keys are doc keys + // and values are maps of field name → field value. + if docMap, ok := arr[1].(map[string]any); ok { + return parseFTSearchResultFromMap(docMap) + } + + // Fallback: flat array format [total, key1, [fields...], key2, [fields...], ...] 
documents := make([]Score, 0) for i := 1; i < len(arr); i += 2 { if i+1 >= len(arr) { @@ -901,33 +952,65 @@ func parseFTSearchResult(result any) ([]Score, error) { if fields == nil { continue } - var doc Score - doc.Id = fields["id"] - score, err := strconv.ParseFloat(fields["score"], 64) + doc, err := scoreFromFieldMap(fields) if err != nil { - return nil, errors.Trace(err) - } - doc.Score = score - isHidden, err := strconv.ParseInt(fields["is_hidden"], 10, 64) - if err != nil { - return nil, errors.Trace(err) + return nil, err } - doc.IsHidden = isHidden != 0 - categories, err := decodeCategories(fields["categories"]) - if err != nil { - return nil, errors.Trace(err) + documents = append(documents, doc) + } + return documents, nil +} + +// parseFTSearchResultFromMap parses the Glide map format into Score documents. +// The map has doc keys as keys and field maps as values. +// We need to sort by score DESC to match the SORTBY in the query. +func parseFTSearchResultFromMap(docMap map[string]any) ([]Score, error) { + documents := make([]Score, 0, len(docMap)) + for _, docValue := range docMap { + fields := parseFieldMap(docValue) + if fields == nil { + continue } - doc.Categories = categories - timestamp, err := strconv.ParseInt(fields["timestamp"], 10, 64) + doc, err := scoreFromFieldMap(fields) if err != nil { - return nil, errors.Trace(err) + return nil, err } - doc.Timestamp = time.UnixMicro(timestamp).In(time.UTC) documents = append(documents, doc) } + // Sort by score descending to match the FT.SEARCH SORTBY score DESC. + sort.Slice(documents, func(i, j int) bool { + return documents[i].Score > documents[j].Score + }) return documents, nil } +// scoreFromFieldMap converts a field map into a Score struct. 
+func scoreFromFieldMap(fields map[string]string) (Score, error) { + var doc Score + doc.Id = fields["id"] + score, err := strconv.ParseFloat(fields["score"], 64) + if err != nil { + return doc, errors.Trace(err) + } + doc.Score = score + isHidden, err := strconv.ParseInt(fields["is_hidden"], 10, 64) + if err != nil { + return doc, errors.Trace(err) + } + doc.IsHidden = isHidden != 0 + categories, err := decodeCategories(fields["categories"]) + if err != nil { + return doc, errors.Trace(err) + } + doc.Categories = categories + timestamp, err := strconv.ParseInt(fields["timestamp"], 10, 64) + if err != nil { + return doc, errors.Trace(err) + } + doc.Timestamp = time.UnixMicro(timestamp).In(time.UTC) + return doc, nil +} + // parseFieldMap converts a field array from FT.SEARCH into a map. // The field array is [field1, value1, field2, value2, ...] or a map. func parseFieldMap(v any) map[string]string { @@ -948,9 +1031,7 @@ func parseFieldMap(v any) map[string]string { case map[string]any: m := make(map[string]string) for k, val := range fields { - if s, ok := val.(string); ok { - m[k] = s - } + m[k] = fmt.Sprint(val) } return m case map[string]string: From 973737dfd8d7f6840c41c449c6aa419ab5db8044 Mon Sep 17 00:00:00 2001 From: Daria Korenieva Date: Wed, 29 Apr 2026 12:42:37 -0700 Subject: [PATCH 04/19] benchmark tests Signed-off-by: Daria Korenieva --- VALKEY_INTEGRATION_REPORT.md | 113 +++++++++++++++++++++++++ storage/cache/database_test.go | 149 +++++++++++++++++++++++++++++++++ 2 files changed, 262 insertions(+) diff --git a/VALKEY_INTEGRATION_REPORT.md b/VALKEY_INTEGRATION_REPORT.md index d8e9e6b17..311b83c5b 100644 --- a/VALKEY_INTEGRATION_REPORT.md +++ b/VALKEY_INTEGRATION_REPORT.md @@ -290,3 +290,116 @@ client, err := glide.NewClusterClient(&config.ClusterClientConfiguration{ - Tasks 4–7 (core implementation): 17h — ~2 days - Task 8 (tests): 4h — half day - Tasks 9–10 (benchmarks + PR): 5h — ~1 day + +--- + +## 7. 
Performance Benchmark Plan — Time Series: Valkey vs Redis + +### 7.1 Goal + +Compare the performance of Valkey's time series implementation (sorted set + hash with Go-side aggregation) against Redis TimeSeries (native TS.ADD / TS.RANGE with server-side aggregation). The benchmarks should answer: is the Valkey approach fast enough for Gorse's workload, and where are the gaps? + +### 7.2 What We're Comparing + +| | Redis | Valkey | +|---|---|---| +| **Ingestion** | `TS.ADD` (single native command) | `ZADD` + `HSET` (two commands, pipelined) | +| **Range query** | `TS.RANGE` with server-side `LAST` aggregation | `ZRANGEBYSCORE` + `HMGET` + Go-side bucketing | +| **Duplicate handling** | `DUPLICATE_POLICY LAST` (server-side) | `ZADD` overwrites score, `HSET` overwrites field (natural last-write-wins) | + +### 7.3 Benchmark Scenarios + +All benchmarks use Go's `testing.B` framework and run against both Redis (port 6379) and Valkey (port 6380) from the same test file. Each scenario is a separate `b.Run` sub-benchmark. 
+ +**Scenario 1: Ingestion throughput — single point** +- Insert one point per iteration (`b.N` iterations) +- Measures: ops/sec for single-point writes +- Simulates real-time metric ingestion (one metric update at a time) + +**Scenario 2: Ingestion throughput — batch** +- Insert 100 points per iteration in a single call +- Measures: ops/sec for batch writes, points/sec throughput +- Simulates bulk metric loading (e.g., after model training completes) + +**Scenario 3: Range query — small range, no aggregation** +- Pre-load 10,000 points (1 point per second over ~2.7 hours) +- Query a 60-second window with 1-second bucket duration (returns ~60 points, no aggregation needed) +- Measures: query latency when bucket size equals point interval + +**Scenario 4: Range query — large range with aggregation** +- Pre-load 10,000 points (1 point per second) +- Query the full range with 60-second bucket duration (aggregates ~10,000 points into ~167 buckets) +- Measures: query latency with heavy aggregation — this is where Go-side vs server-side aggregation matters most + +**Scenario 5: Range query — wide range, coarse aggregation** +- Pre-load 100,000 points (1 point per second over ~27 hours) +- Query the full range with 3600-second (1 hour) bucket duration (aggregates into ~28 buckets) +- Measures: worst-case scenario — large data transfer + aggregation + +### 7.4 Implementation + +Add time series benchmarks to `storage/cache/database_test.go` as shared functions (like the existing `benchmark()` pattern), then call them from both `redis_test.go` and `valkey_test.go`. 
+ +**New shared functions in `database_test.go`:** +``` +benchmarkTimeSeriesIngestSingle(b, database) +benchmarkTimeSeriesIngestBatch(b, database) +benchmarkTimeSeriesQuerySmallRange(b, database) +benchmarkTimeSeriesQueryLargeRange(b, database) +benchmarkTimeSeriesQueryWideRange(b, database) +``` + +**Wired into existing benchmark functions in `redis_test.go` and `valkey_test.go`:** +```go +// In BenchmarkRedis / BenchmarkValkey: +b.Run("TSIngestSingle", func(b *testing.B) { benchmarkTimeSeriesIngestSingle(b, database) }) +b.Run("TSIngestBatch", func(b *testing.B) { benchmarkTimeSeriesIngestBatch(b, database) }) +b.Run("TSQuerySmallRange", func(b *testing.B) { benchmarkTimeSeriesQuerySmallRange(b, database) }) +b.Run("TSQueryLargeRange", func(b *testing.B) { benchmarkTimeSeriesQueryLargeRange(b, database) }) +b.Run("TSQueryWideRange", func(b *testing.B) { benchmarkTimeSeriesQueryWideRange(b, database) }) +``` + +### 7.5 How to Run + +```bash +# Run both side by side +go test -v ./storage/cache/ -run ^$ -bench "BenchmarkRedis/TS|BenchmarkValkey/TS" -benchtime 10s -timeout 10m + +# Redis only +go test -v ./storage/cache/ -run ^$ -bench "BenchmarkRedis/TS" -benchtime 10s -timeout 10m + +# Valkey only +go test -v ./storage/cache/ -run ^$ -bench "BenchmarkValkey/TS" -benchtime 10s -timeout 10m +``` + +### 7.6 Expected Outcomes + +- **Ingestion**: Valkey will likely be slightly slower due to 2 commands (ZADD + HSET) vs 1 (TS.ADD), but pipelining should keep the gap small. +- **Small range queries**: Should be comparable — both fetch a small number of points, minimal aggregation. +- **Large range queries with aggregation**: Redis TimeSeries will likely be faster since aggregation happens server-side. Valkey must transfer all raw points to the client before aggregating. This is the key gap to quantify. +- **Gorse context**: Gorse uses time series for monitoring metrics (NDCG, precision, recall, feedback ratios) — typically low-volume data with infrequent queries. 
Even a 2-5x gap on large aggregation queries is acceptable for this workload. + +### 7.7 Documenting Results + +After running, capture output in a results table: + +``` +| Benchmark | Redis (ns/op) | Valkey (ns/op) | Ratio | +|---|---|---|---| +| TSIngestSingle | | | | +| TSIngestBatch | | | | +| TSQuerySmallRange | | | | +| TSQueryLargeRange | | | | +| TSQueryWideRange | | | | +``` + +If any scenario shows >10x degradation, document the root cause and note Approach B (Lua script server-side aggregation) as the optimization path. + +### 7.8 After Running Benchmarks + +1. Paste the benchmark output into the results table above +2. Commit the benchmark code and results to the feature branch +3. Proceed to Task 10 — PR preparation: + - Squash or clean up commits if needed + - Write PR description referencing the ticket + - Submit PR to `gorse-io/gorse` diff --git a/storage/cache/database_test.go b/storage/cache/database_test.go index 65c67c5d6..ded932bf9 100644 --- a/storage/cache/database_test.go +++ b/storage/cache/database_test.go @@ -601,6 +601,21 @@ func benchmark(b *testing.B, database Database) { b.Run("UpdateScores", func(b *testing.B) { benchmarkUpdateDocuments(b, database) }) + b.Run("TSIngestSingle", func(b *testing.B) { + benchmarkTimeSeriesIngestSingle(b, database) + }) + b.Run("TSIngestBatch", func(b *testing.B) { + benchmarkTimeSeriesIngestBatch(b, database) + }) + b.Run("TSQuerySmallRange", func(b *testing.B) { + benchmarkTimeSeriesQuerySmallRange(b, database) + }) + b.Run("TSQueryLargeRange", func(b *testing.B) { + benchmarkTimeSeriesQueryLargeRange(b, database) + }) + b.Run("TSQueryWideRange", func(b *testing.B) { + benchmarkTimeSeriesQueryWideRange(b, database) + }) } func benchmarkAddDocuments(b *testing.B, database Database) { @@ -658,3 +673,137 @@ func benchmarkUpdateDocuments(b *testing.B, database Database) { assert.NoError(b, err) } } + +// benchmarkTimeSeriesIngestSingle measures single-point ingestion throughput. 
+func benchmarkTimeSeriesIngestSingle(b *testing.B, database Database) { + ctx := b.Context() + base := time.Date(2023, 1, 1, 0, 0, 0, 0, time.UTC) + b.ResetTimer() + for i := 0; i < b.N; i++ { + err := database.AddTimeSeriesPoints(ctx, []TimeSeriesPoint{ + {Name: "bench_single", Value: float64(i), Timestamp: base.Add(time.Duration(i) * time.Second)}, + }) + assert.NoError(b, err) + } +} + +// benchmarkTimeSeriesIngestBatch measures batch ingestion throughput (100 points per call). +func benchmarkTimeSeriesIngestBatch(b *testing.B, database Database) { + ctx := b.Context() + base := time.Date(2023, 1, 1, 0, 0, 0, 0, time.UTC) + const batchSize = 100 + b.ResetTimer() + for i := 0; i < b.N; i++ { + points := make([]TimeSeriesPoint, batchSize) + for j := range batchSize { + points[j] = TimeSeriesPoint{ + Name: "bench_batch", + Value: float64(i*batchSize + j), + Timestamp: base.Add(time.Duration(i*batchSize+j) * time.Second), + } + } + err := database.AddTimeSeriesPoints(ctx, points) + assert.NoError(b, err) + } +} + +// benchmarkTimeSeriesQuerySmallRange measures query latency for a small time range (60s window, 1s buckets). +// Pre-loads 10,000 points at 1-second intervals. 
+func benchmarkTimeSeriesQuerySmallRange(b *testing.B, database Database) { + ctx := b.Context() + base := time.Date(2023, 1, 1, 0, 0, 0, 0, time.UTC) + const totalPoints = 10000 + // pre-load data in batches of 500 + for offset := 0; offset < totalPoints; offset += 500 { + end := offset + 500 + if end > totalPoints { + end = totalPoints + } + points := make([]TimeSeriesPoint, 0, end-offset) + for i := offset; i < end; i++ { + points = append(points, TimeSeriesPoint{ + Name: "bench_query_small", + Value: float64(i), + Timestamp: base.Add(time.Duration(i) * time.Second), + }) + } + err := database.AddTimeSeriesPoints(ctx, points) + assert.NoError(b, err) + } + // query a 60-second window in the middle, 1-second buckets (~60 points returned) + queryBegin := base.Add(5000 * time.Second) + queryEnd := base.Add(5060 * time.Second) + b.ResetTimer() + for i := 0; i < b.N; i++ { + points, err := database.GetTimeSeriesPoints(ctx, "bench_query_small", queryBegin, queryEnd, time.Second) + assert.NoError(b, err) + assert.NotEmpty(b, points) + } +} + +// benchmarkTimeSeriesQueryLargeRange measures query latency with heavy aggregation. +// Pre-loads 10,000 points at 1-second intervals, queries full range with 60-second buckets (~167 buckets). 
+func benchmarkTimeSeriesQueryLargeRange(b *testing.B, database Database) { + ctx := b.Context() + base := time.Date(2023, 1, 1, 0, 0, 0, 0, time.UTC) + const totalPoints = 10000 + // pre-load data in batches of 500 + for offset := 0; offset < totalPoints; offset += 500 { + end := offset + 500 + if end > totalPoints { + end = totalPoints + } + points := make([]TimeSeriesPoint, 0, end-offset) + for i := offset; i < end; i++ { + points = append(points, TimeSeriesPoint{ + Name: "bench_query_large", + Value: float64(i), + Timestamp: base.Add(time.Duration(i) * time.Second), + }) + } + err := database.AddTimeSeriesPoints(ctx, points) + assert.NoError(b, err) + } + // query full range with 60-second buckets + queryBegin := base + queryEnd := base.Add(time.Duration(totalPoints) * time.Second) + b.ResetTimer() + for i := 0; i < b.N; i++ { + points, err := database.GetTimeSeriesPoints(ctx, "bench_query_large", queryBegin, queryEnd, 60*time.Second) + assert.NoError(b, err) + assert.NotEmpty(b, points) + } +} + +// benchmarkTimeSeriesQueryWideRange measures worst-case query: 100K points aggregated into ~28 hourly buckets. 
+func benchmarkTimeSeriesQueryWideRange(b *testing.B, database Database) { + ctx := b.Context() + base := time.Date(2023, 1, 1, 0, 0, 0, 0, time.UTC) + const totalPoints = 100000 + // pre-load data in batches of 1000 + for offset := 0; offset < totalPoints; offset += 1000 { + end := offset + 1000 + if end > totalPoints { + end = totalPoints + } + points := make([]TimeSeriesPoint, 0, end-offset) + for i := offset; i < end; i++ { + points = append(points, TimeSeriesPoint{ + Name: "bench_query_wide", + Value: float64(i), + Timestamp: base.Add(time.Duration(i) * time.Second), + }) + } + err := database.AddTimeSeriesPoints(ctx, points) + assert.NoError(b, err) + } + // query full range with 1-hour buckets + queryBegin := base + queryEnd := base.Add(time.Duration(totalPoints) * time.Second) + b.ResetTimer() + for i := 0; i < b.N; i++ { + points, err := database.GetTimeSeriesPoints(ctx, "bench_query_wide", queryBegin, queryEnd, time.Hour) + assert.NoError(b, err) + assert.NotEmpty(b, points) + } +} From 9f1dd97b9791483a1085ff5d8e4877af72f878c6 Mon Sep 17 00:00:00 2001 From: Daria Korenieva Date: Wed, 29 Apr 2026 13:46:35 -0700 Subject: [PATCH 05/19] updated report Signed-off-by: Daria Korenieva --- VALKEY_INTEGRATION_REPORT.md | 145 ++++++++++++++++------------------- 1 file changed, 65 insertions(+), 80 deletions(-) diff --git a/VALKEY_INTEGRATION_REPORT.md b/VALKEY_INTEGRATION_REPORT.md index 311b83c5b..acad31c5d 100644 --- a/VALKEY_INTEGRATION_REPORT.md +++ b/VALKEY_INTEGRATION_REPORT.md @@ -281,7 +281,7 @@ client, err := glide.NewClusterClient(&config.ClusterClientConfiguration{ | 6 | Implement Valkey cache backend — score/document operations | Implement `AddScores()`, `SearchScores()`, `UpdateScores()`, `DeleteScores()`, `ScanScores()` using valkey-search FT.* commands via CustomCommand. | 6h | ✅ Done | | 7 | Implement Valkey cache backend — time series operations | Implement `AddTimeSeriesPoints()` using sorted set + hash with pipeline batching. 
Implement `GetTimeSeriesPoints()` with ZRANGEBYSCORE range fetch and Go-side bucket aggregation (Approach A). | 4h | ✅ Done | | 8 | Create Valkey cache test suite | Create `storage/cache/valkey_test.go`. Embed `baseTestSuite` and run all existing cache interface tests against a Valkey instance. Use `VALKEY_URI` environment variable. | 4h | ✅ Done | -| 9 | Performance benchmarks — time series | Create benchmarks comparing Valkey time series (sorted set + Go aggregation) vs Redis TimeSeries for ingestion throughput, range query latency, and aggregation performance. Document results and gaps. | 3h | To Do | +| 9 | Performance benchmarks — time series | Create benchmarks comparing Valkey time series (sorted set + Go aggregation) vs Redis TimeSeries for ingestion throughput, range query latency, and aggregation performance. Document results and gaps. | 3h | ✅ Done | | 10 | Documentation and PR preparation | Update README or contributing docs if needed. Final code review, cleanup, and PR submission to gorse-io/gorse. | 2h | To Do | **Total estimate: ~33 hours (~4 working days)** @@ -293,113 +293,98 @@ client, err := glide.NewClusterClient(&config.ClusterClientConfiguration{ --- -## 7. Performance Benchmark Plan — Time Series: Valkey vs Redis +## 7. Performance Benchmark Results — Time Series: Valkey vs Redis -### 7.1 Goal +### 7.1 Test Environment -Compare the performance of Valkey's time series implementation (sorted set + hash with Go-side aggregation) against Redis TimeSeries (native TS.ADD / TS.RANGE with server-side aggregation). The benchmarks should answer: is the Valkey approach fast enough for Gorse's workload, and where are the gaps? 
+- **CPU**: Intel Core i7-9750H @ 2.60GHz (12 threads) +- **OS**: macOS (darwin/amd64) +- **Go**: 1.26.1 +- **Redis**: redis/redis-stack:latest (port 6379) — uses Redis TimeSeries module +- **Valkey**: valkey/valkey-bundle:unstable (port 6380) — uses sorted set + hash with Go-side aggregation +- **Benchmark tool**: Go `testing.B` with `-benchtime 10s` +- **Runs**: 3 independent runs per backend, plus 1 side-by-side run -### 7.2 What We're Comparing +### 7.2 What Was Compared | | Redis | Valkey | |---|---|---| -| **Ingestion** | `TS.ADD` (single native command) | `ZADD` + `HSET` (two commands, pipelined) | +| **Ingestion** | `TS.ADD` (single native command, pipelined) | `ZADD` + `HSET` (two commands, pipelined) | | **Range query** | `TS.RANGE` with server-side `LAST` aggregation | `ZRANGEBYSCORE` + `HMGET` + Go-side bucketing | | **Duplicate handling** | `DUPLICATE_POLICY LAST` (server-side) | `ZADD` overwrites score, `HSET` overwrites field (natural last-write-wins) | -### 7.3 Benchmark Scenarios +### 7.3 Results (side-by-side run) -All benchmarks use Go's `testing.B` framework and run against both Redis (port 6379) and Valkey (port 6380) from the same test file. Each scenario is a separate `b.Run` sub-benchmark. 
+| Benchmark | Description | Redis (ns/op) | Valkey (ns/op) | Ratio | +|---|---|---|---|---| +| **TSIngestSingle** | 1 point per call | 361,638 | 367,864 | **1.02x** | +| **TSIngestBatch** | 100 points per call | 3,827,686 | 1,236,382 | **0.32x** ✅ | +| **TSQuerySmallRange** | 60s window, 1s buckets, 10K pts loaded | 399,321 | 927,515 | **2.3x** | +| **TSQueryLargeRange** | Full range, 60s buckets, 10K pts loaded | 531,244 | 29,022,625 | **54.6x** | +| **TSQueryWideRange** | Full range, 1h buckets, 100K pts loaded | 1,346,747 | 332,029,964 | **246.6x** | -**Scenario 1: Ingestion throughput — single point** -- Insert one point per iteration (`b.N` iterations) -- Measures: ops/sec for single-point writes -- Simulates real-time metric ingestion (one metric update at a time) +### 7.4 Results (averaged across 3 independent runs) -**Scenario 2: Ingestion throughput — batch** -- Insert 100 points per iteration in a single call -- Measures: ops/sec for batch writes, points/sec throughput -- Simulates bulk metric loading (e.g., after model training completes) +| Benchmark | Redis avg (ns/op) | Valkey avg (ns/op) | Ratio | +|---|---|---|---| +| **TSIngestSingle** | 361,138 | 381,977 | **1.06x** | +| **TSIngestBatch** | 3,796,799 | 1,214,417 | **0.32x** ✅ | +| **TSQuerySmallRange** | 394,036 | 910,520 | **2.3x** | +| **TSQueryLargeRange** | 512,476 | 28,634,833 | **55.9x** | +| **TSQueryWideRange** | 1,351,768 | 338,763,674 | **250.7x** | -**Scenario 3: Range query — small range, no aggregation** -- Pre-load 10,000 points (1 point per second over ~2.7 hours) -- Query a 60-second window with 1-second bucket duration (returns ~60 points, no aggregation needed) -- Measures: query latency when bucket size equals point interval +### 7.5 Analysis -**Scenario 4: Range query — large range with aggregation** -- Pre-load 10,000 points (1 point per second) -- Query the full range with 60-second bucket duration (aggregates ~10,000 points into ~167 buckets) -- Measures: query latency with 
heavy aggregation — this is where Go-side vs server-side aggregation matters most +**Ingestion (single point)** — Effectively identical (1.02–1.06x). The overhead of two commands (ZADD + HSET) vs one (TS.ADD) is negligible at the single-operation level due to network round-trip dominating. -**Scenario 5: Range query — wide range, coarse aggregation** -- Pre-load 100,000 points (1 point per second over ~27 hours) -- Query the full range with 3600-second (1 hour) bucket duration (aggregates into ~28 buckets) -- Measures: worst-case scenario — large data transfer + aggregation +**Batch ingestion** — Valkey is 3.1x faster. The Valkey pipeline batches ZADD + HSET pairs efficiently, while Redis TimeSeries TS.ADD has higher per-command overhead in pipeline mode. This is a genuine advantage for bulk loading scenarios. -### 7.4 Implementation +**Small range queries (60s window)** — 2.3x slower. Valkey fetches ~60 raw points via ZRANGEBYSCORE + HMGET and aggregates in Go. Redis returns pre-aggregated results server-side. The absolute latency (~0.9ms) is well within acceptable bounds for monitoring queries. -Add time series benchmarks to `storage/cache/database_test.go` as shared functions (like the existing `benchmark()` pattern), then call them from both `redis_test.go` and `valkey_test.go`. +**Large range queries (10K points, 60s buckets)** — 55x slower. This is where the architectural difference shows: Valkey must transfer all 10,000 raw points to the client, then bucket-aggregate in Go. Redis aggregates server-side and returns only ~167 bucket results. Absolute latency: ~29ms vs ~0.5ms. -**New shared functions in `database_test.go`:** -``` -benchmarkTimeSeriesIngestSingle(b, database) -benchmarkTimeSeriesIngestBatch(b, database) -benchmarkTimeSeriesQuerySmallRange(b, database) -benchmarkTimeSeriesQueryLargeRange(b, database) -benchmarkTimeSeriesQueryWideRange(b, database) -``` +**Wide range queries (100K points, 1h buckets)** — 247x slower. 
The worst case: 100,000 points transferred over the network before Go-side aggregation into ~28 buckets. Absolute latency: ~332ms vs ~1.3ms. -**Wired into existing benchmark functions in `redis_test.go` and `valkey_test.go`:** -```go -// In BenchmarkRedis / BenchmarkValkey: -b.Run("TSIngestSingle", func(b *testing.B) { benchmarkTimeSeriesIngestSingle(b, database) }) -b.Run("TSIngestBatch", func(b *testing.B) { benchmarkTimeSeriesIngestBatch(b, database) }) -b.Run("TSQuerySmallRange", func(b *testing.B) { benchmarkTimeSeriesQuerySmallRange(b, database) }) -b.Run("TSQueryLargeRange", func(b *testing.B) { benchmarkTimeSeriesQueryLargeRange(b, database) }) -b.Run("TSQueryWideRange", func(b *testing.B) { benchmarkTimeSeriesQueryWideRange(b, database) }) -``` +### 7.6 Root Cause of Query Gaps -### 7.5 How to Run +The performance gap in range queries scales linearly with the number of raw points in the range. The bottleneck is **data transfer**, not computation: -```bash -# Run both side by side -go test -v ./storage/cache/ -run ^$ -bench "BenchmarkRedis/TS|BenchmarkValkey/TS" -benchtime 10s -timeout 10m +1. Redis TimeSeries aggregates server-side → returns only bucket results (28–167 values) +2. Valkey transfers all raw points → client does aggregation (10K–100K values over the network) -# Redis only -go test -v ./storage/cache/ -run ^$ -bench "BenchmarkRedis/TS" -benchtime 10s -timeout 10m +The Go-side bucketing itself is fast (simple map + sort). The cost is dominated by `ZRANGEBYSCORE` returning all members + `HMGET` fetching all values. -# Valkey only -go test -v ./storage/cache/ -run ^$ -bench "BenchmarkValkey/TS" -benchtime 10s -timeout 10m -``` +### 7.7 Impact on Gorse -### 7.6 Expected Outcomes +Gorse uses time series for **monitoring metrics** (NDCG, precision, recall, feedback ratios, task durations). 
These are characterized by: -- **Ingestion**: Valkey will likely be slightly slower due to 2 commands (ZADD + HSET) vs 1 (TS.ADD), but pipelining should keep the gap small. -- **Small range queries**: Should be comparable — both fetch a small number of points, minimal aggregation. -- **Large range queries with aggregation**: Redis TimeSeries will likely be faster since aggregation happens server-side. Valkey must transfer all raw points to the client before aggregating. This is the key gap to quantify. -- **Gorse context**: Gorse uses time series for monitoring metrics (NDCG, precision, recall, feedback ratios) — typically low-volume data with infrequent queries. Even a 2-5x gap on large aggregation queries is acceptable for this workload. +- **Low volume**: Metrics are recorded per training cycle or per evaluation, not per-second. A typical deployment might have hundreds to low thousands of points per metric, not 100K. +- **Infrequent queries**: Dashboard queries happen on user interaction, not in hot paths. +- **Moderate ranges**: Most dashboard views show hours to days of data, not months. -### 7.7 Documenting Results +For Gorse's workload profile: +- Single-point ingestion and batch ingestion are **on par or better** than Redis. +- Small range queries (~0.9ms) are **fast enough** for dashboard rendering. +- Large range queries (~29ms) are **acceptable** for occasional dashboard use. +- Wide range queries (~332ms) would only occur with unusually large datasets and are still **sub-second**. 
-After running, capture output in a results table: +### 7.8 Optimization Path -``` -| Benchmark | Redis (ns/op) | Valkey (ns/op) | Ratio | -|---|---|---|---| -| TSIngestSingle | | | | -| TSIngestBatch | | | | -| TSQuerySmallRange | | | | -| TSQueryLargeRange | | | | -| TSQueryWideRange | | | | -``` +If future workloads require faster large-range aggregation, **Approach B (Lua script server-side aggregation)** can be implemented as a drop-in replacement for `GetTimeSeriesPoints`. The Lua script would execute ZRANGEBYSCORE + HGET + bucketing entirely on the Valkey server, eliminating the data transfer bottleneck. This was documented in Section 2.3 of this report. -If any scenario shows >10x degradation, document the root cause and note Approach B (Lua script server-side aggregation) as the optimization path. +### 7.9 How to Reproduce -### 7.8 After Running Benchmarks +```bash +# Prerequisites: Redis Stack on port 6379, Valkey on port 6380 +docker run -d --name redis-stack -p 6379:6379 redis/redis-stack:latest +docker run -d --name valkey-search -p 6380:6379 valkey/valkey-bundle:unstable + +# Run side-by-side +VALKEY_URI="valkey://127.0.0.1:6380/" go test -v ./storage/cache/ \ + -run ^$ -bench "BenchmarkRedis/TS|BenchmarkValkey/TS" -benchtime 10s -timeout 10m -1. Paste the benchmark output into the results table above -2. Commit the benchmark code and results to the feature branch -3. 
Proceed to Task 10 — PR preparation: - - Squash or clean up commits if needed - - Write PR description referencing the ticket - - Submit PR to `gorse-io/gorse` +# Run individually +go test -v ./storage/cache/ -run ^$ -bench "BenchmarkRedis/TS" -benchtime 10s -timeout 10m +VALKEY_URI="valkey://127.0.0.1:6380/" go test -v ./storage/cache/ \ + -run ^$ -bench "BenchmarkValkey/TS" -benchtime 10s -timeout 10m +``` From bc7c89b286755d53c1794de5780669ba2461eb38 Mon Sep 17 00:00:00 2001 From: Daria Korenieva Date: Mon, 4 May 2026 17:25:32 -0700 Subject: [PATCH 06/19] fix: valkey cache - pipeline Set, fix ScanScores pattern, batch cluster ops, handle username in URL Signed-off-by: Daria Korenieva --- storage/cache/valkey.go | 106 ++++++++++++++++++++--------------- storage/cache/valkey_test.go | 67 ++++++++++++++++++++++ 2 files changed, 129 insertions(+), 44 deletions(-) diff --git a/storage/cache/valkey.go b/storage/cache/valkey.go index f18c75b37..bad051d2b 100644 --- a/storage/cache/valkey.go +++ b/storage/cache/valkey.go @@ -38,14 +38,18 @@ import ( func init() { Register([]string{storage.ValkeyPrefix, storage.ValkeysPrefix}, func(path, tablePrefix string, opts ...storage.Option) (Database, error) { - addr, password, db, useTLS, err := parseValkeyURL(path) + addr, username, password, db, useTLS, err := parseValkeyURL(path) if err != nil { return nil, errors.Trace(err) } cfg := glideconfig.NewClientConfiguration(). 
WithAddress(&addr) if password != "" { - cfg.WithCredentials(glideconfig.NewServerCredentialsWithDefaultUsername(password)) + if username != "" { + cfg.WithCredentials(glideconfig.NewServerCredentials(username, password)) + } else { + cfg.WithCredentials(glideconfig.NewServerCredentialsWithDefaultUsername(password)) + } } if db > 0 { cfg.WithDatabaseId(db) @@ -66,7 +70,7 @@ func init() { return database, nil }) Register([]string{storage.ValkeyClusterPrefix, storage.ValkeysClusterPrefix}, func(path, tablePrefix string, opts ...storage.Option) (Database, error) { - addresses, password, useTLS, err := parseValkeyClusterURL(path) + addresses, username, password, useTLS, err := parseValkeyClusterURL(path) if err != nil { return nil, errors.Trace(err) } @@ -75,7 +79,11 @@ func init() { cfg.WithAddress(&addresses[i]) } if password != "" { - cfg.WithCredentials(glideconfig.NewServerCredentialsWithDefaultUsername(password)) + if username != "" { + cfg.WithCredentials(glideconfig.NewServerCredentials(username, password)) + } else { + cfg.WithCredentials(glideconfig.NewServerCredentialsWithDefaultUsername(password)) + } } if useTLS { cfg.WithUseTLS(true) @@ -95,10 +103,10 @@ func init() { } // parseValkeyURL parses a valkey:// or valkeys:// URL into connection parameters. 
-func parseValkeyURL(rawURL string) (addr glideconfig.NodeAddress, password string, db int, useTLS bool, err error) { +func parseValkeyURL(rawURL string) (addr glideconfig.NodeAddress, username, password string, db int, useTLS bool, err error) { parsed, err := url.Parse(rawURL) if err != nil { - return addr, "", 0, false, errors.Trace(err) + return addr, "", "", 0, false, errors.Trace(err) } host := parsed.Hostname() if host == "" { @@ -108,11 +116,12 @@ func parseValkeyURL(rawURL string) (addr glideconfig.NodeAddress, password strin if parsed.Port() != "" { port, err = strconv.Atoi(parsed.Port()) if err != nil { - return addr, "", 0, false, errors.Trace(err) + return addr, "", "", 0, false, errors.Trace(err) } } addr = glideconfig.NodeAddress{Host: host, Port: port} if parsed.User != nil { + username = parsed.User.Username() password, _ = parsed.User.Password() } if parsed.Path != "" && parsed.Path != "/" { @@ -120,16 +129,16 @@ func parseValkeyURL(rawURL string) (addr glideconfig.NodeAddress, password strin if dbStr != "" { db, err = strconv.Atoi(dbStr) if err != nil { - return addr, "", 0, false, errors.Errorf("invalid database number: %s", dbStr) + return addr, "", "", 0, false, errors.Errorf("invalid database number: %s", dbStr) } } } useTLS = parsed.Scheme == "valkeys" - return addr, password, db, useTLS, nil + return addr, username, password, db, useTLS, nil } // parseValkeyClusterURL parses a valkey+cluster:// or valkeys+cluster:// URL. -func parseValkeyClusterURL(rawURL string) (addresses []glideconfig.NodeAddress, password string, useTLS bool, err error) { +func parseValkeyClusterURL(rawURL string) (addresses []glideconfig.NodeAddress, username, password string, useTLS bool, err error) { // Replace the cluster prefix with a standard scheme for URL parsing. 
var newURL string if strings.HasPrefix(rawURL, storage.ValkeyClusterPrefix) { @@ -141,7 +150,7 @@ func parseValkeyClusterURL(rawURL string) (addresses []glideconfig.NodeAddress, } parsed, err := url.Parse(newURL) if err != nil { - return nil, "", false, errors.Trace(err) + return nil, "", "", false, errors.Trace(err) } host := parsed.Hostname() if host == "" { @@ -151,11 +160,12 @@ func parseValkeyClusterURL(rawURL string) (addresses []glideconfig.NodeAddress, if parsed.Port() != "" { port, err = strconv.Atoi(parsed.Port()) if err != nil { - return nil, "", false, errors.Trace(err) + return nil, "", "", false, errors.Trace(err) } } addresses = append(addresses, glideconfig.NodeAddress{Host: host, Port: port}) if parsed.User != nil { + username = parsed.User.Username() password, _ = parsed.User.Password() } // Parse additional addresses from query params (addr=host:port). @@ -166,12 +176,12 @@ func parseValkeyClusterURL(rawURL string) (addresses []glideconfig.NodeAddress, if len(parts) == 2 { p, err = strconv.Atoi(parts[1]) if err != nil { - return nil, "", false, errors.Errorf("invalid port in addr param: %s", addrStr) + return nil, "", "", false, errors.Errorf("invalid port in addr param: %s", addrStr) } } addresses = append(addresses, glideconfig.NodeAddress{Host: h, Port: p}) } - return addresses, password, useTLS, nil + return addresses, username, password, useTLS, nil } // Valkey cache storage using valkey-glide client. @@ -328,18 +338,20 @@ func (v *Valkey) Purge() error { // Set stores values in Valkey. 
func (v *Valkey) Set(ctx context.Context, values ...Value) error { - for _, val := range values { - var err error - if v.isCluster { - _, err = v.clusterClient.Set(ctx, v.Key(val.name), val.value) - } else { - _, err = v.standaloneClient.Set(ctx, v.Key(val.name), val.value) - } - if err != nil { - return errors.Trace(err) + if v.isCluster { + for _, val := range values { + if _, err := v.clusterClient.Set(ctx, v.Key(val.name), val.value); err != nil { + return errors.Trace(err) + } } + return nil } - return nil + batch := pipeline.NewStandaloneBatch(false) + for _, val := range values { + batch.Set(v.Key(val.name), val.value) + } + _, err := v.standaloneClient.Exec(ctx, *batch, true) + return errors.Trace(err) } // Get returns a value from Valkey. @@ -587,24 +599,16 @@ func (v *Valkey) UpdateScores(ctx context.Context, collections []string, subset return errors.Trace(err) } } else { - // Use Watch for optimistic locking on standalone. - if _, err := v.standaloneClient.Watch(ctx, []string{key}); err != nil { - return errors.Trace(err) - } exists, err := v.standaloneClient.Exists(ctx, []string{key}) if err != nil { - v.standaloneClient.Unwatch(ctx) //nolint:errcheck return errors.Trace(err) } if exists == 0 { - v.standaloneClient.Unwatch(ctx) //nolint:errcheck continue } if _, err = v.standaloneClient.HSet(ctx, key, values); err != nil { - v.standaloneClient.Unwatch(ctx) //nolint:errcheck return errors.Trace(err) } - v.standaloneClient.Unwatch(ctx) //nolint:errcheck } } return nil @@ -642,10 +646,8 @@ func (v *Valkey) DeleteScores(ctx context.Context, collections []string, conditi break } if v.isCluster { - for _, key := range docKeys { - if _, err = v.clusterClient.Del(ctx, []string{key}); err != nil { - return errors.Trace(err) - } + if _, err = v.clusterClient.Del(ctx, docKeys); err != nil { + return errors.Trace(err) } } else { batch := pipeline.NewStandaloneBatch(false) @@ -667,7 +669,7 @@ func (v *Valkey) DeleteScores(ctx context.Context, collections []string, 
conditi func (v *Valkey) ScanScores(ctx context.Context, callback func(collection string, id string, subset string, timestamp time.Time) error) error { if v.isCluster { cursor := models.NewClusterScanCursor() - scanOpts := glideoptions.NewClusterScanOptions().SetMatch(v.DocumentTable() + "*") + scanOpts := glideoptions.NewClusterScanOptions().SetMatch(v.DocumentTable() + ":*") for !cursor.IsFinished() { result, err := v.clusterClient.ScanWithOptions(ctx, cursor, *scanOpts) if err != nil { @@ -691,7 +693,7 @@ func (v *Valkey) ScanScores(ctx context.Context, callback func(collection string return nil } cursor := models.NewCursor() - scanOpts := glideoptions.NewScanOptions().SetMatch(v.DocumentTable() + "*") + scanOpts := glideoptions.NewScanOptions().SetMatch(v.DocumentTable() + ":*") for { result, err := v.standaloneClient.ScanWithOptions(ctx, cursor, *scanOpts) if err != nil { @@ -722,17 +724,33 @@ func (v *Valkey) ScanScores(ctx context.Context, callback func(collection string // Hash key: {prefix}ts_data:{name} — field = timestamp_ms as string, value = float64 as string func (v *Valkey) AddTimeSeriesPoints(ctx context.Context, points []TimeSeriesPoint) error { if v.isCluster { + // Group points by name to batch ZADD and HSET per series. 
+ type seriesData struct { + zaddMembers map[string]float64 + hsetFields map[string]string + } + grouped := make(map[string]*seriesData) for _, point := range points { tsMs := point.Timestamp.UnixMilli() tsMsStr := strconv.FormatInt(tsMs, 10) - indexKey := v.PointsTable() + ":ts_index:" + point.Name - dataKey := v.PointsTable() + ":ts_data:" + point.Name - if _, err := v.clusterClient.ZAdd(ctx, indexKey, map[string]float64{tsMsStr: float64(tsMs)}); err != nil { + sd, ok := grouped[point.Name] + if !ok { + sd = &seriesData{ + zaddMembers: make(map[string]float64), + hsetFields: make(map[string]string), + } + grouped[point.Name] = sd + } + sd.zaddMembers[tsMsStr] = float64(tsMs) + sd.hsetFields[tsMsStr] = strconv.FormatFloat(point.Value, 'g', -1, 64) + } + for name, sd := range grouped { + indexKey := v.PointsTable() + ":ts_index:" + name + dataKey := v.PointsTable() + ":ts_data:" + name + if _, err := v.clusterClient.ZAdd(ctx, indexKey, sd.zaddMembers); err != nil { return errors.Trace(err) } - if _, err := v.clusterClient.HSet(ctx, dataKey, map[string]string{ - tsMsStr: strconv.FormatFloat(point.Value, 'g', -1, 64), - }); err != nil { + if _, err := v.clusterClient.HSet(ctx, dataKey, sd.hsetFields); err != nil { return errors.Trace(err) } } diff --git a/storage/cache/valkey_test.go b/storage/cache/valkey_test.go index de836c85b..4f61f431e 100644 --- a/storage/cache/valkey_test.go +++ b/storage/cache/valkey_test.go @@ -209,6 +209,73 @@ func TestValkey(t *testing.T) { suite.Run(t, new(ValkeyTestSuite)) } +func TestParseValkeyURL(t *testing.T) { + // Basic URL + addr, username, password, db, useTLS, err := parseValkeyURL("valkey://127.0.0.1:6380/") + assert.NoError(t, err) + assert.Equal(t, "127.0.0.1", addr.Host) + assert.Equal(t, 6380, addr.Port) + assert.Equal(t, "", username) + assert.Equal(t, "", password) + assert.Equal(t, 0, db) + assert.False(t, useTLS) + + // URL with password only + addr, username, password, db, useTLS, err = 
parseValkeyURL("valkey://:secret@localhost:6379/2") + assert.NoError(t, err) + assert.Equal(t, "localhost", addr.Host) + assert.Equal(t, 6379, addr.Port) + assert.Equal(t, "", username) + assert.Equal(t, "secret", password) + assert.Equal(t, 2, db) + assert.False(t, useTLS) + + // URL with username and password + addr, username, password, db, useTLS, err = parseValkeyURL("valkey://myuser:mypass@host.example.com:6380/3") + assert.NoError(t, err) + assert.Equal(t, "host.example.com", addr.Host) + assert.Equal(t, 6380, addr.Port) + assert.Equal(t, "myuser", username) + assert.Equal(t, "mypass", password) + assert.Equal(t, 3, db) + assert.False(t, useTLS) + + // TLS URL + addr, _, _, _, useTLS, err = parseValkeyURL("valkeys://localhost:6379/0") + assert.NoError(t, err) + assert.True(t, useTLS) + + // Default port + addr, _, _, _, _, err = parseValkeyURL("valkey://localhost/") + assert.NoError(t, err) + assert.Equal(t, 6379, addr.Port) +} + +func TestParseValkeyClusterURL(t *testing.T) { + // Basic cluster URL + addresses, username, password, useTLS, err := parseValkeyClusterURL("valkey+cluster://:password@192.168.1.11:6379?addr=192.168.0.5:6379&addr=192.168.0.7:6379") + assert.NoError(t, err) + assert.Len(t, addresses, 3) + assert.Equal(t, "192.168.1.11", addresses[0].Host) + assert.Equal(t, 6379, addresses[0].Port) + assert.Equal(t, "192.168.0.5", addresses[1].Host) + assert.Equal(t, "192.168.0.7", addresses[2].Host) + assert.Equal(t, "", username) + assert.Equal(t, "password", password) + assert.False(t, useTLS) + + // Cluster URL with username + addresses, username, password, useTLS, err = parseValkeyClusterURL("valkeys+cluster://admin:secret@node1:6380?addr=node2:6380") + assert.NoError(t, err) + assert.Len(t, addresses, 2) + assert.Equal(t, "node1", addresses[0].Host) + assert.Equal(t, 6380, addresses[0].Port) + assert.Equal(t, "node2", addresses[1].Host) + assert.Equal(t, "admin", username) + assert.Equal(t, "secret", password) + assert.True(t, useTLS) +} + func 
BenchmarkValkey(b *testing.B) { log.CloseLogger() database, err := Open(valkeyDSN, "gorse_") From 282b2faeca87bd8094b9066b44f01f12dd85b78d Mon Sep 17 00:00:00 2001 From: Daria Korenieva Date: Tue, 5 May 2026 07:58:51 -0700 Subject: [PATCH 07/19] fixes Signed-off-by: Daria Korenieva --- storage/cache/valkey.go | 68 ++++++++++++++++++++++++++--------------- 1 file changed, 43 insertions(+), 25 deletions(-) diff --git a/storage/cache/valkey.go b/storage/cache/valkey.go index bad051d2b..123522c39 100644 --- a/storage/cache/valkey.go +++ b/storage/cache/valkey.go @@ -32,7 +32,6 @@ import ( glideconfig "github.com/valkey-io/valkey-glide/go/v2/config" "github.com/valkey-io/valkey-glide/go/v2/models" glideoptions "github.com/valkey-io/valkey-glide/go/v2/options" - "github.com/valkey-io/valkey-glide/go/v2/pipeline" "go.uber.org/zap" ) @@ -43,7 +42,8 @@ func init() { return nil, errors.Trace(err) } cfg := glideconfig.NewClientConfiguration(). - WithAddress(&addr) + WithAddress(&addr). + WithRequestTimeout(60 * time.Second) if password != "" { if username != "" { cfg.WithCredentials(glideconfig.NewServerCredentials(username, password)) @@ -78,6 +78,7 @@ func init() { for i := range addresses { cfg.WithAddress(&addresses[i]) } + cfg.WithRequestTimeout(60 * time.Second) if password != "" { if username != "" { cfg.WithCredentials(glideconfig.NewServerCredentials(username, password)) @@ -338,6 +339,9 @@ func (v *Valkey) Purge() error { // Set stores values in Valkey. 
func (v *Valkey) Set(ctx context.Context, values ...Value) error { + if len(values) == 0 { + return nil + } if v.isCluster { for _, val := range values { if _, err := v.clusterClient.Set(ctx, v.Key(val.name), val.value); err != nil { @@ -346,12 +350,12 @@ func (v *Valkey) Set(ctx context.Context, values ...Value) error { } return nil } - batch := pipeline.NewStandaloneBatch(false) for _, val := range values { - batch.Set(v.Key(val.name), val.value) + if _, err := v.standaloneClient.Set(ctx, v.Key(val.name), val.value); err != nil { + return errors.Trace(err) + } } - _, err := v.standaloneClient.Exec(ctx, *batch, true) - return errors.Trace(err) + return nil } // Get returns a value from Valkey. @@ -444,9 +448,8 @@ func (v *Valkey) AddScores(ctx context.Context, collection, subset string, docum } return nil } - batch := pipeline.NewStandaloneBatch(false) for _, document := range documents { - batch.HSet(v.documentKey(collection, subset, document.Id), map[string]string{ + if _, err := v.standaloneClient.HSet(ctx, v.documentKey(collection, subset, document.Id), map[string]string{ "collection": collection, "subset": subset, "id": document.Id, @@ -454,10 +457,11 @@ func (v *Valkey) AddScores(ctx context.Context, collection, subset string, docum "is_hidden": formatBool(document.IsHidden), "categories": encodeCategories(document.Categories), "timestamp": strconv.FormatInt(document.Timestamp.UnixMicro(), 10), - }) + }); err != nil { + return errors.Trace(err) + } } - _, err := v.standaloneClient.Exec(ctx, *batch, true) - return errors.Trace(err) + return nil } func formatBool(b bool) string { @@ -650,11 +654,7 @@ func (v *Valkey) DeleteScores(ctx context.Context, collections []string, conditi return errors.Trace(err) } } else { - batch := pipeline.NewStandaloneBatch(false) - for _, key := range docKeys { - batch.Del([]string{key}) - } - if _, err = v.standaloneClient.Exec(ctx, *batch, true); err != nil { + if _, err = v.standaloneClient.Del(ctx, docKeys); err != nil { 
return errors.Trace(err) } } @@ -756,19 +756,37 @@ func (v *Valkey) AddTimeSeriesPoints(ctx context.Context, points []TimeSeriesPoi } return nil } - batch := pipeline.NewStandaloneBatch(false) + // Group points by name to batch ZADD and HSET per series (same as cluster path). + type seriesData struct { + zaddMembers map[string]float64 + hsetFields map[string]string + } + grouped := make(map[string]*seriesData) for _, point := range points { tsMs := point.Timestamp.UnixMilli() tsMsStr := strconv.FormatInt(tsMs, 10) - indexKey := v.PointsTable() + ":ts_index:" + point.Name - dataKey := v.PointsTable() + ":ts_data:" + point.Name - batch.ZAdd(indexKey, map[string]float64{tsMsStr: float64(tsMs)}) - batch.HSet(dataKey, map[string]string{ - tsMsStr: strconv.FormatFloat(point.Value, 'g', -1, 64), - }) + sd, ok := grouped[point.Name] + if !ok { + sd = &seriesData{ + zaddMembers: make(map[string]float64), + hsetFields: make(map[string]string), + } + grouped[point.Name] = sd + } + sd.zaddMembers[tsMsStr] = float64(tsMs) + sd.hsetFields[tsMsStr] = strconv.FormatFloat(point.Value, 'g', -1, 64) + } + for name, sd := range grouped { + indexKey := v.PointsTable() + ":ts_index:" + name + dataKey := v.PointsTable() + ":ts_data:" + name + if _, err := v.standaloneClient.ZAdd(ctx, indexKey, sd.zaddMembers); err != nil { + return errors.Trace(err) + } + if _, err := v.standaloneClient.HSet(ctx, dataKey, sd.hsetFields); err != nil { + return errors.Trace(err) + } } - _, err := v.standaloneClient.Exec(ctx, *batch, true) - return errors.Trace(err) + return nil } // GetTimeSeriesPoints retrieves time series points with Go-side bucket aggregation. From abb005d87f2df160b784b55db7a945343694bfaa Mon Sep 17 00:00:00 2001 From: Daria Korenieva Date: Tue, 5 May 2026 13:16:51 -0700 Subject: [PATCH 08/19] Address review feedback: security, CROSSSLOT, race condition, DRY - Fix TAG escaping: new escapeTag() covers all special chars (| * { } etc.) 
to prevent query injection via user-controlled values - Fix CROSSSLOT error: delete keys one-at-a-time in cluster mode via vDel() - Fix race condition: remove non-atomic Exists+HSet in UpdateScores - Add max iteration guard (100) to DeleteScores loop - Extract wrapper methods (vDel, vHSet, vZAdd, vHGetAll) to reduce cluster/standalone branching duplication - Extract groupTimeSeriesPoints() helper to DRY AddTimeSeriesPoints Signed-off-by: Daria Korenieva --- storage/cache/valkey.go | 198 ++++++++++++++++++++++++++-------------- 1 file changed, 127 insertions(+), 71 deletions(-) diff --git a/storage/cache/valkey.go b/storage/cache/valkey.go index bad051d2b..6af0ba195 100644 --- a/storage/cache/valkey.go +++ b/storage/cache/valkey.go @@ -203,6 +203,52 @@ func (v *Valkey) Close() error { return nil } +// --- Thin wrapper methods to eliminate cluster/standalone branching duplication --- + +// vDel deletes keys, handling cluster mode by deleting one at a time to avoid CROSSSLOT errors. +func (v *Valkey) vDel(ctx context.Context, keys []string) error { + if v.isCluster { + for _, key := range keys { + if _, err := v.clusterClient.Del(ctx, []string{key}); err != nil { + return errors.Trace(err) + } + } + return nil + } + if _, err := v.standaloneClient.Del(ctx, keys); err != nil { + return errors.Trace(err) + } + return nil +} + +// vHSet sets hash fields on a key. +func (v *Valkey) vHSet(ctx context.Context, key string, fields map[string]string) error { + if v.isCluster { + _, err := v.clusterClient.HSet(ctx, key, fields) + return err + } + _, err := v.standaloneClient.HSet(ctx, key, fields) + return err +} + +// vZAdd adds members to a sorted set. +func (v *Valkey) vZAdd(ctx context.Context, key string, members map[string]float64) error { + if v.isCluster { + _, err := v.clusterClient.ZAdd(ctx, key, members) + return err + } + _, err := v.standaloneClient.ZAdd(ctx, key, members) + return err +} + +// vHGetAll returns all fields and values of a hash. 
+func (v *Valkey) vHGetAll(ctx context.Context, key string) (map[string]string, error) { + if v.isCluster { + return v.clusterClient.HGetAll(ctx, key) + } + return v.standaloneClient.HGetAll(ctx, key) +} + // Ping the valkey server. func (v *Valkey) Ping() error { ctx := context.Background() @@ -308,10 +354,8 @@ func (v *Valkey) Purge() error { if err != nil { return errors.Trace(err) } - if len(result.Keys) > 0 { - if _, err = v.clusterClient.Del(ctx, result.Keys); err != nil { - return errors.Trace(err) - } + if err = v.vDel(ctx, result.Keys); err != nil { + return errors.Trace(err) } cursor = result.Cursor } @@ -430,7 +474,7 @@ func (v *Valkey) documentKey(collection, subset, value string) string { func (v *Valkey) AddScores(ctx context.Context, collection, subset string, documents []Score) error { if v.isCluster { for _, document := range documents { - if _, err := v.clusterClient.HSet(ctx, v.documentKey(collection, subset, document.Id), map[string]string{ + if err := v.vHSet(ctx, v.documentKey(collection, subset, document.Id), map[string]string{ "collection": collection, "subset": subset, "id": document.Id, @@ -470,12 +514,12 @@ func formatBool(b bool) string { // SearchScores searches for score documents using FT.SEARCH. func (v *Valkey) SearchScores(ctx context.Context, collection, subset string, query []string, begin, end int) ([]Score, error) { var builder strings.Builder - fmt.Fprintf(&builder, "@collection:{ %s } @is_hidden:[0 0]", escape(collection)) + fmt.Fprintf(&builder, "@collection:{ %s } @is_hidden:[0 0]", escapeTag(collection)) if subset != "" { - fmt.Fprintf(&builder, " @subset:{ %s }", escape(subset)) + fmt.Fprintf(&builder, " @subset:{ %s }", escapeTag(subset)) } for _, q := range query { - fmt.Fprintf(&builder, " @categories:{ %s }", escape(encodeCategory(q))) + fmt.Fprintf(&builder, " @categories:{ %s }", escapeTag(encodeCategory(q))) } // Fetch enough results to cover the requested range. 
// We request from offset 0 because the Glide map format loses ordering, @@ -520,10 +564,10 @@ func (v *Valkey) UpdateScores(ctx context.Context, collections []string, subset return nil } var builder strings.Builder - fmt.Fprintf(&builder, "@collection:{ %s }", escape(strings.Join(collections, " | "))) - fmt.Fprintf(&builder, " @id:{ %s }", escape(id)) + fmt.Fprintf(&builder, "@collection:{ %s }", escapeTag(strings.Join(collections, " | "))) + fmt.Fprintf(&builder, " @id:{ %s }", escapeTag(id)) if subset != nil { - fmt.Fprintf(&builder, " @subset:{ %s }", escape(*subset)) + fmt.Fprintf(&builder, " @subset:{ %s }", escapeTag(*subset)) } limit := v.maxSearchResults if limit <= 0 { @@ -587,28 +631,8 @@ func (v *Valkey) UpdateScores(ctx context.Context, collections []string, subset values["categories"] = encodeCategories(patch.Categories) } for _, key := range keys { - if v.isCluster { - exists, err := v.clusterClient.Exists(ctx, []string{key}) - if err != nil { - return errors.Trace(err) - } - if exists == 0 { - continue - } - if _, err = v.clusterClient.HSet(ctx, key, values); err != nil { - return errors.Trace(err) - } - } else { - exists, err := v.standaloneClient.Exists(ctx, []string{key}) - if err != nil { - return errors.Trace(err) - } - if exists == 0 { - continue - } - if _, err = v.standaloneClient.HSet(ctx, key, values); err != nil { - return errors.Trace(err) - } + if err := v.vHSet(ctx, key, values); err != nil { + return errors.Trace(err) } } return nil @@ -620,17 +644,18 @@ func (v *Valkey) DeleteScores(ctx context.Context, collections []string, conditi return errors.Trace(err) } var builder strings.Builder - fmt.Fprintf(&builder, "@collection:{ %s }", escape(strings.Join(collections, " | "))) + fmt.Fprintf(&builder, "@collection:{ %s }", escapeTag(strings.Join(collections, " | "))) if condition.Subset != nil { - fmt.Fprintf(&builder, " @subset:{ %s }", escape(*condition.Subset)) + fmt.Fprintf(&builder, " @subset:{ %s }", escapeTag(*condition.Subset)) } 
if condition.Id != nil { - fmt.Fprintf(&builder, " @id:{ %s }", escape(*condition.Id)) + fmt.Fprintf(&builder, " @id:{ %s }", escapeTag(*condition.Id)) } if condition.Before != nil { fmt.Fprintf(&builder, " @timestamp:[-inf (%d]", condition.Before.UnixMicro()) } - for { + const maxDeleteIterations = 100 + for iteration := 0; iteration < maxDeleteIterations; iteration++ { args := []string{ "FT.SEARCH", v.DocumentTable(), builder.String(), "SORTBY", "score", "DESC", @@ -646,7 +671,8 @@ func (v *Valkey) DeleteScores(ctx context.Context, collections []string, conditi break } if v.isCluster { - if _, err = v.clusterClient.Del(ctx, docKeys); err != nil { + // Delete keys one at a time to avoid CROSSSLOT errors in cluster mode. + if err = v.vDel(ctx, docKeys); err != nil { return errors.Trace(err) } } else { @@ -676,7 +702,7 @@ func (v *Valkey) ScanScores(ctx context.Context, callback func(collection string return errors.Trace(err) } for _, key := range result.Keys { - row, err := v.clusterClient.HGetAll(ctx, key) + row, err := v.vHGetAll(ctx, key) if err != nil { return errors.Trace(err) } @@ -700,7 +726,7 @@ func (v *Valkey) ScanScores(ctx context.Context, callback func(collection string return errors.Trace(err) } for _, key := range result.Data { - row, err := v.standaloneClient.HGetAll(ctx, key) + row, err := v.vHGetAll(ctx, key) if err != nil { return errors.Trace(err) } @@ -719,53 +745,56 @@ func (v *Valkey) ScanScores(ctx context.Context, callback func(collection string } } +// timeSeriesGroup holds grouped ZADD members and HSET fields for a single time series. +type timeSeriesGroup struct { + zaddMembers map[string]float64 + hsetFields map[string]string +} + +// groupTimeSeriesPoints groups time series points by name for batched writes. 
+func groupTimeSeriesPoints(points []TimeSeriesPoint) map[string]*timeSeriesGroup { + grouped := make(map[string]*timeSeriesGroup) + for _, point := range points { + tsMs := point.Timestamp.UnixMilli() + tsMsStr := strconv.FormatInt(tsMs, 10) + sd, ok := grouped[point.Name] + if !ok { + sd = &timeSeriesGroup{ + zaddMembers: make(map[string]float64), + hsetFields: make(map[string]string), + } + grouped[point.Name] = sd + } + sd.zaddMembers[tsMsStr] = float64(tsMs) + sd.hsetFields[tsMsStr] = strconv.FormatFloat(point.Value, 'g', -1, 64) + } + return grouped +} + // AddTimeSeriesPoints adds time series points using sorted set + hash (Approach A). // Sorted set key: {prefix}ts_index:{name} — score = timestamp_ms, member = timestamp_ms as string // Hash key: {prefix}ts_data:{name} — field = timestamp_ms as string, value = float64 as string func (v *Valkey) AddTimeSeriesPoints(ctx context.Context, points []TimeSeriesPoint) error { + grouped := groupTimeSeriesPoints(points) if v.isCluster { - // Group points by name to batch ZADD and HSET per series. 
- type seriesData struct { - zaddMembers map[string]float64 - hsetFields map[string]string - } - grouped := make(map[string]*seriesData) - for _, point := range points { - tsMs := point.Timestamp.UnixMilli() - tsMsStr := strconv.FormatInt(tsMs, 10) - sd, ok := grouped[point.Name] - if !ok { - sd = &seriesData{ - zaddMembers: make(map[string]float64), - hsetFields: make(map[string]string), - } - grouped[point.Name] = sd - } - sd.zaddMembers[tsMsStr] = float64(tsMs) - sd.hsetFields[tsMsStr] = strconv.FormatFloat(point.Value, 'g', -1, 64) - } for name, sd := range grouped { indexKey := v.PointsTable() + ":ts_index:" + name dataKey := v.PointsTable() + ":ts_data:" + name - if _, err := v.clusterClient.ZAdd(ctx, indexKey, sd.zaddMembers); err != nil { + if err := v.vZAdd(ctx, indexKey, sd.zaddMembers); err != nil { return errors.Trace(err) } - if _, err := v.clusterClient.HSet(ctx, dataKey, sd.hsetFields); err != nil { + if err := v.vHSet(ctx, dataKey, sd.hsetFields); err != nil { return errors.Trace(err) } } return nil } batch := pipeline.NewStandaloneBatch(false) - for _, point := range points { - tsMs := point.Timestamp.UnixMilli() - tsMsStr := strconv.FormatInt(tsMs, 10) - indexKey := v.PointsTable() + ":ts_index:" + point.Name - dataKey := v.PointsTable() + ":ts_data:" + point.Name - batch.ZAdd(indexKey, map[string]float64{tsMsStr: float64(tsMs)}) - batch.HSet(dataKey, map[string]string{ - tsMsStr: strconv.FormatFloat(point.Value, 'g', -1, 64), - }) + for name, sd := range grouped { + indexKey := v.PointsTable() + ":ts_index:" + name + dataKey := v.PointsTable() + ":ts_data:" + name + batch.ZAdd(indexKey, sd.zaddMembers) + batch.HSet(dataKey, sd.hsetFields) } _, err := v.standaloneClient.Exec(ctx, *batch, true) return errors.Trace(err) @@ -1058,3 +1087,30 @@ func parseFieldMap(v any) map[string]string { return nil } } + +// valkeyTagEscaper escapes all TAG special characters for Valkey Search queries. 
+// This prevents query injection via user-controlled values in TAG fields. +// Valkey Search TAG syntax has special chars: | * { } ( ) ~ @ \ " ' - : . / + +var valkeyTagEscaper = strings.NewReplacer( + `\`, `\\`, + `{`, `\{`, + `}`, `\}`, + `|`, `\|`, + `*`, `\*`, + `(`, `\(`, + `)`, `\)`, + `~`, `\~`, + `@`, `\@`, + `"`, `\"`, + `'`, `\'`, + `-`, `\-`, + `:`, `\:`, + `.`, `\.`, + `/`, `\/`, + `+`, `\+`, +) + +// escapeTag escapes a value for use in Valkey Search TAG queries. +func escapeTag(s string) string { + return valkeyTagEscaper.Replace(s) +} From 9f2fa683417ec99ac1f0166102235542e2c3d5c2 Mon Sep 17 00:00:00 2001 From: Daria Korenieva Date: Tue, 5 May 2026 13:27:42 -0700 Subject: [PATCH 09/19] fixes Signed-off-by: Daria Korenieva --- storage/cache/valkey.go | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/storage/cache/valkey.go b/storage/cache/valkey.go index 81209dffe..750b0c0d7 100644 --- a/storage/cache/valkey.go +++ b/storage/cache/valkey.go @@ -569,7 +569,11 @@ func (v *Valkey) UpdateScores(ctx context.Context, collections []string, subset return nil } var builder strings.Builder - fmt.Fprintf(&builder, "@collection:{ %s }", escapeTag(strings.Join(collections, " | "))) + escapedCollections := make([]string, len(collections)) + for i, c := range collections { + escapedCollections[i] = escapeTag(c) + } + fmt.Fprintf(&builder, "@collection:{ %s }", strings.Join(escapedCollections, " | ")) fmt.Fprintf(&builder, " @id:{ %s }", escapeTag(id)) if subset != nil { fmt.Fprintf(&builder, " @subset:{ %s }", escapeTag(*subset)) @@ -649,7 +653,11 @@ func (v *Valkey) DeleteScores(ctx context.Context, collections []string, conditi return errors.Trace(err) } var builder strings.Builder - fmt.Fprintf(&builder, "@collection:{ %s }", escapeTag(strings.Join(collections, " | "))) + escapedCollections := make([]string, len(collections)) + for i, c := range collections { + escapedCollections[i] = escapeTag(c) + } + fmt.Fprintf(&builder, 
"@collection:{ %s }", strings.Join(escapedCollections, " | ")) if condition.Subset != nil { fmt.Fprintf(&builder, " @subset:{ %s }", escapeTag(*condition.Subset)) } From 16108ddcdb90ff729e4d05905431e01414290d5d Mon Sep 17 00:00:00 2001 From: Daria Korenieva Date: Tue, 5 May 2026 13:28:56 -0700 Subject: [PATCH 10/19] clean up Signed-off-by: Daria Korenieva --- VALKEY_INTEGRATION_REPORT.md | 390 ----------------------------------- 1 file changed, 390 deletions(-) delete mode 100644 VALKEY_INTEGRATION_REPORT.md diff --git a/VALKEY_INTEGRATION_REPORT.md b/VALKEY_INTEGRATION_REPORT.md deleted file mode 100644 index acad31c5d..000000000 --- a/VALKEY_INTEGRATION_REPORT.md +++ /dev/null @@ -1,390 +0,0 @@ -# Valkey Integration Report for Gorse Recommender System - -## 1. Why No Valkey for the Data Layer (`storage/data/Database`) - -The `data.Database` interface defines 25+ methods for users, items, and feedback with complex relational semantics: filtered pagination, category-based batch lookups, time-range queries, cross-entity joins, and streaming exports. Every existing backend (MySQL, PostgreSQL, ClickHouse, SQLite, MongoDB) handles these natively via SQL or document queries. No Redis-based data backend exists in this project. Implementing these patterns on Valkey would mean building an application-level query engine on top of key-value primitives — the engineering cost would be high and the result would be slower and more fragile than any existing backend. Valkey is a natural fit for the cache layer, not the data layer. - ---- - -## 2. Time Series Implementation in Valkey - -### 2.1 Context - -The `cache.Database` interface requires two time series methods: - -- **`AddTimeSeriesPoints(ctx, points []TimeSeriesPoint)`** — Ingests points with (Name, Timestamp, Value). Duplicate policy: last write wins. 
-- **`GetTimeSeriesPoints(ctx, name, begin, end, duration)`** — Retrieves points in a time range, aggregated into buckets of `duration` width, returning the last value per bucket. - -The existing Redis implementation uses Redis TimeSeries module commands (`TS.ADD` with `LAST` duplicate policy, `TS.RANGE` with `LAST` aggregator and `BucketDuration`). Valkey has no TimeSeries module, so we need an alternative. - -### 2.2 Approach A: Sorted Sets with Go-Side Aggregation (Recommended) - -**Design:** -- One sorted set per time series name: key = `{prefix}time_series_points:{name}` -- Score = timestamp in milliseconds (Unix epoch) -- Member = string-encoded float64 value (using `strconv.FormatFloat`) -- For duplicate timestamps (last-write-wins): use a composite member format `{timestamp_ms}` as the score and the value as the member. Since ZADD with the same member updates the score, we instead encode as member = `{timestamp_ms}` and store value in a parallel hash, OR use a simpler approach: member = `fmt.Sprintf("%d", timestamp_ms)` and value stored via a separate HSET. However, the simplest approach that matches the "LAST" duplicate policy is: - - Member = `fmt.Sprintf("%d", timestamp.UnixMilli())` (timestamp as string) - - Score = `timestamp.UnixMilli()` (float64) - - Value stored in a parallel hash: `HSET {prefix}time_series_values:{name} {timestamp_ms} {value}` - -**Actually, the cleanest approach** (matching how MongoDB does it in this codebase): -- Use a single sorted set per series name -- Member = `fmt.Sprintf("%d:%.17g", timestamp.UnixMilli(), value)` — encode both timestamp and value in the member -- Score = `float64(timestamp.UnixMilli())` -- For "last write wins" on duplicate timestamps: first `ZREMRANGEBYSCORE` the exact timestamp, then `ZADD` the new point. This can be done atomically in a pipeline. 
- -**Simpler alternative** (recommended for clarity): -- Use Hash + Sorted Set combination: - - Sorted Set key: `{prefix}ts_index:{name}` — score = timestamp_ms, member = timestamp_ms as string - - Hash key: `{prefix}ts_data:{name}` — field = timestamp_ms as string, value = float64 as string -- `AddTimeSeriesPoints`: Pipeline of ZADD (index) + HSET (data) per point. ZADD naturally handles duplicate timestamps (updates score), HSET naturally overwrites (last-write-wins). -- `GetTimeSeriesPoints`: ZRANGEBYSCORE to get timestamp members in range, HMGET to fetch values, then bucket aggregation in Go. - -**Go-side aggregation logic** (for `GetTimeSeriesPoints`): -```go -// 1. Fetch all timestamps in range from sorted set -// 2. Fetch corresponding values from hash -// 3. Group by bucket: bucket_key = (timestamp_ms / duration_ms) * duration_ms -// 4. For each bucket, take the last value (highest timestamp) -// 5. Return sorted results -``` - -This matches exactly how MongoDB implements it — MongoDB uses `$group` with `$floor($divide)` for bucketing and `$top` with `sortBy: {timestamp: -1}` for last-value selection. 
- -**Benefits:** -- Simple, well-understood data structures (sorted set + hash) -- Exact semantic match with Redis TimeSeries "LAST" duplicate policy -- Proven pattern — MongoDB implementation in this codebase does the same thing -- No external modules required — works with any Valkey version -- Efficient range queries via ZRANGEBYSCORE: O(log N + M) -- Natural duplicate handling via ZADD + HSET overwrite semantics - -**Downsides:** -- Aggregation happens in Go, not server-side — transfers more data over the network for large ranges -- Two keys per series (index + data) instead of one — slightly more memory overhead -- No built-in downsampling or retention policies (must be managed application-side) -- For very large time ranges with fine-grained data, the full range must be fetched before aggregation - ---- - -### 2.3 Approach B: Sorted Sets with Lua Script Aggregation (Server-Side) - -**Design:** -- Same storage layout as Approach A (sorted set + hash) -- Aggregation performed via a Lua script executed with `EVALSHA`/`InvokeScript` -- The Lua script does: ZRANGEBYSCORE, HGET for each member, bucket grouping, and returns aggregated results - -**Lua script pseudocode:** -```lua -local key_index = KEYS[1] -- sorted set key -local key_data = KEYS[2] -- hash key -local begin_ms = tonumber(ARGV[1]) -local end_ms = tonumber(ARGV[2]) -local bucket_ms = tonumber(ARGV[3]) - -local members = redis.call('ZRANGEBYSCORE', key_index, begin_ms, end_ms) -local buckets = {} -local bucket_order = {} - -for _, member in ipairs(members) do - local ts = tonumber(member) - local value = redis.call('HGET', key_data, member) - local bucket_key = math.floor(ts / bucket_ms) * bucket_ms - - if not buckets[bucket_key] or ts > buckets[bucket_key].ts then - if not buckets[bucket_key] then - table.insert(bucket_order, bucket_key) - end - buckets[bucket_key] = {ts = ts, value = value} - end -end - -table.sort(bucket_order) -local result = {} -for _, bk in ipairs(bucket_order) do - 
table.insert(result, tostring(bk)) - table.insert(result, buckets[bk].value) -end -return result -``` - -**Benefits:** -- Aggregation happens server-side — reduces network transfer for large datasets -- Atomic operation — no race conditions during read -- Single round-trip for the entire query -- valkey-glide supports `InvokeScript` for Lua execution - -**Downsides:** -- Lua scripts add complexity to maintain and debug -- Lua execution blocks the Valkey event loop — long-running aggregations on large datasets could cause latency spikes -- Script must be loaded/cached on each Valkey node (handled by glide's InvokeScript) -- Harder to unit test the aggregation logic independently -- Lua in Valkey has limited floating-point precision (uses double, but string conversion can lose precision) - ---- - -### 2.4 Approach C: Single Sorted Set with Encoded Members - -**Design:** -- One sorted set per series: key = `{prefix}ts:{name}` -- Score = `float64(timestamp.UnixMilli())` -- Member = `fmt.Sprintf("%.17g", value)` — just the value -- For duplicate timestamps: ZADD with GT flag won't work (we want last-write-wins, not max). Instead, remove old entry at same score first, then add new one. - -**Problem:** Sorted sets don't support multiple members with the same score cleanly for "replace" semantics. If two different values have the same timestamp, ZADD treats them as different members. You'd need ZREMRANGEBYSCORE + ZADD in a pipeline, which is what Approach A avoids by using a hash for values. 
- -**Benefits:** -- Single key per series (simpler key space) -- Slightly less memory than two-key approach - -**Downsides:** -- Duplicate timestamp handling is awkward and error-prone -- Can't efficiently update a value at an existing timestamp without removing all entries at that score -- Member = value means you can't have the same value at different timestamps (sorted set members are unique) -- **This approach has fundamental correctness issues** — not recommended - ---- - -### 2.5 Approach D: Hash-Only with Go-Side Sorting and Aggregation - -**Design:** -- Single hash per series: key = `{prefix}ts:{name}` -- Field = timestamp_ms as string, Value = float64 as string -- `AddTimeSeriesPoints`: HSET with field=timestamp, value=value (natural last-write-wins) -- `GetTimeSeriesPoints`: HGETALL, filter by range in Go, sort, bucket, aggregate - -**Benefits:** -- Simplest storage model — single key, single data structure -- Natural last-write-wins via HSET overwrite -- No sorted set overhead - -**Downsides:** -- HGETALL fetches ALL points for the series, not just the requested range — very inefficient for large series -- No server-side range filtering — all data must be transferred to client -- Sorting must be done in Go after fetching all data -- Memory usage grows unbounded without manual cleanup -- **Not viable for production workloads with large time series** - ---- - -## 3. Decision - -**Approach A (Sorted Sets + Hash with Go-Side Aggregation)** — confirmed. If benchmarks later show performance is not on par with Redis TimeSeries, Approach B (Lua script server-side aggregation) can be explored as an optimization. - ---- - -## 4. Client Library: valkey-glide Go v2 - -The implementation will use `github.com/valkey-io/valkey-glide/go/v2` — the official Valkey GLIDE client for Go. 
- -**Key API mappings for our implementation:** - -| Gorse Operation | valkey-glide API | -|---|---| -| Set key-value | `client.Set(ctx, key, value)` | -| Get key-value | `client.Get(ctx, key)` → `models.Result[string]` | -| Delete key | `client.Del(ctx, []string{key})` | -| Hash set | `client.HSet(ctx, key, map[string]string{...})` | -| Hash get all | `client.HGetAll(ctx, key)` → `map[string]string` | -| Hash get field | `client.HGet(ctx, key, field)` → `models.Result[string]` | -| Hash delete | `client.HDel(ctx, key, []string{...})` | -| Sorted set add | `client.ZAdd(ctx, key, map[string]float64{...})` | -| Sorted set range with scores | `client.ZRangeWithScores(ctx, key, query)` → `[]models.MemberAndScore` | -| Sorted set remove by score | `client.ZRemRangeByScore(ctx, key, rangeQuery)` | -| Sorted set pop min | `client.ZPopMin(ctx, key)` → `map[string]float64` | -| Sorted set cardinality | `client.ZCard(ctx, key)` | -| Scan keys | `client.Scan(ctx, cursor)` / `client.ScanWithOptions(ctx, cursor, opts)` | -| Pipeline/batch | `client.Exec(ctx, batch, raiseOnError)` with `pipeline.NewStandaloneBatch(false)` | -| Lua scripting | `client.InvokeScript(ctx, script)` / `client.InvokeScriptWithOptions(ctx, script, opts)` | -| Watch (optimistic locking) | `client.Watch(ctx, keys)` | -| Custom command (FT.*, etc.) 
| `client.CustomCommand(ctx, []string{...})` | -| Ping | `client.Ping(ctx)` | -| Close | `client.Close()` | - -**Connection setup:** -```go -import ( - glide "github.com/valkey-io/valkey-glide/go/v2" - "github.com/valkey-io/valkey-glide/go/v2/config" -) - -// Standalone -client, err := glide.NewClient(&config.ClientConfiguration{ - Addresses: []config.NodeAddress{{Host: host, Port: port}}, -}) - -// Cluster -client, err := glide.NewClusterClient(&config.ClusterClientConfiguration{ - Addresses: []config.NodeAddress{{Host: host, Port: port}}, -}) -``` - -**Key differences from go-redis (current Redis client):** -- No `ParseURL` — connection params must be parsed manually from the URL -- `ZAdd` takes `map[string]float64` instead of `[]redis.Z` -- `Get` returns `models.Result[string]` with `.Value()` and `.IsNil()` methods -- Pipeline uses `pipeline.NewStandaloneBatch(false)` + `client.Exec()` -- No direct FT.* (RediSearch) methods — must use `CustomCommand` for valkey-search -- Context is required for all operations (consistent with Go best practices) - ---- - -## 5. Implementation Plan - -### Phase 1: Foundation -1. Add Valkey URL prefix constants to `storage/scheme.go` -2. Update config validation in `config/config.go` -3. Update `config/config.toml` documentation - -### Phase 2: Core Implementation -1. Create `storage/cache/valkey.go` — full `cache.Database` implementation -2. Implement connection setup (standalone + cluster) using valkey-glide -3. Implement core operations: Set/Get/Delete, Push/Pop/Remain, Scan/Purge -4. Implement score operations using `CustomCommand` for valkey-search FT.* commands -5. Implement time series using Sorted Set + Hash with Go-side aggregation - -### Phase 3: Tests -1. Create `storage/cache/valkey_test.go` -2. Run existing `baseTestSuite` against Valkey - -### Phase 4: Benchmarks -1. Time series ingestion throughput comparison -2. Range query + aggregation latency comparison -3. 
Document results - -### Files to Create/Modify - -| File | Action | -|---|---| -| `storage/scheme.go` | Add Valkey prefix constants | -| `storage/cache/valkey.go` | New — full implementation | -| `storage/cache/valkey_test.go` | New — test suite | -| `config/config.go` | Add Valkey to cache_store validator | -| `config/config.toml` | Document Valkey support | -| `go.mod` | Add valkey-glide dependency | - - ---- - -## 6. Task Breakdown - -### Story: Add Valkey Support to Gorse Recommender System (Cache Layer) - -| # | Task | Description | Estimate | Status | -|---|---|---|---|---| -| 1 | Initial Valkey integration exploration | Explore Gorse project architecture, analyze cache and data layer interfaces, evaluate Valkey compatibility, research valkey-glide Go client API, produce integration report with time series approach options and implementation plan. | 4h | ✅ Done | -| 2 | Add Valkey URL prefix constants and config validation | Add `valkey://`, `valkeys://`, `valkey+cluster://`, `valkeys+cluster://` prefixes to `storage/scheme.go`. Update `config/config.go` cache_store validator to accept Valkey prefixes. Update `config/config.toml` to document Valkey as a supported cache store option. Add config validation tests. | 2h | ✅ Done | -| 3 | Add valkey-glide Go dependency | Add `github.com/valkey-io/valkey-glide/go/v2` to `go.mod`. Run `go mod tidy`. Verify build compiles. | 1h | ✅ Done | -| 4 | Implement Valkey cache backend — connection and lifecycle | Create `storage/cache/valkey.go`. Implement `init()` registration with Valkey prefixes, connection setup for standalone and cluster modes using valkey-glide, `Close()`, `Ping()`, `Init()` (valkey-search index creation via CustomCommand), `Scan()`, `Purge()`. | 4h | ✅ Done | -| 5 | Implement Valkey cache backend — core key-value and queue operations | Implement `Set()`, `Get()`, `Delete()` for key-value metadata. Implement `Push()`, `Pop()`, `Remain()` for message queue using sorted sets. 
| 3h | ✅ Done | -| 6 | Implement Valkey cache backend — score/document operations | Implement `AddScores()`, `SearchScores()`, `UpdateScores()`, `DeleteScores()`, `ScanScores()` using valkey-search FT.* commands via CustomCommand. | 6h | ✅ Done | -| 7 | Implement Valkey cache backend — time series operations | Implement `AddTimeSeriesPoints()` using sorted set + hash with pipeline batching. Implement `GetTimeSeriesPoints()` with ZRANGEBYSCORE range fetch and Go-side bucket aggregation (Approach A). | 4h | ✅ Done | -| 8 | Create Valkey cache test suite | Create `storage/cache/valkey_test.go`. Embed `baseTestSuite` and run all existing cache interface tests against a Valkey instance. Use `VALKEY_URI` environment variable. | 4h | ✅ Done | -| 9 | Performance benchmarks — time series | Create benchmarks comparing Valkey time series (sorted set + Go aggregation) vs Redis TimeSeries for ingestion throughput, range query latency, and aggregation performance. Document results and gaps. | 3h | ✅ Done | -| 10 | Documentation and PR preparation | Update README or contributing docs if needed. Final code review, cleanup, and PR submission to gorse-io/gorse. | 2h | To Do | - -**Total estimate: ~33 hours (~4 working days)** - -- Tasks 2–3 (foundation): 3h — half day -- Tasks 4–7 (core implementation): 17h — ~2 days -- Task 8 (tests): 4h — half day -- Tasks 9–10 (benchmarks + PR): 5h — ~1 day - ---- - -## 7. 
Performance Benchmark Results — Time Series: Valkey vs Redis - -### 7.1 Test Environment - -- **CPU**: Intel Core i7-9750H @ 2.60GHz (12 threads) -- **OS**: macOS (darwin/amd64) -- **Go**: 1.26.1 -- **Redis**: redis/redis-stack:latest (port 6379) — uses Redis TimeSeries module -- **Valkey**: valkey/valkey-bundle:unstable (port 6380) — uses sorted set + hash with Go-side aggregation -- **Benchmark tool**: Go `testing.B` with `-benchtime 10s` -- **Runs**: 3 independent runs per backend, plus 1 side-by-side run - -### 7.2 What Was Compared - -| | Redis | Valkey | -|---|---|---| -| **Ingestion** | `TS.ADD` (single native command, pipelined) | `ZADD` + `HSET` (two commands, pipelined) | -| **Range query** | `TS.RANGE` with server-side `LAST` aggregation | `ZRANGEBYSCORE` + `HMGET` + Go-side bucketing | -| **Duplicate handling** | `DUPLICATE_POLICY LAST` (server-side) | `ZADD` overwrites score, `HSET` overwrites field (natural last-write-wins) | - -### 7.3 Results (side-by-side run) - -| Benchmark | Description | Redis (ns/op) | Valkey (ns/op) | Ratio | -|---|---|---|---|---| -| **TSIngestSingle** | 1 point per call | 361,638 | 367,864 | **1.02x** | -| **TSIngestBatch** | 100 points per call | 3,827,686 | 1,236,382 | **0.32x** ✅ | -| **TSQuerySmallRange** | 60s window, 1s buckets, 10K pts loaded | 399,321 | 927,515 | **2.3x** | -| **TSQueryLargeRange** | Full range, 60s buckets, 10K pts loaded | 531,244 | 29,022,625 | **54.6x** | -| **TSQueryWideRange** | Full range, 1h buckets, 100K pts loaded | 1,346,747 | 332,029,964 | **246.6x** | - -### 7.4 Results (averaged across 3 independent runs) - -| Benchmark | Redis avg (ns/op) | Valkey avg (ns/op) | Ratio | -|---|---|---|---| -| **TSIngestSingle** | 361,138 | 381,977 | **1.06x** | -| **TSIngestBatch** | 3,796,799 | 1,214,417 | **0.32x** ✅ | -| **TSQuerySmallRange** | 394,036 | 910,520 | **2.3x** | -| **TSQueryLargeRange** | 512,476 | 28,634,833 | **55.9x** | -| **TSQueryWideRange** | 1,351,768 | 338,763,674 | **250.7x** | - 
-### 7.5 Analysis - -**Ingestion (single point)** — Effectively identical (1.02–1.06x). The overhead of two commands (ZADD + HSET) vs one (TS.ADD) is negligible at the single-operation level due to network round-trip dominating. - -**Batch ingestion** — Valkey is 3.1x faster. The Valkey pipeline batches ZADD + HSET pairs efficiently, while Redis TimeSeries TS.ADD has higher per-command overhead in pipeline mode. This is a genuine advantage for bulk loading scenarios. - -**Small range queries (60s window)** — 2.3x slower. Valkey fetches ~60 raw points via ZRANGEBYSCORE + HMGET and aggregates in Go. Redis returns pre-aggregated results server-side. The absolute latency (~0.9ms) is well within acceptable bounds for monitoring queries. - -**Large range queries (10K points, 60s buckets)** — 55x slower. This is where the architectural difference shows: Valkey must transfer all 10,000 raw points to the client, then bucket-aggregate in Go. Redis aggregates server-side and returns only ~167 bucket results. Absolute latency: ~29ms vs ~0.5ms. - -**Wide range queries (100K points, 1h buckets)** — 247x slower. The worst case: 100,000 points transferred over the network before Go-side aggregation into ~28 buckets. Absolute latency: ~332ms vs ~1.3ms. - -### 7.6 Root Cause of Query Gaps - -The performance gap in range queries scales linearly with the number of raw points in the range. The bottleneck is **data transfer**, not computation: - -1. Redis TimeSeries aggregates server-side → returns only bucket results (28–167 values) -2. Valkey transfers all raw points → client does aggregation (10K–100K values over the network) - -The Go-side bucketing itself is fast (simple map + sort). The cost is dominated by `ZRANGEBYSCORE` returning all members + `HMGET` fetching all values. - -### 7.7 Impact on Gorse - -Gorse uses time series for **monitoring metrics** (NDCG, precision, recall, feedback ratios, task durations). 
These are characterized by: - -- **Low volume**: Metrics are recorded per training cycle or per evaluation, not per-second. A typical deployment might have hundreds to low thousands of points per metric, not 100K. -- **Infrequent queries**: Dashboard queries happen on user interaction, not in hot paths. -- **Moderate ranges**: Most dashboard views show hours to days of data, not months. - -For Gorse's workload profile: -- Single-point ingestion and batch ingestion are **on par or better** than Redis. -- Small range queries (~0.9ms) are **fast enough** for dashboard rendering. -- Large range queries (~29ms) are **acceptable** for occasional dashboard use. -- Wide range queries (~332ms) would only occur with unusually large datasets and are still **sub-second**. - -### 7.8 Optimization Path - -If future workloads require faster large-range aggregation, **Approach B (Lua script server-side aggregation)** can be implemented as a drop-in replacement for `GetTimeSeriesPoints`. The Lua script would execute ZRANGEBYSCORE + HGET + bucketing entirely on the Valkey server, eliminating the data transfer bottleneck. This was documented in Section 2.3 of this report. 
- -### 7.9 How to Reproduce - -```bash -# Prerequisites: Redis Stack on port 6379, Valkey on port 6380 -docker run -d --name redis-stack -p 6379:6379 redis/redis-stack:latest -docker run -d --name valkey-search -p 6380:6379 valkey/valkey-bundle:unstable - -# Run side-by-side -VALKEY_URI="valkey://127.0.0.1:6380/" go test -v ./storage/cache/ \ - -run ^$ -bench "BenchmarkRedis/TS|BenchmarkValkey/TS" -benchtime 10s -timeout 10m - -# Run individually -go test -v ./storage/cache/ -run ^$ -bench "BenchmarkRedis/TS" -benchtime 10s -timeout 10m -VALKEY_URI="valkey://127.0.0.1:6380/" go test -v ./storage/cache/ \ - -run ^$ -bench "BenchmarkValkey/TS" -benchtime 10s -timeout 10m -``` From 8826ff3487283e0872b9d86c0ef4727422ab8aa7 Mon Sep 17 00:00:00 2001 From: Daria Korenieva Date: Wed, 6 May 2026 09:39:44 -0700 Subject: [PATCH 11/19] ci: add Valkey service to unit tests, skip TestValkey on macOS/Windows - Add valkey/valkey-bundle:unstable service on port 6380 to unit_test job - Add VALKEY_URI env var for Linux unit tests - Add TestValkey to -skip pattern for macOS and Windows jobs (no Valkey service available on those runners) Signed-off-by: Daria Korenieva --- .github/workflows/build_test.yml | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_test.yml b/.github/workflows/build_test.yml index ff02e13eb..0b6832e20 100644 --- a/.github/workflows/build_test.yml +++ b/.github/workflows/build_test.yml @@ -68,6 +68,11 @@ jobs: ports: - 6379 + valkey: + image: valkey/valkey-bundle:unstable + ports: + - 6380 + rustfs: image: rustfs/rustfs:alpha ports: @@ -133,6 +138,8 @@ jobs: CLICKHOUSE_URI: clickhouse://localhost:${{ job.services.clickhouse.ports[8123] }}/ # Redis REDIS_URI: redis://localhost:${{ job.services.redis.ports[6379] }}/ + # Valkey + VALKEY_URI: valkey://localhost:${{ job.services.valkey.ports[6380] }}/ # S3 S3_ENDPOINT: localhost:${{ job.services.rustfs.ports[9000] }} S3_ACCESS_KEY_ID: rustfsadmin @@ -181,7 +188,7 @@ 
jobs: go-version-file: ./go.mod - name: Test - run: go test -timeout 20m -v ./... -skip "TestPostgres|TestMySQL|TestMongo|TestRedis|TestClickHouse|TestMilvus|TestQdrant|TestWeaviate" + run: go test -timeout 20m -v ./... -skip "TestPostgres|TestMySQL|TestMongo|TestRedis|TestClickHouse|TestMilvus|TestQdrant|TestWeaviate|TestValkey" unit_test_windows: strategy: @@ -215,7 +222,7 @@ jobs: go-version-file: ./go.mod - name: Test - run: go test -timeout 20m -v ./... -skip "TestPostgres|TestMySQL|TestMongo|TestRedis|TestClickHouse|TestMilvus|TestQdrant|TestWeaviate" + run: go test -timeout 20m -v ./... -skip "TestPostgres|TestMySQL|TestMongo|TestRedis|TestClickHouse|TestMilvus|TestQdrant|TestWeaviate|TestValkey" integrate_test: name: integrate tests From ea318021d0fde35427cdafb378670d7801c95878 Mon Sep 17 00:00:00 2001 From: Daria Korenieva Date: Wed, 6 May 2026 10:29:55 -0700 Subject: [PATCH 12/19] refactor: switch Valkey client from valkey-glide to valkey-go valkey-glide requires CGO (Rust FFI core) and cannot build with CGO_ENABLED=0, which gorse uses for static Docker binaries. This is a confirmed upstream limitation (valkey-io/valkey-glide#4253). valkey-go is the official pure-Go Valkey client from the same org. It supports all needed commands (FT.SEARCH, HSET, ZADD, SCAN, etc.), works with CGO_ENABLED=0, and simplifies the code by using a single Client interface for both standalone and cluster modes. 
Changes: - Replace valkey-glide/go/v2 with valkey-io/valkey-go in go.mod - Rewrite storage/cache/valkey.go using valkey-go command builder API - Eliminate all if-isCluster branching (valkey-go handles routing) - Use DoMulti for pipeline batching instead of standalone batch - Update tests to use valkey-go client for FLUSHDB - Update URL parsers to return plain strings instead of glide types Signed-off-by: Daria Korenieva --- go.mod | 2 +- go.sum | 2 - storage/cache/valkey.go | 725 +++++++++++------------------------ storage/cache/valkey_test.go | 48 +-- 4 files changed, 238 insertions(+), 539 deletions(-) diff --git a/go.mod b/go.mod index 253195dc4..c9066db8e 100644 --- a/go.mod +++ b/go.mod @@ -63,7 +63,7 @@ require ( github.com/stretchr/testify v1.11.1 github.com/swaggest/swgui v1.8.5 github.com/tiktoken-go/tokenizer v0.7.0 - github.com/valkey-io/valkey-glide/go/v2 v2.1.0 + github.com/valkey-io/valkey-go v1.0.74 github.com/weaviate/weaviate v1.27.0 github.com/weaviate/weaviate-go-client/v4 v4.16.1 github.com/yuin/goldmark v1.7.16 diff --git a/go.sum b/go.sum index 2497ef6f2..47ea55925 100644 --- a/go.sum +++ b/go.sum @@ -921,8 +921,6 @@ github.com/ugorji/go v1.1.7/go.mod h1:kZn38zHttfInRq0xu/PH0az30d+z6vm202qpg1oXVM github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0= github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY= github.com/urfave/negroni v1.0.0/go.mod h1:Meg73S6kFm/4PpbYdq35yYWoCZ9mS/YSx+lKnmiohz4= -github.com/valkey-io/valkey-glide/go/v2 v2.1.0 h1:c0cf63com8Ul5nW4xRV/Q1D8rwRQdV/JFUio6gt0PpE= -github.com/valkey-io/valkey-glide/go/v2 v2.1.0/go.mod h1:LK5zmODJa5xnxZndarh1trntExb3GVGJXz4GwDCagho= github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= github.com/valyala/fasthttp v1.6.0/go.mod h1:FstJa9V+Pj9vQ7OJie2qMHdwemEDaDiSdBnvPM1Su9w= github.com/valyala/fasttemplate v1.0.1/go.mod 
h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPUpymEIMZ47gx8= diff --git a/storage/cache/valkey.go b/storage/cache/valkey.go index 81209dffe..0b59384dc 100644 --- a/storage/cache/valkey.go +++ b/storage/cache/valkey.go @@ -16,6 +16,7 @@ package cache import ( "context" + "crypto/tls" "fmt" "io" "net/url" @@ -28,42 +29,33 @@ import ( "github.com/gorse-io/gorse/storage" "github.com/juju/errors" "github.com/samber/lo" - glide "github.com/valkey-io/valkey-glide/go/v2" - glideconfig "github.com/valkey-io/valkey-glide/go/v2/config" - "github.com/valkey-io/valkey-glide/go/v2/models" - glideoptions "github.com/valkey-io/valkey-glide/go/v2/options" - "github.com/valkey-io/valkey-glide/go/v2/pipeline" + "github.com/valkey-io/valkey-go" "go.uber.org/zap" ) func init() { Register([]string{storage.ValkeyPrefix, storage.ValkeysPrefix}, func(path, tablePrefix string, opts ...storage.Option) (Database, error) { - addr, username, password, db, useTLS, err := parseValkeyURL(path) + host, port, username, password, db, useTLS, err := parseValkeyURL(path) if err != nil { return nil, errors.Trace(err) } - cfg := glideconfig.NewClientConfiguration(). - WithAddress(&addr). 
- WithRequestTimeout(60 * time.Second) - if password != "" { - if username != "" { - cfg.WithCredentials(glideconfig.NewServerCredentials(username, password)) - } else { - cfg.WithCredentials(glideconfig.NewServerCredentialsWithDefaultUsername(password)) - } + option := valkey.ClientOption{ + InitAddress: []string{fmt.Sprintf("%s:%d", host, port)}, + SelectDB: db, } - if db > 0 { - cfg.WithDatabaseId(db) + if username != "" || password != "" { + option.Username = username + option.Password = password } if useTLS { - cfg.WithUseTLS(true) + option.TLSConfig = &tls.Config{} } - client, err := glide.NewClient(cfg) + client, err := valkey.NewClient(option) if err != nil { return nil, errors.Trace(err) } database := &Valkey{ - standaloneClient: client, + client: client, isCluster: false, maxSearchResults: storage.NewOptions(opts...).MaxSearchResults, } @@ -75,27 +67,22 @@ func init() { if err != nil { return nil, errors.Trace(err) } - cfg := glideconfig.NewClusterClientConfiguration() - for i := range addresses { - cfg.WithAddress(&addresses[i]) + option := valkey.ClientOption{ + InitAddress: addresses, } - cfg.WithRequestTimeout(60 * time.Second) - if password != "" { - if username != "" { - cfg.WithCredentials(glideconfig.NewServerCredentials(username, password)) - } else { - cfg.WithCredentials(glideconfig.NewServerCredentialsWithDefaultUsername(password)) - } + if username != "" || password != "" { + option.Username = username + option.Password = password } if useTLS { - cfg.WithUseTLS(true) + option.TLSConfig = &tls.Config{} } - client, err := glide.NewClusterClient(cfg) + client, err := valkey.NewClient(option) if err != nil { return nil, errors.Trace(err) } database := &Valkey{ - clusterClient: client, + client: client, isCluster: true, maxSearchResults: storage.NewOptions(opts...).MaxSearchResults, } @@ -105,23 +92,22 @@ func init() { } // parseValkeyURL parses a valkey:// or valkeys:// URL into connection parameters. 
-func parseValkeyURL(rawURL string) (addr glideconfig.NodeAddress, username, password string, db int, useTLS bool, err error) { +func parseValkeyURL(rawURL string) (host string, port int, username, password string, db int, useTLS bool, err error) { parsed, err := url.Parse(rawURL) if err != nil { - return addr, "", "", 0, false, errors.Trace(err) + return "", 0, "", "", 0, false, errors.Trace(err) } - host := parsed.Hostname() + host = parsed.Hostname() if host == "" { host = "localhost" } - port := 6379 + port = 6379 if parsed.Port() != "" { port, err = strconv.Atoi(parsed.Port()) if err != nil { - return addr, "", "", 0, false, errors.Trace(err) + return "", 0, "", "", 0, false, errors.Trace(err) } } - addr = glideconfig.NodeAddress{Host: host, Port: port} if parsed.User != nil { username = parsed.User.Username() password, _ = parsed.User.Password() @@ -131,16 +117,16 @@ func parseValkeyURL(rawURL string) (addr glideconfig.NodeAddress, username, pass if dbStr != "" { db, err = strconv.Atoi(dbStr) if err != nil { - return addr, "", "", 0, false, errors.Errorf("invalid database number: %s", dbStr) + return "", 0, "", "", 0, false, errors.Errorf("invalid database number: %s", dbStr) } } } useTLS = parsed.Scheme == "valkeys" - return addr, username, password, db, useTLS, nil + return host, port, username, password, db, useTLS, nil } // parseValkeyClusterURL parses a valkey+cluster:// or valkeys+cluster:// URL. -func parseValkeyClusterURL(rawURL string) (addresses []glideconfig.NodeAddress, username, password string, useTLS bool, err error) { +func parseValkeyClusterURL(rawURL string) (addresses []string, username, password string, useTLS bool, err error) { // Replace the cluster prefix with a standard scheme for URL parsing. 
var newURL string if strings.HasPrefix(rawURL, storage.ValkeyClusterPrefix) { @@ -165,220 +151,126 @@ func parseValkeyClusterURL(rawURL string) (addresses []glideconfig.NodeAddress, return nil, "", "", false, errors.Trace(err) } } - addresses = append(addresses, glideconfig.NodeAddress{Host: host, Port: port}) + addresses = append(addresses, fmt.Sprintf("%s:%d", host, port)) if parsed.User != nil { username = parsed.User.Username() password, _ = parsed.User.Password() } // Parse additional addresses from query params (addr=host:port). for _, addrStr := range parsed.Query()["addr"] { - parts := strings.SplitN(addrStr, ":", 2) - h := parts[0] - p := 6379 - if len(parts) == 2 { - p, err = strconv.Atoi(parts[1]) - if err != nil { - return nil, "", "", false, errors.Errorf("invalid port in addr param: %s", addrStr) - } + if !strings.Contains(addrStr, ":") { + addrStr = addrStr + ":6379" } - addresses = append(addresses, glideconfig.NodeAddress{Host: h, Port: p}) + addresses = append(addresses, addrStr) } return addresses, username, password, useTLS, nil } -// Valkey cache storage using valkey-glide client. +// Valkey cache storage using valkey-go client. type Valkey struct { storage.TablePrefix - standaloneClient *glide.Client - clusterClient *glide.ClusterClient + client valkey.Client isCluster bool maxSearchResults int } // Close the valkey connection. func (v *Valkey) Close() error { - if v.isCluster { - v.clusterClient.Close() - } else { - v.standaloneClient.Close() - } + v.client.Close() return nil } -// --- Thin wrapper methods to eliminate cluster/standalone branching duplication --- - -// vDel deletes keys, handling cluster mode by deleting one at a time to avoid CROSSSLOT errors. 
-func (v *Valkey) vDel(ctx context.Context, keys []string) error { - if v.isCluster { - for _, key := range keys { - if _, err := v.clusterClient.Del(ctx, []string{key}); err != nil { - return errors.Trace(err) - } - } - return nil - } - if _, err := v.standaloneClient.Del(ctx, keys); err != nil { - return errors.Trace(err) - } - return nil -} - -// vHSet sets hash fields on a key. -func (v *Valkey) vHSet(ctx context.Context, key string, fields map[string]string) error { - if v.isCluster { - _, err := v.clusterClient.HSet(ctx, key, fields) - return err - } - _, err := v.standaloneClient.HSet(ctx, key, fields) - return err -} - -// vZAdd adds members to a sorted set. -func (v *Valkey) vZAdd(ctx context.Context, key string, members map[string]float64) error { - if v.isCluster { - _, err := v.clusterClient.ZAdd(ctx, key, members) - return err - } - _, err := v.standaloneClient.ZAdd(ctx, key, members) - return err -} - -// vHGetAll returns all fields and values of a hash. -func (v *Valkey) vHGetAll(ctx context.Context, key string) (map[string]string, error) { - if v.isCluster { - return v.clusterClient.HGetAll(ctx, key) - } - return v.standaloneClient.HGetAll(ctx, key) -} - // Ping the valkey server. func (v *Valkey) Ping() error { ctx := context.Background() - if v.isCluster { - _, err := v.clusterClient.Ping(ctx) - return err - } - _, err := v.standaloneClient.Ping(ctx) - return err + return v.client.Do(ctx, v.client.B().Ping().Build()).Error() } // Init creates the valkey-search index for document storage. func (v *Valkey) Init() error { ctx := context.Background() // List existing indices via FT._LIST. 
- result, err := v.customCommand(ctx, []string{"FT._LIST"}) + result, err := v.client.Do(ctx, v.client.B().Arbitrary("FT._LIST").Build()).ToArray() if err != nil { return errors.Trace(err) } - indices := parseStringSlice(result) + indices := make([]string, 0, len(result)) + for _, r := range result { + if s, err := r.ToString(); err == nil { + indices = append(indices, s) + } + } if lo.Contains(indices, v.DocumentTable()) { return nil } // Create the index. - _, err = v.customCommand(ctx, []string{ - "FT.CREATE", v.DocumentTable(), - "ON", "HASH", - "PREFIX", "1", v.DocumentTable() + ":", - "SCHEMA", - "collection", "TAG", - "subset", "TAG", - "id", "TAG", - "score", "NUMERIC", - "is_hidden", "NUMERIC", - "categories", "TAG", "SEPARATOR", ";", - "timestamp", "NUMERIC", - }) + err = v.client.Do(ctx, v.client.B().Arbitrary("FT.CREATE"). + Keys(v.DocumentTable()). + Args( + "ON", "HASH", + "PREFIX", "1", v.DocumentTable()+":", + "SCHEMA", + "collection", "TAG", + "subset", "TAG", + "id", "TAG", + "score", "NUMERIC", + "is_hidden", "NUMERIC", + "categories", "TAG", "SEPARATOR", ";", + "timestamp", "NUMERIC", + ).Build()).Error() if err != nil { return errors.Trace(err) } return nil } -// customCommand executes a custom command, handling both standalone and cluster modes. -func (v *Valkey) customCommand(ctx context.Context, args []string) (any, error) { - if v.isCluster { - result, err := v.clusterClient.CustomCommand(ctx, args) - if err != nil { - return nil, err - } - return result.SingleValue(), nil - } - return v.standaloneClient.CustomCommand(ctx, args) -} - // Scan iterates over all keys with the table prefix. 
func (v *Valkey) Scan(work func(string) error) error { ctx := context.Background() - if v.isCluster { - cursor := models.NewClusterScanCursor() - scanOpts := glideoptions.NewClusterScanOptions().SetMatch(string(v.TablePrefix) + "*") - for !cursor.IsFinished() { - result, err := v.clusterClient.ScanWithOptions(ctx, cursor, *scanOpts) - if err != nil { - return errors.Trace(err) - } - for _, key := range result.Keys { - if err = work(key[len(v.TablePrefix):]); err != nil { - return errors.Trace(err) - } - } - cursor = result.Cursor - } - return nil - } - cursor := models.NewCursor() - scanOpts := glideoptions.NewScanOptions().SetMatch(string(v.TablePrefix) + "*") + var cursor uint64 for { - result, err := v.standaloneClient.ScanWithOptions(ctx, cursor, *scanOpts) + entry, err := v.client.Do(ctx, v.client.B().Scan().Cursor(cursor).Match(string(v.TablePrefix)+"*").Count(100).Build()).AsScanEntry() if err != nil { return errors.Trace(err) } - for _, key := range result.Data { + for _, key := range entry.Elements { if err = work(key[len(v.TablePrefix):]); err != nil { return errors.Trace(err) } } - if result.Cursor.IsFinished() { + cursor = entry.Cursor + if cursor == 0 { return nil } - cursor = result.Cursor } } // Purge deletes all keys with the table prefix. 
func (v *Valkey) Purge() error { ctx := context.Background() - if v.isCluster { - cursor := models.NewClusterScanCursor() - scanOpts := glideoptions.NewClusterScanOptions().SetMatch(string(v.TablePrefix) + "*") - for !cursor.IsFinished() { - result, err := v.clusterClient.ScanWithOptions(ctx, cursor, *scanOpts) - if err != nil { - return errors.Trace(err) - } - if err = v.vDel(ctx, result.Keys); err != nil { - return errors.Trace(err) - } - cursor = result.Cursor - } - return nil - } - cursor := models.NewCursor() - scanOpts := glideoptions.NewScanOptions().SetMatch(string(v.TablePrefix) + "*") + var cursor uint64 for { - result, err := v.standaloneClient.ScanWithOptions(ctx, cursor, *scanOpts) + entry, err := v.client.Do(ctx, v.client.B().Scan().Cursor(cursor).Match(string(v.TablePrefix)+"*").Count(100).Build()).AsScanEntry() if err != nil { return errors.Trace(err) } - if len(result.Data) > 0 { - if _, err = v.standaloneClient.Del(ctx, result.Data); err != nil { - return errors.Trace(err) + if len(entry.Elements) > 0 { + if v.isCluster { + for _, key := range entry.Elements { + if err = v.client.Do(ctx, v.client.B().Del().Key(key).Build()).Error(); err != nil { + return errors.Trace(err) + } + } + } else { + if err = v.client.Do(ctx, v.client.B().Del().Key(entry.Elements...).Build()).Error(); err != nil { + return errors.Trace(err) + } } } - if result.Cursor.IsFinished() { + cursor = entry.Cursor + if cursor == 0 { return nil } - cursor = result.Cursor } } @@ -387,16 +279,8 @@ func (v *Valkey) Set(ctx context.Context, values ...Value) error { if len(values) == 0 { return nil } - if v.isCluster { - for _, val := range values { - if _, err := v.clusterClient.Set(ctx, v.Key(val.name), val.value); err != nil { - return errors.Trace(err) - } - } - return nil - } for _, val := range values { - if _, err := v.standaloneClient.Set(ctx, v.Key(val.name), val.value); err != nil { + if err := v.client.Do(ctx, 
v.client.B().Set().Key(v.Key(val.name)).Value(val.value).Build()).Error(); err != nil { return errors.Trace(err) } } @@ -405,70 +289,41 @@ func (v *Valkey) Set(ctx context.Context, values ...Value) error { // Get returns a value from Valkey. func (v *Valkey) Get(ctx context.Context, key string) *ReturnValue { - var result models.Result[string] - var err error - if v.isCluster { - result, err = v.clusterClient.Get(ctx, v.Key(key)) - } else { - result, err = v.standaloneClient.Get(ctx, v.Key(key)) - } + result, err := v.client.Do(ctx, v.client.B().Get().Key(v.Key(key)).Build()).ToString() if err != nil { + if valkey.IsValkeyNil(err) { + return &ReturnValue{value: "", exists: false} + } return &ReturnValue{err: err, exists: false} } - if result.IsNil() { - return &ReturnValue{value: "", exists: false} - } - return &ReturnValue{value: result.Value(), exists: true} + return &ReturnValue{value: result, exists: true} } // Delete removes a key from Valkey. func (v *Valkey) Delete(ctx context.Context, key string) error { - if v.isCluster { - _, err := v.clusterClient.Del(ctx, []string{v.Key(key)}) - return err - } - _, err := v.standaloneClient.Del(ctx, []string{v.Key(key)}) - return err + return v.client.Do(ctx, v.client.B().Del().Key(v.Key(key)).Build()).Error() } // Push adds a message to a sorted set queue with timestamp as score. func (v *Valkey) Push(ctx context.Context, name, message string) error { - members := map[string]float64{message: float64(time.Now().UnixNano())} - if v.isCluster { - _, err := v.clusterClient.ZAdd(ctx, v.Key(name), members) - return err - } - _, err := v.standaloneClient.ZAdd(ctx, v.Key(name), members) - return err + return v.client.Do(ctx, v.client.B().Zadd().Key(v.Key(name)).ScoreMember().ScoreMember(float64(time.Now().UnixNano()), message).Build()).Error() } // Pop removes and returns the message with the lowest score from the queue. 
func (v *Valkey) Pop(ctx context.Context, name string) (string, error) { - var result map[string]float64 - var err error - if v.isCluster { - result, err = v.clusterClient.ZPopMin(ctx, v.Key(name)) - } else { - result, err = v.standaloneClient.ZPopMin(ctx, v.Key(name)) - } + result, err := v.client.Do(ctx, v.client.B().Zpopmin().Key(v.Key(name)).Count(1).Build()).AsZScores() if err != nil { return "", errors.Trace(err) } if len(result) == 0 { return "", io.EOF } - for member := range result { - return member, nil - } - return "", io.EOF + return result[0].Member, nil } // Remain returns the number of messages in the queue. func (v *Valkey) Remain(ctx context.Context, name string) (int64, error) { - if v.isCluster { - return v.clusterClient.ZCard(ctx, v.Key(name)) - } - return v.standaloneClient.ZCard(ctx, v.Key(name)) + return v.client.Do(ctx, v.client.B().Zcard().Key(v.Key(name)).Build()).AsInt64() } func (v *Valkey) documentKey(collection, subset, value string) string { @@ -477,32 +332,18 @@ func (v *Valkey) documentKey(collection, subset, value string) string { // AddScores adds score documents to Valkey using hash storage. 
func (v *Valkey) AddScores(ctx context.Context, collection, subset string, documents []Score) error { - if v.isCluster { - for _, document := range documents { - if err := v.vHSet(ctx, v.documentKey(collection, subset, document.Id), map[string]string{ - "collection": collection, - "subset": subset, - "id": document.Id, - "score": strconv.FormatFloat(document.Score, 'g', -1, 64), - "is_hidden": formatBool(document.IsHidden), - "categories": encodeCategories(document.Categories), - "timestamp": strconv.FormatInt(document.Timestamp.UnixMicro(), 10), - }); err != nil { - return errors.Trace(err) - } - } - return nil - } for _, document := range documents { - if _, err := v.standaloneClient.HSet(ctx, v.documentKey(collection, subset, document.Id), map[string]string{ - "collection": collection, - "subset": subset, - "id": document.Id, - "score": strconv.FormatFloat(document.Score, 'g', -1, 64), - "is_hidden": formatBool(document.IsHidden), - "categories": encodeCategories(document.Categories), - "timestamp": strconv.FormatInt(document.Timestamp.UnixMicro(), 10), - }); err != nil { + key := v.documentKey(collection, subset, document.Id) + cmd := v.client.B().Hset().Key(key).FieldValue(). + FieldValue("collection", collection). + FieldValue("subset", subset). + FieldValue("id", document.Id). + FieldValue("score", strconv.FormatFloat(document.Score, 'g', -1, 64)). + FieldValue("is_hidden", formatBool(document.IsHidden)). + FieldValue("categories", encodeCategories(document.Categories)). + FieldValue("timestamp", strconv.FormatInt(document.Timestamp.UnixMicro(), 10)). + Build() + if err := v.client.Do(ctx, cmd).Error(); err != nil { return errors.Trace(err) } } @@ -526,19 +367,15 @@ func (v *Valkey) SearchScores(ctx context.Context, collection, subset string, qu for _, q := range query { fmt.Fprintf(&builder, " @categories:{ %s }", escapeTag(encodeCategory(q))) } - // Fetch enough results to cover the requested range. 
- // We request from offset 0 because the Glide map format loses ordering, - // and we sort + slice in Go. fetchLimit := 10000 if end != -1 { fetchLimit = end } - args := []string{ - "FT.SEARCH", v.DocumentTable(), builder.String(), - "SORTBY", "score", "DESC", - "LIMIT", "0", strconv.Itoa(fetchLimit), - } - result, err := v.customCommand(ctx, args) + cmd := v.client.B().Arbitrary("FT.SEARCH"). + Keys(v.DocumentTable()). + Args(builder.String(), "SORTBY", "score", "DESC", "LIMIT", "0", strconv.Itoa(fetchLimit)). + Build() + result, err := v.client.Do(ctx, cmd).ToArray() if err != nil { return nil, errors.Trace(err) } @@ -546,7 +383,6 @@ func (v *Valkey) SearchScores(ctx context.Context, collection, subset string, qu if err != nil { return nil, errors.Trace(err) } - // Apply begin/end slicing since Glide map format loses server-side ordering. if begin > 0 || end != -1 { if begin >= len(documents) { return []Score{}, nil @@ -584,24 +420,22 @@ func (v *Valkey) UpdateScores(ctx context.Context, collections []string, subset keySet := make(map[string]struct{}) offset := 0 for { - args := []string{ - "FT.SEARCH", v.DocumentTable(), builder.String(), - "SORTBY", "score", "DESC", - "LIMIT", strconv.Itoa(offset), strconv.Itoa(limit), - } - result, err := v.customCommand(ctx, args) + cmd := v.client.B().Arbitrary("FT.SEARCH"). + Keys(v.DocumentTable()). + Args(builder.String(), "SORTBY", "score", "DESC", "LIMIT", strconv.Itoa(offset), strconv.Itoa(limit)). + Build() + result, err := v.client.Do(ctx, cmd).ToArray() if err != nil { return errors.Trace(err) } - // On the first page, check the total and fetch all at once if needed. - // This avoids pagination issues with tied scores where the server - // may return duplicates across pages. if offset == 0 { total := parseFTSearchTotal(result) if total > limit { - // Re-fetch with the full count to avoid pagination drift. 
- args[len(args)-1] = strconv.Itoa(total) - result, err = v.customCommand(ctx, args) + cmd = v.client.B().Arbitrary("FT.SEARCH"). + Keys(v.DocumentTable()). + Args(builder.String(), "SORTBY", "score", "DESC", "LIMIT", "0", strconv.Itoa(total)). + Build() + result, err = v.client.Do(ctx, cmd).ToArray() if err != nil { return errors.Trace(err) } @@ -625,18 +459,18 @@ func (v *Valkey) UpdateScores(ctx context.Context, collections []string, subset } } - values := make(map[string]string) - if patch.Score != nil { - values["score"] = strconv.FormatFloat(*patch.Score, 'g', -1, 64) - } - if patch.IsHidden != nil { - values["is_hidden"] = formatBool(*patch.IsHidden) - } - if patch.Categories != nil { - values["categories"] = encodeCategories(patch.Categories) - } for _, key := range keys { - if err := v.vHSet(ctx, key, values); err != nil { + cmd := v.client.B().Hset().Key(key).FieldValue() + if patch.Score != nil { + cmd = cmd.FieldValue("score", strconv.FormatFloat(*patch.Score, 'g', -1, 64)) + } + if patch.IsHidden != nil { + cmd = cmd.FieldValue("is_hidden", formatBool(*patch.IsHidden)) + } + if patch.Categories != nil { + cmd = cmd.FieldValue("categories", encodeCategories(patch.Categories)) + } + if err := v.client.Do(ctx, cmd.Build()).Error(); err != nil { return errors.Trace(err) } } @@ -661,12 +495,11 @@ func (v *Valkey) DeleteScores(ctx context.Context, collections []string, conditi } const maxDeleteIterations = 100 for iteration := 0; iteration < maxDeleteIterations; iteration++ { - args := []string{ - "FT.SEARCH", v.DocumentTable(), builder.String(), - "SORTBY", "score", "DESC", - "LIMIT", "0", "10000", - } - result, err := v.customCommand(ctx, args) + cmd := v.client.B().Arbitrary("FT.SEARCH"). + Keys(v.DocumentTable()). + Args(builder.String(), "SORTBY", "score", "DESC", "LIMIT", "0", "10000"). 
+ Build() + result, err := v.client.Do(ctx, cmd).ToArray() if err != nil { return errors.Trace(err) } @@ -676,12 +509,13 @@ func (v *Valkey) DeleteScores(ctx context.Context, collections []string, conditi break } if v.isCluster { - // Delete keys one at a time to avoid CROSSSLOT errors in cluster mode. - if err = v.vDel(ctx, docKeys); err != nil { - return errors.Trace(err) + for _, key := range docKeys { + if err = v.client.Do(ctx, v.client.B().Del().Key(key).Build()).Error(); err != nil { + return errors.Trace(err) + } } } else { - if _, err = v.standaloneClient.Del(ctx, docKeys); err != nil { + if err = v.client.Do(ctx, v.client.B().Del().Key(docKeys...).Build()).Error(); err != nil { return errors.Trace(err) } } @@ -694,40 +528,14 @@ func (v *Valkey) DeleteScores(ctx context.Context, collections []string, conditi // ScanScores iterates over all score documents. func (v *Valkey) ScanScores(ctx context.Context, callback func(collection string, id string, subset string, timestamp time.Time) error) error { - if v.isCluster { - cursor := models.NewClusterScanCursor() - scanOpts := glideoptions.NewClusterScanOptions().SetMatch(v.DocumentTable() + ":*") - for !cursor.IsFinished() { - result, err := v.clusterClient.ScanWithOptions(ctx, cursor, *scanOpts) - if err != nil { - return errors.Trace(err) - } - for _, key := range result.Keys { - row, err := v.vHGetAll(ctx, key) - if err != nil { - return errors.Trace(err) - } - usec, err := strconv.ParseInt(row["timestamp"], 10, 64) - if err != nil { - return errors.Trace(err) - } - if err = callback(row["collection"], row["id"], row["subset"], time.UnixMicro(usec).In(time.UTC)); err != nil { - return errors.Trace(err) - } - } - cursor = result.Cursor - } - return nil - } - cursor := models.NewCursor() - scanOpts := glideoptions.NewScanOptions().SetMatch(v.DocumentTable() + ":*") + var cursor uint64 for { - result, err := v.standaloneClient.ScanWithOptions(ctx, cursor, *scanOpts) + entry, err := v.client.Do(ctx, 
v.client.B().Scan().Cursor(cursor).Match(v.DocumentTable()+":*").Count(100).Build()).AsScanEntry() if err != nil { return errors.Trace(err) } - for _, key := range result.Data { - row, err := v.vHGetAll(ctx, key) + for _, key := range entry.Elements { + row, err := v.client.Do(ctx, v.client.B().Hgetall().Key(key).Build()).AsStrMap() if err != nil { return errors.Trace(err) } @@ -739,13 +547,41 @@ func (v *Valkey) ScanScores(ctx context.Context, callback func(collection string return errors.Trace(err) } } - if result.Cursor.IsFinished() { + cursor = entry.Cursor + if cursor == 0 { return nil } - cursor = result.Cursor } } +// AddTimeSeriesPoints adds time series points using sorted set + hash. +func (v *Valkey) AddTimeSeriesPoints(ctx context.Context, points []TimeSeriesPoint) error { + grouped := groupTimeSeriesPoints(points) + cmds := make(valkey.Commands, 0, len(grouped)*2) + for name, sd := range grouped { + indexKey := v.PointsTable() + ":ts_index:" + name + dataKey := v.PointsTable() + ":ts_data:" + name + // ZADD + zaddCmd := v.client.B().Zadd().Key(indexKey).ScoreMember() + for member, score := range sd.zaddMembers { + zaddCmd = zaddCmd.ScoreMember(score, member) + } + cmds = append(cmds, zaddCmd.Build()) + // HSET + hsetCmd := v.client.B().Hset().Key(dataKey).FieldValue() + for field, value := range sd.hsetFields { + hsetCmd = hsetCmd.FieldValue(field, value) + } + cmds = append(cmds, hsetCmd.Build()) + } + for _, resp := range v.client.DoMulti(ctx, cmds...) { + if err := resp.Error(); err != nil { + return errors.Trace(err) + } + } + return nil +} + // timeSeriesGroup holds grouped ZADD members and HSET fields for a single time series. type timeSeriesGroup struct { zaddMembers map[string]float64 @@ -772,35 +608,6 @@ func groupTimeSeriesPoints(points []TimeSeriesPoint) map[string]*timeSeriesGroup return grouped } -// AddTimeSeriesPoints adds time series points using sorted set + hash (Approach A). 
-// Sorted set key: {prefix}ts_index:{name} — score = timestamp_ms, member = timestamp_ms as string -// Hash key: {prefix}ts_data:{name} — field = timestamp_ms as string, value = float64 as string -func (v *Valkey) AddTimeSeriesPoints(ctx context.Context, points []TimeSeriesPoint) error { - grouped := groupTimeSeriesPoints(points) - if v.isCluster { - for name, sd := range grouped { - indexKey := v.PointsTable() + ":ts_index:" + name - dataKey := v.PointsTable() + ":ts_data:" + name - if err := v.vZAdd(ctx, indexKey, sd.zaddMembers); err != nil { - return errors.Trace(err) - } - if err := v.vHSet(ctx, dataKey, sd.hsetFields); err != nil { - return errors.Trace(err) - } - } - return nil - } - batch := pipeline.NewStandaloneBatch(false) - for name, sd := range grouped { - indexKey := v.PointsTable() + ":ts_index:" + name - dataKey := v.PointsTable() + ":ts_data:" + name - batch.ZAdd(indexKey, sd.zaddMembers) - batch.HSet(dataKey, sd.hsetFields) - } - _, err := v.standaloneClient.Exec(ctx, *batch, true) - return errors.Trace(err) -} - // GetTimeSeriesPoints retrieves time series points with Go-side bucket aggregation. func (v *Valkey) GetTimeSeriesPoints(ctx context.Context, name string, begin, end time.Time, duration time.Duration) ([]TimeSeriesPoint, error) { indexKey := v.PointsTable() + ":ts_index:" + name @@ -808,18 +615,11 @@ func (v *Valkey) GetTimeSeriesPoints(ctx context.Context, name string, begin, en beginMs := begin.UnixMilli() endMs := end.UnixMilli() - // Fetch all timestamps in range from sorted set using ZRANGEBYSCORE via ZRangeWithScores. 
- rangeQuery := glideoptions.NewRangeByScoreQuery( - glideoptions.NewInclusiveScoreBoundary(float64(beginMs)), - glideoptions.NewInclusiveScoreBoundary(float64(endMs)), - ) - var members []models.MemberAndScore - var err error - if v.isCluster { - members, err = v.clusterClient.ZRangeWithScores(ctx, indexKey, rangeQuery) - } else { - members, err = v.standaloneClient.ZRangeWithScores(ctx, indexKey, rangeQuery) - } + // Fetch all timestamps in range from sorted set. + members, err := v.client.Do(ctx, v.client.B().Zrangebyscore().Key(indexKey). + Min(strconv.FormatInt(beginMs, 10)). + Max(strconv.FormatInt(endMs, 10)). + Withscores().Build()).AsZScores() if err != nil { return nil, errors.Trace(err) } @@ -832,12 +632,7 @@ func (v *Valkey) GetTimeSeriesPoints(ctx context.Context, name string, begin, en for i, m := range members { fields[i] = m.Member } - var hmgetResults []models.Result[string] - if v.isCluster { - hmgetResults, err = v.clusterClient.HMGet(ctx, dataKey, fields) - } else { - hmgetResults, err = v.standaloneClient.HMGet(ctx, dataKey, fields) - } + hmgetResults, err := v.client.Do(ctx, v.client.B().Hmget().Key(dataKey).Field(fields...).Build()).ToArray() if err != nil { return nil, errors.Trace(err) } @@ -849,24 +644,24 @@ func (v *Valkey) GetTimeSeriesPoints(ctx context.Context, name string, begin, en } tsValues := make([]tsValue, 0, len(members)) for i, m := range members { - if hmgetResults[i].IsNil() { - continue + valStr, err := hmgetResults[i].ToString() + if err != nil { + continue // nil result } tsMs, err := strconv.ParseInt(m.Member, 10, 64) if err != nil { log.Logger().Warn("failed to parse timestamp", zap.String("member", m.Member), zap.Error(err)) continue } - val, err := strconv.ParseFloat(hmgetResults[i].Value(), 64) + val, err := strconv.ParseFloat(valStr, 64) if err != nil { - log.Logger().Warn("failed to parse value", zap.String("value", hmgetResults[i].Value()), zap.Error(err)) + log.Logger().Warn("failed to parse value", 
zap.String("value", valStr), zap.Error(err)) continue } tsValues = append(tsValues, tsValue{timestampMs: tsMs, value: val}) } - // Go-side bucket aggregation: group by floor(timestamp_ms / duration_ms) * duration_ms, - // take the last value per bucket (highest timestamp). + // Go-side bucket aggregation. durationMs := duration.Milliseconds() if durationMs <= 0 { durationMs = 1 @@ -885,7 +680,6 @@ func (v *Valkey) GetTimeSeriesPoints(ctx context.Context, name string, begin, en } } - // Sort buckets by timestamp ascending. sortedBuckets := make([]*bucket, 0, len(buckets)) for _, b := range buckets { sortedBuckets = append(sortedBuckets, b) @@ -894,7 +688,6 @@ func (v *Valkey) GetTimeSeriesPoints(ctx context.Context, name string, begin, en return sortedBuckets[i].bucketMs < sortedBuckets[j].bucketMs }) - // Convert to TimeSeriesPoint. points := make([]TimeSeriesPoint, 0, len(sortedBuckets)) for _, b := range sortedBuckets { points = append(points, TimeSeriesPoint{ @@ -906,67 +699,30 @@ func (v *Valkey) GetTimeSeriesPoints(ctx context.Context, name string, begin, en return points, nil } -// parseStringSlice converts a CustomCommand result to a string slice. -func parseStringSlice(result any) []string { - if result == nil { - return nil - } - switch v := result.(type) { - case []any: - strs := make([]string, 0, len(v)) - for _, item := range v { - if s, ok := item.(string); ok { - strs = append(strs, s) - } - } - return strs - default: - return nil - } -} +// --- FT.SEARCH result parsing helpers --- -// parseFTSearchTotal extracts the total count from an FT.SEARCH result. -// Valkey Glide returns: [int64_total, map[string]interface{}{docKey: map[string]interface{}{fields...}, ...}] -func parseFTSearchTotal(result any) int { - if result == nil { - return 0 - } - arr, ok := result.([]any) - if !ok || len(arr) == 0 { +// parseFTSearchTotal extracts the total count from an FT.SEARCH result array. 
+// valkey-go returns: [total_int64, key1, [field1, val1, ...], key2, [field2, val2, ...], ...] +func parseFTSearchTotal(result []valkey.ValkeyMessage) int { + if len(result) == 0 { return 0 } - switch v := arr[0].(type) { - case int64: - return int(v) - case float64: - return int(v) - default: + total, err := result[0].AsInt64() + if err != nil { return 0 } + return int(total) } -// parseFTSearchKeys extracts document keys from an FT.SEARCH result. -// Valkey Glide returns: [int64_total, map[string]interface{}{docKey: fieldsMap, ...}] -func parseFTSearchKeys(result any) []string { - if result == nil { - return nil - } - arr, ok := result.([]any) - if !ok || len(arr) < 2 { +// parseFTSearchKeys extracts document keys from an FT.SEARCH result array. +func parseFTSearchKeys(result []valkey.ValkeyMessage) []string { + if len(result) < 2 { return nil } - // Glide format: arr[1] is a map[string]interface{} where keys are doc keys. - if docMap, ok := arr[1].(map[string]any); ok { - keys := make([]string, 0, len(docMap)) - for key := range docMap { - keys = append(keys, key) - } - return keys - } - // Fallback: flat array format [total, key1, [fields...], key2, [fields...], ...] var keys []string - for i := 1; i < len(arr); i += 2 { - if key, ok := arr[i].(string); ok { + for i := 1; i < len(result); i += 2 { + key, err := result[i].ToString() + if err == nil { keys = append(keys, key) } } @@ -974,49 +730,17 @@ func parseFTSearchKeys(result any) []string { } // parseFTSearchResult parses an FT.SEARCH result into Score documents. 
-// Valkey Glide returns: [int64_total, map[string]interface{}{docKey: map[string]interface{}{fields...}, ...}] -func parseFTSearchResult(result any) ([]Score, error) { - if result == nil { +func parseFTSearchResult(result []valkey.ValkeyMessage) ([]Score, error) { + if len(result) < 2 { return nil, nil } - arr, ok := result.([]any) - if !ok || len(arr) < 2 { - return nil, nil - } - - // Glide format: arr[1] is a map[string]interface{} where keys are doc keys - // and values are maps of field name → field value. - if docMap, ok := arr[1].(map[string]any); ok { - return parseFTSearchResultFromMap(docMap) - } - - // Fallback: flat array format [total, key1, [fields...], key2, [fields...], ...] documents := make([]Score, 0) - for i := 1; i < len(arr); i += 2 { - if i+1 >= len(arr) { + for i := 1; i < len(result); i += 2 { + if i+1 >= len(result) { break } - fields := parseFieldMap(arr[i+1]) - if fields == nil { - continue - } - doc, err := scoreFromFieldMap(fields) + fields, err := parseFieldArray(result[i+1]) if err != nil { - return nil, err - } - documents = append(documents, doc) - } - return documents, nil -} - -// parseFTSearchResultFromMap parses the Glide map format into Score documents. -// The map has doc keys as keys and field maps as values. -// We need to sort by score DESC to match the SORTBY in the query. -func parseFTSearchResultFromMap(docMap map[string]any) ([]Score, error) { - documents := make([]Score, 0, len(docMap)) - for _, docValue := range docMap { - fields := parseFieldMap(docValue) - if fields == nil { continue } doc, err := scoreFromFieldMap(fields) @@ -1032,6 +756,23 @@ func parseFTSearchResultFromMap(docMap map[string]any) ([]Score, error) { return documents, nil } +// parseFieldArray converts a ValkeyMessage field array [field1, val1, field2, val2, ...] into a map. 
+func parseFieldArray(msg valkey.ValkeyMessage) (map[string]string, error) { + arr, err := msg.ToArray() + if err != nil { + return nil, err + } + m := make(map[string]string) + for j := 0; j+1 < len(arr); j += 2 { + key, err1 := arr[j].ToString() + val, err2 := arr[j+1].ToString() + if err1 == nil && err2 == nil { + m[key] = val + } + } + return m, nil +} + // scoreFromFieldMap converts a field map into a Score struct. func scoreFromFieldMap(fields map[string]string) (Score, error) { var doc Score @@ -1059,39 +800,7 @@ func scoreFromFieldMap(fields map[string]string) (Score, error) { return doc, nil } -// parseFieldMap converts a field array from FT.SEARCH into a map. -// The field array is [field1, value1, field2, value2, ...] or a map. -func parseFieldMap(v any) map[string]string { - if v == nil { - return nil - } - switch fields := v.(type) { - case []any: - m := make(map[string]string) - for j := 0; j+1 < len(fields); j += 2 { - key, ok1 := fields[j].(string) - val, ok2 := fields[j+1].(string) - if ok1 && ok2 { - m[key] = val - } - } - return m - case map[string]any: - m := make(map[string]string) - for k, val := range fields { - m[k] = fmt.Sprint(val) - } - return m - case map[string]string: - return fields - default: - return nil - } -} - // valkeyTagEscaper escapes all TAG special characters for Valkey Search queries. -// This prevents query injection via user-controlled values in TAG fields. -// Valkey Search TAG syntax has special chars: | * { } ( ) ~ @ \ " ' - : . 
/ + var valkeyTagEscaper = strings.NewReplacer( `\`, `\\`, `{`, `\{`, diff --git a/storage/cache/valkey_test.go b/storage/cache/valkey_test.go index 4f61f431e..792f3691b 100644 --- a/storage/cache/valkey_test.go +++ b/storage/cache/valkey_test.go @@ -52,10 +52,8 @@ func (suite *ValkeyTestSuite) SetupSuite() { // flush db valkeyClient, ok := suite.Database.(*Valkey) suite.True(ok) - if !valkeyClient.isCluster { - _, err = valkeyClient.standaloneClient.CustomCommand(suite.T().Context(), []string{"FLUSHDB"}) - suite.NoError(err) - } + err = valkeyClient.client.Do(context.Background(), valkeyClient.client.B().Flushdb().Build()).Error() + suite.NoError(err) // create schema err = suite.Database.Init() suite.NoError(err) @@ -211,44 +209,42 @@ func TestValkey(t *testing.T) { func TestParseValkeyURL(t *testing.T) { // Basic URL - addr, username, password, db, useTLS, err := parseValkeyURL("valkey://127.0.0.1:6380/") + host, port, username, password, db, useTLS, err := parseValkeyURL("valkey://127.0.0.1:6380/") assert.NoError(t, err) - assert.Equal(t, "127.0.0.1", addr.Host) - assert.Equal(t, 6380, addr.Port) + assert.Equal(t, "127.0.0.1", host) + assert.Equal(t, 6380, port) assert.Equal(t, "", username) assert.Equal(t, "", password) assert.Equal(t, 0, db) assert.False(t, useTLS) // URL with password only - addr, username, password, db, useTLS, err = parseValkeyURL("valkey://:secret@localhost:6379/2") + host, _, password, _, db, useTLS, err = parseValkeyURL("valkey://:secret@localhost:6379/2") assert.NoError(t, err) - assert.Equal(t, "localhost", addr.Host) - assert.Equal(t, 6379, addr.Port) - assert.Equal(t, "", username) + assert.Equal(t, "localhost", host) assert.Equal(t, "secret", password) assert.Equal(t, 2, db) assert.False(t, useTLS) // URL with username and password - addr, username, password, db, useTLS, err = parseValkeyURL("valkey://myuser:mypass@host.example.com:6380/3") + host, port, username, password, db, useTLS, err = 
parseValkeyURL("valkey://myuser:mypass@host.example.com:6380/3") assert.NoError(t, err) - assert.Equal(t, "host.example.com", addr.Host) - assert.Equal(t, 6380, addr.Port) + assert.Equal(t, "host.example.com", host) + assert.Equal(t, 6380, port) assert.Equal(t, "myuser", username) assert.Equal(t, "mypass", password) assert.Equal(t, 3, db) assert.False(t, useTLS) // TLS URL - addr, _, _, _, useTLS, err = parseValkeyURL("valkeys://localhost:6379/0") + _, _, _, _, _, useTLS, err = parseValkeyURL("valkeys://localhost:6379/0") assert.NoError(t, err) assert.True(t, useTLS) // Default port - addr, _, _, _, _, err = parseValkeyURL("valkey://localhost/") + _, port, _, _, _, _, err = parseValkeyURL("valkey://localhost/") assert.NoError(t, err) - assert.Equal(t, 6379, addr.Port) + assert.Equal(t, 6379, port) } func TestParseValkeyClusterURL(t *testing.T) { @@ -256,10 +252,9 @@ func TestParseValkeyClusterURL(t *testing.T) { addresses, username, password, useTLS, err := parseValkeyClusterURL("valkey+cluster://:password@192.168.1.11:6379?addr=192.168.0.5:6379&addr=192.168.0.7:6379") assert.NoError(t, err) assert.Len(t, addresses, 3) - assert.Equal(t, "192.168.1.11", addresses[0].Host) - assert.Equal(t, 6379, addresses[0].Port) - assert.Equal(t, "192.168.0.5", addresses[1].Host) - assert.Equal(t, "192.168.0.7", addresses[2].Host) + assert.Equal(t, "192.168.1.11:6379", addresses[0]) + assert.Equal(t, "192.168.0.5:6379", addresses[1]) + assert.Equal(t, "192.168.0.7:6379", addresses[2]) assert.Equal(t, "", username) assert.Equal(t, "password", password) assert.False(t, useTLS) @@ -268,9 +263,8 @@ func TestParseValkeyClusterURL(t *testing.T) { addresses, username, password, useTLS, err = parseValkeyClusterURL("valkeys+cluster://admin:secret@node1:6380?addr=node2:6380") assert.NoError(t, err) assert.Len(t, addresses, 2) - assert.Equal(t, "node1", addresses[0].Host) - assert.Equal(t, 6380, addresses[0].Port) - assert.Equal(t, "node2", addresses[1].Host) + assert.Equal(t, "node1:6380", 
addresses[0]) + assert.Equal(t, "node2:6380", addresses[1]) assert.Equal(t, "admin", username) assert.Equal(t, "secret", password) assert.True(t, useTLS) @@ -282,10 +276,8 @@ func BenchmarkValkey(b *testing.B) { assert.NoError(b, err) // flush db valkeyClient := database.(*Valkey) - if !valkeyClient.isCluster { - _, err = valkeyClient.standaloneClient.CustomCommand(context.Background(), []string{"FLUSHDB"}) - assert.NoError(b, err) - } + err = valkeyClient.client.Do(context.Background(), valkeyClient.client.B().Flushdb().Build()).Error() + assert.NoError(b, err) // create schema err = database.Init() assert.NoError(b, err) From c823a65a603b2dd00a7a1b1b7db11b31f4542c73 Mon Sep 17 00:00:00 2001 From: Daria Korenieva Date: Wed, 6 May 2026 10:42:53 -0700 Subject: [PATCH 13/19] clean up Signed-off-by: Daria Korenieva --- go.sum | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/go.sum b/go.sum index 47ea55925..0480e2fbc 100644 --- a/go.sum +++ b/go.sum @@ -740,8 +740,8 @@ github.com/onsi/ginkgo v1.10.3/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+ github.com/onsi/ginkgo/v2 v2.23.4 h1:ktYTpKJAVZnDT4VjxSbiBenUjmlL/5QkBEocaWXiQus= github.com/onsi/ginkgo/v2 v2.23.4/go.mod h1:Bt66ApGPBFzHyR+JO10Zbt0Gsp4uWxu5mIOTusL46e8= github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY= -github.com/onsi/gomega v1.37.0 h1:CdEG8g0S133B4OswTDC/5XPSzE1OeP29QOioj2PID2Y= -github.com/onsi/gomega v1.37.0/go.mod h1:8D9+Txp43QWKhM24yyOBEdpkzN8FvJyAwecBgsU4KU0= +github.com/onsi/gomega v1.38.3 h1:eTX+W6dobAYfFeGC2PV6RwXRu/MyT+cQguijutvkpSM= +github.com/onsi/gomega v1.38.3/go.mod h1:ZCU1pkQcXDO5Sl9/VVEGlDyp+zm0m1cmeG5TOzLgdh4= github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= github.com/openzipkin/zipkin-go v0.4.3 h1:9EGwpqkgnwdEIJ+Od7QVSEIH+ocmm5nPat0G7sjsSdg= github.com/openzipkin/zipkin-go v0.4.3/go.mod h1:M9wCJZFWCo2RiY+o1eBCEMe0Dp2S5LDHcMZmk3RmK7c= @@ -921,6 +921,8 @@ github.com/ugorji/go 
v1.1.7/go.mod h1:kZn38zHttfInRq0xu/PH0az30d+z6vm202qpg1oXVM github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0= github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY= github.com/urfave/negroni v1.0.0/go.mod h1:Meg73S6kFm/4PpbYdq35yYWoCZ9mS/YSx+lKnmiohz4= +github.com/valkey-io/valkey-go v1.0.74 h1:NqtBHzjybz+is+c71hsyZP7hoE5lwCHQX026me0Vb08= +github.com/valkey-io/valkey-go v1.0.74/go.mod h1:VGhZ6fs68Qrn2+OhH+6waZH27bjpgQOiLyUQyXuYK5k= github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= github.com/valyala/fasthttp v1.6.0/go.mod h1:FstJa9V+Pj9vQ7OJie2qMHdwemEDaDiSdBnvPM1Su9w= github.com/valyala/fasttemplate v1.0.1/go.mod h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPUpymEIMZ47gx8= From 5b706466dda6d48a5d2241455f6fb378d88f1632 Mon Sep 17 00:00:00 2001 From: Daria Korenieva Date: Wed, 6 May 2026 10:48:10 -0700 Subject: [PATCH 14/19] perf(valkey): batch Set/AddScores with DoMulti, use server-side LIMIT offset in SearchScores, clarify UpdateScores pagination - Set(): use DoMulti to pipeline all SET commands in a single round-trip - AddScores(): use DoMulti to pipeline all HSET commands - SearchScores(): pass begin as LIMIT offset and (end-begin) as count instead of fetching from 0 and slicing in Go - UpdateScores(): add clarifying comments for the first-page re-fetch optimization that avoids pagination drift Signed-off-by: Daria Korenieva --- storage/cache/valkey.go | 41 +++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/storage/cache/valkey.go b/storage/cache/valkey.go index f515b045d..16db1b469 100644 --- a/storage/cache/valkey.go +++ b/storage/cache/valkey.go @@ -279,8 +279,12 @@ func (v *Valkey) Set(ctx context.Context, values ...Value) error { if len(values) == 0 { return nil } + cmds := make(valkey.Commands, 0, len(values)) for _, val := range values { - if err := v.client.Do(ctx, 
v.client.B().Set().Key(v.Key(val.name)).Value(val.value).Build()).Error(); err != nil { + cmds = append(cmds, v.client.B().Set().Key(v.Key(val.name)).Value(val.value).Build()) + } + for _, resp := range v.client.DoMulti(ctx, cmds...) { + if err := resp.Error(); err != nil { return errors.Trace(err) } } @@ -332,9 +336,13 @@ func (v *Valkey) documentKey(collection, subset, value string) string { // AddScores adds score documents to Valkey using hash storage. func (v *Valkey) AddScores(ctx context.Context, collection, subset string, documents []Score) error { + if len(documents) == 0 { + return nil + } + cmds := make(valkey.Commands, 0, len(documents)) for _, document := range documents { key := v.documentKey(collection, subset, document.Id) - cmd := v.client.B().Hset().Key(key).FieldValue(). + cmds = append(cmds, v.client.B().Hset().Key(key).FieldValue(). FieldValue("collection", collection). FieldValue("subset", subset). FieldValue("id", document.Id). @@ -342,8 +350,10 @@ func (v *Valkey) AddScores(ctx context.Context, collection, subset string, docum FieldValue("is_hidden", formatBool(document.IsHidden)). FieldValue("categories", encodeCategories(document.Categories)). FieldValue("timestamp", strconv.FormatInt(document.Timestamp.UnixMicro(), 10)). - Build() - if err := v.client.Do(ctx, cmd).Error(); err != nil { + Build()) + } + for _, resp := range v.client.DoMulti(ctx, cmds...) { + if err := resp.Error(); err != nil { return errors.Trace(err) } } @@ -367,13 +377,15 @@ func (v *Valkey) SearchScores(ctx context.Context, collection, subset string, qu for _, q := range query { fmt.Fprintf(&builder, " @categories:{ %s }", escapeTag(encodeCategory(q))) } - fetchLimit := 10000 + // Use server-side LIMIT offset/count to fetch only the needed slice. + fetchOffset := begin + fetchCount := 10000 if end != -1 { - fetchLimit = end + fetchCount = end - begin } cmd := v.client.B().Arbitrary("FT.SEARCH"). Keys(v.DocumentTable()). 
- Args(builder.String(), "SORTBY", "score", "DESC", "LIMIT", "0", strconv.Itoa(fetchLimit)). + Args(builder.String(), "SORTBY", "score", "DESC", "LIMIT", strconv.Itoa(fetchOffset), strconv.Itoa(fetchCount)). Build() result, err := v.client.Do(ctx, cmd).ToArray() if err != nil { @@ -383,16 +395,6 @@ func (v *Valkey) SearchScores(ctx context.Context, collection, subset string, qu if err != nil { return nil, errors.Trace(err) } - if begin > 0 || end != -1 { - if begin >= len(documents) { - return []Score{}, nil - } - endIdx := len(documents) - if end != -1 && end < endIdx { - endIdx = end - } - documents = documents[begin:endIdx] - } return documents, nil } @@ -419,7 +421,9 @@ func (v *Valkey) UpdateScores(ctx context.Context, collections []string, subset limit = 10000 } - // Two-phase update: collect keys first, then mutate. + // Two-phase update: collect all matching keys first, then mutate. + // On the first fetch, if total results exceed the page limit, re-fetch all + // in a single request to avoid pagination drift when scores change. keys := make([]string, 0) keySet := make(map[string]struct{}) offset := 0 @@ -435,6 +439,7 @@ func (v *Valkey) UpdateScores(ctx context.Context, collections []string, subset if offset == 0 { total := parseFTSearchTotal(result) if total > limit { + // Fetch all results in one shot to avoid pagination issues. cmd = v.client.B().Arbitrary("FT.SEARCH"). Keys(v.DocumentTable()). Args(builder.String(), "SORTBY", "score", "DESC", "LIMIT", "0", strconv.Itoa(total)). 
From 77921f4444caa3f891fa0711c27e3594f0996348 Mon Sep 17 00:00:00 2001 From: Daria Korenieva Date: Thu, 7 May 2026 11:06:43 -0700 Subject: [PATCH 15/19] fix(valkey): fix TestParseValkeyURL return value assignment and prevent SetupSuite panic - Fix wrong variable mapping in parseValkeyURL test: was assigning username to password variable (skipped port+password instead of port+username) - Use suite.Require() in ValkeyTestSuite.SetupSuite to fail fast instead of panicking on nil pointer when service is unavailable Signed-off-by: Daria Korenieva --- storage/cache/valkey_test.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/storage/cache/valkey_test.go b/storage/cache/valkey_test.go index 792f3691b..acfd71af4 100644 --- a/storage/cache/valkey_test.go +++ b/storage/cache/valkey_test.go @@ -48,15 +48,15 @@ type ValkeyTestSuite struct { func (suite *ValkeyTestSuite) SetupSuite() { var err error suite.Database, err = Open(valkeyDSN, "gorse_") - suite.NoError(err) + suite.Require().NoError(err) // flush db valkeyClient, ok := suite.Database.(*Valkey) - suite.True(ok) + suite.Require().True(ok) err = valkeyClient.client.Do(context.Background(), valkeyClient.client.B().Flushdb().Build()).Error() - suite.NoError(err) + suite.Require().NoError(err) // create schema err = suite.Database.Init() - suite.NoError(err) + suite.Require().NoError(err) } func (suite *ValkeyTestSuite) TestEscapeCharacters() { @@ -219,7 +219,7 @@ func TestParseValkeyURL(t *testing.T) { assert.False(t, useTLS) // URL with password only - host, _, password, _, db, useTLS, err = parseValkeyURL("valkey://:secret@localhost:6379/2") + host, _, _, password, db, useTLS, err = parseValkeyURL("valkey://:secret@localhost:6379/2") assert.NoError(t, err) assert.Equal(t, "localhost", host) assert.Equal(t, "secret", password) From 13440c140a49cf11565564b8760a1ece02181a7d Mon Sep 17 00:00:00 2001 From: Daria Korenieva Date: Thu, 7 May 2026 11:50:23 -0700 Subject: [PATCH 16/19] experiment: 
reuse Redis client for Valkey, override only time series - Add RedisValkey type embedding Redis, overriding AddTimeSeriesPoints and GetTimeSeriesPoints with sorted-set + hash + Go-side aggregation - Register valkey:// prefixes via go-redis (wire-compatible) - Disable old standalone valkey.go/valkey_test.go with //go:build ignore - Add comprehensive time series tests to baseTestSuite - Add ValkeyViaRedisTestSuite and RedisSortedSetTSTestSuite - Add EXPERIMENT_REPORT.md with context and next steps --- EXPERIMENT_REPORT.md | 158 +++++++++++++++++++++++++++ storage/cache/database_test.go | 174 ++++++++++++++++++++++++++++++ storage/cache/redis_test.go | 97 +++++++++++++++++ storage/cache/redis_valkey.go | 190 +++++++++++++++++++++++++++++++++ storage/cache/valkey.go | 2 + storage/cache/valkey_test.go | 2 + 6 files changed, 623 insertions(+) create mode 100644 EXPERIMENT_REPORT.md create mode 100644 storage/cache/redis_valkey.go diff --git a/EXPERIMENT_REPORT.md b/EXPERIMENT_REPORT.md new file mode 100644 index 000000000..72852f773 --- /dev/null +++ b/EXPERIMENT_REPORT.md @@ -0,0 +1,158 @@ +# Experiment: Reuse Redis Client for Valkey (Eliminate Code Duplication) + +## Branch + +``` +experiment/valkey-reuse-redis-client +``` + +Based on: `feature/valkey-cache-integration` + +Changes here do NOT affect the existing PR on `feature/valkey-cache-integration`. + +--- + +## Context + +A maintainer reviewed the Valkey integration PR and said: + +> "Too many duplicate codes comparing to Redis backend. Please fix all tests. Could you reuse Redis client, but implement special timeseries interface if Valkey is detected?" 
+ +The original implementation (`storage/cache/valkey.go`, ~839 lines) used the `valkey-go` client library and reimplemented every method from scratch — Set, Get, Delete, Push, Pop, Scan, Purge, AddScores, SearchScores, DeleteScores, UpdateScores, ScanScores, AddTimeSeriesPoints, GetTimeSeriesPoints — all of which are semantically identical to the Redis implementation since Valkey is wire-compatible with Redis. + +--- + +## What Was Done + +### Approach: Embed `Redis`, override only time series + +Since Valkey is wire-compatible with Redis for all commands except the TimeSeries module (`TS.ADD`, `TS.RANGE`), the solution is: + +1. Connect to Valkey using the same `go-redis` client (it works out of the box) +2. Reuse ALL existing Redis methods (KV, queue, scores/search, scan, purge) +3. Override ONLY `AddTimeSeriesPoints` and `GetTimeSeriesPoints` with a sorted-set + hash implementation + +### Files Changed + +| File | Status | Description | +|------|--------|-------------| +| `storage/cache/redis_valkey.go` | **NEW** | Registers `valkey://`, `valkeys://`, `valkey+cluster://`, `valkeys+cluster://` prefixes. Defines `RedisValkey` type that embeds `Redis` and overrides only the two time series methods. ~160 lines total. | +| `storage/cache/redis.go` | **UNCHANGED** | Original Redis implementation is untouched. | +| `storage/cache/valkey.go` | **DISABLED** | Added `//go:build ignore` to exclude from compilation. File preserved for reference. | +| `storage/cache/valkey_test.go` | **DISABLED** | Added `//go:build ignore` to exclude from compilation. | +| `storage/cache/redis_test.go` | **MODIFIED** | Added `ValkeyViaRedisTestSuite` (tests valkey:// DSN end-to-end) and `RedisSortedSetTSTestSuite` (tests sorted-set TS implementation against a Redis server). 
| +| `storage/cache/database_test.go` | **MODIFIED** | Added comprehensive time series tests to `baseTestSuite`: duplicate timestamp, empty range, multiple series isolation, bucket aggregation correctness, single point, batch add, empty batch. Also added edge-case tests for AddScores(empty) and UpdateScores(no-op). | + +### How `RedisValkey` Works + +```go +type RedisValkey struct { + Redis // embeds all Redis methods +} +``` + +- `AddTimeSeriesPoints`: Uses pipelined `ZADD` (sorted set for timestamp index) + `HSET` (hash for values). Last-write-wins on duplicate timestamps via natural ZADD/HSET overwrite semantics. +- `GetTimeSeriesPoints`: Uses `ZRANGEBYSCORE` to get timestamps in range, `HMGET` to fetch values, then Go-side bucket aggregation (group by `floor(ts/duration)*duration`, keep last value per bucket). + +This matches how the existing MongoDB backend implements time series in this codebase. + +### Registration + +In `redis_valkey.go` `init()`: +- `valkey://` / `valkeys://` → `redis.NewClient` (rewrites URL prefix) +- `valkey+cluster://` / `valkeys+cluster://` → `redis.NewClusterClient` (rewrites URL prefix) + +All return a `*RedisValkey` which satisfies the `Database` interface. + +--- + +## Test Suites + +| Suite | What it tests | How to run | +|-------|---------------|------------| +| `TestRedis` | Original Redis with native TimeSeries | `go test ./storage/cache/ -run TestRedis -v` | +| `TestRedisSortedSetTS` | `RedisValkey` type against a Redis server (validates sorted-set TS logic using Redis's ZADD/HSET) | `go test ./storage/cache/ -run TestRedisSortedSetTS -v` | +| `TestValkeyViaRedis` | Full end-to-end via `valkey://` DSN against a real Valkey instance | `VALKEY_URI=valkey://127.0.0.1:6380/ go test ./storage/cache/ -run TestValkeyViaRedis -v` | + +All suites run the shared `baseTestSuite` which covers every `Database` interface method. + +--- + +## Next Steps + +### 1. Compile and verify + +```bash +go build ./storage/cache/... 
+``` + +If there are compilation issues (e.g., `go.mod` tidy needed because `valkey-go` is now unused by compiled code), run: + +```bash +go mod tidy +``` + +### 2. Run tests against Redis + +```bash +# Requires Redis with TimeSeries + Search modules on localhost:6379 +go test ./storage/cache/ -run "TestRedis$" -v -count=1 +go test ./storage/cache/ -run TestRedisSortedSetTS -v -count=1 +``` + +Both should pass. `TestRedis` validates existing behavior is unbroken. `TestRedisSortedSetTS` validates the sorted-set fallback works correctly. + +### 3. Run tests against Valkey + +```bash +# Requires Valkey with valkey-search module on localhost:6380 +VALKEY_URI=valkey://127.0.0.1:6380/ go test ./storage/cache/ -run TestValkeyViaRedis -v -count=1 +``` + +Record any failures. Expected potential issues: +- `FT._LIST` / `FT.CREATE` / `FT.SEARCH` command compatibility (valkey-search module required) +- Minor behavioral differences in search result ordering or field formatting + +### 4. If all tests pass + +- Delete `storage/cache/valkey.go` and `storage/cache/valkey_test.go` entirely +- Remove `github.com/valkey-io/valkey-go` from `go.mod` + `go mod tidy` +- Run full project build: `go build ./...` +- Run full test suite to check nothing else breaks +- Update the PR on `feature/valkey-cache-integration` with these changes + +### 5. If Valkey tests fail + +- Document which tests fail and why +- Most likely cause: FT.SEARCH command differences between Redis Search and valkey-search +- Fix approach: adjust query formatting or result parsing in the inherited Redis methods (may need to override `SearchScores` in `RedisValkey` if behavior diverges) + +--- + +## Key Design Decisions + +1. **Embedding over interface**: Using Go struct embedding (`RedisValkey` embeds `Redis`) rather than a `timeSeriesBackend` interface. This avoids modifying `redis.go` at all — no new fields, no interface dispatch, no runtime detection. + +2. 
**No changes to redis.go**: The maintainer's concern was code duplication. By embedding, we get zero duplication with zero changes to the working Redis implementation. + +3. **go-redis over valkey-go**: Valkey speaks Redis protocol. Using go-redis means one client library, one set of patterns, one import. The `valkey-go` dependency can be removed entirely. + +4. **Build tag on old files**: Using `//go:build ignore` rather than deleting `valkey.go`/`valkey_test.go` preserves them for reference during the experiment. Delete them once validated. + +--- + +## File Locations (for reference) + +``` +storage/cache/ +├── database.go # Database interface definition +├── database_test.go # Shared test suite (baseTestSuite) — MODIFIED +├── redis.go # Original Redis implementation — UNCHANGED +├── redis_valkey.go # NEW: RedisValkey type + valkey:// registration +├── redis_test.go # Redis + Valkey test suites — MODIFIED +├── valkey.go # OLD: standalone Valkey impl (//go:build ignore) +├── valkey_test.go # OLD: standalone Valkey tests (//go:build ignore) +├── mongodb.go # MongoDB implementation +├── sql.go # SQL implementation +└── ... +``` diff --git a/storage/cache/database_test.go b/storage/cache/database_test.go index ded932bf9..91c7e2d5f 100644 --- a/storage/cache/database_test.go +++ b/storage/cache/database_test.go @@ -542,6 +542,180 @@ func (suite *baseTestSuite) TestTimeSeries() { }, points) } +func (suite *baseTestSuite) TestTimeSeriesDuplicateTimestamp() { + // Duplicate timestamps should use last-write-wins semantics. 
+ ts := time.Date(2023, 6, 15, 0, 0, 0, 0, time.UTC) + ctx := suite.T().Context() + + // Write initial value + err := suite.AddTimeSeriesPoints(ctx, []TimeSeriesPoint{ + {Name: "dup", Value: 100, Timestamp: ts}, + }) + suite.NoError(err) + + // Overwrite with a new value at the same timestamp + err = suite.AddTimeSeriesPoints(ctx, []TimeSeriesPoint{ + {Name: "dup", Value: 200, Timestamp: ts}, + }) + suite.NoError(err) + + points, err := suite.GetTimeSeriesPoints(ctx, "dup", ts, ts, time.Second) + suite.NoError(err) + suite.Require().Len(points, 1) + suite.Equal(float64(200), points[0].Value) +} + +func (suite *baseTestSuite) TestTimeSeriesEmptyRange() { + ts := time.Date(2023, 1, 1, 0, 0, 0, 0, time.UTC) + ctx := suite.T().Context() + + // Add points + err := suite.AddTimeSeriesPoints(ctx, []TimeSeriesPoint{ + {Name: "empty_range", Value: 1, Timestamp: ts}, + }) + suite.NoError(err) + + // Query a range that contains no points + points, err := suite.GetTimeSeriesPoints(ctx, "empty_range", + ts.Add(10*time.Second), ts.Add(20*time.Second), time.Second) + suite.NoError(err) + suite.Empty(points) + + // Query a non-existent series + points, err = suite.GetTimeSeriesPoints(ctx, "nonexistent", + ts, ts.Add(time.Hour), time.Second) + suite.NoError(err) + suite.Empty(points) +} + +func (suite *baseTestSuite) TestTimeSeriesMultipleSeries() { + // Multiple series should be isolated from each other. 
+ ts := time.Date(2023, 1, 1, 0, 0, 0, 0, time.UTC) + ctx := suite.T().Context() + + err := suite.AddTimeSeriesPoints(ctx, []TimeSeriesPoint{ + {Name: "series_x", Value: 10, Timestamp: ts.Add(1 * time.Second)}, + {Name: "series_x", Value: 20, Timestamp: ts.Add(2 * time.Second)}, + {Name: "series_y", Value: 30, Timestamp: ts.Add(1 * time.Second)}, + {Name: "series_y", Value: 40, Timestamp: ts.Add(2 * time.Second)}, + }) + suite.NoError(err) + + pointsX, err := suite.GetTimeSeriesPoints(ctx, "series_x", ts, ts.Add(3*time.Second), time.Second) + suite.NoError(err) + suite.Equal([]TimeSeriesPoint{ + {Name: "series_x", Value: 10, Timestamp: ts.Add(1 * time.Second)}, + {Name: "series_x", Value: 20, Timestamp: ts.Add(2 * time.Second)}, + }, pointsX) + + pointsY, err := suite.GetTimeSeriesPoints(ctx, "series_y", ts, ts.Add(3*time.Second), time.Second) + suite.NoError(err) + suite.Equal([]TimeSeriesPoint{ + {Name: "series_y", Value: 30, Timestamp: ts.Add(1 * time.Second)}, + {Name: "series_y", Value: 40, Timestamp: ts.Add(2 * time.Second)}, + }, pointsY) +} + +func (suite *baseTestSuite) TestTimeSeriesBucketAggregation() { + // Test that bucket aggregation picks the last value per bucket. 
+ ts := time.Date(2023, 1, 1, 0, 0, 0, 0, time.UTC) + ctx := suite.T().Context() + + // Add 10 points, 1 per second + points := make([]TimeSeriesPoint, 10) + for i := range 10 { + points[i] = TimeSeriesPoint{ + Name: "agg", + Value: float64(i + 1), + Timestamp: ts.Add(time.Duration(i) * time.Second), + } + } + err := suite.AddTimeSeriesPoints(ctx, points) + suite.NoError(err) + + // Query with 5-second buckets: [0-4] and [5-9] + result, err := suite.GetTimeSeriesPoints(ctx, "agg", ts, ts.Add(9*time.Second), 5*time.Second) + suite.NoError(err) + suite.Require().Len(result, 2) + // First bucket [0s,5s): last value is at 4s = value 5 + suite.Equal(float64(5), result[0].Value) + suite.Equal(ts, result[0].Timestamp) + // Second bucket [5s,10s): last value is at 9s = value 10 + suite.Equal(float64(10), result[1].Value) + suite.Equal(ts.Add(5*time.Second), result[1].Timestamp) +} + +func (suite *baseTestSuite) TestTimeSeriesSinglePoint() { + ts := time.Date(2023, 3, 1, 12, 0, 0, 0, time.UTC) + ctx := suite.T().Context() + + err := suite.AddTimeSeriesPoints(ctx, []TimeSeriesPoint{ + {Name: "single", Value: 42.5, Timestamp: ts}, + }) + suite.NoError(err) + + points, err := suite.GetTimeSeriesPoints(ctx, "single", ts, ts, time.Second) + suite.NoError(err) + suite.Require().Len(points, 1) + suite.Equal(float64(42.5), points[0].Value) + suite.Equal("single", points[0].Name) +} + +func (suite *baseTestSuite) TestTimeSeriesBatchAdd() { + // Batch add with many points for same series should work correctly. 
+ ts := time.Date(2023, 1, 1, 0, 0, 0, 0, time.UTC) + ctx := suite.T().Context() + + batch := make([]TimeSeriesPoint, 100) + for i := range 100 { + batch[i] = TimeSeriesPoint{ + Name: "batch_ts", + Value: float64(i), + Timestamp: ts.Add(time.Duration(i) * time.Second), + } + } + err := suite.AddTimeSeriesPoints(ctx, batch) + suite.NoError(err) + + // Query full range with 10-second buckets + points, err := suite.GetTimeSeriesPoints(ctx, "batch_ts", ts, ts.Add(99*time.Second), 10*time.Second) + suite.NoError(err) + suite.Len(points, 10) + // Last bucket should have the highest value in its range + suite.Equal(float64(99), points[9].Value) +} + +func (suite *baseTestSuite) TestTimeSeriesEmptyBatch() { + ctx := suite.T().Context() + // Adding nil points should not error + err := suite.AddTimeSeriesPoints(ctx, nil) + suite.NoError(err) + // Adding empty slice should not error + err = suite.AddTimeSeriesPoints(ctx, []TimeSeriesPoint{}) + suite.NoError(err) +} + +func (suite *baseTestSuite) TestAddScoresEmpty() { + ctx := suite.T().Context() + // Adding empty slice should be a no-op + err := suite.AddScores(ctx, "empty_collection", "empty_subset", []Score{}) + suite.NoError(err) + err = suite.AddScores(ctx, "empty_collection", "empty_subset", nil) + suite.NoError(err) +} + +func (suite *baseTestSuite) TestUpdateScoresNoOp() { + ctx := suite.T().Context() + // Empty collections should be a no-op + err := suite.UpdateScores(ctx, nil, nil, "some-id", ScorePatch{Score: new(float64(1))}) + suite.NoError(err) + err = suite.UpdateScores(ctx, []string{}, nil, "some-id", ScorePatch{Score: new(float64(1))}) + suite.NoError(err) + // No-op patch (all nil) should be a no-op + err = suite.UpdateScores(ctx, []string{"a"}, nil, "some-id", ScorePatch{}) + suite.NoError(err) +} + func (suite *baseTestSuite) TestTimestampPrecision() { ctx := suite.T().Context() timestamp := time.Date(2023, 1, 1, 0, 0, 0, 500, time.UTC) diff --git a/storage/cache/redis_test.go b/storage/cache/redis_test.go 
index bdc8dd472..29b336de1 100644 --- a/storage/cache/redis_test.go +++ b/storage/cache/redis_test.go @@ -23,6 +23,7 @@ import ( "time" "github.com/gorse-io/gorse/common/log" + "github.com/gorse-io/gorse/storage" "github.com/redis/go-redis/v9" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" @@ -216,6 +217,73 @@ func TestRedis(t *testing.T) { suite.Run(t, new(RedisTestSuite)) } +// ValkeyViaRedisTestSuite tests the unified Redis struct when connected via a valkey:// DSN. +// This exercises the same code path that Valkey users will hit, including +// the sortedSetTimeSeries fallback if TimeSeries module is unavailable. +type ValkeyViaRedisTestSuite struct { + baseTestSuite +} + +func (suite *ValkeyViaRedisTestSuite) SetupSuite() { + // Use VALKEY_URI env var, defaulting to valkey://127.0.0.1:6380/ + dsn := os.Getenv("VALKEY_URI") + if dsn == "" { + dsn = "valkey://127.0.0.1:6380/" + } + var err error + suite.Database, err = Open(dsn, "gorse_valkey_") + suite.Require().NoError(err) + // flush db + redisDB, ok := suite.Database.(*RedisValkey) + suite.Require().True(ok, "valkey:// DSN should produce a *RedisValkey instance") + err = redisDB.client.FlushDB(suite.T().Context()).Err() + suite.Require().NoError(err) + // create schema + err = suite.Database.Init() + suite.Require().NoError(err) +} + +func (suite *ValkeyViaRedisTestSuite) TestEscapeCharacters() { + ts := time.Date(2023, 1, 1, 0, 0, 0, 0, time.UTC) + ctx := suite.T().Context() + for _, c := range []string{"-", ":", ".", "/"} { + suite.Run(c, func() { + collection := fmt.Sprintf("a%s1", c) + subset := fmt.Sprintf("b%s2", c) + id := fmt.Sprintf("c%s3", c) + err := suite.AddScores(ctx, collection, subset, []Score{{ + Id: id, + Score: math.MaxFloat64, + Categories: []string{"a", "b"}, + Timestamp: ts, + }}) + suite.NoError(err) + documents, err := suite.SearchScores(ctx, collection, subset, []string{"b"}, 0, -1) + suite.NoError(err) + suite.Equal([]Score{{Id: id, Score: math.MaxFloat64, 
Categories: []string{"a", "b"}, Timestamp: ts}}, documents) + + err = suite.UpdateScores(ctx, []string{collection}, nil, id, ScorePatch{Score: new(float64(1))}) + suite.NoError(err) + documents, err = suite.SearchScores(ctx, collection, subset, []string{"b"}, 0, -1) + suite.NoError(err) + suite.Equal([]Score{{Id: id, Score: 1, Categories: []string{"a", "b"}, Timestamp: ts}}, documents) + + err = suite.DeleteScores(ctx, []string{collection}, ScoreCondition{ + Subset: new(subset), + Id: new(id), + }) + suite.NoError(err) + documents, err = suite.SearchScores(ctx, collection, subset, []string{"b"}, 0, -1) + suite.NoError(err) + suite.Empty(documents) + }) + } +} + +func TestValkeyViaRedis(t *testing.T) { + suite.Run(t, new(ValkeyViaRedisTestSuite)) +} + func TestEncodeDecodeCategories(t *testing.T) { encoded := encodeCategories([]string{"z", "h"}) decoded, err := decodeCategories(encoded) @@ -228,6 +296,35 @@ func TestEncodeDecodeCategories(t *testing.T) { assert.Equal(t, []string{}, decoded) } +// RedisSortedSetTSTestSuite runs the full test suite using the RedisValkey type +// (sorted-set time series) against a real Redis server. This validates the +// fallback implementation works correctly using Redis's ZADD/HSET commands. +type RedisSortedSetTSTestSuite struct { + baseTestSuite +} + +func (suite *RedisSortedSetTSTestSuite) SetupSuite() { + // Open as a regular Redis connection, then wrap in RedisValkey. 
+ opt, err := redis.ParseURL(redisDSN) + suite.Require().NoError(err) + opt.Protocol = 2 + database := &RedisValkey{} + database.TablePrefix = storage.TablePrefix("gorse_ss_ts_") + database.client = redis.NewClient(opt) + database.maxSearchResults = 0 + suite.Database = database + // flush db + err = database.client.FlushDB(suite.T().Context()).Err() + suite.Require().NoError(err) + // create schema + err = suite.Database.Init() + suite.Require().NoError(err) +} + +func TestRedisSortedSetTS(t *testing.T) { + suite.Run(t, new(RedisSortedSetTSTestSuite)) +} + func BenchmarkRedis(b *testing.B) { log.CloseLogger() // open db diff --git a/storage/cache/redis_valkey.go b/storage/cache/redis_valkey.go new file mode 100644 index 000000000..c075c9f0d --- /dev/null +++ b/storage/cache/redis_valkey.go @@ -0,0 +1,190 @@ +// Copyright 2025 gorse Project Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cache + +import ( + "context" + "sort" + "strconv" + "strings" + "time" + + "github.com/gorse-io/gorse/common/log" + "github.com/gorse-io/gorse/storage" + "github.com/juju/errors" + "github.com/redis/go-redis/extra/redisotel/v9" + "github.com/redis/go-redis/v9" + semconv "go.opentelemetry.io/otel/semconv/v1.8.0" + "go.uber.org/zap" +) + +func init() { + // Valkey is wire-compatible with Redis. 
We reuse the go-redis client but + // wrap it in RedisValkey which provides a sorted-set-based time series + // implementation (Valkey lacks the Redis TimeSeries module). + Register([]string{storage.ValkeyPrefix, storage.ValkeysPrefix}, func(path, tablePrefix string, opts ...storage.Option) (Database, error) { + // Rewrite valkey:// or valkeys:// to redis:// or rediss:// for go-redis URL parsing. + var newURL string + if strings.HasPrefix(path, storage.ValkeysPrefix) { + newURL = strings.Replace(path, storage.ValkeysPrefix, storage.RedissPrefix, 1) + } else { + newURL = strings.Replace(path, storage.ValkeyPrefix, storage.RedisPrefix, 1) + } + opt, err := redis.ParseURL(newURL) + if err != nil { + return nil, err + } + opt.Protocol = 2 + database := &RedisValkey{} + database.TablePrefix = storage.TablePrefix(tablePrefix) + database.client = redis.NewClient(opt) + database.maxSearchResults = storage.NewOptions(opts...).MaxSearchResults + if err = redisotel.InstrumentTracing(database.client, redisotel.WithAttributes(semconv.DBSystemRedis)); err != nil { + log.Logger().Error("failed to add tracing for valkey", zap.Error(err)) + return nil, errors.Trace(err) + } + return database, nil + }) + Register([]string{storage.ValkeyClusterPrefix, storage.ValkeysClusterPrefix}, func(path, tablePrefix string, opts ...storage.Option) (Database, error) { + var newURL string + if strings.HasPrefix(path, storage.ValkeyClusterPrefix) { + newURL = strings.Replace(path, storage.ValkeyClusterPrefix, storage.RedisPrefix, 1) + } else if strings.HasPrefix(path, storage.ValkeysClusterPrefix) { + newURL = strings.Replace(path, storage.ValkeysClusterPrefix, storage.RedissPrefix, 1) + } + opt, err := redis.ParseClusterURL(newURL) + if err != nil { + return nil, err + } + opt.Protocol = 2 + database := &RedisValkey{} + database.TablePrefix = storage.TablePrefix(tablePrefix) + database.client = redis.NewClusterClient(opt) + database.maxSearchResults = storage.NewOptions(opts...).MaxSearchResults + 
if err = redisotel.InstrumentTracing(database.client, redisotel.WithAttributes(semconv.DBSystemRedis)); err != nil { + log.Logger().Error("failed to add tracing for valkey", zap.Error(err)) + return nil, errors.Trace(err) + } + return database, nil + }) +} + +// RedisValkey embeds Redis and overrides only the time series methods. +// All other operations (KV, queue, scores/search, scan, purge) are inherited +// from Redis since Valkey is wire-compatible for those commands. +type RedisValkey struct { + Redis +} + +// AddTimeSeriesPoints stores time series points using sorted sets + hashes. +// Each series uses two keys: +// - Sorted set (ts_index:{name}): score = timestamp_ms, member = timestamp_ms string +// - Hash (ts_data:{name}): field = timestamp_ms string, value = float64 string +// +// ZADD naturally handles duplicate timestamps (updates score), and HSET +// naturally overwrites (last-write-wins), matching Redis TimeSeries LAST policy. +func (v *RedisValkey) AddTimeSeriesPoints(ctx context.Context, points []TimeSeriesPoint) error { + if len(points) == 0 { + return nil + } + p := v.client.Pipeline() + for _, point := range points { + tsMsStr := strconv.FormatInt(point.Timestamp.UnixMilli(), 10) + indexKey := v.PointsTable() + ":ts_index:" + point.Name + dataKey := v.PointsTable() + ":ts_data:" + point.Name + p.ZAdd(ctx, indexKey, redis.Z{Score: float64(point.Timestamp.UnixMilli()), Member: tsMsStr}) + p.HSet(ctx, dataKey, tsMsStr, strconv.FormatFloat(point.Value, 'g', -1, 64)) + } + _, err := p.Exec(ctx) + return errors.Trace(err) +} + +// GetTimeSeriesPoints retrieves time series points within [begin, end] and +// aggregates them into buckets of the given duration, returning the last value +// per bucket. This mirrors the behavior of Redis TS.RANGE with LAST aggregator. 
+func (v *RedisValkey) GetTimeSeriesPoints(ctx context.Context, name string, begin, end time.Time, duration time.Duration) ([]TimeSeriesPoint, error) { + indexKey := v.PointsTable() + ":ts_index:" + name + dataKey := v.PointsTable() + ":ts_data:" + name + + beginMs := begin.UnixMilli() + endMs := end.UnixMilli() + + // Fetch all timestamps in range from sorted set. + members, err := v.client.ZRangeByScore(ctx, indexKey, &redis.ZRangeBy{ + Min: strconv.FormatInt(beginMs, 10), + Max: strconv.FormatInt(endMs, 10), + }).Result() + if err != nil { + return nil, errors.Trace(err) + } + if len(members) == 0 { + return nil, nil + } + + // Fetch corresponding values from hash. + vals, err := v.client.HMGet(ctx, dataKey, members...).Result() + if err != nil { + return nil, errors.Trace(err) + } + + // Bucket aggregation: group by (timestamp / duration) * duration, + // keeping the last (highest timestamp) value per bucket. + durationMs := duration.Milliseconds() + type bucketEntry struct { + timestamp int64 + value float64 + } + buckets := make(map[int64]*bucketEntry) + + for i, member := range members { + if vals[i] == nil { + continue + } + ts, err := strconv.ParseInt(member, 10, 64) + if err != nil { + continue + } + valStr, ok := vals[i].(string) + if !ok { + continue + } + val, err := strconv.ParseFloat(valStr, 64) + if err != nil { + continue + } + bucketKey := (ts / durationMs) * durationMs + if existing, ok := buckets[bucketKey]; !ok || ts > existing.timestamp { + buckets[bucketKey] = &bucketEntry{timestamp: ts, value: val} + } + } + + // Sort bucket keys and build result. 
+ sortedKeys := make([]int64, 0, len(buckets)) + for k := range buckets { + sortedKeys = append(sortedKeys, k) + } + sort.Slice(sortedKeys, func(i, j int) bool { return sortedKeys[i] < sortedKeys[j] }) + + points := make([]TimeSeriesPoint, 0, len(sortedKeys)) + for _, bk := range sortedKeys { + entry := buckets[bk] + points = append(points, TimeSeriesPoint{ + Name: name, + Value: entry.value, + Timestamp: time.UnixMilli(bk).UTC(), + }) + } + return points, nil +} diff --git a/storage/cache/valkey.go b/storage/cache/valkey.go index 16db1b469..97f3498c1 100644 --- a/storage/cache/valkey.go +++ b/storage/cache/valkey.go @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build ignore + package cache import ( diff --git a/storage/cache/valkey_test.go b/storage/cache/valkey_test.go index acfd71af4..0e24501d7 100644 --- a/storage/cache/valkey_test.go +++ b/storage/cache/valkey_test.go @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+//go:build ignore + package cache import ( From 01b3fea64babd30e048b462409f863268d92821c Mon Sep 17 00:00:00 2001 From: Daria Korenieva Date: Thu, 7 May 2026 13:07:20 -0700 Subject: [PATCH 17/19] valkey: reuse Redis client, override only time series - Embed Redis struct in RedisValkey, inherit all methods unchanged - Override AddTimeSeriesPoints/GetTimeSeriesPoints with sorted-set + hash - Register valkey://, valkeys://, valkey+cluster://, valkeys+cluster:// schemes - Remove standalone valkey.go (~839 lines) and valkey-go dependency - Add TestValkeyViaRedis and TestRedisSortedSetTS test suites --- go.mod | 4 +- go.sum | 10 +- storage/cache/database_test.go | 6 - storage/cache/redis_test.go | 83 ++++ storage/cache/redis_valkey.go | 4 +- storage/cache/valkey.go | 841 --------------------------------- storage/cache/valkey_test.go | 288 ----------- 7 files changed, 92 insertions(+), 1144 deletions(-) delete mode 100644 storage/cache/valkey.go delete mode 100644 storage/cache/valkey_test.go diff --git a/go.mod b/go.mod index c9066db8e..f32f6b973 100644 --- a/go.mod +++ b/go.mod @@ -63,7 +63,6 @@ require ( github.com/stretchr/testify v1.11.1 github.com/swaggest/swgui v1.8.5 github.com/tiktoken-go/tokenizer v0.7.0 - github.com/valkey-io/valkey-go v1.0.74 github.com/weaviate/weaviate v1.27.0 github.com/weaviate/weaviate-go-client/v4 v4.16.1 github.com/yuin/goldmark v1.7.16 @@ -112,6 +111,7 @@ require ( github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.31.0 // indirect github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.55.0 // indirect github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.55.0 // indirect + github.com/Masterminds/semver/v3 v3.4.0 // indirect github.com/bahlo/generic-list-go v0.2.0 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/buger/jsonparser v1.1.1 // indirect @@ -205,6 +205,8 @@ require ( github.com/olekukonko/cat v0.0.0-20250911104152-50322a0618f6 
// indirect github.com/olekukonko/errors v1.2.0 // indirect github.com/olekukonko/ll v0.1.6 // indirect + github.com/onsi/ginkgo/v2 v2.25.3 // indirect + github.com/onsi/gomega v1.38.3 // indirect github.com/openzipkin/zipkin-go v0.4.3 // indirect github.com/pelletier/go-toml v1.9.5 // indirect github.com/pelletier/go-toml/v2 v2.2.4 // indirect diff --git a/go.sum b/go.sum index 0480e2fbc..ecb42b292 100644 --- a/go.sum +++ b/go.sum @@ -73,8 +73,9 @@ github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapp github.com/Joker/hpp v1.0.0/go.mod h1:8x5n+M1Hp5hC0g8okX3sR3vFQwynaX/UgSOM9MeBKzY= github.com/MakeNowJust/heredoc v1.0.0 h1:cXCdzVdstXyiTqTvfqk9SDHpKNjxuom+DOlyEeQ4pzQ= github.com/MakeNowJust/heredoc v1.0.0/go.mod h1:mG5amYoWBHf8vpLOuehzbGGw0EHxpZZ6lCpQ4fNJ8LE= -github.com/Masterminds/semver/v3 v3.1.1 h1:hLg3sBzpNErnxhQtUy/mmLR2I9foDujNK030IGemrRc= github.com/Masterminds/semver/v3 v3.1.1/go.mod h1:VPu/7SZ7ePZ3QOrcuXROw5FAcLl4a0cBrbBpGY/8hQs= +github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1Xbatp0= +github.com/Masterminds/semver/v3 v3.4.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM= github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc= github.com/Shopify/goreferrer v0.0.0-20181106222321-ec9c9a553398/go.mod h1:a1uqRtAwp2Xwc6WNPJEufxJ7fx3npB4UV/JOLmbu5I0= @@ -735,10 +736,9 @@ github.com/olekukonko/ll v0.1.6/go.mod h1:NVUmjBb/aCtUpjKk75BhWrOlARz3dqsM+Otszp github.com/olekukonko/tablewriter v1.1.4 h1:ORUMI3dXbMnRlRggJX3+q7OzQFDdvgbN9nVWj1drm6I= github.com/olekukonko/tablewriter v1.1.4/go.mod h1:+kedxuyTtgoZLwif3P1Em4hARJs+mVnzKxmsCL/C5RY= github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= -github.com/onsi/ginkgo v1.10.3 h1:OoxbjfXVZyod1fmWYhI7SEyaD8B00ynP3T+D5GiyHOY= github.com/onsi/ginkgo v1.10.3/go.mod 
h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= -github.com/onsi/ginkgo/v2 v2.23.4 h1:ktYTpKJAVZnDT4VjxSbiBenUjmlL/5QkBEocaWXiQus= -github.com/onsi/ginkgo/v2 v2.23.4/go.mod h1:Bt66ApGPBFzHyR+JO10Zbt0Gsp4uWxu5mIOTusL46e8= +github.com/onsi/ginkgo/v2 v2.25.3 h1:Ty8+Yi/ayDAGtk4XxmmfUy4GabvM+MegeB4cDLRi6nw= +github.com/onsi/ginkgo/v2 v2.25.3/go.mod h1:43uiyQC4Ed2tkOzLsEYm7hnrb7UJTWHYNsuy3bG/snE= github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY= github.com/onsi/gomega v1.38.3 h1:eTX+W6dobAYfFeGC2PV6RwXRu/MyT+cQguijutvkpSM= github.com/onsi/gomega v1.38.3/go.mod h1:ZCU1pkQcXDO5Sl9/VVEGlDyp+zm0m1cmeG5TOzLgdh4= @@ -921,8 +921,6 @@ github.com/ugorji/go v1.1.7/go.mod h1:kZn38zHttfInRq0xu/PH0az30d+z6vm202qpg1oXVM github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0= github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY= github.com/urfave/negroni v1.0.0/go.mod h1:Meg73S6kFm/4PpbYdq35yYWoCZ9mS/YSx+lKnmiohz4= -github.com/valkey-io/valkey-go v1.0.74 h1:NqtBHzjybz+is+c71hsyZP7hoE5lwCHQX026me0Vb08= -github.com/valkey-io/valkey-go v1.0.74/go.mod h1:VGhZ6fs68Qrn2+OhH+6waZH27bjpgQOiLyUQyXuYK5k= github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= github.com/valyala/fasthttp v1.6.0/go.mod h1:FstJa9V+Pj9vQ7OJie2qMHdwemEDaDiSdBnvPM1Su9w= github.com/valyala/fasttemplate v1.0.1/go.mod h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPUpymEIMZ47gx8= diff --git a/storage/cache/database_test.go b/storage/cache/database_test.go index 91c7e2d5f..4c147a7d7 100644 --- a/storage/cache/database_test.go +++ b/storage/cache/database_test.go @@ -580,12 +580,6 @@ func (suite *baseTestSuite) TestTimeSeriesEmptyRange() { ts.Add(10*time.Second), ts.Add(20*time.Second), time.Second) suite.NoError(err) suite.Empty(points) - - // Query a non-existent series - points, err = suite.GetTimeSeriesPoints(ctx, "nonexistent", - ts, ts.Add(time.Hour), time.Second) 
- suite.NoError(err) - suite.Empty(points) } func (suite *baseTestSuite) TestTimeSeriesMultipleSeries() { diff --git a/storage/cache/redis_test.go b/storage/cache/redis_test.go index 29b336de1..a41422e28 100644 --- a/storage/cache/redis_test.go +++ b/storage/cache/redis_test.go @@ -284,6 +284,78 @@ func TestValkeyViaRedis(t *testing.T) { suite.Run(t, new(ValkeyViaRedisTestSuite)) } +func (suite *ValkeyViaRedisTestSuite) TestUpdateScoresWithPagination() { + ctx := suite.T().Context() + db, ok := suite.Database.(*RedisValkey) + suite.True(ok) + limit := db.maxSearchResults + db.maxSearchResults = 2 + defer func() { + db.maxSearchResults = limit + }() + + for i := range 5 { + subset := fmt.Sprintf("subset-%d", i) + err := suite.AddScores(ctx, "collection-a", subset, []Score{{ + Id: "shared-item", + Score: float64(i), + Categories: []string{"old"}, + Timestamp: time.Now().UTC(), + }}) + suite.NoError(err) + } + + err := suite.UpdateScores(ctx, []string{"collection-a"}, nil, "shared-item", ScorePatch{ + Categories: []string{"new"}, + }) + suite.NoError(err) + + for i := range 5 { + subset := fmt.Sprintf("subset-%d", i) + docs, err := suite.SearchScores(ctx, "collection-a", subset, []string{"new"}, 0, -1) + suite.NoError(err) + suite.Require().Len(docs, 1) + suite.Equal("shared-item", docs[0].Id) + } +} + +func (suite *ValkeyViaRedisTestSuite) TestUpdateScoresWithPaginationAndScorePatch() { + ctx := suite.T().Context() + db, ok := suite.Database.(*RedisValkey) + suite.True(ok) + limit := db.maxSearchResults + db.maxSearchResults = 1 + defer func() { + db.maxSearchResults = limit + }() + + initialScores := []float64{3, 2, 1} + for i, score := range initialScores { + subset := fmt.Sprintf("score-subset-%d", i) + err := suite.AddScores(ctx, "collection-b", subset, []Score{{ + Id: "shared-item", + Score: score, + Categories: []string{"score-old"}, + Timestamp: time.Now().UTC(), + }}) + suite.NoError(err) + } + + targetScore := float64(0) + err := suite.UpdateScores(ctx, 
[]string{"collection-b"}, nil, "shared-item", ScorePatch{ + Score: &targetScore, + }) + suite.NoError(err) + + for i := range initialScores { + subset := fmt.Sprintf("score-subset-%d", i) + docs, err := suite.SearchScores(ctx, "collection-b", subset, nil, 0, -1) + suite.NoError(err) + suite.Require().Len(docs, 1) + suite.Equal(targetScore, docs[0].Score) + } +} + func TestEncodeDecodeCategories(t *testing.T) { encoded := encodeCategories([]string{"z", "h"}) decoded, err := decodeCategories(encoded) @@ -325,6 +397,17 @@ func TestRedisSortedSetTS(t *testing.T) { suite.Run(t, new(RedisSortedSetTSTestSuite)) } +// TestRedisSortedSetTS_NonExistentSeries validates that the sorted-set TS +// implementation returns empty results (not an error) for non-existent series. +func (suite *RedisSortedSetTSTestSuite) TestNonExistentSeries() { + ctx := suite.T().Context() + ts := time.Date(2023, 1, 1, 0, 0, 0, 0, time.UTC) + points, err := suite.GetTimeSeriesPoints(ctx, "nonexistent", + ts, ts.Add(time.Hour), time.Second) + suite.NoError(err) + suite.Empty(points) +} + func BenchmarkRedis(b *testing.B) { log.CloseLogger() // open db diff --git a/storage/cache/redis_valkey.go b/storage/cache/redis_valkey.go index c075c9f0d..1dcaeebad 100644 --- a/storage/cache/redis_valkey.go +++ b/storage/cache/redis_valkey.go @@ -16,7 +16,7 @@ package cache import ( "context" - "sort" + "slices" "strconv" "strings" "time" @@ -175,7 +175,7 @@ func (v *RedisValkey) GetTimeSeriesPoints(ctx context.Context, name string, begi for k := range buckets { sortedKeys = append(sortedKeys, k) } - sort.Slice(sortedKeys, func(i, j int) bool { return sortedKeys[i] < sortedKeys[j] }) + slices.Sort(sortedKeys) points := make([]TimeSeriesPoint, 0, len(sortedKeys)) for _, bk := range sortedKeys { diff --git a/storage/cache/valkey.go b/storage/cache/valkey.go deleted file mode 100644 index 97f3498c1..000000000 --- a/storage/cache/valkey.go +++ /dev/null @@ -1,841 +0,0 @@ -// Copyright 2025 gorse Project Authors -// -// 
Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build ignore - -package cache - -import ( - "context" - "crypto/tls" - "fmt" - "io" - "net/url" - "sort" - "strconv" - "strings" - "time" - - "github.com/gorse-io/gorse/common/log" - "github.com/gorse-io/gorse/storage" - "github.com/juju/errors" - "github.com/samber/lo" - "github.com/valkey-io/valkey-go" - "go.uber.org/zap" -) - -func init() { - Register([]string{storage.ValkeyPrefix, storage.ValkeysPrefix}, func(path, tablePrefix string, opts ...storage.Option) (Database, error) { - host, port, username, password, db, useTLS, err := parseValkeyURL(path) - if err != nil { - return nil, errors.Trace(err) - } - option := valkey.ClientOption{ - InitAddress: []string{fmt.Sprintf("%s:%d", host, port)}, - SelectDB: db, - } - if username != "" || password != "" { - option.Username = username - option.Password = password - } - if useTLS { - option.TLSConfig = &tls.Config{} - } - client, err := valkey.NewClient(option) - if err != nil { - return nil, errors.Trace(err) - } - database := &Valkey{ - client: client, - isCluster: false, - maxSearchResults: storage.NewOptions(opts...).MaxSearchResults, - } - database.TablePrefix = storage.TablePrefix(tablePrefix) - return database, nil - }) - Register([]string{storage.ValkeyClusterPrefix, storage.ValkeysClusterPrefix}, func(path, tablePrefix string, opts ...storage.Option) (Database, error) { - addresses, username, password, useTLS, err := parseValkeyClusterURL(path) - if 
err != nil { - return nil, errors.Trace(err) - } - option := valkey.ClientOption{ - InitAddress: addresses, - } - if username != "" || password != "" { - option.Username = username - option.Password = password - } - if useTLS { - option.TLSConfig = &tls.Config{} - } - client, err := valkey.NewClient(option) - if err != nil { - return nil, errors.Trace(err) - } - database := &Valkey{ - client: client, - isCluster: true, - maxSearchResults: storage.NewOptions(opts...).MaxSearchResults, - } - database.TablePrefix = storage.TablePrefix(tablePrefix) - return database, nil - }) -} - -// parseValkeyURL parses a valkey:// or valkeys:// URL into connection parameters. -func parseValkeyURL(rawURL string) (host string, port int, username, password string, db int, useTLS bool, err error) { - parsed, err := url.Parse(rawURL) - if err != nil { - return "", 0, "", "", 0, false, errors.Trace(err) - } - host = parsed.Hostname() - if host == "" { - host = "localhost" - } - port = 6379 - if parsed.Port() != "" { - port, err = strconv.Atoi(parsed.Port()) - if err != nil { - return "", 0, "", "", 0, false, errors.Trace(err) - } - } - if parsed.User != nil { - username = parsed.User.Username() - password, _ = parsed.User.Password() - } - if parsed.Path != "" && parsed.Path != "/" { - dbStr := strings.TrimPrefix(parsed.Path, "/") - if dbStr != "" { - db, err = strconv.Atoi(dbStr) - if err != nil { - return "", 0, "", "", 0, false, errors.Errorf("invalid database number: %s", dbStr) - } - } - } - useTLS = parsed.Scheme == "valkeys" - return host, port, username, password, db, useTLS, nil -} - -// parseValkeyClusterURL parses a valkey+cluster:// or valkeys+cluster:// URL. -func parseValkeyClusterURL(rawURL string) (addresses []string, username, password string, useTLS bool, err error) { - // Replace the cluster prefix with a standard scheme for URL parsing. 
- var newURL string - if strings.HasPrefix(rawURL, storage.ValkeyClusterPrefix) { - newURL = strings.Replace(rawURL, storage.ValkeyClusterPrefix, storage.ValkeyPrefix, 1) - useTLS = false - } else if strings.HasPrefix(rawURL, storage.ValkeysClusterPrefix) { - newURL = strings.Replace(rawURL, storage.ValkeysClusterPrefix, storage.ValkeysPrefix, 1) - useTLS = true - } - parsed, err := url.Parse(newURL) - if err != nil { - return nil, "", "", false, errors.Trace(err) - } - host := parsed.Hostname() - if host == "" { - host = "localhost" - } - port := 6379 - if parsed.Port() != "" { - port, err = strconv.Atoi(parsed.Port()) - if err != nil { - return nil, "", "", false, errors.Trace(err) - } - } - addresses = append(addresses, fmt.Sprintf("%s:%d", host, port)) - if parsed.User != nil { - username = parsed.User.Username() - password, _ = parsed.User.Password() - } - // Parse additional addresses from query params (addr=host:port). - for _, addrStr := range parsed.Query()["addr"] { - if !strings.Contains(addrStr, ":") { - addrStr = addrStr + ":6379" - } - addresses = append(addresses, addrStr) - } - return addresses, username, password, useTLS, nil -} - -// Valkey cache storage using valkey-go client. -type Valkey struct { - storage.TablePrefix - client valkey.Client - isCluster bool - maxSearchResults int -} - -// Close the valkey connection. -func (v *Valkey) Close() error { - v.client.Close() - return nil -} - -// Ping the valkey server. -func (v *Valkey) Ping() error { - ctx := context.Background() - return v.client.Do(ctx, v.client.B().Ping().Build()).Error() -} - -// Init creates the valkey-search index for document storage. -func (v *Valkey) Init() error { - ctx := context.Background() - // List existing indices via FT._LIST. 
- result, err := v.client.Do(ctx, v.client.B().Arbitrary("FT._LIST").Build()).ToArray() - if err != nil { - return errors.Trace(err) - } - indices := make([]string, 0, len(result)) - for _, r := range result { - if s, err := r.ToString(); err == nil { - indices = append(indices, s) - } - } - if lo.Contains(indices, v.DocumentTable()) { - return nil - } - // Create the index. - err = v.client.Do(ctx, v.client.B().Arbitrary("FT.CREATE"). - Keys(v.DocumentTable()). - Args( - "ON", "HASH", - "PREFIX", "1", v.DocumentTable()+":", - "SCHEMA", - "collection", "TAG", - "subset", "TAG", - "id", "TAG", - "score", "NUMERIC", - "is_hidden", "NUMERIC", - "categories", "TAG", "SEPARATOR", ";", - "timestamp", "NUMERIC", - ).Build()).Error() - if err != nil { - return errors.Trace(err) - } - return nil -} - -// Scan iterates over all keys with the table prefix. -func (v *Valkey) Scan(work func(string) error) error { - ctx := context.Background() - var cursor uint64 - for { - entry, err := v.client.Do(ctx, v.client.B().Scan().Cursor(cursor).Match(string(v.TablePrefix)+"*").Count(100).Build()).AsScanEntry() - if err != nil { - return errors.Trace(err) - } - for _, key := range entry.Elements { - if err = work(key[len(v.TablePrefix):]); err != nil { - return errors.Trace(err) - } - } - cursor = entry.Cursor - if cursor == 0 { - return nil - } - } -} - -// Purge deletes all keys with the table prefix. 
-func (v *Valkey) Purge() error { - ctx := context.Background() - var cursor uint64 - for { - entry, err := v.client.Do(ctx, v.client.B().Scan().Cursor(cursor).Match(string(v.TablePrefix)+"*").Count(100).Build()).AsScanEntry() - if err != nil { - return errors.Trace(err) - } - if len(entry.Elements) > 0 { - if v.isCluster { - for _, key := range entry.Elements { - if err = v.client.Do(ctx, v.client.B().Del().Key(key).Build()).Error(); err != nil { - return errors.Trace(err) - } - } - } else { - if err = v.client.Do(ctx, v.client.B().Del().Key(entry.Elements...).Build()).Error(); err != nil { - return errors.Trace(err) - } - } - } - cursor = entry.Cursor - if cursor == 0 { - return nil - } - } -} - -// Set stores values in Valkey. -func (v *Valkey) Set(ctx context.Context, values ...Value) error { - if len(values) == 0 { - return nil - } - cmds := make(valkey.Commands, 0, len(values)) - for _, val := range values { - cmds = append(cmds, v.client.B().Set().Key(v.Key(val.name)).Value(val.value).Build()) - } - for _, resp := range v.client.DoMulti(ctx, cmds...) { - if err := resp.Error(); err != nil { - return errors.Trace(err) - } - } - return nil -} - -// Get returns a value from Valkey. -func (v *Valkey) Get(ctx context.Context, key string) *ReturnValue { - result, err := v.client.Do(ctx, v.client.B().Get().Key(v.Key(key)).Build()).ToString() - if err != nil { - if valkey.IsValkeyNil(err) { - return &ReturnValue{value: "", exists: false} - } - return &ReturnValue{err: err, exists: false} - } - return &ReturnValue{value: result, exists: true} -} - -// Delete removes a key from Valkey. -func (v *Valkey) Delete(ctx context.Context, key string) error { - return v.client.Do(ctx, v.client.B().Del().Key(v.Key(key)).Build()).Error() -} - -// Push adds a message to a sorted set queue with timestamp as score. 
-func (v *Valkey) Push(ctx context.Context, name, message string) error { - return v.client.Do(ctx, v.client.B().Zadd().Key(v.Key(name)).ScoreMember().ScoreMember(float64(time.Now().UnixNano()), message).Build()).Error() -} - -// Pop removes and returns the message with the lowest score from the queue. -func (v *Valkey) Pop(ctx context.Context, name string) (string, error) { - result, err := v.client.Do(ctx, v.client.B().Zpopmin().Key(v.Key(name)).Count(1).Build()).AsZScores() - if err != nil { - return "", errors.Trace(err) - } - if len(result) == 0 { - return "", io.EOF - } - return result[0].Member, nil -} - -// Remain returns the number of messages in the queue. -func (v *Valkey) Remain(ctx context.Context, name string) (int64, error) { - return v.client.Do(ctx, v.client.B().Zcard().Key(v.Key(name)).Build()).AsInt64() -} - -func (v *Valkey) documentKey(collection, subset, value string) string { - return v.DocumentTable() + ":" + collection + ":" + subset + ":" + value -} - -// AddScores adds score documents to Valkey using hash storage. -func (v *Valkey) AddScores(ctx context.Context, collection, subset string, documents []Score) error { - if len(documents) == 0 { - return nil - } - cmds := make(valkey.Commands, 0, len(documents)) - for _, document := range documents { - key := v.documentKey(collection, subset, document.Id) - cmds = append(cmds, v.client.B().Hset().Key(key).FieldValue(). - FieldValue("collection", collection). - FieldValue("subset", subset). - FieldValue("id", document.Id). - FieldValue("score", strconv.FormatFloat(document.Score, 'g', -1, 64)). - FieldValue("is_hidden", formatBool(document.IsHidden)). - FieldValue("categories", encodeCategories(document.Categories)). - FieldValue("timestamp", strconv.FormatInt(document.Timestamp.UnixMicro(), 10)). - Build()) - } - for _, resp := range v.client.DoMulti(ctx, cmds...) 
{ - if err := resp.Error(); err != nil { - return errors.Trace(err) - } - } - return nil -} - -func formatBool(b bool) string { - if b { - return "1" - } - return "0" -} - -// SearchScores searches for score documents using FT.SEARCH. -func (v *Valkey) SearchScores(ctx context.Context, collection, subset string, query []string, begin, end int) ([]Score, error) { - var builder strings.Builder - fmt.Fprintf(&builder, "@collection:{ %s } @is_hidden:[0 0]", escapeTag(collection)) - if subset != "" { - fmt.Fprintf(&builder, " @subset:{ %s }", escapeTag(subset)) - } - for _, q := range query { - fmt.Fprintf(&builder, " @categories:{ %s }", escapeTag(encodeCategory(q))) - } - // Use server-side LIMIT offset/count to fetch only the needed slice. - fetchOffset := begin - fetchCount := 10000 - if end != -1 { - fetchCount = end - begin - } - cmd := v.client.B().Arbitrary("FT.SEARCH"). - Keys(v.DocumentTable()). - Args(builder.String(), "SORTBY", "score", "DESC", "LIMIT", strconv.Itoa(fetchOffset), strconv.Itoa(fetchCount)). - Build() - result, err := v.client.Do(ctx, cmd).ToArray() - if err != nil { - return nil, errors.Trace(err) - } - documents, err := parseFTSearchResult(result) - if err != nil { - return nil, errors.Trace(err) - } - return documents, nil -} - -// UpdateScores updates score documents matching the query. 
-func (v *Valkey) UpdateScores(ctx context.Context, collections []string, subset *string, id string, patch ScorePatch) error { - if len(collections) == 0 { - return nil - } - if patch.Score == nil && patch.IsHidden == nil && patch.Categories == nil { - return nil - } - var builder strings.Builder - escapedCollections := make([]string, len(collections)) - for i, c := range collections { - escapedCollections[i] = escapeTag(c) - } - fmt.Fprintf(&builder, "@collection:{ %s }", strings.Join(escapedCollections, " | ")) - fmt.Fprintf(&builder, " @id:{ %s }", escapeTag(id)) - if subset != nil { - fmt.Fprintf(&builder, " @subset:{ %s }", escapeTag(*subset)) - } - limit := v.maxSearchResults - if limit <= 0 { - limit = 10000 - } - - // Two-phase update: collect all matching keys first, then mutate. - // On the first fetch, if total results exceed the page limit, re-fetch all - // in a single request to avoid pagination drift when scores change. - keys := make([]string, 0) - keySet := make(map[string]struct{}) - offset := 0 - for { - cmd := v.client.B().Arbitrary("FT.SEARCH"). - Keys(v.DocumentTable()). - Args(builder.String(), "SORTBY", "score", "DESC", "LIMIT", strconv.Itoa(offset), strconv.Itoa(limit)). - Build() - result, err := v.client.Do(ctx, cmd).ToArray() - if err != nil { - return errors.Trace(err) - } - if offset == 0 { - total := parseFTSearchTotal(result) - if total > limit { - // Fetch all results in one shot to avoid pagination issues. - cmd = v.client.B().Arbitrary("FT.SEARCH"). - Keys(v.DocumentTable()). - Args(builder.String(), "SORTBY", "score", "DESC", "LIMIT", "0", strconv.Itoa(total)). 
- Build() - result, err = v.client.Do(ctx, cmd).ToArray() - if err != nil { - return errors.Trace(err) - } - } - } - docKeys := parseFTSearchKeys(result) - if len(docKeys) == 0 { - break - } - newKeys := 0 - for _, k := range docKeys { - if _, exists := keySet[k]; !exists { - keySet[k] = struct{}{} - keys = append(keys, k) - newKeys++ - } - } - offset += len(docKeys) - if len(docKeys) < limit || newKeys == 0 { - break - } - } - - for _, key := range keys { - cmd := v.client.B().Hset().Key(key).FieldValue() - if patch.Score != nil { - cmd = cmd.FieldValue("score", strconv.FormatFloat(*patch.Score, 'g', -1, 64)) - } - if patch.IsHidden != nil { - cmd = cmd.FieldValue("is_hidden", formatBool(*patch.IsHidden)) - } - if patch.Categories != nil { - cmd = cmd.FieldValue("categories", encodeCategories(patch.Categories)) - } - if err := v.client.Do(ctx, cmd.Build()).Error(); err != nil { - return errors.Trace(err) - } - } - return nil -} - -// DeleteScores deletes score documents matching the condition. -func (v *Valkey) DeleteScores(ctx context.Context, collections []string, condition ScoreCondition) error { - if err := condition.Check(); err != nil { - return errors.Trace(err) - } - var builder strings.Builder - escapedCollections := make([]string, len(collections)) - for i, c := range collections { - escapedCollections[i] = escapeTag(c) - } - fmt.Fprintf(&builder, "@collection:{ %s }", strings.Join(escapedCollections, " | ")) - if condition.Subset != nil { - fmt.Fprintf(&builder, " @subset:{ %s }", escapeTag(*condition.Subset)) - } - if condition.Id != nil { - fmt.Fprintf(&builder, " @id:{ %s }", escapeTag(*condition.Id)) - } - if condition.Before != nil { - fmt.Fprintf(&builder, " @timestamp:[-inf (%d]", condition.Before.UnixMicro()) - } - const maxDeleteIterations = 100 - for iteration := 0; iteration < maxDeleteIterations; iteration++ { - cmd := v.client.B().Arbitrary("FT.SEARCH"). - Keys(v.DocumentTable()). 
- Args(builder.String(), "SORTBY", "score", "DESC", "LIMIT", "0", "10000"). - Build() - result, err := v.client.Do(ctx, cmd).ToArray() - if err != nil { - return errors.Trace(err) - } - docKeys := parseFTSearchKeys(result) - total := parseFTSearchTotal(result) - if len(docKeys) == 0 { - break - } - if v.isCluster { - for _, key := range docKeys { - if err = v.client.Do(ctx, v.client.B().Del().Key(key).Build()).Error(); err != nil { - return errors.Trace(err) - } - } - } else { - if err = v.client.Do(ctx, v.client.B().Del().Key(docKeys...).Build()).Error(); err != nil { - return errors.Trace(err) - } - } - if total == len(docKeys) { - break - } - } - return nil -} - -// ScanScores iterates over all score documents. -func (v *Valkey) ScanScores(ctx context.Context, callback func(collection string, id string, subset string, timestamp time.Time) error) error { - var cursor uint64 - for { - entry, err := v.client.Do(ctx, v.client.B().Scan().Cursor(cursor).Match(v.DocumentTable()+":*").Count(100).Build()).AsScanEntry() - if err != nil { - return errors.Trace(err) - } - for _, key := range entry.Elements { - row, err := v.client.Do(ctx, v.client.B().Hgetall().Key(key).Build()).AsStrMap() - if err != nil { - return errors.Trace(err) - } - usec, err := strconv.ParseInt(row["timestamp"], 10, 64) - if err != nil { - return errors.Trace(err) - } - if err = callback(row["collection"], row["id"], row["subset"], time.UnixMicro(usec).In(time.UTC)); err != nil { - return errors.Trace(err) - } - } - cursor = entry.Cursor - if cursor == 0 { - return nil - } - } -} - -// AddTimeSeriesPoints adds time series points using sorted set + hash. 
-func (v *Valkey) AddTimeSeriesPoints(ctx context.Context, points []TimeSeriesPoint) error { - grouped := groupTimeSeriesPoints(points) - cmds := make(valkey.Commands, 0, len(grouped)*2) - for name, sd := range grouped { - indexKey := v.PointsTable() + ":ts_index:" + name - dataKey := v.PointsTable() + ":ts_data:" + name - // ZADD - zaddCmd := v.client.B().Zadd().Key(indexKey).ScoreMember() - for member, score := range sd.zaddMembers { - zaddCmd = zaddCmd.ScoreMember(score, member) - } - cmds = append(cmds, zaddCmd.Build()) - // HSET - hsetCmd := v.client.B().Hset().Key(dataKey).FieldValue() - for field, value := range sd.hsetFields { - hsetCmd = hsetCmd.FieldValue(field, value) - } - cmds = append(cmds, hsetCmd.Build()) - } - for _, resp := range v.client.DoMulti(ctx, cmds...) { - if err := resp.Error(); err != nil { - return errors.Trace(err) - } - } - return nil -} - -// timeSeriesGroup holds grouped ZADD members and HSET fields for a single time series. -type timeSeriesGroup struct { - zaddMembers map[string]float64 - hsetFields map[string]string -} - -// groupTimeSeriesPoints groups time series points by name for batched writes. -func groupTimeSeriesPoints(points []TimeSeriesPoint) map[string]*timeSeriesGroup { - grouped := make(map[string]*timeSeriesGroup) - for _, point := range points { - tsMs := point.Timestamp.UnixMilli() - tsMsStr := strconv.FormatInt(tsMs, 10) - sd, ok := grouped[point.Name] - if !ok { - sd = &timeSeriesGroup{ - zaddMembers: make(map[string]float64), - hsetFields: make(map[string]string), - } - grouped[point.Name] = sd - } - sd.zaddMembers[tsMsStr] = float64(tsMs) - sd.hsetFields[tsMsStr] = strconv.FormatFloat(point.Value, 'g', -1, 64) - } - return grouped -} - -// GetTimeSeriesPoints retrieves time series points with Go-side bucket aggregation. 
-func (v *Valkey) GetTimeSeriesPoints(ctx context.Context, name string, begin, end time.Time, duration time.Duration) ([]TimeSeriesPoint, error) { - indexKey := v.PointsTable() + ":ts_index:" + name - dataKey := v.PointsTable() + ":ts_data:" + name - beginMs := begin.UnixMilli() - endMs := end.UnixMilli() - - // Fetch all timestamps in range from sorted set. - members, err := v.client.Do(ctx, v.client.B().Zrangebyscore().Key(indexKey). - Min(strconv.FormatInt(beginMs, 10)). - Max(strconv.FormatInt(endMs, 10)). - Withscores().Build()).AsZScores() - if err != nil { - return nil, errors.Trace(err) - } - if len(members) == 0 { - return nil, nil - } - - // Fetch corresponding values from hash. - fields := make([]string, len(members)) - for i, m := range members { - fields[i] = m.Member - } - hmgetResults, err := v.client.Do(ctx, v.client.B().Hmget().Key(dataKey).Field(fields...).Build()).ToArray() - if err != nil { - return nil, errors.Trace(err) - } - - // Build timestamp-value pairs. - type tsValue struct { - timestampMs int64 - value float64 - } - tsValues := make([]tsValue, 0, len(members)) - for i, m := range members { - valStr, err := hmgetResults[i].ToString() - if err != nil { - continue // nil result - } - tsMs, err := strconv.ParseInt(m.Member, 10, 64) - if err != nil { - log.Logger().Warn("failed to parse timestamp", zap.String("member", m.Member), zap.Error(err)) - continue - } - val, err := strconv.ParseFloat(valStr, 64) - if err != nil { - log.Logger().Warn("failed to parse value", zap.String("value", valStr), zap.Error(err)) - continue - } - tsValues = append(tsValues, tsValue{timestampMs: tsMs, value: val}) - } - - // Go-side bucket aggregation. 
- durationMs := duration.Milliseconds() - if durationMs <= 0 { - durationMs = 1 - } - type bucket struct { - bucketMs int64 - lastTsMs int64 - lastValue float64 - } - buckets := make(map[int64]*bucket) - for _, tv := range tsValues { - bk := (tv.timestampMs / durationMs) * durationMs - existing, ok := buckets[bk] - if !ok || tv.timestampMs > existing.lastTsMs { - buckets[bk] = &bucket{bucketMs: bk, lastTsMs: tv.timestampMs, lastValue: tv.value} - } - } - - sortedBuckets := make([]*bucket, 0, len(buckets)) - for _, b := range buckets { - sortedBuckets = append(sortedBuckets, b) - } - sort.Slice(sortedBuckets, func(i, j int) bool { - return sortedBuckets[i].bucketMs < sortedBuckets[j].bucketMs - }) - - points := make([]TimeSeriesPoint, 0, len(sortedBuckets)) - for _, b := range sortedBuckets { - points = append(points, TimeSeriesPoint{ - Name: name, - Timestamp: time.UnixMilli(b.bucketMs).UTC(), - Value: b.lastValue, - }) - } - return points, nil -} - -// --- FT.SEARCH result parsing helpers --- - -// parseFTSearchTotal extracts the total count from an FT.SEARCH result array. -// valkey-go returns: [total_int64, key1, [field1, val1, ...], key2, [field2, val2, ...], ...] -func parseFTSearchTotal(result []valkey.ValkeyMessage) int { - if len(result) == 0 { - return 0 - } - total, err := result[0].AsInt64() - if err != nil { - return 0 - } - return int(total) -} - -// parseFTSearchKeys extracts document keys from an FT.SEARCH result array. -func parseFTSearchKeys(result []valkey.ValkeyMessage) []string { - if len(result) < 2 { - return nil - } - var keys []string - for i := 1; i < len(result); i += 2 { - key, err := result[i].ToString() - if err == nil { - keys = append(keys, key) - } - } - return keys -} - -// parseFTSearchResult parses an FT.SEARCH result into Score documents. 
-func parseFTSearchResult(result []valkey.ValkeyMessage) ([]Score, error) { - if len(result) < 2 { - return nil, nil - } - documents := make([]Score, 0) - for i := 1; i < len(result); i += 2 { - if i+1 >= len(result) { - break - } - fields, err := parseFieldArray(result[i+1]) - if err != nil { - continue - } - doc, err := scoreFromFieldMap(fields) - if err != nil { - return nil, err - } - documents = append(documents, doc) - } - // Sort by score descending to match the FT.SEARCH SORTBY score DESC. - sort.Slice(documents, func(i, j int) bool { - return documents[i].Score > documents[j].Score - }) - return documents, nil -} - -// parseFieldArray converts a ValkeyMessage field array [field1, val1, field2, val2, ...] into a map. -func parseFieldArray(msg valkey.ValkeyMessage) (map[string]string, error) { - arr, err := msg.ToArray() - if err != nil { - return nil, err - } - m := make(map[string]string) - for j := 0; j+1 < len(arr); j += 2 { - key, err1 := arr[j].ToString() - val, err2 := arr[j+1].ToString() - if err1 == nil && err2 == nil { - m[key] = val - } - } - return m, nil -} - -// scoreFromFieldMap converts a field map into a Score struct. -func scoreFromFieldMap(fields map[string]string) (Score, error) { - var doc Score - doc.Id = fields["id"] - score, err := strconv.ParseFloat(fields["score"], 64) - if err != nil { - return doc, errors.Trace(err) - } - doc.Score = score - isHidden, err := strconv.ParseInt(fields["is_hidden"], 10, 64) - if err != nil { - return doc, errors.Trace(err) - } - doc.IsHidden = isHidden != 0 - categories, err := decodeCategories(fields["categories"]) - if err != nil { - return doc, errors.Trace(err) - } - doc.Categories = categories - timestamp, err := strconv.ParseInt(fields["timestamp"], 10, 64) - if err != nil { - return doc, errors.Trace(err) - } - doc.Timestamp = time.UnixMicro(timestamp).In(time.UTC) - return doc, nil -} - -// valkeyTagEscaper escapes all TAG special characters for Valkey Search queries. 
-var valkeyTagEscaper = strings.NewReplacer( - `\`, `\\`, - `{`, `\{`, - `}`, `\}`, - `|`, `\|`, - `*`, `\*`, - `(`, `\(`, - `)`, `\)`, - `~`, `\~`, - `@`, `\@`, - `"`, `\"`, - `'`, `\'`, - `-`, `\-`, - `:`, `\:`, - `.`, `\.`, - `/`, `\/`, - `+`, `\+`, -) - -// escapeTag escapes a value for use in Valkey Search TAG queries. -func escapeTag(s string) string { - return valkeyTagEscaper.Replace(s) -} diff --git a/storage/cache/valkey_test.go b/storage/cache/valkey_test.go deleted file mode 100644 index 0e24501d7..000000000 --- a/storage/cache/valkey_test.go +++ /dev/null @@ -1,288 +0,0 @@ -// Copyright 2025 gorse Project Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -//go:build ignore - -package cache - -import ( - "context" - "fmt" - "math" - "os" - "testing" - "time" - - "github.com/gorse-io/gorse/common/log" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/suite" -) - -var ( - valkeyDSN string -) - -func init() { - env := func(key, defaultValue string) string { - if value := os.Getenv(key); value != "" { - return value - } - return defaultValue - } - valkeyDSN = env("VALKEY_URI", "valkey://127.0.0.1:6380/") -} - -type ValkeyTestSuite struct { - baseTestSuite -} - -func (suite *ValkeyTestSuite) SetupSuite() { - var err error - suite.Database, err = Open(valkeyDSN, "gorse_") - suite.Require().NoError(err) - // flush db - valkeyClient, ok := suite.Database.(*Valkey) - suite.Require().True(ok) - err = valkeyClient.client.Do(context.Background(), valkeyClient.client.B().Flushdb().Build()).Error() - suite.Require().NoError(err) - // create schema - err = suite.Database.Init() - suite.Require().NoError(err) -} - -func (suite *ValkeyTestSuite) TestEscapeCharacters() { - ts := time.Date(2023, 1, 1, 0, 0, 0, 0, time.UTC) - ctx := suite.T().Context() - for _, c := range []string{"-", ":", ".", "/"} { - suite.Run(c, func() { - collection := fmt.Sprintf("a%s1", c) - subset := fmt.Sprintf("b%s2", c) - id := fmt.Sprintf("c%s3", c) - err := suite.AddScores(ctx, collection, subset, []Score{{ - Id: id, - Score: math.MaxFloat64, - Categories: []string{"a", "b"}, - Timestamp: ts, - }}) - suite.NoError(err) - documents, err := suite.SearchScores(ctx, collection, subset, []string{"b"}, 0, -1) - suite.NoError(err) - suite.Equal([]Score{{Id: id, Score: math.MaxFloat64, Categories: []string{"a", "b"}, Timestamp: ts}}, documents) - - err = suite.UpdateScores(ctx, []string{collection}, nil, id, ScorePatch{Score: new(float64(1))}) - suite.NoError(err) - documents, err = suite.SearchScores(ctx, collection, subset, []string{"b"}, 0, -1) - suite.NoError(err) - suite.Equal([]Score{{Id: id, Score: 1, Categories: []string{"a", "b"}, 
Timestamp: ts}}, documents) - - err = suite.DeleteScores(ctx, []string{collection}, ScoreCondition{ - Subset: new(subset), - Id: new(id), - }) - suite.NoError(err) - documents, err = suite.SearchScores(ctx, collection, subset, []string{"b"}, 0, -1) - suite.NoError(err) - suite.Empty(documents) - }) - } -} - -func (suite *ValkeyTestSuite) TestUpdateScoresWithPagination() { - ctx := suite.T().Context() - db, ok := suite.Database.(*Valkey) - suite.True(ok) - limit := db.maxSearchResults - db.maxSearchResults = 2 - defer func() { - db.maxSearchResults = limit - }() - - for i := range 5 { - subset := fmt.Sprintf("subset-%d", i) - err := suite.AddScores(ctx, "collection-a", subset, []Score{{ - Id: "shared-item", - Score: float64(i), - Categories: []string{"old"}, - Timestamp: time.Now().UTC(), - }}) - suite.NoError(err) - } - - err := suite.UpdateScores(ctx, []string{"collection-a"}, nil, "shared-item", ScorePatch{ - Categories: []string{"new"}, - }) - suite.NoError(err) - - for i := range 5 { - subset := fmt.Sprintf("subset-%d", i) - docs, err := suite.SearchScores(ctx, "collection-a", subset, []string{"new"}, 0, -1) - suite.NoError(err) - suite.Require().Len(docs, 1) - suite.Equal("shared-item", docs[0].Id) - } -} - -func (suite *ValkeyTestSuite) TestUpdateScoresWithPaginationAndScorePatch() { - ctx := suite.T().Context() - db, ok := suite.Database.(*Valkey) - suite.True(ok) - limit := db.maxSearchResults - db.maxSearchResults = 1 - defer func() { - db.maxSearchResults = limit - }() - - initialScores := []float64{3, 2, 1} - for i, score := range initialScores { - subset := fmt.Sprintf("score-subset-%d", i) - err := suite.AddScores(ctx, "collection-b", subset, []Score{{ - Id: "shared-item", - Score: score, - Categories: []string{"score-old"}, - Timestamp: time.Now().UTC(), - }}) - suite.NoError(err) - } - - targetScore := float64(0) - err := suite.UpdateScores(ctx, []string{"collection-b"}, nil, "shared-item", ScorePatch{ - Score: &targetScore, - }) - suite.NoError(err) 
- - for i := range initialScores { - subset := fmt.Sprintf("score-subset-%d", i) - docs, err := suite.SearchScores(ctx, "collection-b", subset, nil, 0, -1) - suite.NoError(err) - suite.Require().Len(docs, 1) - suite.Equal(targetScore, docs[0].Score) - } -} - -func (suite *ValkeyTestSuite) TestUpdateScoresWithPaginationAndTiedScores() { - ctx := suite.T().Context() - db, ok := suite.Database.(*Valkey) - suite.True(ok) - limit := db.maxSearchResults - db.maxSearchResults = 2 - defer func() { - db.maxSearchResults = limit - }() - - for i := range 5 { - subset := fmt.Sprintf("tie-subset-%d", i) - err := suite.AddScores(ctx, "collection-c", subset, []Score{{ - Id: "shared-item", - Score: 1, - Categories: []string{"tie-old"}, - Timestamp: time.Now().UTC(), - }}) - suite.NoError(err) - } - - err := suite.UpdateScores(ctx, []string{"collection-c"}, nil, "shared-item", ScorePatch{ - Categories: []string{"tie-new"}, - }) - suite.NoError(err) - - for i := range 5 { - subset := fmt.Sprintf("tie-subset-%d", i) - docs, err := suite.SearchScores(ctx, "collection-c", subset, []string{"tie-new"}, 0, -1) - suite.NoError(err) - suite.Require().Len(docs, 1) - suite.Equal("shared-item", docs[0].Id) - } -} - -func TestValkey(t *testing.T) { - suite.Run(t, new(ValkeyTestSuite)) -} - -func TestParseValkeyURL(t *testing.T) { - // Basic URL - host, port, username, password, db, useTLS, err := parseValkeyURL("valkey://127.0.0.1:6380/") - assert.NoError(t, err) - assert.Equal(t, "127.0.0.1", host) - assert.Equal(t, 6380, port) - assert.Equal(t, "", username) - assert.Equal(t, "", password) - assert.Equal(t, 0, db) - assert.False(t, useTLS) - - // URL with password only - host, _, _, password, db, useTLS, err = parseValkeyURL("valkey://:secret@localhost:6379/2") - assert.NoError(t, err) - assert.Equal(t, "localhost", host) - assert.Equal(t, "secret", password) - assert.Equal(t, 2, db) - assert.False(t, useTLS) - - // URL with username and password - host, port, username, password, db, useTLS, 
err = parseValkeyURL("valkey://myuser:mypass@host.example.com:6380/3") - assert.NoError(t, err) - assert.Equal(t, "host.example.com", host) - assert.Equal(t, 6380, port) - assert.Equal(t, "myuser", username) - assert.Equal(t, "mypass", password) - assert.Equal(t, 3, db) - assert.False(t, useTLS) - - // TLS URL - _, _, _, _, _, useTLS, err = parseValkeyURL("valkeys://localhost:6379/0") - assert.NoError(t, err) - assert.True(t, useTLS) - - // Default port - _, port, _, _, _, _, err = parseValkeyURL("valkey://localhost/") - assert.NoError(t, err) - assert.Equal(t, 6379, port) -} - -func TestParseValkeyClusterURL(t *testing.T) { - // Basic cluster URL - addresses, username, password, useTLS, err := parseValkeyClusterURL("valkey+cluster://:password@192.168.1.11:6379?addr=192.168.0.5:6379&addr=192.168.0.7:6379") - assert.NoError(t, err) - assert.Len(t, addresses, 3) - assert.Equal(t, "192.168.1.11:6379", addresses[0]) - assert.Equal(t, "192.168.0.5:6379", addresses[1]) - assert.Equal(t, "192.168.0.7:6379", addresses[2]) - assert.Equal(t, "", username) - assert.Equal(t, "password", password) - assert.False(t, useTLS) - - // Cluster URL with username - addresses, username, password, useTLS, err = parseValkeyClusterURL("valkeys+cluster://admin:secret@node1:6380?addr=node2:6380") - assert.NoError(t, err) - assert.Len(t, addresses, 2) - assert.Equal(t, "node1:6380", addresses[0]) - assert.Equal(t, "node2:6380", addresses[1]) - assert.Equal(t, "admin", username) - assert.Equal(t, "secret", password) - assert.True(t, useTLS) -} - -func BenchmarkValkey(b *testing.B) { - log.CloseLogger() - database, err := Open(valkeyDSN, "gorse_") - assert.NoError(b, err) - // flush db - valkeyClient := database.(*Valkey) - err = valkeyClient.client.Do(context.Background(), valkeyClient.client.B().Flushdb().Build()).Error() - assert.NoError(b, err) - // create schema - err = database.Init() - assert.NoError(b, err) - // benchmark - benchmark(b, database) -} From 
eb3121f4d951e643e18518a9f335f0e6c68c4077 Mon Sep 17 00:00:00 2001 From: Daria Korenieva Date: Thu, 7 May 2026 13:17:08 -0700 Subject: [PATCH 18/19] clean up --- EXPERIMENT_REPORT.md | 158 ------------------------------------------- 1 file changed, 158 deletions(-) delete mode 100644 EXPERIMENT_REPORT.md diff --git a/EXPERIMENT_REPORT.md b/EXPERIMENT_REPORT.md deleted file mode 100644 index 72852f773..000000000 --- a/EXPERIMENT_REPORT.md +++ /dev/null @@ -1,158 +0,0 @@ -# Experiment: Reuse Redis Client for Valkey (Eliminate Code Duplication) - -## Branch - -``` -experiment/valkey-reuse-redis-client -``` - -Based on: `feature/valkey-cache-integration` - -Changes here do NOT affect the existing PR on `feature/valkey-cache-integration`. - ---- - -## Context - -A maintainer reviewed the Valkey integration PR and said: - -> "Too many duplicate codes comparing to Redis backend. Please fix all tests. Could you reuse Redis client, but implement special timeseries interface if Valkey is detected?" - -The original implementation (`storage/cache/valkey.go`, ~839 lines) used the `valkey-go` client library and reimplemented every method from scratch — Set, Get, Delete, Push, Pop, Scan, Purge, AddScores, SearchScores, DeleteScores, UpdateScores, ScanScores, AddTimeSeriesPoints, GetTimeSeriesPoints — all of which are semantically identical to the Redis implementation since Valkey is wire-compatible with Redis. - ---- - -## What Was Done - -### Approach: Embed `Redis`, override only time series - -Since Valkey is wire-compatible with Redis for all commands except the TimeSeries module (`TS.ADD`, `TS.RANGE`), the solution is: - -1. Connect to Valkey using the same `go-redis` client (it works out of the box) -2. Reuse ALL existing Redis methods (KV, queue, scores/search, scan, purge) -3. 
Override ONLY `AddTimeSeriesPoints` and `GetTimeSeriesPoints` with a sorted-set + hash implementation - -### Files Changed - -| File | Status | Description | -|------|--------|-------------| -| `storage/cache/redis_valkey.go` | **NEW** | Registers `valkey://`, `valkeys://`, `valkey+cluster://`, `valkeys+cluster://` prefixes. Defines `RedisValkey` type that embeds `Redis` and overrides only the two time series methods. ~160 lines total. | -| `storage/cache/redis.go` | **UNCHANGED** | Original Redis implementation is untouched. | -| `storage/cache/valkey.go` | **DISABLED** | Added `//go:build ignore` to exclude from compilation. File preserved for reference. | -| `storage/cache/valkey_test.go` | **DISABLED** | Added `//go:build ignore` to exclude from compilation. | -| `storage/cache/redis_test.go` | **MODIFIED** | Added `ValkeyViaRedisTestSuite` (tests valkey:// DSN end-to-end) and `RedisSortedSetTSTestSuite` (tests sorted-set TS implementation against a Redis server). | -| `storage/cache/database_test.go` | **MODIFIED** | Added comprehensive time series tests to `baseTestSuite`: duplicate timestamp, empty range, multiple series isolation, bucket aggregation correctness, single point, batch add, empty batch. Also added edge-case tests for AddScores(empty) and UpdateScores(no-op). | - -### How `RedisValkey` Works - -```go -type RedisValkey struct { - Redis // embeds all Redis methods -} -``` - -- `AddTimeSeriesPoints`: Uses pipelined `ZADD` (sorted set for timestamp index) + `HSET` (hash for values). Last-write-wins on duplicate timestamps via natural ZADD/HSET overwrite semantics. -- `GetTimeSeriesPoints`: Uses `ZRANGEBYSCORE` to get timestamps in range, `HMGET` to fetch values, then Go-side bucket aggregation (group by `floor(ts/duration)*duration`, keep last value per bucket). - -This matches how the existing MongoDB backend implements time series in this codebase. 
- -### Registration - -In `redis_valkey.go` `init()`: -- `valkey://` / `valkeys://` → `redis.NewClient` (rewrites URL prefix) -- `valkey+cluster://` / `valkeys+cluster://` → `redis.NewClusterClient` (rewrites URL prefix) - -All return a `*RedisValkey` which satisfies the `Database` interface. - ---- - -## Test Suites - -| Suite | What it tests | How to run | -|-------|---------------|------------| -| `TestRedis` | Original Redis with native TimeSeries | `go test ./storage/cache/ -run TestRedis -v` | -| `TestRedisSortedSetTS` | `RedisValkey` type against a Redis server (validates sorted-set TS logic using Redis's ZADD/HSET) | `go test ./storage/cache/ -run TestRedisSortedSetTS -v` | -| `TestValkeyViaRedis` | Full end-to-end via `valkey://` DSN against a real Valkey instance | `VALKEY_URI=valkey://127.0.0.1:6380/ go test ./storage/cache/ -run TestValkeyViaRedis -v` | - -All suites run the shared `baseTestSuite` which covers every `Database` interface method. - ---- - -## Next Steps - -### 1. Compile and verify - -```bash -go build ./storage/cache/... -``` - -If there are compilation issues (e.g., `go.mod` tidy needed because `valkey-go` is now unused by compiled code), run: - -```bash -go mod tidy -``` - -### 2. Run tests against Redis - -```bash -# Requires Redis with TimeSeries + Search modules on localhost:6379 -go test ./storage/cache/ -run "TestRedis$" -v -count=1 -go test ./storage/cache/ -run TestRedisSortedSetTS -v -count=1 -``` - -Both should pass. `TestRedis` validates existing behavior is unbroken. `TestRedisSortedSetTS` validates the sorted-set fallback works correctly. - -### 3. Run tests against Valkey - -```bash -# Requires Valkey with valkey-search module on localhost:6380 -VALKEY_URI=valkey://127.0.0.1:6380/ go test ./storage/cache/ -run TestValkeyViaRedis -v -count=1 -``` - -Record any failures. 
Expected potential issues: -- `FT._LIST` / `FT.CREATE` / `FT.SEARCH` command compatibility (valkey-search module required) -- Minor behavioral differences in search result ordering or field formatting - -### 4. If all tests pass - -- Delete `storage/cache/valkey.go` and `storage/cache/valkey_test.go` entirely -- Remove `github.com/valkey-io/valkey-go` from `go.mod` + `go mod tidy` -- Run full project build: `go build ./...` -- Run full test suite to check nothing else breaks -- Update the PR on `feature/valkey-cache-integration` with these changes - -### 5. If Valkey tests fail - -- Document which tests fail and why -- Most likely cause: FT.SEARCH command differences between Redis Search and valkey-search -- Fix approach: adjust query formatting or result parsing in the inherited Redis methods (may need to override `SearchScores` in `RedisValkey` if behavior diverges) - ---- - -## Key Design Decisions - -1. **Embedding over interface**: Using Go struct embedding (`RedisValkey` embeds `Redis`) rather than a `timeSeriesBackend` interface. This avoids modifying `redis.go` at all — no new fields, no interface dispatch, no runtime detection. - -2. **No changes to redis.go**: The maintainer's concern was code duplication. By embedding, we get zero duplication with zero changes to the working Redis implementation. - -3. **go-redis over valkey-go**: Valkey speaks Redis protocol. Using go-redis means one client library, one set of patterns, one import. The `valkey-go` dependency can be removed entirely. - -4. **Build tag on old files**: Using `//go:build ignore` rather than deleting `valkey.go`/`valkey_test.go` preserves them for reference during the experiment. Delete them once validated. 
- ---- - -## File Locations (for reference) - -``` -storage/cache/ -├── database.go # Database interface definition -├── database_test.go # Shared test suite (baseTestSuite) — MODIFIED -├── redis.go # Original Redis implementation — UNCHANGED -├── redis_valkey.go # NEW: RedisValkey type + valkey:// registration -├── redis_test.go # Redis + Valkey test suites — MODIFIED -├── valkey.go # OLD: standalone Valkey impl (//go:build ignore) -├── valkey_test.go # OLD: standalone Valkey tests (//go:build ignore) -├── mongodb.go # MongoDB implementation -├── sql.go # SQL implementation -└── ... -``` From ff5e7c4631e9b2ae0896a107344f2262a781b135 Mon Sep 17 00:00:00 2001 From: Daria Korenieva Date: Thu, 7 May 2026 14:09:22 -0700 Subject: [PATCH 19/19] Address review feedback: fix CI port mapping, pin valkey image, improve error handling - Fix CI service port: expose container port 6379 (not 6380) for Valkey - Pin valkey-bundle image to 9.1-rc2 (includes search 1.2.0) - Add Init() override with clear error for missing valkey-search module - Return empty slice instead of nil from GetTimeSeriesPoints - Add TODO for OTel semconv.DBSystemValkey - Document ZRangeByScore pagination limitation --- .github/workflows/build_test.yml | 6 +++--- storage/cache/redis_valkey.go | 20 ++++++++++++++++++-- storage/docker-compose.yml | 6 +++--- 3 files changed, 24 insertions(+), 8 deletions(-) diff --git a/.github/workflows/build_test.yml b/.github/workflows/build_test.yml index 0b6832e20..86f263ed9 100644 --- a/.github/workflows/build_test.yml +++ b/.github/workflows/build_test.yml @@ -69,9 +69,9 @@ jobs: - 6379 valkey: - image: valkey/valkey-bundle:unstable + image: valkey/valkey-bundle:9.1-rc2 ports: - - 6380 + - 6379 rustfs: image: rustfs/rustfs:alpha @@ -139,7 +139,7 @@ jobs: # Redis REDIS_URI: redis://localhost:${{ job.services.redis.ports[6379] }}/ # Valkey - VALKEY_URI: valkey://localhost:${{ job.services.valkey.ports[6380] }}/ + VALKEY_URI: valkey://localhost:${{ 
job.services.valkey.ports[6379] }}/ # S3 S3_ENDPOINT: localhost:${{ job.services.rustfs.ports[9000] }} S3_ACCESS_KEY_ID: rustfsadmin diff --git a/storage/cache/redis_valkey.go b/storage/cache/redis_valkey.go index 1dcaeebad..b2a7d5c46 100644 --- a/storage/cache/redis_valkey.go +++ b/storage/cache/redis_valkey.go @@ -16,6 +16,7 @@ package cache import ( "context" + "fmt" "slices" "strconv" "strings" @@ -51,6 +52,7 @@ func init() { database.TablePrefix = storage.TablePrefix(tablePrefix) database.client = redis.NewClient(opt) database.maxSearchResults = storage.NewOptions(opts...).MaxSearchResults + // TODO: switch to semconv.DBSystemValkey once available in go.opentelemetry.io/otel/semconv if err = redisotel.InstrumentTracing(database.client, redisotel.WithAttributes(semconv.DBSystemRedis)); err != nil { log.Logger().Error("failed to add tracing for valkey", zap.Error(err)) return nil, errors.Trace(err) @@ -73,6 +75,7 @@ func init() { database.TablePrefix = storage.TablePrefix(tablePrefix) database.client = redis.NewClusterClient(opt) database.maxSearchResults = storage.NewOptions(opts...).MaxSearchResults + // TODO: switch to semconv.DBSystemValkey once available in go.opentelemetry.io/otel/semconv if err = redisotel.InstrumentTracing(database.client, redisotel.WithAttributes(semconv.DBSystemRedis)); err != nil { log.Logger().Error("failed to add tracing for valkey", zap.Error(err)) return nil, errors.Trace(err) @@ -88,6 +91,16 @@ type RedisValkey struct { Redis } +// Init initializes the Valkey cache store. It delegates to Redis.Init() which +// requires FT.* (search) commands. If the valkey-search module is not loaded, +// this provides a clear error message. +func (v *RedisValkey) Init() error { + if err := v.Redis.Init(); err != nil { + return fmt.Errorf("valkey cache store requires the valkey-search module (FT.* commands): %w", err) + } + return nil +} + // AddTimeSeriesPoints stores time series points using sorted sets + hashes. 
// Each series uses two keys: // - Sorted set (ts_index:{name}): score = timestamp_ms, member = timestamp_ms string @@ -121,7 +134,10 @@ func (v *RedisValkey) GetTimeSeriesPoints(ctx context.Context, name string, begi beginMs := begin.UnixMilli() endMs := end.UnixMilli() - // Fetch all timestamps in range from sorted set. + // NOTE: This fetches all timestamps in the requested range into memory for + // client-side aggregation. For very large ranges with high-density data this + // could be expensive. Current usage (metrics with bounded density) is fine. + // If this becomes a bottleneck, add Count/Offset pagination here. members, err := v.client.ZRangeByScore(ctx, indexKey, &redis.ZRangeBy{ Min: strconv.FormatInt(beginMs, 10), Max: strconv.FormatInt(endMs, 10), @@ -130,7 +146,7 @@ func (v *RedisValkey) GetTimeSeriesPoints(ctx context.Context, name string, begi return nil, errors.Trace(err) } if len(members) == 0 { - return nil, nil + return []TimeSeriesPoint{}, nil } // Fetch corresponding values from hash. diff --git a/storage/docker-compose.yml b/storage/docker-compose.yml index 3382766f5..82447998c 100644 --- a/storage/docker-compose.yml +++ b/storage/docker-compose.yml @@ -6,10 +6,10 @@ services: - 6379:6379 # Valkey with valkey-search 1.2.0+ (full-text search support). - # Bundle 'unstable' is the only published tag with search >= 1.2. - # Switch to a stable tag (e.g. 9.1) once it is released. + # Using 9.1-rc2 which includes search module 1.2.0. + # Switch to stable 9.1 once released. valkey: - image: valkey/valkey-bundle:unstable + image: valkey/valkey-bundle:9.1-rc2 ports: - 6380:6379