From 8e526b2c5fa173eb05346fee902c79522dd48b98 Mon Sep 17 00:00:00 2001 From: "redisdocsapp[bot]" <177626021+redisdocsapp[bot]@users.noreply.github.com> Date: Fri, 4 Jul 2025 00:24:27 +0000 Subject: [PATCH] Update for redisvl 0.8.0 --- content/develop/ai/redisvl/api/_index.md | 5 - content/develop/ai/redisvl/api/cache.md | 15 +- content/develop/ai/redisvl/api/filter.md | 2 - .../develop/ai/redisvl/api/message_history.md | 4 +- content/develop/ai/redisvl/api/query.md | 14 +- content/develop/ai/redisvl/api/reranker.md | 2 - content/develop/ai/redisvl/api/router.md | 6 +- content/develop/ai/redisvl/api/schema.md | 2 - content/develop/ai/redisvl/api/searchindex.md | 82 +++++--- content/develop/ai/redisvl/api/vectorizer.md | 8 +- content/develop/ai/redisvl/overview/_index.md | 2 - content/develop/ai/redisvl/overview/cli.md | 29 ++- .../ai/redisvl/overview/installation.md | 2 - .../develop/ai/redisvl/user_guide/_index.md | 41 +--- .../ai/redisvl/user_guide/embeddings_cache.md | 75 ++++++- .../ai/redisvl/user_guide/getting_started.md | 197 +++--------------- .../ai/redisvl/user_guide/hash_vs_json.md | 90 ++++---- .../ai/redisvl/user_guide/hybrid_queries.md | 160 ++++++-------- .../develop/ai/redisvl/user_guide/llmcache.md | 132 ++++++------ .../ai/redisvl/user_guide/message_history.md | 48 ++++- .../ai/redisvl/user_guide/rerankers.md | 2 - .../ai/redisvl/user_guide/semantic_router.md | 91 ++++---- .../ai/redisvl/user_guide/vectorizers.md | 6 +- 23 files changed, 469 insertions(+), 546 deletions(-) diff --git a/content/develop/ai/redisvl/api/_index.md b/content/develop/ai/redisvl/api/_index.md index 079e66ced..444d0b425 100644 --- a/content/develop/ai/redisvl/api/_index.md +++ b/content/develop/ai/redisvl/api/_index.md @@ -3,8 +3,6 @@ linkTitle: RedisVL API title: RedisVL API weight: 5 hideListLinks: true -aliases: -- /integrate/redisvl/api --- @@ -59,6 +57,3 @@ Reference documentation for the RedisVL API. 
* [Route](router/#route) * [Route Match](router/#route-match) * [Distance Aggregation Method](router/#distance-aggregation-method) -* [Threshold Optimizers](threshold_optimizer/) - * [CacheThresholdOptimizer](threshold_optimizer/#cachethresholdoptimizer) - * [RouterThresholdOptimizer](threshold_optimizer/#routerthresholdoptimizer) diff --git a/content/develop/ai/redisvl/api/cache.md b/content/develop/ai/redisvl/api/cache.md index 699ddacb6..fde267c79 100644 --- a/content/develop/ai/redisvl/api/cache.md +++ b/content/develop/ai/redisvl/api/cache.md @@ -1,8 +1,6 @@ --- linkTitle: LLM cache title: LLM Cache -aliases: -- /integrate/redisvl/api/cache --- @@ -79,7 +77,7 @@ LLM responses. ```python response = await cache.acheck( - prompt="What is the captial city of France?" + prompt="What is the capital city of France?" ) ``` @@ -169,7 +167,7 @@ Async stores the specified key-value pair in the cache along with metadata. ```python key = await cache.astore( - prompt="What is the captial city of France?", + prompt="What is the capital city of France?", response="Paris", metadata={"city": "Paris", "country": "France"} ) @@ -233,7 +231,7 @@ LLM responses. ```python response = cache.check( - prompt="What is the captial city of France?" + prompt="What is the capital city of France?" ) ``` @@ -347,7 +345,7 @@ Stores the specified key-value pair in the cache along with metadata. ```python key = cache.store( - prompt="What is the captial city of France?", + prompt="What is the capital city of France?", response="Paris", metadata={"city": "Paris", "country": "France"} ) @@ -408,7 +406,7 @@ The default TTL, in seconds, for entries in the cache. 
-### `class EmbeddingsCache(name='embedcache', ttl=None, redis_client=None, redis_url='redis://localhost:6379', connection_kwargs={})` +### `class EmbeddingsCache(name='embedcache', ttl=None, redis_client=None, async_redis_client=None, redis_url='redis://localhost:6379', connection_kwargs={})` Bases: `BaseCache` @@ -419,9 +417,10 @@ Initialize an embeddings cache. * **Parameters:** * **name** (*str*) – The name of the cache. Defaults to “embedcache”. * **ttl** (*Optional* *[* *int* *]*) – The time-to-live for cached embeddings. Defaults to None. - * **redis_client** (*Optional* *[* *Redis* *]*) – Redis client instance. Defaults to None. + * **redis_client** (*Optional* *[* *SyncRedisClient* *]*) – Redis client instance. Defaults to None. * **redis_url** (*str*) – Redis URL for connection. Defaults to “redis://localhost:6379”. * **connection_kwargs** (*Dict* *[* *str* *,* *Any* *]*) – Redis connection arguments. Defaults to {}. + * **async_redis_client** (*Redis* *|* *RedisCluster* *|* *None*) * **Raises:** **ValueError** – If vector dimensions are invalid diff --git a/content/develop/ai/redisvl/api/filter.md b/content/develop/ai/redisvl/api/filter.md index b5cd683c2..23f028aaf 100644 --- a/content/develop/ai/redisvl/api/filter.md +++ b/content/develop/ai/redisvl/api/filter.md @@ -1,8 +1,6 @@ --- linkTitle: Filter title: Filter -aliases: -- /integrate/redisvl/api/filter --- diff --git a/content/develop/ai/redisvl/api/message_history.md b/content/develop/ai/redisvl/api/message_history.md index 4c283d008..8c7f47029 100644 --- a/content/develop/ai/redisvl/api/message_history.md +++ b/content/develop/ai/redisvl/api/message_history.md @@ -1,8 +1,6 @@ --- linkTitle: LLM message history title: LLM Message History -aliases: -- /integrate/redisvl/api/message_history --- @@ -93,7 +91,7 @@ Remove a specific exchange from the message history. #### `get_recent(top_k=5, as_text=False, raw=False, session_tag=None)` -Retreive the recent message history in sequential order. 
+Retrieve the recent message history in sequential order. * **Parameters:** * **top_k** (*int*) – The number of previous exchanges to return. Default is 5. diff --git a/content/develop/ai/redisvl/api/query.md b/content/develop/ai/redisvl/api/query.md index ec594f2f6..59b262f15 100644 --- a/content/develop/ai/redisvl/api/query.md +++ b/content/develop/ai/redisvl/api/query.md @@ -1,8 +1,6 @@ --- linkTitle: Query title: Query -aliases: -- /integrate/redisvl/api/query --- @@ -174,6 +172,8 @@ Add fields to return fields. Use a different scoring function to evaluate document relevance. Default is TFIDF. +Since Redis 8.0 default was changed to BM25STD. + * **Parameters:** **scorer** (*str*) – The scoring function to use (e.g. TFIDF.DOCNORM or BM25) @@ -488,6 +488,8 @@ Add fields to return fields. Use a different scoring function to evaluate document relevance. Default is TFIDF. +Since Redis 8.0 default was changed to BM25STD. + * **Parameters:** **scorer** (*str*) – The scoring function to use (e.g. TFIDF.DOCNORM or BM25) @@ -934,7 +936,7 @@ A query for running a full text search, along with an optional filter expression * **text_field_name** (*str*) – The name of the document field to perform text search on. * **text_scorer** (*str* *,* *optional*) – The text scoring algorithm to use. Defaults to BM25STD. Options are {TFIDF, BM25STD, BM25, TFIDF.DOCNORM, DISMAX, DOCSCORE}. - See [https://redis.io/docs/latest/develop/ai/search-and-query/advanced-concepts/scoring/](https://redis.io/docs/latest/develop/ai/search-and-query/advanced-concepts/scoring/) + See [https://redis.io/docs/latest/develop/interact/search-and-query/advanced-concepts/scoring/](https://redis.io/docs/latest/develop/interact/search-and-query/advanced-concepts/scoring/) * **filter_expression** (*Union* *[* *str* *,* [*FilterExpression*]({{< relref "filter/#filterexpression" >}}) *]* *,* *optional*) – A filter to apply along with the text search. Defaults to None. 
* **return_fields** (*List* *[* *str* *]*) – The declared fields to return with search @@ -1070,6 +1072,8 @@ Add fields to return fields. Use a different scoring function to evaluate document relevance. Default is TFIDF. +Since Redis 8.0 default was changed to BM25STD. + * **Parameters:** **scorer** (*str*) – The scoring function to use (e.g. TFIDF.DOCNORM or BM25) @@ -1282,6 +1286,8 @@ Add fields to return fields. Use a different scoring function to evaluate document relevance. Default is TFIDF. +Since Redis 8.0 default was changed to BM25STD. + * **Parameters:** **scorer** (*str*) – The scoring function to use (e.g. TFIDF.DOCNORM or BM25) @@ -1499,6 +1505,8 @@ Add fields to return fields. Use a different scoring function to evaluate document relevance. Default is TFIDF. +Since Redis 8.0 default was changed to BM25STD. + * **Parameters:** **scorer** (*str*) – The scoring function to use (e.g. TFIDF.DOCNORM or BM25) diff --git a/content/develop/ai/redisvl/api/reranker.md b/content/develop/ai/redisvl/api/reranker.md index 0eda469c1..9aabd96c2 100644 --- a/content/develop/ai/redisvl/api/reranker.md +++ b/content/develop/ai/redisvl/api/reranker.md @@ -1,8 +1,6 @@ --- linkTitle: Rerankers title: Rerankers -aliases: -- /integrate/redisvl/api/reranker --- diff --git a/content/develop/ai/redisvl/api/router.md b/content/develop/ai/redisvl/api/router.md index 12c67c3f8..48b7e3ef6 100644 --- a/content/develop/ai/redisvl/api/router.md +++ b/content/develop/ai/redisvl/api/router.md @@ -1,8 +1,6 @@ --- linkTitle: Semantic router title: Semantic Router -aliases: -- /integrate/redisvl/api/router --- @@ -21,7 +19,7 @@ Initialize the SemanticRouter. * **routes** (*List* *[*[Route](#route) *]*) – List of Route objects. * **vectorizer** (*BaseVectorizer* *,* *optional*) – The vectorizer used to embed route references. Defaults to default HFTextVectorizer. * **routing_config** ([RoutingConfig](#routingconfig) *,* *optional*) – Configuration for routing behavior. 
Defaults to the default RoutingConfig. - * **redis_client** (*Optional* *[* *Redis* *]* *,* *optional*) – Redis client for connection. Defaults to None. + * **redis_client** (*Optional* *[* *SyncRedisClient* *]* *,* *optional*) – Redis client for connection. Defaults to None. * **redis_url** (*str* *,* *optional*) – The redis url. Defaults to redis://localhost:6379. * **overwrite** (*bool* *,* *optional*) – Whether to overwrite existing index. Defaults to False. * **connection_kwargs** (*Dict* *[* *str* *,* *Any* *]*) – The connection arguments @@ -99,7 +97,7 @@ Return SemanticRouter instance from existing index. * **Parameters:** * **name** (*str*) - * **redis_client** (*Redis* *|* *None*) + * **redis_client** (*Redis* *|* *RedisCluster* *|* *None*) * **redis_url** (*str*) * **Return type:** [SemanticRouter](#semanticrouter) diff --git a/content/develop/ai/redisvl/api/schema.md b/content/develop/ai/redisvl/api/schema.md index cc7f43548..9f82e8ab7 100644 --- a/content/develop/ai/redisvl/api/schema.md +++ b/content/develop/ai/redisvl/api/schema.md @@ -1,8 +1,6 @@ --- linkTitle: Schema title: Schema -aliases: -- /integrate/redisvl/api/schema --- diff --git a/content/develop/ai/redisvl/api/searchindex.md b/content/develop/ai/redisvl/api/searchindex.md index 513123071..85567ff8e 100644 --- a/content/develop/ai/redisvl/api/searchindex.md +++ b/content/develop/ai/redisvl/api/searchindex.md @@ -1,8 +1,6 @@ --- linkTitle: Search index classes title: Search Index Classes -aliases: -- /integrate/redisvl/api/searchindex --- @@ -49,7 +47,7 @@ kwargs. * **Parameters:** * **schema** ([*IndexSchema*]({{< relref "schema/#indexschema" >}})) – Index schema object. - * **redis_client** (*Optional* *[* *redis.Redis* *]*) – An + * **redis_client** (*Optional* *[* *Redis* *]*) – An instantiated redis client. * **redis_url** (*Optional* *[* *str* *]*) – The URL of the Redis server to connect to. 
@@ -89,13 +87,13 @@ This method takes a list of queries and optionally query params and returns a list of Result objects for each query. Results are returned in the same order as the queries. +NOTE: Cluster users may need to incorporate hash tags into their query +to avoid cross-slot operations. + * **Parameters:** - * **queries** (*List* *[* *SearchParams* *]*) – The queries to search for. batch_size - * **(* ***int** – The number of queries to search for at a time. - Defaults to 10. - * **optional****)** – The number of queries to search for at a time. + * **queries** (*List* *[* *SearchParams* *]*) – The queries to search for. + * **batch_size** (*int* *,* *optional*) – The number of queries to search for at a time. Defaults to 10. - * **batch_size** (*int*) * **Returns:** The search results for each query. * **Return type:** @@ -106,6 +104,10 @@ returned in the same order as the queries. Clear all keys in Redis associated with the index, leaving the index available and in-place for future insertions or updates. +NOTE: This method requires custom behavior for Redis Cluster because +here, we can’t easily give control of the keys we’re clearing to the +user so they can separate them based on hash tag. + * **Returns:** Count of records deleted from Redis. * **Return type:** @@ -177,6 +179,10 @@ Remove documents from the index by their document IDs. This method converts document IDs to Redis keys automatically by applying the index’s key prefix and separator configuration. +NOTE: Cluster users will need to incorporate hash tags into their +document IDs and only call this method with documents from a single hash +tag at a time. + * **Parameters:** **ids** (*Union* *[* *str* *,* *List* *[* *str* *]* *]*) – The document ID or IDs to remove from the index. * **Returns:** @@ -262,7 +268,7 @@ Initialize from an existing search index in Redis by index name. * **Parameters:** * **name** (*str*) – Name of the search index in Redis. 
- * **redis_client** (*Optional* *[* *redis.Redis* *]*) – An + * **redis_client** (*Optional* *[* *Redis* *]*) – An instantiated redis client. * **redis_url** (*Optional* *[* *str* *]*) – The URL of the Redis server to connect to. @@ -437,12 +443,12 @@ Async Redis client. It is useful for cases where an external, custom-configured client is preferred instead of creating a new one. * **Parameters:** - **redis_client** (*redis.Redis*) – A Redis or Async Redis + **redis_client** (*Redis*) – A Redis or Async Redis client instance to be used for the connection. * **Raises:** **TypeError** – If the provided client is not valid. -#### `property client: Redis | None` +#### `property client: Redis | RedisCluster | None` The underlying redis-py client object. @@ -504,7 +510,7 @@ Initialize the RedisVL async search index with a schema. * **schema** ([*IndexSchema*]({{< relref "schema/#indexschema" >}})) – Index schema object. * **redis_url** (*Optional* *[* *str* *]* *,* *optional*) – The URL of the Redis server to connect to. - * **redis_client** (*Optional* *[* *aredis.Redis* *]*) – An + * **redis_client** (*Optional* *[* *AsyncRedis* *]*) – An instantiated redis client. * **connection_kwargs** (*Optional* *[* *Dict* *[* *str* *,* *Any* *]* *]*) – Redis client connection args. @@ -536,28 +542,42 @@ Asynchronously execute a batch of queries and process results. #### `async batch_search(queries, batch_size=10)` -Perform a search against the index for multiple queries. +Asynchronously execute a batch of search queries. + +This method takes a list of search queries and executes them in batches +to improve performance when dealing with multiple queries. -This method takes a list of queries and returns a list of Result objects -for each query. Results are returned in the same order as the queries. +NOTE: Cluster users may need to incorporate hash tags into their query +to avoid cross-slot operations. 
* **Parameters:** - * **queries** (*List* *[* *SearchParams* *]*) – The queries to search for. batch_size - * **(* ***int** – The number of queries to search for at a time. - Defaults to 10. - * **optional****)** – The number of queries to search for at a time. - Defaults to 10. - * **batch_size** (*int*) + * **queries** (*List* *[* *SearchParams* *]*) – A list of search queries to execute. + Each query can be either a string or a tuple of (query, params). + * **batch_size** (*int* *,* *optional*) – The number of queries to execute in each + batch. Defaults to 10. * **Returns:** - The search results for each query. + A list of search results corresponding to each query. * **Return type:** List[Result] +```python +queries = [ + "hello world", + ("goodbye world", {"num_results": 5}), +] + +results = await index.batch_search(queries) +``` + #### `async clear()` Clear all keys in Redis associated with the index, leaving the index available and in-place for future insertions or updates. +NOTE: This method requires custom behavior for Redis Cluster because here, +we can’t easily give control of the keys we’re clearing to the user so they +can separate them based on hash tag. + * **Returns:** Count of records deleted from Redis. * **Return type:** @@ -618,6 +638,10 @@ Remove documents from the index by their document IDs. This method converts document IDs to Redis keys automatically by applying the index’s key prefix and separator configuration. +NOTE: Cluster users will need to incorporate hash tags into their +document IDs and only call this method with documents from a single hash +tag at a time. + * **Parameters:** **ids** (*Union* *[* *str* *,* *List* *[* *str* *]* *]*) – The document ID or IDs to remove from the index. * **Returns:** @@ -701,7 +725,7 @@ Initialize from an existing search index in Redis by index name. * **Parameters:** * **name** (*str*) – Name of the search index in Redis. 
- * **redis_client** (*Optional* *[* *redis.Redis* *]*) – An + * **redis_client** (*Optional* *[* *Redis* *]*) – An instantiated redis client. * **redis_url** (*Optional* *[* *str* *]*) – The URL of the Redis server to connect to. @@ -873,11 +897,11 @@ results = await index.query(query) #### `async search(*args, **kwargs)` -Perform a search on this index. +Perform an async search against the index. -Wrapper around redis.search.Search that adds the index name -to the search query and passes along the rest of the arguments -to the redis-py ft.search() method. +Wrapper around the search API that adds the index name +to the query and passes along the rest of the arguments +to the redis-py ft().search() method. * **Returns:** Raw Redis search results. @@ -890,9 +914,9 @@ to the redis-py ft.search() method. This method is deprecated; please provide connection parameters in \_\_init_\_. * **Parameters:** - **redis_client** (*Redis* *|* *Redis*) + **redis_client** (*Redis* *|* *RedisCluster* *|* *Redis* *|* *RedisCluster*) -#### `property client: Redis | None` +#### `property client: Redis | RedisCluster | None` The underlying redis-py client object. diff --git a/content/develop/ai/redisvl/api/vectorizer.md b/content/develop/ai/redisvl/api/vectorizer.md index 968ed8897..9133cec9b 100644 --- a/content/develop/ai/redisvl/api/vectorizer.md +++ b/content/develop/ai/redisvl/api/vectorizer.md @@ -1,8 +1,6 @@ --- linkTitle: Vectorizers title: Vectorizers -aliases: -- /integrate/redisvl/api/vectorizer --- @@ -545,7 +543,7 @@ embeddings for text data. This vectorizer is designed to accept a provided callable text vectorizer and provides a class definition to allow for compatibility with RedisVL. The vectorizer may support both synchronous and asynchronous operations which -allows for batch processing of texts, but at a minimum only syncronous embedding +allows for batch processing of texts, but at a minimum only synchronous embedding is required to satisfy the ‘embed()’ method. 
You can optionally enable caching to improve performance when generating @@ -585,8 +583,8 @@ Initialize the Custom vectorizer. * **Parameters:** * **embed** (*Callable*) – a Callable function that accepts a string object and returns a list of floats. * **embed_many** (*Optional* *[* *Callable* *]*) – a Callable function that accepts a list of string objects and returns a list containing lists of floats. Defaults to None. - * **aembed** (*Optional* *[* *Callable* *]*) – an asyncronous Callable function that accepts a string object and returns a lists of floats. Defaults to None. - * **aembed_many** (*Optional* *[* *Callable* *]*) – an asyncronous Callable function that accepts a list of string objects and returns a list containing lists of floats. Defaults to None. + * **aembed** (*Optional* *[* *Callable* *]*) – an asynchronous Callable function that accepts a string object and returns a lists of floats. Defaults to None. + * **aembed_many** (*Optional* *[* *Callable* *]*) – an asynchronous Callable function that accepts a list of string objects and returns a list containing lists of floats. Defaults to None. * **dtype** (*str*) – the default datatype to use when embedding text as byte arrays. Used when setting as_buffer=True in calls to embed() and embed_many(). Defaults to ‘float32’. 
diff --git a/content/develop/ai/redisvl/overview/_index.md b/content/develop/ai/redisvl/overview/_index.md index ebfdcbdbd..9b0599ecd 100644 --- a/content/develop/ai/redisvl/overview/_index.md +++ b/content/develop/ai/redisvl/overview/_index.md @@ -3,8 +3,6 @@ linkTitle: Overview title: Overview weight: 3 hideListLinks: true -aliases: -- /integrate/redisvl/overview --- diff --git a/content/develop/ai/redisvl/overview/cli.md b/content/develop/ai/redisvl/overview/cli.md index c0a500512..40cacfd56 100644 --- a/content/develop/ai/redisvl/overview/cli.md +++ b/content/develop/ai/redisvl/overview/cli.md @@ -1,8 +1,6 @@ --- linkTitle: The RedisVL CLI title: The RedisVL CLI -aliases: -- /integrate/redisvl/overview/cli --- @@ -20,7 +18,7 @@ Before running this notebook, be sure to !rvl version ``` - 19:16:18 [RedisVL] INFO RedisVL version 0.5.2 + 12:42:18 [RedisVL] INFO RedisVL version 0.8.0 ## Commands @@ -75,7 +73,7 @@ fields: !rvl index create -s schema.yaml ``` - 19:16:21 [RedisVL] INFO Index created successfully + 12:42:45 [RedisVL] INFO Index created successfully @@ -84,8 +82,8 @@ fields: !rvl index listall ``` - 19:16:24 [RedisVL] INFO Indices: - 19:16:24 [RedisVL] INFO 1. vectorizers + 12:42:47 [RedisVL] INFO Indices: + 12:42:47 [RedisVL] INFO 1. vectorizers @@ -117,7 +115,7 @@ fields: !rvl index delete -i vectorizers ``` - 19:16:29 [RedisVL] INFO Index deleted successfully + 12:42:54 [RedisVL] INFO Index deleted successfully @@ -126,7 +124,7 @@ fields: !rvl index listall ``` - 19:16:32 [RedisVL] INFO Indices: + 12:42:56 [RedisVL] INFO Indices: ## Stats @@ -140,7 +138,7 @@ The ``rvl stats`` command will return some basic information about the index. Th !rvl index create -s schema.yaml ``` - 19:16:35 [RedisVL] INFO Index created successfully + 12:42:59 [RedisVL] INFO Index created successfully @@ -149,8 +147,8 @@ The ``rvl stats`` command will return some basic information about the index. 
Th !rvl index listall ``` - 19:16:38 [RedisVL] INFO Indices: - 19:16:38 [RedisVL] INFO 1. vectorizers + 12:43:01 [RedisVL] INFO Indices: + 12:43:01 [RedisVL] INFO 1. vectorizers @@ -207,8 +205,8 @@ By default rvl first checks if you have `REDIS_URL` environment variable defined !rvl index listall --host localhost --port 6379 ``` - 19:16:43 [RedisVL] INFO Indices: - 19:16:43 [RedisVL] INFO 1. vectorizers + 12:43:06 [RedisVL] INFO Indices: + 12:43:06 [RedisVL] INFO 1. vectorizers ### Using SSL encription @@ -221,13 +219,10 @@ You can similarly specify the username and password to construct the full Redis !rvl index listall --user jane_doe -a password123 --ssl ``` - 19:16:46 [RedisVL] ERROR Error 8 connecting to rediss:6379. nodename nor servname provided, or not known. - - ```python !rvl index destroy -i vectorizers ``` - 19:16:49 [RedisVL] INFO Index deleted successfully + 12:43:09 [RedisVL] INFO Index deleted successfully diff --git a/content/develop/ai/redisvl/overview/installation.md b/content/develop/ai/redisvl/overview/installation.md index 347274348..4d0e88bf3 100644 --- a/content/develop/ai/redisvl/overview/installation.md +++ b/content/develop/ai/redisvl/overview/installation.md @@ -1,8 +1,6 @@ --- linkTitle: Install RedisVL title: Install RedisVL -aliases: -- /integrate/redisvl/overview/installation --- diff --git a/content/develop/ai/redisvl/user_guide/_index.md b/content/develop/ai/redisvl/user_guide/_index.md index fe7fd536d..eb934d2ba 100644 --- a/content/develop/ai/redisvl/user_guide/_index.md +++ b/content/develop/ai/redisvl/user_guide/_index.md @@ -3,8 +3,6 @@ linkTitle: User guides title: User Guides weight: 4 hideListLinks: true -aliases: -- /integrate/redisvl/user_guide --- @@ -30,13 +28,12 @@ User guides provide helpful resources for using RedisVL and its different compon * [Count Queries](hybrid_queries/#count-queries) * [Range Queries](hybrid_queries/#range-queries) * [Advanced Query Modifiers](hybrid_queries/#advanced-query-modifiers) -* 
[Semantic Caching for LLMs](llmcache/) - * [Initializing `SemanticCache`](llmcache/#initializing-semanticcache) - * [Basic Cache Usage](llmcache/#basic-cache-usage) - * [Customize the Distance Threshhold](llmcache/#customize-the-distance-threshhold) - * [Utilize TTL](llmcache/#utilize-ttl) - * [Simple Performance Testing](llmcache/#simple-performance-testing) - * [Cache Access Controls, Tags & Filters](llmcache/#cache-access-controls-tags-filters) +* [Initializing `SemanticCache`](llmcache/) +* [Basic Cache Usage](llmcache/#basic-cache-usage) +* [Customize the Distance Threshold](llmcache/#customize-the-distance-threshold) +* [Utilize TTL](llmcache/#utilize-ttl) +* [Simple Performance Testing](llmcache/#simple-performance-testing) +* [Cache Access Controls, Tags & Filters](llmcache/#cache-access-controls-tags-filters) * [Caching Embeddings](embeddings_cache/) * [Setup](embeddings_cache/#setup) * [Initializing the EmbeddingsCache](embeddings_cache/#initializing-the-embeddingscache) @@ -71,27 +68,7 @@ User guides provide helpful resources for using RedisVL and its different compon * [Simple routing](semantic_router/#simple-routing) * [Update the routing config](semantic_router/#update-the-routing-config) * [Router serialization](semantic_router/#router-serialization) -* [Add route references](semantic_router/#add-route-references) -* [Get route references](semantic_router/#get-route-references) -* [Delete route references](semantic_router/#delete-route-references) + * [Add route references](semantic_router/#add-route-references) + * [Get route references](semantic_router/#get-route-references) + * [Delete route references](semantic_router/#delete-route-references) * [Clean up the router](semantic_router/#clean-up-the-router) -* [Threshold Optimization](threshold_optimization/) -* [CacheThresholdOptimizer](threshold_optimization/#cachethresholdoptimizer) - * [Define test_data and optimize](threshold_optimization/#define-test-data-and-optimize) -* 
[RouterThresholdOptimizer](threshold_optimization/#routerthresholdoptimizer) - * [Define the routes](threshold_optimization/#define-the-routes) - * [Initialize the SemanticRouter](threshold_optimization/#initialize-the-semanticrouter) - * [Provide test_data](threshold_optimization/#provide-test-data) - * [Optimize](threshold_optimization/#optimize) - * [Test it out](threshold_optimization/#test-it-out) - * [Cleanup](threshold_optimization/#cleanup) -* [Release Guides](release_guide/) - * [0.5.1 Feature Overview](release_guide/0_5_0_release/) - * [HybridQuery class](release_guide/0_5_0_release/#hybridquery-class) - * [TextQueries](release_guide/0_5_0_release/#textqueries) - * [Threshold optimization](release_guide/0_5_0_release/#threshold-optimization) - * [Schema validation](release_guide/0_5_0_release/#schema-validation) - * [Timestamp filters](release_guide/0_5_0_release/#timestamp-filters) - * [Batch search](release_guide/0_5_0_release/#batch-search) - * [Vector normalization](release_guide/0_5_0_release/#vector-normalization) - * [Hybrid policy on knn with filters](release_guide/0_5_0_release/#hybrid-policy-on-knn-with-filters) diff --git a/content/develop/ai/redisvl/user_guide/embeddings_cache.md b/content/develop/ai/redisvl/user_guide/embeddings_cache.md index 2271e63e0..505b5fbf0 100644 --- a/content/develop/ai/redisvl/user_guide/embeddings_cache.md +++ b/content/develop/ai/redisvl/user_guide/embeddings_cache.md @@ -2,8 +2,6 @@ linkTitle: Caching embeddings title: Caching Embeddings weight: 10 -aliases: -- /integrate/redisvl/user_guide/embeddings_cache --- @@ -44,9 +42,19 @@ vectorizer = HFTextVectorizer( ) ``` - /Users/tyler.hutcherson/Library/Caches/pypoetry/virtualenvs/redisvl-VnTEShF2-py3.13/lib/python3.13/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html + /Users/tyler.hutcherson/Documents/AppliedAI/redis-vl-python/.venv/lib/python3.13/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html from .autonotebook import tqdm as notebook_tqdm - Compiling the model with `torch.compile` and using a `torch.mps` device is not supported. Falling back to non-compiled mode. + + + 13:06:09 sentence_transformers.SentenceTransformer INFO Use pytorch device_name: mps + 13:06:09 sentence_transformers.SentenceTransformer INFO Load pretrained SentenceTransformer: redis/langcache-embed-v1 + 13:06:09 sentence_transformers.SentenceTransformer WARNING You try to use a model that was created with version 4.1.0, however, your version is 3.4.1. This might cause unexpected behavior or errors. In that case, try to update to the latest version. + + + + + + Batches: 100%|██████████| 1/1 [00:00<00:00, 4.09it/s] ## Initializing the EmbeddingsCache @@ -97,9 +105,14 @@ key = cache.set( print(f"Stored with key: {key[:15]}...") ``` + Batches: 100%|██████████| 1/1 [00:00<00:00, 3.18it/s] + Stored with key: embedcache:909f... + + + ### Retrieving Embeddings To retrieve an embedding from the cache, use the `get` method with the original text and model name: @@ -251,11 +264,18 @@ cache.mdrop(texts, model_name) # cache.mdrop_by_keys(keys) # Delete by keys ``` + Batches: 100%|██████████| 1/1 [00:00<00:00, 21.37it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 9.04it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 20.84it/s] + Stored 3 embeddings with batch operation All embeddings exist: True Retrieved 3 embeddings in one operation + + + Batch operations are particularly beneficial when working with large numbers of embeddings. They provide the same functionality as individual operations but with better performance by reducing network roundtrips. 
For asynchronous applications, async versions of all batch methods are also available with the `am` prefix (e.g., `amset`, `amget`, `amexists`, `amdrop`). @@ -427,6 +447,19 @@ for query in set(queries): # Use set to get unique queries example_cache.drop(text=query, model_name=model_name) ``` + 13:06:20 sentence_transformers.SentenceTransformer INFO Use pytorch device_name: mps + 13:06:20 sentence_transformers.SentenceTransformer INFO Load pretrained SentenceTransformer: redis/langcache-embed-v1 + 13:06:20 sentence_transformers.SentenceTransformer WARNING You try to use a model that was created with version 4.1.0, however, your version is 3.4.1. This might cause unexpected behavior or errors. In that case, try to update to the latest version. + + + + + + Batches: 100%|██████████| 1/1 [00:00<00:00, 21.84it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 22.04it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 22.62it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 22.71it/s] + Statistics: Total queries: 5 @@ -435,6 +468,9 @@ for query in set(queries): # Use set to get unique queries Cache hit rate: 40.0% + + + ## Performance Benchmark Let's run benchmarks to compare the performance of embedding with and without caching, as well as batch versus individual operations. 
@@ -483,17 +519,36 @@ print(f"Latency reduction: {latency_reduction:.4f} seconds per query") ``` Benchmarking without caching: - Time taken without caching: 0.4735 seconds - Average time per embedding: 0.0474 seconds + + + Batches: 100%|██████████| 1/1 [00:00<00:00, 21.51it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 23.21it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 23.96it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 23.28it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 22.69it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 22.98it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 23.17it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 24.12it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 23.37it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 23.24it/s] + + + Time taken without caching: 0.4549 seconds + Average time per embedding: 0.0455 seconds Benchmarking with caching: - Time taken with caching: 0.0663 seconds + + + Batches: 100%|██████████| 1/1 [00:00<00:00, 23.69it/s] + + + Time taken with caching: 0.0664 seconds Average time per embedding: 0.0066 seconds Performance comparison: - Speedup with caching: 7.14x faster - Time saved: 0.4073 seconds (86.0%) - Latency reduction: 0.0407 seconds per query + Speedup with caching: 6.86x faster + Time saved: 0.3885 seconds (85.4%) + Latency reduction: 0.0389 seconds per query ## Common Use Cases for Embedding Caching diff --git a/content/develop/ai/redisvl/user_guide/getting_started.md b/content/develop/ai/redisvl/user_guide/getting_started.md index ef82f4b78..0e6f6277b 100644 --- a/content/develop/ai/redisvl/user_guide/getting_started.md +++ b/content/develop/ai/redisvl/user_guide/getting_started.md @@ -2,8 +2,6 @@ linkTitle: Getting started with RedisVL title: Getting Started with RedisVL weight: 01 -aliases: -- /integrate/redisvl/user_guide/getting_started --- `redisvl` is a versatile Python library with an integrated CLI, designed to enhance AI applications using Redis. 
This guide will walk you through the following steps: @@ -172,6 +170,9 @@ Now that we are connected to Redis, we need to run the create command. index.create(overwrite=True) ``` + 13:00:22 redisvl.index.index INFO Index already exists, overwriting. + + Note that at this point, the index has no entries. Data loading follows. ## Inspect with the `rvl` CLI @@ -182,8 +183,8 @@ Use the `rvl` CLI to inspect the created index and its fields: !rvl index listall ``` - 19:17:09 [RedisVL] INFO Indices: - 19:17:09 [RedisVL] INFO 1. user_simple + 13:00:24 [RedisVL] INFO Indices: + 13:00:24 [RedisVL] INFO 1. user_simple @@ -225,164 +226,30 @@ keys = index.load(data) print(keys) ``` - ['user_simple_docs:01JT4PPPNJZMSK2395RKD208T9', 'user_simple_docs:01JT4PPPNM63J55ZESZ4TV1VR8', 'user_simple_docs:01JT4PPPNM59RCKS2YQ58B1HQW'] + ['user_simple_docs:01JY4J4Y08GFY10VMB9D4YDMZQ', 'user_simple_docs:01JY4J4Y0AY2MKJ24QXQS2Q2YS', 'user_simple_docs:01JY4J4Y0A9GFF2XG1R81EFD4Z'] By default, `load` will create a unique Redis key as a combination of the index key `prefix` and a random ULID. You can also customize the key by providing direct keys or pointing to a specified `id_field` on load. -### Load invalid data +### Load INVALID data This will raise a `SchemaValidationError` if `validate_on_load` is set to true in the `SearchIndex` class. 
```python # NBVAL_SKIP -keys = index.load([{"user_embedding": True}]) +try: + keys = index.load([{"user_embedding": True}]) +except Exception as e: + print(str(e)) ``` - 19:17:21 redisvl.index.index ERROR Schema validation error while loading data - Traceback (most recent call last): - File "/Users/justin.cechmanek/Documents/redisvl/redisvl/index/storage.py", line 204, in _preprocess_and_validate_objects - processed_obj = self._validate(processed_obj) - File "/Users/justin.cechmanek/Documents/redisvl/redisvl/index/storage.py", line 160, in _validate - return validate_object(self.index_schema, obj) - File "/Users/justin.cechmanek/Documents/redisvl/redisvl/schema/validation.py", line 276, in validate_object - validated = model_class.model_validate(flat_obj) - File "/Users/justin.cechmanek/.pyenv/versions/3.13/envs/redisvl-dev/lib/python3.13/site-packages/pydantic/main.py", line 627, in model_validate - return cls.__pydantic_validator__.validate_python( - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^ - obj, strict=strict, from_attributes=from_attributes, context=context - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - ) - ^ - pydantic_core._pydantic_core.ValidationError: 1 validation error for user_simple__PydanticModel - user_embedding - Input should be a valid bytes [type=bytes_type, input_value=True, input_type=bool] - For further information visit https://errors.pydantic.dev/2.10/v/bytes_type - - The above exception was the direct cause of the following exception: - - Traceback (most recent call last): - File "/Users/justin.cechmanek/Documents/redisvl/redisvl/index/index.py", line 686, in load - return self._storage.write( - ~~~~~~~~~~~~~~~~~~~^ - self._redis_client, # type: ignore - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - ...<6 lines>... 
- validate=self._validate_on_load, - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - ) - ^ - File "/Users/justin.cechmanek/Documents/redisvl/redisvl/index/storage.py", line 265, in write - prepared_objects = self._preprocess_and_validate_objects( - list(objects), # Convert Iterable to List - ...<3 lines>... - validate=validate, - ) - File "/Users/justin.cechmanek/Documents/redisvl/redisvl/index/storage.py", line 211, in _preprocess_and_validate_objects - raise SchemaValidationError(str(e), index=i) from e - redisvl.exceptions.SchemaValidationError: Validation failed for object at index 0: 1 validation error for user_simple__PydanticModel - user_embedding - Input should be a valid bytes [type=bytes_type, input_value=True, input_type=bool] - For further information visit https://errors.pydantic.dev/2.10/v/bytes_type - - - - --------------------------------------------------------------------------- - - ValidationError Traceback (most recent call last) - - File ~/Documents/redisvl/redisvl/index/storage.py:204, in BaseStorage._preprocess_and_validate_objects(self, objects, id_field, keys, preprocess, validate) - 203 if validate: - --> 204 processed_obj = self._validate(processed_obj) - 206 # Store valid object with its key for writing - - - File ~/Documents/redisvl/redisvl/index/storage.py:160, in BaseStorage._validate(self, obj) - 159 # Pass directly to validation function and let any errors propagate - --> 160 return validate_object(self.index_schema, obj) - - - File ~/Documents/redisvl/redisvl/schema/validation.py:276, in validate_object(schema, obj) - 275 # Validate against model - --> 276 validated = model_class.model_validate(flat_obj) - 277 return validated.model_dump(exclude_none=True) - - - File ~/.pyenv/versions/3.13/envs/redisvl-dev/lib/python3.13/site-packages/pydantic/main.py:627, in BaseModel.model_validate(cls, obj, strict, from_attributes, context) - 626 __tracebackhide__ = True - --> 627 return cls.__pydantic_validator__.validate_python( - 628 obj, strict=strict, 
from_attributes=from_attributes, context=context - 629 ) - - - ValidationError: 1 validation error for user_simple__PydanticModel - user_embedding - Input should be a valid bytes [type=bytes_type, input_value=True, input_type=bool] - For further information visit https://errors.pydantic.dev/2.10/v/bytes_type - - - The above exception was the direct cause of the following exception: - - - SchemaValidationError Traceback (most recent call last) - - Cell In[31], line 3 - 1 # NBVAL_SKIP - ----> 3 keys = index.load([{"user_embedding": True}]) - - - File ~/Documents/redisvl/redisvl/index/index.py:686, in SearchIndex.load(self, data, id_field, keys, ttl, preprocess, batch_size) - 656 """Load objects to the Redis database. Returns the list of keys loaded - 657 to Redis. - 658 - (...) - 683 RedisVLError: If there's an error loading data to Redis. - 684 """ - 685 try: - --> 686 return self._storage.write( - 687 self._redis_client, # type: ignore - 688 objects=data, - 689 id_field=id_field, - 690 keys=keys, - 691 ttl=ttl, - 692 preprocess=preprocess, - 693 batch_size=batch_size, - 694 validate=self._validate_on_load, - 695 ) - 696 except SchemaValidationError: - 697 # Pass through validation errors directly - 698 logger.exception("Schema validation error while loading data") - - - File ~/Documents/redisvl/redisvl/index/storage.py:265, in BaseStorage.write(self, redis_client, objects, id_field, keys, ttl, preprocess, batch_size, validate) - 262 return [] - 264 # Pass 1: Preprocess and validate all objects - --> 265 prepared_objects = self._preprocess_and_validate_objects( - 266 list(objects), # Convert Iterable to List - 267 id_field=id_field, - 268 keys=keys, - 269 preprocess=preprocess, - 270 validate=validate, - 271 ) - 273 # Pass 2: Write all valid objects in batches - 274 added_keys = [] - - - File ~/Documents/redisvl/redisvl/index/storage.py:211, in BaseStorage._preprocess_and_validate_objects(self, objects, id_field, keys, preprocess, validate) - 207 
prepared_objects.append((key, processed_obj)) - 209 except ValidationError as e: - 210 # Convert Pydantic ValidationError to SchemaValidationError with index context - --> 211 raise SchemaValidationError(str(e), index=i) from e - 212 except Exception as e: - 213 # Capture other exceptions with context - 214 object_id = f"at index {i}" - - - SchemaValidationError: Validation failed for object at index 0: 1 validation error for user_simple__PydanticModel - user_embedding - Input should be a valid bytes [type=bytes_type, input_value=True, input_type=bool] - For further information visit https://errors.pydantic.dev/2.10/v/bytes_type + 13:00:27 redisvl.index.index ERROR Data validation failed during load operation + Schema validation failed for object at index 0. Field 'user_embedding' expects bytes (vector data), but got boolean value 'True'. If this should be a vector field, provide a list of numbers or bytes. If this should be a different field type, check your schema definition. + Object data: { + "user_embedding": true + } + Hint: Check that your data types match the schema field definitions. Use index.schema.fields to view expected field types. ### Upsert the index with new data @@ -403,7 +270,7 @@ keys = index.load(new_data) print(keys) ``` - ['user_simple_docs:01JT4PPX63CH5YRN2BGEYB5TS2'] + ['user_simple_docs:01JY4J4Y0N4CNR9Y6R67MMVG7Q'] ## Creating `VectorQuery` Objects @@ -433,7 +300,7 @@ result_print(results) ``` -
vector_distanceuseragejobcredit_score
0john1engineerhigh
0mary2doctorlow
0.0566299557686tyler9engineerhigh
+
vector_distanceuseragejobcredit_score
0john1engineerhigh
0mary2doctorlow
0john1engineerhigh
## Using an Asynchronous Redis Client @@ -480,7 +347,7 @@ result_print(results) ``` -
vector_distanceuseragejobcredit_score
0john1engineerhigh
0mary2doctorlow
0.0566299557686tyler9engineerhigh
+
vector_distanceuseragejobcredit_score
0john1engineerhigh
0mary2doctorlow
0john1engineerhigh
## Updating a schema @@ -517,7 +384,7 @@ index.schema.add_fields([ await index.create(overwrite=True, drop=False) ``` - 19:17:29 redisvl.index.index INFO Index already exists, overwriting. + 13:00:27 redisvl.index.index INFO Index already exists, overwriting. @@ -528,7 +395,7 @@ result_print(results) ``` -
vector_distanceuseragejobcredit_score
0john1engineerhigh
0mary2doctorlow
0.0566299557686tyler9engineerhigh
+
vector_distanceuseragejobcredit_score
0mary2doctorlow
0john1engineerhigh
0john1engineerhigh
## Check Index Stats @@ -544,25 +411,25 @@ Use the `rvl` CLI to check the stats for the index: ╭─────────────────────────────┬────────────╮ │ Stat Key │ Value │ ├─────────────────────────────┼────────────┤ - │ num_docs │ 4 │ + │ num_docs │ 10 │ │ num_terms │ 0 │ - │ max_doc_id │ 4 │ - │ num_records │ 20 │ + │ max_doc_id │ 10 │ + │ num_records │ 50 │ │ percent_indexed │ 1 │ │ hash_indexing_failures │ 0 │ │ number_of_uses │ 2 │ - │ bytes_per_record_avg │ 48.2000007 │ - │ doc_table_size_mb │ 4.23431396 │ - │ inverted_sz_mb │ 9.19342041 │ - │ key_table_size_mb │ 1.93595886 │ + │ bytes_per_record_avg │ 19.5200004 │ + │ doc_table_size_mb │ 0.00105857 │ + │ inverted_sz_mb │ 9.30786132 │ + │ key_table_size_mb │ 4.70161437 │ │ offset_bits_per_record_avg │ nan │ │ offset_vectors_sz_mb │ 0 │ │ offsets_per_term_avg │ 0 │ │ records_per_doc_avg │ 5 │ │ sortable_values_size_mb │ 0 │ - │ total_indexing_time │ 0.74400001 │ + │ total_indexing_time │ 0.16899999 │ │ total_inverted_index_blocks │ 11 │ - │ vector_index_sz_mb │ 0.23560333 │ + │ vector_index_sz_mb │ 0.23619842 │ ╰─────────────────────────────┴────────────╯ @@ -583,7 +450,7 @@ await index.clear() - 4 + 10 diff --git a/content/develop/ai/redisvl/user_guide/hash_vs_json.md b/content/develop/ai/redisvl/user_guide/hash_vs_json.md index 879c85857..8314c4efa 100644 --- a/content/develop/ai/redisvl/user_guide/hash_vs_json.md +++ b/content/develop/ai/redisvl/user_guide/hash_vs_json.md @@ -2,8 +2,6 @@ linkTitle: Hash vs JSON storage title: Hash vs JSON Storage weight: 05 -aliases: -- /integrate/redisvl/user_guide/hash_vs_json --- @@ -45,7 +43,7 @@ table_print(data) ``` -
useragejobcredit_scoreoffice_locationuser_embedding
john18engineerhigh-122.4194,37.7749b'\xcd\xcc\xcc=\xcd\xcc\xcc=\x00\x00\x00?'
derrick14doctorlow-122.4194,37.7749b'\xcd\xcc\xcc=\xcd\xcc\xcc=\x00\x00\x00?'
nancy94doctorhigh-122.4194,37.7749b'333?\xcd\xcc\xcc=\x00\x00\x00?'
tyler100engineerhigh-122.0839,37.3861b'\xcd\xcc\xcc=\xcd\xcc\xcc>\x00\x00\x00?'
tim12dermatologisthigh-122.0839,37.3861b'\xcd\xcc\xcc>\xcd\xcc\xcc>\x00\x00\x00?'
taimur15CEOlow-122.0839,37.3861b'\x9a\x99\x19?\xcd\xcc\xcc=\x00\x00\x00?'
joe35dentistmedium-122.0839,37.3861b'fff?fff?\xcd\xcc\xcc='
+
useragejobcredit_scoreoffice_locationuser_embeddinglast_updated
john18engineerhigh-122.4194,37.7749b'\xcd\xcc\xcc=\xcd\xcc\xcc=\x00\x00\x00?'1741627789
derrick14doctorlow-122.4194,37.7749b'\xcd\xcc\xcc=\xcd\xcc\xcc=\x00\x00\x00?'1741627789
nancy94doctorhigh-122.4194,37.7749b'333?\xcd\xcc\xcc=\x00\x00\x00?'1710696589
tyler100engineerhigh-122.0839,37.3861b'\xcd\xcc\xcc=\xcd\xcc\xcc>\x00\x00\x00?'1742232589
tim12dermatologisthigh-122.0839,37.3861b'\xcd\xcc\xcc>\xcd\xcc\xcc>\x00\x00\x00?'1739644189
taimur15CEOlow-122.0839,37.3861b'\x9a\x99\x19?\xcd\xcc\xcc=\x00\x00\x00?'1742232589
joe35dentistmedium-122.0839,37.3861b'fff?fff?\xcd\xcc\xcc='1742232589
## Hash or JSON -- how to choose? @@ -139,7 +137,8 @@ data[0] 'job': 'engineer', 'credit_score': 'high', 'office_location': '-122.4194,37.7749', - 'user_embedding': b'\xcd\xcc\xcc=\xcd\xcc\xcc=\x00\x00\x00?'} + 'user_embedding': b'\xcd\xcc\xcc=\xcd\xcc\xcc=\x00\x00\x00?', + 'last_updated': 1741627789} @@ -156,29 +155,29 @@ keys = hindex.load(data) Statistics: - ╭─────────────────────────────┬─────────────╮ - │ Stat Key │ Value │ - ├─────────────────────────────┼─────────────┤ - │ num_docs │ 7 │ - │ num_terms │ 6 │ - │ max_doc_id │ 7 │ - │ num_records │ 44 │ - │ percent_indexed │ 1 │ - │ hash_indexing_failures │ 0 │ - │ number_of_uses │ 1 │ - │ bytes_per_record_avg │ 3.40909 │ - │ doc_table_size_mb │ 0.000767708 │ - │ inverted_sz_mb │ 0.000143051 │ - │ key_table_size_mb │ 0.000248909 │ - │ offset_bits_per_record_avg │ 8 │ - │ offset_vectors_sz_mb │ 8.58307e-06 │ - │ offsets_per_term_avg │ 0.204545 │ - │ records_per_doc_avg │ 6.28571 │ - │ sortable_values_size_mb │ 0 │ - │ total_indexing_time │ 1.053 │ - │ total_inverted_index_blocks │ 18 │ - │ vector_index_sz_mb │ 0.0202332 │ - ╰─────────────────────────────┴─────────────╯ + ╭─────────────────────────────┬────────────╮ + │ Stat Key │ Value │ + ├─────────────────────────────┼────────────┤ + │ num_docs │ 7 │ + │ num_terms │ 6 │ + │ max_doc_id │ 7 │ + │ num_records │ 44 │ + │ percent_indexed │ 1 │ + │ hash_indexing_failures │ 0 │ + │ number_of_uses │ 1 │ + │ bytes_per_record_avg │ 40.2954559 │ + │ doc_table_size_mb │ 7.27653503 │ + │ inverted_sz_mb │ 0.00169086 │ + │ key_table_size_mb │ 2.48908996 │ + │ offset_bits_per_record_avg │ 8 │ + │ offset_vectors_sz_mb │ 8.58306884 │ + │ offsets_per_term_avg │ 0.20454545 │ + │ records_per_doc_avg │ 6.28571414 │ + │ sortable_values_size_mb │ 0 │ + │ total_indexing_time │ 0.25799998 │ + │ total_inverted_index_blocks │ 18 │ + │ vector_index_sz_mb │ 0.02023315 │ + ╰─────────────────────────────┴────────────╯ #### Performing Queries @@ -189,7 +188,7 @@ Once our index is created and 
data is loaded into the right format, we can run q from redisvl.query import VectorQuery from redisvl.query.filter import Tag, Text, Num -t = (Tag("credit_score") == "high") & (Text("job") % "enginee*") & (Num("age") > 17) +t = (Tag("credit_score") == "high") & (Text("job") % "enginee*") & (Num("age") > 17) # codespell:ignore enginee v = VectorQuery( vector=[0.1, 0.1, 0.5], @@ -267,8 +266,8 @@ jindex.create(overwrite=True) !rvl index listall ``` - 11:54:18 [RedisVL] INFO Indices: - 11:54:18 [RedisVL] INFO 1. user-json + 13:02:56 [RedisVL] INFO Indices: + 13:02:56 [RedisVL] INFO 1. user-json #### Vectors as float arrays @@ -296,7 +295,8 @@ json_data[0] 'job': 'engineer', 'credit_score': 'high', 'office_location': '-122.4194,37.7749', - 'user_embedding': [0.10000000149011612, 0.10000000149011612, 0.5]} + 'user_embedding': [0.10000000149011612, 0.10000000149011612, 0.5], + 'last_updated': 1741627789} @@ -413,8 +413,17 @@ bike_schema = { } ``` - /Users/robert.shelton/.pyenv/versions/3.11.9/lib/python3.11/site-packages/huggingface_hub/file_download.py:1142: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`. - warnings.warn( + /Users/tyler.hutcherson/Documents/AppliedAI/redis-vl-python/.venv/lib/python3.13/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html + from .autonotebook import tqdm as notebook_tqdm + + + 13:02:58 sentence_transformers.SentenceTransformer INFO Use pytorch device_name: mps + 13:02:58 sentence_transformers.SentenceTransformer INFO Load pretrained SentenceTransformer: sentence-transformers/all-mpnet-base-v2 + + + Batches: 100%|██████████| 1/1 [00:00<00:00, 7.23it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 12.93it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 14.10it/s] @@ -434,8 +443,8 @@ bike_index.load(bike_data) - ['bike-json:de92cb9955434575b20f4e87a30b03d5', - 'bike-json:054ab3718b984532b924946fa5ce00c6'] + ['bike-json:01JY4J9M48CXF7F4Y6HRGEMT9B', + 'bike-json:01JY4J9M48RRY6F80HR82CVZ5G'] @@ -459,6 +468,9 @@ v = VectorQuery( results = bike_index.query(v) ``` + Batches: 100%|██████████| 1/1 [00:00<00:00, 11.72it/s] + + **Note:** As shown in the example if you want to retrieve a field from json object that was not indexed you will also need to supply the full path as with `$.metadata.type`. 
@@ -469,12 +481,12 @@ results - [{'id': 'bike-json:054ab3718b984532b924946fa5ce00c6', - 'vector_distance': '0.519989073277', + [{'id': 'bike-json:01JY4J9M48RRY6F80HR82CVZ5G', + 'vector_distance': '0.519989132881', 'brand': 'Trek', '$.metadata.type': 'Enduro bikes'}, - {'id': 'bike-json:de92cb9955434575b20f4e87a30b03d5', - 'vector_distance': '0.657624483109', + {'id': 'bike-json:01JY4J9M48CXF7F4Y6HRGEMT9B', + 'vector_distance': '0.657624304295', 'brand': 'Specialized', '$.metadata.type': 'Enduro bikes'}] diff --git a/content/develop/ai/redisvl/user_guide/hybrid_queries.md b/content/develop/ai/redisvl/user_guide/hybrid_queries.md index 1f45e5b34..eae5ba3c4 100644 --- a/content/develop/ai/redisvl/user_guide/hybrid_queries.md +++ b/content/develop/ai/redisvl/user_guide/hybrid_queries.md @@ -2,8 +2,6 @@ linkTitle: Querying with RedisVL title: Querying with RedisVL weight: 02 -aliases: -- /integrate/redisvl/user_guide/hybrid_queries --- @@ -68,15 +66,16 @@ index = SearchIndex.from_dict(schema, redis_url="redis://localhost:6379") index.create(overwrite=True) ``` - 11:40:25 redisvl.index.index INFO Index already exists, overwriting. - - ```python # use the CLI to see the created index !rvl index listall ``` + 13:00:56 [RedisVL] INFO Indices: + 13:00:56 [RedisVL] INFO 1. user_queries + + ```python # load data to redis @@ -148,7 +147,7 @@ result_print(index.query(v)) ``` -
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0johnhigh18engineer-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
0.266666650772nancyhigh94doctor-122.4194,37.7749
0.653301358223joemedium35dentist-122.0839,37.3861
0.653301358223joemedium35dentist-122.0839,37.3861
+
vector_distanceusercredit_scoreagejoboffice_locationlast_updated
0johnhigh18engineer-122.4194,37.77491741627789
0.109129190445tylerhigh100engineer-122.0839,37.38611742232589
0.158808946609timhigh12dermatologist-122.0839,37.38611739644189
0.266666650772nancyhigh94doctor-122.4194,37.77491710696589
0.653301358223joemedium35dentist-122.0839,37.38611742232589
@@ -161,7 +160,7 @@ result_print(index.query(v)) ``` -
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0johnhigh18engineer-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
0.266666650772nancyhigh94doctor-122.4194,37.7749
0.653301358223joemedium35dentist-122.0839,37.3861
0.653301358223joemedium35dentist-122.0839,37.3861
+
vector_distanceusercredit_scoreagejoboffice_locationlast_updated
0johnhigh18engineer-122.4194,37.77491741627789
0.109129190445tylerhigh100engineer-122.0839,37.38611742232589
0.158808946609timhigh12dermatologist-122.0839,37.38611739644189
0.266666650772nancyhigh94doctor-122.4194,37.77491710696589
0.653301358223joemedium35dentist-122.0839,37.38611742232589
What about scenarios where you might want to dynamically generate a list of tags? Have no fear. RedisVL allows you to do this gracefully without having to check for the **empty case**. The **empty case** is when you attempt to run a Tag filter on a field with no defined values to match: @@ -180,7 +179,7 @@ result_print(index.query(v)) ``` -
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0derricklow14doctor-122.4194,37.7749
0johnhigh18engineer-122.4194,37.7749
0derricklow14doctor-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.217882037163taimurlow15CEO-122.0839,37.3861
0.217882037163taimurlow15CEO-122.0839,37.3861
+
vector_distanceusercredit_scoreagejoboffice_locationlast_updated
0johnhigh18engineer-122.4194,37.77491741627789
0derricklow14doctor-122.4194,37.77491741627789
0.109129190445tylerhigh100engineer-122.0839,37.38611742232589
0.158808946609timhigh12dermatologist-122.0839,37.38611739644189
0.217882037163taimurlow15CEO-122.0839,37.38611742232589
0.266666650772nancyhigh94doctor-122.4194,37.77491710696589
0.653301358223joemedium35dentist-122.0839,37.38611742232589
### Numeric Filters @@ -211,7 +210,7 @@ result_print(index.query(v)) ``` -
vector_distanceusercredit_scoreagejoboffice_location
0derricklow14doctor-122.4194,37.7749
0derricklow14doctor-122.4194,37.7749
+
vector_distanceusercredit_scoreagejoboffice_locationlast_updated
0derricklow14doctor-122.4194,37.77491741627789
@@ -224,7 +223,7 @@ result_print(index.query(v)) ``` -
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0johnhigh18engineer-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.217882037163taimurlow15CEO-122.0839,37.3861
0.217882037163taimurlow15CEO-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
0.266666650772nancyhigh94doctor-122.4194,37.7749
+
vector_distanceusercredit_scoreagejoboffice_locationlast_updated
0johnhigh18engineer-122.4194,37.77491741627789
0.109129190445tylerhigh100engineer-122.0839,37.38611742232589
0.158808946609timhigh12dermatologist-122.0839,37.38611739644189
0.217882037163taimurlow15CEO-122.0839,37.38611742232589
0.266666650772nancyhigh94doctor-122.4194,37.77491710696589
0.653301358223joemedium35dentist-122.0839,37.38611742232589
### Timestamp Filters @@ -271,7 +270,7 @@ result_print(index.query(v)) -
vector_distanceusercredit_scoreagejoboffice_locationlast_updated
0derricklow14doctor-122.4194,37.77491741627789
0johnhigh18engineer-122.4194,37.77491741627789
0.158808946609timhigh12dermatologist-122.0839,37.38611739644189
0.266666650772nancyhigh94doctor-122.4194,37.77491710696589
+
vector_distanceusercredit_scoreagejoboffice_locationlast_updated
0johnhigh18engineer-122.4194,37.77491741627789
0derricklow14doctor-122.4194,37.77491741627789
0.158808946609timhigh12dermatologist-122.0839,37.38611739644189
0.266666650772nancyhigh94doctor-122.4194,37.77491710696589
@@ -294,7 +293,7 @@ result_print(index.query(v)) -
vector_distanceusercredit_scoreagejoboffice_locationlast_updated
0derricklow14doctor-122.4194,37.77491741627789
0johnhigh18engineer-122.4194,37.77491741627789
0.158808946609timhigh12dermatologist-122.0839,37.38611739644189
+
vector_distanceusercredit_scoreagejoboffice_locationlast_updated
0johnhigh18engineer-122.4194,37.77491741627789
0derricklow14doctor-122.4194,37.77491741627789
0.158808946609timhigh12dermatologist-122.0839,37.38611739644189
### Text Filters @@ -326,7 +325,7 @@ result_print(index.query(v)) ``` -
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0johnhigh18engineer-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.217882037163taimurlow15CEO-122.0839,37.3861
0.217882037163taimurlow15CEO-122.0839,37.3861
0.653301358223joemedium35dentist-122.0839,37.3861
0.653301358223joemedium35dentist-122.0839,37.3861
+
vector_distanceusercredit_scoreagejoboffice_locationlast_updated
0johnhigh18engineer-122.4194,37.77491741627789
0.109129190445tylerhigh100engineer-122.0839,37.38611742232589
0.158808946609timhigh12dermatologist-122.0839,37.38611739644189
0.217882037163taimurlow15CEO-122.0839,37.38611742232589
0.653301358223joemedium35dentist-122.0839,37.38611742232589
@@ -339,7 +338,7 @@ result_print(index.query(v)) ``` -
vector_distanceusercredit_scoreagejoboffice_location
0derricklow14doctor-122.4194,37.7749
0derricklow14doctor-122.4194,37.7749
0.266666650772nancyhigh94doctor-122.4194,37.7749
0.266666650772nancyhigh94doctor-122.4194,37.7749
+
vector_distanceusercredit_scoreagejoboffice_locationlast_updated
0derricklow14doctor-122.4194,37.77491741627789
0.266666650772nancyhigh94doctor-122.4194,37.77491710696589
@@ -352,7 +351,7 @@ result_print(index.query(v)) ``` -
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0johnhigh18engineer-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.109129190445tylerhigh100engineer-122.0839,37.3861
+
vector_distanceusercredit_scoreagejoboffice_locationlast_updated
0johnhigh18engineer-122.4194,37.77491741627789
0.109129190445tylerhigh100engineer-122.0839,37.38611742232589
@@ -365,7 +364,7 @@ result_print(index.query(v)) ``` -
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0derricklow14doctor-122.4194,37.7749
0johnhigh18engineer-122.4194,37.7749
0derricklow14doctor-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
0.266666650772nancyhigh94doctor-122.4194,37.7749
+
vector_distanceusercredit_scoreagejoboffice_locationlast_updated
0johnhigh18engineer-122.4194,37.77491741627789
0derricklow14doctor-122.4194,37.77491741627789
0.109129190445tylerhigh100engineer-122.0839,37.38611742232589
0.266666650772nancyhigh94doctor-122.4194,37.77491710696589
@@ -378,7 +377,7 @@ result_print(index.query(v)) ``` -
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0derricklow14doctor-122.4194,37.7749
0johnhigh18engineer-122.4194,37.7749
0derricklow14doctor-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.217882037163taimurlow15CEO-122.0839,37.3861
0.217882037163taimurlow15CEO-122.0839,37.3861
+
vector_distanceusercredit_scoreagejoboffice_locationlast_updated
0johnhigh18engineer-122.4194,37.77491741627789
0derricklow14doctor-122.4194,37.77491741627789
0.109129190445tylerhigh100engineer-122.0839,37.38611742232589
0.158808946609timhigh12dermatologist-122.0839,37.38611739644189
0.217882037163taimurlow15CEO-122.0839,37.38611742232589
0.266666650772nancyhigh94doctor-122.4194,37.77491710696589
0.653301358223joemedium35dentist-122.0839,37.38611742232589
Use raw query strings as input. Below we use the `~` flag to indicate that the full text query is optional. We also choose the BM25 scorer and return document scores along with the result. @@ -394,86 +393,69 @@ index.query(v) - [{'id': 'user_queries_docs:01JMJJHE28ZW4F33ZNRKXRHYCS', - 'score': 1.8181817787737895, - 'vector_distance': '0', - 'user': 'john', - 'credit_score': 'high', - 'age': '18', - 'job': 'engineer', - 'office_location': '-122.4194,37.7749'}, - {'id': 'user_queries_docs:01JMJJHE2899024DYPXT6424N9', - 'score': 0.0, - 'vector_distance': '0', - 'user': 'derrick', - 'credit_score': 'low', - 'age': '14', - 'job': 'doctor', - 'office_location': '-122.4194,37.7749'}, - {'id': 'user_queries_docs:01JMJJPEYCQ89ZQW6QR27J72WT', - 'score': 1.8181817787737895, + [{'id': 'user_queries_docs:01JY4J5VC91SV4C91BM4D0FCV2', + 'score': 0.9090908893868948, 'vector_distance': '0', 'user': 'john', 'credit_score': 'high', 'age': '18', 'job': 'engineer', - 'office_location': '-122.4194,37.7749'}, - {'id': 'user_queries_docs:01JMJJPEYD544WB1TKDBJ3Z3J9', + 'office_location': '-122.4194,37.7749', + 'last_updated': '1741627789'}, + {'id': 'user_queries_docs:01JY4J5VC90DRSFJ0WKXXN49JT', 'score': 0.0, 'vector_distance': '0', 'user': 'derrick', 'credit_score': 'low', 'age': '14', 'job': 'doctor', - 'office_location': '-122.4194,37.7749'}, - {'id': 'user_queries_docs:01JMJJHE28B5R6T00DH37A7KSJ', - 'score': 1.8181817787737895, - 'vector_distance': '0.109129190445', - 'user': 'tyler', - 'credit_score': 'high', - 'age': '100', - 'job': 'engineer', - 'office_location': '-122.0839,37.3861'}, - {'id': 'user_queries_docs:01JMJJPEYDPF9S5328WHCQN0ND', - 'score': 1.8181817787737895, + 'office_location': '-122.4194,37.7749', + 'last_updated': '1741627789'}, + {'id': 'user_queries_docs:01JY4J5VC9QTPMCD60YP40Q6PW', + 'score': 0.9090908893868948, 'vector_distance': '0.109129190445', 'user': 'tyler', 'credit_score': 'high', 'age': '100', 'job': 'engineer', - 'office_location': 
'-122.0839,37.3861'}, - {'id': 'user_queries_docs:01JMJJHE28G5F943YGWMB1ZX1V', - 'score': 0.0, - 'vector_distance': '0.158808946609', - 'user': 'tim', - 'credit_score': 'high', - 'age': '12', - 'job': 'dermatologist', - 'office_location': '-122.0839,37.3861'}, - {'id': 'user_queries_docs:01JMJJPEYDKA9ARKHRK1D7KPXQ', + 'office_location': '-122.0839,37.3861', + 'last_updated': '1742232589'}, + {'id': 'user_queries_docs:01JY4J5VC9FW7QQNJKDJ4Z7PRG', 'score': 0.0, 'vector_distance': '0.158808946609', 'user': 'tim', 'credit_score': 'high', 'age': '12', 'job': 'dermatologist', - 'office_location': '-122.0839,37.3861'}, - {'id': 'user_queries_docs:01JMJJHE28NR7KF0EZEA433T2J', + 'office_location': '-122.0839,37.3861', + 'last_updated': '1739644189'}, + {'id': 'user_queries_docs:01JY4J5VC940DJ9F47EJ6KN2MH', 'score': 0.0, 'vector_distance': '0.217882037163', 'user': 'taimur', 'credit_score': 'low', 'age': '15', 'job': 'CEO', - 'office_location': '-122.0839,37.3861'}, - {'id': 'user_queries_docs:01JMJJPEYD9EAVGJ2AZ8K9VX7Q', + 'office_location': '-122.0839,37.3861', + 'last_updated': '1742232589'}, + {'id': 'user_queries_docs:01JY4J5VC9D53KQD7ZTRP14KCE', 'score': 0.0, - 'vector_distance': '0.217882037163', - 'user': 'taimur', - 'credit_score': 'low', - 'age': '15', - 'job': 'CEO', - 'office_location': '-122.0839,37.3861'}] + 'vector_distance': '0.266666650772', + 'user': 'nancy', + 'credit_score': 'high', + 'age': '94', + 'job': 'doctor', + 'office_location': '-122.4194,37.7749', + 'last_updated': '1710696589'}, + {'id': 'user_queries_docs:01JY4J5VC9806MD90GBZNP0MNY', + 'score': 0.0, + 'vector_distance': '0.653301358223', + 'user': 'joe', + 'credit_score': 'medium', + 'age': '35', + 'job': 'dentist', + 'office_location': '-122.0839,37.3861', + 'last_updated': '1742232589'}] @@ -493,7 +475,7 @@ result_print(index.query(v)) ``` -
scorevector_distanceusercredit_scoreagejoboffice_location
0.45454544469344740johnhigh18engineer-122.4194,37.7749
0.45454544469344740derricklow14doctor-122.4194,37.7749
0.45454544469344740johnhigh18engineer-122.4194,37.7749
0.45454544469344740derricklow14doctor-122.4194,37.7749
0.45454544469344740.266666650772nancyhigh94doctor-122.4194,37.7749
0.45454544469344740.266666650772nancyhigh94doctor-122.4194,37.7749
+
scorevector_distanceusercredit_scoreagejoboffice_locationlast_updated
0.45454544469344740johnhigh18engineer-122.4194,37.77491741627789
0.45454544469344740derricklow14doctor-122.4194,37.77491741627789
0.45454544469344740.266666650772nancyhigh94doctor-122.4194,37.77491710696589
@@ -506,7 +488,7 @@ result_print(index.query(v)) ``` -
scorevector_distanceusercredit_scoreagejoboffice_location
0.45454544469344740johnhigh18engineer-122.4194,37.7749
0.45454544469344740derricklow14doctor-122.4194,37.7749
0.45454544469344740johnhigh18engineer-122.4194,37.7749
0.45454544469344740derricklow14doctor-122.4194,37.7749
0.45454544469344740.109129190445tylerhigh100engineer-122.0839,37.3861
0.45454544469344740.109129190445tylerhigh100engineer-122.0839,37.3861
0.45454544469344740.158808946609timhigh12dermatologist-122.0839,37.3861
0.45454544469344740.158808946609timhigh12dermatologist-122.0839,37.3861
0.45454544469344740.217882037163taimurlow15CEO-122.0839,37.3861
0.45454544469344740.217882037163taimurlow15CEO-122.0839,37.3861
+
scorevector_distanceusercredit_scoreagejoboffice_locationlast_updated
0.45454544469344740johnhigh18engineer-122.4194,37.77491741627789
0.45454544469344740derricklow14doctor-122.4194,37.77491741627789
0.45454544469344740.109129190445tylerhigh100engineer-122.0839,37.38611742232589
0.45454544469344740.158808946609timhigh12dermatologist-122.0839,37.38611739644189
0.45454544469344740.217882037163taimurlow15CEO-122.0839,37.38611742232589
0.45454544469344740.266666650772nancyhigh94doctor-122.4194,37.77491710696589
0.45454544469344740.653301358223joemedium35dentist-122.0839,37.38611742232589
@@ -519,7 +501,7 @@ result_print(index.query(v)) ``` -
scorevector_distanceusercredit_scoreagejoboffice_location
0.00.109129190445tylerhigh100engineer-122.0839,37.3861
0.00.109129190445tylerhigh100engineer-122.0839,37.3861
0.00.158808946609timhigh12dermatologist-122.0839,37.3861
0.00.158808946609timhigh12dermatologist-122.0839,37.3861
0.00.217882037163taimurlow15CEO-122.0839,37.3861
0.00.217882037163taimurlow15CEO-122.0839,37.3861
0.00.653301358223joemedium35dentist-122.0839,37.3861
0.00.653301358223joemedium35dentist-122.0839,37.3861
+
scorevector_distanceusercredit_scoreagejoboffice_locationlast_updated
0.00.109129190445tylerhigh100engineer-122.0839,37.38611742232589
0.00.158808946609timhigh12dermatologist-122.0839,37.38611739644189
0.00.217882037163taimurlow15CEO-122.0839,37.38611742232589
0.00.653301358223joemedium35dentist-122.0839,37.38611742232589
## Combining Filters @@ -602,7 +584,7 @@ result_print(index.query(v)) ``` -
vector_distanceusercredit_scoreagejoboffice_location
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.109129190445tylerhigh100engineer-122.0839,37.3861
+
vector_distanceusercredit_scoreagejoboffice_location
0.109129190445tylerhigh100engineer-122.0839,37.3861
@@ -614,7 +596,7 @@ result_print(index.query(v)) ``` -
vector_distanceusercredit_scoreagejoboffice_location
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
0.266666650772nancyhigh94doctor-122.4194,37.7749
+
vector_distanceusercredit_scoreagejoboffice_location
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
@@ -626,7 +608,7 @@ result_print(index.query(v)) ``` -
vector_distanceusercredit_scoreagejoboffice_location
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
0.266666650772nancyhigh94doctor-122.4194,37.7749
0.653301358223joemedium35dentist-122.0839,37.3861
0.653301358223joemedium35dentist-122.0839,37.3861
+
vector_distanceusercredit_scoreagejoboffice_location
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
0.653301358223joemedium35dentist-122.0839,37.3861
@@ -638,12 +620,12 @@ result_print(index.query(v)) ``` -
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0derricklow14doctor-122.4194,37.7749
0johnhigh18engineer-122.4194,37.7749
0derricklow14doctor-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.217882037163taimurlow15CEO-122.0839,37.3861
0.217882037163taimurlow15CEO-122.0839,37.3861
+
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0derricklow14doctor-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.217882037163taimurlow15CEO-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
0.653301358223joemedium35dentist-122.0839,37.3861
## Non-vector Queries

-In some cases, you may not want to run a vector query, but just use a ``FilterExpression`` similar to a SQL query. The ``FilterQuery`` class enable this functionality. It is similar to the ``VectorQuery`` class but soley takes a ``FilterExpression``.
+In some cases, you may not want to run a vector query, but just use a ``FilterExpression`` similar to a SQL query. The ``FilterQuery`` class enables this functionality. It is similar to the ``VectorQuery`` class but solely takes a ``FilterExpression``.


```python
@@ -662,7 +644,7 @@ result_print(results)
```


-
usercredit_scoreagejob
derricklow14doctor
taimurlow15CEO
derricklow14doctor
taimurlow15CEO
+
usercredit_scoreagejob
derricklow14doctor
taimurlow15CEO
## Count Queries @@ -682,7 +664,7 @@ count = index.query(filter_query) print(f"{count} records match the filter expression {str(has_low_credit)} for the given index.") ``` - 4 records match the filter expression @credit_score:{low} for the given index. + 2 records match the filter expression @credit_score:{low} for the given index. ## Range Queries @@ -707,7 +689,7 @@ result_print(results) ``` -
vector_distanceusercredit_scoreagejob
0johnhigh18engineer
0derricklow14doctor
0johnhigh18engineer
0derricklow14doctor
0.109129190445tylerhigh100engineer
0.109129190445tylerhigh100engineer
0.158808946609timhigh12dermatologist
0.158808946609timhigh12dermatologist
+
vector_distanceusercredit_scoreagejob
0johnhigh18engineer
0derricklow14doctor
0.109129190445tylerhigh100engineer
0.158808946609timhigh12dermatologist
We can also change the distance threshold of the query object between uses if we like. Here we will set ``distance_threshold==0.1``. This means that the query object will return all matches that are within 0.1 of the query object. This is a small distance, so we expect to get fewer matches than before. @@ -720,7 +702,7 @@ result_print(index.query(range_query)) ``` -
vector_distanceusercredit_scoreagejob
0johnhigh18engineer
0derricklow14doctor
0johnhigh18engineer
0derricklow14doctor
+
vector_distanceusercredit_scoreagejob
0johnhigh18engineer
0derricklow14doctor
Range queries can also be used with filters like any other query type. The following limits the results to only include records with a ``job`` of ``engineer`` while also being within the vector range (aka distance). @@ -735,12 +717,12 @@ result_print(index.query(range_query)) ``` -
vector_distanceusercredit_scoreagejob
0johnhigh18engineer
0johnhigh18engineer
+
vector_distanceusercredit_scoreagejob
0johnhigh18engineer
## Advanced Query Modifiers -See all modifier options available on the query API docs: https://redis.io/docs/latest/integrate/redisvl/api/query +See all modifier options available on the query API docs: https://redis.io/docs/latest/develop/ai/redisvl/api/query ```python @@ -758,7 +740,7 @@ result_print(result) ``` -
vector_distanceageusercredit_scorejoboffice_location
0.109129190445100tylerhighengineer-122.0839,37.3861
0.109129190445100tylerhighengineer-122.0839,37.3861
018johnhighengineer-122.4194,37.7749
018johnhighengineer-122.4194,37.7749
+
vector_distanceageusercredit_scorejoboffice_location
0.109129190445100tylerhighengineer-122.0839,37.3861
018johnhighengineer-122.4194,37.7749
### Raw Redis Query String @@ -820,14 +802,10 @@ for r in results.docs: print(r.__dict__) ``` - {'id': 'user_queries_docs:01JMJJHE28G5F943YGWMB1ZX1V', 'payload': None, 'user': 'tim', 'age': '12', 'job': 'dermatologist', 'credit_score': 'high', 'office_location': '-122.0839,37.3861', 'user_embedding': '>>\x00\x00\x00?'} - {'id': 'user_queries_docs:01JMJJHE28ZW4F33ZNRKXRHYCS', 'payload': None, 'user': 'john', 'age': '18', 'job': 'engineer', 'credit_score': 'high', 'office_location': '-122.4194,37.7749', 'user_embedding': '==\x00\x00\x00?'} - {'id': 'user_queries_docs:01JMJJHE28B5R6T00DH37A7KSJ', 'payload': None, 'user': 'tyler', 'age': '100', 'job': 'engineer', 'credit_score': 'high', 'office_location': '-122.0839,37.3861', 'user_embedding': '=>\x00\x00\x00?'} - {'id': 'user_queries_docs:01JMJJHE28EX13NEE7BGBM8FH3', 'payload': None, 'user': 'nancy', 'age': '94', 'job': 'doctor', 'credit_score': 'high', 'office_location': '-122.4194,37.7749', 'user_embedding': '333?=\x00\x00\x00?'} - {'id': 'user_queries_docs:01JMJJPEYCQ89ZQW6QR27J72WT', 'payload': None, 'user': 'john', 'age': '18', 'job': 'engineer', 'credit_score': 'high', 'office_location': '-122.4194,37.7749', 'user_embedding': '==\x00\x00\x00?'} - {'id': 'user_queries_docs:01JMJJPEYDAN0M3V7EQEVPS6HX', 'payload': None, 'user': 'nancy', 'age': '94', 'job': 'doctor', 'credit_score': 'high', 'office_location': '-122.4194,37.7749', 'user_embedding': '333?=\x00\x00\x00?'} - {'id': 'user_queries_docs:01JMJJPEYDPF9S5328WHCQN0ND', 'payload': None, 'user': 'tyler', 'age': '100', 'job': 'engineer', 'credit_score': 'high', 'office_location': '-122.0839,37.3861', 'user_embedding': '=>\x00\x00\x00?'} - {'id': 'user_queries_docs:01JMJJPEYDKA9ARKHRK1D7KPXQ', 'payload': None, 'user': 'tim', 'age': '12', 'job': 'dermatologist', 'credit_score': 'high', 'office_location': '-122.0839,37.3861', 'user_embedding': '>>\x00\x00\x00?'} + {'id': 'user_queries_docs:01JY4J5VC91SV4C91BM4D0FCV2', 'payload': None, 'user': 'john', 'age': '18', 
'job': 'engineer', 'credit_score': 'high', 'office_location': '-122.4194,37.7749', 'user_embedding': '==\x00\x00\x00?', 'last_updated': '1741627789'}
+    {'id': 'user_queries_docs:01JY4J5VC9D53KQD7ZTRP14KCE', 'payload': None, 'user': 'nancy', 'age': '94', 'job': 'doctor', 'credit_score': 'high', 'office_location': '-122.4194,37.7749', 'user_embedding': '333?=\x00\x00\x00?', 'last_updated': '1710696589'}
+    {'id': 'user_queries_docs:01JY4J5VC9QTPMCD60YP40Q6PW', 'payload': None, 'user': 'tyler', 'age': '100', 'job': 'engineer', 'credit_score': 'high', 'office_location': '-122.0839,37.3861', 'user_embedding': '=>\x00\x00\x00?', 'last_updated': '1742232589'}
+    {'id': 'user_queries_docs:01JY4J5VC9FW7QQNJKDJ4Z7PRG', 'payload': None, 'user': 'tim', 'age': '12', 'job': 'dermatologist', 'credit_score': 'high', 'office_location': '-122.0839,37.3861', 'user_embedding': '>>\x00\x00\x00?', 'last_updated': '1739644189'}


diff --git a/content/develop/ai/redisvl/user_guide/llmcache.md b/content/develop/ai/redisvl/user_guide/llmcache.md
index f2d32a9c2..64a9c314c 100644
--- a/content/develop/ai/redisvl/user_guide/llmcache.md
+++ b/content/develop/ai/redisvl/user_guide/llmcache.md
@@ -1,18 +1,10 @@
 ---
 linkTitle: Semantic caching for LLMs
 title: Semantic Caching for LLMs
 weight: 03
-aliases:
-- /integrate/redisvl/user_guide/llmcache
---
 
-RedisVL provides a ``SemanticCache`` interface to utilize Redis' built-in caching capabilities AND vector search in order to store responses from previously-answered questions.
This reduces the number of requests and tokens sent to the Large Language Models (LLM) service, decreasing costs and enhancing application throughput (by reducing the time taken to generate responses). - -This notebook will go over how to use Redis as a Semantic Cache for your applications - -First, we will import [OpenAI](https://platform.openai.com) to use their API for responding to user prompts. We will also create a simple `ask_openai` helper method to assist. - ```python import os @@ -44,7 +36,6 @@ def ask_openai(question: str) -> str: print(ask_openai("What is the capital of France?")) ``` - 19:17:51 httpx INFO HTTP Request: POST https://api.openai.com/v1/completions "HTTP/1.1 200 OK" The capital of France is Paris. @@ -54,8 +45,11 @@ print(ask_openai("What is the capital of France?")) ```python +import warnings +warnings.filterwarnings('ignore') + from redisvl.extensions.cache.llm import SemanticCache -from redisvl.utils .vectorize import HFTextVectorizer +from redisvl.utils.vectorize import HFTextVectorizer llmcache = SemanticCache( name="llmcache", # underlying search index name @@ -65,11 +59,15 @@ llmcache = SemanticCache( ) ``` - 19:17:51 sentence_transformers.SentenceTransformer INFO Use pytorch device_name: mps - 19:17:51 sentence_transformers.SentenceTransformer INFO Load pretrained SentenceTransformer: redis/langcache-embed-v1 + 13:02:02 sentence_transformers.SentenceTransformer INFO Use pytorch device_name: mps + 13:02:02 sentence_transformers.SentenceTransformer INFO Load pretrained SentenceTransformer: redis/langcache-embed-v1 + 13:02:02 sentence_transformers.SentenceTransformer WARNING You try to use a model that was created with version 4.1.0, however, your version is 3.4.1. This might cause unexpected behavior or errors. In that case, try to update to the latest version. 
+ + + - Batches: 100%|██████████| 1/1 [00:00<00:00, 17.57it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 3.79it/s] @@ -114,7 +112,7 @@ else: print("Empty cache") ``` - Batches: 100%|██████████| 1/1 [00:00<00:00, 18.30it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 7.79it/s] Empty cache @@ -135,7 +133,7 @@ llmcache.store( ) ``` - Batches: 100%|██████████| 1/1 [00:00<00:00, 26.10it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 19.62it/s] @@ -156,12 +154,14 @@ else: print("Empty cache") ``` - Batches: 100%|██████████| 1/1 [00:00<00:00, 12.36it/s] - + Batches: 100%|██████████| 1/1 [00:00<00:00, 18.65it/s] [{'prompt': 'What is the capital of France?', 'response': 'Paris', 'metadata': {'city': 'Paris', 'country': 'france'}, 'key': 'llmcache:115049a298532be2f181edb03f766770c0db84c22aff39003fec340deaec7545'}] + + + ```python # Check for a semantically similar result @@ -169,7 +169,7 @@ question = "What actually is the capital of France?" llmcache.check(prompt=question)[0]['response'] ``` - Batches: 100%|██████████| 1/1 [00:00<00:00, 12.22it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 7.81it/s] @@ -179,12 +179,12 @@ llmcache.check(prompt=question)[0]['response'] -## Customize the Distance Threshhold +## Customize the Distance Threshold -For most use cases, the right semantic similarity threshhold is not a fixed quantity. Depending on the choice of embedding model, -the properties of the input query, and even business use case -- the threshhold might need to change. +For most use cases, the right semantic similarity threshold is not a fixed quantity. Depending on the choice of embedding model, +the properties of the input query, and even business use case -- the threshold might need to change. 
-Fortunately, you can seamlessly adjust the threshhold at any point like below: +Fortunately, you can seamlessly adjust the threshold at any point like below: ```python @@ -200,7 +200,7 @@ question = "What is the capital city of the country in Europe that also has a ci llmcache.check(prompt=question)[0]['response'] ``` - Batches: 100%|██████████| 1/1 [00:00<00:00, 19.20it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 8.37it/s] @@ -215,11 +215,11 @@ llmcache.check(prompt=question)[0]['response'] # Invalidate the cache completely by clearing it out llmcache.clear() -# should be empty now +# Should be empty now llmcache.check(prompt=question) ``` - Batches: 100%|██████████| 1/1 [00:00<00:00, 26.71it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 21.23it/s] @@ -248,7 +248,7 @@ llmcache.store("This is a TTL test", "This is a TTL test response") time.sleep(6) ``` - Batches: 100%|██████████| 1/1 [00:00<00:00, 20.45it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 8.53it/s] @@ -259,7 +259,7 @@ result = llmcache.check("This is a TTL test") print(result) ``` - Batches: 100%|██████████| 1/1 [00:00<00:00, 17.02it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 12.54it/s] [] @@ -311,14 +311,14 @@ print(f"Without caching, a call to openAI to answer this simple question took {e llmcache.store(prompt=question, response="George Washington") ``` - Batches: 100%|██████████| 1/1 [00:00<00:00, 14.88it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 8.09it/s] - 19:18:04 httpx INFO HTTP Request: POST https://api.openai.com/v1/completions "HTTP/1.1 200 OK" - Without caching, a call to openAI to answer this simple question took 0.8826751708984375 seconds. + 13:02:17 httpx INFO HTTP Request: POST https://api.openai.com/v1/completions "HTTP/1.1 200 OK" + Without caching, a call to openAI to answer this simple question took 1.7948627471923828 seconds. 
- Batches: 100%|██████████| 1/1 [00:00<00:00, 18.38it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 12.93it/s] @@ -344,22 +344,20 @@ print(f"Avg time taken with LLM cache enabled: {avg_time_with_cache}") print(f"Percentage of time saved: {round(((end - start) - avg_time_with_cache) / (end - start) * 100, 2)}%") ``` - Batches: 100%|██████████| 1/1 [00:00<00:00, 13.65it/s] - Batches: 100%|██████████| 1/1 [00:00<00:00, 27.94it/s] - Batches: 100%|██████████| 1/1 [00:00<00:00, 27.19it/s] - Batches: 100%|██████████| 1/1 [00:00<00:00, 27.53it/s] - Batches: 100%|██████████| 1/1 [00:00<00:00, 28.12it/s] - Batches: 100%|██████████| 1/1 [00:00<00:00, 27.38it/s] - Batches: 100%|██████████| 1/1 [00:00<00:00, 25.39it/s] - Batches: 100%|██████████| 1/1 [00:00<00:00, 26.34it/s] - Batches: 100%|██████████| 1/1 [00:00<00:00, 28.07it/s] - Batches: 100%|██████████| 1/1 [00:00<00:00, 27.35it/s] - - Avg time taken with LLM cache enabled: 0.0463670015335083 - Percentage of time saved: 94.75% + Batches: 100%|██████████| 1/1 [00:00<00:00, 20.90it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 23.24it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 22.85it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 21.98it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 22.65it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 22.65it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 21.84it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 20.67it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 22.08it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 21.14it/s] - + Avg time taken with LLM cache enabled: 0.049193501472473145 + Percentage of time saved: 97.26% @@ -389,7 +387,7 @@ print(f"Percentage of time saved: {round(((end - start) - avg_time_with_cache) / │ offsets_per_term_avg │ 0.75862067 │ │ records_per_doc_avg │ 29 │ │ sortable_values_size_mb │ 0 │ - │ total_indexing_time │ 3.875 │ + │ total_indexing_time │ 14.3260002 │ │ total_inverted_index_blocks │ 21 │ │ vector_index_sz_mb │ 3.01609802 
│ ╰─────────────────────────────┴────────────╯ @@ -426,14 +424,17 @@ private_cache.store( ) ``` - 19:18:07 [RedisVL] WARNING The default vectorizer has changed from `sentence-transformers/all-mpnet-base-v2` to `redis/langcache-embed-v1` in version 0.6.0 of RedisVL. For more information about this model, please refer to https://arxiv.org/abs/2504.02268 or visit https://huggingface.co/redis/langcache-embed-v1. To continue using the old vectorizer, please specify it explicitly in the constructor as: vectorizer=HFTextVectorizer(model='sentence-transformers/all-mpnet-base-v2') - 19:18:07 sentence_transformers.SentenceTransformer INFO Use pytorch device_name: mps - 19:18:07 sentence_transformers.SentenceTransformer INFO Load pretrained SentenceTransformer: redis/langcache-embed-v1 + 13:02:20 sentence_transformers.SentenceTransformer INFO Use pytorch device_name: mps + 13:02:20 sentence_transformers.SentenceTransformer INFO Load pretrained SentenceTransformer: redis/langcache-embed-v1 + 13:02:20 sentence_transformers.SentenceTransformer WARNING You try to use a model that was created with version 4.1.0, however, your version is 3.4.1. This might cause unexpected behavior or errors. In that case, try to update to the latest version. 
+ + + - Batches: 100%|██████████| 1/1 [00:00<00:00, 8.98it/s] - Batches: 100%|██████████| 1/1 [00:00<00:00, 24.89it/s] - Batches: 100%|██████████| 1/1 [00:00<00:00, 26.95it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 17.15it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 21.23it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 21.71it/s] @@ -459,7 +460,7 @@ response = private_cache.check( print(f"found {len(response)} entry \n{response[0]['response']}") ``` - Batches: 100%|██████████| 1/1 [00:00<00:00, 27.98it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 22.36it/s] found 1 entry The number on file is 123-555-0000 @@ -510,16 +511,19 @@ complex_cache.store( ) ``` - 19:18:09 [RedisVL] WARNING The default vectorizer has changed from `sentence-transformers/all-mpnet-base-v2` to `redis/langcache-embed-v1` in version 0.6.0 of RedisVL. For more information about this model, please refer to https://arxiv.org/abs/2504.02268 or visit https://huggingface.co/redis/langcache-embed-v1. To continue using the old vectorizer, please specify it explicitly in the constructor as: vectorizer=HFTextVectorizer(model='sentence-transformers/all-mpnet-base-v2') - 19:18:09 sentence_transformers.SentenceTransformer INFO Use pytorch device_name: mps - 19:18:09 sentence_transformers.SentenceTransformer INFO Load pretrained SentenceTransformer: redis/langcache-embed-v1 + 13:02:21 sentence_transformers.SentenceTransformer INFO Use pytorch device_name: mps + 13:02:21 sentence_transformers.SentenceTransformer INFO Load pretrained SentenceTransformer: redis/langcache-embed-v1 + 13:02:21 sentence_transformers.SentenceTransformer WARNING You try to use a model that was created with version 4.1.0, however, your version is 3.4.1. This might cause unexpected behavior or errors. In that case, try to update to the latest version. 
+ + + - Batches: 100%|██████████| 1/1 [00:00<00:00, 13.54it/s] - Batches: 100%|██████████| 1/1 [00:00<00:00, 16.76it/s] - Batches: 100%|██████████| 1/1 [00:00<00:00, 21.82it/s] - Batches: 100%|██████████| 1/1 [00:00<00:00, 28.80it/s] - Batches: 100%|██████████| 1/1 [00:00<00:00, 21.04it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 21.08it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 8.74it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 8.01it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 21.70it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 16.74it/s] @@ -548,7 +552,7 @@ print(f'found {len(response)} entry') print(response[0]["response"]) ``` - Batches: 100%|██████████| 1/1 [00:00<00:00, 28.15it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 19.91it/s] found 1 entry Your most recent transaction was for $350 diff --git a/content/develop/ai/redisvl/user_guide/message_history.md b/content/develop/ai/redisvl/user_guide/message_history.md index 0e84bd8de..db536ddd6 100644 --- a/content/develop/ai/redisvl/user_guide/message_history.md +++ b/content/develop/ai/redisvl/user_guide/message_history.md @@ -2,12 +2,10 @@ linkTitle: LLM message history title: LLM Message History weight: 07 -aliases: -- /integrate/redisvl/user_guide/message_history --- -Large Language Models are inherently stateless and have no knowledge of previous interactions with a user, or even of previous parts of the current conversation. While this may not be noticable when asking simple questions, it becomes a hinderance when engaging in long running conversations that rely on conversational context. +Large Language Models are inherently stateless and have no knowledge of previous interactions with a user, or even of previous parts of the current conversation. While this may not be noticeable when asking simple questions, it becomes a hindrance when engaging in long running conversations that rely on conversational context. 
The solution to this problem is to append the previous conversation history to each subsequent call to the LLM. @@ -16,12 +14,10 @@ This notebook will show how to use Redis to structure and store and retrieve thi ```python from redisvl.extensions.message_history import MessageHistory + chat_history = MessageHistory(name='student tutor') ``` - 12:24:11 redisvl.index.index INFO Index already exists, not overwriting. - - To align with common LLM APIs, Redis stores messages with `role` and `content` fields. The supported roles are "system", "user" and "llm". @@ -131,7 +127,23 @@ semantic_history = SemanticMessageHistory(name='tutor') semantic_history.add_messages(chat_history.get_recent(top_k=8)) ``` - 12:24:15 redisvl.index.index INFO Index already exists, not overwriting. + /Users/tyler.hutcherson/Documents/AppliedAI/redis-vl-python/.venv/lib/python3.13/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html + from .autonotebook import tqdm as notebook_tqdm + + + 13:03:39 sentence_transformers.SentenceTransformer INFO Use pytorch device_name: mps + 13:03:39 sentence_transformers.SentenceTransformer INFO Load pretrained SentenceTransformer: sentence-transformers/all-mpnet-base-v2 + + + Batches: 100%|██████████| 1/1 [00:00<00:00, 6.59it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 10.33it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 9.91it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 12.52it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 57.92it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 60.45it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 13.38it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 13.65it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 62.33it/s] @@ -143,13 +155,17 @@ for message in context: print(message) ``` + Batches: 100%|██████████| 1/1 [00:00<00:00, 56.30it/s] + {'role': 'user', 'content': 'what is the size of 
England compared to Portugal?'} - {'role': 'llm', 'content': 'England is larger in land area than Portal by about 15000 square miles.'} + + + You can adjust the degree of semantic similarity needed to be included in your context. -Setting a distance threshold close to 0.0 will require an exact semantic match, while a distance threshold of 1.0 will include everthing. +Setting a distance threshold close to 0.0 will require an exact semantic match, while a distance threshold of 1.0 will include everything. ```python @@ -160,10 +176,16 @@ for message in larger_context: print(message) ``` + Batches: 100%|██████████| 1/1 [00:00<00:00, 50.04it/s] + {'role': 'user', 'content': 'what is the size of England compared to Portugal?'} {'role': 'llm', 'content': 'England is larger in land area than Portal by about 15000 square miles.'} {'role': 'user', 'content': 'What is the population of Great Britain?'} {'role': 'llm', 'content': 'As of 2023 the population of Great Britain is approximately 67 million people.'} + {'role': 'user', 'content': 'And what is the capital of Spain?'} + + + ## Conversation control @@ -175,7 +197,7 @@ LLMs can hallucinate on occasion and when this happens it can be useful to prune semantic_history.store( prompt="what is the smallest country in Europe?", response="Monaco is the smallest country in Europe at 0.78 square miles." # Incorrect. 
Vatican City is the smallest country in Europe - ) +) # get the key of the incorrect message context = semantic_history.get_recent(top_k=1, raw=True) @@ -187,6 +209,9 @@ for message in corrected_context: print(message) ``` + Batches: 100%|██████████| 1/1 [00:00<00:00, 54.73it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 10.63it/s] + {'role': 'user', 'content': 'What is the population of Great Britain?'} {'role': 'llm', 'content': 'As of 2023 the population of Great Britain is approximately 67 million people.'} {'role': 'user', 'content': 'what is the size of England compared to Portugal?'} @@ -194,6 +219,9 @@ for message in corrected_context: {'role': 'user', 'content': 'what is the smallest country in Europe?'} + + + ```python chat_history.clear() diff --git a/content/develop/ai/redisvl/user_guide/rerankers.md b/content/develop/ai/redisvl/user_guide/rerankers.md index 9ec9c94a7..307087937 100644 --- a/content/develop/ai/redisvl/user_guide/rerankers.md +++ b/content/develop/ai/redisvl/user_guide/rerankers.md @@ -2,8 +2,6 @@ linkTitle: Rerankers title: Rerankers weight: 06 -aliases: -- /integrate/redisvl/user_guide/rerankers --- diff --git a/content/develop/ai/redisvl/user_guide/semantic_router.md b/content/develop/ai/redisvl/user_guide/semantic_router.md index e6358e99b..428e0e02c 100644 --- a/content/develop/ai/redisvl/user_guide/semantic_router.md +++ b/content/develop/ai/redisvl/user_guide/semantic_router.md @@ -2,8 +2,6 @@ linkTitle: Semantic routing title: Semantic Routing weight: 08 -aliases: -- /integrate/redisvl/user_guide/semantic_router --- @@ -29,7 +27,6 @@ Additionally, each route has a `distance_threshold` which determines the maximum ```python from redisvl.extensions.router import Route - # Define routes for the semantic router technology = Route( name="technology", @@ -91,14 +88,18 @@ router = SemanticRouter( ) ``` - 19:18:32 sentence_transformers.SentenceTransformer INFO Use pytorch device_name: mps - 19:18:32 
sentence_transformers.SentenceTransformer INFO Load pretrained SentenceTransformer: sentence-transformers/all-mpnet-base-v2 + /Users/tyler.hutcherson/Documents/AppliedAI/redis-vl-python/.venv/lib/python3.13/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html + from .autonotebook import tqdm as notebook_tqdm + + + 13:03:49 sentence_transformers.SentenceTransformer INFO Use pytorch device_name: mps + 13:03:49 sentence_transformers.SentenceTransformer INFO Load pretrained SentenceTransformer: sentence-transformers/all-mpnet-base-v2 - Batches: 100%|██████████| 1/1 [00:00<00:00, 17.78it/s] - Batches: 100%|██████████| 1/1 [00:00<00:00, 37.43it/s] - Batches: 100%|██████████| 1/1 [00:00<00:00, 27.28it/s] - Batches: 100%|██████████| 1/1 [00:00<00:00, 48.76it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 6.31it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 7.02it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 8.21it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 54.33it/s] @@ -147,13 +148,13 @@ route_match = router("Can you tell me about the latest in artificial intelligenc route_match ``` - Batches: 100%|██████████| 1/1 [00:00<00:00, 6.40it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 8.63it/s] - RouteMatch(name='technology', distance=0.419145842393) + RouteMatch(name='technology', distance=0.419145941734) @@ -164,7 +165,7 @@ route_match = router("are aliens real?") route_match ``` - Batches: 100%|██████████| 1/1 [00:00<00:00, 39.83it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 11.71it/s] @@ -183,14 +184,14 @@ route_matches = router.route_many("How is AI used in basketball?", max_k=3) route_matches ``` - Batches: 100%|██████████| 1/1 [00:00<00:00, 40.50it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 12.12it/s] - [RouteMatch(name='technology', distance=0.556493878365), - RouteMatch(name='sports', distance=0.671060125033)] + 
[RouteMatch(name='technology', distance=0.556493639946), + RouteMatch(name='sports', distance=0.671060085297)] @@ -203,13 +204,13 @@ route_matches = router.route_many("How is AI used in basketball?", aggregation_m route_matches ``` - Batches: 100%|██████████| 1/1 [00:00<00:00, 66.18it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 56.69it/s] - [RouteMatch(name='technology', distance=0.556493878365), + [RouteMatch(name='technology', distance=0.556493639946), RouteMatch(name='sports', distance=0.629264354706)] @@ -233,13 +234,13 @@ route_matches = router.route_many("Lebron James") route_matches ``` - Batches: 100%|██████████| 1/1 [00:00<00:00, 41.89it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 13.20it/s] - [RouteMatch(name='sports', distance=0.663254022598)] + [RouteMatch(name='sports', distance=0.663253903389)] @@ -287,13 +288,13 @@ router2 = SemanticRouter.from_dict(router.to_dict(), redis_url="redis://localhos assert router2.to_dict() == router.to_dict() ``` - 19:18:38 sentence_transformers.SentenceTransformer INFO Use pytorch device_name: mps - 19:18:38 sentence_transformers.SentenceTransformer INFO Load pretrained SentenceTransformer: sentence-transformers/all-mpnet-base-v2 + 13:03:54 sentence_transformers.SentenceTransformer INFO Use pytorch device_name: mps + 13:03:54 sentence_transformers.SentenceTransformer INFO Load pretrained SentenceTransformer: sentence-transformers/all-mpnet-base-v2 - Batches: 100%|██████████| 1/1 [00:00<00:00, 54.94it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 53.91it/s] - 19:18:40 redisvl.index.index INFO Index already exists, not overwriting. + 13:03:54 redisvl.index.index INFO Index already exists, not overwriting. 
@@ -311,26 +312,26 @@ router3 = SemanticRouter.from_yaml("router.yaml", redis_url="redis://localhost:6 assert router3.to_dict() == router2.to_dict() == router.to_dict() ``` - 19:18:40 sentence_transformers.SentenceTransformer INFO Use pytorch device_name: mps - 19:18:40 sentence_transformers.SentenceTransformer INFO Load pretrained SentenceTransformer: sentence-transformers/all-mpnet-base-v2 + 13:03:54 sentence_transformers.SentenceTransformer INFO Use pytorch device_name: mps + 13:03:54 sentence_transformers.SentenceTransformer INFO Load pretrained SentenceTransformer: sentence-transformers/all-mpnet-base-v2 - Batches: 100%|██████████| 1/1 [00:00<00:00, 18.77it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 51.94it/s] - 19:18:41 redisvl.index.index INFO Index already exists, not overwriting. + 13:03:55 redisvl.index.index INFO Index already exists, not overwriting. -# Add route references +## Add route references ```python router.add_route_references(route_name="technology", references=["latest AI trends", "new tech gadgets"]) ``` - Batches: 100%|██████████| 1/1 [00:00<00:00, 13.22it/s] + Batches: 100%|██████████| 1/1 [00:00<00:00, 8.12it/s] @@ -341,7 +342,7 @@ router.add_route_references(route_name="technology", references=["latest AI tren -# Get route references +## Get route references ```python @@ -353,26 +354,26 @@ refs - [{'id': 'topic-router:technology:7e4bca5853c1c3298b4d001de13c3c7a79a6e0f134f81acc2e7cddbd6845961f', - 'reference_id': '7e4bca5853c1c3298b4d001de13c3c7a79a6e0f134f81acc2e7cddbd6845961f', + [{'id': 'topic-router:technology:85cc73a1437df27caa2f075a29c497e5a2e532023fbb75378aedbae80779ab37', + 'reference_id': '85cc73a1437df27caa2f075a29c497e5a2e532023fbb75378aedbae80779ab37', 'route_name': 'technology', - 'reference': 'new tech gadgets'}, + 'reference': 'tell me about the newest gadgets'}, + {'id': 'topic-router:technology:851f51cce5a9ccfbbcb66993908be6b7871479af3e3a4b139ad292a1bf7e0676', + 'reference_id': 
'851f51cce5a9ccfbbcb66993908be6b7871479af3e3a4b139ad292a1bf7e0676', + 'route_name': 'technology', + 'reference': 'what are the latest advancements in AI?'}, {'id': 'topic-router:technology:f243fb2d073774e81c7815247cb3013794e6225df3cbe3769cee8c6cefaca777', 'reference_id': 'f243fb2d073774e81c7815247cb3013794e6225df3cbe3769cee8c6cefaca777', 'route_name': 'technology', 'reference': 'latest AI trends'}, - {'id': 'topic-router:technology:851f51cce5a9ccfbbcb66993908be6b7871479af3e3a4b139ad292a1bf7e0676', - 'reference_id': '851f51cce5a9ccfbbcb66993908be6b7871479af3e3a4b139ad292a1bf7e0676', + {'id': 'topic-router:technology:7e4bca5853c1c3298b4d001de13c3c7a79a6e0f134f81acc2e7cddbd6845961f', + 'reference_id': '7e4bca5853c1c3298b4d001de13c3c7a79a6e0f134f81acc2e7cddbd6845961f', 'route_name': 'technology', - 'reference': 'what are the latest advancements in AI?'}, + 'reference': 'new tech gadgets'}, {'id': 'topic-router:technology:149a9c9919c58534aa0f369e85ad95ba7f00aa0513e0f81e2aff2ea4a717b0e0', 'reference_id': '149a9c9919c58534aa0f369e85ad95ba7f00aa0513e0f81e2aff2ea4a717b0e0', 'route_name': 'technology', - 'reference': "what's trending in tech?"}, - {'id': 'topic-router:technology:85cc73a1437df27caa2f075a29c497e5a2e532023fbb75378aedbae80779ab37', - 'reference_id': '85cc73a1437df27caa2f075a29c497e5a2e532023fbb75378aedbae80779ab37', - 'route_name': 'technology', - 'reference': 'tell me about the newest gadgets'}] + 'reference': "what's trending in tech?"}] @@ -386,14 +387,14 @@ refs - [{'id': 'topic-router:technology:7e4bca5853c1c3298b4d001de13c3c7a79a6e0f134f81acc2e7cddbd6845961f', - 'reference_id': '7e4bca5853c1c3298b4d001de13c3c7a79a6e0f134f81acc2e7cddbd6845961f', + [{'id': 'topic-router:technology:85cc73a1437df27caa2f075a29c497e5a2e532023fbb75378aedbae80779ab37', + 'reference_id': '85cc73a1437df27caa2f075a29c497e5a2e532023fbb75378aedbae80779ab37', 'route_name': 'technology', - 'reference': 'new tech gadgets'}] + 'reference': 'tell me about the newest gadgets'}] -# Delete 
route references +## Delete route references ```python diff --git a/content/develop/ai/redisvl/user_guide/vectorizers.md b/content/develop/ai/redisvl/user_guide/vectorizers.md index 8eb2cf38c..effde1564 100644 --- a/content/develop/ai/redisvl/user_guide/vectorizers.md +++ b/content/develop/ai/redisvl/user_guide/vectorizers.md @@ -2,8 +2,6 @@ linkTitle: Vectorizers title: Vectorizers weight: 04 -aliases: -- /integrate/redisvl/user_guide/vectorizers --- @@ -126,7 +124,7 @@ embeddings[0][:10] ```python -# openai also supports asyncronous requests, which we can use to speed up the vectorization process. +# openai also supports asynchronous requests, which we can use to speed up the vectorization process. embeddings = await oai.aembed_many(sentences) print("Number of Embeddings:", len(embeddings)) @@ -409,7 +407,7 @@ from redisvl.utils.vectorize import MistralAITextVectorizer mistral = MistralAITextVectorizer() -# embed a sentence using their asyncronous method +# embed a sentence using their asynchronous method test = await mistral.aembed("This is a test sentence.") print("Vector dimensions: ", len(test)) print(test[:10])