diff --git a/README.md b/README.md index ca5c7f01..514f58cf 100644 --- a/README.md +++ b/README.md @@ -288,6 +288,104 @@ customers.Where(x => x.LastName == "Bond" && x.FirstName == "James"); customers.Where(x=>x.NickNames.Contains("Jim")); ``` +### Vectors + +Redis OM .NET also supports storing and querying Vectors in Redis. + +A `Vector` is a representation of an object that can be transformed into a vector by a Vectorizer. + +A `VectorizerAttribute` is the abstract class you use to decorate your Vector fields; it is responsible for defining the logic to convert your Vectors into Embeddings. In the package `Redis.OM.Vectorizers` we provide vectorizers for HuggingFace, OpenAI, and AzureOpenAI to allow you to easily integrate them into your workflows. + +#### Define a Vector in your Model. + +To define a vector in your model, simply decorate a `Vector` field with an `Indexed` and a `Vectorizer` attribute (in this case we'll use OpenAI): + +```cs +[Document(StorageType = StorageType.Json)] +public class OpenAIQuery +{ + [RedisIdField] + public string Id { get; set; } + + [Indexed(DistanceMetric = DistanceMetric.COSINE)] + [OpenAIVectorizer] + public Vector Prompt { get; set; } + + public string Response { get; set; } + + [Indexed] + public string Language { get; set; } + + [Indexed] + public DateTime TimeStamp { get; set; } +} +``` + +#### Insert Vectors into Redis + +With the vector defined in our model, all we need to do is create Vectors of the generic type, and insert them with our model. Using our `RedisCollection`, you can do this by simply using `Insert`: + +```cs +var query = new OpenAIQuery +{ + Language = "en_us", + Prompt = Vector.Of("What is the Capital of France?"), + Response = "Paris", + TimeStamp = DateTime.Now - TimeSpan.FromHours(3) +}; +collection.Insert(query); +``` + +The Vectorizer will manage the embedding generation for you without you having to intervene. 
+ +#### Query Vectors in Redis + +To query vector fields in Redis, all you need to do is use the `VectorRange` method on a vector within our normal LINQ queries, and/or use the `NearestNeighbors` method with whatever other filters you want to use. Here are some examples: + +```cs +var queryPrompt = Vector.Of("What really is the Capital of France?"); + +// simple vector range, find first within .15 +var result = collection.First(x => x.Prompt.VectorRange(queryPrompt, .15)); + +// simple nearest neighbors query, finds first nearest neighbor +result = collection.NearestNeighbors(x => x.Prompt, 1, queryPrompt).First(); + +// hybrid query, pre-filters result set for English responses, then runs a nearest neighbors search. +result = collection.Where(x=>x.Language == "en_us").NearestNeighbors(x => x.Prompt, 1, queryPrompt).First(); + +// hybrid query, pre-filters responses newer than 4 hours, and finds first result within .15 +var ts = DateTimeOffset.Now - TimeSpan.FromHours(4); +result = collection.First(x=>x.TimeStamp > ts && x.Prompt.VectorRange(queryPrompt, .15)); +``` + +#### What Happens to the Embeddings? + +With Redis OM, the embeddings can be completely transparent to you: they are generated and bound to the `Vector` when you query/insert your vectors. If, however, you need your embeddings after the insertion/query, they are available at `Vector.Embedding`, and can be queried either as the raw bytes, as an array of doubles or as an array of floats (depending on your vectorizer). + +#### Configuration + +The Vectorizers provided by the `Redis.OM.Vectorizers` package have some configuration parameters that they will pull in either from your `appsettings.json` file, or your environment variables (with your appsettings taking precedence). + +| Configuration Parameter | Description | +|-------------------------------- |-----------------------------------------------| +| REDIS_OM_HF_TOKEN | HuggingFace Authorization token. 
| +| REDIS_OM_OAI_TOKEN | OpenAI Authorization token | +| REDIS_OM_OAI_API_URL | OpenAI URL | +| REDIS_OM_AZURE_OAI_TOKEN | Azure OpenAI api key | +| REDIS_OM_AZURE_OAI_RESOURCE_NAME | Azure resource name | +| REDIS_OM_AZURE_OAI_DEPLOYMENT_NAME | Azure deployment | + +### Semantic Caching + +Redis OM also provides the ability to use Semantic Caching, as well as providers for OpenAI, HuggingFace, and Azure OpenAI to perform semantic caching. To use a Semantic Cache, simply pull one out of the RedisConnectionProvider and use `Store` to insert items, and `GetSimilar` to retrieve items. For example: + +```cs +var cache = _provider.OpenAISemanticCache(token); +cache.Store("What is the capital of France?", "Paris"); +var res = cache.GetSimilar("What really is the capital of France?").First(); +``` + ### 🖩 Aggregations We can also run aggregations on the customer object, again using expressions in LINQ: diff --git a/src/Redis.OM.Vectorizers/AzureOpenAISentenceVectorizer.cs b/src/Redis.OM.Vectorizers/AzureOpenAIVectorizer.cs similarity index 91% rename from src/Redis.OM.Vectorizers/AzureOpenAISentenceVectorizer.cs rename to src/Redis.OM.Vectorizers/AzureOpenAIVectorizer.cs index 06359d3c..4ec34545 100644 --- a/src/Redis.OM.Vectorizers/AzureOpenAISentenceVectorizer.cs +++ b/src/Redis.OM.Vectorizers/AzureOpenAIVectorizer.cs @@ -6,13 +6,13 @@ namespace Redis.OM.Vectorizers; -public class AzureOpenAISentenceVectorizer : IVectorizer +public class AzureOpenAIVectorizer : IVectorizer { private readonly string _apiKey; private readonly string _resourceName; private readonly string _deploymentName; - public AzureOpenAISentenceVectorizer(string apiKey, string resourceName, string deploymentName, int dim) + public AzureOpenAIVectorizer(string apiKey, string resourceName, string deploymentName, int dim) { _apiKey = apiKey; _resourceName = resourceName; diff --git a/src/Redis.OM.Vectorizers/AzureOpenAISentenceVectorizerAttribute.cs 
b/src/Redis.OM.Vectorizers/AzureOpenAIVectorizerAttribute.cs similarity index 68% rename from src/Redis.OM.Vectorizers/AzureOpenAISentenceVectorizerAttribute.cs rename to src/Redis.OM.Vectorizers/AzureOpenAIVectorizerAttribute.cs index 907b3930..43344ed2 100644 --- a/src/Redis.OM.Vectorizers/AzureOpenAISentenceVectorizerAttribute.cs +++ b/src/Redis.OM.Vectorizers/AzureOpenAIVectorizerAttribute.cs @@ -4,15 +4,15 @@ namespace Redis.OM.Vectorizers; /// -public class AzureOpenAISentenceVectorizerAttribute : VectorizerAttribute +public class AzureOpenAIVectorizerAttribute : VectorizerAttribute { /// - public AzureOpenAISentenceVectorizerAttribute(string deploymentName, string resourceName, int dim) + public AzureOpenAIVectorizerAttribute(string deploymentName, string resourceName, int dim) { DeploymentName = deploymentName; ResourceName = resourceName; Dim = dim; - Vectorizer = new AzureOpenAISentenceVectorizer(Configuration.Instance.AzureOpenAIApiKey, ResourceName, DeploymentName, Dim); + Vectorizer = new AzureOpenAIVectorizer(Configuration.Instance.AzureOpenAIApiKey, ResourceName, DeploymentName, Dim); } /// @@ -42,7 +42,7 @@ public override byte[] Vectorize(object obj) throw new ArgumentException("Object must be a string to be embedded", nameof(obj)); } - var floats = AzureOpenAISentenceVectorizer.GetFloats(s, ResourceName, DeploymentName, Configuration.Instance.AzureOpenAIApiKey); + var floats = AzureOpenAIVectorizer.GetFloats(s, ResourceName, DeploymentName, Configuration.Instance.AzureOpenAIApiKey); return floats.SelectMany(BitConverter.GetBytes).ToArray(); } } \ No newline at end of file diff --git a/src/Redis.OM.Vectorizers/HuggingFaceApiSentenceVectorizer.cs b/src/Redis.OM.Vectorizers/HuggingFaceVectorizer.cs similarity index 91% rename from src/Redis.OM.Vectorizers/HuggingFaceApiSentenceVectorizer.cs rename to src/Redis.OM.Vectorizers/HuggingFaceVectorizer.cs index ada56754..c07224c3 100644 --- a/src/Redis.OM.Vectorizers/HuggingFaceApiSentenceVectorizer.cs +++ 
b/src/Redis.OM.Vectorizers/HuggingFaceVectorizer.cs @@ -5,9 +5,9 @@ namespace Redis.OM.Vectorizers; -public class HuggingFaceApiSentenceVectorizer : IVectorizer +public class HuggingFaceVectorizer : IVectorizer { - public HuggingFaceApiSentenceVectorizer(string authToken, string modelId, int dim) + public HuggingFaceVectorizer(string authToken, string modelId, int dim) { _huggingFaceAuthToken = authToken; ModelId = modelId; diff --git a/src/Redis.OM.Vectorizers/HuggingFaceApiSentenceVectorizerAttribute.cs b/src/Redis.OM.Vectorizers/HuggingFaceVectorizerAttribute.cs similarity index 85% rename from src/Redis.OM.Vectorizers/HuggingFaceApiSentenceVectorizerAttribute.cs rename to src/Redis.OM.Vectorizers/HuggingFaceVectorizerAttribute.cs index e18ed559..85a42af7 100644 --- a/src/Redis.OM.Vectorizers/HuggingFaceApiSentenceVectorizerAttribute.cs +++ b/src/Redis.OM.Vectorizers/HuggingFaceVectorizerAttribute.cs @@ -7,7 +7,7 @@ namespace Redis.OM.Vectorizers; /// /// An attribute that provides a Hugging Face API Sentence Vectorizer. /// -public class HuggingFaceApiSentenceVectorizerAttribute : VectorizerAttribute +public class HuggingFaceVectorizerAttribute : VectorizerAttribute { public string? ModelId { get; set; } @@ -26,7 +26,7 @@ public override IVectorizer Vectorizer throw new InvalidOperationException("Need a Model ID in order to process vector"); } - _vectorizer = new HuggingFaceApiSentenceVectorizer(Configuration.Instance.HuggingFaceAuthorizationToken, ModelId, Dim); + _vectorizer = new HuggingFaceVectorizer(Configuration.Instance.HuggingFaceAuthorizationToken, ModelId, Dim); } return _vectorizer; @@ -73,6 +73,6 @@ public float[] GetFloats(string s) { var modelId = ModelId ?? 
Configuration.Instance["REDIS_OM_HF_MODEL_ID"]; if (modelId is null) throw new InvalidOperationException("Model Id Required to use Hugging Face API."); - return HuggingFaceApiSentenceVectorizer.GetFloats(s, modelId, Configuration.Instance.HuggingFaceAuthorizationToken); + return HuggingFaceVectorizer.GetFloats(s, modelId, Configuration.Instance.HuggingFaceAuthorizationToken); } } \ No newline at end of file diff --git a/src/Redis.OM.Vectorizers/OpenAISentenceVectorizer.cs b/src/Redis.OM.Vectorizers/OpenAIVectorizer.cs similarity index 91% rename from src/Redis.OM.Vectorizers/OpenAISentenceVectorizer.cs rename to src/Redis.OM.Vectorizers/OpenAIVectorizer.cs index 93dcf51d..5851fe82 100644 --- a/src/Redis.OM.Vectorizers/OpenAISentenceVectorizer.cs +++ b/src/Redis.OM.Vectorizers/OpenAIVectorizer.cs @@ -5,12 +5,12 @@ namespace Redis.OM.Vectorizers; -public class OpenAISentenceVectorizer : IVectorizer +public class OpenAIVectorizer : IVectorizer { private readonly string _openAIAuthToken; private readonly string _model; - public OpenAISentenceVectorizer(string openAIAuthToken, string model = "text-embedding-ada-002", int dim = 1536) + public OpenAIVectorizer(string openAIAuthToken, string model = "text-embedding-ada-002", int dim = 1536) { _openAIAuthToken = openAIAuthToken; _model = model; diff --git a/src/Redis.OM.Vectorizers/OpenAISentenceVectorizerAttribute.cs b/src/Redis.OM.Vectorizers/OpenAIVectorizerAttribute.cs similarity index 75% rename from src/Redis.OM.Vectorizers/OpenAISentenceVectorizerAttribute.cs rename to src/Redis.OM.Vectorizers/OpenAIVectorizerAttribute.cs index 177dd272..026e5718 100644 --- a/src/Redis.OM.Vectorizers/OpenAISentenceVectorizerAttribute.cs +++ b/src/Redis.OM.Vectorizers/OpenAIVectorizerAttribute.cs @@ -6,7 +6,7 @@ namespace Redis.OM.Vectorizers; /// /// An OpenAI Sentence Vectorizer. 
/// -public class OpenAISentenceVectorizerAttribute : VectorizerAttribute +public class OpenAIVectorizerAttribute : VectorizerAttribute { private const string DefaultModel = "text-embedding-ada-002"; @@ -28,7 +28,7 @@ public override IVectorizer Vectorizer { get { - return _vectorizer ??= new OpenAISentenceVectorizer(Configuration.Instance.OpenAiAuthorizationToken, ModelId, Dim); + return _vectorizer ??= new OpenAIVectorizer(Configuration.Instance.OpenAiAuthorizationToken, ModelId, Dim); } } @@ -42,6 +42,6 @@ public override byte[] Vectorize(object obj) internal float[] GetFloats(string s) { - return OpenAISentenceVectorizer.GetFloats(s, ModelId, Configuration.Instance.OpenAiAuthorizationToken); + return OpenAIVectorizer.GetFloats(s, ModelId, Configuration.Instance.OpenAiAuthorizationToken); } } \ No newline at end of file diff --git a/src/Redis.OM.Vectorizers/RedisConnectionProviderExtensions.cs b/src/Redis.OM.Vectorizers/RedisConnectionProviderExtensions.cs index 7cc54069..4540b42d 100644 --- a/src/Redis.OM.Vectorizers/RedisConnectionProviderExtensions.cs +++ b/src/Redis.OM.Vectorizers/RedisConnectionProviderExtensions.cs @@ -6,7 +6,7 @@ public static class RedisConnectionProviderExtensions { public static ISemanticCache HuggingFaceSemanticCache(this IRedisConnectionProvider provider, string huggingFaceAuthToken, double threshold = .15, string modelId = "sentence-transformers/all-mpnet-base-v2", int dim = 768, string indexName = "HuggingFaceSemanticCache", string? prefix = null, long? ttl = null) { - var vectorizer = new HuggingFaceApiSentenceVectorizer(huggingFaceAuthToken, modelId, dim); + var vectorizer = new HuggingFaceVectorizer(huggingFaceAuthToken, modelId, dim); var connection = provider.Connection; var info = connection.GetIndexInfo(indexName); var cache = new SemanticCache(indexName, prefix ?? 
indexName, threshold, ttl, vectorizer, connection); @@ -20,7 +20,7 @@ public static ISemanticCache HuggingFaceSemanticCache(this IRedisConnectionProvi public static ISemanticCache OpenAISemanticCache(this IRedisConnectionProvider provider, string openAIAuthToken, double threshold = .15, string indexName = "OpenAISemanticCache", string? prefix = null, long? ttl = null) { - var vectorizer = new OpenAISentenceVectorizer(openAIAuthToken); + var vectorizer = new OpenAIVectorizer(openAIAuthToken); var connection = provider.Connection; var info = connection.GetIndexInfo(indexName); var cache = new SemanticCache(indexName, prefix ?? indexName, threshold, ttl, vectorizer, connection); @@ -34,7 +34,7 @@ public static ISemanticCache OpenAISemanticCache(this IRedisConnectionProvider p public static ISemanticCache AzureOpenAISemanticCache(this IRedisConnectionProvider provider, string apiKey, string resourceName, string deploymentId, int dim, double threshold = .15, string indexName = "AzureOpenAISemanticCache", string? prefix = null, long? ttl = null) { - var vectorizer = new AzureOpenAISentenceVectorizer(apiKey, resourceName, deploymentId, dim); + var vectorizer = new AzureOpenAIVectorizer(apiKey, resourceName, deploymentId, dim); var connection = provider.Connection; var cache = new SemanticCache(indexName, prefix ?? 
indexName, threshold, ttl, vectorizer, connection); var info = connection.GetIndexInfo(indexName); diff --git a/test/Redis.OM.Unit.Tests/RediSearchTests/VectorTests/HuggingFaceVectors.cs b/test/Redis.OM.Unit.Tests/RediSearchTests/VectorTests/HuggingFaceVectors.cs index df69cfa4..585de4fc 100644 --- a/test/Redis.OM.Unit.Tests/RediSearchTests/VectorTests/HuggingFaceVectors.cs +++ b/test/Redis.OM.Unit.Tests/RediSearchTests/VectorTests/HuggingFaceVectors.cs @@ -11,7 +11,7 @@ public class HuggingFaceVectors public string Id { get; set; } [Indexed] - [HuggingFaceApiSentenceVectorizer(ModelId = "sentence-transformers/all-MiniLM-L6-v2")] + [HuggingFaceVectorizer(ModelId = "sentence-transformers/all-MiniLM-L6-v2")] public Vector Sentence { get; set; } [Indexed] diff --git a/test/Redis.OM.Unit.Tests/RediSearchTests/VectorTests/OpenAIQuery.cs b/test/Redis.OM.Unit.Tests/RediSearchTests/VectorTests/OpenAIQuery.cs new file mode 100644 index 00000000..399ec004 --- /dev/null +++ b/test/Redis.OM.Unit.Tests/RediSearchTests/VectorTests/OpenAIQuery.cs @@ -0,0 +1,24 @@ +using System; +using Redis.OM.Modeling; +using Redis.OM.Vectorizers; + +namespace Redis.OM.Unit.Tests; + +[Document(StorageType = StorageType.Json)] +public class OpenAIQuery +{ + [RedisIdField] + public string Id { get; set; } + + [Indexed(DistanceMetric = DistanceMetric.COSINE)] + [OpenAIVectorizer] + public Vector Prompt { get; set; } + + public string Response { get; set; } + + [Indexed] + public string Language { get; set; } + + [Indexed] + public DateTime TimeStamp { get; set; } +} \ No newline at end of file diff --git a/test/Redis.OM.Unit.Tests/RediSearchTests/VectorTests/OpenAIVectors.cs b/test/Redis.OM.Unit.Tests/RediSearchTests/VectorTests/OpenAIVectors.cs index 6ec4d647..c99df107 100644 --- a/test/Redis.OM.Unit.Tests/RediSearchTests/VectorTests/OpenAIVectors.cs +++ b/test/Redis.OM.Unit.Tests/RediSearchTests/VectorTests/OpenAIVectors.cs @@ -11,7 +11,7 @@ public class OpenAIVectors public string Id { get; set; 
} [Indexed] - [OpenAISentenceVectorizer] + [OpenAIVectorizer] public Vector Sentence { get; set; } [Indexed] diff --git a/test/Redis.OM.Unit.Tests/RediSearchTests/VectorTests/SemanticCachingTests.cs b/test/Redis.OM.Unit.Tests/RediSearchTests/VectorTests/SemanticCachingTests.cs index 523bd684..674bb439 100644 --- a/test/Redis.OM.Unit.Tests/RediSearchTests/VectorTests/SemanticCachingTests.cs +++ b/test/Redis.OM.Unit.Tests/RediSearchTests/VectorTests/SemanticCachingTests.cs @@ -32,7 +32,7 @@ public void HuggingFaceSemanticCache() { var token = Environment.GetEnvironmentVariable("REDIS_OM_HF_TOKEN"); Assert.NotNull(token); - var cache = _provider.HuggingFaceSemanticCache(token); + var cache = _provider.HuggingFaceSemanticCache(token, threshold: .15); cache.Store("What is the capital of France?", "Paris"); var res = cache.GetSimilar("What really is the capital of France?").First(); Assert.Equal("Paris",res.Response); diff --git a/test/Redis.OM.Unit.Tests/RediSearchTests/VectorTests/VectorFunctionalTests.cs b/test/Redis.OM.Unit.Tests/RediSearchTests/VectorTests/VectorFunctionalTests.cs index 3c8a9a7d..7d002b46 100644 --- a/test/Redis.OM.Unit.Tests/RediSearchTests/VectorTests/VectorFunctionalTests.cs +++ b/test/Redis.OM.Unit.Tests/RediSearchTests/VectorTests/VectorFunctionalTests.cs @@ -308,4 +308,43 @@ public void Insert() Assert.Equal(simpleVectorizedVector.Value, res.SimpleVectorizedVector.Value); Assert.Equal(simpleVectorizedVector.Embedding, res.SimpleVectorizedVector.Embedding); } + + [Fact] + public void OpenAIQueryTest() + { + _connection.DropIndexAndAssociatedRecords(typeof(OpenAIQuery)); + _connection.CreateIndex(typeof(OpenAIQuery)); + + var collection = new RedisCollection(_connection); + var query = new OpenAIQuery + { + Language = "en_us", + Prompt = Vector.Of("What is the Capital of France?"), + Response = "Paris", + TimeStamp = DateTime.Now - TimeSpan.FromHours(3) + }; + collection.Insert(query); + var queryPrompt = Vector.Of("What really is the Capital of 
France?"); + var result = collection.First(x => x.Prompt.VectorRange(queryPrompt, .15)); + + Assert.Equal("Paris", result.Response); + Assert.NotNull(queryPrompt.Embedding); + + result = collection.NearestNeighbors(x => x.Prompt, 1, queryPrompt).First(); + Assert.Equal("Paris", result.Response); + Assert.NotNull(queryPrompt.Embedding); + + result = collection.Where(x=>x.Language == "en_us").NearestNeighbors(x => x.Prompt, 1, queryPrompt).First(); + Assert.Equal("Paris", result.Response); + Assert.NotNull(queryPrompt.Embedding); + + result = collection.First(x=>x.Language == "en_us" && x.Prompt.VectorRange(queryPrompt, .15)); + Assert.Equal("Paris", result.Response); + Assert.NotNull(queryPrompt.Embedding); + + var ts = DateTimeOffset.Now - TimeSpan.FromHours(4); + result = collection.First(x=>x.TimeStamp > ts && x.Prompt.VectorRange(queryPrompt, .15)); + Assert.Equal("Paris", result.Response); + Assert.NotNull(queryPrompt.Embedding); + } } \ No newline at end of file