Skip to content

Commit

Permalink
removing warnings
Browse files Browse the repository at this point in the history
  • Loading branch information
slorello89 committed Dec 4, 2023
1 parent 59b2b13 commit 843a81d
Show file tree
Hide file tree
Showing 17 changed files with 196 additions and 31 deletions.
22 changes: 17 additions & 5 deletions src/Redis.OM.Vectorizers.AllMiniLML6V2/SentenceVectorizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,15 @@

namespace Redis.OM.Vectorizers.AllMiniLML6V2;

/// <summary>
/// A vectorizer that vectorizes sentences using the All-MiniLM-L6-v2 model.
/// </summary>
public class SentenceVectorizer : IVectorizer<string>
{
/// <inheritdoc />
public VectorType VectorType => VectorType.FLOAT32;

/// <inheritdoc />
public int Dim => 384;
// Lazily-created shared instances. These are readonly fields rather than expression-bodied
// properties on purpose: `=> new Lazy<T>(...)` builds a brand-new Lazy on every access, so
// `.Value` would re-run the factory (re-creating the tokenizer / re-loading the model via
// LoadInferenceSession) each time instead of caching the first result.
private static readonly Lazy<TokenizerBase> Tokenizer = new Lazy<TokenizerBase>(AllMiniLML6V2Tokenizer.Create);
private static readonly Lazy<InferenceSession> InferenceSession = new Lazy<InferenceSession>(LoadInferenceSession);
Expand All @@ -27,14 +33,20 @@ private static InferenceSession LoadInferenceSession()
_ = stream.Read(resourceBytes, 0, resourceBytes.Length);
return new InferenceSession(resourceBytes);
}


/// <inheritdoc />
public byte[] Vectorize(string obj)
{
    // Encode takes a batch, so wrap the single sentence and take the only row back out.
    float[] embedding = Encode(new[] { obj })[0];

    // Flatten each float into its raw bytes, in order.
    return embedding.SelectMany(value => BitConverter.GetBytes(value)).ToArray();
}

// Cached once on first use. As a readonly field (not `=> new (...)`), the Lazy is created a
// single time; an expression-bodied property would rebuild it, and re-read the session's
// output metadata, on every access.
private static readonly Lazy<string[]> OutputNames = new (() => InferenceSession.Value.OutputMetadata.Keys.ToArray());

/// <summary>
/// Vectorizes an array of sentences (each sentence is vectorized individually).
/// </summary>
/// <param name="sentences">The Sentences</param>
/// <returns></returns>
public static float[][] Encode(string[] sentences)
{
const int MaxTokens = 512;
Expand All @@ -50,7 +62,7 @@ public static float[][] Encode(string[] sentences)

var tokenIndexes = tokens.Take(MaxTokens).Select(token => (long)token.VocabularyIndex).Concat(padding).ToArray();
var segmentIndexes = tokens.Take(MaxTokens).Select(token => token.SegmentIndex).Concat(padding).ToArray();
var inputMask = tokens.Take(MaxTokens).Select(o => 1L).Concat(padding).ToArray();
var inputMask = tokens.Take(MaxTokens).Select(_ => 1L).Concat(padding).ToArray();
return (tokenIndexes, TokenTypeIds: segmentIndexes, inputMask);
}).ToList();
var tokenCount = encoded.First().InputIds.Length;
Expand All @@ -77,7 +89,7 @@ public static float[][] Encode(string[] sentences)

var dimensions = new[] { numSentences, tokenCount };

var input = new NamedOnnxValue[3]
var input = new []
{
NamedOnnxValue.CreateFromTensor("input_ids", new DenseTensor<long>(flattenIDs, dimensions)),
NamedOnnxValue.CreateFromTensor("attention_mask", new DenseTensor<long>(flattenAttentionMask,dimensions)),
Expand Down Expand Up @@ -109,7 +121,7 @@ public static float[][] Encode(string[] sentences)
return outputFlatten;
}

public static DenseTensor<float> Normalize(DenseTensor<float> input_dense, float eps = 1e-12f)
internal static DenseTensor<float> Normalize(DenseTensor<float> input_dense, float eps = 1e-12f)
{
//Computes sum(abs(x)^2)^(1/2)

Expand Down Expand Up @@ -142,7 +154,7 @@ public static DenseTensor<float> Normalize(DenseTensor<float> input_dense, float
}


public static DenseTensor<float> MeanPooling(DenseTensor<float> token_embeddings_dense, List<(long[] InputIds, long[] TokenTypeIds, long[] AttentionMask)> encodedSentences, float eps = 1e-9f)
internal static DenseTensor<float> MeanPooling(DenseTensor<float> token_embeddings_dense, List<(long[] InputIds, long[] TokenTypeIds, long[] AttentionMask)> encodedSentences, float eps = 1e-9f)
{
var sentencesCount = token_embeddings_dense.Dimensions[0];
var sentenceLength = token_embeddings_dense.Dimensions[1];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,20 @@

namespace Redis.OM.Vectorizers.AllMiniLML6V2;

/// <summary>
/// A vectorizer attribute that delegates to a <see cref="SentenceVectorizer"/>.
/// </summary>
public class SentenceVectorizerAttribute : VectorizerAttribute<string>
{
    /// <inheritdoc />
    public override VectorType VectorType => Vectorizer.VectorType;

    /// <inheritdoc />
    public override int Dim => Vectorizer.Dim;

    /// <inheritdoc />
    public override byte[] Vectorize(object obj) => Vectorizer.Vectorize((string)obj);

    // Initialized once per attribute instance. An expression-bodied `=> new SentenceVectorizer()`
    // would allocate a new vectorizer on every access, including each call to the
    // VectorType, Dim and Vectorize members above.
    /// <inheritdoc />
    public override IVectorizer<string> Vectorizer { get; } = new SentenceVectorizer();
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ protected CasedTokenizer(string[] vocabulary) : base(vocabulary)

protected override IEnumerable<string> TokenizeSentence(string text)
{
return text.Split(new string[] { " ", " ", "\r\n" }, StringSplitOptions.None)
return text.Split(new [] { " ", " ", "\r\n" }, StringSplitOptions.None)
.SelectMany(o => o.SplitAndKeep(".,;:\\/?!#$%()=+-*\"'–_`<>&^@{}[]|~'".ToArray()));
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
namespace Redis.OM.Vectorizers.AllMiniLML6V2.Tokenizers;

public class Tokens
internal class Tokens
{
public const string Padding = "";
public const string Unknown = "[UNK]";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ public UncasedTokenizer(string[] vocabulary) : base(vocabulary)

protected override IEnumerable<string> TokenizeSentence(string text)
{
return text.Split(new string[] { " ", " ", "\r\n" }, StringSplitOptions.None)
return text.Split(new [] { " ", " ", "\r\n" }, StringSplitOptions.None)
.SelectMany(o => o.SplitAndKeep(".,;:\\/?!#$%()=+-*\"'–_`<>&^@{}[]|~'".ToArray()))
.Select(o => o.ToLower());
}
Expand Down
14 changes: 12 additions & 2 deletions src/Redis.OM.Vectorizers.Resnet18/ImageModelObjects.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,24 @@

namespace Redis.OM.Vectorizers.Resnet18;

public class ImageInput
internal class ImageInput
{
[ColumnName(@"ImageSource")]
public string ImageSource { get; set; }

public ImageInput(string imageSource)
{
ImageSource = imageSource;
}
}

public class InMemoryImageData
internal class InMemoryImageData
{
[ImageType(224,224)]
public MLImage Image;

public InMemoryImageData(MLImage image)
{
Image = image;
}
}
17 changes: 11 additions & 6 deletions src/Redis.OM.Vectorizers.Resnet18/ImageVectorizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ public byte[] Vectorize(string obj)
};
var imageStream = Configuration.Instance.Client.Send(request).Content.ReadAsStream();
var image = MLImage.CreateFromStream(imageStream);
var vector = VectorizeBitMaps(new [] { image })[0].SelectMany(BitConverter.GetBytes).ToArray();
var vector = VectorizeImages(new [] { image })[0].SelectMany(BitConverter.GetBytes).ToArray();
return vector;
}

Expand All @@ -44,7 +44,7 @@ public byte[] Vectorize(string obj)
return VectorizeFiles(new[] { obj })[0].SelectMany(BitConverter.GetBytes).ToArray();
}

private static Lazy<EstimatorChain<TransformerChain<ColumnCopyingTransformer>>> FilePipeline = new(CreateFilePipeline);
private static readonly Lazy<EstimatorChain<TransformerChain<ColumnCopyingTransformer>>> FilePipeline = new(CreateFilePipeline);

private static readonly Lazy<MLContext> MlContext = new(()=>new MLContext());

Expand All @@ -68,7 +68,7 @@ private static EstimatorChain<TransformerChain<ColumnCopyingTransformer>> Create
/// <returns></returns>
public static float[][] VectorizeFiles(IEnumerable<string> imagePaths)
{
var images = imagePaths.Select(x => new ImageInput { ImageSource = x });
var images = imagePaths.Select(x => new ImageInput(x));
var mlContext = MlContext.Value;
var dataView = mlContext.Data.LoadFromEnumerable(images);

Expand All @@ -77,7 +77,7 @@ public static float[][] VectorizeFiles(IEnumerable<string> imagePaths)
return vector;
}

public static Lazy<EstimatorChain<TransformerChain<ColumnCopyingTransformer>>> BitmapPipeline = new(CreateBitmapPipeline);
private static readonly Lazy<EstimatorChain<TransformerChain<ColumnCopyingTransformer>>> BitmapPipeline = new(CreateBitmapPipeline);

private static EstimatorChain<TransformerChain<ColumnCopyingTransformer>> CreateBitmapPipeline()
{
Expand All @@ -91,9 +91,14 @@ private static EstimatorChain<TransformerChain<ColumnCopyingTransformer>> Create
return pipeline;
}

public static float[][] VectorizeBitMaps(IEnumerable<MLImage> mlImages)
/// <summary>
/// Encodes a collection of images.
/// </summary>
/// <param name="mlImages"></param>
/// <returns></returns>
public static float[][] VectorizeImages(IEnumerable<MLImage> mlImages)
{
var images = mlImages.Select(x => new InMemoryImageData { Image = x });
var images = mlImages.Select(x => new InMemoryImageData(x));
var mlContext = MlContext.Value;
var dataView = mlContext.Data.LoadFromEnumerable(images);
var transformedData = BitmapPipeline.Value.Fit(dataView).Transform(dataView);
Expand Down
10 changes: 10 additions & 0 deletions src/Redis.OM.Vectorizers.Resnet18/ImageVectorizerAttribute.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,20 @@

namespace Redis.OM.Vectorizers.Resnet18;

/// <summary>
/// A vectorizer attribute for encoding images, backed by an <see cref="ImageVectorizer"/>.
/// </summary>
public class ImageVectorizerAttribute : VectorizerAttribute<string>
{
    /// <inheritdoc />
    public override IVectorizer<string> Vectorizer { get; } = new ImageVectorizer();

    /// <inheritdoc />
    public override VectorType VectorType
    {
        get { return Vectorizer.VectorType; }
    }

    /// <inheritdoc />
    public override int Dim
    {
        get { return Vectorizer.Dim; }
    }

    /// <inheritdoc />
    public override byte[] Vectorize(object obj)
    {
        // The underlying vectorizer is typed to string, so the input is cast before delegating.
        var source = (string)obj;
        return Vectorizer.Vectorize(source);
    }
}
15 changes: 15 additions & 0 deletions src/Redis.OM.Vectorizers/AzureOpenAIVectorizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,22 @@

namespace Redis.OM.Vectorizers;

/// <summary>
/// Vectorizer for Azure's OpenAI REST API
/// </summary>
public class AzureOpenAIVectorizer : IVectorizer<string>
{
private readonly string _apiKey;
private readonly string _resourceName;
private readonly string _deploymentName;

/// <summary>
/// Initializes vectorizer
/// </summary>
/// <param name="apiKey">The Vectorizers API Key</param>
/// <param name="resourceName">The Azure Resource Name.</param>
/// <param name="deploymentName">The Azure Deployment Name.</param>
/// <param name="dim">The dimensions of the model addressed by this resource/deployment.</param>
public AzureOpenAIVectorizer(string apiKey, string resourceName, string deploymentName, int dim)
{
_apiKey = apiKey;
Expand All @@ -20,8 +30,13 @@ public AzureOpenAIVectorizer(string apiKey, string resourceName, string deployme
Dim = dim;
}

/// <inheritdoc />
public VectorType VectorType => VectorType.FLOAT32;

/// <inheritdoc />
public int Dim { get; }

/// <inheritdoc />
public byte[] Vectorize(string str)
{
    // Get the embedding for the text using this instance's resource, deployment and key.
    var floats = GetFloats(str, _resourceName, _deploymentName, _apiKey);

    // Flatten each float into its raw bytes, in order.
    return floats.SelectMany(value => BitConverter.GetBytes(value)).ToArray();
}

internal static float[] GetFloats(string s, string resourceName, string deploymentName, string apiKey)
Expand Down
42 changes: 40 additions & 2 deletions src/Redis.OM.Vectorizers/Configuration.cs
Original file line number Diff line number Diff line change
@@ -1,29 +1,67 @@
using System.Net.Http.Headers;
using System.Runtime.CompilerServices;
using Microsoft.Extensions.Configuration;

[assembly: InternalsVisibleTo("Redis.OM.Vectorizers.Resnet18")]
namespace Redis.OM;

public class Configuration
/// <summary>
/// Some Configuration Items.
/// </summary>
internal class Configuration
{
/// <summary>
/// Gets the configuration item at the given key.
/// </summary>
/// <param name="str"></param>
public string? this[string str] => _settings[str];

/// <summary>
/// The bearer authorization token for Hugging Face's model API.
/// </summary>
public string HuggingFaceAuthorizationToken => _settings["REDIS_OM_HF_TOKEN"] ?? string.Empty;

/// <summary>
/// Bearer token for Open AI's API.
/// </summary>
public string OpenAiAuthorizationToken => _settings["REDIS_OM_OAI_TOKEN"] ?? string.Empty;

/// <summary>
/// Azure OpenAI Api Key.
/// </summary>
public string AzureOpenAIApiKey => _settings["REDIS_OM_AZURE_OAI_TOKEN"] ?? string.Empty;

/// <summary>
/// Hugging Face Model Id
/// </summary>
public string ModelId => _settings["REDIS_OM_HF_MODEL_ID"] ?? string.Empty;

/// <summary>
/// Base Address for Hugging Face Feature Extraction API
/// </summary>
public string HuggingFaceBaseAddress => _settings["REDIS_OM_HF_FEATURE_EXTRACTION_URL"] ?? string.Empty;

private const string DefaultHuggingFaceApiUrl = "https://api-inference.huggingface.co";

private const string DefaultOpenAiApiUrl = "https://api.openai.com";

/// <summary>
/// URL for OpenAI API.
/// </summary>
public string OpenAiApiUrl => _settings["REDIS_OM_OAI_API_URL"] ?? String.Empty;

private readonly IConfiguration _settings;

private static readonly object LockObject = new ();
private static Configuration? _instance;

/// <summary>
/// Common HTTP Client.
/// </summary>
public readonly HttpClient Client;

/// <summary>
/// Singleton Instance.
/// </summary>
public static Configuration Instance
{
get
Expand Down
30 changes: 28 additions & 2 deletions src/Redis.OM.Vectorizers/HuggingFaceVectorizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,17 @@

namespace Redis.OM.Vectorizers;

/// <summary>
/// Vectorizer for HuggingFace API.
/// </summary>
public class HuggingFaceVectorizer : IVectorizer<string>
{
/// <summary>
/// Initializes the Vectorizer.
/// </summary>
/// <param name="authToken">Auth token.</param>
/// <param name="modelId">Model Id.</param>
/// <param name="dim">Dimensions for the output tensors of the model.</param>
public HuggingFaceVectorizer(string authToken, string modelId, int dim)
{
_huggingFaceAuthToken = authToken;
Expand All @@ -15,16 +24,33 @@ public HuggingFaceVectorizer(string authToken, string modelId, int dim)
}

private readonly string _huggingFaceAuthToken;

/// <summary>
/// The Model Id.
/// </summary>
public string ModelId { get; }

/// <inheritdoc />
public VectorType VectorType => VectorType.FLOAT32;


/// <inheritdoc />
public int Dim { get; }

/// <inheritdoc />
public byte[] Vectorize(string str) =>
    // Get the floats for the text, then flatten each one into its raw bytes, in order.
    GetFloats(str, ModelId, _huggingFaceAuthToken)
        .SelectMany(value => BitConverter.GetBytes(value))
        .ToArray();

public static float[] GetFloats(string s, string modelId, string huggingFaceAuthToken)
/// <summary>
/// Gets the floats for the sentence.
/// </summary>
/// <param name="s">the string.</param>
/// <param name="modelId">The Model Id.</param>
/// <param name="huggingFaceAuthToken">The HF token.</param>
/// <returns></returns>
/// <exception cref="Exception"></exception>
internal static float[] GetFloats(string s, string modelId, string huggingFaceAuthToken)
{
var client = Configuration.Instance.Client;
var requestContent = JsonContent.Create(new
Expand Down
Loading

0 comments on commit 843a81d

Please sign in to comment.