From 75ffa4bb0647ff042aa93ad02f99b7edef6ccf06 Mon Sep 17 00:00:00 2001 From: Yaliang Wu Date: Tue, 28 May 2024 14:56:39 -0700 Subject: [PATCH] add titan embeeding v2 to blueprint (#2480) Signed-off-by: Yaliang Wu (cherry picked from commit 9b072c43fa99c0dfb86373e5ae95c86ccc94233f) --- ...ock_connector_titan_embedding_blueprint.md | 45 +++++++++++++++---- ...enerate_embeddings_for_arrays_of_object.md | 3 +- 2 files changed, 38 insertions(+), 10 deletions(-) diff --git a/docs/remote_inference_blueprints/bedrock_connector_titan_embedding_blueprint.md b/docs/remote_inference_blueprints/bedrock_connector_titan_embedding_blueprint.md index 97002fb934..299ab321d9 100644 --- a/docs/remote_inference_blueprints/bedrock_connector_titan_embedding_blueprint.md +++ b/docs/remote_inference_blueprints/bedrock_connector_titan_embedding_blueprint.md @@ -19,6 +19,7 @@ PUT /_cluster/settings If you are using self-managed Opensearch, you should supply AWS credentials: +If you are using Titan Text Embedding V2, change the "model" value under "parameters" to `amazon.titan-embed-text-v2:0`. ```json POST /_plugins/_ml/connectors/_create { @@ -28,7 +29,8 @@ POST /_plugins/_ml/connectors/_create "protocol": "aws_sigv4", "parameters": { "region": "", - "service_name": "bedrock" + "service_name": "bedrock", + "model": "amazon.titan-embed-text-v1" }, "credential": { "access_key": "", @@ -39,14 +41,14 @@ POST /_plugins/_ml/connectors/_create { "action_type": "predict", "method": "POST", - "url": "https://bedrock-runtime.us-east-1.amazonaws.com/model/amazon.titan-embed-text-v1/invoke", + "url": "https://bedrock-runtime.${parameters.region}.amazonaws.com/model/${parameters.model}/invoke", "headers": { "content-type": "application/json", "x-amz-content-sha256": "required" }, "request_body": "{ \"inputText\": \"${parameters.inputText}\" }", - "pre_process_function": "\n StringBuilder builder = new StringBuilder();\n builder.append(\"\\\"\");\n String first = params.text_docs[0];\n builder.append(first);\n 
builder.append(\"\\\"\");\n def parameters = \"{\" +\"\\\"inputText\\\":\" + builder + \"}\";\n return \"{\" +\"\\\"parameters\\\":\" + parameters + \"}\";", - "post_process_function": "\n def name = \"sentence_embedding\";\n def dataType = \"FLOAT32\";\n if (params.embedding == null || params.embedding.length == 0) {\n return params.message;\n }\n def shape = [params.embedding.length];\n def json = \"{\" +\n \"\\\"name\\\":\\\"\" + name + \"\\\",\" +\n \"\\\"data_type\\\":\\\"\" + dataType + \"\\\",\" +\n \"\\\"shape\\\":\" + shape + \",\" +\n \"\\\"data\\\":\" + params.embedding +\n \"}\";\n return json;\n " + "pre_process_function": "connector.pre_process.bedrock.embedding", + "post_process_function": "connector.post_process.bedrock.embedding" } ] } @@ -64,7 +66,8 @@ POST /_plugins/_ml/connectors/_create "protocol": "aws_sigv4", "parameters": { "region": "", - "service_name": "bedrock" + "service_name": "bedrock", + "model": "amazon.titan-embed-text-v1" }, "credential": { "roleArn": "" @@ -79,8 +82,8 @@ POST /_plugins/_ml/connectors/_create "x-amz-content-sha256": "required" }, "request_body": "{ \"inputText\": \"${parameters.inputText}\" }", - "pre_process_function": "\n StringBuilder builder = new StringBuilder();\n builder.append(\"\\\"\");\n String first = params.text_docs[0];\n builder.append(first);\n builder.append(\"\\\"\");\n def parameters = \"{\" +\"\\\"inputText\\\":\" + builder + \"}\";\n return \"{\" +\"\\\"parameters\\\":\" + parameters + \"}\";", - "post_process_function": "\n def name = \"sentence_embedding\";\n def dataType = \"FLOAT32\";\n if (params.embedding == null || params.embedding.length == 0) {\n return params.message;\n }\n def shape = [params.embedding.length];\n def json = \"{\" +\n \"\\\"name\\\":\\\"\" + name + \"\\\",\" +\n \"\\\"data_type\\\":\\\"\" + dataType + \"\\\",\" +\n \"\\\"shape\\\":\" + shape + \",\" +\n \"\\\"data\\\":\" + params.embedding +\n \"}\";\n return json;\n " + "pre_process_function": 
"connector.pre_process.bedrock.embedding", + "post_process_function": "connector.post_process.bedrock.embedding" } ] } @@ -151,7 +154,7 @@ POST /_plugins/_ml/models/sKR9PIsBQRofe4CSlUov/_predict } ``` -Sample response: +Sample response of Titan Text Embedding V1: ```json { "inference_results": [ @@ -177,3 +180,29 @@ Sample response: } ``` +Sample response of Titan Text Embedding V2: +```json +{ + "inference_results": [ + { + "output": [ + { + "name": "sentence_embedding", + "data_type": "FLOAT32", + "shape": [ + 1024 + ], + "data": [ + -0.041385926, + 0.08503958, + 0.0026220535, + ... + ] + } + ], + "status_code": 200 + } + ] +} +``` + diff --git a/docs/tutorials/semantic_search/generate_embeddings_for_arrays_of_object.md b/docs/tutorials/semantic_search/generate_embeddings_for_arrays_of_object.md index 0e52f6e03e..0f6ed31542 100644 --- a/docs/tutorials/semantic_search/generate_embeddings_for_arrays_of_object.md +++ b/docs/tutorials/semantic_search/generate_embeddings_for_arrays_of_object.md @@ -81,7 +81,7 @@ PUT my_books Create sub-pipeline to generate embedding for one item in the array. This pipeline contains 3 processors -- set processor: The `text_embedding` processor is unable to identify "_ingest._value.title". You need to copy "_ingest._value.title" to a temporary field for text_embedding to process it. +- set processor: The `text_embedding` processor is unable to identify "_ingest._value.title". You need to copy "_ingest._value.title" to a non-existing temporary field for text_embedding to process it. - text_embedding processor: convert value of the temporary field to embedding - remove processor: remove temporary field ``` @@ -228,7 +228,6 @@ Response "description": "This is first book" }, { - "title": "second book", "description": "This is second book" } ]