From 5b6dd7a62d8017aa2759360f4a1ce28f758863e4 Mon Sep 17 00:00:00 2001
From: Kyle Steckler
Date: Mon, 22 Jan 2024 18:26:04 +0000
Subject: [PATCH] add rag solution nb

---
 Makefile | 7 +-
 kernels/langchain.sh | 33 +
 .../retrieval_augmented_generation.ipynb | 1034 +++++++++++++++++
 3 files changed, 1073 insertions(+), 1 deletion(-)
 create mode 100755 kernels/langchain.sh
 create mode 100644 notebooks/vertex_genai/solutions/retrieval_augmented_generation.ipynb

diff --git a/Makefile b/Makefile
index 02f27d51..f1a88a1a 100644
--- a/Makefile
+++ b/Makefile
@@ -18,7 +18,8 @@ kernels: \
 	reinforcement_learning_kernel \
 	tf_recommenders_kernel \
 	object_detection_kernel \
-	pytorch_kfp_kernel
+	pytorch_kfp_kernel \
+	langchain_kernel
 
 .PHONY: clean
 clean:
@@ -41,6 +42,10 @@ install:
 precommit:
 	@pre-commit run --all-files
 
+.PHONY: langchain_kernel
+langchain_kernel:
+	./kernels/langchain.sh
+
 .PHONY: reinforcement_learning_kernel
 reinforcement_learning_kernel:
 	./kernels/reinforcement_learning.sh

diff --git a/kernels/langchain.sh b/kernels/langchain.sh
new file mode 100755
index 00000000..60a854c7
--- /dev/null
+++ b/kernels/langchain.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+#
+# To build the kernel: ./kernels/langchain.sh
+# To remove the kernel: ./kernels/langchain.sh remove
+#
+# This script will create an IPython kernel named $ENVNAME
+
+MODULE=vertex_genai
+ENVNAME=langchain_kernel
+REPO_ROOT_DIR="$(dirname $(cd $(dirname $BASH_SOURCE) && pwd))"
+
+# Clean up the kernel and exit if the first arg is 'remove'
+if [ "$1" == "remove" ]; then
+    echo Removing kernel $ENVNAME
+    jupyter kernelspec remove $ENVNAME
+    rm -r "$REPO_ROOT_DIR/notebooks/$MODULE/$ENVNAME"
+    exit 0
+fi
+
+cd $REPO_ROOT_DIR/notebooks/$MODULE
+
+# Set up the virtual env and kernel
+python3 -m venv $ENVNAME --system-site-packages
+source $ENVNAME/bin/activate
+python -m ipykernel install --user --name=$ENVNAME
+
+pip install -q -U pip
+pip install langchain==0.0.217
+pip install wikipedia==1.4.0
+pip install chromadb==0.3.26
+pip install google-cloud-aiplatform==1.26.1
+
+deactivate

diff --git a/notebooks/vertex_genai/solutions/retrieval_augmented_generation.ipynb b/notebooks/vertex_genai/solutions/retrieval_augmented_generation.ipynb
new file mode 100644
index 00000000..9abbc620
--- /dev/null
+++ b/notebooks/vertex_genai/solutions/retrieval_augmented_generation.ipynb
@@ -0,0 +1,1034 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "26b46970-5565-49b4-ae2a-e915e90b6c2f",
+ "metadata": {},
+ "source": [
+ "# Retrieval Augmented Generation Systems\n",
+ "\n",
+ "This notebook walks through building a question/answer system that retrieves information to formulate responses, effectively grounding the LLM with specific information. A pre-trained LLM, or even a fine-tuned LLM, will not be sufficient on its own when you want a system that understands specific, possibly private data or information that was not in its training dataset.\n",
+ "\n",
+ "In this lab you will:\n",
+ "* Learn about the different components of a retrieval augmented generation system\n",
+ "* Build a simple retrieval augmented generation system\n",
+ "* Use LangChain to simplify and scale the process"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ed0b2b6e-670e-49c1-ba5c-4e7bd5bc61c9",
+ "metadata": {},
+ "source": [
+ "This lab uses a dedicated kernel with the LangChain dependencies installed. Run the cell below to create the kernel."
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "39d15f9a-3ab7-4d99-994d-d5d235ddee3d", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "./kernels/langchain.sh\n", + "Installed kernelspec langchain_kernel in /home/jupyter/.local/share/jupyter/kernels/langchain_kernel\n", + "Collecting langchain==0.0.217\n", + " Downloading langchain-0.0.217-py3-none-any.whl.metadata (13 kB)\n", + "Collecting PyYAML>=5.4.1 (from langchain==0.0.217)\n", + " Downloading PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.1 kB)\n", + "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /opt/conda/lib/python3.10/site-packages (from langchain==0.0.217) (2.0.25)\n", + "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /opt/conda/lib/python3.10/site-packages (from langchain==0.0.217) (3.9.1)\n", + "Requirement already satisfied: async-timeout<5.0.0,>=4.0.0 in /opt/conda/lib/python3.10/site-packages (from langchain==0.0.217) (4.0.3)\n", + "Collecting dataclasses-json<0.6.0,>=0.5.7 (from langchain==0.0.217)\n", + " Downloading dataclasses_json-0.5.14-py3-none-any.whl.metadata (22 kB)\n", + "Collecting langchainplus-sdk>=0.0.17 (from langchain==0.0.217)\n", + " Downloading langchainplus_sdk-0.0.20-py3-none-any.whl.metadata (8.7 kB)\n", + "Collecting numexpr<3.0.0,>=2.8.4 (from langchain==0.0.217)\n", + " Downloading numexpr-2.8.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.9 kB)\n", + "Requirement already satisfied: numpy<2,>=1 in /opt/conda/lib/python3.10/site-packages (from langchain==0.0.217) (1.23.5)\n", + "Collecting openapi-schema-pydantic<2.0,>=1.2 (from langchain==0.0.217)\n", + " Downloading openapi_schema_pydantic-1.2.4-py3-none-any.whl (90 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m90.0/90.0 kB\u001b[0m \u001b[31m1.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: pydantic<2,>=1 in /home/jupyter/.local/lib/python3.10/site-packages (from langchain==0.0.217) (1.10.14)\n", + "Requirement already satisfied: requests<3,>=2 in /opt/conda/lib/python3.10/site-packages (from langchain==0.0.217) (2.31.0)\n", + "Requirement already satisfied: tenacity<9.0.0,>=8.1.0 in /opt/conda/lib/python3.10/site-packages (from langchain==0.0.217) (8.2.3)\n", + "Requirement already satisfied: attrs>=17.3.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain==0.0.217) (23.2.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /opt/conda/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain==0.0.217) (6.0.4)\n", + "Requirement already satisfied: yarl<2.0,>=1.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain==0.0.217) (1.9.3)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /opt/conda/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain==0.0.217) (1.4.1)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /opt/conda/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain==0.0.217) (1.3.1)\n", + "Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.6.0,>=0.5.7->langchain==0.0.217)\n", + " Downloading marshmallow-3.20.2-py3-none-any.whl.metadata (7.5 kB)\n", + "Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.6.0,>=0.5.7->langchain==0.0.217)\n", + " Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 
kB)\n", + "Requirement already satisfied: typing-extensions>=4.2.0 in /opt/conda/lib/python3.10/site-packages (from pydantic<2,>=1->langchain==0.0.217) (4.9.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.10/site-packages (from requests<3,>=2->langchain==0.0.217) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests<3,>=2->langchain==0.0.217) (3.6)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests<3,>=2->langchain==0.0.217) (1.26.18)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests<3,>=2->langchain==0.0.217) (2023.11.17)\n", + "Requirement already satisfied: greenlet!=0.4.17 in /opt/conda/lib/python3.10/site-packages (from SQLAlchemy<3,>=1.4->langchain==0.0.217) (3.0.3)\n", + "Requirement already satisfied: packaging>=17.0 in /opt/conda/lib/python3.10/site-packages (from marshmallow<4.0.0,>=3.18.0->dataclasses-json<0.6.0,>=0.5.7->langchain==0.0.217) (23.2)\n", + "Collecting mypy-extensions>=0.3.0 (from typing-inspect<1,>=0.4.0->dataclasses-json<0.6.0,>=0.5.7->langchain==0.0.217)\n", + " Downloading mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)\n", + "Downloading langchain-0.0.217-py3-none-any.whl (1.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m10.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m0:01\u001b[0m\n", + "\u001b[?25hDownloading dataclasses_json-0.5.14-py3-none-any.whl (26 kB)\n", + "Downloading langchainplus_sdk-0.0.20-py3-none-any.whl (25 kB)\n", + "Downloading numexpr-2.8.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (374 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m374.8/374.8 kB\u001b[0m \u001b[31m30.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (705 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m705.5/705.5 kB\u001b[0m \u001b[31m41.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading marshmallow-3.20.2-py3-none-any.whl (49 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.4/49.4 kB\u001b[0m \u001b[31m5.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading typing_inspect-0.9.0-py3-none-any.whl (8.8 kB)\n", + "Installing collected packages: PyYAML, numexpr, mypy-extensions, marshmallow, typing-inspect, openapi-schema-pydantic, langchainplus-sdk, dataclasses-json, langchain\n", + " Attempting uninstall: PyYAML\n", + " Found existing installation: PyYAML 5.3.1\n", + " Not uninstalling pyyaml at /home/jupyter/.local/lib/python3.10/site-packages, outside environment /home/jupyter/asl-ml-immersion/notebooks/vertex_genai/langchain_kernel\n", + " Can't uninstall 'PyYAML'. No files were found to uninstall.\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", + "datasets 2.14.5 requires fsspec[http]<2023.9.0,>=2023.1.0, but you have fsspec 2023.12.2 which is incompatible.\n", + "tf-models-official 2.12.0 requires pyyaml<6.0,>=5.1, but you have pyyaml 6.0.1 which is incompatible.\n", + "ydata-profiling 4.6.4 requires pydantic>=2, but you have pydantic 1.10.14 which is incompatible.\n", + "ydata-profiling 4.6.4 requires typeguard<5,>=4.1.2, but you have typeguard 2.13.3 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed PyYAML-6.0.1 dataclasses-json-0.5.14 langchain-0.0.217 langchainplus-sdk-0.0.20 marshmallow-3.20.2 mypy-extensions-1.0.0 numexpr-2.8.8 openapi-schema-pydantic-1.2.4 typing-inspect-0.9.0\n", + "Collecting wikipedia==1.4.0\n", + " Downloading wikipedia-1.4.0.tar.gz (27 kB)\n", + " Preparing metadata (setup.py) ... \u001b[?25ldone\n", + "\u001b[?25hRequirement already satisfied: beautifulsoup4 in /opt/conda/lib/python3.10/site-packages (from wikipedia==1.4.0) (4.12.2)\n", + "Requirement already satisfied: requests<3.0.0,>=2.0.0 in /opt/conda/lib/python3.10/site-packages (from wikipedia==1.4.0) (2.31.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.10/site-packages (from requests<3.0.0,>=2.0.0->wikipedia==1.4.0) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests<3.0.0,>=2.0.0->wikipedia==1.4.0) (3.6)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests<3.0.0,>=2.0.0->wikipedia==1.4.0) (1.26.18)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests<3.0.0,>=2.0.0->wikipedia==1.4.0) (2023.11.17)\n", + "Requirement already satisfied: soupsieve>1.2 in /opt/conda/lib/python3.10/site-packages (from beautifulsoup4->wikipedia==1.4.0) (2.5)\n", + "Building wheels for collected packages: wikipedia\n", + " Building wheel for wikipedia (setup.py) ... \u001b[?25ldone\n", + "\u001b[?25h Created wheel for wikipedia: filename=wikipedia-1.4.0-py3-none-any.whl size=11678 sha256=7b5232045a4a53cbae5e0d35244d901d0f2b514b6dcff9ba3b49bf399da43478\n", + " Stored in directory: /home/jupyter/.cache/pip/wheels/5e/b6/c5/93f3dec388ae76edc830cb42901bb0232504dfc0df02fc50de\n", + "Successfully built wikipedia\n", + "Installing collected packages: wikipedia\n", + "Successfully installed wikipedia-1.4.0\n", + "Collecting chromadb==0.3.26\n", + " Downloading chromadb-0.3.26-py3-none-any.whl.metadata (6.8 kB)\n", + "Requirement already satisfied: pandas>=1.3 in /opt/conda/lib/python3.10/site-packages (from chromadb==0.3.26) (2.1.4)\n", + "Requirement already satisfied: requests>=2.28 in /opt/conda/lib/python3.10/site-packages (from chromadb==0.3.26) (2.31.0)\n", + "Requirement already satisfied: pydantic>=1.9 in /home/jupyter/.local/lib/python3.10/site-packages (from chromadb==0.3.26) (1.10.14)\n", + "Collecting hnswlib>=0.7 (from chromadb==0.3.26)\n", + " Downloading hnswlib-0.8.0.tar.gz (36 kB)\n", + " Installing build dependencies ... \u001b[?25ldone\n", + "\u001b[?25h Getting requirements to build wheel ... \u001b[?25ldone\n", + "\u001b[?25h Preparing metadata (pyproject.toml) ... 
\u001b[?25ldone\n", + "\u001b[?25hCollecting clickhouse-connect>=0.5.7 (from chromadb==0.3.26)\n", + " Downloading clickhouse_connect-0.6.23-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.8 kB)\n", + "Collecting duckdb>=0.7.1 (from chromadb==0.3.26)\n", + " Downloading duckdb-0.9.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (760 bytes)\n", + "Requirement already satisfied: fastapi>=0.85.1 in /opt/conda/lib/python3.10/site-packages (from chromadb==0.3.26) (0.109.0)\n", + "Requirement already satisfied: uvicorn>=0.18.3 in /opt/conda/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb==0.3.26) (0.25.0)\n", + "Requirement already satisfied: numpy>=1.21.6 in /opt/conda/lib/python3.10/site-packages (from chromadb==0.3.26) (1.23.5)\n", + "Collecting posthog>=2.4.0 (from chromadb==0.3.26)\n", + " Downloading posthog-3.3.2-py2.py3-none-any.whl.metadata (2.0 kB)\n", + "Requirement already satisfied: typing-extensions>=4.5.0 in /opt/conda/lib/python3.10/site-packages (from chromadb==0.3.26) (4.9.0)\n", + "Collecting pulsar-client>=3.1.0 (from chromadb==0.3.26)\n", + " Downloading pulsar_client-3.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.0 kB)\n", + "Collecting onnxruntime>=1.14.1 (from chromadb==0.3.26)\n", + " Downloading onnxruntime-1.16.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.3 kB)\n", + "Collecting tokenizers>=0.13.2 (from chromadb==0.3.26)\n", + " Downloading tokenizers-0.15.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)\n", + "Requirement already satisfied: tqdm>=4.65.0 in /opt/conda/lib/python3.10/site-packages (from chromadb==0.3.26) (4.66.1)\n", + "Requirement already satisfied: overrides>=7.3.1 in /opt/conda/lib/python3.10/site-packages (from chromadb==0.3.26) (7.4.0)\n", + "Requirement already satisfied: certifi in /opt/conda/lib/python3.10/site-packages (from clickhouse-connect>=0.5.7->chromadb==0.3.26) (2023.11.17)\n", + "Requirement already satisfied: urllib3>=1.26 in /opt/conda/lib/python3.10/site-packages (from clickhouse-connect>=0.5.7->chromadb==0.3.26) (1.26.18)\n", + "Requirement already satisfied: pytz in /opt/conda/lib/python3.10/site-packages (from clickhouse-connect>=0.5.7->chromadb==0.3.26) (2023.3.post1)\n", + "Requirement already satisfied: zstandard in /opt/conda/lib/python3.10/site-packages (from clickhouse-connect>=0.5.7->chromadb==0.3.26) (0.22.0)\n", + "Requirement already satisfied: lz4 in /opt/conda/lib/python3.10/site-packages (from clickhouse-connect>=0.5.7->chromadb==0.3.26) (4.3.3)\n", + "Requirement already satisfied: starlette<0.36.0,>=0.35.0 in /opt/conda/lib/python3.10/site-packages (from fastapi>=0.85.1->chromadb==0.3.26) (0.35.1)\n", + "Collecting coloredlogs (from onnxruntime>=1.14.1->chromadb==0.3.26)\n", + " Downloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m3.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: flatbuffers in /opt/conda/lib/python3.10/site-packages (from onnxruntime>=1.14.1->chromadb==0.3.26) (23.5.26)\n", + "Requirement already satisfied: packaging in /opt/conda/lib/python3.10/site-packages (from onnxruntime>=1.14.1->chromadb==0.3.26) (23.2)\n", + "Requirement already satisfied: protobuf in /opt/conda/lib/python3.10/site-packages (from onnxruntime>=1.14.1->chromadb==0.3.26) (3.20.3)\n", + "Collecting 
sympy (from onnxruntime>=1.14.1->chromadb==0.3.26)\n", + " Downloading sympy-1.12-py3-none-any.whl (5.7 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.7/5.7 MB\u001b[0m \u001b[31m26.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: python-dateutil>=2.8.2 in /opt/conda/lib/python3.10/site-packages (from pandas>=1.3->chromadb==0.3.26) (2.8.2)\n", + "Requirement already satisfied: tzdata>=2022.1 in /opt/conda/lib/python3.10/site-packages (from pandas>=1.3->chromadb==0.3.26) (2023.4)\n", + "Requirement already satisfied: six>=1.5 in /opt/conda/lib/python3.10/site-packages (from posthog>=2.4.0->chromadb==0.3.26) (1.16.0)\n", + "Collecting monotonic>=1.5 (from posthog>=2.4.0->chromadb==0.3.26)\n", + " Downloading monotonic-1.6-py2.py3-none-any.whl (8.2 kB)\n", + "Requirement already satisfied: backoff>=1.10.0 in /opt/conda/lib/python3.10/site-packages (from posthog>=2.4.0->chromadb==0.3.26) (2.2.1)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.10/site-packages (from requests>=2.28->chromadb==0.3.26) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests>=2.28->chromadb==0.3.26) (3.6)\n", + "Requirement already satisfied: huggingface_hub<1.0,>=0.16.4 in /home/jupyter/.local/lib/python3.10/site-packages (from tokenizers>=0.13.2->chromadb==0.3.26) (0.17.2)\n", + "Requirement already satisfied: click>=7.0 in /opt/conda/lib/python3.10/site-packages (from uvicorn>=0.18.3->uvicorn[standard]>=0.18.3->chromadb==0.3.26) (8.1.7)\n", + "Requirement already satisfied: h11>=0.8 in /opt/conda/lib/python3.10/site-packages (from uvicorn>=0.18.3->uvicorn[standard]>=0.18.3->chromadb==0.3.26) (0.14.0)\n", + "Requirement already satisfied: httptools>=0.5.0 in /opt/conda/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb==0.3.26) (0.6.1)\n", + "Requirement already satisfied: python-dotenv>=0.13 in /opt/conda/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb==0.3.26) (1.0.0)\n", + "Requirement already satisfied: pyyaml>=5.1 in ./langchain_kernel/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb==0.3.26) (6.0.1)\n", + "Requirement already satisfied: uvloop!=0.15.0,!=0.15.1,>=0.14.0 in /opt/conda/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb==0.3.26) (0.19.0)\n", + "Requirement already satisfied: watchfiles>=0.13 in /opt/conda/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb==0.3.26) (0.21.0)\n", + "Requirement already satisfied: websockets>=10.4 in /opt/conda/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb==0.3.26) (12.0)\n", + "Requirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from huggingface_hub<1.0,>=0.16.4->tokenizers>=0.13.2->chromadb==0.3.26) (3.13.1)\n", + "Requirement already satisfied: fsspec in /opt/conda/lib/python3.10/site-packages (from huggingface_hub<1.0,>=0.16.4->tokenizers>=0.13.2->chromadb==0.3.26) (2023.12.2)\n", + "Requirement already satisfied: anyio<5,>=3.4.0 in /opt/conda/lib/python3.10/site-packages (from starlette<0.36.0,>=0.35.0->fastapi>=0.85.1->chromadb==0.3.26) (4.2.0)\n", + "Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime>=1.14.1->chromadb==0.3.26)\n", + " Downloading humanfriendly-10.0-py2.py3-none-any.whl (86 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m 
\u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m9.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting mpmath>=0.19 (from sympy->onnxruntime>=1.14.1->chromadb==0.3.26)\n", + " Downloading mpmath-1.3.0-py3-none-any.whl (536 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m536.2/536.2 kB\u001b[0m \u001b[31m36.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: sniffio>=1.1 in /opt/conda/lib/python3.10/site-packages (from anyio<5,>=3.4.0->starlette<0.36.0,>=0.35.0->fastapi>=0.85.1->chromadb==0.3.26) (1.3.0)\n", + "Requirement already satisfied: exceptiongroup>=1.0.2 in /opt/conda/lib/python3.10/site-packages (from anyio<5,>=3.4.0->starlette<0.36.0,>=0.35.0->fastapi>=0.85.1->chromadb==0.3.26) (1.2.0)\n", + "Downloading chromadb-0.3.26-py3-none-any.whl (123 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m123.6/123.6 kB\u001b[0m \u001b[31m12.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading clickhouse_connect-0.6.23-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (964 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m964.5/964.5 kB\u001b[0m \u001b[31m60.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading duckdb-0.9.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m16.5/16.5 MB\u001b[0m \u001b[31m52.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading onnxruntime-1.16.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (6.4 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.4/6.4 MB\u001b[0m \u001b[31m60.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m:00:01\u001b[0m\n", + "\u001b[?25hDownloading posthog-3.3.2-py2.py3-none-any.whl (40 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m40.7/40.7 kB\u001b[0m \u001b[31m3.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pulsar_client-3.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.4 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.4/5.4 MB\u001b[0m \u001b[31m67.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading tokenizers-0.15.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.6/3.6 MB\u001b[0m \u001b[31m62.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mta \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hBuilding wheels for collected packages: hnswlib\n", + " Building wheel for hnswlib (pyproject.toml) ... 
\u001b[?25ldone\n", + "\u001b[?25h Created wheel for hnswlib: filename=hnswlib-0.8.0-cp310-cp310-linux_x86_64.whl size=197540 sha256=093530a7c011605149af02e57d33a4af675ca769a11ed4fc93285bb2782cabd7\n", + " Stored in directory: /home/jupyter/.cache/pip/wheels/af/a9/3e/3e5d59ee41664eb31a4e6de67d1846f86d16d93c45f277c4e7\n", + "Successfully built hnswlib\n", + "Installing collected packages: mpmath, monotonic, sympy, pulsar-client, humanfriendly, hnswlib, duckdb, clickhouse-connect, posthog, coloredlogs, tokenizers, onnxruntime, chromadb\n", + "Successfully installed chromadb-0.3.26 clickhouse-connect-0.6.23 coloredlogs-15.0.1 duckdb-0.9.2 hnswlib-0.8.0 humanfriendly-10.0 monotonic-1.6 mpmath-1.3.0 onnxruntime-1.16.3 posthog-3.3.2 pulsar-client-3.4.0 sympy-1.12 tokenizers-0.15.1\n", + "Collecting google-cloud-aiplatform==1.26.1\n", + " Downloading google_cloud_aiplatform-1.26.1-py2.py3-none-any.whl.metadata (24 kB)\n", + "Requirement already satisfied: google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0 in /opt/conda/lib/python3.10/site-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform==1.26.1) (1.34.0)\n", + "Requirement already satisfied: proto-plus<2.0.0dev,>=1.22.0 in /opt/conda/lib/python3.10/site-packages (from google-cloud-aiplatform==1.26.1) (1.23.0)\n", + "Requirement already satisfied: protobuf!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.19.5 in /opt/conda/lib/python3.10/site-packages (from google-cloud-aiplatform==1.26.1) (3.20.3)\n", + "Requirement already satisfied: packaging>=14.3 in /opt/conda/lib/python3.10/site-packages (from google-cloud-aiplatform==1.26.1) (23.2)\n", + "Requirement already satisfied: google-cloud-storage<3.0.0dev,>=1.32.0 in /opt/conda/lib/python3.10/site-packages (from google-cloud-aiplatform==1.26.1) (2.14.0)\n", + "Requirement already satisfied: google-cloud-bigquery<4.0.0dev,>=1.15.0 in /opt/conda/lib/python3.10/site-packages (from google-cloud-aiplatform==1.26.1) (3.15.0)\n", + "Requirement already satisfied: google-cloud-resource-manager<3.0.0dev,>=1.3.3 in /opt/conda/lib/python3.10/site-packages (from google-cloud-aiplatform==1.26.1) (1.11.0)\n", + "Requirement already satisfied: shapely<2.0.0 in /home/jupyter/.local/lib/python3.10/site-packages (from google-cloud-aiplatform==1.26.1) (1.8.5.post1)\n", + "Requirement already satisfied: googleapis-common-protos<2.0dev,>=1.56.2 in /opt/conda/lib/python3.10/site-packages (from google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform==1.26.1) (1.62.0)\n", + "Requirement already satisfied: google-auth<3.0dev,>=1.25.0 in /opt/conda/lib/python3.10/site-packages (from google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform==1.26.1) (2.26.2)\n", + "Requirement already satisfied: requests<3.0.0dev,>=2.18.0 in /opt/conda/lib/python3.10/site-packages (from google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform==1.26.1) 
(2.31.0)\n", + "Requirement already satisfied: grpcio<2.0dev,>=1.33.2 in /home/jupyter/.local/lib/python3.10/site-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform==1.26.1) (1.60.0)\n", + "Requirement already satisfied: grpcio-status<2.0dev,>=1.33.2 in /home/jupyter/.local/lib/python3.10/site-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform==1.26.1) (1.47.0)\n", + "Requirement already satisfied: google-cloud-core<3.0.0dev,>=1.6.0 in /opt/conda/lib/python3.10/site-packages (from google-cloud-bigquery<4.0.0dev,>=1.15.0->google-cloud-aiplatform==1.26.1) (2.4.1)\n", + "Requirement already satisfied: google-resumable-media<3.0dev,>=0.6.0 in /opt/conda/lib/python3.10/site-packages (from google-cloud-bigquery<4.0.0dev,>=1.15.0->google-cloud-aiplatform==1.26.1) (2.7.0)\n", + "Requirement already satisfied: python-dateutil<3.0dev,>=2.7.2 in /opt/conda/lib/python3.10/site-packages (from google-cloud-bigquery<4.0.0dev,>=1.15.0->google-cloud-aiplatform==1.26.1) (2.8.2)\n", + "Requirement already satisfied: grpc-google-iam-v1<1.0.0dev,>=0.12.4 in /opt/conda/lib/python3.10/site-packages (from google-cloud-resource-manager<3.0.0dev,>=1.3.3->google-cloud-aiplatform==1.26.1) (0.12.7)\n", + "Requirement already satisfied: google-crc32c<2.0dev,>=1.0 in /opt/conda/lib/python3.10/site-packages (from google-cloud-storage<3.0.0dev,>=1.32.0->google-cloud-aiplatform==1.26.1) (1.5.0)\n", + "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /opt/conda/lib/python3.10/site-packages (from google-auth<3.0dev,>=1.25.0->google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform==1.26.1) (4.2.4)\n", + "Requirement already satisfied: pyasn1-modules>=0.2.1 in /opt/conda/lib/python3.10/site-packages (from google-auth<3.0dev,>=1.25.0->google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform==1.26.1) (0.3.0)\n", + "Requirement already satisfied: rsa<5,>=3.1.4 in /opt/conda/lib/python3.10/site-packages (from google-auth<3.0dev,>=1.25.0->google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform==1.26.1) (4.9)\n", + "Requirement already satisfied: six>=1.5 in /opt/conda/lib/python3.10/site-packages (from python-dateutil<3.0dev,>=2.7.2->google-cloud-bigquery<4.0.0dev,>=1.15.0->google-cloud-aiplatform==1.26.1) (1.16.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.10/site-packages (from requests<3.0.0dev,>=2.18.0->google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform==1.26.1) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from 
requests<3.0.0dev,>=2.18.0->google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform==1.26.1) (3.6)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests<3.0.0dev,>=2.18.0->google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform==1.26.1) (1.26.18)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests<3.0.0dev,>=2.18.0->google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform==1.26.1) (2023.11.17)\n", + "Requirement already satisfied: pyasn1<0.6.0,>=0.4.6 in /opt/conda/lib/python3.10/site-packages (from pyasn1-modules>=0.2.1->google-auth<3.0dev,>=1.25.0->google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform==1.26.1) (0.5.1)\n", + "Downloading google_cloud_aiplatform-1.26.1-py2.py3-none-any.whl (2.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.6/2.6 MB\u001b[0m \u001b[31m13.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m0:01\u001b[0m\n", + "\u001b[?25hInstalling collected packages: google-cloud-aiplatform\n", + " Attempting uninstall: google-cloud-aiplatform\n", + " Found existing installation: google-cloud-aiplatform 1.33.1\n", + " Not uninstalling google-cloud-aiplatform at /home/jupyter/.local/lib/python3.10/site-packages, outside environment /home/jupyter/asl-ml-immersion/notebooks/vertex_genai/langchain_kernel\n", + " Can't uninstall 'google-cloud-aiplatform'. No files were found to uninstall.\n", + "Successfully installed google-cloud-aiplatform-1.26.1\n" + ] + } + ], + "source": [ + "!cd ~/asl-ml-immersion && make langchain_kernel" + ] + }, + { + "cell_type": "markdown", + "id": "78f4e10c-2fce-444a-a302-e153a68b0a1e", + "metadata": { + "tags": [] + }, + "source": [ + "Select the kernel `langchain_kernel` in the top right before going forward in the notebook." 
+ ] + }, + { + "cell_type": "markdown", + "id": "769e8220-6d91-4938-8188-7b3d986b2845", + "metadata": { + "tags": [] + }, + "source": [ + "### Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "d6855635-3a1c-4295-acb3-999a634db4e4", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import scipy\n", + "from langchain.chains import ConversationalRetrievalChain, RetrievalQA\n", + "from langchain.document_loaders import WikipediaLoader\n", + "from langchain.embeddings import VertexAIEmbeddings\n", + "from langchain.llms import VertexAI\n", + "from langchain.memory import ConversationBufferMemory\n", + "from langchain.text_splitter import RecursiveCharacterTextSplitter\n", + "from langchain.vectorstores import Chroma\n", + "from vertexai.language_models import TextEmbeddingModel, TextGenerationModel" + ] + }, + { + "cell_type": "markdown", + "id": "53fafe53-dee2-4b22-b66a-d43f0b7d6fa4", + "metadata": {}, + "source": [ + "### Build a simple retrieval augmented generation system" + ] + }, + { + "cell_type": "markdown", + "id": "168271d5-86a6-451f-ab66-3dbdd902f82a", + "metadata": { + "tags": [] + }, + "source": [ + "In this toy example, we want to ground an LLM on information that an off-the-shelf LLM would not know. For example, instructions left for a house sitter that will be watching two pets." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "428d7ff0-426f-4c0b-bb58-2a6177088e17", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
text
0Estrella is a dog
1Finnegan is a cat
2Finnegan gets fed five times daily. Estrella g...
3Estrella usually goes on one long walk per day...
4Please play with Finnegan for 30 minutes each ...
\n", + "
" + ], + "text/plain": [ + " text\n", + "0 Estrella is a dog\n", + "1 Finnegan is a cat\n", + "2 Finnegan gets fed five times daily. Estrella g...\n", + "3 Estrella usually goes on one long walk per day...\n", + "4 Please play with Finnegan for 30 minutes each ..." + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# List of things we want to ground the LLM on.\n", + "information = [\n", + " \"Estrella is a dog\",\n", + " \"Finnegan is a cat\",\n", + " \"Finnegan gets fed five times daily. Estrella gets fed three times daily.\",\n", + " \"Estrella usually goes on one long walk per day, but needs to go outside every 4-6 hours\",\n", + " \"Please play with Finnegan for 30 minutes each day. His favorite toy is the fake mouse!\",\n", + "]\n", + "\n", + "information_df = pd.DataFrame({\"text\": information})\n", + "information_df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "cbfca1b4-0600-44fc-909e-730ab4afcd56", + "metadata": {}, + "source": [ + "At the core of most retrieval generation systems is a vector database. A vector database stores embedded representations of information. \n", + "\n", + "Let's add a column to our information dataframe that is an embedded representation of the text. We will use the [Vertex AI text-embeddings API](https://cloud.google.com/vertex-ai/docs/generative-ai/embeddings/get-text-embeddings)." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "d0d3e8c6-f8d5-49b2-8cd4-6d9a54ecd6e0", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
textvector
0Estrella is a dog[0.020266957581043243, 0.010058210231363773, -...
1Finnegan is a cat[-0.027006812393665314, -0.029612787067890167,...
2Finnegan gets fed five times daily. Estrella g...[-0.02811681292951107, 0.006287926342338324, -...
3Estrella usually goes on one long walk per day...[0.00867326557636261, 0.03359326347708702, -0....
4Please play with Finnegan for 30 minutes each ...[0.0019507030956447124, -0.017996784299612045,...
\n", + "
" + ], + "text/plain": [ + " text \\\n", + "0 Estrella is a dog \n", + "1 Finnegan is a cat \n", + "2 Finnegan gets fed five times daily. Estrella g... \n", + "3 Estrella usually goes on one long walk per day... \n", + "4 Please play with Finnegan for 30 minutes each ... \n", + "\n", + " vector \n", + "0 [0.020266957581043243, 0.010058210231363773, -... \n", + "1 [-0.027006812393665314, -0.029612787067890167,... \n", + "2 [-0.02811681292951107, 0.006287926342338324, -... \n", + "3 [0.00867326557636261, 0.03359326347708702, -0.... \n", + "4 [0.0019507030956447124, -0.017996784299612045,... " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "embedding_model = TextEmbeddingModel.from_pretrained(\"textembedding-gecko\")\n", + "information_df[\"vector\"] = [\n", + " x.values for x in embedding_model.get_embeddings(information)\n", + "]\n", + "information_df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "28bab4e4-d701-4cfd-b0fe-71307bd5a1b5", + "metadata": {}, + "source": [ + "Retrieval systems need a way of finding the most relevant information to answer a given query. This is done with a nearest neighbor (semantic similarity) search. Let's define a function to take in a query (text) input and return a distance metric for each text in our information. We will need to: \n", + "* Embed the query with the same embedding model used for the information \n", + "* Computes a distance metric between the query vector and each information vector \n", + "* Returns a list of distance metrics between the query vector and each information vector " + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "71088247-87d3-4aeb-a5a9-c8dc3f74f4ba", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "def embed_and_compute_distances(query: str):\n", + " # Get vector for query string\n", + " query_embedding = embedding_model.get_embeddings([query])[\n", + " 0\n", + " ].values # Query embedding\n", + "\n", + " distances = []\n", + "\n", + " # Compute distances between query vector and all information vectors\n", + " for _, row in information_df.iterrows():\n", + " distances.append(\n", + " {\n", + " \"information\": row.text,\n", + " \"distance\": scipy.spatial.distance.cosine(\n", + " query_embedding, row.vector\n", + " ),\n", + " }\n", + " )\n", + "\n", + " return distances" + ] + }, + { + "cell_type": "markdown", + "id": "78aa74a6-28e8-429b-9afc-04caf1efa71c", + "metadata": {}, + "source": [ + "Test this function out on an example." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "6dfed57b-8edb-4a17-963d-e796669f3f3c", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'information': 'Estrella is a dog', 'distance': 0.1424507274732465},\n", + " {'information': 'Finnegan is a cat', 'distance': 0.4717404010234435},\n", + " {'information': 'Finnegan gets fed five times daily. Estrella gets fed three times daily.',\n", + " 'distance': 0.27034889233707404},\n", + " {'information': 'Estrella usually goes on one long walk per day, but needs to go outside every 4-6 hours',\n", + " 'distance': 0.20111010858109857},\n", + " {'information': 'Please play with Finnegan for 30 minutes each day. 
His favorite toy is the fake mouse!',\n",
+ "  'distance': 0.4042683765833356}]"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "embed_and_compute_distances(query=\"What type of animal is Estrella?\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ebda4660-ce3c-4c7e-b61b-d682ea43c9ab",
+ "metadata": {},
+ "source": [
+ "Notice that the vector with the lowest cosine distance (meaning it is the most similar) to the vector for \"What type of animal is Estrella?\" is the vector for \"Estrella is a dog\". This highlights the core assumption that underpins retrieval augmented systems: information relevant to answering a question will be close in vector space to the question itself.\n",
+ "\n",
+ "Now all we have to do is write a function that incorporates the text corresponding to the closest information vectors into a prompt, then sends that prompt to an LLM to answer the question with the information.\n",
+ "\n",
+ "Start by writing a helper function to put together this prompt."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "c6444d77-92a9-4411-b233-c70e687fcfd9",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "def get_prompt(query: str, context: list[str]):\n",
+ "    prompt = f\"\"\"\n",
+ "    Using only the provided context, answer the question.\n",
+ "    \n",
+ "    Context:\n",
+ "    {','.join(context)}\n",
+ "    \n",
+ "    Question: {query}.\n",
+ "    \n",
+ "    If you cannot answer the question using only the provided context, respond that you do not have the context needed to answer the question.\n",
+ "    \"\"\"\n",
+ "    return prompt"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "bebd9157-94f2-4c34-aa6a-f7979393c9cb",
+ "metadata": {},
+ "source": [
+ "Now put everything together in a function that:\n",
+ "* Embeds the query\n",
+ "* Computes the distance between the query vector and all information vectors\n",
+ "* Gets the k most relevant information texts by sorting by distance\n",
+ "* Uses the k most relevant information texts in a prompt to an LLM along with the query\n",
+ "* Returns the LLM response and the information used (citations)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "5ff87db5-0ca3-4c6b-8c9f-cc47f0705010",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "model = TextGenerationModel.from_pretrained(\"text-bison@002\")\n",
+ "\n",
+ "\n",
+ "def retrieval_chain(query: str, k: int = 2):\n",
+ "    # Compute distances for query and all information vectors\n",
+ "    distances = embed_and_compute_distances(query)\n",
+ "\n",
+ "    # Sort the information from smallest distance to greatest distance\n",
+ "    sorted_distances = sorted(distances, key=lambda x: x[\"distance\"])\n",
+ "\n",
+ "    # Get the text corresponding to the k closest vectors\n",
+ "    closest_information_texts = [x[\"information\"] for x in sorted_distances[:k]]\n",
+ "\n",
+ "    # Incorporate the closest k information texts in a prompt to an LLM\n",
+ "    prompt = get_prompt(query, closest_information_texts)\n",
+ "\n",
+ "    # Send prompt through LLM\n",
+ "    response = model.predict(prompt)\n",
+ "    print(f\"Response: {response.text}\")\n",
+ "    print(f\"Information used: {closest_information_texts}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "1d812fae-17c7-4840-9781-44b18256116b",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Response: Estrella is a dog.\n",
+ "Information used: ['Estrella is a dog', 'Estrella usually goes on one long walk per day, but needs to go outside every 4-6 hours']\n"
+ ]
+ }
+ ],
+ "source": [
+ "retrieval_chain(\"What type of animal is Estrella?\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "79964f00-2741-4d8f-a3ae-47c810b941e6",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Response: Finnegan gets fed five times daily.\n",
+ "Information used: ['Finnegan gets fed five times daily. Estrella gets fed three times daily.', 'Please play with Finnegan for 30 minutes each day. His favorite toy is the fake mouse!']\n"
+ ]
+ }
+ ],
+ "source": [
+ "retrieval_chain(\"How many times a day do I need to feed Finnegan?\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "b24083e3-c49a-4424-8965-0541cf4eb8a1",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Response: The provided context does not mention anything about stocks or investments, so I cannot answer this question.\n",
+ "Information used: ['Please play with Finnegan for 30 minutes each day. His favorite toy is the fake mouse!', 'Finnegan gets fed five times daily. Estrella gets fed three times daily.']\n"
+ ]
+ }
+ ],
+ "source": [
+ "retrieval_chain(\"What stock should I invest in this month?\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a45c3d6c-1a10-4e53-9f30-85c2160e874a",
+ "metadata": {},
+ "source": [
+ "Notice that the prompt is constructed such that if a question cannot be answered from the information provided, the LLM will not try to answer it.\n",
+ "\n",
+ "It is also worth noting that we are arbitrarily setting k=2 (including the closest 2 information texts in the prompt). Different use cases call for different values of k, and there is no perfect one-size-fits-all; a quick experiment is sketched below."
+ ]
+ },
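+ {
+ "cell_type": "markdown",
+ "id": "k-demo-note",
+ "metadata": {},
+ "source": [
+ "As a quick, illustrative sketch (this extra cell was added for clarity and is left unexecuted, so it has no recorded output), you can re-run the chain with a different `k` and compare how the retrieved context and citations change. It relies only on the `retrieval_chain` function defined above and its existing `k` parameter."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "k-demo-cell",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# Retrieve only the single closest information text ...\n",
+ "retrieval_chain(\"What type of animal is Estrella?\", k=1)\n",
+ "\n",
+ "# ... and then the three closest information texts.\n",
+ "retrieval_chain(\"What type of animal is Estrella?\", k=3)"
+ ]
+ },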
+ {
+ "cell_type": "markdown",
+ "id": "699f361a-ed4a-47bc-91a7-99ca8721a228",
+ "metadata": {},
+ "source": [
+ "### Simplify and Scale with LangChain and Chroma\n",
+ "With only five pieces of grounding information, we could easily have included all of them in the prompt; the extra retrieval step to identify *what* is needed was unnecessary. In the real world, however, we may have thousands or millions of grounding texts, and as that number grows, computing a distance to every single vector becomes increasingly inefficient. In other words, production retrieval augmented generation systems require:\n",
+ "* Scalable vector databases to store large amounts of information\n",
+ "* Efficient ways of performing nearest neighbor searches\n",
+ "\n",
+ "There are many options for a vectorstore, including managed and scalable offerings like [Vertex AI Vector Search](https://cloud.google.com/vertex-ai/docs/vector-search/overview). For simplicity, in this lab we will use [Chroma](https://python.langchain.com/docs/integrations/vectorstores/chroma) as a vectorstore and [LangChain](https://github.com/langchain-ai/langchain) to orchestrate the retrieval system. LangChain provides classes and methods that simplify the steps we had to implement ourselves in the toy example above."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9e6113e3-d700-4e53-a599-3362935190ab",
+ "metadata": {},
+ "source": [
+ "#### Document Loading\n",
+ "\n",
+ "LangChain provides classes to load data from different sources. 
Some useful data loaders are [Google Cloud Storage Directory Loader](https://python.langchain.com/docs/modules/data_connection/document_loaders/integrations/google_cloud_storage_directory), [Google Drive Loader](https://python.langchain.com/docs/modules/data_connection/document_loaders/integrations/google_drive), [Recursive URL Loader](https://python.langchain.com/docs/modules/data_connection/document_loaders/integrations/recursive_url_loader), [PDF Loader](https://python.langchain.com/docs/modules/data_connection/document_loaders/how_to/pdf), [JSON Loader](https://python.langchain.com/docs/modules/data_connection/document_loaders/how_to/json), [Wikipedia Loader](https://python.langchain.com/docs/modules/data_connection/document_loaders/integrations/wikipedia), and [more](https://python.langchain.com/docs/modules/data_connection/document_loaders/).\n", + "\n", + "In this notebook we will use the Wikipedia loader to create a private knowledge base of wikipedia articles about large language models, but the overall process is similiar regardless of which document loader you use." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "aeb93ade-e33f-4408-aba3-10f6c05c01eb", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Document(page_content='A large language model (LLM) is a language model notable for its ability to achieve general-purpose language generation. LLMs acquire these abilities by learning statistical relationships from text documents during a computationally intensive self-supervised and semi-supervised training process. LLMs are artificial neural networks typically built with a transformer-based architecture. Some recent implementations are based on alternative architectures such as recurrent neural network variants and Mamba (a state space model).LLMs can be used for text generation, a form of generative AI, by taking an input text and repeatedly predicting the next token or word. Up to 2020, fine tuning was the only way a model could be adapted to be able to accomplish specific tasks. Larger sized models, such as GPT-3, however, can be prompt-engineered to achieve similar results. They are thought to acquire knowledge about syntax, semantics and \"ontology\" inherent in human language corpora, but also inaccuracies and biases present in the corpora.Some notable LLMs are OpenAI\\'s GPT models (e.g., GPT-3.5 and GPT-4, used in ChatGPT), Google\\'s PaLM and Gemini (used in Bard), Meta\\'s LLaMA family of open-source models, and Anthropic\\'s Claude models.\\n\\n\\n== History ==\\nAt the 2017 NeurIPS conference, Google researchers presented their landmark paper \"Attention Is All You Need\", which, with the goal of improving upon 2014 Seq2seq technology, introduced the transformer architecture, based mainly on the attention mechanism, developed by Bahdanau et. al. in 2014. The following year in 2018, BERT was introduced, which quickly became \"ubiquitous\".Although GPT-1 was introduced in 2018, it was GPT-2 in 2019 that caught widespread attention because OpenAI at first deemed it too powerful to release publicly, out of fear of malicious use. GPT-3 in 2020 went a step further and as of 2024 is available only via API with no offering of downloading the model to execute locally. But it was the 2022 consumer-facing browser-based ChatGPT that captured the imaginations of the general population and \"completely changed the world\". 
The 2023 GPT-4 was praised for its increased accuracy and as a \"holy grail\" for its multimodal capabilities. OpenAI did not reveal high-level architecture and the number of parameters of GPT-4.\\nIn the meantime, competing language models have for the most part been playing catch-up to the GPT series, at least in terms of number of parameters. Notable exceptions in terms of number of parameters included Google\\'s 2019 T5-11B and 2022 PaLM-E.\\nSince 2022, open source models have been gaining popularity, especially at first with BLOOM and LLaMA, though the latter is restricted to only noncommercial uses. Mistral AI\\'s models Mistral 7B and Mixtral 8x7b have the more permissive Apache License. As of January 2024, Mixtral 8x7b is the most powerful open source LLM according to the LMSYS Chatbot Arena Leaderboard, being more powerful than GPT-3.5 but not as powerful as GPT-4.\\n\\n\\n== Dataset preprocessing ==\\n\\n\\n=== Probabilistic tokenization ===\\nUsing a modification of byte-pair encoding, in the first step, all unique characters (including blanks and punctuation marks) are treated as an initial set of n-grams (i.e. initial set of uni-grams). Successively the most frequent pair of adjacent characters is merged into a bi-gram and all instances of the pair are replaced by it. All occurrences of adjacent pairs of (previously merged) n-grams that most frequently occur together are then again merged into even lengthier n-gram repeatedly until a vocabulary of prescribed size is obtained (in case of GPT-3, the size is 50257). Token vocabulary consists of integers, spanning from zero up to the size of the token vocabulary. New words can always be interpreted as combinations of the tokens and the initial-set uni-grams.A token vocabulary based on the frequencies extracted from mainly English corpora uses as few tokens as possible for an average English word. An average word in a', metadata={'title': 'Large language model', 'summary': 'A large language model (LLM) is a language model notable for its ability to achieve general-purpose language generation. LLMs acquire these abilities by learning statistical relationships from text documents during a computationally intensive self-supervised and semi-supervised training process. LLMs are artificial neural networks typically built with a transformer-based architecture. Some recent implementations are based on alternative architectures such as recurrent neural network variants and Mamba (a state space model).LLMs can be used for text generation, a form of generative AI, by taking an input text and repeatedly predicting the next token or word. Up to 2020, fine tuning was the only way a model could be adapted to be able to accomplish specific tasks. Larger sized models, such as GPT-3, however, can be prompt-engineered to achieve similar results. 
They are thought to acquire knowledge about syntax, semantics and \"ontology\" inherent in human language corpora, but also inaccuracies and biases present in the corpora.Some notable LLMs are OpenAI\\'s GPT models (e.g., GPT-3.5 and GPT-4, used in ChatGPT), Google\\'s PaLM and Gemini (used in Bard), Meta\\'s LLaMA family of open-source models, and Anthropic\\'s Claude models.\\n\\n', 'source': 'https://en.wikipedia.org/wiki/Large_language_model'})" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "docs = WikipediaLoader(query=\"Large Language Models\", load_max_docs=10).load()\n", + "\n", + "# Take a look at a single document\n", + "docs[0]" + ] + }, + { + "cell_type": "markdown", + "id": "1077ce8e-fb29-43e7-b7ec-313b043ee84b", + "metadata": {}, + "source": [ + "#### Split text into chunks \n", + "Now that we have the documents we will split them into chunks. Each chunk will become one vector in the vector store. To do this we will define a chunk size (number of characters) and a chunk overlap (amount of overlap i.e. sliding window). The perfect chunk size can be difficult to determine. Too large of a chunk size leads to too much information per chunk (individual chunks not specific enough), however too small of a chunk size leads to not enough information per chunk. In both cases, nearest neighbors lookup with a query/question embedding may struggle to retrieve the actually relevant chunks, or fail altogether if the chunks are too large to use as context with an LLM query.\n", + "\n", + "In this notebook we will use a chunk size of 800 chacters and a chunk overlap of 400 characters, but feel free to experiment with other sizes! Note: you can specify a custom `length_function` with `RecursiveCharacterTextSplitter` if you want chunk size/overlap to be determined by something other than Python's len function. In addition to `RecursiveCharacterTextSplitter`, there are [other text splitters](https://python.langchain.com/docs/modules/data_connection/document_transformers/) you can consider." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "ff137784-b004-4a96-99db-142f9208571e", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[Document(page_content='A large language model (LLM) is a language model notable for its ability to achieve general-purpose language generation. LLMs acquire these abilities by learning statistical relationships from text documents during a computationally intensive self-supervised and semi-supervised training process. LLMs are artificial neural networks typically built with a transformer-based architecture. Some recent implementations are based on alternative architectures such as recurrent neural network variants and Mamba (a state space model).LLMs can be used for text generation, a form of generative AI, by taking an input text and repeatedly predicting the next token or word. Up to 2020, fine tuning was the only way a model could be adapted to be able to accomplish specific tasks. Larger sized models, such', metadata={'title': 'Large language model', 'summary': 'A large language model (LLM) is a language model notable for its ability to achieve general-purpose language generation. LLMs acquire these abilities by learning statistical relationships from text documents during a computationally intensive self-supervised and semi-supervised training process. LLMs are artificial neural networks typically built with a transformer-based architecture. 
Some recent implementations are based on alternative architectures such as recurrent neural network variants and Mamba (a state space model).LLMs can be used for text generation, a form of generative AI, by taking an input text and repeatedly predicting the next token or word. Up to 2020, fine tuning was the only way a model could be adapted to be able to accomplish specific tasks. Larger sized models, such as GPT-3, however, can be prompt-engineered to achieve similar results. They are thought to acquire knowledge about syntax, semantics and \"ontology\" inherent in human language corpora, but also inaccuracies and biases present in the corpora.Some notable LLMs are OpenAI\\'s GPT models (e.g., GPT-3.5 and GPT-4, used in ChatGPT), Google\\'s PaLM and Gemini (used in Bard), Meta\\'s LLaMA family of open-source models, and Anthropic\\'s Claude models.\\n\\n', 'source': 'https://en.wikipedia.org/wiki/Large_language_model'}),\n", + " Document(page_content='implementations are based on alternative architectures such as recurrent neural network variants and Mamba (a state space model).LLMs can be used for text generation, a form of generative AI, by taking an input text and repeatedly predicting the next token or word. Up to 2020, fine tuning was the only way a model could be adapted to be able to accomplish specific tasks. Larger sized models, such as GPT-3, however, can be prompt-engineered to achieve similar results. They are thought to acquire knowledge about syntax, semantics and \"ontology\" inherent in human language corpora, but also inaccuracies and biases present in the corpora.Some notable LLMs are OpenAI\\'s GPT models (e.g., GPT-3.5 and GPT-4, used in ChatGPT), Google\\'s PaLM and Gemini (used in Bard), Meta\\'s LLaMA family of', metadata={'title': 'Large language model', 'summary': 'A large language model (LLM) is a language model notable for its ability to achieve general-purpose language generation. LLMs acquire these abilities by learning statistical relationships from text documents during a computationally intensive self-supervised and semi-supervised training process. LLMs are artificial neural networks typically built with a transformer-based architecture. Some recent implementations are based on alternative architectures such as recurrent neural network variants and Mamba (a state space model).LLMs can be used for text generation, a form of generative AI, by taking an input text and repeatedly predicting the next token or word. Up to 2020, fine tuning was the only way a model could be adapted to be able to accomplish specific tasks. Larger sized models, such as GPT-3, however, can be prompt-engineered to achieve similar results. 
They are thought to acquire knowledge about syntax, semantics and \"ontology\" inherent in human language corpora, but also inaccuracies and biases present in the corpora.Some notable LLMs are OpenAI\\'s GPT models (e.g., GPT-3.5 and GPT-4, used in ChatGPT), Google\\'s PaLM and Gemini (used in Bard), Meta\\'s LLaMA family of open-source models, and Anthropic\\'s Claude models.\\n\\n', 'source': 'https://en.wikipedia.org/wiki/Large_language_model'})]"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "text_splitter = RecursiveCharacterTextSplitter(\n",
+ " chunk_size=800,\n",
+ " chunk_overlap=400,\n",
+ " length_function=len,\n",
+ ")\n",
+ "\n",
+ "chunks = text_splitter.split_documents(docs)\n",
+ "\n",
+ "# Look at the first two chunks\n",
+ "chunks[0:2]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "c31316e9-273f-4ca6-a252-8de9f0e9224e",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Number of documents: 10\n",
+ "Number of chunks: 91\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(f\"Number of documents: {len(docs)}\")\n",
+ "print(f\"Number of chunks: {len(chunks)}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "52830b13-f2fe-4a28-a8a3-d529da45757f",
+ "metadata": {},
+ "source": [
+ "#### Embed Document Chunks \n",
+ "Now we need to embed the document chunks and store them in a vector store. For this, we can use any text embedding model; however, we need to be sure to use the same text embedding model when we embed our queries/questions at prediction time. To keep things simple, we will use the PaLM API for embeddings. The langchain library provides a convenient wrapper class around the PaLM Embeddings API, `VertexAIEmbeddings()`.\n",
+ "\n",
+ "Since Vertex AI Vector Search takes a while (~45 minutes) to create an index, we will use Chroma instead to keep things simple. Of course, in a real-world use case with a large private knowledge base, you may not be able to fit everything in memory. Langchain's wrapper class for Chroma allows us to pass in a list of documents and an embedding class to create the vector store."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "012aa63d-726d-4393-8293-88776458c40b",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "embedding = VertexAIEmbeddings(\n",
+ " model_name=\"textembedding-gecko@001\"\n",
+ ") # PaLM embedding API\n",
+ "\n",
+ "# set persist directory so the vector store is saved to disk\n",
+ "db = Chroma.from_documents(chunks, embedding, persist_directory=\"./vectorstore\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b8f28527-2b38-4c51-a739-b56e499383c9",
+ "metadata": {},
+ "source": [
+ "#### Putting it all together \n",
+ "\n",
+ "Now that everything is in place, we can tie it all together with a langchain chain. A langchain chain simply orchestrates the multiple steps required to use an LLM for a specific use case. In this case, the chain first embeds the query/question, then performs a nearest neighbors lookup to find the relevant chunks, and finally uses those chunks to formulate a response with an LLM. We will use the Chroma database as our vector store and PaLM as our LLM. Langchain provides a wrapper around PaLM, `VertexAI()`.\n",
+ "\n",
+ "For this simple Q/A use case, we can use langchain's `RetrievalQA` to link the process together.\n",
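+ "\n",
+ "Conceptually, the `stuff` chain type retrieves the most relevant chunks, \"stuffs\" them into a single prompt as context, and asks the LLM to answer. Below is a rough sketch of that flow, using the `retriever` and `llm` objects created in the next cell (illustrative only; `RetrievalQA` uses its own prompt template under the hood):\n",
+ "\n",
+ "```python\n",
+ "# Illustrative sketch only: RetrievalQA does roughly this for you\n",
+ "question = \"What technology underpins large language models?\"\n",
+ "relevant_chunks = retriever.get_relevant_documents(question)  # embed query + nearest neighbors lookup\n",
+ "context = \" \".join(chunk.page_content for chunk in relevant_chunks)\n",
+ "answer = llm(\"Answer using only this context: \" + context + \" Question: \" + question)\n",
+ "```"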
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "0c1b4f33-0fe2-46e8-a898-ade9e26e7510",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# vector store\n",
+ "retriever = db.as_retriever(\n",
+ " search_type=\"similarity\",\n",
+ " search_kwargs={\"k\": 10}, # number of nearest neighbors to retrieve\n",
+ ")\n",
+ "\n",
+ "# PaLM API\n",
+ "# You can also set temperature, top_p, top_k\n",
+ "llm = VertexAI(model_name=\"text-bison@001\", max_output_tokens=1024)\n",
+ "\n",
+ "# q/a chain\n",
+ "qa = RetrievalQA.from_chain_type(\n",
+ " llm=llm,\n",
+ " chain_type=\"stuff\",\n",
+ " retriever=retriever,\n",
+ " return_source_documents=True,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7f6d0811-c32c-4059-a7e0-8f8771dd68d2",
+ "metadata": {},
+ "source": [
+ "Now that everything is tied together, we can send queries and get answers!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "474abe04-d499-4066-8d41-8ff38d67b932",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "def ask_question(question: str):\n",
+ " response = qa({\"query\": question})\n",
+ " print(f\"Response: {response['result']}\\n\")\n",
+ "\n",
+ " citations = {doc.metadata[\"source\"] for doc in response[\"source_documents\"]}\n",
+ " print(f\"Citations: {citations}\\n\")\n",
+ "\n",
+ " # uncomment below to print source chunks used\n",
+ " # print(f\"Source Chunks Used: {response['source_documents']}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "535cd186-d1c0-4f1f-9496-f1ea35a07429",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Response: The technology that underpins large language models is the transformer architecture.\n",
+ "\n",
+ "Citations: {'https://en.wikipedia.org/wiki/Large_language_model', 'https://en.wikipedia.org/wiki/GPT-3', 'https://en.wikipedia.org/wiki/Generative_pre-trained_transformer', 'https://en.wikipedia.org/wiki/Language_model', 'https://en.wikipedia.org/wiki/Open-source_artificial_intelligence'}\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "ask_question(\"What technology underpins large language models?\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "f6926718-6237-415d-8633-fd554528323e",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Response: The transformer architecture was introduced in 2017.\n",
+ "\n",
+ "Citations: {'https://en.wikipedia.org/wiki/Large_language_model', 'https://en.wikipedia.org/wiki/GPT-3', 'https://en.wikipedia.org/wiki/Generative_pre-trained_transformer', 'https://en.wikipedia.org/wiki/BERT_(language_model)', 'https://en.wikipedia.org/wiki/Prompt_engineering'}\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "ask_question(\"When was the transformer introduced?\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e67fa985-4a03-4002-bf42-d19fc27c75a3",
+ "metadata": {},
+ "source": [
+ "Congrats! You have now built a toy retrieval augmented generation system from scratch and applied what you learned to build a more realistic system using a vector database and orchestration with langchain.\n",
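+ "\n",
+ "Because the Chroma vector store was created with `persist_directory=\"./vectorstore\"`, it can be persisted to disk and reloaded in a later session instead of re-embedding all of the chunks. A minimal sketch of what that might look like (illustrative only; the exact persistence API depends on the installed langchain/chromadb versions):\n",
+ "\n",
+ "```python\n",
+ "# Save the current vector store to disk, then reload it later without re-embedding\n",
+ "db.persist()\n",
+ "reloaded_db = Chroma(persist_directory=\"./vectorstore\", embedding_function=embedding)\n",
+ "reloaded_retriever = reloaded_db.as_retriever(search_type=\"similarity\", search_kwargs={\"k\": 10})\n",
+ "```"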
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "893a1d12-b469-4561-b5e1-fe056b997c41", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "environment": { + "kernel": "langchain_kernel", + "name": "tf2-gpu.2-12.m115", + "type": "gcloud", + "uri": "gcr.io/deeplearning-platform-release/tf2-gpu.2-12:m115" + }, + "kernelspec": { + "display_name": "langchain_kernel (Local)", + "language": "python", + "name": "langchain_kernel" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}