Skip to content
This repository has been archived by the owner on Nov 13, 2024. It is now read-only.

Commit

Permalink
Merge branch 'dev' into kb-docstrings
Browse files Browse the repository at this point in the history
  • Loading branch information
miararoy authored Oct 30, 2023
2 parents 88e7ac3 + aa7424a commit 4ee8414
Show file tree
Hide file tree
Showing 15 changed files with 204 additions and 94 deletions.
44 changes: 44 additions & 0 deletions .github/actions/install-deps-and-canopy/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Composite action shared by the CI workflows: installs Poetry, restores or
# builds the project's virtualenv, and optionally installs the canopy package
# itself on top of its dependencies.
name: Install dependencies and canopy
description: "Installs Poetry, dependencies and optionally canopy library"
inputs:
  # Only used below to build the venv cache key; the interpreter itself must
  # already have been set up by the calling workflow.
  python-version:
    description: "Python version"
    required: true
    default: "3.9"
  # Action inputs are always strings, so this is compared against the literal
  # 'true' in the "Install project" step.
  install-canopy:
    description: "Whether to install canopy library, or dependencies only"
    required: true
    default: "true"

runs:
  using: "composite"
  steps:
    - name: Install Poetry
      uses: snok/install-poetry@v1
      with:
        # Quoted so the version is unambiguously a string
        version: "1.3.2"
        virtualenvs-create: true
        # Keep the venv inside the workspace (.venv) so it can be cached below
        virtualenvs-in-project: true
        installer-parallel: true
    #----------------------------------------------
    # load cached venv if cache exists
    #----------------------------------------------
    - name: Load cached venv
      id: cached-poetry-dependencies
      uses: actions/cache@v3
      with:
        path: |
          .venv
          poetry.lock
        # One cache entry per OS / Python version / pyproject.toml content
        key: venv-${{ runner.os }}-${{ inputs.python-version }}-${{ hashFiles('pyproject.toml') }}
    #----------------------------------------------
    # install dependencies if cache does not exist
    #----------------------------------------------
    - name: Install dependencies
      shell: bash
      if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
      # --no-root: dependencies (including the dev group) only, not the project
      run: poetry install --no-interaction --no-root --with dev
    - name: Install project
      if: ${{ inputs.install-canopy == 'true' }}
      shell: bash
      run: poetry install --no-interaction
63 changes: 63 additions & 0 deletions .github/workflows/PR.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Pull-request workflow: runs the linters on every supported Python version,
# then (only if linting passes) runs the unit tests and uploads an HTML report.
name: Run unit tests and linters

on: pull_request

# Cancel previous runs on the same branch / PR number if they are still running
# From: https://stackoverflow.com/a/72408109
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true


jobs:
  run-linters:
    name: Run linters
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Always quoted: an unquoted 3.10 would be parsed as the float 3.1
        python-version: ["3.9", "3.10", "3.11"]

    steps:
      - uses: actions/checkout@v3
      # Step name follows the matrix entry instead of a hard-coded "3.9"
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        uses: ./.github/actions/install-deps-and-canopy
        with:
          python-version: ${{ matrix.python-version }}
          # Action inputs are strings; linting needs the dependencies only
          install-canopy: "false"
      - name: Run flake8 Check
        run: poetry run flake8 .
      - name: Run mypy check
        run: poetry run mypy src

  run-tests:
    name: Run tests
    runs-on: ubuntu-latest
    needs: run-linters
    strategy:
      matrix:
        python-version: ["3.9", "3.10", "3.11"]
        # NOTE(review): within this job pinecone-plan is only referenced in the
        # artifact name, so the unit tests run once per plan - confirm that
        # this duplication is intended.
        pinecone-plan: ["paid", "starter"]

    steps:
      - uses: actions/checkout@v3
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        uses: ./.github/actions/install-deps-and-canopy
        with:
          python-version: ${{ matrix.python-version }}
          install-canopy: "true"
      - name: Run unit tests
        run: poetry run pytest --html=report.html --self-contained-html tests/unit
      - name: upload pytest report.html
        uses: actions/upload-artifact@v3
        # Upload the report even when the test step failed
        if: always()
        with:
          name: pytest-report-py${{ matrix.python-version }}-${{ matrix.pinecone-plan }}
          path: report*.html
67 changes: 10 additions & 57 deletions .github/workflows/CI.yml → .github/workflows/merege_queue.yml
Original file line number Diff line number Diff line change
@@ -1,13 +1,6 @@
name: CI

on: pull_request

# Cancel previous runs on the same branch \ PR number if they are still running
# From: https://stackoverflow.com/a/72408109
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
name: Run system and e2e tests

on: merge_group

jobs:
run-linters:
Expand All @@ -23,31 +16,11 @@ jobs:
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}

- name: Install Poetry
uses: snok/install-poetry@v1
with:
version: 1.3.2
virtualenvs-create: true
virtualenvs-in-project: true
installer-parallel: true
#----------------------------------------------
# load cached venv if cache exists
#----------------------------------------------
- name: Load cached venv
id: cached-poetry-dependencies
uses: actions/cache@v3
with:
path: .venv
key: venv-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('pyproject.toml') }}
#----------------------------------------------
# install dependencies if cache does not exist
#----------------------------------------------
- name: Install dependencies
if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
run: poetry install --no-interaction --no-root --with dev
- name: Install project
run: poetry install --no-interaction
uses: ./.github/actions/install-deps-and-canopy
with:
python-version: ${{ matrix.python-version }}
install-canopy: false
- name: Run flake8 Check
run: poetry run flake8 .
- name: Run mypy check
Expand All @@ -68,31 +41,11 @@ jobs:
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}

- name: Install Poetry
uses: snok/install-poetry@v1
with:
version: 1.3.2
virtualenvs-create: true
virtualenvs-in-project: true
installer-parallel: true
#----------------------------------------------
# load cached venv if cache exists
#----------------------------------------------
- name: Load cached venv
id: cached-poetry-dependencies
uses: actions/cache@v3
- name: Install dependencies and canopy
uses: ./.github/actions/install-deps-and-canopy
with:
path: .venv
key: venv-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('pyproject.toml') }}
#----------------------------------------------
# install dependencies if cache does not exist
#----------------------------------------------
- name: Install dependencies
if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
run: poetry install --no-interaction --no-root --with dev
- name: Install project
run: poetry install --no-interaction
python-version: ${{ matrix.python-version }}
install-canopy: true
- name: Run unit tests
run: poetry run pytest --html=report.html --self-contained-html tests/unit
- name: Set Index Name for System / E2E
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -188,8 +188,8 @@ INFO: Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)
> **_📝 NOTE:_**
>
> The canopy start command will keep the terminal occupied. To proceed with the next steps, please open a new terminal window.
> Also, make sure all the environment variables described in the [installation](#how-to-install) section are set.
> If you want to run the service in the background, you can use the following command - **```nohup canopy start &```**
> However, this is not recommended.

### 4. Chat with your data
Expand Down
2 changes: 1 addition & 1 deletion config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ chat_engine:
type: OpenAIRecordEncoder
params:
model_name: text-embedding-ada-002
batch_size: 100
batch_size: 400

chunker:
type: MarkdownChunker
Expand Down
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ authors = ["Relevance Team <[email protected]>"]
readme = "README.md"
license = "Apache-2.0"
packages = [{include = "canopy", from = "src"},
{include = "canopy_cli", from = "src"},]
{include = "canopy_cli", from = "src"},
{include = "canopy_server", from = "src"},]

[tool.poetry.dependencies]
python = "^3.9"
Expand Down Expand Up @@ -78,3 +79,4 @@ skip-checking-raises = true

[tool.poetry.scripts]
canopy = "canopy_cli.cli:cli"

2 changes: 2 additions & 0 deletions src/canopy/knowledge_base/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
from .knowledge_base import connect_to_pinecone
from .knowledge_base import list_canopy_indexes
from .knowledge_base import KnowledgeBase
46 changes: 34 additions & 12 deletions src/canopy/knowledge_base/knowledge_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,37 @@
DELETE_STARTER_CHUNKS_PER_DOC = 32


def connect_to_pinecone():
    """
    Connect to Pinecone.

    Calls `pinecone_init()` followed by `pinecone_whoami()`. You normally do
    not need to call this yourself: `list_canopy_indexes()` calls it, and so do
    the `KnowledgeBase` methods that connect to or create an index.

    Raises:
        RuntimeError: If either call raises. The original exception is
                      chained as the cause.
    """
    try:
        pinecone_init()
        pinecone_whoami()
    except Exception as err:
        # Surface one actionable message; the underlying error stays
        # reachable through __cause__.
        message = ("Failed to connect to Pinecone. "
                   "Please check your credentials and try again")
        raise RuntimeError(message) from err


def list_canopy_indexes() -> List[str]:
    """
    List all Canopy indexes in the current Pinecone account.

    Connects to Pinecone via `connect_to_pinecone()` and then keeps only the
    names returned by `list_indexes()` that start with `INDEX_NAME_PREFIX`.

    Example:
        >>> from canopy.knowledge_base import list_canopy_indexes
        >>> list_canopy_indexes()
        ['canopy--my_index', 'canopy--my_index2']

    Returns:
        A list of Canopy index names.

    Raises:
        RuntimeError: If `connect_to_pinecone()` fails.
    """
    connect_to_pinecone()

    canopy_indexes = []
    for name in list_indexes():
        # Indexes without the prefix are skipped
        if name.startswith(INDEX_NAME_PREFIX):
            canopy_indexes.append(name)
    return canopy_indexes


class KnowledgeBase(BaseKnowledgeBase):

"""
Expand Down Expand Up @@ -160,20 +191,11 @@ def __init__(self,
# `create_canopy_index()`
self._index: Optional[Index] = None

@staticmethod
def _connect_pinecone():
try:
pinecone_init()
pinecone_whoami()
except Exception as e:
raise RuntimeError("Failed to connect to Pinecone. "
"Please check your credentials and try again") from e

def _connect_index(self,
connect_pinecone: bool = True
) -> None:
if connect_pinecone:
self._connect_pinecone()
connect_to_pinecone()

if self.index_name not in list_indexes():
raise RuntimeError(
Expand Down Expand Up @@ -297,7 +319,7 @@ def create_canopy_index(self,
"Please provide the vectors' dimension")

# connect to pinecone and create index
self._connect_pinecone()
connect_to_pinecone()

if self.index_name in list_indexes():
raise RuntimeError(
Expand Down Expand Up @@ -468,7 +490,7 @@ def _query_index(self,
def upsert(self,
documents: List[Document],
namespace: str = "",
batch_size: int = 100,
batch_size: int = 200,
show_progress_bar: bool = False):
"""
Upsert documents into the knowledge base.
Expand Down
2 changes: 1 addition & 1 deletion src/canopy/knowledge_base/record_encoder/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ class OpenAIRecordEncoder(DenseRecordEncoder):
def __init__(self,
*,
model_name: str = "text-embedding-ada-002",
batch_size: int = 100,
batch_size: int = 400,
**kwargs):
"""
create an instance of OpenAIEncoder with the given model name.
Expand Down
Loading

0 comments on commit 4ee8414

Please sign in to comment.