diff --git a/config/config.yaml b/config/config.yaml
index dba56a62..85ccbe99 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -1,3 +1,12 @@
+# ===========================================================
+#            Configuration file for Canopy Server
+# ===========================================================
+
+# ---------------------------------------------------------------------------------
+#                                 LLM prompts
+# Defined here for convenience, then referenced in the chat engine configuration
+# Note: line breaks in the prompts are important, and may affect the LLM's behavior
+# ---------------------------------------------------------------------------------
 system_prompt: &system_prompt |
   Use the following pieces of context to answer the user question at the next messages. This context retrieved from a knowledge database and you should use only the facts from the context to answer. Always remember to include the source to the documents you used from their 'source' field in the format 'Source: $SOURCE_HERE'.
   If you don't know the answer, just say that you don't know, don't try to make up an answer, use the context.
@@ -6,54 +15,105 @@ query_builder_prompt: &query_builder_prompt |
   Your task is to formulate search queries for a search engine, to assist in responding to the user's question.
   You should break down complex questions into sub-queries if needed.
+
+# -------------------------------------------------------------------------------------------
+#                              Tokenizer configuration
+# A Tokenizer singleton instance must be initialized before initializing any other components
+# -------------------------------------------------------------------------------------------
 tokenizer:
-  type: OpenAITokenizer
+  type: OpenAITokenizer                  # Options: [OpenAITokenizer]
   params:
     model_name: gpt-3.5-turbo
+
+# -------------------------------------------------------------------------------------------------------------
+#                               Chat engine configuration
+# The chat engine is the main component of the server, generating responses to the `/chat.completion` API call.
+# The configuration is recursive, so that each component contains a subsection for each of its sub-components.
+# -------------------------------------------------------------------------------------------------------------
 chat_engine:
   params:
-    max_prompt_tokens: 4096
-    max_generated_tokens: null           # Will use the LLM's default max generated tokens
-    max_context_tokens: null             # Will use 70% of the max_prompt_tokens
-    system_prompt: *system_prompt
-    history_pruning: recent              # Options: [raise, recent]
-    min_history_messages: 1
+    max_prompt_tokens: 4096              # The maximum number of tokens to use for the input prompt to the LLM
+    max_generated_tokens: null           # Leaving `null` will use the default of the underlying LLM
+    max_context_tokens: null             # Leaving `null` will use 70% of `max_prompt_tokens`
+    system_prompt: *system_prompt        # The chat engine's system prompt for calling the LLM
+    history_pruning: recent              # How to prune messages if chat history is too long. Options: [raise, recent]
+    min_history_messages: 1              # Minimum number of messages to keep in history
+
+  # -------------------------------------------------------------------------------------------------------------
+  # LLM configuration
+  # Configuration of the LLM (Large Language Model)
+  # -------------------------------------------------------------------------------------------------------------
   llm: &llm
-    type: OpenAILLM
+    type: OpenAILLM                      # Options: [OpenAILLM]
     params:
-      model_name: gpt-3.5-turbo
+      model_name: gpt-3.5-turbo          # The name of the model to use
+
+  # --------------------------------------------------------------------
+  # Configuration for the QueryBuilder subcomponent of the chat engine.
+  # The query builder is responsible for generating textual queries given user message history.
+  # --------------------------------------------------------------------
   query_builder:
-    type: FunctionCallingQueryGenerator
+    type: FunctionCallingQueryGenerator  # Options: [FunctionCallingQueryGenerator]
     params:
-      prompt: *query_builder_prompt
-      function_description: Query search engine for relevant information
+      prompt: *query_builder_prompt      # The query builder's system prompt for calling the LLM
+      function_description:              # A function description passed to the LLM's `function_calling` API
+        Query search engine for relevant information
-    llm:
+    llm:     # The LLM that the query builder will use to generate queries. Leave `*llm` to use the chat engine's LLM
       <<: *llm
+
+  # -------------------------------------------------------------------------------------------------------------
+  # ContextEngine configuration
+  # The context engine is responsible for generating textual context for the `/query` API calls.
+  # -------------------------------------------------------------------------------------------------------------
   context_engine:
     params:
-      global_metadata_filter: null # An optional metadata filter to apply to all queries
+      global_metadata_filter: null       # An optional metadata filter to apply to all queries
+
+
+    # -------------------------------------------------------------------------
+    # Configuration for the ContextBuilder subcomponent of the context engine.
+    # The context builder is responsible for formulating a textual context given query results.
+    # -------------------------------------------------------------------------
+    context_builder:
+      type: StuffingContextBuilder       # Options: [StuffingContextBuilder]
+
+    # -----------------------------------------------------------------------------------------------------------
+    # KnowledgeBase configuration
+    # The KnowledgeBase is responsible for storing and indexing the user's documents
+    # -----------------------------------------------------------------------------------------------------------
     knowledge_base:
       params:
-        default_top_k: 5
+        default_top_k: 5                 # The default number of document chunks to retrieve for each query
+#      index_params:                     # Optional - index creation parameters for `create_canopy_index()` or `canopy new`
+#        metric: cosine
+#        pod_type: p1
-      record_encoder:
-        type: OpenAIRecordEncoder
-        params:
-          model_name: text-embedding-ada-002
-          batch_size: 400
+
+      # --------------------------------------------------------------------------
+      # Configuration for the Chunker subcomponent of the knowledge base.
+      # The chunker is responsible for splitting documents' text into smaller chunks.
+      # --------------------------------------------------------------------------
       chunker:
-        type: MarkdownChunker
+        type: MarkdownChunker            # Options: [MarkdownChunker, RecursiveCharacterChunker]
         params:
-          chunk_size: 256
-          chunk_overlap: 0
-          keep_separator: true
+          chunk_size: 256                # The maximum number of tokens in each chunk
+          chunk_overlap: 0               # The number of tokens to overlap between chunks
+          keep_separator: true           # Whether to keep the separator in the chunks
-    context_builder:
-      type: StuffingContextBuilder
+
+      # --------------------------------------------------------------------------
+      # Configuration for the RecordEncoder subcomponent of the knowledge base.
+      # The record encoder is responsible for encoding document chunks into a vector representation
+      # --------------------------------------------------------------------------
+      record_encoder:
+        type: OpenAIRecordEncoder        # Options: [OpenAIRecordEncoder]
+        params:
+          model_name:                    # The name of the model to use for encoding
+            text-embedding-ada-002
+          batch_size: 400                # The number of document chunks to encode in each call to the encoding model
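Since the configuration leans on YAML anchors (`&system_prompt`, `&llm`) and the `<<` merge key, a quick way to sanity-check the edited file is to load it and inspect the resolved tree. The sketch below is illustrative and not part of the diff; it assumes PyYAML is installed and that it runs from the repository root, so `config/config.yaml` resolves to the file above.

```python
# Minimal sanity check for the config above (illustrative, not part of this PR).
# Assumes PyYAML and a working directory at the repository root.
import yaml

with open("config/config.yaml") as f:
    config = yaml.safe_load(f)

chat_engine = config["chat_engine"]

# *system_prompt resolves to the &system_prompt block scalar defined at the top
assert chat_engine["params"]["system_prompt"].startswith(
    "Use the following pieces of context"
)

# `<<: *llm` merges the chat engine's LLM settings into the query builder's
# `llm` section, so both resolve to the same type and model name
assert chat_engine["query_builder"]["llm"]["type"] == "OpenAILLM"
assert chat_engine["query_builder"]["llm"]["params"]["model_name"] == "gpt-3.5-turbo"
```

With the values in this diff, leaving `max_context_tokens` as `null` gives the retrieved context roughly 2,867 of the 4,096 prompt tokens (70% of `max_prompt_tokens`), per the comment on that line.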