Skip to content

Commit

Permalink
Bump llama_index version (#341)
Browse files (browse the repository at this point in the history)
* Update poetry

* Fix poetry

* bump llama_index to v0.12.3

* fix deps

* Fix dropdown value

* fix deps

* generate poetry.lock

* add html2text

* add html2text

* Fix test

* fix tests

---------

Co-authored-by: Yue Fei <[email protected]>
  • Loading branch information
moria97 and Yue Fei authored Jan 16, 2025
1 parent 97c7b10 commit cefc173
Show file tree
Hide file tree
Showing 38 changed files with 2,265 additions and 2,709 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ ENV VIRTUAL_ENV=/app/.venv \
ENABLE_AIOHTTPCLIENT=false \
ENABLE_HTTPX=false

RUN apt-get update && apt-get install -y libgl1 libglib2.0-0 curl
RUN apt-get update && apt-get install -y libgl1 libglib2.0-0 libgomp1 curl

# setup paddleocr dependencies
RUN mkdir -p /root/.paddleocr/whl/det/ch/ch_PP-OCRv4_det_infer \
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile_gpu
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ ENV VIRTUAL_ENV=/app/.venv \
ENABLE_AIOHTTPCLIENT=false \
ENABLE_HTTPX=false

RUN apt-get update && apt-get install -y libgl1 libglib2.0-0 curl
RUN apt-get update && apt-get install -y libgl1 libglib2.0-0 libgomp1 curl

# setup paddleocr dependencies
RUN mkdir -p /root/.paddleocr/whl/det/ch/ch_PP-OCRv4_det_infer \
Expand Down
4,297 changes: 1,800 additions & 2,497 deletions poetry.lock

Large diffs are not rendered by default.

69 changes: 34 additions & 35 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,32 +11,39 @@ readme = "README.md"

[tool.poetry.dependencies]
python = ">=3.11.0,<3.12"
fastapi = "^0.110.1"
fastapi = "0.115.2"
uvicorn = "^0.29.0"
llama-index-core = "0.10.62"
llama-index-embeddings-openai = "^0.1.7"
llama-index-embeddings-azure-openai = "^0.1.7"
llama-index-embeddings-dashscope = "^0.1.3"
llama-index-llms-openai = "^0.1.27"
llama-index-llms-azure-openai = "^0.1.6"
llama-index-llms-dashscope = "^0.1.2"
llama-index-readers-database = "^0.1.3"
llama-index-vector-stores-faiss = "^0.1.2"
llama-index-vector-stores-analyticdb = "^0.1.1"
llama-index-vector-stores-elasticsearch = "^0.2.0"
llama-index-vector-stores-milvus = "^0.1.10"
gradio = "3.41.0"
llama-index-core = "^0.12.2"
llama-index-embeddings-openai = "^0.3.0"
llama-index-embeddings-azure-openai = "^0.3.0"
llama-index-embeddings-dashscope = "^0.3.0"
llama-index-llms-openai = "^0.3.0"
llama-index-llms-azure-openai = "^0.3.0"
llama-index-llms-dashscope = "^0.3.0"
llama-index-readers-database = "^0.3.0"
llama-index-vector-stores-faiss = "^0.3.0"
llama-index-vector-stores-analyticdb = "^0.3.0"
llama-index-vector-stores-elasticsearch = "^0.4.0"
llama-index-vector-stores-milvus = "^0.3.0"
llama-index-vector-stores-alibabacloud-opensearch = "^0.3.0"
llama-index-embeddings-huggingface = "^0.4.0"
llama-index-readers-file = "^0.4.3"
llama-index-postprocessor-flag-embedding-reranker = "^0.3.0"
llama-index-storage-chat-store-redis = "^0.4.0"
llama-index-tools-google = "^0.3.0"
llama-index-tools-duckduckgo = "^0.3.0"
llama-index-llms-paieas = "^0.3.0"
llama-index-agent-openai = "^0.4.1"
llama-index-multi-modal-llms-openai = "^0.4.2"
gradio = "^5.12.0"
faiss-cpu = "^1.8.0"
hologres-vector = "^0.0.9"
dynaconf = "^3.2.5"
docx2txt = "^0.8"
click = "^8.1.7"
pydantic = "^2.7.0"
pytest = "^8.1.1"
llama-index-retrievers-bm25 = "^0.1.3"
jieba = "^0.42.1"
llama-index-embeddings-huggingface = "^0.2.0"
llama-index-postprocessor-flag-embedding-reranker = "^0.1.3"
flagembedding = "^1.2.10"
sentencepiece = "^0.2.0"
oss2 = "^2.18.5"
Expand All @@ -50,10 +57,9 @@ torchvision = [
{version = "0.18.0+cpu", source = "pytorch_cpu", markers = "sys_platform != 'darwin'"},
{version = "0.17.2", markers = "sys_platform == 'darwin'"}
]
transformers = "4.42.4"
transformers = "^4.42.4"
openpyxl = "^3.1.2"
pdf2image = "^1.17.0"
llama-index-storage-chat-store-redis = "^0.1.3"
python-bidi = "0.4.2"
easyocr = "^1.7.1"
opencv-python = "^4.6.0.66"
Expand All @@ -62,10 +68,7 @@ pypdf2 = "^3.0.1"
pdfplumber = "^0.11.0"
pdfminer-six = "^20231228"
openinference-semantic-conventions = "^0.1.9"
llama-index-tools-google = "^0.1.5"
llama-index-tools-duckduckgo = "^0.1.1"
openinference-instrumentation = "^0.1.12"
llama-index-llms-huggingface = "^0.2.0"
pytest-asyncio = "^0.23.7"
pytest-cov = "^5.0.0"
xlrd = "^2.0.1"
Expand All @@ -74,41 +77,37 @@ chardet = "^5.2.0"
locust = "^2.29.0"
gunicorn = "^22.0.0"
umap-learn = "^0.5.6"
protobuf = "3.20.2"
protobuf = "^3.20.2"
modelscope = "^1.16.0"
llama-index-multi-modal-llms-dashscope = "^0.1.2"
llama-index-vector-stores-alibabacloud-opensearch = "^0.1.0"
asyncpg = "^0.29.0"
pgvector = "^0.3.2"
pre-commit = "^3.8.0"
cn-clip = "^1.5.1"
llama-index-llms-paieas = "^0.1.0"
pymysql = "^1.1.1"
llama-index-experimental = "^0.2.0"
llama-index-readers-web = "^0.1.23"
milvus-lite = "^2.4.9"
rapidocr-onnxruntime = "^1.3.24"
rapid-table = "^0.1.3"
bs4 = "^0.0.2"
httpx = "0.27.0"
httpx = "^0.27.0"
detectron2 = [
{markers = "sys_platform == 'linux'", url = "https://pai-rag.oss-cn-hangzhou.aliyuncs.com/packages/python_wheels/detectron2-0.6%2B864913fpt2.3.0cpu-cp311-cp311-linux_x86_64.whl"},
{markers = "sys_platform == 'win32'", url = "https://pai-rag.oss-cn-hangzhou.aliyuncs.com/packages/python_wheels/detectron2-0.6%2B864913fpt2.3.0cpu-cp311-cp311-win_amd64.whl"},
{markers = "sys_platform != 'win32' and sys_platform != 'linux' ", url = "https://pai-rag.oss-cn-hangzhou.aliyuncs.com/packages/python_wheels/detectron2-0.6%2B864913fpt2.2.2cpu-cp311-cp311-macosx_10_9_universal2.whl"}
]
magic-pdf = {extras = ["full"], version = "^0.10.6"}
magic-pdf = {version = "0.10.6", extras = ["full"]}
peft = "^0.12.0"
duckduckgo-search = "6.2.12"
aliyun-bootstrap = "1.0.2"
duckduckgo-search = "^6.2.12"
aliyun-bootstrap = "^1.0.2"
docx = "^0.2.4"
python-pptx = "^1.0.2"
aspose-slides = "^24.10.0"
ultralytics = "^8.3.58"
ultralytics = "^8.3.43"
datasketch = "^1.6.5"
primp = "0.9.1"
primp = "^0.9.1"
tablestore = "^6.1.0"
anyio = "4.6.2.post1"
anyio = "^4.6.2.post1"
mistletoe = "^1.4.0"
html2text = "^2024.2.26"

[tool.poetry.scripts]
pai_rag = "pai_rag.main:run"
Expand Down
56 changes: 27 additions & 29 deletions pyproject_gpu.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,32 +11,38 @@ readme = "README.md"

[tool.poetry.dependencies]
python = ">=3.11.0,<3.12"
fastapi = "^0.110.1"
fastapi = "0.115.2"
uvicorn = "^0.29.0"
llama-index-core = "0.10.62"
llama-index-embeddings-openai = "^0.1.7"
llama-index-embeddings-azure-openai = "^0.1.7"
llama-index-embeddings-dashscope = "^0.1.3"
llama-index-llms-openai = "^0.1.27"
llama-index-llms-azure-openai = "^0.1.6"
llama-index-llms-dashscope = "^0.1.2"
llama-index-readers-database = "^0.1.3"
llama-index-vector-stores-faiss = "^0.1.2"
llama-index-vector-stores-analyticdb = "^0.1.1"
llama-index-vector-stores-elasticsearch = "^0.2.0"
llama-index-vector-stores-milvus = "^0.1.10"
gradio = "3.41.0"
llama-index-core = "^0.12.2"
llama-index-embeddings-openai = "^0.3.0"
llama-index-embeddings-azure-openai = "^0.3.0"
llama-index-embeddings-dashscope = "^0.3.0"
llama-index-llms-openai = "^0.3.0"
llama-index-llms-azure-openai = "^0.3.0"
llama-index-llms-dashscope = "^0.3.0"
llama-index-readers-database = "^0.3.0"
llama-index-vector-stores-faiss = "^0.3.0"
llama-index-vector-stores-analyticdb = "^0.3.0"
llama-index-vector-stores-elasticsearch = "^0.4.0"
llama-index-vector-stores-milvus = "^0.3.0"
llama-index-agent-openai = "^0.4.1"
llama-index-embeddings-huggingface = "^0.4.0"
llama-index-postprocessor-flag-embedding-reranker = "^0.3.0"
llama-index-storage-chat-store-redis = "^0.4.0"
llama-index-tools-google = "^0.3.0"
llama-index-tools-duckduckgo = "^0.3.0"
llama-index-multi-modal-llms-openai = "^0.4.2"
llama-index-vector-stores-alibabacloud-opensearch = "^0.3.0"
llama-index-llms-paieas = "^0.3.0"
gradio = "^5.12.0"
faiss-cpu = "^1.8.0"
hologres-vector = "^0.0.9"
dynaconf = "^3.2.5"
docx2txt = "^0.8"
click = "^8.1.7"
pydantic = "^2.7.0"
pytest = "^8.1.1"
llama-index-retrievers-bm25 = "^0.1.3"
jieba = "^0.42.1"
llama-index-embeddings-huggingface = "^0.2.0"
llama-index-postprocessor-flag-embedding-reranker = "^0.1.3"
flagembedding = "^1.2.10"
sentencepiece = "^0.2.0"
oss2 = "^2.18.5"
Expand All @@ -47,7 +53,6 @@ torchvision = "0.17.2"
transformers = "4.42.4"
openpyxl = "^3.1.2"
pdf2image = "^1.17.0"
llama-index-storage-chat-store-redis = "^0.1.3"
python-bidi = "0.4.2"
easyocr = "^1.7.1"
opencv-python = "^4.6.0.66"
Expand All @@ -56,10 +61,7 @@ pypdf2 = "^3.0.1"
pdfplumber = "^0.11.0"
pdfminer-six = "^20231228"
openinference-semantic-conventions = "^0.1.9"
llama-index-tools-google = "^0.1.5"
llama-index-tools-duckduckgo = "^0.1.1"
openinference-instrumentation = "^0.1.12"
llama-index-llms-huggingface = "^0.2.0"
pytest-asyncio = "^0.23.7"
pytest-cov = "^5.0.0"
xlrd = "^2.0.1"
Expand All @@ -68,18 +70,13 @@ chardet = "^5.2.0"
locust = "^2.29.0"
gunicorn = "^22.0.0"
umap-learn = "^0.5.6"
protobuf = "3.20.2"
protobuf = "^3.20.2"
modelscope = "^1.16.0"
llama-index-multi-modal-llms-dashscope = "^0.1.2"
llama-index-vector-stores-alibabacloud-opensearch = "^0.1.0"
asyncpg = "^0.29.0"
pgvector = "^0.3.2"
pre-commit = "^3.8.0"
cn-clip = "^1.5.1"
llama-index-llms-paieas = "^0.1.0"
pymysql = "^1.1.1"
llama-index-experimental = "^0.2.0"
llama-index-readers-web = "^0.1.23"
milvus-lite = "^2.4.9"
rapidocr-onnxruntime = "^1.3.24"
rapid-table = "^0.1.3"
Expand All @@ -100,11 +97,12 @@ aliyun-bootstrap = "^1.0.2"
docx = "^0.2.4"
python-pptx = "^1.0.2"
aspose-slides = "^24.10.0"
ultralytics = "8.3.58"
ultralytics = "^8.3.43"
datasketch = "^1.6.5"
tablestore = "^6.1.0"
anyio = "^4.6.2.post1"
mistletoe = "^1.4.0"
anyio = "4.6.2.post1"
html2text = "^2024.2.26"

[tool.poetry.scripts]
pai_rag = "pai_rag.main:run"
Expand Down
2 changes: 1 addition & 1 deletion src/pai_rag/app/web/index_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ def index_to_components_settings(
{"value": ""},
{"value": ""},
{"value": ""},
{"value": ""},
{"value": "cn-hangzhou"},
{"value": ""},
{"value": ""},
{"value": ""},
Expand Down
1 change: 1 addition & 0 deletions src/pai_rag/app/web/rag_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,7 @@ def _format_rag_response(
formatted_answer += f"**Reference**:\n {referenced_docs}"

response["result"] = formatted_answer

return response

def check_health(self):
Expand Down
1 change: 0 additions & 1 deletion src/pai_rag/app/web/tabs/data_analysis_tab.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@ def load_db_info_fn(input_elements: List[Any]):
else:
update_dict["enable_query_preprocessor"] = False
update_dict["enable_db_preretriever"] = False
print("update_dict:", update_dict)
rag_client.patch_config(update_dict)
except RagApiError as api_error:
raise gr.Error(f"HTTP {api_error.code} Error: {api_error.msg}")
Expand Down
2 changes: 1 addition & 1 deletion src/pai_rag/app/web/tabs/settings_tab.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ def create_setting_tab() -> Dict[str, Any]:
oss_endpoint = gr.Textbox(
label="OSS Endpoint",
elem_id="oss_endpoint",
default="oss-cn-hangzhou.aliyuncs.com",
placeholder="oss-cn-hangzhou.aliyuncs.com",
)
use_oss.input(
fn=ev_listeners.change_use_oss,
Expand Down
10 changes: 7 additions & 3 deletions src/pai_rag/app/web/tabs/upload_tab.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def upload_knowledge(

result = {"Info": ["StartTime", "EndTime", "Duration(s)", "Status"]}
error_msg = ""
while not all(file.finished is True for file in my_upload_files):
while True:
for file in my_upload_files:
try:
response = asyncio.run(
Expand All @@ -144,12 +144,16 @@ def upload_knowledge(
gr.update(visible=True, value=pd.DataFrame(result)),
gr.update(visible=False),
]
if not all(file.finished is True for file in my_upload_files):
time.sleep(2)

if all(file.finished is True for file in my_upload_files):
break

time.sleep(2)

upload_result = "Upload success."
if error_msg:
upload_result = f"Upload failed: {error_msg}"

yield [
gr.update(visible=True, value=pd.DataFrame(result)),
gr.update(
Expand Down
1 change: 1 addition & 0 deletions src/pai_rag/app/web/tabs/vector_db_panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ def create_vector_db_panel() -> Dict[str, Any]:
"cn-shenzhen",
"cn-chengdu",
],
value="cn-hangzhou",
label="RegionId",
elem_id="adb_region_id",
)
Expand Down
21 changes: 16 additions & 5 deletions src/pai_rag/app/web/ui_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,27 @@
# """

WELCOME_MESSAGE = """
# \N{fire} PAI-RAG Dashboard
#### \N{rocket} Join the DingTalk Q&A Group: 27370042974
"""
<div class="title"> <span class="head"> \N{fire} PAI-RAG Dashboard</span><span class="subhead"> \N{rocket} Join the DingTalk Q&A Group: 27370042974</span> </div>
"""

DEFAULT_CSS_STYPE = """
h1, h3, h4 {
.title {
margin-block-start: 0.2em;
margin-block-end: 0.2em;
text-align: center;
display:block;
}
.head {
font-size: 2em;
font-weight: bold;
}
.subhead {
margin: 1.5em;
font-size: 1em;
font-weight: bold;
}
"""

DEFAULT_EMBED_SIZE = 1024
Expand Down
7 changes: 2 additions & 5 deletions src/pai_rag/app/web/webui.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from urllib.parse import urljoin
from fastapi import FastAPI
import gradio as gr
from pai_rag.app.web import event_listeners
Expand Down Expand Up @@ -135,13 +134,11 @@ def configure_webapp(app: FastAPI, web_url, rag_url=DEFAULT_LOCAL_URL) -> gr.Blo
rag_client.set_endpoint(rag_url)

chat_page = create_chat_ui()
chat_page.queue(concurrency_count=1, max_size=64)
chat_page._queue.set_url(urljoin(web_url, "chat/"))
chat_page.queue(api_open=True, max_size=64)
gr.mount_gradio_app(app, chat_page, path="/chat")

home = make_homepage()
home.queue(concurrency_count=1, max_size=64)
home._queue.set_url(urljoin(web_url, "/"))
chat_page.queue(api_open=True, max_size=64)
logger.info(f"web_url: {web_url}")
gr.mount_gradio_app(app, home, path="/")
return
1 change: 0 additions & 1 deletion src/pai_rag/evaluation/metrics/response/correctness.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,6 @@ class Correctness(LlmMetric):
Passing is defined as a score greater than or equal to the given threshold.
Args:
service_context (Optional[ServiceContext]): Service context.
eval_template (Optional[Union[BasePromptTemplate, str]]):
Template for the evaluation prompt.
score_threshold (float): Numerical threshold for passing the evaluation,
Expand Down
2 changes: 1 addition & 1 deletion src/pai_rag/integrations/agent/pai/pai_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
Type,
)
import json
from llama_index.agent.openai.step import OpenAIAgentWorker
from llama_index.agent.openai import OpenAIAgentWorker
from llama_index.core.agent.runner.base import AgentRunner
from llama_index.core.callbacks import CallbackManager
from llama_index.core.base.llms.types import ChatMessage
Expand Down
Loading

0 comments on commit cefc173

Please sign in to comment.