# The environment file stores all environment variables for the project.
FILE_NAME:=.env
ENVIRONMENT:=DEV
DEBUG:=True
BACKEND_SERVER_HOST:=127.0.0.1
BACKEND_SERVER_PORT:=8000
BACKEND_SERVER_WORKERS:=4
BACKEND_SERVER_VERSION:=v0.0.1
TIMEZONE:="UTC"
IS_ALLOWED_CREDENTIALS:=True
# JWT Token
JWT_SECRET_KEY:=YOUR-JWT-SECRET-KEY
JWT_SUBJECT:=YOUR-JWT-SUBJECT
JWT_TOKEN_PREFIX:=YOUR-TOKEN-PREFIX
JWT_ALGORITHM:=HS256
JWT_MIN:=60
JWT_HOUR:=23
JWT_DAY:=6
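# Note: JWT_MIN, JWT_HOUR, and JWT_DAY are presumably combined by the backend into
# the token lifetime (here 6 days + 23 hours + 60 minutes, i.e. roughly 7 days).
# This is an assumption; the consuming code is not part of this file.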
# Hash Functions
HASHING_ALGORITHM_LAYER_1:=bcrypt
HASHING_ALGORITHM_LAYER_2:=argon2
HASHING_SALT:=YOUR-RANDOM-SALTY-SALT
# Codecov (log in to Codecov to get your token)
# CODECOV_TOKEN:=YOUR-CODECOV-TOKEN
DOCKER_VOLUME_DIRECTORY:=
# CPU-accelerated inference engine
INFERENCE_ENG:=llamacpp
INFERENCE_ENG_PORT:=8080
INFERENCE_ENG_VERSION:=server--b1-2321a5e
NUM_CPU_CORES:=8.00
NUM_CPU_CORES_EMBEDDING:=4.00
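# The fractional core counts suggest these values feed Docker Compose `cpus:`
# limits for the inference and embedding services (an assumption; the compose
# files are not shown here).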
# Embedding engine; uses the same version as the inference engine
EMBEDDING_ENG:=embedding_eng
EMBEDDING_ENG_PORT:=8080
# Language model; the default is Phi3-mini-4k-instruct-Q4.gguf
# https://github.com/SkywardAI/llama.cpp/blob/9b2f16f8055265c67e074025350736adc1ea0666/tests/test-chat-template.cpp#L91-L92
LANGUAGE_MODEL_NAME:=Phi3-mini-4k-instruct-Q4.gguf
LANGUAGE_MODEL_URL:=https://huggingface.co/aisuko/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi3-mini-4k-instruct-Q4.gguf?download=true
INSTRUCTION:="A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the questions from human."
EMBEDDING_MODEL_NAME:=all-MiniLM-L6-v2-Q4_K_M-v2.gguf
EMBEDDING_MODEL_URL:=https://huggingface.co/aisuko/all-MiniLM-L6-v2-gguf/resolve/main/all-MiniLM-L6-v2-Q4_K_M-v2.gguf?download=true
# yeager
METRICS_PATHS:=runs
# Default RAG dataset name
# DEFAULT_RAG_DS_NAME:=aisuko/squad01-v2
.PHONY: env
env:
	@echo "ENVIRONMENT=$(ENVIRONMENT)"> $(FILE_NAME)
	@echo "DEBUG=$(DEBUG)">> $(FILE_NAME)
	@echo "BACKEND_SERVER_HOST=$(BACKEND_SERVER_HOST)">> $(FILE_NAME)
	@echo "BACKEND_SERVER_PORT=$(BACKEND_SERVER_PORT)">> $(FILE_NAME)
	@echo "BACKEND_SERVER_WORKERS=$(BACKEND_SERVER_WORKERS)">> $(FILE_NAME)
	@echo "BACKEND_SERVER_VERSION=$(BACKEND_SERVER_VERSION)">> $(FILE_NAME)
	@echo "IS_ALLOWED_CREDENTIALS=$(IS_ALLOWED_CREDENTIALS)">> $(FILE_NAME)
# API_TOKEN and AUTH_TOKEN are not defined in this Makefile; if not supplied
# (e.g. via the environment or the command line) they are written as empty values.
	@echo "API_TOKEN=$(API_TOKEN)">> $(FILE_NAME)
	@echo "AUTH_TOKEN=$(AUTH_TOKEN)">> $(FILE_NAME)
	@echo "JWT_SECRET_KEY=$(JWT_SECRET_KEY)">> $(FILE_NAME)
	@echo "JWT_SUBJECT=$(JWT_SUBJECT)">> $(FILE_NAME)
	@echo "JWT_TOKEN_PREFIX=$(JWT_TOKEN_PREFIX)">> $(FILE_NAME)
	@echo "JWT_ALGORITHM=$(JWT_ALGORITHM)">> $(FILE_NAME)
	@echo "JWT_MIN=$(JWT_MIN)">> $(FILE_NAME)
	@echo "JWT_HOUR=$(JWT_HOUR)">> $(FILE_NAME)
	@echo "JWT_DAY=$(JWT_DAY)">> $(FILE_NAME)
	@echo "HASHING_ALGORITHM_LAYER_1=$(HASHING_ALGORITHM_LAYER_1)">> $(FILE_NAME)
	@echo "HASHING_ALGORITHM_LAYER_2=$(HASHING_ALGORITHM_LAYER_2)">> $(FILE_NAME)
	@echo "HASHING_SALT=$(HASHING_SALT)">> $(FILE_NAME)
	@echo "DOCKER_VOLUME_DIRECTORY=$(DOCKER_VOLUME_DIRECTORY)">> $(FILE_NAME)
	@echo "METRICS_PATHS=$(METRICS_PATHS)">> $(FILE_NAME)
	@echo "INFERENCE_ENG=$(INFERENCE_ENG)">> $(FILE_NAME)
	@echo "INFERENCE_ENG_PORT=$(INFERENCE_ENG_PORT)">> $(FILE_NAME)
	@echo "INFERENCE_ENG_VERSION=$(INFERENCE_ENG_VERSION)">> $(FILE_NAME)
	@echo "EMBEDDING_ENG=$(EMBEDDING_ENG)">> $(FILE_NAME)
	@echo "EMBEDDING_ENG_PORT=$(EMBEDDING_ENG_PORT)">> $(FILE_NAME)
	@echo "NUM_CPU_CORES=$(NUM_CPU_CORES)">> $(FILE_NAME)
	@echo "NUM_CPU_CORES_EMBEDDING=$(NUM_CPU_CORES_EMBEDDING)">> $(FILE_NAME)
	@echo "LANGUAGE_MODEL_NAME=$(LANGUAGE_MODEL_NAME)">> $(FILE_NAME)
	@echo "TIMEZONE=$(TIMEZONE)">> $(FILE_NAME)
	@echo "INSTRUCTION=$(INSTRUCTION)">> $(FILE_NAME)
	@echo "EMBEDDING_MODEL_NAME=$(EMBEDDING_MODEL_NAME)">> $(FILE_NAME)
.PHONY: prepare
prepare: env lm
############################################################################################################
# For development; requires an Nvidia GPU.
.PHONY: build
build: env
	docker compose -f docker-compose.yaml build
.PHONY: up
up: build lm
	docker compose -f docker-compose.yaml up -d
.PHONY: stop
stop:
	docker compose -f docker-compose.yaml stop
.PHONY: logs
logs:
	@docker compose -f docker-compose.yaml logs -f
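# Typical development loop using the targets above:
#   make up      # build the images, fetch the model, start the stack
#   make logs    # follow container logs
#   make stop    # stop the stack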
############################################################################################################
# For demo: runs without GPU acceleration, so inference is slow. Might include some bugs.
.PHONY: demo
demo: env lm
	docker compose -f docker-compose.demo.yaml up -d
.PHONY: demo-stop
demo-stop:
	docker compose -f docker-compose.demo.yaml stop
.PHONY: demo-logs
demo-logs:
	docker compose -f docker-compose.demo.yaml logs -f
############################################################################################################
# Linter
.PHONY: ruff
ruff:
	@ruff check --output-format=github chimera/src/ --config ruff.toml
############################################################################################################
# Download model from Hugging Face
.PHONY: lm
lm:
# @echo skip_lm_part_for_now
	@mkdir -p data/models && [ -f data/models/$(LANGUAGE_MODEL_NAME) ] || wget -O data/models/$(LANGUAGE_MODEL_NAME) $(LANGUAGE_MODEL_URL)
# @mkdir -p data/models && [ -f data/models/$(EMBEDDING_MODEL_NAME) ] || wget -O data/models/$(EMBEDDING_MODEL_NAME) $(EMBEDDING_MODEL_URL)
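# The `[ -f ... ] ||` guard makes the download idempotent: re-running `make lm`
# skips the fetch when the model file is already present under data/models.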
.PHONY: localinfer
localinfer: lm
	@docker run -p $(INFERENCE_ENG_PORT):$(INFERENCE_ENG_PORT) -v ./data/models:/models gclub/llama.cpp:$(INFERENCE_ENG_VERSION) -m models/$(LANGUAGE_MODEL_NAME) -c 512 -cnv -i --metrics --host 0.0.0.0 --port $(INFERENCE_ENG_PORT)
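# A minimal smoke test for the local server (a hypothetical helper target, not part
# of the original Makefile). It assumes the llama.cpp server's /health and
# /completion endpoints, which may differ across server versions.
.PHONY: localinfer-test
localinfer-test:
	@curl -s http://127.0.0.1:$(INFERENCE_ENG_PORT)/health
	@curl -s http://127.0.0.1:$(INFERENCE_ENG_PORT)/completion \
		-H "Content-Type: application/json" \
		-d '{"prompt": "Hello", "n_predict": 16}'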
############################################################################################################
# Poetry
.PHONY: poetry
poetry:
	@pipx install poetry==1.8.2
.PHONY: lock
lock:
	@poetry -C chimera lock
.PHONY: install
install:
	@poetry -C chimera install --no-root -vvv
.PHONY: install-dev
install-dev:
	@poetry -C chimera install --only dev --no-root -vvv
.PHONY: plugin
plugin:
	@poetry -C chimera self add poetry-plugin-export
.PHONY: expo
expo:
	@poetry -C chimera export -f requirements.txt --output chimera/requirements.txt
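# Typical dependency workflow with the targets above:
#   make poetry && make plugin   # install Poetry 1.8.2 and the export plugin
#   make lock && make install    # resolve and install dependencies
#   make expo                    # regenerate chimera/requirements.txt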
############################################################################################################
# Testing
.PHONY: test
test:
	@pytest chimera/tests
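# To run a subset of the suite, pytest's -k filter can be used directly
# ("test_example" below is a placeholder expression):
#   pytest chimera/tests -k "test_example"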