-
Notifications
You must be signed in to change notification settings - Fork 298
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
test: Add script to test model loading below n_parameters threshold (#…
…1698) * add model loading test for models below 2B params * add failure message to include model name * use the real get_model_meta * use cache folder * teardown per function * fix directory removal * write to file * wip loading from before * wip * Rename model_loading_testing.py to model_loading.py * Delete tests/test_models/test_model_loading.py * checks for models below 2B * try not using cache folder * update script with scan_cache_dir and add args * add github CI: detect changed model files and run model loading test * install all model dependencies * dependency installations and move file location * should trigger a model load test in CI * find correct commit for diff * explicitly fetch base branch * add make command * try to run in python instead and add pytest * fix attribute error and add read mode * separate script calling * let pip install be cached and specify repo path * check ancestry * add cache and rebase * try to merge instead of rebase * try without merge base * check if file exists first * Apply suggestions from code review Co-authored-by: Kenneth Enevoldsen <[email protected]> * Update .github/workflows/model_loading.yml Co-authored-by: Kenneth Enevoldsen <[email protected]> * address review comments to run test once from CI and not pytest --------- Co-authored-by: Kenneth Enevoldsen <[email protected]>
- Loading branch information
1 parent
752d2b8
commit 8d033f3
Showing
8 changed files
with
425 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# Workflow that runs the model loading check on pull requests.
name: Model Loading

# Triggered only by pull requests that touch Python files under mteb/models/.
on:
  pull_request:
    paths:
      - 'mteb/models/**.py'

jobs:
  extract-and-run:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.10'
          # Let setup-python cache pip downloads between workflow runs.
          cache: 'pip'

      # NOTE(review): the `model-load-test` Makefile target is defined outside
      # this file; per the step name it presumably installs the dependencies
      # and runs the model loading test — confirm against the Makefile.
      - name: Install dependencies and run tests
        run: |
          make model-load-test
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
from __future__ import annotations | ||
|
||
import ast | ||
import sys | ||
from pathlib import Path | ||
|
||
from git import Repo | ||
|
||
|
||
def get_changed_files(base_branch="main"):
    """Return the model files that differ between the base branch and HEAD.

    The repository is located two directory levels above this script. The
    base branch is fetched from the ``origin`` remote before diffing.

    Args:
        base_branch: Name of the branch on ``origin`` to compare HEAD against.

    Returns:
        Paths (as printed by ``git diff --name-only``, i.e. relative to the
        repository root) of changed ``.py`` files under ``mteb/models/``
        that still exist at HEAD.
    """
    repo_path = Path(__file__).parent.parent
    repo = Repo(repo_path)
    # Fetch the base branch first so that origin/<base_branch> can be resolved
    # below.
    repo.remotes.origin.fetch(base_branch)

    base_commit = repo.commit(f"origin/{base_branch}")
    head_commit = repo.commit("HEAD")

    # --diff-filter=d excludes deleted files: they are listed by a plain
    # --name-only diff but no longer exist on disk, so callers that open the
    # returned paths would fail on them.
    diff = repo.git.diff(
        "--name-only", "--diff-filter=d", base_commit, head_commit
    )

    return [
        f
        for f in diff.splitlines()
        if f.startswith("mteb/models/") and f.endswith(".py")
    ]
|
||
|
||
def extract_model_names(files: list[str]) -> list[str]:
    """Collect the ``name`` of every ``ModelMeta(...)`` assigned in *files*.

    Each file is parsed with :mod:`ast` (it is never imported or executed).
    An assignment counts when its right-hand side is a direct call to
    ``ModelMeta`` and at least one assignment target is a plain variable name.

    Args:
        files: Paths of Python source files to scan.

    Returns:
        The non-empty string literals passed as the ``name`` keyword, in the
        order they are encountered. Calls whose ``name`` is missing or is not
        a string literal (e.g. a variable or an f-string) are skipped, as are
        files that do not exist.

    Raises:
        SyntaxError: If a file is not valid Python source.
    """
    model_names = []
    for file in files:
        try:
            # Python source is UTF-8 by default; do not depend on the locale.
            with open(file, encoding="utf-8") as f:
                tree = ast.parse(f.read())
        except FileNotFoundError:
            # A changed-file list can name files that were deleted or renamed;
            # there is nothing to extract from them.
            continue

        for node in ast.walk(tree):
            if not isinstance(node, ast.Assign):
                continue
            call = node.value
            if not (
                isinstance(call, ast.Call)
                and isinstance(call.func, ast.Name)
                and call.func.id == "ModelMeta"
            ):
                continue
            # Record the model once per assignment, even for chained targets
            # such as ``a = b = ModelMeta(...)``.
            if not any(isinstance(target, ast.Name) for target in node.targets):
                continue

            name_node = next(
                (kw.value for kw in call.keywords if kw.arg == "name"),
                None,
            )
            # Only literal strings can be resolved statically; anything else
            # (variables, f-strings, calls) has no ``.value`` to read.
            if (
                isinstance(name_node, ast.Constant)
                and isinstance(name_node.value, str)
                and name_node.value
            ):
                model_names.append(name_node.value)
    return model_names
|
||
|
||
if __name__ == "__main__":
    # Script entry point.
    # The base branch may be given as the first command-line argument and
    # defaults to 'main', e.g. `python extract_model_names.py mieb`.
    # The extracted names are written, space-separated, to model_names.txt
    # next to this script.
    cli_args = sys.argv[1:]
    base_branch = cli_args[0] if cli_args else "main"

    names = extract_model_names(get_changed_files(base_branch))

    target = Path(__file__).parent / "model_names.txt"
    target.write_text(" ".join(names))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,197 @@ | ||
{ | ||
"Alibaba-NLP/gte-Qwen1.5-7B-instruct": "Over threshold. Not tested.", | ||
"Alibaba-NLP/gte-Qwen2-1.5B-instruct": "None", | ||
"Alibaba-NLP/gte-Qwen2-7B-instruct": "Over threshold. Not tested.", | ||
"BAAI/bge-base-en-v1.5": "None", | ||
"BAAI/bge-large-en-v1.5": "Over threshold. Not tested.", | ||
"BAAI/bge-reranker-v2-m3": "None", | ||
"BAAI/bge-small-en-v1.5": "None", | ||
"BAAI/bge-small-en-v1.5 BAAI/bge-base-en-v1.5 BAAI/bge-large-en-v1.5": null, | ||
"BeastyZ/e5-R-mistral-7b": "Over threshold. Not tested.", | ||
"Cohere/Cohere-embed-english-light-v3.0": "None", | ||
"Cohere/Cohere-embed-english-v3.0": "None", | ||
"Cohere/Cohere-embed-multilingual-light-v3.0": "None", | ||
"Cohere/Cohere-embed-multilingual-v3.0": "None", | ||
"DeepPavlov/distilrubert-small-cased-conversational": "None", | ||
"DeepPavlov/rubert-base-cased": "None", | ||
"DeepPavlov/rubert-base-cased-sentence": "None", | ||
"Gameselo/STS-multilingual-mpnet-base-v2": "None", | ||
"GritLM/GritLM-7B": "Over threshold. Not tested.", | ||
"GritLM/GritLM-8x7B": "Over threshold. Not tested.", | ||
"HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v1": "None", | ||
"HIT-TMG/KaLM-embedding-multilingual-mini-v1": "None", | ||
"Haon-Chen/speed-embedding-7b-instruct": "Over threshold. Not tested.", | ||
"Hum-Works/lodestone-base-4096-v1": "None", | ||
"Jaume/gemma-2b-embeddings": "Over threshold. Not tested.", | ||
"Lajavaness/bilingual-embedding-base": "None", | ||
"Lajavaness/bilingual-embedding-large": "None", | ||
"Lajavaness/bilingual-embedding-small": "None", | ||
"Linq-AI-Research/Linq-Embed-Mistral": "Over threshold. Not tested.", | ||
"McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp-supervised": "Over threshold. Not tested.", | ||
"McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp-unsup-simcse": "Over threshold. Not tested.", | ||
"McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp-supervised": "Over threshold. Not tested.", | ||
"McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp-unsup-simcse": "Over threshold. Not tested.", | ||
"McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-supervised": "Over threshold. Not tested.", | ||
"McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-unsup-simcse": "Over threshold. Not tested.", | ||
"McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-supervised": "Over threshold. Not tested.", | ||
"McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-unsup-simcse": "Over threshold. Not tested.", | ||
"Mihaiii/Bulbasaur": "None", | ||
"Mihaiii/Ivysaur": "None", | ||
"Mihaiii/Squirtle": "None", | ||
"Mihaiii/Venusaur": "None", | ||
"Mihaiii/Wartortle": "None", | ||
"Mihaiii/gte-micro": "None", | ||
"Mihaiii/gte-micro-v4": "None", | ||
"Omartificial-Intelligence-Space/Arabert-all-nli-triplet-Matryoshka": "None", | ||
"Omartificial-Intelligence-Space/Arabic-MiniLM-L12-v2-all-nli-triplet": "None", | ||
"Omartificial-Intelligence-Space/Arabic-all-nli-triplet-Matryoshka": "None", | ||
"Omartificial-Intelligence-Space/Arabic-labse-Matryoshka": "None", | ||
"Omartificial-Intelligence-Space/Arabic-mpnet-base-all-nli-triplet": "None", | ||
"Omartificial-Intelligence-Space/Marbert-all-nli-triplet-Matryoshka": "None", | ||
"OrdalieTech/Solon-embeddings-large-0.1": "None", | ||
"OrlikB/KartonBERT-USE-base-v1": "None", | ||
"OrlikB/st-polish-kartonberta-base-alpha-v1": "None", | ||
"Salesforce/SFR-Embedding-2_R": "Over threshold. Not tested.", | ||
"Salesforce/SFR-Embedding-Mistral": "Over threshold. Not tested.", | ||
"Snowflake/snowflake-arctic-embed-l": "None", | ||
"Snowflake/snowflake-arctic-embed-l-v2.0": "None", | ||
"Snowflake/snowflake-arctic-embed-m": "None", | ||
"Snowflake/snowflake-arctic-embed-m-long": "None", | ||
"Snowflake/snowflake-arctic-embed-m-v1.5": "None", | ||
"Snowflake/snowflake-arctic-embed-m-v2.0": "None", | ||
"Snowflake/snowflake-arctic-embed-s": "None", | ||
"Snowflake/snowflake-arctic-embed-xs": "None", | ||
"WhereIsAI/UAE-Large-V1": "None", | ||
"aari1995/German_Semantic_STS_V2": "None", | ||
"abhinand/MedEmbed-small-v0.1": "None", | ||
"ai-forever/ru-en-RoSBERTa": "None", | ||
"ai-forever/sbert_large_mt_nlu_ru": "None", | ||
"ai-forever/sbert_large_nlu_ru": "None", | ||
"avsolatorio/GIST-Embedding-v0": "None", | ||
"avsolatorio/GIST-all-MiniLM-L6-v2": "None", | ||
"avsolatorio/GIST-large-Embedding-v0": "None", | ||
"avsolatorio/GIST-small-Embedding-v0": "None", | ||
"avsolatorio/NoInstruct-small-Embedding-v0": "None", | ||
"bigscience/sgpt-bloom-7b1-msmarco": "None", | ||
"bm25s": "None", | ||
"brahmairesearch/slx-v0.1": "None", | ||
"castorini/monobert-large-msmarco": "None", | ||
"castorini/monot5-3b-msmarco-10k": "None", | ||
"castorini/monot5-base-msmarco-10k": "None", | ||
"castorini/monot5-large-msmarco-10k": "None", | ||
"castorini/monot5-small-msmarco-10k": "None", | ||
"castorini/repllama-v1-7b-lora-passage": "You are trying to access a gated repo.\nMake sure to have access to it at https://huggingface.co/meta-llama/Llama-2-7b-hf.\n401 Client Error. (Request ID: Root=1-67794457-7e56cbf325381c760c430207;a79cc472-a4fc-49dc-80f0-9d4b8cb5ef42)\n\nCannot access gated repo for url https://huggingface.co/meta-llama/Llama-2-7b-hf/resolve/main/config.json.\nAccess to model meta-llama/Llama-2-7b-hf is restricted. You must have access to it and be authenticated to access it. Please log in.", | ||
"cointegrated/LaBSE-en-ru": "None", | ||
"cointegrated/rubert-tiny": "None", | ||
"cointegrated/rubert-tiny2": "None", | ||
"colbert-ir/colbertv2.0": "None", | ||
"consciousAI/cai-lunaris-text-embeddings": "None", | ||
"consciousAI/cai-stellaris-text-embeddings": "None", | ||
"deepfile/embedder-100p": "None", | ||
"deepvk/USER-base": "None", | ||
"deepvk/USER-bge-m3": "None", | ||
"deepvk/deberta-v1-base": "None", | ||
"dunzhang/stella_en_1.5B_v5": "None", | ||
"dunzhang/stella_en_400M_v5": "None", | ||
"dwzhu/e5-base-4k": "None", | ||
"google/flan-t5-base": "None", | ||
"google/flan-t5-large": "None", | ||
"google/flan-t5-xl": "None", | ||
"google/flan-t5-xxl": "None", | ||
"google/text-embedding-004": "None", | ||
"google/text-embedding-005": "None", | ||
"google/text-multilingual-embedding-002": "None", | ||
"ibm-granite/granite-embedding-107m-multilingual": "None", | ||
"ibm-granite/granite-embedding-125m-english": "None", | ||
"ibm-granite/granite-embedding-278m-multilingual": "None", | ||
"ibm-granite/granite-embedding-30m-english": "None", | ||
"infgrad/jasper_en_vision_language_v1": "Over threshold. Not tested.", | ||
"infgrad/stella-base-en-v2": "None", | ||
"intfloat/e5-base": "None", | ||
"intfloat/e5-base-v2": "None", | ||
"intfloat/e5-large": "None", | ||
"intfloat/e5-large-v2": "None", | ||
"intfloat/e5-mistral-7b-instruct": "Over threshold. Not tested.", | ||
"intfloat/e5-small": "None", | ||
"intfloat/e5-small-v2": "None", | ||
"intfloat/multilingual-e5-base": "None", | ||
"intfloat/multilingual-e5-large": "None", | ||
"intfloat/multilingual-e5-large-instruct": "None", | ||
"intfloat/multilingual-e5-small": "None", | ||
"izhx/udever-bloom-1b1": "None", | ||
"izhx/udever-bloom-3b": "None", | ||
"izhx/udever-bloom-560m": "None", | ||
"izhx/udever-bloom-7b1": "None", | ||
"jhu-clsp/FollowIR-7B": "None", | ||
"jinaai/jina-colbert-v2": "None", | ||
"jinaai/jina-embedding-b-en-v1": "None", | ||
"jinaai/jina-embedding-s-en-v1": "None", | ||
"jinaai/jina-embeddings-v2-base-en": "None", | ||
"jinaai/jina-embeddings-v2-small-en": "None", | ||
"jinaai/jina-embeddings-v3": "None", | ||
"jinaai/jina-reranker-v2-base-multilingual": "None", | ||
"keeeeenw/MicroLlama-text-embedding": "None", | ||
"malenia1/ternary-weight-embedding": "None", | ||
"manu/bge-m3-custom-fr": "None", | ||
"manu/sentence_croissant_alpha_v0.2": "None", | ||
"manu/sentence_croissant_alpha_v0.3": "Over threshold. Not tested.", | ||
"manu/sentence_croissant_alpha_v0.4": "Over threshold. Not tested.", | ||
"meta-llama/Llama-2-7b-chat-hf": "None", | ||
"meta-llama/Llama-2-7b-hf": "None", | ||
"minishlab/M2V_base_glove": "None", | ||
"minishlab/M2V_base_glove_subword": "None", | ||
"minishlab/M2V_base_output": "None", | ||
"minishlab/M2V_multilingual_output": "None", | ||
"minishlab/potion-base-2M": "None", | ||
"minishlab/potion-base-4M": "None", | ||
"minishlab/potion-base-8M": "None", | ||
"mistralai/Mistral-7B-Instruct-v0.2": "None", | ||
"mixedbread-ai/mxbai-embed-large-v1": "None", | ||
"nomic-ai/nomic-embed-text-v1": "None", | ||
"nomic-ai/nomic-embed-text-v1-ablated": "None", | ||
"nomic-ai/nomic-embed-text-v1-unsupervised": "None", | ||
"nomic-ai/nomic-embed-text-v1.5": "None", | ||
"nvidia/NV-Embed-v1": "Over threshold. Not tested.", | ||
"nvidia/NV-Embed-v2": "Over threshold. Not tested.", | ||
"omarelshehy/arabic-english-sts-matryoshka": "None", | ||
"openai/text-embedding-3-large": "None", | ||
"openai/text-embedding-3-small": "None", | ||
"openai/text-embedding-ada-002": "None", | ||
"openbmb/MiniCPM-Embedding": "Over threshold. Not tested.", | ||
"samaya-ai/RepLLaMA-reproduced": "You are trying to access a gated repo.\nMake sure to have access to it at https://huggingface.co/meta-llama/Llama-2-7b-hf.\n401 Client Error. (Request ID: Root=1-6779403c-1bd84d333e938afa4e7cf86b;b873eea6-3c10-4659-b6da-2288d83e721b)\n\nCannot access gated repo for url https://huggingface.co/meta-llama/Llama-2-7b-hf/resolve/main/config.json.\nAccess to model meta-llama/Llama-2-7b-hf is restricted. You must have access to it and be authenticated to access it. Please log in.", | ||
"samaya-ai/promptriever-llama2-7b-v1": "You are trying to access a gated repo.\nMake sure to have access to it at https://huggingface.co/meta-llama/Llama-2-7b-hf.\n401 Client Error. (Request ID: Root=1-677940f7-6c2bfcaa7985abb1165185ff;efdd2ef8-60a0-45c3-a92b-b24784b30b43)\n\nCannot access gated repo for url https://huggingface.co/meta-llama/Llama-2-7b-hf/resolve/main/config.json.\nAccess to model meta-llama/Llama-2-7b-hf is restricted. You must have access to it and be authenticated to access it. Please log in.", | ||
"samaya-ai/promptriever-llama3.1-8b-instruct-v1": "You are trying to access a gated repo.\nMake sure to have access to it at https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct.\n401 Client Error. (Request ID: Root=1-6779430b-3277d7961f3c88ab56ecf91f;a476a013-b28f-47c6-bd95-e3d6fe823468)\n\nCannot access gated repo for url https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct/resolve/main/config.json.\nAccess to model meta-llama/Llama-3.1-8B-Instruct is restricted. You must have access to it and be authenticated to access it. Please log in.", | ||
"samaya-ai/promptriever-llama3.1-8b-v1": "You are trying to access a gated repo.\nMake sure to have access to it at https://huggingface.co/meta-llama/Meta-Llama-3.1-8B.\n401 Client Error. (Request ID: Root=1-677bba8f-608cf825273d8d2b0670b5ad;066bb2fa-3bef-4fb9-b3cb-4c5ffee41047)\n\nCannot access gated repo for url https://huggingface.co/meta-llama/Meta-Llama-3.1-8B/resolve/main/config.json.\nAccess to model meta-llama/Llama-3.1-8B is restricted. You must have access to it and be authenticated to access it. Please log in.", | ||
"samaya-ai/promptriever-mistral-v0.1-7b-v1": "You are trying to access a gated repo.\nMake sure to have access to it at https://huggingface.co/mistralai/Mistral-7B-v0.1.\n401 Client Error. (Request ID: Root=1-67794457-688a6d9c24a9e8f15cf70d28;da3a233f-7c7c-4919-9cee-72a1d66acdb6)\n\nCannot access gated repo for url https://huggingface.co/mistralai/Mistral-7B-v0.1/resolve/main/config.json.\nAccess to model mistralai/Mistral-7B-v0.1 is restricted. You must have access to it and be authenticated to access it. Please log in.", | ||
"sdadas/mmlw-e5-base": "None", | ||
"sdadas/mmlw-e5-large": "None", | ||
"sdadas/mmlw-e5-small": "None", | ||
"sdadas/mmlw-roberta-base": "None", | ||
"sdadas/mmlw-roberta-large": "None", | ||
"sentence-transformer/multi-qa-MiniLM-L6-cos-v1": "sentence-transformer/multi-qa-MiniLM-L6-cos-v1 is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'\nIf this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=<your_token>`", | ||
"sentence-transformers/LaBSE": "None", | ||
"sentence-transformers/all-MiniLM-L12-v2": "None", | ||
"sentence-transformers/all-MiniLM-L6-v2": "None", | ||
"sentence-transformers/all-mpnet-base-v2": "None", | ||
"sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2": "None", | ||
"sentence-transformers/paraphrase-multilingual-mpnet-base-v2": "None", | ||
"sergeyzh/LaBSE-ru-turbo": "None", | ||
"sergeyzh/rubert-tiny-turbo": "None", | ||
"shibing624/text2vec-base-multilingual": "None", | ||
"silma-ai/silma-embeddding-matryoshka-v0.1": "None", | ||
"thenlper/gte-base": "None", | ||
"thenlper/gte-large": "None", | ||
"thenlper/gte-small": "None", | ||
"unicamp-dl/mt5-13b-mmarco-100k": "None", | ||
"unicamp-dl/mt5-base-mmarco-v2": "None", | ||
"voyage-large-2": "None", | ||
"voyageai/voyage-2": "None", | ||
"voyageai/voyage-3": "None", | ||
"voyageai/voyage-3-lite": "None", | ||
"voyageai/voyage-code-2": "None", | ||
"voyageai/voyage-finance-2": "None", | ||
"voyageai/voyage-large-2-instruct": "None", | ||
"voyageai/voyage-law-2": "None", | ||
"voyageai/voyage-multilingual-2": "None", | ||
"zeta-alpha-ai/Zeta-Alpha-E5-Mistral": "Over threshold. Not tested." | ||
} |
Oops, something went wrong.