Add huggingface ModelWrapper (#1555)
## Describe your changes
- Implemented a new huggingface `ModelWrapper` that acts as an interface to huggingface models. It maintains per-model-type mappings that let the user look up model attributes and submodules (a rough, hypothetical sketch of such an interface is shown below).
- All code that previously used the mappings directly has been updated accordingly.
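
The wrapper class itself is not part of the hunks shown below; as a rough, hypothetical sketch of the kind of interface described above (class, method, and mapping names here are illustrative assumptions, not the merged API), it could look roughly like this:

```python
# Hypothetical sketch only -- names are illustrative assumptions, not the merged ModelWrapper API.
from functools import reduce
from typing import Optional

from transformers import PretrainedConfig, PreTrainedModel

# per-model-type names, e.g. taken from mappings like the ones removed in this PR
LAYERS_MAPPING = {"llama": "model.layers", "opt": "model.decoder.layers"}
EMBEDDINGS_MAPPING = {"llama": ["model.embed_tokens"], "opt": ["model.decoder.embed_tokens"]}


class ModelWrapper:
    """Resolve model-type specific attribute names and submodules of a huggingface model."""

    def __init__(self, config: PretrainedConfig):
        self.config = config
        self.model_type = config.model_type
        self.model: Optional[PreTrainedModel] = None

    def set_model(self, model: PreTrainedModel):
        self.model = model

    def _resolve(self, dotted_name: str):
        # walk the attribute chain, e.g. "model.layers" -> model.model.layers
        return reduce(getattr, dotted_name.split("."), self.model)

    def get_layers(self):
        name = LAYERS_MAPPING[self.model_type]
        return name, self._resolve(name)

    def get_embeds(self):
        names = EMBEDDINGS_MAPPING[self.model_type]
        return names, [self._resolve(n) for n in names]
```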

## Checklist before requesting a review
- [ ] Add unit tests for this change.
- [ ] Make sure all tests can pass.
- [ ] Update documents if necessary.
- [ ] Lint and apply fixes to your code by running `lintrunner -a`
- [ ] Is this a user-facing change? If yes, give a description of this
change to be included in the release notes.
- [ ] Is this PR including examples changes? If yes, please remember to
update [example
documentation](https://github.com/microsoft/Olive/blob/main/docs/source/examples.md)
in a follow-up PR.

## (Optional) Issue link
jambayk authored Jan 22, 2025
1 parent cdb8693 commit e510074
Showing 15 changed files with 517 additions and 246 deletions.
85 changes: 0 additions & 85 deletions olive/common/hf/mappings.py
@@ -11,78 +11,6 @@
    # TODO(jambayk): see if we need more task types
}

# model_type -> name for layers
MODELS_TO_LAYERS_MAPPING = {
    "bloom": "transformer.h",
    "falcon": "transformer.h",
    "gemma": "model.layers",
    "gemma2": "model.layers",
    "gpt2": "transformer.h",
    "gpt_neox": "gpt_neox.layers",
    "gptj": "transformer.h",
    "llama": "model.layers",
    "mistral": "model.layers",
    "opt": "model.decoder.layers",
    "phi": "model.layers",
    "phi3": "model.layers",
    "qwen": "transformer.h",
    "qwen2": "model.layers",
}
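
These values are dotted attribute paths on the loaded model object. A minimal sketch of resolving such a path to the `torch.nn.ModuleList` of decoder blocks (the helper name and the example checkpoint are assumptions, not Olive code):

```python
# Minimal sketch, not Olive's actual helper: resolve a dotted path from
# MODELS_TO_LAYERS_MAPPING to the submodule holding the decoder layers.
from functools import reduce

from transformers import AutoModelForCausalLM

MODELS_TO_LAYERS_MAPPING = {"llama": "model.layers", "opt": "model.decoder.layers"}


def get_layers_module(model, model_type: str):
    dotted_name = MODELS_TO_LAYERS_MAPPING[model_type]
    # walk the attribute chain: model -> model.model -> model.model.decoder -> ...layers
    return reduce(getattr, dotted_name.split("."), model)


model = AutoModelForCausalLM.from_pretrained("facebook/opt-125m")  # small example checkpoint
layers = get_layers_module(model, model.config.model_type)
print(type(layers).__name__, len(layers))  # ModuleList 12
```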

# model_type -> names of the embedding modules; these are the modules before the first layer
MODELS_TO_EMBEDDINGS_MAPPING = {
    "bloom": ["transformer.word_embeddings", "transformer.word_embeddings_layernorm"],
    "falcon": ["transformer.word_embeddings"],
    "gemma": ["model.embed_tokens"],
    "gemma2": ["model.embed_tokens"],
    "gpt2": ["transformer.wte", "transformer.wpe"],
    "gpt_neox": ["gpt_neox.embed_in"],
    "gptj": ["transformer.wte"],
    "llama": ["model.embed_tokens"],
    "mistral": ["model.embed_tokens"],
    "opt": [
        "model.decoder.embed_tokens",
        "model.decoder.embed_positions",
        "model.decoder.project_out",
        "model.decoder.project_in",
    ],
    "phi": ["model.embed_tokens"],
    "phi3": ["model.embed_tokens"],
    "qwen": ["transformer.wte", "transformer.rotary_emb"],
    "qwen2": ["model.embed_tokens"],
}

# model_type -> max length of the model, extracted from the config;
# the value is an int when the max length is not available in the config
MODELS_TO_MAX_LENGTH_MAPPING = {
    "__default__": "max_position_embeddings",
    "bloom": 2048,
    "gpt2": "n_positions",
    "gpt_neox": "max_position_embeddings",
    "gptj": "n_postions",
    "llama": "max_position_embeddings",
    "mistral": "max_position_embeddings",
    "opt": "max_position_embeddings",
    "phi": "max_position_embeddings",
    "phi3": "max_position_embeddings",
    "qwen": "seq_length",
    "qwen2": "max_position_embeddings",
}


# When extending the following lists/maps from the huggingface config, note the priority order:
# NUM_HEADS_NAMES[0] and HIDDEN_SIZE_NAMES[0] are tried first,
# which means the user can override the value in the config file.
NUM_HEADS_NAMES = (
    "num_heads",
    "num_attention_heads",
    "n_head",
    "n_heads",
    "encoder_attention_heads",
)
NUM_HIDDEN_LAYER_NAMES = ("num_hidden_layers", "num_layers", "n_layer", "n_layers")
NUM_KEY_VALUE_HEADS_NAMES = ("num_key_value_heads",)
HIDDEN_SIZE_NAMES = ("hidden_size", "dim", "d_model", "n_embd")
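
To illustrate the priority-order lookup these tuples enable (the helper below is an assumption for illustration, not the removed Olive code):

```python
# Illustrative helper (assumption): return the first config attribute whose name
# appears in the candidate tuple, honoring the priority order described above.
from transformers import AutoConfig

NUM_HEADS_NAMES = ("num_heads", "num_attention_heads", "n_head", "n_heads", "encoder_attention_heads")
HIDDEN_SIZE_NAMES = ("hidden_size", "dim", "d_model", "n_embd")


def get_first_attr(config, names, default=None):
    for name in names:
        if hasattr(config, name):
            return getattr(config, name)
    return default


config = AutoConfig.from_pretrained("gpt2")  # example model only
print(get_first_attr(config, NUM_HEADS_NAMES))    # 12 (num_attention_heads, an alias for n_head)
print(get_first_attr(config, HIDDEN_SIZE_NAMES))  # 768 (hidden_size, an alias for n_embd)
```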
MODEL_TYPE_MAPPING = {
    "whisper": "bart",
    "camembert": "bert",
@@ -96,17 +24,4 @@
"phi3": "phi",
}

MODEL_OUTSIDE_LAYER_MODULES = {
    "phi3": ["model.embed_tokens", "embed_dropout", "model.norm"],
}

MODEL_INSIDE_LAYER_MODULES = {
    "phi3": [
        ["self_attn.qkv_proj"],
        ["self_attn.o_proj"],
        ["mlp.gate_up_proj"],
        ["mlp.down_proj"],
    ]
}

MODELS_TO_LORA_TARGET_MODULES_MAPPING = {"phi3": ["o_proj", "qkv_proj"]}
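
For context on how a target-module mapping like this is typically consumed (a sketch assuming peft's `LoraConfig`; the surrounding flow and hyperparameters are not Olive's actual pass code):

```python
# Sketch only: feeding the phi3 target modules into a peft LoraConfig.
from peft import LoraConfig

MODELS_TO_LORA_TARGET_MODULES_MAPPING = {"phi3": ["o_proj", "qkv_proj"]}

model_type = "phi3"
lora_config = LoraConfig(
    r=16,           # example rank, not an Olive default
    lora_alpha=32,  # example scaling, not an Olive default
    target_modules=MODELS_TO_LORA_TARGET_MODULES_MAPPING.get(model_type),
    task_type="CAUSAL_LM",
)
```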
29 changes: 1 addition & 28 deletions olive/common/hf/utils.py
@@ -8,7 +8,7 @@

from transformers import AutoConfig, AutoModel, AutoTokenizer, GenerationConfig

from olive.common.hf.mappings import MODELS_TO_MAX_LENGTH_MAPPING, TASK_TO_PEFT_TASK_TYPE
from olive.common.hf.mappings import TASK_TO_PEFT_TASK_TYPE
from olive.common.hf.mlflow import get_pretrained_name_or_path
from olive.common.utils import hardlink_copy_file

@@ -216,30 +216,3 @@ def get_peft_task_type_from_task(task: str, fail_on_not_found=False) -> str:
    elif peft_task_type is None:
        logger.warning(not_found_msg)
    return peft_task_type


def get_model_max_length(model_name_or_path: str, fail_on_not_found=False) -> int:
    """Get max length of the model, extracted from the config."""
    model_config = get_model_config(model_name_or_path)
    model_type = model_config.model_type

    max_length = MODELS_TO_MAX_LENGTH_MAPPING.get(model_type, None)
    if isinstance(max_length, int):
        return max_length
    elif isinstance(max_length, str):
        return getattr(model_config, max_length)
    else:
        logger.debug(
            "No max length mapping found in MODELS_TO_MAX_LENGTH_MAPPING for model type %s, trying __default__",
            model_type,
        )
        default_max_length = MODELS_TO_MAX_LENGTH_MAPPING["__default__"]
        try:
            return getattr(model_config, default_max_length)
        except AttributeError:
            not_found_msg = f"Could not find max length for model type {model_type}"
            if fail_on_not_found:
                raise ValueError(not_found_msg) from None
            else:
                logger.warning(not_found_msg)
                return None
