Add huggingface ModelWrapper (#1555)
## Describe your changes
- Implemented a new huggingface `ModelWrapper` that acts as an interface to huggingface models. It maintains per-model-type mappings that let the user look up model attributes and submodules (a rough, hypothetical sketch of such an interface is shown below).
- All code that previously used the mappings directly has been updated accordingly.
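
The wrapper class itself is not part of the hunks shown below; as a rough, hypothetical sketch of the kind of interface described above (class, method, and mapping names here are illustrative assumptions, not the merged API), it could look roughly like this:

```python
# Hypothetical sketch only -- names are illustrative assumptions, not the merged ModelWrapper API.
from functools import reduce
from typing import Optional

from transformers import PretrainedConfig, PreTrainedModel

# per-model-type names, e.g. taken from mappings like the ones removed in this PR
LAYERS_MAPPING = {"llama": "model.layers", "opt": "model.decoder.layers"}
EMBEDDINGS_MAPPING = {"llama": ["model.embed_tokens"], "opt": ["model.decoder.embed_tokens"]}


class ModelWrapper:
    """Resolve model-type specific attribute names and submodules of a huggingface model."""

    def __init__(self, config: PretrainedConfig):
        self.config = config
        self.model_type = config.model_type
        self.model: Optional[PreTrainedModel] = None

    def set_model(self, model: PreTrainedModel):
        self.model = model

    def _resolve(self, dotted_name: str):
        # walk the attribute chain, e.g. "model.layers" -> model.model.layers
        return reduce(getattr, dotted_name.split("."), self.model)

    def get_layers(self):
        name = LAYERS_MAPPING[self.model_type]
        return name, self._resolve(name)

    def get_embeds(self):
        names = EMBEDDINGS_MAPPING[self.model_type]
        return names, [self._resolve(n) for n in names]
```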

## Checklist before requesting a review
- [ ] Add unit tests for this change.
- [ ] Make sure all tests can pass.
- [ ] Update documents if necessary.
- [ ] Lint and apply fixes to your code by running `lintrunner -a`
- [ ] Is this a user-facing change? If yes, give a description of this
change to be included in the release notes.
- [ ] Is this PR including examples changes? If yes, please remember to
update [example
documentation](https://github.com/microsoft/Olive/blob/main/docs/source/examples.md)
in a follow-up PR.

## (Optional) Issue link
jambayk authored Jan 22, 2025
1 parent cdb8693 commit e510074
Showing 15 changed files with 517 additions and 246 deletions.
85 changes: 0 additions & 85 deletions olive/common/hf/mappings.py
@@ -11,78 +11,6 @@
    # TODO(jambayk): see if we need more task types
}

# model_type -> name for layers
MODELS_TO_LAYERS_MAPPING = {
    "bloom": "transformer.h",
    "falcon": "transformer.h",
    "gemma": "model.layers",
    "gemma2": "model.layers",
    "gpt2": "transformer.h",
    "gpt_neox": "gpt_neox.layers",
    "gptj": "transformer.h",
    "llama": "model.layers",
    "mistral": "model.layers",
    "opt": "model.decoder.layers",
    "phi": "model.layers",
    "phi3": "model.layers",
    "qwen": "transformer.h",
    "qwen2": "model.layers",
}
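
These values are dotted attribute paths on the loaded model object. A minimal sketch of resolving such a path to the `torch.nn.ModuleList` of decoder blocks (the helper name and the example checkpoint are assumptions, not Olive code):

```python
# Minimal sketch, not Olive's actual helper: resolve a dotted path from
# MODELS_TO_LAYERS_MAPPING to the submodule holding the decoder layers.
from functools import reduce

from transformers import AutoModelForCausalLM

MODELS_TO_LAYERS_MAPPING = {"llama": "model.layers", "opt": "model.decoder.layers"}


def get_layers_module(model, model_type: str):
    dotted_name = MODELS_TO_LAYERS_MAPPING[model_type]
    # walk the attribute chain: model -> model.model -> model.model.decoder -> ...layers
    return reduce(getattr, dotted_name.split("."), model)


model = AutoModelForCausalLM.from_pretrained("facebook/opt-125m")  # small example checkpoint
layers = get_layers_module(model, model.config.model_type)
print(type(layers).__name__, len(layers))  # ModuleList 12
```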

# model_type -> names of the embedding modules; these are the modules before the first layer
MODELS_TO_EMBEDDINGS_MAPPING = {
    "bloom": ["transformer.word_embeddings", "transformer.word_embeddings_layernorm"],
    "falcon": ["transformer.word_embeddings"],
    "gemma": ["model.embed_tokens"],
    "gemma2": ["model.embed_tokens"],
    "gpt2": ["transformer.wte", "transformer.wpe"],
    "gpt_neox": ["gpt_neox.embed_in"],
    "gptj": ["transformer.wte"],
    "llama": ["model.embed_tokens"],
    "mistral": ["model.embed_tokens"],
    "opt": [
        "model.decoder.embed_tokens",
        "model.decoder.embed_positions",
        "model.decoder.project_out",
        "model.decoder.project_in",
    ],
    "phi": ["model.embed_tokens"],
    "phi3": ["model.embed_tokens"],
    "qwen": ["transformer.wte", "transformer.rotary_emb"],
    "qwen2": ["model.embed_tokens"],
}

# model_type -> max length of the model, extracted from the config;
# the value is an int when the max length is not available in the config
MODELS_TO_MAX_LENGTH_MAPPING = {
    "__default__": "max_position_embeddings",
    "bloom": 2048,
    "gpt2": "n_positions",
    "gpt_neox": "max_position_embeddings",
    "gptj": "n_postions",
    "llama": "max_position_embeddings",
    "mistral": "max_position_embeddings",
    "opt": "max_position_embeddings",
    "phi": "max_position_embeddings",
    "phi3": "max_position_embeddings",
    "qwen": "seq_length",
    "qwen2": "max_position_embeddings",
}


# When extending the following lists/maps from the huggingface config, note the priority order:
# NUM_HEADS_NAMES[0] and HIDDEN_SIZE_NAMES[0] are tried first,
# which means the user can override the value in the config file.
NUM_HEADS_NAMES = (
    "num_heads",
    "num_attention_heads",
    "n_head",
    "n_heads",
    "encoder_attention_heads",
)
NUM_HIDDEN_LAYER_NAMES = ("num_hidden_layers", "num_layers", "n_layer", "n_layers")
NUM_KEY_VALUE_HEADS_NAMES = ("num_key_value_heads",)
HIDDEN_SIZE_NAMES = ("hidden_size", "dim", "d_model", "n_embd")
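
To illustrate the priority-order lookup these tuples enable (the helper below is an assumption for illustration, not the removed Olive code):

```python
# Illustrative helper (assumption): return the first config attribute whose name
# appears in the candidate tuple, honoring the priority order described above.
from transformers import AutoConfig

NUM_HEADS_NAMES = ("num_heads", "num_attention_heads", "n_head", "n_heads", "encoder_attention_heads")
HIDDEN_SIZE_NAMES = ("hidden_size", "dim", "d_model", "n_embd")


def get_first_attr(config, names, default=None):
    for name in names:
        if hasattr(config, name):
            return getattr(config, name)
    return default


config = AutoConfig.from_pretrained("gpt2")  # example model only
print(get_first_attr(config, NUM_HEADS_NAMES))    # 12 (num_attention_heads, an alias for n_head)
print(get_first_attr(config, HIDDEN_SIZE_NAMES))  # 768 (hidden_size, an alias for n_embd)
```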
MODEL_TYPE_MAPPING = {
    "whisper": "bart",
    "camembert": "bert",
@@ -96,17 +24,4 @@
"phi3": "phi",
}

MODEL_OUTSIDE_LAYER_MODULES = {
    "phi3": ["model.embed_tokens", "embed_dropout", "model.norm"],
}

MODEL_INSIDE_LAYER_MODULES = {
    "phi3": [
        ["self_attn.qkv_proj"],
        ["self_attn.o_proj"],
        ["mlp.gate_up_proj"],
        ["mlp.down_proj"],
    ]
}

MODELS_TO_LORA_TARGET_MODULES_MAPPING = {"phi3": ["o_proj", "qkv_proj"]}
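
For context on how a target-module mapping like this is typically consumed (a sketch assuming peft's `LoraConfig`; the surrounding flow and hyperparameters are not Olive's actual pass code):

```python
# Sketch only: feeding the phi3 target modules into a peft LoraConfig.
from peft import LoraConfig

MODELS_TO_LORA_TARGET_MODULES_MAPPING = {"phi3": ["o_proj", "qkv_proj"]}

model_type = "phi3"
lora_config = LoraConfig(
    r=16,           # example rank, not an Olive default
    lora_alpha=32,  # example scaling, not an Olive default
    target_modules=MODELS_TO_LORA_TARGET_MODULES_MAPPING.get(model_type),
    task_type="CAUSAL_LM",
)
```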
29 changes: 1 addition & 28 deletions olive/common/hf/utils.py
@@ -8,7 +8,7 @@

from transformers import AutoConfig, AutoModel, AutoTokenizer, GenerationConfig

from olive.common.hf.mappings import MODELS_TO_MAX_LENGTH_MAPPING, TASK_TO_PEFT_TASK_TYPE
from olive.common.hf.mappings import TASK_TO_PEFT_TASK_TYPE
from olive.common.hf.mlflow import get_pretrained_name_or_path
from olive.common.utils import hardlink_copy_file

@@ -216,30 +216,3 @@ def get_peft_task_type_from_task(task: str, fail_on_not_found=False) -> str:
    elif peft_task_type is None:
        logger.warning(not_found_msg)
    return peft_task_type


def get_model_max_length(model_name_or_path: str, fail_on_not_found=False) -> int:
    """Get max length of the model, extracted from the config."""
    model_config = get_model_config(model_name_or_path)
    model_type = model_config.model_type

    max_length = MODELS_TO_MAX_LENGTH_MAPPING.get(model_type, None)
    if isinstance(max_length, int):
        return max_length
    elif isinstance(max_length, str):
        return getattr(model_config, max_length)
    else:
        logger.debug(
            "No max length mapping found in MODELS_TO_MAX_LENGTH_MAPPING for model type %s, trying __default__",
            model_type,
        )
        default_max_length = MODELS_TO_MAX_LENGTH_MAPPING["__default__"]
        try:
            return getattr(model_config, default_max_length)
        except AttributeError:
            not_found_msg = f"Could not find max length for model type {model_type}"
            if fail_on_not_found:
                raise ValueError(not_found_msg) from None
            else:
                logger.warning(not_found_msg)
                return None
