diff --git a/.github/workflows/gpu.yml b/.github/workflows/gpu.yml
new file mode 100644
index 00000000..c2196cb3
--- /dev/null
+++ b/.github/workflows/gpu.yml
@@ -0,0 +1,39 @@
+name: Tests on GPU
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    # This will trigger the workflow for pull requests to any branch
+    types: [opened, synchronize, reopened]
+
+jobs:
+  gpu-tests:
+    name: python
+    runs-on: self-hosted
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: true
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v3
+        with:
+          enable-cache: true
+          cache-dependency-glob: "uv.lock"
+
+
+
+      - name: Set up Python
+        run: uv python install 3.10.13
+
+      - name: Install the project
+        run: uv sync --all-extras --dev
+
+      - name: Install flash attention
+        run: uv pip install flash-attn --no-build-isolation
+
+      - name: Run tests
+        run: uv run pytest tests
\ No newline at end of file
diff --git a/src/zeroband/train.py b/src/zeroband/train.py
index fc47503b..14d02b99 100644
--- a/src/zeroband/train.py
+++ b/src/zeroband/train.py
@@ -21,6 +21,7 @@
 from zeroband.loss import cross_entropy_max_z_loss
 
 from zeroband.utils import (
+    FakeTokenizer,
     GPUMemoryMonitor,
     PerfCounter,
     get_module_signature,
@@ -137,7 +138,9 @@ def train(config: Config):
         config.ckpt.interval % config.diloco.inner_steps == 0
     ), "ckpt interval must be a multiple of diloco inner steps as we only save at the end of an outer step"
 
-    if config.type_model == "llama2":
+    if config.data.fake and config.name_model == "debugmodel":
+        tokenizer = FakeTokenizer()
+    elif config.type_model == "llama2":
         tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1", use_fast=True)
     elif config.type_model == "llama3":
         tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B", use_fast=True)
diff --git a/src/zeroband/utils/__init__.py b/src/zeroband/utils/__init__.py
index 2638d156..1bb454fb 100644
--- a/src/zeroband/utils/__init__.py
+++ b/src/zeroband/utils/__init__.py
@@ -229,3 +229,11 @@ def get_random_available_port_list(num_port):
 
 def get_random_available_port():
     return get_random_available_port_list(1)[0]
+
+
+class FakeTokenizer(object):
+    def __init__(self):
+        self.vocab_size = 1000
+        self.bos_token_id = 0
+        self.eos_token_id = 1
+        self.pad_token_id = 2
diff --git a/tests/test_torchrun/test_train.py b/tests/test_torchrun/test_train.py
index 69b66644..e5703fe3 100644
--- a/tests/test_torchrun/test_train.py
+++ b/tests/test_torchrun/test_train.py
@@ -6,6 +6,10 @@
 
 from zeroband.diloco import Compression
 
+import torch
+
+num_gpu = torch.cuda.device_count()
+
 
 def get_random_available_port_list(num_port):
     # https://stackoverflow.com/questions/1365265/on-localhost-how-do-i-pick-a-free-port-number
@@ -77,7 +81,7 @@ def test_multi_gpu(num_gpus):
     _test_multi_gpu(num_gpus, "debug/normal.toml")
 
 
-@pytest.mark.parametrize("num_gpus", [[2, 1], [2, 2]])
+@pytest.mark.parametrize("num_gpus", [[2, 1], [2, 2]] if num_gpu >= 4 else [[2, 1]])
 def test_multi_gpu_diloco(num_gpus):
     _test_multi_gpu(num_gpus, "debug/diloco.toml", diloco=True)
 
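Note on the fake-tokenizer path: when `config.data.fake` is set, no real text gets tokenized, so the stub only has to expose the attributes the trainer reads (`vocab_size` plus the special token ids). A minimal sketch of how such a stub can stand in for a real tokenizer — the batch shape and construction below are illustrative assumptions, not what zeroband's fake-data loader actually does:

```python
import torch

from zeroband.utils import FakeTokenizer

tok = FakeTokenizer()

# Hypothetical fake batch: random token ids drawn from the stub's vocabulary.
# The real fake-data path in zeroband may build batches differently.
batch = torch.randint(0, tok.vocab_size, (2, 16))
batch[:, 0] = tok.bos_token_id   # mark sequence starts
batch[:, -1] = tok.eos_token_id  # mark sequence ends
print(batch.shape, batch[0, :4])
```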
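The parametrize change gates the `[2, 2]` case on the runner's GPU count, since that shape appears to mean two DiLoCo workers with two GPUs each, i.e. four visible devices in total. The same guard pattern in isolation (the config shapes come from the test above; the launch loop and the `[workers, gpus_per_worker]` reading are only illustrative):

```python
import torch

# Same pattern as the parametrize guard: only schedule configurations
# that the current machine can actually satisfy.
num_gpu = torch.cuda.device_count()

# Each entry is assumed to be [workers, gpus_per_worker]; [2, 2] needs
# 2 * 2 = 4 GPUs, so it is skipped on smaller machines.
configs = [[2, 1], [2, 2]] if num_gpu >= 4 else [[2, 1]]

for workers, gpus in configs:
    print(f"would launch {workers} worker(s) x {gpus} GPU(s) = {workers * gpus} total")
```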