From 429b5d94533816e29c14e1b3ed6d8984ba491ed0 Mon Sep 17 00:00:00 2001
From: Sami Jaghouar
Date: Fri, 15 Nov 2024 11:14:24 +0000
Subject: [PATCH] add cool down config

---
Reviewer notes (this section sits after the "---" separator, so it is not
part of the commit message and not part of the diff):
- batch_size 128 x seq_length 8192 = 1,048,576 tokens per batch, which is
  what the "#1M tokens bs" comment refers to (assuming batch_size counts
  sequences -- confirm against the trainer).
- total_steps (90400) - num_stable_steps (74700) = 15700 steps; presumably
  the tail of the "wsd-sqrt" schedule is the cool-down this config is named
  after -- confirm how the scheduler counts warmup_steps.

 configs/10B/H100_cooldown.toml | 38 ++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)
 create mode 100644 configs/10B/H100_cooldown.toml

diff --git a/configs/10B/H100_cooldown.toml b/configs/10B/H100_cooldown.toml
new file mode 100644
index 00000000..b9e96506
--- /dev/null
+++ b/configs/10B/H100_cooldown.toml
@@ -0,0 +1,38 @@
+name_model = "10B"
+project = "10B_zero_band"
+wandb_resume = false
+
+[train]
+micro_bs = 1
+ac_ckpt = true
+
+[optim]
+sched_type = "wsd-sqrt"
+batch_size = 128 #1M tokens bs
+warmup_steps = 1000
+num_stable_steps = 74700
+total_steps = 90400
+lr = 7.5e-5
+
+adam_betas1 = 0.9
+adam_betas2 = 0.95
+weight_decay = 0.1
+
+z_loss = true
+
+[data]
+seq_length = 8192
+dataset_name_or_paths = "/data/datasets/fineweb-edu,/data/datasets/fineweb,/data/datasets/StackV1-popular"
+dataset_ratio = "80:10:10"
+num_workers = 4
+reverse_data_files = false
+
+[diloco]
+inner_steps = 100
+compression = "uint8"
+
+[ckpt]
+interval = 100
+topk = 40
+path = "/data/10B"
+remote_data_path = "/data/10B_data_ckpt"