Skip to content

Commit

Permalink
fix config to use shard grad op
Browse files Browse the repository at this point in the history
  • Loading branch information
samsja committed Sep 26, 2024
1 parent 294c36d commit ba392de
Show file tree
Hide file tree
Showing 3 changed files with 3 additions and 3 deletions.
2 changes: 1 addition & 1 deletion configs/150M/3090.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ project = "debug_150m_zero_band"

[train]
micro_bs = 16 # change this based on the GPU
-sharding_strategy = "NO_SHARD"
+sharding_strategy = "SHARD_GRAD_OP"

[optim]
batch_size = 512
Expand Down
2 changes: 1 addition & 1 deletion configs/150M/A40.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ project = "debug_150m_zero_band"

[train]
micro_bs = 32 # change this based on the GPU
-sharding_strategy = "NO_SHARD"
+sharding_strategy = "SHARD_GRAD_OP"

[optim]
batch_size = 512
Expand Down
2 changes: 1 addition & 1 deletion configs/150M/H100.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ project = "debug_150m_zero_band"

[train]
micro_bs = 64 # change this based on the GPU
-sharding_strategy = "NO_SHARD"
+sharding_strategy = "SHARD_GRAD_OP"

[optim]
batch_size = 512
Expand Down

0 comments on commit ba392de

Please sign in to comment.