Skip to content

Commit

Permalink
update wa (WebArena) slurm scripts
Browse files (browse the repository at this point in the history)
  • Loading branch information
zhudotexe committed Dec 16, 2024
1 parent b6218ea commit 1c3a5cc
Show file tree
Hide file tree
Showing 47 changed files with 81 additions and 128 deletions.
3 changes: 1 addition & 2 deletions slurm/claude/webarena-1-full.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
#SBATCH --gpus=0
#SBATCH [email protected]
#SBATCH --mail-type=END,FAIL
#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08


source slurm/env.sh
Expand All @@ -23,6 +22,6 @@ source slurm/webarena-env.sh
bash slurm/webarena-startup.sh
sleep 600
python bench_webarena.py --config full --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/claude/full
curl -X GET ${RESTART_URL}
bash slurm/webarena-startup.sh
sleep 600
kill $DOCKER_PID
3 changes: 1 addition & 2 deletions slurm/claude/webarena-2-root-fc.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
#SBATCH --gpus=0
#SBATCH [email protected]
#SBATCH --mail-type=END,FAIL
#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08


source slurm/env.sh
Expand All @@ -23,6 +22,6 @@ source slurm/webarena-env.sh
bash slurm/webarena-startup.sh
sleep 600
python bench_webarena.py --config root-fc --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/claude/root-fc
curl -X GET ${RESTART_URL}
bash slurm/webarena-startup.sh
sleep 600
kill $DOCKER_PID
3 changes: 1 addition & 2 deletions slurm/claude/webarena-3-baseline.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
#SBATCH --gpus=0
#SBATCH [email protected]
#SBATCH --mail-type=END,FAIL
#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08


source slurm/env.sh
Expand All @@ -23,6 +22,6 @@ source slurm/webarena-env.sh
bash slurm/webarena-startup.sh
sleep 600
python bench_webarena.py --config baseline --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/claude/baseline
curl -X GET ${RESTART_URL}
bash slurm/webarena-startup.sh
sleep 600
kill $DOCKER_PID
3 changes: 1 addition & 2 deletions slurm/claude/webarena-4-small-leaf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
#SBATCH --gpus=0
#SBATCH [email protected]
#SBATCH --mail-type=END,FAIL
#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08


source slurm/env.sh
Expand All @@ -23,6 +22,6 @@ source slurm/webarena-env.sh
bash slurm/webarena-startup.sh
sleep 600
python bench_webarena.py --config small-leaf --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/claude/small-leaf
curl -X GET ${RESTART_URL}
bash slurm/webarena-startup.sh
sleep 600
kill $DOCKER_PID
3 changes: 1 addition & 2 deletions slurm/claude/webarena-5-small-all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
#SBATCH --gpus=0
#SBATCH [email protected]
#SBATCH --mail-type=END,FAIL
#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08


source slurm/env.sh
Expand All @@ -23,6 +22,6 @@ source slurm/webarena-env.sh
bash slurm/webarena-startup.sh
sleep 600
python bench_webarena.py --config small-all --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/claude/small-all
curl -X GET ${RESTART_URL}
bash slurm/webarena-startup.sh
sleep 600
kill $DOCKER_PID
3 changes: 1 addition & 2 deletions slurm/claude/webarena-6-small-baseline.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
#SBATCH --gpus=0
#SBATCH [email protected]
#SBATCH --mail-type=END,FAIL
#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08


source slurm/env.sh
Expand All @@ -23,6 +22,6 @@ source slurm/webarena-env.sh
bash slurm/webarena-startup.sh
sleep 600
python bench_webarena.py --config small-baseline --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/claude/small-baseline
curl -X GET ${RESTART_URL}
bash slurm/webarena-startup.sh
sleep 600
kill $DOCKER_PID
3 changes: 1 addition & 2 deletions slurm/claude/webarena-7-short-context.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
#SBATCH --gpus=0
#SBATCH [email protected]
#SBATCH --mail-type=END,FAIL
#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08


source slurm/env.sh
Expand All @@ -23,6 +22,6 @@ source slurm/webarena-env.sh
bash slurm/webarena-startup.sh
sleep 600
python bench_webarena.py --config short-context --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/claude/short-context
curl -X GET ${RESTART_URL}
bash slurm/webarena-startup.sh
sleep 600
kill $DOCKER_PID
3 changes: 1 addition & 2 deletions slurm/claude/webarena-8-short-baseline.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
#SBATCH --gpus=0
#SBATCH [email protected]
#SBATCH --mail-type=END,FAIL
#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08


source slurm/env.sh
Expand All @@ -23,6 +22,6 @@ source slurm/webarena-env.sh
bash slurm/webarena-startup.sh
sleep 600
python bench_webarena.py --config short-baseline --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/claude/short-baseline
curl -X GET ${RESTART_URL}
bash slurm/webarena-startup.sh
sleep 600
kill $DOCKER_PID
17 changes: 8 additions & 9 deletions slurm/claude/webarena-all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
#SBATCH --gpus=0
#SBATCH [email protected]
#SBATCH --mail-type=END,FAIL
#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08


source slurm/env.sh
Expand All @@ -23,27 +22,27 @@ source slurm/webarena-env.sh
bash slurm/webarena-startup.sh
sleep 600
python bench_webarena.py --config full --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/claude/full
curl -X GET ${RESTART_URL}
bash slurm/webarena-startup.sh
sleep 600
python bench_webarena.py --config root-fc --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/claude/root-fc
curl -X GET ${RESTART_URL}
bash slurm/webarena-startup.sh
sleep 600
python bench_webarena.py --config baseline --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/claude/baseline
curl -X GET ${RESTART_URL}
bash slurm/webarena-startup.sh
sleep 600
python bench_webarena.py --config small-leaf --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/claude/small-leaf
curl -X GET ${RESTART_URL}
bash slurm/webarena-startup.sh
sleep 600
python bench_webarena.py --config small-all --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/claude/small-all
curl -X GET ${RESTART_URL}
bash slurm/webarena-startup.sh
sleep 600
python bench_webarena.py --config small-baseline --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/claude/small-baseline
curl -X GET ${RESTART_URL}
bash slurm/webarena-startup.sh
sleep 600
python bench_webarena.py --config short-context --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/claude/short-context
curl -X GET ${RESTART_URL}
bash slurm/webarena-startup.sh
sleep 600
python bench_webarena.py --config short-baseline --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/claude/short-baseline
curl -X GET ${RESTART_URL}
bash slurm/webarena-startup.sh
sleep 600
kill $DOCKER_PID
3 changes: 1 addition & 2 deletions slurm/cohere-hf/webarena-1-full.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
#SBATCH --gpus=8
#SBATCH [email protected]
#SBATCH --mail-type=END,FAIL
#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08
#SBATCH --constraint=48GBgpu

source slurm/env.sh
Expand All @@ -23,6 +22,6 @@ source slurm/webarena-env.sh
bash slurm/webarena-startup.sh
sleep 600
python bench_webarena.py --config full --model-class cohere-hf --large-model CohereForAI/c4ai-command-r-plus-08-2024 --small-model CohereForAI/c4ai-command-r-08-2024 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/cohere-hf/full --engine-timeout 1800
curl -X GET ${RESTART_URL}
bash slurm/webarena-startup.sh
sleep 600
kill $DOCKER_PID
3 changes: 1 addition & 2 deletions slurm/cohere-hf/webarena-2-root-fc.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
#SBATCH --gpus=8
#SBATCH [email protected]
#SBATCH --mail-type=END,FAIL
#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08
#SBATCH --constraint=48GBgpu

source slurm/env.sh
Expand All @@ -23,6 +22,6 @@ source slurm/webarena-env.sh
bash slurm/webarena-startup.sh
sleep 600
python bench_webarena.py --config root-fc --model-class cohere-hf --large-model CohereForAI/c4ai-command-r-plus-08-2024 --small-model CohereForAI/c4ai-command-r-08-2024 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/cohere-hf/root-fc --engine-timeout 1800
curl -X GET ${RESTART_URL}
bash slurm/webarena-startup.sh
sleep 600
kill $DOCKER_PID
3 changes: 1 addition & 2 deletions slurm/cohere-hf/webarena-3-baseline.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
#SBATCH --gpus=8
#SBATCH [email protected]
#SBATCH --mail-type=END,FAIL
#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08
#SBATCH --constraint=48GBgpu

source slurm/env.sh
Expand All @@ -23,6 +22,6 @@ source slurm/webarena-env.sh
bash slurm/webarena-startup.sh
sleep 600
python bench_webarena.py --config baseline --model-class cohere-hf --large-model CohereForAI/c4ai-command-r-plus-08-2024 --small-model CohereForAI/c4ai-command-r-08-2024 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/cohere-hf/baseline --engine-timeout 1800
curl -X GET ${RESTART_URL}
bash slurm/webarena-startup.sh
sleep 600
kill $DOCKER_PID
3 changes: 1 addition & 2 deletions slurm/cohere-hf/webarena-4-small-leaf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
#SBATCH --gpus=8
#SBATCH [email protected]
#SBATCH --mail-type=END,FAIL
#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08
#SBATCH --constraint=48GBgpu

source slurm/env.sh
Expand All @@ -23,6 +22,6 @@ source slurm/webarena-env.sh
bash slurm/webarena-startup.sh
sleep 600
python bench_webarena.py --config small-leaf --model-class cohere-hf --large-model CohereForAI/c4ai-command-r-plus-08-2024 --small-model CohereForAI/c4ai-command-r-08-2024 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/cohere-hf/small-leaf --engine-timeout 1800
curl -X GET ${RESTART_URL}
bash slurm/webarena-startup.sh
sleep 600
kill $DOCKER_PID
3 changes: 1 addition & 2 deletions slurm/cohere-hf/webarena-5-small-all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
#SBATCH --gpus=8
#SBATCH [email protected]
#SBATCH --mail-type=END,FAIL
#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08
#SBATCH --constraint=48GBgpu

source slurm/env.sh
Expand All @@ -23,6 +22,6 @@ source slurm/webarena-env.sh
bash slurm/webarena-startup.sh
sleep 600
python bench_webarena.py --config small-all --model-class cohere-hf --large-model CohereForAI/c4ai-command-r-plus-08-2024 --small-model CohereForAI/c4ai-command-r-08-2024 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/cohere-hf/small-all --engine-timeout 1800
curl -X GET ${RESTART_URL}
bash slurm/webarena-startup.sh
sleep 600
kill $DOCKER_PID
3 changes: 1 addition & 2 deletions slurm/cohere-hf/webarena-6-small-baseline.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
#SBATCH --gpus=8
#SBATCH [email protected]
#SBATCH --mail-type=END,FAIL
#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08
#SBATCH --constraint=48GBgpu

source slurm/env.sh
Expand All @@ -23,6 +22,6 @@ source slurm/webarena-env.sh
bash slurm/webarena-startup.sh
sleep 600
python bench_webarena.py --config small-baseline --model-class cohere-hf --large-model CohereForAI/c4ai-command-r-plus-08-2024 --small-model CohereForAI/c4ai-command-r-08-2024 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/cohere-hf/small-baseline --engine-timeout 1800
curl -X GET ${RESTART_URL}
bash slurm/webarena-startup.sh
sleep 600
kill $DOCKER_PID
3 changes: 1 addition & 2 deletions slurm/cohere-hf/webarena-7-short-context.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
#SBATCH --gpus=8
#SBATCH [email protected]
#SBATCH --mail-type=END,FAIL
#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08
#SBATCH --constraint=48GBgpu

source slurm/env.sh
Expand All @@ -23,6 +22,6 @@ source slurm/webarena-env.sh
bash slurm/webarena-startup.sh
sleep 600
python bench_webarena.py --config short-context --model-class cohere-hf --large-model CohereForAI/c4ai-command-r-plus-08-2024 --small-model CohereForAI/c4ai-command-r-08-2024 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/cohere-hf/short-context --engine-timeout 1800
curl -X GET ${RESTART_URL}
bash slurm/webarena-startup.sh
sleep 600
kill $DOCKER_PID
3 changes: 1 addition & 2 deletions slurm/cohere-hf/webarena-8-short-baseline.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
#SBATCH --gpus=8
#SBATCH [email protected]
#SBATCH --mail-type=END,FAIL
#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08
#SBATCH --constraint=48GBgpu

source slurm/env.sh
Expand All @@ -23,6 +22,6 @@ source slurm/webarena-env.sh
bash slurm/webarena-startup.sh
sleep 600
python bench_webarena.py --config short-baseline --model-class cohere-hf --large-model CohereForAI/c4ai-command-r-plus-08-2024 --small-model CohereForAI/c4ai-command-r-08-2024 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/cohere-hf/short-baseline --engine-timeout 1800
curl -X GET ${RESTART_URL}
bash slurm/webarena-startup.sh
sleep 600
kill $DOCKER_PID
17 changes: 8 additions & 9 deletions slurm/cohere-hf/webarena-all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
#SBATCH --gpus=8
#SBATCH [email protected]
#SBATCH --mail-type=END,FAIL
#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08
#SBATCH --constraint=48GBgpu

source slurm/env.sh
Expand All @@ -23,27 +22,27 @@ source slurm/webarena-env.sh
bash slurm/webarena-startup.sh
sleep 600
python bench_webarena.py --config full --model-class cohere-hf --large-model CohereForAI/c4ai-command-r-plus-08-2024 --small-model CohereForAI/c4ai-command-r-08-2024 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/cohere-hf/full --engine-timeout 1800
curl -X GET ${RESTART_URL}
bash slurm/webarena-startup.sh
sleep 600
python bench_webarena.py --config root-fc --model-class cohere-hf --large-model CohereForAI/c4ai-command-r-plus-08-2024 --small-model CohereForAI/c4ai-command-r-08-2024 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/cohere-hf/root-fc --engine-timeout 1800
curl -X GET ${RESTART_URL}
bash slurm/webarena-startup.sh
sleep 600
python bench_webarena.py --config baseline --model-class cohere-hf --large-model CohereForAI/c4ai-command-r-plus-08-2024 --small-model CohereForAI/c4ai-command-r-08-2024 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/cohere-hf/baseline --engine-timeout 1800
curl -X GET ${RESTART_URL}
bash slurm/webarena-startup.sh
sleep 600
python bench_webarena.py --config small-leaf --model-class cohere-hf --large-model CohereForAI/c4ai-command-r-plus-08-2024 --small-model CohereForAI/c4ai-command-r-08-2024 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/cohere-hf/small-leaf --engine-timeout 1800
curl -X GET ${RESTART_URL}
bash slurm/webarena-startup.sh
sleep 600
python bench_webarena.py --config small-all --model-class cohere-hf --large-model CohereForAI/c4ai-command-r-plus-08-2024 --small-model CohereForAI/c4ai-command-r-08-2024 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/cohere-hf/small-all --engine-timeout 1800
curl -X GET ${RESTART_URL}
bash slurm/webarena-startup.sh
sleep 600
python bench_webarena.py --config small-baseline --model-class cohere-hf --large-model CohereForAI/c4ai-command-r-plus-08-2024 --small-model CohereForAI/c4ai-command-r-08-2024 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/cohere-hf/small-baseline --engine-timeout 1800
curl -X GET ${RESTART_URL}
bash slurm/webarena-startup.sh
sleep 600
python bench_webarena.py --config short-context --model-class cohere-hf --large-model CohereForAI/c4ai-command-r-plus-08-2024 --small-model CohereForAI/c4ai-command-r-08-2024 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/cohere-hf/short-context --engine-timeout 1800
curl -X GET ${RESTART_URL}
bash slurm/webarena-startup.sh
sleep 600
python bench_webarena.py --config short-baseline --model-class cohere-hf --large-model CohereForAI/c4ai-command-r-plus-08-2024 --small-model CohereForAI/c4ai-command-r-08-2024 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/cohere-hf/short-baseline --engine-timeout 1800
curl -X GET ${RESTART_URL}
bash slurm/webarena-startup.sh
sleep 600
kill $DOCKER_PID
3 changes: 1 addition & 2 deletions slurm/gen_slurm.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,7 @@ def main():
for bench in BENCHES:
# WA needs extra env vars
if bench == "webarena":
gpuconstraint = f"#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08\n{gpuconstraint}"
bench_extras = "curl -X GET ${RESTART_URL}\nsleep 600"
bench_extras = "bash slurm/webarena-startup.sh\nsleep 600"
bench_startup = (
"dockerd-rootless.sh &\n"
"DOCKER_PID=$!\n"
Expand Down
3 changes: 1 addition & 2 deletions slurm/mistral/webarena-1-full.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
#SBATCH --gpus=8
#SBATCH [email protected]
#SBATCH --mail-type=END,FAIL
#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08
#SBATCH --constraint=48GBgpu

source slurm/env.sh
Expand All @@ -23,6 +22,6 @@ source slurm/webarena-env.sh
bash slurm/webarena-startup.sh
sleep 600
python bench_webarena.py --config full --model-class mistral --large-model mistralai/Mistral-Large-Instruct-2407 --small-model mistralai/Mistral-Small-Instruct-2409 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/mistral/full --engine-timeout 1800
curl -X GET ${RESTART_URL}
bash slurm/webarena-startup.sh
sleep 600
kill $DOCKER_PID
Loading

0 comments on commit 1c3a5cc

Please sign in to comment.