From 1c3a5cca4682a802d47a40501738ae718db19bc9 Mon Sep 17 00:00:00 2001 From: Andrew Zhu Date: Mon, 16 Dec 2024 12:44:17 -0500 Subject: [PATCH] update wa slurm scripts --- slurm/claude/webarena-1-full.sh | 3 +-- slurm/claude/webarena-2-root-fc.sh | 3 +-- slurm/claude/webarena-3-baseline.sh | 3 +-- slurm/claude/webarena-4-small-leaf.sh | 3 +-- slurm/claude/webarena-5-small-all.sh | 3 +-- slurm/claude/webarena-6-small-baseline.sh | 3 +-- slurm/claude/webarena-7-short-context.sh | 3 +-- slurm/claude/webarena-8-short-baseline.sh | 3 +-- slurm/claude/webarena-all.sh | 17 ++++++++--------- slurm/cohere-hf/webarena-1-full.sh | 3 +-- slurm/cohere-hf/webarena-2-root-fc.sh | 3 +-- slurm/cohere-hf/webarena-3-baseline.sh | 3 +-- slurm/cohere-hf/webarena-4-small-leaf.sh | 3 +-- slurm/cohere-hf/webarena-5-small-all.sh | 3 +-- slurm/cohere-hf/webarena-6-small-baseline.sh | 3 +-- slurm/cohere-hf/webarena-7-short-context.sh | 3 +-- slurm/cohere-hf/webarena-8-short-baseline.sh | 3 +-- slurm/cohere-hf/webarena-all.sh | 17 ++++++++--------- slurm/gen_slurm.py | 3 +-- slurm/mistral/webarena-1-full.sh | 3 +-- slurm/mistral/webarena-2-root-fc.sh | 3 +-- slurm/mistral/webarena-3-baseline.sh | 3 +-- slurm/mistral/webarena-4-small-leaf.sh | 3 +-- slurm/mistral/webarena-5-small-all.sh | 3 +-- slurm/mistral/webarena-6-small-baseline.sh | 3 +-- slurm/mistral/webarena-7-short-context.sh | 3 +-- slurm/mistral/webarena-8-short-baseline.sh | 3 +-- slurm/mistral/webarena-all.sh | 17 ++++++++--------- slurm/openai/webarena-1-full.sh | 3 +-- slurm/openai/webarena-2-root-fc.sh | 3 +-- slurm/openai/webarena-3-baseline.sh | 3 +-- slurm/openai/webarena-4-small-leaf.sh | 3 +-- slurm/openai/webarena-5-small-all.sh | 3 +-- slurm/openai/webarena-6-small-baseline.sh | 3 +-- slurm/openai/webarena-7-short-context.sh | 3 +-- slurm/openai/webarena-8-short-baseline.sh | 3 +-- slurm/openai/webarena-all.sh | 17 ++++++++--------- slurm/qwen/webarena-1-full.sh | 3 +-- slurm/qwen/webarena-2-root-fc.sh | 3 +-- slurm/qwen/webarena-3-baseline.sh | 3 +-- slurm/qwen/webarena-4-small-leaf.sh | 3 +-- slurm/qwen/webarena-5-small-all.sh | 3 +-- slurm/qwen/webarena-6-small-baseline.sh | 3 +-- slurm/qwen/webarena-7-short-context.sh | 3 +-- slurm/qwen/webarena-8-short-baseline.sh | 3 +-- slurm/qwen/webarena-all.sh | 17 ++++++++--------- slurm/webarena-startup.sh | 1 - 47 files changed, 81 insertions(+), 128 deletions(-) diff --git a/slurm/claude/webarena-1-full.sh b/slurm/claude/webarena-1-full.sh index 9efc48b39..372033474 100644 --- a/slurm/claude/webarena-1-full.sh +++ b/slurm/claude/webarena-1-full.sh @@ -11,7 +11,6 @@ #SBATCH --gpus=0 #SBATCH --mail-user=andrz@seas.upenn.edu #SBATCH --mail-type=END,FAIL -#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08 source slurm/env.sh @@ -23,6 +22,6 @@ source slurm/webarena-env.sh bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config full --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/claude/full -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 kill $DOCKER_PID \ No newline at end of file diff --git a/slurm/claude/webarena-2-root-fc.sh b/slurm/claude/webarena-2-root-fc.sh index 0d5952a23..fead41dfd 100644 --- a/slurm/claude/webarena-2-root-fc.sh +++ b/slurm/claude/webarena-2-root-fc.sh @@ -11,7 +11,6 @@ #SBATCH --gpus=0 #SBATCH --mail-user=andrz@seas.upenn.edu #SBATCH --mail-type=END,FAIL -#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08 source slurm/env.sh @@ -23,6 +22,6 @@ source slurm/webarena-env.sh bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config root-fc --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/claude/root-fc -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 kill $DOCKER_PID \ No newline at end of file diff --git a/slurm/claude/webarena-3-baseline.sh b/slurm/claude/webarena-3-baseline.sh index 10f4c8733..075c7b257 100644 --- a/slurm/claude/webarena-3-baseline.sh +++ b/slurm/claude/webarena-3-baseline.sh @@ -11,7 +11,6 @@ #SBATCH --gpus=0 #SBATCH --mail-user=andrz@seas.upenn.edu #SBATCH --mail-type=END,FAIL -#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08 source slurm/env.sh @@ -23,6 +22,6 @@ source slurm/webarena-env.sh bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config baseline --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/claude/baseline -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 kill $DOCKER_PID \ No newline at end of file diff --git a/slurm/claude/webarena-4-small-leaf.sh b/slurm/claude/webarena-4-small-leaf.sh index 46d9e1f3d..7cf96e97a 100644 --- a/slurm/claude/webarena-4-small-leaf.sh +++ b/slurm/claude/webarena-4-small-leaf.sh @@ -11,7 +11,6 @@ #SBATCH --gpus=0 #SBATCH --mail-user=andrz@seas.upenn.edu #SBATCH --mail-type=END,FAIL -#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08 source slurm/env.sh @@ -23,6 +22,6 @@ source slurm/webarena-env.sh bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config small-leaf --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/claude/small-leaf -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 kill $DOCKER_PID \ No newline at end of file diff --git a/slurm/claude/webarena-5-small-all.sh b/slurm/claude/webarena-5-small-all.sh index b788d12f7..26a5085c5 100644 --- a/slurm/claude/webarena-5-small-all.sh +++ b/slurm/claude/webarena-5-small-all.sh @@ -11,7 +11,6 @@ #SBATCH --gpus=0 #SBATCH --mail-user=andrz@seas.upenn.edu #SBATCH --mail-type=END,FAIL -#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08 source slurm/env.sh @@ -23,6 +22,6 @@ source slurm/webarena-env.sh bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config small-all --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/claude/small-all -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 kill $DOCKER_PID \ No newline at end of file diff --git a/slurm/claude/webarena-6-small-baseline.sh b/slurm/claude/webarena-6-small-baseline.sh index 51b343ed5..4d2f95eb3 100644 --- a/slurm/claude/webarena-6-small-baseline.sh +++ b/slurm/claude/webarena-6-small-baseline.sh @@ -11,7 +11,6 @@ #SBATCH --gpus=0 #SBATCH --mail-user=andrz@seas.upenn.edu #SBATCH --mail-type=END,FAIL -#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08 source slurm/env.sh @@ -23,6 +22,6 @@ source slurm/webarena-env.sh bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config small-baseline --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/claude/small-baseline -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 kill $DOCKER_PID \ No newline at end of file diff --git a/slurm/claude/webarena-7-short-context.sh b/slurm/claude/webarena-7-short-context.sh index 46e27165a..4281fd4b3 100644 --- a/slurm/claude/webarena-7-short-context.sh +++ b/slurm/claude/webarena-7-short-context.sh @@ -11,7 +11,6 @@ #SBATCH --gpus=0 #SBATCH --mail-user=andrz@seas.upenn.edu #SBATCH --mail-type=END,FAIL -#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08 source slurm/env.sh @@ -23,6 +22,6 @@ source slurm/webarena-env.sh bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config short-context --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/claude/short-context -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 kill $DOCKER_PID \ No newline at end of file diff --git a/slurm/claude/webarena-8-short-baseline.sh b/slurm/claude/webarena-8-short-baseline.sh index d4db6b2aa..cb9bb64ce 100644 --- a/slurm/claude/webarena-8-short-baseline.sh +++ b/slurm/claude/webarena-8-short-baseline.sh @@ -11,7 +11,6 @@ #SBATCH --gpus=0 #SBATCH --mail-user=andrz@seas.upenn.edu #SBATCH --mail-type=END,FAIL -#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08 source slurm/env.sh @@ -23,6 +22,6 @@ source slurm/webarena-env.sh bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config short-baseline --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/claude/short-baseline -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 kill $DOCKER_PID \ No newline at end of file diff --git a/slurm/claude/webarena-all.sh b/slurm/claude/webarena-all.sh index fe392827a..9d54dea9c 100644 --- a/slurm/claude/webarena-all.sh +++ b/slurm/claude/webarena-all.sh @@ -11,7 +11,6 @@ #SBATCH --gpus=0 #SBATCH --mail-user=andrz@seas.upenn.edu #SBATCH --mail-type=END,FAIL -#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08 source slurm/env.sh @@ -23,27 +22,27 @@ source slurm/webarena-env.sh bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config full --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/claude/full -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config root-fc --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/claude/root-fc -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config baseline --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/claude/baseline -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config small-leaf --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/claude/small-leaf -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config small-all --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/claude/small-all -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config small-baseline --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/claude/small-baseline -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config short-context --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/claude/short-context -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config short-baseline --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/claude/short-baseline -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 kill $DOCKER_PID \ No newline at end of file diff --git a/slurm/cohere-hf/webarena-1-full.sh b/slurm/cohere-hf/webarena-1-full.sh index 05dacd192..c79bc3129 100644 --- a/slurm/cohere-hf/webarena-1-full.sh +++ b/slurm/cohere-hf/webarena-1-full.sh @@ -11,7 +11,6 @@ #SBATCH --gpus=8 #SBATCH --mail-user=andrz@seas.upenn.edu #SBATCH --mail-type=END,FAIL -#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08 #SBATCH --constraint=48GBgpu source slurm/env.sh @@ -23,6 +22,6 @@ source slurm/webarena-env.sh bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config full --model-class cohere-hf --large-model CohereForAI/c4ai-command-r-plus-08-2024 --small-model CohereForAI/c4ai-command-r-08-2024 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/cohere-hf/full --engine-timeout 1800 -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 kill $DOCKER_PID \ No newline at end of file diff --git a/slurm/cohere-hf/webarena-2-root-fc.sh b/slurm/cohere-hf/webarena-2-root-fc.sh index 8adcf0db9..b01bf5512 100644 --- a/slurm/cohere-hf/webarena-2-root-fc.sh +++ b/slurm/cohere-hf/webarena-2-root-fc.sh @@ -11,7 +11,6 @@ #SBATCH --gpus=8 #SBATCH --mail-user=andrz@seas.upenn.edu #SBATCH --mail-type=END,FAIL -#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08 #SBATCH --constraint=48GBgpu source slurm/env.sh @@ -23,6 +22,6 @@ source slurm/webarena-env.sh bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config root-fc --model-class cohere-hf --large-model CohereForAI/c4ai-command-r-plus-08-2024 --small-model CohereForAI/c4ai-command-r-08-2024 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/cohere-hf/root-fc --engine-timeout 1800 -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 kill $DOCKER_PID \ No newline at end of file diff --git a/slurm/cohere-hf/webarena-3-baseline.sh b/slurm/cohere-hf/webarena-3-baseline.sh index a6f9dfc5f..38f9adcab 100644 --- a/slurm/cohere-hf/webarena-3-baseline.sh +++ b/slurm/cohere-hf/webarena-3-baseline.sh @@ -11,7 +11,6 @@ #SBATCH --gpus=8 #SBATCH --mail-user=andrz@seas.upenn.edu #SBATCH --mail-type=END,FAIL -#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08 #SBATCH --constraint=48GBgpu source slurm/env.sh @@ -23,6 +22,6 @@ source slurm/webarena-env.sh bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config baseline --model-class cohere-hf --large-model CohereForAI/c4ai-command-r-plus-08-2024 --small-model CohereForAI/c4ai-command-r-08-2024 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/cohere-hf/baseline --engine-timeout 1800 -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 kill $DOCKER_PID \ No newline at end of file diff --git a/slurm/cohere-hf/webarena-4-small-leaf.sh b/slurm/cohere-hf/webarena-4-small-leaf.sh index edd203a79..4a60cef42 100644 --- a/slurm/cohere-hf/webarena-4-small-leaf.sh +++ b/slurm/cohere-hf/webarena-4-small-leaf.sh @@ -11,7 +11,6 @@ #SBATCH --gpus=8 #SBATCH --mail-user=andrz@seas.upenn.edu #SBATCH --mail-type=END,FAIL -#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08 #SBATCH --constraint=48GBgpu source slurm/env.sh @@ -23,6 +22,6 @@ source slurm/webarena-env.sh bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config small-leaf --model-class cohere-hf --large-model CohereForAI/c4ai-command-r-plus-08-2024 --small-model CohereForAI/c4ai-command-r-08-2024 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/cohere-hf/small-leaf --engine-timeout 1800 -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 kill $DOCKER_PID \ No newline at end of file diff --git a/slurm/cohere-hf/webarena-5-small-all.sh b/slurm/cohere-hf/webarena-5-small-all.sh index eecc7f4e0..38dd902d6 100644 --- a/slurm/cohere-hf/webarena-5-small-all.sh +++ b/slurm/cohere-hf/webarena-5-small-all.sh @@ -11,7 +11,6 @@ #SBATCH --gpus=8 #SBATCH --mail-user=andrz@seas.upenn.edu #SBATCH --mail-type=END,FAIL -#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08 #SBATCH --constraint=48GBgpu source slurm/env.sh @@ -23,6 +22,6 @@ source slurm/webarena-env.sh bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config small-all --model-class cohere-hf --large-model CohereForAI/c4ai-command-r-plus-08-2024 --small-model CohereForAI/c4ai-command-r-08-2024 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/cohere-hf/small-all --engine-timeout 1800 -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 kill $DOCKER_PID \ No newline at end of file diff --git a/slurm/cohere-hf/webarena-6-small-baseline.sh b/slurm/cohere-hf/webarena-6-small-baseline.sh index 03be9976d..399efc98e 100644 --- a/slurm/cohere-hf/webarena-6-small-baseline.sh +++ b/slurm/cohere-hf/webarena-6-small-baseline.sh @@ -11,7 +11,6 @@ #SBATCH --gpus=8 #SBATCH --mail-user=andrz@seas.upenn.edu #SBATCH --mail-type=END,FAIL -#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08 #SBATCH --constraint=48GBgpu source slurm/env.sh @@ -23,6 +22,6 @@ source slurm/webarena-env.sh bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config small-baseline --model-class cohere-hf --large-model CohereForAI/c4ai-command-r-plus-08-2024 --small-model CohereForAI/c4ai-command-r-08-2024 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/cohere-hf/small-baseline --engine-timeout 1800 -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 kill $DOCKER_PID \ No newline at end of file diff --git a/slurm/cohere-hf/webarena-7-short-context.sh b/slurm/cohere-hf/webarena-7-short-context.sh index bb7be8fe3..100ac2b4f 100644 --- a/slurm/cohere-hf/webarena-7-short-context.sh +++ b/slurm/cohere-hf/webarena-7-short-context.sh @@ -11,7 +11,6 @@ #SBATCH --gpus=8 #SBATCH --mail-user=andrz@seas.upenn.edu #SBATCH --mail-type=END,FAIL -#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08 #SBATCH --constraint=48GBgpu source slurm/env.sh @@ -23,6 +22,6 @@ source slurm/webarena-env.sh bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config short-context --model-class cohere-hf --large-model CohereForAI/c4ai-command-r-plus-08-2024 --small-model CohereForAI/c4ai-command-r-08-2024 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/cohere-hf/short-context --engine-timeout 1800 -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 kill $DOCKER_PID \ No newline at end of file diff --git a/slurm/cohere-hf/webarena-8-short-baseline.sh b/slurm/cohere-hf/webarena-8-short-baseline.sh index 83f22d472..3b7f3518f 100644 --- a/slurm/cohere-hf/webarena-8-short-baseline.sh +++ b/slurm/cohere-hf/webarena-8-short-baseline.sh @@ -11,7 +11,6 @@ #SBATCH --gpus=8 #SBATCH --mail-user=andrz@seas.upenn.edu #SBATCH --mail-type=END,FAIL -#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08 #SBATCH --constraint=48GBgpu source slurm/env.sh @@ -23,6 +22,6 @@ source slurm/webarena-env.sh bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config short-baseline --model-class cohere-hf --large-model CohereForAI/c4ai-command-r-plus-08-2024 --small-model CohereForAI/c4ai-command-r-08-2024 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/cohere-hf/short-baseline --engine-timeout 1800 -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 kill $DOCKER_PID \ No newline at end of file diff --git a/slurm/cohere-hf/webarena-all.sh b/slurm/cohere-hf/webarena-all.sh index 269ef325a..ca935488e 100644 --- a/slurm/cohere-hf/webarena-all.sh +++ b/slurm/cohere-hf/webarena-all.sh @@ -11,7 +11,6 @@ #SBATCH --gpus=8 #SBATCH --mail-user=andrz@seas.upenn.edu #SBATCH --mail-type=END,FAIL -#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08 #SBATCH --constraint=48GBgpu source slurm/env.sh @@ -23,27 +22,27 @@ source slurm/webarena-env.sh bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config full --model-class cohere-hf --large-model CohereForAI/c4ai-command-r-plus-08-2024 --small-model CohereForAI/c4ai-command-r-08-2024 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/cohere-hf/full --engine-timeout 1800 -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config root-fc --model-class cohere-hf --large-model CohereForAI/c4ai-command-r-plus-08-2024 --small-model CohereForAI/c4ai-command-r-08-2024 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/cohere-hf/root-fc --engine-timeout 1800 -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config baseline --model-class cohere-hf --large-model CohereForAI/c4ai-command-r-plus-08-2024 --small-model CohereForAI/c4ai-command-r-08-2024 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/cohere-hf/baseline --engine-timeout 1800 -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config small-leaf --model-class cohere-hf --large-model CohereForAI/c4ai-command-r-plus-08-2024 --small-model CohereForAI/c4ai-command-r-08-2024 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/cohere-hf/small-leaf --engine-timeout 1800 -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config small-all --model-class cohere-hf --large-model CohereForAI/c4ai-command-r-plus-08-2024 --small-model CohereForAI/c4ai-command-r-08-2024 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/cohere-hf/small-all --engine-timeout 1800 -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config small-baseline --model-class cohere-hf --large-model CohereForAI/c4ai-command-r-plus-08-2024 --small-model CohereForAI/c4ai-command-r-08-2024 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/cohere-hf/small-baseline --engine-timeout 1800 -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config short-context --model-class cohere-hf --large-model CohereForAI/c4ai-command-r-plus-08-2024 --small-model CohereForAI/c4ai-command-r-08-2024 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/cohere-hf/short-context --engine-timeout 1800 -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config short-baseline --model-class cohere-hf --large-model CohereForAI/c4ai-command-r-plus-08-2024 --small-model CohereForAI/c4ai-command-r-08-2024 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/cohere-hf/short-baseline --engine-timeout 1800 -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 kill $DOCKER_PID \ No newline at end of file diff --git a/slurm/gen_slurm.py b/slurm/gen_slurm.py index 50b48065e..27344e701 100644 --- a/slurm/gen_slurm.py +++ b/slurm/gen_slurm.py @@ -85,8 +85,7 @@ def main(): for bench in BENCHES: # WA needs extra env vars if bench == "webarena": - gpuconstraint = f"#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08\n{gpuconstraint}" - bench_extras = "curl -X GET ${RESTART_URL}\nsleep 600" + bench_extras = "bash slurm/webarena-startup.sh\nsleep 600" bench_startup = ( "dockerd-rootless.sh &\n" "DOCKER_PID=$!\n" diff --git a/slurm/mistral/webarena-1-full.sh b/slurm/mistral/webarena-1-full.sh index 41adbe677..f2ff3c3a5 100644 --- a/slurm/mistral/webarena-1-full.sh +++ b/slurm/mistral/webarena-1-full.sh @@ -11,7 +11,6 @@ #SBATCH --gpus=8 #SBATCH --mail-user=andrz@seas.upenn.edu #SBATCH --mail-type=END,FAIL -#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08 #SBATCH --constraint=48GBgpu source slurm/env.sh @@ -23,6 +22,6 @@ source slurm/webarena-env.sh bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config full --model-class mistral --large-model mistralai/Mistral-Large-Instruct-2407 --small-model mistralai/Mistral-Small-Instruct-2409 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/mistral/full --engine-timeout 1800 -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 kill $DOCKER_PID \ No newline at end of file diff --git a/slurm/mistral/webarena-2-root-fc.sh b/slurm/mistral/webarena-2-root-fc.sh index 524bbbb9e..fade62d0a 100644 --- a/slurm/mistral/webarena-2-root-fc.sh +++ b/slurm/mistral/webarena-2-root-fc.sh @@ -11,7 +11,6 @@ #SBATCH --gpus=8 #SBATCH --mail-user=andrz@seas.upenn.edu #SBATCH --mail-type=END,FAIL -#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08 #SBATCH --constraint=48GBgpu source slurm/env.sh @@ -23,6 +22,6 @@ source slurm/webarena-env.sh bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config root-fc --model-class mistral --large-model mistralai/Mistral-Large-Instruct-2407 --small-model mistralai/Mistral-Small-Instruct-2409 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/mistral/root-fc --engine-timeout 1800 -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 kill $DOCKER_PID \ No newline at end of file diff --git a/slurm/mistral/webarena-3-baseline.sh b/slurm/mistral/webarena-3-baseline.sh index 5b5151f5c..33671c875 100644 --- a/slurm/mistral/webarena-3-baseline.sh +++ b/slurm/mistral/webarena-3-baseline.sh @@ -11,7 +11,6 @@ #SBATCH --gpus=8 #SBATCH --mail-user=andrz@seas.upenn.edu #SBATCH --mail-type=END,FAIL -#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08 #SBATCH --constraint=48GBgpu source slurm/env.sh @@ -23,6 +22,6 @@ source slurm/webarena-env.sh bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config baseline --model-class mistral --large-model mistralai/Mistral-Large-Instruct-2407 --small-model mistralai/Mistral-Small-Instruct-2409 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/mistral/baseline --engine-timeout 1800 -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 kill $DOCKER_PID \ No newline at end of file diff --git a/slurm/mistral/webarena-4-small-leaf.sh b/slurm/mistral/webarena-4-small-leaf.sh index 3568345a4..76974e1d9 100644 --- a/slurm/mistral/webarena-4-small-leaf.sh +++ b/slurm/mistral/webarena-4-small-leaf.sh @@ -11,7 +11,6 @@ #SBATCH --gpus=8 #SBATCH --mail-user=andrz@seas.upenn.edu #SBATCH --mail-type=END,FAIL -#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08 #SBATCH --constraint=48GBgpu source slurm/env.sh @@ -23,6 +22,6 @@ source slurm/webarena-env.sh bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config small-leaf --model-class mistral --large-model mistralai/Mistral-Large-Instruct-2407 --small-model mistralai/Mistral-Small-Instruct-2409 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/mistral/small-leaf --engine-timeout 1800 -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 kill $DOCKER_PID \ No newline at end of file diff --git a/slurm/mistral/webarena-5-small-all.sh b/slurm/mistral/webarena-5-small-all.sh index 5cf96c98f..7b9bc5344 100644 --- a/slurm/mistral/webarena-5-small-all.sh +++ b/slurm/mistral/webarena-5-small-all.sh @@ -11,7 +11,6 @@ #SBATCH --gpus=8 #SBATCH --mail-user=andrz@seas.upenn.edu #SBATCH --mail-type=END,FAIL -#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08 #SBATCH --constraint=48GBgpu source slurm/env.sh @@ -23,6 +22,6 @@ source slurm/webarena-env.sh bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config small-all --model-class mistral --large-model mistralai/Mistral-Large-Instruct-2407 --small-model mistralai/Mistral-Small-Instruct-2409 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/mistral/small-all --engine-timeout 1800 -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 kill $DOCKER_PID \ No newline at end of file diff --git a/slurm/mistral/webarena-6-small-baseline.sh b/slurm/mistral/webarena-6-small-baseline.sh index d0dcd579d..7d874a590 100644 --- a/slurm/mistral/webarena-6-small-baseline.sh +++ b/slurm/mistral/webarena-6-small-baseline.sh @@ -11,7 +11,6 @@ #SBATCH --gpus=8 #SBATCH --mail-user=andrz@seas.upenn.edu #SBATCH --mail-type=END,FAIL -#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08 #SBATCH --constraint=48GBgpu source slurm/env.sh @@ -23,6 +22,6 @@ source slurm/webarena-env.sh bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config small-baseline --model-class mistral --large-model mistralai/Mistral-Large-Instruct-2407 --small-model mistralai/Mistral-Small-Instruct-2409 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/mistral/small-baseline --engine-timeout 1800 -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 kill $DOCKER_PID \ No newline at end of file diff --git a/slurm/mistral/webarena-7-short-context.sh b/slurm/mistral/webarena-7-short-context.sh index a94169932..b7e482e0a 100644 --- a/slurm/mistral/webarena-7-short-context.sh +++ b/slurm/mistral/webarena-7-short-context.sh @@ -11,7 +11,6 @@ #SBATCH --gpus=8 #SBATCH --mail-user=andrz@seas.upenn.edu #SBATCH --mail-type=END,FAIL -#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08 #SBATCH --constraint=48GBgpu source slurm/env.sh @@ -23,6 +22,6 @@ source slurm/webarena-env.sh bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config short-context --model-class mistral --large-model mistralai/Mistral-Large-Instruct-2407 --small-model mistralai/Mistral-Small-Instruct-2409 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/mistral/short-context --engine-timeout 1800 -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 kill $DOCKER_PID \ No newline at end of file diff --git a/slurm/mistral/webarena-8-short-baseline.sh b/slurm/mistral/webarena-8-short-baseline.sh index 07ab31562..ec9b767d1 100644 --- a/slurm/mistral/webarena-8-short-baseline.sh +++ b/slurm/mistral/webarena-8-short-baseline.sh @@ -11,7 +11,6 @@ #SBATCH --gpus=8 #SBATCH --mail-user=andrz@seas.upenn.edu #SBATCH --mail-type=END,FAIL -#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08 #SBATCH --constraint=48GBgpu source slurm/env.sh @@ -23,6 +22,6 @@ source slurm/webarena-env.sh bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config short-baseline --model-class mistral --large-model mistralai/Mistral-Large-Instruct-2407 --small-model mistralai/Mistral-Small-Instruct-2409 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/mistral/short-baseline --engine-timeout 1800 -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 kill $DOCKER_PID \ No newline at end of file diff --git a/slurm/mistral/webarena-all.sh b/slurm/mistral/webarena-all.sh index 81b39d241..3caa17c99 100644 --- a/slurm/mistral/webarena-all.sh +++ b/slurm/mistral/webarena-all.sh @@ -11,7 +11,6 @@ #SBATCH --gpus=8 #SBATCH --mail-user=andrz@seas.upenn.edu #SBATCH --mail-type=END,FAIL -#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08 #SBATCH --constraint=48GBgpu source slurm/env.sh @@ -23,27 +22,27 @@ source slurm/webarena-env.sh bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config full --model-class mistral --large-model mistralai/Mistral-Large-Instruct-2407 --small-model mistralai/Mistral-Small-Instruct-2409 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/mistral/full --engine-timeout 1800 -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config root-fc --model-class mistral --large-model mistralai/Mistral-Large-Instruct-2407 --small-model mistralai/Mistral-Small-Instruct-2409 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/mistral/root-fc --engine-timeout 1800 -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config baseline --model-class mistral --large-model mistralai/Mistral-Large-Instruct-2407 --small-model mistralai/Mistral-Small-Instruct-2409 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/mistral/baseline --engine-timeout 1800 -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config small-leaf --model-class mistral --large-model mistralai/Mistral-Large-Instruct-2407 --small-model mistralai/Mistral-Small-Instruct-2409 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/mistral/small-leaf --engine-timeout 1800 -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config small-all --model-class mistral --large-model mistralai/Mistral-Large-Instruct-2407 --small-model mistralai/Mistral-Small-Instruct-2409 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/mistral/small-all --engine-timeout 1800 -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config small-baseline --model-class mistral --large-model mistralai/Mistral-Large-Instruct-2407 --small-model mistralai/Mistral-Small-Instruct-2409 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/mistral/small-baseline --engine-timeout 1800 -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config short-context --model-class mistral --large-model mistralai/Mistral-Large-Instruct-2407 --small-model mistralai/Mistral-Small-Instruct-2409 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/mistral/short-context --engine-timeout 1800 -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config short-baseline --model-class mistral --large-model mistralai/Mistral-Large-Instruct-2407 --small-model mistralai/Mistral-Small-Instruct-2409 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/mistral/short-baseline --engine-timeout 1800 -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 kill $DOCKER_PID \ No newline at end of file diff --git a/slurm/openai/webarena-1-full.sh b/slurm/openai/webarena-1-full.sh index 9b45fdb9e..8963d0daa 100644 --- a/slurm/openai/webarena-1-full.sh +++ b/slurm/openai/webarena-1-full.sh @@ -11,7 +11,6 @@ #SBATCH --gpus=0 #SBATCH --mail-user=andrz@seas.upenn.edu #SBATCH --mail-type=END,FAIL -#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08 source slurm/env.sh @@ -23,6 +22,6 @@ source slurm/webarena-env.sh bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config full --model-class openai --large-model gpt-4o-2024-05-13 --small-model gpt-3.5-turbo-0125 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/openai/full -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 kill $DOCKER_PID \ No newline at end of file diff --git a/slurm/openai/webarena-2-root-fc.sh b/slurm/openai/webarena-2-root-fc.sh index 47d123594..43ce9f32f 100644 --- a/slurm/openai/webarena-2-root-fc.sh +++ b/slurm/openai/webarena-2-root-fc.sh @@ -11,7 +11,6 @@ #SBATCH --gpus=0 #SBATCH --mail-user=andrz@seas.upenn.edu #SBATCH --mail-type=END,FAIL -#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08 source slurm/env.sh @@ -23,6 +22,6 @@ source slurm/webarena-env.sh bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config root-fc --model-class openai --large-model gpt-4o-2024-05-13 --small-model gpt-3.5-turbo-0125 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/openai/root-fc -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 kill $DOCKER_PID \ No newline at end of file diff --git a/slurm/openai/webarena-3-baseline.sh b/slurm/openai/webarena-3-baseline.sh index f3a2344b5..7098e9be5 100644 --- a/slurm/openai/webarena-3-baseline.sh +++ b/slurm/openai/webarena-3-baseline.sh @@ -11,7 +11,6 @@ #SBATCH --gpus=0 #SBATCH --mail-user=andrz@seas.upenn.edu #SBATCH --mail-type=END,FAIL -#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08 source slurm/env.sh @@ -23,6 +22,6 @@ source slurm/webarena-env.sh bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config baseline --model-class openai --large-model gpt-4o-2024-05-13 --small-model gpt-3.5-turbo-0125 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/openai/baseline -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 kill $DOCKER_PID \ No newline at end of file diff --git a/slurm/openai/webarena-4-small-leaf.sh b/slurm/openai/webarena-4-small-leaf.sh index e9cacd45b..6c5283571 100644 --- a/slurm/openai/webarena-4-small-leaf.sh +++ b/slurm/openai/webarena-4-small-leaf.sh @@ -11,7 +11,6 @@ #SBATCH --gpus=0 #SBATCH --mail-user=andrz@seas.upenn.edu #SBATCH --mail-type=END,FAIL -#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08 source slurm/env.sh @@ -23,6 +22,6 @@ source slurm/webarena-env.sh bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config small-leaf --model-class openai --large-model gpt-4o-2024-05-13 --small-model gpt-3.5-turbo-0125 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/openai/small-leaf -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 kill $DOCKER_PID \ No newline at end of file diff --git a/slurm/openai/webarena-5-small-all.sh b/slurm/openai/webarena-5-small-all.sh index 1096eff55..1cb5b70cb 100644 --- a/slurm/openai/webarena-5-small-all.sh +++ b/slurm/openai/webarena-5-small-all.sh @@ -11,7 +11,6 @@ #SBATCH --gpus=0 #SBATCH --mail-user=andrz@seas.upenn.edu #SBATCH --mail-type=END,FAIL -#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08 source slurm/env.sh @@ -23,6 +22,6 @@ source slurm/webarena-env.sh bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config small-all --model-class openai --large-model gpt-4o-2024-05-13 --small-model gpt-3.5-turbo-0125 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/openai/small-all -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 kill $DOCKER_PID \ No newline at end of file diff --git a/slurm/openai/webarena-6-small-baseline.sh b/slurm/openai/webarena-6-small-baseline.sh index b6749dfb9..1ab299912 100644 --- a/slurm/openai/webarena-6-small-baseline.sh +++ b/slurm/openai/webarena-6-small-baseline.sh @@ -11,7 +11,6 @@ #SBATCH --gpus=0 #SBATCH --mail-user=andrz@seas.upenn.edu #SBATCH --mail-type=END,FAIL -#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08 source slurm/env.sh @@ -23,6 +22,6 @@ source slurm/webarena-env.sh bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config small-baseline --model-class openai --large-model gpt-4o-2024-05-13 --small-model gpt-3.5-turbo-0125 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/openai/small-baseline -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 kill $DOCKER_PID \ No newline at end of file diff --git a/slurm/openai/webarena-7-short-context.sh b/slurm/openai/webarena-7-short-context.sh index 0970c0d54..ef44c7a80 100644 --- a/slurm/openai/webarena-7-short-context.sh +++ b/slurm/openai/webarena-7-short-context.sh @@ -11,7 +11,6 @@ #SBATCH --gpus=0 #SBATCH --mail-user=andrz@seas.upenn.edu #SBATCH --mail-type=END,FAIL -#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08 source slurm/env.sh @@ -23,6 +22,6 @@ source slurm/webarena-env.sh bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config short-context --model-class openai --large-model gpt-4o-2024-05-13 --small-model gpt-3.5-turbo-0125 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/openai/short-context -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 kill $DOCKER_PID \ No newline at end of file diff --git a/slurm/openai/webarena-8-short-baseline.sh b/slurm/openai/webarena-8-short-baseline.sh index bef98fdef..f68d89cdd 100644 --- a/slurm/openai/webarena-8-short-baseline.sh +++ b/slurm/openai/webarena-8-short-baseline.sh @@ -11,7 +11,6 @@ #SBATCH --gpus=0 #SBATCH --mail-user=andrz@seas.upenn.edu #SBATCH --mail-type=END,FAIL -#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08 source slurm/env.sh @@ -23,6 +22,6 @@ source slurm/webarena-env.sh bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config short-baseline --model-class openai --large-model gpt-4o-2024-05-13 --small-model gpt-3.5-turbo-0125 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/openai/short-baseline -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 kill $DOCKER_PID \ No newline at end of file diff --git a/slurm/openai/webarena-all.sh b/slurm/openai/webarena-all.sh index df0ca33ec..34d49a336 100644 --- a/slurm/openai/webarena-all.sh +++ b/slurm/openai/webarena-all.sh @@ -11,7 +11,6 @@ #SBATCH --gpus=0 #SBATCH --mail-user=andrz@seas.upenn.edu #SBATCH --mail-type=END,FAIL -#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08 source slurm/env.sh @@ -23,27 +22,27 @@ source slurm/webarena-env.sh bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config full --model-class openai --large-model gpt-4o-2024-05-13 --small-model gpt-3.5-turbo-0125 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/openai/full -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config root-fc --model-class openai --large-model gpt-4o-2024-05-13 --small-model gpt-3.5-turbo-0125 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/openai/root-fc -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config baseline --model-class openai --large-model gpt-4o-2024-05-13 --small-model gpt-3.5-turbo-0125 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/openai/baseline -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config small-leaf --model-class openai --large-model gpt-4o-2024-05-13 --small-model gpt-3.5-turbo-0125 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/openai/small-leaf -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config small-all --model-class openai --large-model gpt-4o-2024-05-13 --small-model gpt-3.5-turbo-0125 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/openai/small-all -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config small-baseline --model-class openai --large-model gpt-4o-2024-05-13 --small-model gpt-3.5-turbo-0125 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/openai/small-baseline -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config short-context --model-class openai --large-model gpt-4o-2024-05-13 --small-model gpt-3.5-turbo-0125 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/openai/short-context -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config short-baseline --model-class openai --large-model gpt-4o-2024-05-13 --small-model gpt-3.5-turbo-0125 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/openai/short-baseline -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 kill $DOCKER_PID \ No newline at end of file diff --git a/slurm/qwen/webarena-1-full.sh b/slurm/qwen/webarena-1-full.sh index e3e5a7159..7caca17e4 100644 --- a/slurm/qwen/webarena-1-full.sh +++ b/slurm/qwen/webarena-1-full.sh @@ -11,7 +11,6 @@ #SBATCH --gpus=8 #SBATCH --mail-user=andrz@seas.upenn.edu #SBATCH --mail-type=END,FAIL -#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08 #SBATCH --constraint=48GBgpu source slurm/env.sh @@ -23,6 +22,6 @@ source slurm/webarena-env.sh bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config full --model-class qwen --large-model Qwen/Qwen2.5-72B-Instruct --small-model Qwen/Qwen2.5-7B-Instruct --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/qwen/full --engine-timeout 1800 -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 kill $DOCKER_PID \ No newline at end of file diff --git a/slurm/qwen/webarena-2-root-fc.sh b/slurm/qwen/webarena-2-root-fc.sh index a219d5ad6..54fb551a9 100644 --- a/slurm/qwen/webarena-2-root-fc.sh +++ b/slurm/qwen/webarena-2-root-fc.sh @@ -11,7 +11,6 @@ #SBATCH --gpus=8 #SBATCH --mail-user=andrz@seas.upenn.edu #SBATCH --mail-type=END,FAIL -#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08 #SBATCH --constraint=48GBgpu source slurm/env.sh @@ -23,6 +22,6 @@ source slurm/webarena-env.sh bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config root-fc --model-class qwen --large-model Qwen/Qwen2.5-72B-Instruct --small-model Qwen/Qwen2.5-7B-Instruct --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/qwen/root-fc --engine-timeout 1800 -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 kill $DOCKER_PID \ No newline at end of file diff --git a/slurm/qwen/webarena-3-baseline.sh b/slurm/qwen/webarena-3-baseline.sh index 3f624b7d7..12e6b41f9 100644 --- a/slurm/qwen/webarena-3-baseline.sh +++ b/slurm/qwen/webarena-3-baseline.sh @@ -11,7 +11,6 @@ #SBATCH --gpus=8 #SBATCH --mail-user=andrz@seas.upenn.edu #SBATCH --mail-type=END,FAIL -#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08 #SBATCH --constraint=48GBgpu source slurm/env.sh @@ -23,6 +22,6 @@ source slurm/webarena-env.sh bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config baseline --model-class qwen --large-model Qwen/Qwen2.5-72B-Instruct --small-model Qwen/Qwen2.5-7B-Instruct --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/qwen/baseline --engine-timeout 1800 -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 kill $DOCKER_PID \ No newline at end of file diff --git a/slurm/qwen/webarena-4-small-leaf.sh b/slurm/qwen/webarena-4-small-leaf.sh index 5d3878b54..3ba9fe3b1 100644 --- a/slurm/qwen/webarena-4-small-leaf.sh +++ b/slurm/qwen/webarena-4-small-leaf.sh @@ -11,7 +11,6 @@ #SBATCH --gpus=8 #SBATCH --mail-user=andrz@seas.upenn.edu #SBATCH --mail-type=END,FAIL -#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08 #SBATCH --constraint=48GBgpu source slurm/env.sh @@ -23,6 +22,6 @@ source slurm/webarena-env.sh bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config small-leaf --model-class qwen --large-model Qwen/Qwen2.5-72B-Instruct --small-model Qwen/Qwen2.5-7B-Instruct --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/qwen/small-leaf --engine-timeout 1800 -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 kill $DOCKER_PID \ No newline at end of file diff --git a/slurm/qwen/webarena-5-small-all.sh b/slurm/qwen/webarena-5-small-all.sh index e8c289259..0fa5d3bb7 100644 --- a/slurm/qwen/webarena-5-small-all.sh +++ b/slurm/qwen/webarena-5-small-all.sh @@ -11,7 +11,6 @@ #SBATCH --gpus=8 #SBATCH --mail-user=andrz@seas.upenn.edu #SBATCH --mail-type=END,FAIL -#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08 #SBATCH --constraint=48GBgpu source slurm/env.sh @@ -23,6 +22,6 @@ source slurm/webarena-env.sh bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config small-all --model-class qwen --large-model Qwen/Qwen2.5-72B-Instruct --small-model Qwen/Qwen2.5-7B-Instruct --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/qwen/small-all --engine-timeout 1800 -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 kill $DOCKER_PID \ No newline at end of file diff --git a/slurm/qwen/webarena-6-small-baseline.sh b/slurm/qwen/webarena-6-small-baseline.sh index d477fc787..d61de836b 100644 --- a/slurm/qwen/webarena-6-small-baseline.sh +++ b/slurm/qwen/webarena-6-small-baseline.sh @@ -11,7 +11,6 @@ #SBATCH --gpus=8 #SBATCH --mail-user=andrz@seas.upenn.edu #SBATCH --mail-type=END,FAIL -#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08 #SBATCH --constraint=48GBgpu source slurm/env.sh @@ -23,6 +22,6 @@ source slurm/webarena-env.sh bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config small-baseline --model-class qwen --large-model Qwen/Qwen2.5-72B-Instruct --small-model Qwen/Qwen2.5-7B-Instruct --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/qwen/small-baseline --engine-timeout 1800 -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 kill $DOCKER_PID \ No newline at end of file diff --git a/slurm/qwen/webarena-7-short-context.sh b/slurm/qwen/webarena-7-short-context.sh index ede4ce542..0b1a2d4eb 100644 --- a/slurm/qwen/webarena-7-short-context.sh +++ b/slurm/qwen/webarena-7-short-context.sh @@ -11,7 +11,6 @@ #SBATCH --gpus=8 #SBATCH --mail-user=andrz@seas.upenn.edu #SBATCH --mail-type=END,FAIL -#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08 #SBATCH --constraint=48GBgpu source slurm/env.sh @@ -23,6 +22,6 @@ source slurm/webarena-env.sh bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config short-context --model-class qwen --large-model Qwen/Qwen2.5-72B-Instruct --small-model Qwen/Qwen2.5-7B-Instruct --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/qwen/short-context --engine-timeout 1800 -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 kill $DOCKER_PID \ No newline at end of file diff --git a/slurm/qwen/webarena-8-short-baseline.sh b/slurm/qwen/webarena-8-short-baseline.sh index a210fd838..a10cd8d45 100644 --- a/slurm/qwen/webarena-8-short-baseline.sh +++ b/slurm/qwen/webarena-8-short-baseline.sh @@ -11,7 +11,6 @@ #SBATCH --gpus=8 #SBATCH --mail-user=andrz@seas.upenn.edu #SBATCH --mail-type=END,FAIL -#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08 #SBATCH --constraint=48GBgpu source slurm/env.sh @@ -23,6 +22,6 @@ source slurm/webarena-env.sh bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config short-baseline --model-class qwen --large-model Qwen/Qwen2.5-72B-Instruct --small-model Qwen/Qwen2.5-7B-Instruct --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/qwen/short-baseline --engine-timeout 1800 -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 kill $DOCKER_PID \ No newline at end of file diff --git a/slurm/qwen/webarena-all.sh b/slurm/qwen/webarena-all.sh index fa4e5bd8f..bc436832c 100644 --- a/slurm/qwen/webarena-all.sh +++ b/slurm/qwen/webarena-all.sh @@ -11,7 +11,6 @@ #SBATCH --gpus=8 #SBATCH --mail-user=andrz@seas.upenn.edu #SBATCH --mail-type=END,FAIL -#SBATCH --nodelist=nlpgpu04,nlpgpu05,nlpgpu08 #SBATCH --constraint=48GBgpu source slurm/env.sh @@ -23,27 +22,27 @@ source slurm/webarena-env.sh bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config full --model-class qwen --large-model Qwen/Qwen2.5-72B-Instruct --small-model Qwen/Qwen2.5-7B-Instruct --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/qwen/full --engine-timeout 1800 -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config root-fc --model-class qwen --large-model Qwen/Qwen2.5-72B-Instruct --small-model Qwen/Qwen2.5-7B-Instruct --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/qwen/root-fc --engine-timeout 1800 -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config baseline --model-class qwen --large-model Qwen/Qwen2.5-72B-Instruct --small-model Qwen/Qwen2.5-7B-Instruct --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/qwen/baseline --engine-timeout 1800 -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config small-leaf --model-class qwen --large-model Qwen/Qwen2.5-72B-Instruct --small-model Qwen/Qwen2.5-7B-Instruct --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/qwen/small-leaf --engine-timeout 1800 -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config small-all --model-class qwen --large-model Qwen/Qwen2.5-72B-Instruct --small-model Qwen/Qwen2.5-7B-Instruct --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/qwen/small-all --engine-timeout 1800 -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config small-baseline --model-class qwen --large-model Qwen/Qwen2.5-72B-Instruct --small-model Qwen/Qwen2.5-7B-Instruct --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/qwen/small-baseline --engine-timeout 1800 -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config short-context --model-class qwen --large-model Qwen/Qwen2.5-72B-Instruct --small-model Qwen/Qwen2.5-7B-Instruct --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/qwen/short-context --engine-timeout 1800 -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 python bench_webarena.py --config short-baseline --model-class qwen --large-model Qwen/Qwen2.5-72B-Instruct --small-model Qwen/Qwen2.5-7B-Instruct --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/qwen/short-baseline --engine-timeout 1800 -curl -X GET ${RESTART_URL} +bash slurm/webarena-startup.sh sleep 600 kill $DOCKER_PID \ No newline at end of file diff --git a/slurm/webarena-startup.sh b/slurm/webarena-startup.sh index ccd537fb5..76b8ac4ba 100644 --- a/slurm/webarena-startup.sh +++ b/slurm/webarena-startup.sh @@ -7,6 +7,5 @@ bash 03_docker_create_containers.sh bash 04_docker_start_containers.sh bash 05_docker_patch_containers.sh bash 06_serve_homepage.sh & -bash 07_serve_reset.sh & popd