Skip to content

Commit

Permalink
Merge branch 'demo/emnlp' of https://github.com/zhudotexe/redel into …
Browse files Browse the repository at this point in the history
…demo/emnlp
  • Loading branch information
zhudotexe committed Dec 11, 2024
2 parents 32b8475 + 521651e commit 3b50a13
Show file tree
Hide file tree
Showing 140 changed files with 559 additions and 606 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,9 @@ Apache-2.0 license, contained in `experiments/webarena/vendor/LICENSE`.
First, you'll need to set up your own WebArena environment.
See https://github.com/web-arena-x/webarena/blob/main/environment_docker/README.md for instructions.

> [!NOTE]
> https://github.com/gasse/webarena-setup/tree/main/webarena is a helpful unofficial setup script.

Next, run the following to set up the webarena configuration:

```shell
Expand Down
1 change: 0 additions & 1 deletion slurm/.gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
env.sh
fanout-eval-env.sh
webarena-env.sh
3 changes: 1 addition & 2 deletions slurm/claude/fanoutqa-1-full.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,4 @@

source slurm/env.sh
export VLLM_WORKER_MULTIPROC_METHOD=spawn

python bench_fanoutqa.py --config full --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/claude/full
python bench_fanoutqa.py --config full --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/claude/full
3 changes: 1 addition & 2 deletions slurm/claude/fanoutqa-2-root-fc.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,4 @@

source slurm/env.sh
export VLLM_WORKER_MULTIPROC_METHOD=spawn

python bench_fanoutqa.py --config root-fc --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/claude/root-fc
python bench_fanoutqa.py --config root-fc --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/claude/root-fc
3 changes: 1 addition & 2 deletions slurm/claude/fanoutqa-3-baseline.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,4 @@

source slurm/env.sh
export VLLM_WORKER_MULTIPROC_METHOD=spawn

python bench_fanoutqa.py --config baseline --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/claude/baseline
python bench_fanoutqa.py --config baseline --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/claude/baseline
3 changes: 1 addition & 2 deletions slurm/claude/fanoutqa-4-small-leaf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,4 @@

source slurm/env.sh
export VLLM_WORKER_MULTIPROC_METHOD=spawn

python bench_fanoutqa.py --config small-leaf --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/claude/small-leaf
python bench_fanoutqa.py --config small-leaf --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/claude/small-leaf
3 changes: 1 addition & 2 deletions slurm/claude/fanoutqa-5-small-all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,4 @@

source slurm/env.sh
export VLLM_WORKER_MULTIPROC_METHOD=spawn

python bench_fanoutqa.py --config small-all --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/claude/small-all
python bench_fanoutqa.py --config small-all --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/claude/small-all
3 changes: 1 addition & 2 deletions slurm/claude/fanoutqa-6-small-baseline.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,4 @@

source slurm/env.sh
export VLLM_WORKER_MULTIPROC_METHOD=spawn

python bench_fanoutqa.py --config small-baseline --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/claude/small-baseline
python bench_fanoutqa.py --config small-baseline --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/claude/small-baseline
3 changes: 1 addition & 2 deletions slurm/claude/fanoutqa-7-short-context.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,4 @@

source slurm/env.sh
export VLLM_WORKER_MULTIPROC_METHOD=spawn

python bench_fanoutqa.py --config short-context --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/claude/short-context
python bench_fanoutqa.py --config short-context --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/claude/short-context
3 changes: 1 addition & 2 deletions slurm/claude/fanoutqa-8-short-baseline.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,4 @@

source slurm/env.sh
export VLLM_WORKER_MULTIPROC_METHOD=spawn

python bench_fanoutqa.py --config short-baseline --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/claude/short-baseline
python bench_fanoutqa.py --config short-baseline --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/claude/short-baseline
38 changes: 8 additions & 30 deletions slurm/claude/fanoutqa-all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,33 +15,11 @@

source slurm/env.sh
export VLLM_WORKER_MULTIPROC_METHOD=spawn

python bench_fanoutqa.py --config full --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/claude/full



python bench_fanoutqa.py --config root-fc --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/claude/root-fc



python bench_fanoutqa.py --config baseline --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/claude/baseline



python bench_fanoutqa.py --config small-leaf --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/claude/small-leaf



python bench_fanoutqa.py --config small-all --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/claude/small-all



python bench_fanoutqa.py --config small-baseline --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/claude/small-baseline



python bench_fanoutqa.py --config short-context --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/claude/short-context



python bench_fanoutqa.py --config short-baseline --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/claude/short-baseline
python bench_fanoutqa.py --config full --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/claude/full
python bench_fanoutqa.py --config root-fc --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/claude/root-fc
python bench_fanoutqa.py --config baseline --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/claude/baseline
python bench_fanoutqa.py --config small-leaf --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/claude/small-leaf
python bench_fanoutqa.py --config small-all --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/claude/small-all
python bench_fanoutqa.py --config small-baseline --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/claude/small-baseline
python bench_fanoutqa.py --config short-context --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/claude/short-context
python bench_fanoutqa.py --config short-baseline --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/claude/short-baseline
3 changes: 1 addition & 2 deletions slurm/claude/travelplanner-1-full.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,4 @@

source slurm/env.sh
export VLLM_WORKER_MULTIPROC_METHOD=spawn

python bench_travelplanner.py --config full --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/travelplanner/claude/full
python bench_travelplanner.py --config full --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/travelplanner/claude/full
3 changes: 1 addition & 2 deletions slurm/claude/travelplanner-2-root-fc.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,4 @@

source slurm/env.sh
export VLLM_WORKER_MULTIPROC_METHOD=spawn

python bench_travelplanner.py --config root-fc --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/travelplanner/claude/root-fc
python bench_travelplanner.py --config root-fc --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/travelplanner/claude/root-fc
3 changes: 1 addition & 2 deletions slurm/claude/travelplanner-3-baseline.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,4 @@

source slurm/env.sh
export VLLM_WORKER_MULTIPROC_METHOD=spawn

python bench_travelplanner.py --config baseline --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/travelplanner/claude/baseline
python bench_travelplanner.py --config baseline --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/travelplanner/claude/baseline
3 changes: 1 addition & 2 deletions slurm/claude/travelplanner-4-small-leaf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,4 @@

source slurm/env.sh
export VLLM_WORKER_MULTIPROC_METHOD=spawn

python bench_travelplanner.py --config small-leaf --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/travelplanner/claude/small-leaf
python bench_travelplanner.py --config small-leaf --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/travelplanner/claude/small-leaf
3 changes: 1 addition & 2 deletions slurm/claude/travelplanner-5-small-all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,4 @@

source slurm/env.sh
export VLLM_WORKER_MULTIPROC_METHOD=spawn

python bench_travelplanner.py --config small-all --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/travelplanner/claude/small-all
python bench_travelplanner.py --config small-all --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/travelplanner/claude/small-all
3 changes: 1 addition & 2 deletions slurm/claude/travelplanner-6-small-baseline.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,4 @@

source slurm/env.sh
export VLLM_WORKER_MULTIPROC_METHOD=spawn

python bench_travelplanner.py --config small-baseline --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/travelplanner/claude/small-baseline
python bench_travelplanner.py --config small-baseline --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/travelplanner/claude/small-baseline
3 changes: 1 addition & 2 deletions slurm/claude/travelplanner-7-short-context.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,4 @@

source slurm/env.sh
export VLLM_WORKER_MULTIPROC_METHOD=spawn

python bench_travelplanner.py --config short-context --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/travelplanner/claude/short-context
python bench_travelplanner.py --config short-context --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/travelplanner/claude/short-context
3 changes: 1 addition & 2 deletions slurm/claude/travelplanner-8-short-baseline.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,4 @@

source slurm/env.sh
export VLLM_WORKER_MULTIPROC_METHOD=spawn

python bench_travelplanner.py --config short-baseline --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/travelplanner/claude/short-baseline
python bench_travelplanner.py --config short-baseline --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/travelplanner/claude/short-baseline
38 changes: 8 additions & 30 deletions slurm/claude/travelplanner-all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,33 +15,11 @@

source slurm/env.sh
export VLLM_WORKER_MULTIPROC_METHOD=spawn

python bench_travelplanner.py --config full --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/travelplanner/claude/full



python bench_travelplanner.py --config root-fc --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/travelplanner/claude/root-fc



python bench_travelplanner.py --config baseline --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/travelplanner/claude/baseline



python bench_travelplanner.py --config small-leaf --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/travelplanner/claude/small-leaf



python bench_travelplanner.py --config small-all --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/travelplanner/claude/small-all



python bench_travelplanner.py --config small-baseline --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/travelplanner/claude/small-baseline



python bench_travelplanner.py --config short-context --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/travelplanner/claude/short-context



python bench_travelplanner.py --config short-baseline --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/travelplanner/claude/short-baseline
python bench_travelplanner.py --config full --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/travelplanner/claude/full
python bench_travelplanner.py --config root-fc --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/travelplanner/claude/root-fc
python bench_travelplanner.py --config baseline --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/travelplanner/claude/baseline
python bench_travelplanner.py --config small-leaf --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/travelplanner/claude/small-leaf
python bench_travelplanner.py --config small-all --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/travelplanner/claude/small-all
python bench_travelplanner.py --config small-baseline --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/travelplanner/claude/small-baseline
python bench_travelplanner.py --config short-context --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/travelplanner/claude/short-context
python bench_travelplanner.py --config short-baseline --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/travelplanner/claude/short-baseline
13 changes: 10 additions & 3 deletions slurm/claude/webarena-1-full.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,22 @@
#SBATCH --time=7-0
#SBATCH --nodes=1
#SBATCH -c 1
#SBATCH --mem=32G
#SBATCH --mem=256G
#SBATCH --gpus=0
#SBATCH [email protected]
#SBATCH --mail-type=END,FAIL
#SBATCH --nodelist=nlpgpu05


source slurm/env.sh
export VLLM_WORKER_MULTIPROC_METHOD=spawn
dockerd-rootless.sh &
DOCKER_PID=$!
sleep 15
source slurm/webarena-env.sh
bash slurm/webarena-startup.sh
sleep 600
curl -X GET ${RESTART_URL}
sleep 300
python bench_webarena.py --config full --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/claude/full
sleep 600
python bench_webarena.py --config full --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/claude/full
kill $DOCKER_PID
13 changes: 10 additions & 3 deletions slurm/claude/webarena-2-root-fc.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,22 @@
#SBATCH --time=7-0
#SBATCH --nodes=1
#SBATCH -c 1
#SBATCH --mem=32G
#SBATCH --mem=256G
#SBATCH --gpus=0
#SBATCH [email protected]
#SBATCH --mail-type=END,FAIL
#SBATCH --nodelist=nlpgpu05


source slurm/env.sh
export VLLM_WORKER_MULTIPROC_METHOD=spawn
dockerd-rootless.sh &
DOCKER_PID=$!
sleep 15
source slurm/webarena-env.sh
bash slurm/webarena-startup.sh
sleep 600
curl -X GET ${RESTART_URL}
sleep 300
python bench_webarena.py --config root-fc --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/claude/root-fc
sleep 600
python bench_webarena.py --config root-fc --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/claude/root-fc
kill $DOCKER_PID
13 changes: 10 additions & 3 deletions slurm/claude/webarena-3-baseline.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,22 @@
#SBATCH --time=7-0
#SBATCH --nodes=1
#SBATCH -c 1
#SBATCH --mem=32G
#SBATCH --mem=256G
#SBATCH --gpus=0
#SBATCH [email protected]
#SBATCH --mail-type=END,FAIL
#SBATCH --nodelist=nlpgpu05


source slurm/env.sh
export VLLM_WORKER_MULTIPROC_METHOD=spawn
dockerd-rootless.sh &
DOCKER_PID=$!
sleep 15
source slurm/webarena-env.sh
bash slurm/webarena-startup.sh
sleep 600
curl -X GET ${RESTART_URL}
sleep 300
python bench_webarena.py --config baseline --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/claude/baseline
sleep 600
python bench_webarena.py --config baseline --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/claude/baseline
kill $DOCKER_PID
Loading

0 comments on commit 3b50a13

Please sign in to comment.