-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_generate.py
66 lines (56 loc) · 2.59 KB
/
run_generate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
from utils.predefine import MODEL_DIR, DATASET_DIR, TEST_DIR
import os
import argparse
import random
from utils.feedback import virturalhome_feedback, world_model_feedback
from equilibrium_model.generate_equilibrium import generate
# Command-line interface for the generation / evaluation run.
# Each entry is (flag, keyword arguments for ``add_argument``); the order here
# is the order in which the options are registered on the parser.
_ARGUMENT_SPECS = (
    # model, data and output locations (joined onto project dirs later)
    ('--model_path', {'type': str,
                      'default': 'llama_3_8b_instruct_equilibrium_planner'}),
    ('--data_path', {'type': str,
                     'default': 'virtualhome_env_equilibrium_planner.p'}),
    ('--save_path', {'type': str,
                     'default': 'generate_equilibrium_planner'}),
    ('--gpu', {'type': int, 'default': 0}),
    ('--seed', {'type': int, 'default': 42}),
    # generate
    ('--max_generate_length', {'type': int, 'default': 350}),
    # self-refine baseline
    ('--self_refine_only', {'action': 'store_true'}),
    # ours equilibrium model
    ('--convergence', {'type': int, 'default': 20}),
    ('--init_top_k', {'type': int, 'default': 10}),
    ('--refine_top_k', {'type': int, 'default': 1}),
    # feedback
    ('--max_depth', {'type': int, 'default': 4}),
    ('--world_model_feedback', {'action': 'store_true'}),
    ('--mixed_feedback', {'action': 'store_true'}),
    ('--world_model_path', {'type': str,
                            'default': 'llama_3_8b_instruct_world_model'}),
    ('--world_model_gpu', {'type': int, 'default': 0}),
    ('--world_model_max_generate_length', {'type': int, 'default': 200}),
    ('--inner_world_model', {'type': int, 'default': 1}),
)

parser = argparse.ArgumentParser()
for _flag, _options in _ARGUMENT_SPECS:
    parser.add_argument(_flag, **_options)
# Parse the command line and seed Python's ``random`` module so that anything
# drawing from it is repeatable for a given --seed.
args = parser.parse_args()
random.seed(args.seed)

# The path options are given relative to the project directories; rewrite
# them in place on ``args`` so downstream code sees the joined paths.
args.model_path = os.path.join(MODEL_DIR, args.model_path)
args.world_model_path = os.path.join(MODEL_DIR, args.world_model_path)
args.data_path = os.path.join(DATASET_DIR, args.data_path)
args.save_path = os.path.join(TEST_DIR, args.save_path)

# Create the output directory if needed. ``exist_ok=True`` avoids the race
# between checking for the directory and creating it (e.g. when several runs
# start at once); it still raises if the path exists but is not a directory.
os.makedirs(args.save_path, exist_ok=True)
# Choose the feedback sources handed to ``generate``:
#   --mixed_feedback        -> world model AND the environment feedback
#   --world_model_feedback  -> world model only (environment is None)
#   neither flag            -> environment feedback only (world model is None)
# Mixed mode wins when both flags are given, so the world-model-only flag is
# cleared first.
if args.mixed_feedback:
    args.world_model_feedback = False

world_model = (
    world_model_feedback(args)
    if args.world_model_feedback or args.mixed_feedback
    else None
)
environment = None if args.world_model_feedback else virturalhome_feedback
# Pickle path inside the save directory that is handed to ``generate``.
last_feedback_path = os.path.join(args.save_path, 'generate_data.p')

# The three test splits to run generation on.
eval_splits = [
    'test_new_task',
    'test_new_scene',
    'test_new_task_and_new_scene',
]

generate(
    args,
    args.model_path,
    last_feedback_path,
    splits=eval_splits,
    environment=environment,
    generate_iters=args.max_depth,
    world_model=world_model,
    inner_world_iter=args.inner_world_model,
)