diff --git a/presets/ClassControl/Acrobot-v1/Acrobot-v1_DQN.yaml b/presets/ClassControl/Acrobot-v1/Acrobot-v1_DQN.yaml new file mode 100644 index 0000000..3cc6cdb --- /dev/null +++ b/presets/ClassControl/Acrobot-v1/Acrobot-v1_DQN.yaml @@ -0,0 +1,47 @@ +general_cfg: + joyrl_version: 0.6.2.5 + algo_name: DQN + env_name: gym + interactor_device: cpu + learner_device: cpu + mode: train + is_learner_async: false + collect_traj: false + n_interactors: 1 + load_checkpoint: true + load_path: Train_Acrobot-v1_DQN_2024613 + load_model_step: best + max_episode: -1 + max_step: 200 + seed: 1 + online_eval: true + online_eval_episode: 10 + model_save_fre: 500 + policy_summary_fre: 100 + +algo_cfg: + learn_frequency: 1 + value_layers: + - layer_type: linear + layer_dim: [6,256] + activation: relu + - layer_type: linear + layer_dim: [256,256] + activation: relu + - layer_type: linear + layer_dim: [256,3] + activation: none + batch_size: 64 + max_buffer_size: 100000 + epsilon_decay: 500 + epsilon_end: 0.01 + epsilon_start: 0.95 + gamma: 0.99 + lr: 0.0001 + target_update: 4 + +env_cfg: + id: Acrobot-v1 + render_mode: null + wrappers: + - wrapper_name: GymDiscreteActionWrapper diff --git a/presets/ClassControl/Acrobot-v1/Acrobot-v1_DQN_Test.yaml b/presets/ClassControl/Acrobot-v1/Acrobot-v1_DQN_Test.yaml deleted file mode 100644 index ad60582..0000000 --- a/presets/ClassControl/Acrobot-v1/Acrobot-v1_DQN_Test.yaml +++ /dev/null @@ -1,22 +0,0 @@ -general_cfg: - algo_name: DQN - device: cuda - env_name: Acrobot-v1 - mode: test - load_checkpoint: true - load_path: Train_Acrobot-v1_DQN_20221122-120436 - max_steps: 100000 - save_fig: true - seed: 1 - show_fig: false - test_eps: 10 - train_eps: 100 -algo_cfg: - batch_size: 64 - max_buffer_size: 100000 - epsilon_decay: 500 - epsilon_end: 0.01 - epsilon_start: 0.95 - gamma: 0.95 - lr: 0.002 - target_update: 4 diff --git a/presets/ClassControl/Acrobot-v1/Acrobot-v1_DQN_Train.yaml b/presets/ClassControl/Acrobot-v1/Acrobot-v1_DQN_Train.yaml deleted file mode 100644 index 425c0b3..0000000 --- a/presets/ClassControl/Acrobot-v1/Acrobot-v1_DQN_Train.yaml +++ /dev/null @@ -1,32 +0,0 @@ -general_cfg: - algo_name: DQN - device: cuda - env_name: Acrobot-v1 - mode: train - load_checkpoint: false - load_path: Train_CartPole-v1_DQN_20221026-054757 - max_steps: 200 - save_fig: true - seed: 1 - show_fig: false - test_eps: 10 - train_eps: 100 -algo_cfg: - value_layers: - - layer_type: linear - layer_dim: ['n_states',256] - activation: relu - - layer_type: linear - layer_dim: [256,256] - activation: relu - - layer_type: linear - layer_dim: [256,'n_actions'] - activation: none - batch_size: 64 - max_buffer_size: 100000 - epsilon_decay: 500 - epsilon_end: 0.01 - epsilon_start: 0.95 - gamma: 0.99 - lr: 0.0001 - target_update: 4 diff --git a/presets/ClassControl/MountainCar-v0/MountainCar-v0_DQN.yaml b/presets/ClassControl/MountainCar-v0/MountainCar-v0_DQN.yaml new file mode 100644 index 0000000..fda5588 --- /dev/null +++ b/presets/ClassControl/MountainCar-v0/MountainCar-v0_DQN.yaml @@ -0,0 +1,40 @@ +general_cfg: + joyrl_version: 0.6.2.5 + algo_name: DQN + env_name: gym + interactor_device: cpu + learner_device: cpu + mode: train + is_learner_async: false + collect_traj: false + n_interactors: 1 + load_checkpoint: false + load_path: "Train_MountainCar-v0_DQN" + load_model_step: "best" + max_episode: -1 + max_step: 200 + seed: 1 + online_eval: true + online_eval_episode: 10 +algo_cfg: + learn_frequency: 1 + value_layers: + - layer_type: linear + layer_size: [256] + activation: relu + - layer_type: linear + layer_dim: [128] + activation: none + batch_size: 64 + max_buffer_size: 10000 + epsilon_decay: 1500 + epsilon_end: 0.01 + epsilon_start: 0.98 + gamma: 0.98 + lr: 0.001 + target_update: 10 +env_cfg: + id: MountainCar-v0 + render_mode: null + wrappers: + - wrapper_name: GymDiscreteActionWrapper diff --git a/presets/ClassControl/MountainCar-v0/MountainCar-v0_DQN_Test.yaml b/presets/ClassControl/MountainCar-v0/MountainCar-v0_DQN_Test.yaml deleted file mode 100644 index b895edb..0000000 --- a/presets/ClassControl/MountainCar-v0/MountainCar-v0_DQN_Test.yaml +++ /dev/null @@ -1,45 +0,0 @@ -general_cfg: - algo_name: DQN - device: cpu - env_name: MountainCar-v0 - eval_eps: 10 - eval_per_episode: 5 - load_checkpoint: true - load_path: Train_MountainCar-v0_DQN_20230404-211546 - max_steps: 200 - mode: test - new_step_api: true - render: true - save_fig: true - seed: 3 - show_fig: false - test_eps: 20 - train_eps: 200 - wrapper: null - render_mode: rgb_array - n_workers: 1 -algo_cfg: - batch_size: 64 - max_buffer_size: 40000 - epsilon_decay: 800 - epsilon_end: 0.01 - epsilon_start: 0.99 - gamma: 0.99 - lr: 0.01 - target_update: 4 - value_layers: - - activation: relu - layer_dim: - - n_states - - 256 - layer_type: linear - - activation: relu - layer_dim: - - 256 - - 256 - layer_type: linear - - activation: none - layer_dim: - - 256 - - n_actions - layer_type: linear diff --git a/presets/ClassControl/MountainCar-v0/MountainCar-v0_DQN_Train.yaml b/presets/ClassControl/MountainCar-v0/MountainCar-v0_DQN_Train.yaml deleted file mode 100644 index e4e8d36..0000000 --- a/presets/ClassControl/MountainCar-v0/MountainCar-v0_DQN_Train.yaml +++ /dev/null @@ -1,44 +0,0 @@ -general_cfg: - algo_name: DQN - device: cpu - env_name: MountainCar-v0 - eval_eps: 10 - eval_per_episode: 5 - load_checkpoint: false - load_path: tasks - max_steps: 200 - mode: train - new_step_api: true - render: false - save_fig: true - seed: 3 - show_fig: false - test_eps: 20 - train_eps: 200 - wrapper: null - n_workers: 1 -algo_cfg: - batch_size: 64 - max_buffer_size: 40000 - epsilon_decay: 800 - epsilon_end: 0.01 - epsilon_start: 0.99 - gamma: 0.99 - lr: 0.01 - target_update: 4 - value_layers: - - activation: relu - layer_dim: - - n_states - - 256 - layer_type: linear - - activation: relu - layer_dim: - - 256 - - 256 - layer_type: linear - - activation: none - layer_dim: - - 256 - - n_actions - layer_type: linear