From 08fed3651fa6dd7157d899df7528bdf949563289 Mon Sep 17 00:00:00 2001
From: Kailigithub <100wkl@163.com>
Date: Mon, 29 Jul 2024 10:08:56 +0800
Subject: [PATCH] [benchmark] add PPO-MountainCarContinuous-v0

---
Reviewer notes (text between the three-dash line and the first diff header
is commentary, not commit content):

* Adds two PPO presets for MountainCarContinuous-v0: a test preset
  (mode: test, load_checkpoint: true, render_mode: human) and a train
  preset (mode: train, load_checkpoint: false, render_mode: null).
* The per-role device keys are spelled interactor_device / learner_device.
  They previously contained a space instead of the underscore, unlike every
  other key in general_cfg, so they were presumably never picked up.
  NOTE(review): confirm the key names against the JoyRL config reference.
* Both presets set device: cpu next to the cuda per-role values.
  NOTE(review): confirm which setting takes precedence.
* The train preset's load_path names a MountainCar-v0 run; presumably it is
  not read while load_checkpoint is false. TODO confirm or replace.
* The test preset has no action_std_bias while the train preset sets 0.8.
  NOTE(review): confirm the default is acceptable for evaluation.
* The index hashes below describe the blobs before the key rename.

 .../MountainCarContinuous-v0_PPO-test.yaml    | 62 ++++++++++++++++++
 .../MountainCarContinuous-v0_PPO.yaml         | 64 +++++++++++++++++++
 2 files changed, 126 insertions(+)
 create mode 100644 presets/ClassControl/MountainCarContinuous-v0/MountainCarContinuous-v0_PPO-test.yaml
 create mode 100644 presets/ClassControl/MountainCarContinuous-v0/MountainCarContinuous-v0_PPO.yaml

diff --git a/presets/ClassControl/MountainCarContinuous-v0/MountainCarContinuous-v0_PPO-test.yaml b/presets/ClassControl/MountainCarContinuous-v0/MountainCarContinuous-v0_PPO-test.yaml
new file mode 100644
index 0000000..d8122b9
--- /dev/null
+++ b/presets/ClassControl/MountainCarContinuous-v0/MountainCarContinuous-v0_PPO-test.yaml
@@ -0,0 +1,62 @@
+general_cfg:
+  joyrl_version: 0.6.5.1
+  algo_name: PPO
+  device: cpu
+  env_name: gym
+  interactor_device: cuda  # NOTE(review): device is cpu above - confirm precedence
+  learner_device: cuda
+  mode: test
+  exps_trucation_size: 1024
+  is_learner_async: false
+  load_checkpoint: true
+  load_path: Train_MountainCarContinuous-v0_PPO_20240715-161812
+  load_model_step: best
+  n_interactors: 20
+  max_episode: -1
+  max_step: -1
+  seed: 1
+  reward_threshold: 90
+  online_eval: true
+  online_eval_episode: 20
+  model_save_fre: 10000
+  policy_summary_fre: 5000
+  interact_summary_fre: 5000
+algo_cfg:
+  actor_branch_layers:
+    - name: action
+      layers:
+        - layer_type: linear
+          layer_size: [256]
+          activation: tanh
+        - layer_type: linear
+          layer_size: [256]
+          activation: tanh
+
+  critic_branch_layers:
+    - name: critic
+      layers:
+        - layer_type: linear
+          layer_size: [256]
+          activation: relu
+        - layer_type: linear
+          layer_size: [256]
+          activation: relu
+  buffer_type: REPLAY_QUE
+  max_buffer_size: 100000
+  action_type_list: continuous
+  lr: 0.0003
+  actor_lr: 0.003
+  critic_lr: 0.005
+  entropy_coef: 0.003
+  critic_loss_coef: 0.5
+  eps_clip: 0.2
+  gamma: 0.99
+  return_form: mc
+  gae_lambda: 0.95
+  k_epochs: 4
+  batch_size: 64
+env_cfg:
+  id: MountainCarContinuous-v0
+  render_mode: human
+  wrappers:
+    - wrapper_name: MultiHeadObsWrapper
diff --git a/presets/ClassControl/MountainCarContinuous-v0/MountainCarContinuous-v0_PPO.yaml b/presets/ClassControl/MountainCarContinuous-v0/MountainCarContinuous-v0_PPO.yaml
new file mode 100644
index 0000000..1104fcf
--- /dev/null
+++ b/presets/ClassControl/MountainCarContinuous-v0/MountainCarContinuous-v0_PPO.yaml
@@ -0,0 +1,64 @@
+general_cfg:
+  joyrl_version: 0.6.5.1
+  algo_name: PPO
+  env_name: gym
+  device: cpu
+  interactor_device: cuda  # NOTE(review): device is cpu above - confirm precedence
+  learner_device: cuda
+  mode: train
+  exps_trucation_size: 1024
+  is_learner_async: false
+  load_checkpoint: false
+  load_path: Train_MountainCar-v0_PPO_20240618-192707  # NOTE(review): names a MountainCar-v0 run, not this env
+  load_model_step: best
+  n_interactors: 1
+  n_learners: 1
+  max_episode: -1
+  max_step: -1
+  seed: 1
+  reward_threshold: 90
+  online_eval: true
+  online_eval_episode: 20
+  model_save_fre: 10000
+  policy_summary_fre: 10000
+  interact_summary_fre: 10000
+algo_cfg:
+  actor_branch_layers:
+    - name: action
+      layers:
+        - layer_type: linear
+          layer_size: [256]
+          activation: tanh
+        - layer_type: linear
+          layer_size: [256]
+          activation: tanh
+
+  critic_branch_layers:
+    - name: critic
+      layers:
+        - layer_type: linear
+          layer_size: [256]
+          activation: relu
+        - layer_type: linear
+          layer_size: [256]
+          activation: relu
+  buffer_type: REPLAY_QUE
+  max_buffer_size: 100000
+  action_type_list: continuous
+  lr: 0.0003
+  actor_lr: 0.003
+  critic_lr: 0.005
+  entropy_coef: 0.003
+  critic_loss_coef: 0.5
+  eps_clip: 0.2
+  gamma: 0.99
+  return_form: mc
+  gae_lambda: 0.95
+  k_epochs: 4
+  batch_size: 64
+  action_std_bias: 0.8
+env_cfg:
+  id: MountainCarContinuous-v0
+  render_mode: null
+  wrappers:
+    - wrapper_name: MultiHeadObsWrapper