-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnotebook_randlookups.py
91 lines (73 loc) · 2.64 KB
/
notebook_randlookups.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""Random exploration with credential lookup exploitation (notebook)
This notebook can be run directly from VSCode. To generate a
traditional Jupyter Notebook to open in your browser,
you can run the VSCode command `Export Current Python File As Jupyter Notebook`.
"""
# pylint: disable=invalid-name
# %%
import sys
# NOTE(review): machine-specific absolute path appended to the import search
# path; the cyberbattle imports below presumably rely on it unless the package
# is installed -- confirm before running on another machine.
sys.path.append('/home/windy/Desktop/experiment/CyberBattleSim')
from cyberbattle._env.cyberbattle_env import AttackerGoal
from cyberbattle.agents.baseline.agent_randomcredlookup import CredentialCacheExploiter
import cyberbattle.agents.baseline.learner as learner
import gym
import logging
import sys
import cyberbattle.agents.baseline.plotting as p
import cyberbattle.agents.baseline.agent_wrapper as w
from cyberbattle.agents.baseline.agent_wrapper import Verbosity
from cyberbattle._env.cyberbattle_env import AttackerGoal, DefenderConstraint
from cyberbattle._env.defender1 import LearningDefender1
from cyberbattle._env.defender import ScanAndReimageCompromisedMachines
# %%
# Route log records to stdout at ERROR level, formatted as "LEVEL: message".
logging.basicConfig(
    format="%(levelname)s: %(message)s",
    level=logging.ERROR,
    stream=sys.stdout,
)
# %%
# Create the 'CyberBattleChain-v0' environment with size 10, passing an
# attacker goal, a defender SLA constraint, and a LearningDefender1 instance
# as the defender agent.
cyberbattlechain_10 = gym.make(
    'CyberBattleChain-v0',
    size=10,
    attacker_goal=AttackerGoal(own_atleast_percent=1.0),
    defender_constraint=DefenderConstraint(maintain_sla=0.80),
    defender_agent=LearningDefender1(),
)
# %%
# Run-length settings shared by the searches in the cells below.
iteration_count = 9000
training_episode_count = 50
eval_episode_count = 5

# Environment bounds built from the chain environment's identifiers, capped at
# 12 nodes and 12 credentials.
ep = w.EnvironmentBounds.of_identifiers(
    identifiers=cyberbattlechain_10.identifiers,
    maximum_node_count=12,
    maximum_total_credentials=12,
)
# %%
# Epsilon-greedy search with the CredentialCacheExploiter learner, run for
# `training_episode_count` episodes of `iteration_count` iterations each.
credexplot = learner.epsilon_greedy_search(
    cyberbattlechain_10,
    environment_properties=ep,
    learner=CredentialCacheExploiter(),
    title="Random+CredLookup",
    # Run length.
    episode_count=training_episode_count,
    iteration_count=iteration_count,
    # Exploration schedule: epsilon starts at 0.90 with a 0.75 multiplicative
    # decay and a 0.01 minimum (the author left 0.999 as an alternative decay).
    epsilon=0.90,
    epsilon_multdecay=0.75,
    epsilon_minimum=0.01,
    # Output control.
    render=False,
    verbosity=Verbosity.Quiet,
)
# %%
# Baseline run for comparison: same environment and learner class, but with
# epsilon fixed at 1.0 (annotated by the author as purely random) and only
# `eval_episode_count` episodes.
randomlearning_results = learner.epsilon_greedy_search(
    cyberbattlechain_10,
    learner=CredentialCacheExploiter(),
    environment_properties=ep,
    title="Random search",
    episode_count=eval_episode_count,
    iteration_count=iteration_count,
    epsilon=1.0,  # purely random
    verbosity=Verbosity.Quiet,
    render=False,
)
# %%
# Plots for the credential-lookup run alone: episode lengths, then all episodes.
p.plot_episodes_length([credexplot])
p.plot_all_episodes(credexplot)

# Averaged cumulative rewards for both runs on one benchmark plot.
# NOTE(review): the title reports eval_episode_count, yet credexplot was run
# with training_episode_count episodes -- confirm the label is intended.
all_runs = [credexplot, randomlearning_results]
p.plot_averaged_cummulative_rewards(
    all_runs=all_runs,
    title=f'Benchmark -- max_nodes={ep.maximum_node_count}, episodes={eval_episode_count},\n',
)
# %%