-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmain.py
executable file
·161 lines (130 loc) · 6.79 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
import pdb
import scipy.io as sio
import numpy as np
import pickle
from utils import *
from MOMDP import MOMDP, MOMDP_TOQ, MOMDP_TO, MOMDP_Q, MOMDP_TOQ_notVectorized
import os
def main():
    """Set the run options and evaluate a single policy.

    The four settings defined here are forwarded unchanged to
    evaluateSinglePolicy(). The commented-out section at the end shows how to
    run the benchmark sweeps through evaluateAllPolicies() instead.
    """
    # Policy handling:
    #    0 = compute policy and save policy object
    #    1 = load policy object
    #   -1 = compute policy but DO NOT save it
    load = 1

    # Number of digits to print, just for visual output
    digitsResults = 6

    # Verbosity: 0 = only value function update and results, 1 = minimal, 2 = verbose
    # NOTE(review): the value used below (3) is not covered by this legend.
    printLevel = 3

    # Belief discretization option (legend in the benchmark section below)
    discOpt = 1

    # Evaluate single policy. Details in evaluateSinglePolicy() function
    evaluateSinglePolicy(load, digitsResults, printLevel, discOpt)

    # # Uncomment the following lines to run benchmarks against standard strategies
    # # Save .txt file with results for Table I
    # discOpt = 1 # 1 = 2^{n_u} + n_u belief points, 2 = 2(2^{n_u}) belief points
    # gridWorldList = ['5x5' , '10x5', '15x15' ]
    # obstList = [3, 4]
    # policyList = ['TO', 'Q', 'TOQ']
    # evaluateAllPolicies(load, digitsResults,printLevel, discOpt, gridWorldList, obstList, policyList)

    # # Save .txt file with results for Table II
    # discOpt = 2 # 1 = 2^{n_u} + n_u belief points, 2 = 2(2^{n_u}) belief points
    # gridWorldList = ['15x15' ]
    # obstList = [4]
    # evaluateAllPolicies(load, digitsResults,printLevel, discOpt, gridWorldList, obstList, policyList)
def evaluateSinglePolicy(load, digitsResults, printLevel, discOpt):
    """Obtain one policy and simulate it on a single environment realization.

    The scenario (grid world, number of obstacles, policy type) is fixed
    inside this function. The policy object comes from getMOMDP() with
    unGoal=True and is then handed to runSim().

    Args:
        load: forwarded to getMOMDP() (compute/save/load switch).
        digitsResults: forwarded to runSim().
        printLevel: verbosity, forwarded to getMOMDP() and runSim().
        discOpt: belief discretization option, forwarded to getMOMDP().
    """
    # Scenario under test
    gridWorld, numObst, policy = '7x7ug', 2, 'TOQ'
    # Alternative scenario:
    # gridWorld, numObst, policy = '5x5ug', 3, 'TOQ'

    momdp = getMOMDP(gridWorld, numObst, policy, printLevel, load, discOpt, unGoal=True)

    # # Evaluate expected cost and probability of failure
    # results = runAllSim(momdp, gridWorld, numObst, policy, printLevel, digitsResults)

    # Run a simulation for an environment realization which is defined in the
    # function loadParameters() from the file utils.py
    results = runSim(momdp, gridWorld, numObst, policy, printLevel, digitsResults)
def _writeResults(title, fileName, lines):
    """Print `title` and each entry of `lines`, and save the entries to `fileName`.

    Every entry is written on its own line. The file is opened in a `with`
    block so it is closed even if a write fails.
    """
    print(title)
    with open(fileName, "w") as fileToWrite:
        for line in lines:
            print(line)
            fileToWrite.write(line + '\n')


def evaluateAllPolicies(load, digitsResults, printLevel, discOpt, gridWorldList, obstList, policyList):
    """Evaluate every (grid world, obstacle count, policy) combination.

    For each combination the policy object is obtained with getMOMDP() and
    evaluated with runAllSim(). Each runAllSim() result is a pair of summary
    strings: entry 0 goes to the expected-cost table and entry 1 to the
    expected-time table. Both tables are printed and written to
    "result_expected_cost_Table_<discOpt>.txt" and
    "result_expected_time_Table_<discOpt>.txt" in the current directory.

    Args:
        load: forwarded to getMOMDP() (compute/save/load switch).
        digitsResults: number of digits used by runAllSim() when rounding.
        printLevel: verbosity, forwarded to getMOMDP() and runAllSim().
        discOpt: belief discretization option; also part of the file names.
        gridWorldList: iterable of grid world names.
        obstList: iterable of obstacle counts.
        policyList: iterable of policy names.
    """
    resultsList = []
    for gridWorld in gridWorldList:
        for numObst in obstList:
            for policy in policyList:
                momdp = getMOMDP(gridWorld, numObst, policy, printLevel, load, discOpt)
                results = runAllSim(momdp, gridWorld, numObst, policy, printLevel, digitsResults)
                resultsList.append(results)

    # Save and print to screen the results
    _writeResults(
        "====== Results for expected cost and prob sat specs",
        "result_expected_cost_Table_"+str(discOpt)+".txt",
        [results[0] for results in resultsList],
    )
    _writeResults(
        "====== Results for expected time and prob of failure",
        "result_expected_time_Table_"+str(discOpt)+".txt",
        [results[1] for results in resultsList],
    )
def getMOMDP(gridWorld, numObst, policy, printLevel, load, discOpt, unGoal = False):
    """Compute a policy object or load a previously pickled one.

    The pickle file lives in "data/<policy>_<discOpt>/" (or
    "data/<policy>ug_<discOpt>/" when unGoal is set) and is named
    "MOMDP_obj_<gridWorld>_<numObst>.pkl". The directory is created if it
    does not exist yet.

    Args:
        gridWorld: grid world name, e.g. '5x5'.
        numObst: number of obstacles.
        policy: one of 'TOQ', 'Q' or 'TO'.
        printLevel: verbosity, forwarded to the policy constructor.
        load: 0 = compute the policy and save it, 1 (any value > 0) = load the
            pickled policy, -1 (any value < 0) = compute but do not save.
        discOpt: belief discretization option, forwarded to the constructor.
        unGoal: flag forwarded to loadParameters() and to MOMDP_TOQ; also
            selects the "ug" data directory. Tested by truthiness.

    Returns:
        The computed or unpickled policy object.

    Raises:
        ValueError: if a policy must be computed and `policy` is not one of
            the supported names.
        FileNotFoundError: if loading is requested and the pickle file does
            not exist.
    """
    totTimeSteps, _, _ = loadParameters(gridWorld, numObst, unGoal)

    policyTag = (policy + 'ug') if unGoal else policy
    directory = os.path.join('data', policyTag + '_' + str(discOpt))
    filePath = os.path.join(directory, 'MOMDP_obj_' + gridWorld + '_' + str(numObst) + '.pkl')

    # exist_ok avoids the race between checking for the directory and creating it
    os.makedirs(directory, exist_ok=True)

    if load > 0:
        # NOTE: pickle.load can execute arbitrary code; only load policy files
        # generated by this project.
        with open(filePath, "rb") as pickleIn:
            return pickle.load(pickleIn)

    # load <= 0: compute the policy, and store it if load == 0
    gridVar = loadGrid(gridWorld+'_'+str(numObst))
    if policy == 'TOQ':
        # momdp = MOMDP_TOQ_notVectorized(gridVar, totTimeSteps,printLevel, policy, discOpt)
        momdp = MOMDP_TOQ(gridVar, totTimeSteps, printLevel, policy, discOpt, unGoal)
    elif policy == 'Q':
        momdp = MOMDP_Q(gridVar, totTimeSteps, printLevel, policy, discOpt)
    elif policy == 'TO':
        momdp = MOMDP_TO(gridVar, totTimeSteps, printLevel, policy, discOpt)
    else:
        # Previously this fell through and failed later with an unbound `momdp`
        raise ValueError("Unknown policy '" + str(policy) + "': expected 'TOQ', 'Q' or 'TO'")

    if load == 0:
        with open(filePath, "wb") as pickleOut:
            pickle.dump(momdp, pickleOut, protocol=4)

    return momdp
def runSim(momdp, gridWorld, numObst, policy, printLevel, digits):
    """Simulate the policy on the realization given by loadParameters() and report.

    The closed-loop run is performed by eveluateMOMDP(), starting from state 0
    and from the belief built by momdp.initBelief(). The resulting state and
    belief trajectories are passed to plotFun(), and a summary is printed.

    Args:
        momdp: policy object; its unGoal, initBelief, totTime and
            avgBackupTime attributes are used here.
        gridWorld: grid world name.
        numObst: number of obstacles.
        policy: policy name, used only in the printed header.
        printLevel: verbosity, forwarded to eveluateMOMDP().
        digits: accepted for interface symmetry with runAllSim(); not used.

    Returns:
        None.
    """
    _, trueLoc, beliefInit = loadParameters(gridWorld, numObst, momdp.unGoal)

    beliefTraj = [momdp.initBelief(beliefInit)]  # initial belief
    stateTraj = [0]  # initial state

    outcome = eveluateMOMDP(momdp, trueLoc, beliefInit, stateTraj, beliefTraj, printLevel)
    V_t0, Vrealized, J_t0, failure, tRun, stateTraj, beliefTraj = outcome

    plotFun(momdp, stateTraj, beliefTraj)

    # Print results
    print("================ Final Results for ", policy, " Policy in ", gridWorld, " grid world with ", numObst, "obstacles.")
    print("Vrealized: ", Vrealized, " and V0: ", V_t0)
    print("Prob Sat Spec: ", 1 - failure, " and lower bound: ", J_t0)
    print("Time: ", tRun)
    print("Policy synthesis time: ", momdp.totTime, " Avg backup time: ", momdp.avgBackupTime)
def runAllSim(momdp, gridWorld, numObst, policy, printLevel, digits):
    """Simulate the policy for every entry of momdp.comb and aggregate the results.

    Each entry of momdp.comb is used as the true obstacle location for one
    run of eveluateMOMDP(). The failure indicator, realized cost and run time
    are weighted by the initial belief of that entry; run time is only
    counted for runs without failure.

    Args:
        momdp: policy object; its comb, unGoal, initBelief, totTime and
            avgBackupTime attributes are used here.
        gridWorld: grid world name.
        numObst: number of obstacles.
        policy: policy name, used for the printed header and result label.
        printLevel: verbosity, forwarded to eveluateMOMDP().
        digits: number of digits used when rounding the reported values.

    Returns:
        A two-element list of summary strings: [cost/probability line,
        time/failure line].

    Note:
        The reported V0 and J0 are the values returned by the last simulated
        scenario. If momdp.comb is empty they are never assigned and the
        report step fails.
    """
    _, _, initBelief = loadParameters(gridWorld, numObst, momdp.unGoal)

    probFailure = 0
    expCost = 0
    expTime = 0
    # Loop over all possible 2^numObst obstacle configurations
    for idx, loc in enumerate(momdp.comb):  # loc = true obstacle location
        bt = [momdp.initBelief(initBelief)]  # initial belief
        xt = [0]  # initial state
        V_t0, Vrealized, J_t0, failure, tRun, _, _ = eveluateMOMDP(momdp, loc, initBelief, xt, bt, printLevel)

        weight = bt[0][idx]  # probability of this scenario
        probFailure += failure*weight
        expCost += Vrealized*weight
        expTime += tRun*weight*(failure==0)

    # Print results
    print("================ Final Results for ", policy, " Policy in ", gridWorld, " grid world with ", numObst, "obstacles.")
    print("Expected cost: ", expCost, " and V0: ", V_t0)
    print("Prob Sat Spec: ", 1 - probFailure, " and lower bound: ", J_t0)
    print("Expected time: ", expTime)
    print("Prob Failure: ", probFailure, " and upper bound: ", 1-J_t0)
    print("Policy synthesis time: ", momdp.totTime, " Avg backup time: ", momdp.avgBackupTime)

    def rounded(value):
        # Text form of `value` rounded to the requested number of digits
        return str(round(value, digits))

    # Shorter policy names get a trailing space in the result label
    label = {'TO': 'TO ', 'Q': 'Q '}.get(policy, policy)

    stringTest = "_".join([label, gridWorld, str(numObst)])
    stringCost = " || ExpCost: " + rounded(expCost) + ", V0: " + rounded(V_t0)
    stringProb = " || P specs: " + rounded(1-probFailure) + ", J0: " + rounded(J_t0)
    stringTime = " || Tot Time: " + rounded(momdp.totTime) + ", backup time: " + rounded(momdp.avgBackupTime)
    stringExpT = " || ExpTime: " + rounded(expTime)
    stringFail = " || P fail: " + rounded(probFailure) + " and upper-bound 1-J0: " + rounded(1-J_t0)

    costLine = stringTest + stringCost + stringProb + stringTime
    timeLine = stringTest + stringExpT + stringFail + stringTime
    return [costLine, timeLine]
if __name__ == "__main__":
    # Run the experiment only when this file is executed as a script,
    # not when it is imported as a module.
    main()