import os
import numpy as np


class InfiniteHorizonMMDP:
    """Infinite-horizon multi-model MDP loaded from a whitespace-delimited file.

    Attributes:
        nstates, nactions, nmodels: problem dimensions (ints).
        weights: array of shape (nmodels,).
        init_dist: array of shape (nmodels, nstates).
        rewards: array of shape (nmodels, nstates, nactions).
        tps: array of shape (nmodels, nstates, nactions, nstates).
        discount_factor, epsilon: stored unchanged from the constructor
            arguments.

    File layout, as consumed by the parser (tokens split on whitespace):
        tokens 0-2  : nstates, nactions, nmodels
        token 3     : skipped (never read)
        weights     : nmodels values
        one token   : skipped
        init_dist   : nmodels * nstates values, row-major
        one token   : skipped
        rewards     : nmodels * nstates * nactions values, row-major
        one token   : skipped
        tps         : nmodels * nstates * nactions * nstates values, row-major

    NOTE(review): the skipped tokens are presumably section labels or
    separators; the parser never inspects them -- confirm against the
    model files.
    """

    def __init__(self, filename=None, discount_factor_in=None, epsilon_in=None):
        """Build a model from ``filename``, or an empty model when it is None.

        Python does not support constructor overloading, so the zero-argument
        and three-argument forms share this single ``__init__``.

        Args:
            filename: path of the model file; ``None`` yields an empty model
                with all dimensions equal to 0.
            discount_factor_in: stored as ``self.discount_factor``.
            epsilon_in: stored as ``self.epsilon``.

        Raises:
            ValueError: if the file has fewer tokens than the declared
                dimensions require, or a value cannot be parsed as a number.
            OSError: if the file cannot be opened.
        """
        self.discount_factor = discount_factor_in
        self.epsilon = epsilon_in

        if filename is None:
            self.nstates = 0
            self.nactions = 0
            self.nmodels = 0
            self.weights = np.zeros(0)
            self.init_dist = np.zeros((0, 0))
            self.rewards = np.zeros((0, 0, 0))
            self.tps = np.zeros((0, 0, 0, 0))
            return

        # Read every token up front; the context manager closes the handle.
        with open(filename, "r") as f:
            contents = f.read().split()

        if len(contents) < 3:
            raise ValueError(
                "model file must start with nstates, nactions and nmodels"
            )
        self.nstates = int(contents[0])
        self.nactions = int(contents[1])
        self.nmodels = int(contents[2])

        # Token 3 is skipped, so the weights start at index 4.
        cursor = 4
        self.weights, cursor = self._read_section(
            contents, cursor, (self.nmodels,), "weights"
        )
        self.init_dist, cursor = self._read_section(
            contents, cursor, (self.nmodels, self.nstates),
            "initial distribution"
        )
        self.rewards, cursor = self._read_section(
            contents, cursor, (self.nmodels, self.nstates, self.nactions),
            "rewards"
        )
        self.tps, cursor = self._read_section(
            contents, cursor,
            (self.nmodels, self.nstates, self.nactions, self.nstates),
            "transition probabilities"
        )

    @staticmethod
    def _read_section(tokens, start, shape, label):
        """Parse one row-major section of ``tokens`` into an array of ``shape``.

        Returns the array and the start index of the next section, which
        lies one token past the end of this one (that token is skipped).
        """
        count = int(np.prod(shape))
        values = tokens[start:start + count]
        if len(values) != count:
            raise ValueError(
                "expected %d values for %s, found %d"
                % (count, label, len(values))
            )
        # Row-major reshape maps flat offset (m * nstates + s) * nactions + a
        # (and the analogous 4-D offset) onto the matching array cell.
        data = np.array([float(v) for v in values], dtype=float).reshape(shape)
        return data, start + count + 1


if __name__ == '__main__':
    filename = "HIV_model.txt"  # Change filename as desired
    # Current discount factor = 0.97 and epsilon = 1
    mmdp = InfiniteHorizonMMDP(filename, 0.97, 1)