-
Notifications
You must be signed in to change notification settings - Fork 0
/
Simulator.py
97 lines (78 loc) · 3.63 KB
/
Simulator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import numpy as np
from scipy import stats
import os
class Simulator:
    """Multi-product inventory-control environment.

    The state is the per-product inventory level (negative values mean
    backlog), the action is the per-product order quantity, and demand is
    drawn from a multivariate log-normal distribution.

    All price/cost vectors have shape (1, N_prod):
      price       -- revenue per unit of demand served
      costQ       -- per-unit ordering cost
      costInv     -- per-unit holding cost on positive inventory
      costLastInv -- holding cost applied at the final timestamp instead
      costBack    -- per-unit backlog penalty (charged on negative inventory)
    """
    def __init__(self,
                 seed = 101,
                 N_prod = 2,
                 Tstamp = 20,
                 price = np.array([[1.5, 1.5]]),
                 costQ = np.array([[0.1, 0.1]]),
                 costInv = np.array([[0.2, 0.2]]),
                 costLastInv = np.array([[1, 1]]),
                 costBack = np.array([[0.5, 0.5]]) ):
        # NOTE: the array defaults are evaluated once at class-definition
        # time and shared across instances; they are only read here, never
        # mutated, so this is safe — but callers should pass fresh arrays
        # if they intend to modify them later.
        self.seed = seed
        self.N_prod = N_prod
        self.Tstamp = Tstamp
        self.price = price
        self.costQ = costQ
        self.costInv = costInv
        self.costLastInv = costLastInv
        self.costBack = costBack
    def obs_dim(self):
        """Dimension of the observation: one inventory level per product."""
        return self.N_prod
    def act_dim(self):
        """Dimension of the action: one order quantity per product."""
        # add another axis in the future
        return self.N_prod
    def demandGenerator(self, mu, cov, seed = 123):
        """Generate and cache a (Tstamp, N_prod) matrix of log-normal demands.

        mu   -- mean demand per product, shape (N_prod,); must be strictly
                positive since np.log(mu) parameterizes the underlying normal.
        cov  -- covariance of the underlying normal, shape (N_prod, N_prod).
        seed -- RNG seed for this draw; note it defaults to 123 and is
                independent of self.seed.
        """
        np.random.seed(seed)
        self.demandVec = np.exp(np.random.multivariate_normal(mean = np.log(mu), cov = cov, size = self.Tstamp))
        return self.demandVec
    def actionSpace(self):
        """Return (min, max) bounds of the per-product order quantity."""
        # can be changed
        return 0, 10.0
    def randomActionGenerator(self, Nsample = 1):
        """Sample Nsample uniform actions, shape (Nsample, N_prod)."""
        minAction, maxAction = self.actionSpace()
        return np.random.uniform(low = minAction, high = maxAction, size = (Nsample, self.N_prod))
    def stateSpace(self):
        """Return (min, max) bounds of the inventory level (can be negative)."""
        return -10., 10.
    def randomStateGenerator(self, Nsample = 1):
        """Sample Nsample uniform inventory states, shape (Nsample, N_prod)."""
        minInv, maxInv = self.stateSpace()
        return np.random.uniform(low = minInv, high = maxInv, size = (Nsample, self.N_prod))
    def randomInitialStateGenerator(self):
        """Sample one non-negative initial inventory, shape (1, N_prod)."""
        _, maxInv = self.stateSpace()
        return np.random.uniform(low = 0, high = maxInv, size = (1, self.N_prod))
    def _reward(self, action, inventory, demand, last = False):
        """Single-step profit as a scalar.

        action    -- order quantities q for timestamp t, shape (1, N_prod)
        inventory -- inventory AFTER the transition (timestamp t+1),
                     shape (1, N_prod); negative entries are backlog
        demand    -- realized demand at timestamp t, shape (1, N_prod)
        last      -- if True, charge costLastInv (terminal salvage/holding
                     rate) on leftover inventory instead of costInv

        reward = revenue - (ordering cost + holding cost + backlog cost).
        Backlog cost enters with a minus sign because min(0, inventory)
        is non-positive.
        """
        # The two branches of the original differed only in which holding
        # cost applied, so select that vector once instead of duplicating
        # the whole expression.
        holdCost = self.costLastInv if last else self.costInv
        zero = np.zeros(shape = inventory.shape)
        revenue = self.price.dot(demand.T)
        cost = (self.costQ.dot(action.T)
                + holdCost.dot(np.maximum(zero, inventory).T)
                - self.costBack.dot(np.minimum(zero, inventory).T))
        return (revenue - cost)[0][0]
    def nextInventory(self, curInventory, curAction, curDemand):
        """Inventory balance equation: carry-over + orders - demand."""
        return curInventory + curAction - curDemand
    def step(self, action, currentInventory, curDemand, last):
        """Advance one timestamp; return (nextInventory, reward).

        Reward is evaluated on the post-transition inventory, so holding
        and backlog costs are charged on what remains after demand.
        """
        nextInventory = self.nextInventory(currentInventory, action, curDemand)
        reward = self._reward(action, nextInventory, curDemand, last)
        return nextInventory, reward
# test
# env = Simulator(seed = 1)
# demand = env.demandGenerator(mu = np.array([5,5]), cov = np.diag(np.array([0.1, 0.1])))
# randomAction = env.randomActionGenerator()
# initState = env.randomStateGenerator()
# rew = env._reward(env.randomActionGenerator(), env.randomStateGenerator(), demand[0,:], False)
# rew2 = env._reward(env.randomActionGenerator(), env.randomStateGenerator(), demand[0,:], True)
# print(rew)
# print(rew2)
# print(randomAction)
# print(initState)
# print(demand)
# print(demand[0,:])