"""
An implementation of the basic model described in Chan, Nicholas Tung, and Christian Shelton.
"An electronic market-maker." (2001).
"""
import numpy as np
import pandas as pd
"""
Simulation events
"""
EVENT_PRICE_CHANGE_UP = 0
EVENT_PRICE_CHANGE_DOWN = 1
EVENT_UNINFORMED_BUY = 2
EVENT_UNINFORMED_SELL = 3
EVENT_INFORMED_ARRIVAL = 4
EVENT_ALL = [
    EVENT_PRICE_CHANGE_DOWN, EVENT_PRICE_CHANGE_UP,
    EVENT_UNINFORMED_SELL, EVENT_UNINFORMED_BUY,
    EVENT_INFORMED_ARRIVAL,
]
"""
Default simulation parameters.
"""
INITIAL_PRICE = 200
MAX_T = 150
# \lambda_p in the paper
PROB_PRICE = 0.2
PROB_PRICE_UP = PROB_PRICE
PROB_PRICE_DOWN = PROB_PRICE
# \lambda_u in the paper
PROB_UNINFORMED = 0.1
PROB_UNINFORMED_BUY = PROB_UNINFORMED
PROB_UNINFORMED_SELL = PROB_UNINFORMED
# \lambda_i in the paper
PROB_INFORMED = 0.4
# NOTE: the order of these probabilities must match the order of EVENT_ALL.
ALL_PROB = [PROB_PRICE_DOWN, PROB_PRICE_UP, PROB_UNINFORMED_SELL, PROB_UNINFORMED_BUY, PROB_INFORMED]

def int_zeros(n):
    """ Return a length-n integer array of zeros. """
    # np.int was removed in NumPy 1.24; the built-in int is the portable spelling.
    return np.zeros(n, dtype=int)

class MarketMakerPolicy:
    """
    A class representing a policy for a market-making strategy.
    The policy specifies a change in price given an order imbalance.
    """

    def __init__(self):
        pass

    def price_delta(self, imbalance):
        pass

    def update(self, s, a, r, s_, a_):
        pass

class ThresholdPolicy(MarketMakerPolicy):
    """
    A market-making policy which increases (decreases) the quote when the
    order imbalance reaches the positive (negative) of a pre-specified
    integer threshold.
    """

    def __init__(self, threshold):
        """
        :param threshold: A positive integer representing the maximum absolute order imbalance.
        """
        self.threshold = threshold

    def price_delta(self, imbalance):
        """
        :param imbalance: The currently observed order imbalance
        :return: The integer change in the market-maker's quote
        """
        if imbalance == +self.threshold:
            return +1
        elif imbalance == -self.threshold:
            return -1
        else:
            return 0
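
# A minimal usage sketch (not part of the original file): with a threshold of 2,
# the quote moves only when the observed imbalance is exactly +2 or -2.
def _threshold_policy_example():
    policy = ThresholdPolicy(threshold=2)
    assert policy.price_delta(+2) == +1
    assert policy.price_delta(-2) == -1
    assert policy.price_delta(+1) == 0
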

class MarketSimulation:
    """
    A class representing an intraday simulation of a simple high-frequency order-driven market.
    """

    def __init__(self, mm_policy, max_t=MAX_T, initial_price=INITIAL_PRICE, probabilities=ALL_PROB):
        """
        :param mm_policy: The market-making policy
        :param max_t: The duration of the trading day in integer units
        :param initial_price: The initial price of the asset
        :param probabilities: The event probabilities for each element of EVENT_ALL
        """
        self.max_t = max_t
        self.probabilities = probabilities
        self.initial_price = initial_price
        self.mm_policy = mm_policy

    def simulate_events(self):
        """ Return a randomly-generated sequence of events covering the entire trading day.
        The events are chosen from EVENT_ALL. """
        return np.random.choice(EVENT_ALL, p=self.probabilities, size=self.max_t)

    def simulate_fundamental_price(self, events):
        """ Given a sequence of events, return a vector representing the time-series of
        the fundamental price. """
        price_changes = int_zeros(self.max_t)
        price_changes[events == EVENT_PRICE_CHANGE_DOWN] = -1
        price_changes[events == EVENT_PRICE_CHANGE_UP] = +1
        return self.initial_price + np.cumsum(price_changes)

    def simulate_uninformed_orders(self, events):
        """ Given a sequence of events, return a vector representing the time-series of
        order-flow from uninformed traders. """
        orders = int_zeros(self.max_t)
        orders[events == EVENT_UNINFORMED_BUY] = +1
        orders[events == EVENT_UNINFORMED_SELL] = -1
        return orders

    def informed_strategy(self, current_price, mm_price):
        """
        Simulate an informed trader, who buys when the asset is under-priced
        relative to the quote and sells when it is over-priced.
        :param current_price: The current fundamental price
        :param mm_price: The current market-maker quote
        :return: The total shares demanded by the informed trader
        """
        if current_price > mm_price:
            return 1
        elif current_price < mm_price:
            return -1
        else:
            return 0

    def mm_reward(self, current_fundamental_price, mm_current_price, order_sign):
        """
        Compute the change in the profit accrued to the market-maker.
        When a trader sells, the market-maker buys at its quote and gains the
        difference between the fundamental price and the quote; when a trader
        buys, the signs are reversed.
        :param current_fundamental_price: The current fundamental price.
        :param mm_current_price: The market-maker's current quote.
        :param order_sign: +1 if the most recent transaction was a buy,
                           -1 if it was a sell, 0 for no transaction.
        :return: The market-maker's profit from the transaction, or 0 if there was none.
        """
        if order_sign < 0:
            return current_fundamental_price - mm_current_price
        elif order_sign > 0:
            return mm_current_price - current_fundamental_price
        else:
            return 0

    def simulate_market(self, events, uninformed_orders, fundamental_price):
        """
        Simulate the entire market over a single trading day.
        :param events: A sequence of events chosen from EVENT_ALL
        :param uninformed_orders: The uninformed order-flow
        :param fundamental_price: The time-series of the fundamental price
        :return: A tuple containing:
                 time-series of fundamental prices,
                 time-series of market-maker's prices,
                 time-series of order-imbalance, time-series of rewards,
                 time-series of actions (changes to the market-maker's quote).
        """
        # Initialise time series
        mm_prices = int_zeros(self.max_t)
        order_imbalances = int_zeros(self.max_t)
        informed_orders = int_zeros(self.max_t)
        actions = int_zeros(self.max_t)
        rewards = np.zeros(self.max_t)
        # The time of the last change to the market-maker's quote
        t_mm = 0
        # Initialise the quote to the initial fundamental price
        mm_current_price = self.initial_price
        for t in range(self.max_t):
            # Calculate the order-flow for this time period: +1 for a buy order, -1 for a sell order
            if events[t] == EVENT_INFORMED_ARRIVAL:
                order = self.informed_strategy(fundamental_price[t], mm_current_price)
                informed_orders[t] = order
            else:
                order = uninformed_orders[t]
            # Calculate the aggregate order-imbalance (IMB) accrued since the last
            # change in quote, excluding the current period's order
            imbalance = np.sum(informed_orders[t_mm:t] + uninformed_orders[t_mm:t])
            # Update the market-maker's quote
            mm_price_delta = self.mm_policy.price_delta(imbalance)
            if mm_price_delta != 0:
                t_mm = t  # Record the time of the last quote change
            mm_current_price += mm_price_delta
            # Update time series
            order_imbalances[t] = imbalance
            mm_prices[t] = mm_current_price
            actions[t] = mm_price_delta
            rewards[t] = self.mm_reward(fundamental_price[t], mm_current_price, order)
            # Provide feedback to the market-maker's policy (required for learning)
            if t > 0:
                self.mm_policy.update(order_imbalances[t - 1], actions[t - 1], rewards[t - 1],
                                      imbalance, mm_price_delta)
        return fundamental_price, mm_prices, order_imbalances, rewards, actions

    def run(self):
        """
        Run the simulation.
        :return: A tuple containing:
                 time-series of fundamental prices,
                 time-series of market-maker's prices,
                 time-series of order-imbalance, time-series of rewards,
                 time-series of actions (changes to the market-maker's quote).
        """
        events = self.simulate_events()
        fundamental_price = self.simulate_fundamental_price(events)
        uninformed_orders = self.simulate_uninformed_orders(events)
        return self.simulate_market(events, uninformed_orders, fundamental_price)
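
# A minimal usage sketch (illustrative; the seed and threshold are assumed):
# simulate one trading day under a fixed threshold policy and report the
# market-maker's cumulative profit.
def _simulation_example():
    np.random.seed(42)  # assumed seed, for a reproducible sketch
    simulation = MarketSimulation(ThresholdPolicy(threshold=2))
    fundamental, quotes, imbalances, rewards, actions = simulation.run()
    print("Cumulative market-maker profit:", rewards.sum())
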

class QTable:
    """ A class representing a table of Q values for every state-action pair. """

    def __init__(self, table=None, all_actions=[-1, 0, +1], all_states=range(-2, 3)):
        self.actions = all_actions
        self.states = all_states
        self.num_states = len(all_states)
        self.num_actions = len(all_actions)
        # Assumes all_states is a range whose start is the most-negative state
        self.state_offset = abs(self.states.start)
        if table is None:
            self.Q = np.zeros((len(all_states), len(all_actions)))
        else:
            self.Q = table

    def state(self, index):
        return self.states[int(index)]

    def action(self, index):
        return self.actions[int(index)]

    def state_index(self, imbalance):
        """
        Map from a state to the index of a row of the table.
        Imbalances outside the state-space are clipped to the nearest boundary state.
        :param imbalance: The state of the market
        :return: The row index corresponding to this state.
        """
        s = int(imbalance) + self.state_offset
        max_index = self.num_states - 1  # avoid shadowing the built-in max()
        if s > max_index:
            return max_index
        elif s < 0:
            return 0
        else:
            return s

    def action_index(self, price_delta):
        """
        Map from an action to the index of a column of the table.
        Assumes the action-space is [-1, 0, +1].
        :param price_delta: The action
        :return: The column index corresponding to the action.
        """
        return int(price_delta) + 1

    def q_values(self, imbalance):
        """
        Map from a state to a vector of the q-values for each action in this state.
        :param imbalance: The state of the market.
        :return: A vector (slice) of Q-values for the specified state
        """
        return self.Q[self.state_index(imbalance), :]

    def q_value(self, imbalance, price_delta):
        """
        Return the q-value for the specified state and action.
        :param imbalance: The state
        :param price_delta: The action
        :return: A single scalar q-value for the state-action pair.
        """
        return self.Q[self.state_index(imbalance), self.action_index(price_delta)]

    def as_DataFrame(self):
        """ Return the q-values as a pandas DataFrame. """
        # A raw string stops "\D" being parsed as an (invalid) escape sequence
        return pd.DataFrame(self.Q,
                            columns=[r"$\Delta p=%s$" % a for a in self.actions],
                            index=self.states)

    def greedy_policy(self):
        """ Return the greedy policy as a dictionary mapping each state to its optimal action. """
        return {s: self.action(int(np.argmax(self.q_values(s)))) for s in self.states}

    def greedy_policy_fn(self):
        """ Return the greedy policy as a Python function which maps each state onto the greedy action for that state. """
        p = self.greedy_policy()
        return lambda s: p[s]
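
# A small sketch (hypothetical Q-values, not learned ones) showing how a
# hand-filled QTable maps imbalances onto greedy actions. The columns
# correspond to the default action-space [-1, 0, +1].
def _qtable_example():
    q = np.array([[0.5, 0.1, 0.0],   # state -2: lowering the quote looks best
                  [0.1, 0.2, 0.0],   # state -1
                  [0.0, 0.3, 0.0],   # state  0: holding looks best
                  [0.0, 0.2, 0.1],   # state +1
                  [0.0, 0.1, 0.5]])  # state +2: raising the quote looks best
    table = QTable(table=q)
    assert table.greedy_policy()[-2] == -1
    assert table.greedy_policy()[+2] == +1
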

class SarsaLearner(QTable):
    """
    An implementation of the SARSA on-policy control algorithm.
    """

    def __init__(self, table=None, all_actions=[-1, 0, +1], all_states=range(-2, 3), alpha=0.01, gamma=0.0):
        QTable.__init__(self, table, all_actions, all_states)
        self.alpha = alpha
        self.gamma = gamma

    def update(self, s, a, r, s_, a_):
        """
        Update the Q values from feedback with the environment using the SARSA rule:
        Q(s, a) <- Q(s, a) + alpha * (r + gamma * Q(s', a') - Q(s, a))
        :param s: The previous state
        :param a: The previous action
        :param r: The previous reward
        :param s_: The current state
        :param a_: The current action
        """
        self.Q[self.state_index(s), self.action_index(a)] += \
            self.alpha * (r + self.gamma * self.q_value(s_, a_) - self.q_value(s, a))
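
# A single hand-computed SARSA step as a sketch (the values are assumed):
# starting from Q = 0 everywhere, observing (s=0, a=0, r=1, s'=0, a'=0) with
# alpha = 0.1 and gamma = 0 moves Q(0, 0) to 0 + 0.1 * (1 + 0 - 0) = 0.1.
def _sarsa_update_example():
    learner = SarsaLearner(alpha=0.1, gamma=0.0)
    learner.update(s=0, a=0, r=1.0, s_=0, a_=0)
    assert abs(learner.q_value(0, 0) - 0.1) < 1e-12
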

class LearningMarketMaker(MarketMakerPolicy):
    """ A market-making policy which uses SARSA for action-selection. """

    def __init__(self, learner, epsilon=0.05):
        MarketMakerPolicy.__init__(self)
        self.learner = learner
        self.epsilon = epsilon

    def price_delta(self, s):
        # Epsilon-greedy action selection: explore with probability epsilon,
        # otherwise choose uniformly amongst the actions with the highest Q-value
        if np.random.random() <= self.epsilon:
            action = np.random.choice([-1, 0, +1])
        else:
            values = self.learner.q_values(s)
            max_value = np.max(values)
            action = self.learner.action(np.random.choice(np.where(values == max_value)[0]))
        return action

    def update(self, s, a, r, s_, a_):
        self.learner.update(s, a, r, s_, a_)

def expected_reward_by_state_action(policy, all_actions=[-1, 0, 1], all_states=range(-2, 3), probabilities=ALL_PROB,
                                    samples=1000):
    """
    Estimate a value function for state-action pairs using Monte-Carlo simulation.
    :param policy: The policy to evaluate.
    :param all_actions: The action-space for the agent.
    :param all_states: The state-space for the agent.
    :param probabilities: The event probabilities for the simulation.
    :param samples: The number of sampled trading days.
    :return: A QTable containing the mean reward observed in each state-action pair.
    """
    # Initialise the array containing average rewards in each state and each action, for each trajectory
    result = np.zeros((samples, len(all_states), len(all_actions)))
    for i in range(samples):
        simulation = MarketSimulation(policy, probabilities=probabilities)
        _, _, states, rewards, actions = simulation.run()
        # State-action pairs that never occur in a trajectory contribute NaN,
        # which np.nanmean ignores when averaging across trajectories
        result[i, :, :] = \
            np.reshape(
                [np.nanmean(rewards[(states == state) &
                                    (actions == action)]) for state in all_states for action in all_actions],
                (len(all_states), len(all_actions)))
    return QTable(np.nanmean(result, axis=0), all_actions, all_states)
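
# A usage sketch (the sample count is assumed, chosen small for speed): a
# Monte-Carlo estimate of the expected reward in each state-action pair under
# a simple threshold policy, displayed as a DataFrame.
def _expected_reward_example():
    value_table = expected_reward_by_state_action(ThresholdPolicy(threshold=2), samples=100)
    print(value_table.as_DataFrame())
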

def learning_experiment(iterations=5000, probabilities=ALL_PROB, alpha=0.01, gamma=0., epsilon=0.05):
    """ Train a SARSA market-maker over many simulated trading days and return
    the learner together with the results of the final day. """
    learner = SarsaLearner(alpha=alpha, gamma=gamma)
    market_maker = LearningMarketMaker(learner, epsilon=epsilon)
    # Pass the probabilities through to the simulation (previously they were ignored)
    simulation = MarketSimulation(market_maker, probabilities=probabilities)
    for i in range(iterations):
        results = simulation.run()
    return learner, results
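
# A minimal entry point (not part of the original file) tying the pieces
# together: train the SARSA market-maker, then print the greedy policy it has
# learned. The iteration count is assumed, smaller than the default 5000 for
# a quick run.
if __name__ == "__main__":
    learner, (fundamental, quotes, imbalances, rewards, actions) = learning_experiment(iterations=1000)
    print("Learned greedy policy (imbalance -> quote change):")
    print(learner.greedy_policy())
    print("Final-day market-maker profit:", rewards.sum())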