-
Notifications
You must be signed in to change notification settings - Fork 0
/
causal.py
628 lines (519 loc) · 26.3 KB
/
causal.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
import numpy as np
import pandas as pd
import networkx as nx
from causal_ccm.causal_ccm import ccm
from tigramite.pcmci import PCMCI
from tigramite.independence_tests import GPDC
import tigramite.data_processing as dp
from teaspoon.parameter_selection.FNN_n import FNN_n
from teaspoon.parameter_selection.MI_delay import MI_for_delay
from statsmodels.tsa.stattools import grangercausalitytests
from kernels import K_ID, K_dft2, K_dct, K_dwt, K_CEXP
from graph_generation import generate_DAGs, generate_DAGs_pd, sparsify_graph, partially_direct
from regression import hist_linear, knn_regressor
from independence import joint_indep_test
from multiprocessing import cpu_count, get_context
from warnings import filterwarnings
filterwarnings('ignore')
# CONSTRAINT-BASED CAUSAL STRUCTURE LEARNING
def PC_alg(X_array, lambs, n_pretests, n_perms, n_steps, alpha, make_K, analyse, l_cond, r_opts, init, find_lambda):
    """
    PC algorithm: learn the undirected skeleton, then partially direct it with Meek's orientation rules.
    Inputs:
        X_array: (n_nodes, n_samples, n_preds) array with data according to dependencies specified in causal graph
        lambs: range to iterate over for optimal regularisation value of the kernel ridge regression used by the
               HSCIC (conditional independence test only), or a single given value
        n_pretests: number of tests to find optimal value for lambda
        n_perms: number of permutations performed when bootstrapping the null distribution
        n_steps: number of MC iterations in the CPT
        alpha: rejection threshold of the test
        make_K: function called to construct the kernel matrix
        analyse: (boolean) whether to print intermediate results
        l_cond: array of optimal lambda values for each size of conditional sets
        r_opts: rejection rate with optimal lambda
        init: method used to direct the undirected skeleton graph ('cond_set', 'cond_indep', 'max_p')
        find_lambda: (boolean) whether lambda values are learnt newly or taken from previous experiments
    Returns:
        skeleton: the sparsified undirected graph
        directed: graph partially directed according to Meek's orientation rules
        lamb_cond: learnt lambda values per conditional-set size
        rejects_opts: rejection rate with optimal lambda
    """
    # Step 1: prune the fully connected graph down to the causal skeleton
    skeleton, results_all, sepsets, lamb_cond, rejects_opts = sparsify_graph(
        X_array, lambs, n_pretests, n_perms, n_steps, alpha, make_K,
        l_cond, r_opts, analyse, find_lambda=find_lambda)
    if analyse:
        print('Full results:', results_all)
    # Step 2: orient edges as far as Meek's rules allow
    directed = partially_direct(skeleton, results_all, sepsets, init, analyse)
    return skeleton, directed, lamb_cond, rejects_opts
# REGRESSION-BASED CAUSAL STRUCTURE LEARNING
def RESIT(X_array, pred_points, _DAG, n_intervals, n_neighbours, n_perms, alpha, make_K, analyse, regressor):
    """
    Regression with subsequent independence test (RESIT).

    Each node is regressed on its parents (summing the per-parent fits); root nodes keep
    their raw data as "residuals". The candidate DAG is then accepted or rejected via a
    joint independence test on all residuals.

    Inputs:
        X_array: (n_nodes, n_samples, n_preds) array with data according to dependencies specified in causal graph
        pred_points: prediction points
        _DAG: candidate directed acyclic graph as {descendant: [parents]}
        n_intervals: number of intervals for the historical linear model
        n_neighbours: number of neighbours for kNN regression
        n_perms: number of permutations performed when bootstrapping the null distribution of the joint independence test
        alpha: rejection threshold of the joint independence test
        make_K: function called to construct the kernel matrix
        analyse: (boolean) whether the regression wants to be analysed for performance
        regressor: 'hist' or 'knn' to choose which regressor function to use
    Returns:
        reject: 1 if null rejected (candidate DAG rejected), 0 otherwise
        p_value: p-value of the joint independence test
    """
    n_nodes, n_samples, n_preds = X_array.shape
    fitted = np.zeros((n_nodes, n_samples, n_preds))
    resid = np.zeros((n_nodes, n_samples, n_preds))
    for node, parents in _DAG.items():
        if parents == []:
            # root node: no regression possible, treat the raw data as its residual
            resid[node] = X_array[node]
            continue
        # accumulate one regression fit per parent
        for par in parents:
            if regressor == 'hist':
                fitted[node] += hist_linear(n_intervals, X_array[par], X_array[node], pred_points, analyse)
            elif regressor == 'knn':
                fitted[node] += knn_regressor(n_neighbours, X_array[par], X_array[node], pred_points, analyse)
        resid[node] = X_array[node] - fitted[node]
    # residuals of a correct causal model should be jointly independent
    return joint_indep_test(resid, n_perms, alpha, make_K)
def eval_candidate_DAGs(X_array, pred_points, n_intervals, n_neighbours, n_perms, alpha, make_K, analyse, regressor, pd_graph):
    """
    Evaluate all possible candidate directed acyclic graphs given the number of variables
    and (optional) a partially directed graph.

    Each candidate DAG is scored by running RESIT in a worker process and recording the
    p-value of its joint independence test; the candidate with the LARGEST p-value
    (i.e. least evidence against joint independence of residuals) is returned.

    Inputs:
        X_array: (n_nodes, n_samples, n_preds) array with data according to dependencies specified in causal graph
        pred_points: prediction points
        n_intervals: number of intervals for the historical linear model
        n_neighbours: number of neighbours for kNN regression
        n_perms: number of permutations performed when bootstrapping the null distribution of the joint independence test
        alpha: rejection threshold of the joint independence test
        make_K: function called to construct the kernel matrix
        analyse: (boolean) whether the regression wants to be analysed for performance; if True, R-squared value is returned
                 with plots of 10 first functional samples and corresponding model predictions
        regressor: 'hist' or 'knn' to choose which regressor function to use
        pd_graph: partially directed graph based on constraint-based approach with Meek's orientation rules, or None
    Returns:
        candidate DAG with largest p-value of joint independence test, and that p-value
    """
    n_nodes, n_samples, n_preds = X_array.shape
    if pd_graph is None:
        # generate candidate DAGs without given partially directed graph
        DAGs_dict = generate_DAGs(n_nodes, analyse)
    else:
        # generate candidate DAGs considering given partially directed graph
        # (only DAGs consistent with the partially directed edges are enumerated)
        DAGs_dict = generate_DAGs_pd(pd_graph)
    # DAGs_dict is assumed to be keyed 0..len-1 so p_values can be indexed directly
    p_values = np.zeros(len(DAGs_dict))
    # iterate over each candidate DAG; 'spawn' avoids fork-related issues with numeric libraries
    with get_context('spawn').Pool(cpu_count()) as pool:
        # submit one RESIT evaluation per candidate DAG
        jobs = [(i, _DAG, pool.apply_async(RESIT, (X_array, pred_points, _DAG, n_intervals, n_neighbours, n_perms, alpha, make_K, analyse, regressor)))
                for i, _DAG in DAGs_dict.items()]
        for i, _DAG, job in jobs:
            # RESIT returns (reject, p_value); keep only the p-value
            p_values[i] = job.get()[-1]
            if analyse:
                print('Evaluating DAG #{}: {}'.format(i, _DAG))
                print('Independence test p-value:', p_values[i])
    # return candidate DAG with greatest p-value
    max_p_val_i = np.argmax(p_values)
    return DAGs_dict[max_p_val_i], p_values[max_p_val_i]
# ALTERNATIVE METHODS
def ccm_bivariate(x_array, alpha):
    """
    Computes the convergent cross mapping according to Sugihara et al (2012): https://www.science.org/doi/10.1126/science.1227079
    Inputs:
        x_array: the input series of shape (variables, time points); the unpacking below
                 requires exactly two variables
        alpha: significance level for correlation test
    Returns:
        DAG: learnt DAG as a dict {descendant: parent}; a value of [] means no parent,
             a scalar value is the single parent's index
        corr_coef: the correlation coefficient of the selected causal direction
        p_value: p-value of correlation test of the selected causal direction
        lag: embedding lag used (mean of the first mutual-information minima of both series)
    """
    n_nodes, n_obs = x_array.shape
    # take first local minimum of mutual information as lag (averaged over both series)
    x_lags = [MI_for_delay(x_array[i]) for i in range(n_nodes)]
    lag = int(np.sum(x_lags) / n_nodes)
    # use false nearest neighbour to find embedding library
    # NOTE(review): this 2-tuple unpack assumes n_nodes == 2 — confirm callers only pass bivariate data
    output_1, output_2 = [FNN_n(x_array[i], lag) for i in range(n_nodes)]
    # average the embedding dimensions suggested for each series (last element of FNN_n output)
    x_embed_dim = int(np.sum([output_1[-1], output_2[-1]])/2)
    # test whether X causes Y
    ccm_x0x1 = ccm(x_array[0], x_array[1], lag, x_embed_dim, n_obs)
    corr_coef_x0x1, p_value_x0x1 = ccm_x0x1.causality()
    # and whether Y causes X
    ccm_x1x0 = ccm(x_array[1], x_array[0], lag, x_embed_dim, n_obs)
    corr_coef_x1x0, p_value_x1x0 = ccm_x1x0.causality()
    if p_value_x0x1 < alpha and p_value_x1x0 < alpha:
        # both directions significant: keep the direction whose correlation is closest to 1
        cause = np.argmin([np.abs(1 - corr_coef_x0x1), np.abs(1 - corr_coef_x1x0)])
        if cause == 0:
            p_value = p_value_x0x1
            corr_coef = corr_coef_x0x1
            DAG = {0: [], 1: 0}
        elif cause == 1:
            p_value = p_value_x1x0
            corr_coef = corr_coef_x1x0
            DAG = {0: 1, 1: []}
        else:
            # unreachable: argmin over two entries is always 0 or 1; kept as a defensive default
            p_value = 1
            corr_coef = 0
            DAG = {0: [], 1: []}
    elif p_value_x1x0 < alpha:
        # only Y -> X significant
        p_value = p_value_x1x0
        corr_coef = corr_coef_x1x0
        DAG = {0: 1, 1: []}
    elif p_value_x0x1 < alpha:
        # only X -> Y significant
        p_value = p_value_x0x1
        corr_coef = corr_coef_x0x1
        DAG = {0: [], 1: 0}
    else:
        # NOTE(review): neither direction is significant at alpha, yet an edge is still
        # oriented whenever the correlations differ — confirm this tie-break is intended
        if corr_coef_x0x1 > corr_coef_x1x0:
            p_value = p_value_x0x1
            corr_coef = corr_coef_x0x1
            DAG = {0: [], 1: 0}
        elif corr_coef_x1x0 > corr_coef_x0x1:
            p_value = p_value_x1x0
            corr_coef = corr_coef_x1x0
            DAG = {0: 1, 1: []}
        else:
            # exactly equal correlations: no edge
            p_value = 1
            corr_coef = 0
            DAG = {0: [], 1: []}
    return DAG, corr_coef, p_value, lag
def granger(x_array, alpha):
    """
    Two-way Granger-causality test to find out whether X (x_array[0]) causes Y (x_array[1]) or vice-versa.

    Bug fix: the F-statistic variables were previously cross-labelled (the statistic of the
    x1->x0 test was stored as `ftest_statistic_x0x1` and vice versa), so the returned
    `ftest_stat` came from the opposite-direction test than the reported p-value. Each
    (statistic, p-value) pair now comes from the same `grangercausalitytests` call.

    Inputs:
        x_array: the input series of shape (variables, time points); requires exactly two variables
        alpha: confidence level
    Returns:
        DAG: learnt DAG as {descendant: parent} ([] means no parent, a scalar is the parent index)
        ftest_stat: F-test statistic of the selected direction (0 if no direction selected)
        p_value: p-value of the selected direction (1 if no direction selected)
        lag: maximum lag used (mean of the first mutual-information minima of both series)
    """
    n_nodes, n_obs = x_array.shape
    df = pd.DataFrame(columns=['x0', 'x1'])
    # take first local minimum of mutual information as lag (averaged over both series)
    x_lags = [MI_for_delay(x_array[i]) for i in range(n_nodes)]
    lag = int(np.sum(x_lags) / n_nodes)
    # grangercausalitytests checks whether the SECOND column Granger-causes the FIRST
    # 1. test whether Y (x1) causes X (x0)
    df['x0'] = x_array[0]
    df['x1'] = x_array[1]
    ftest_statistic_x1x0, p_value_x1x0 = grangercausalitytests(df[['x0', 'x1']], maxlag=[lag], verbose=False)[lag][0]['ssr_ftest'][0:2]
    # 2. test whether X (x0) causes Y (x1) by swapping the columns
    df['x0'] = x_array[1]
    df['x1'] = x_array[0]
    ftest_statistic_x0x1, p_value_x0x1 = grangercausalitytests(df[['x0', 'x1']], maxlag=[lag], verbose=False)[lag][0]['ssr_ftest'][0:2]
    if p_value_x0x1 < alpha and p_value_x1x0 < alpha:
        # both directions significant: ambiguous, so no edge is drawn
        p_value = 1
        ftest_stat = 0
        DAG = {0: [], 1: []}
    elif p_value_x1x0 < alpha:
        # only x1 -> x0 significant
        p_value = p_value_x1x0
        ftest_stat = ftest_statistic_x1x0
        DAG = {0: 1, 1: []}
    elif p_value_x0x1 < alpha:
        # only x0 -> x1 significant
        p_value = p_value_x0x1
        ftest_stat = ftest_statistic_x0x1
        DAG = {0: [], 1: 0}
    else:
        # neither direction significant: no edge
        p_value = 1
        ftest_stat = 0
        DAG = {0: [], 1: []}
    return DAG, ftest_stat, p_value, lag
def pcmci_graph(x_array, cond_indep_test):
    """
    Performs the PCMCI method as proposed by Runge et al (2018): https://arxiv.org/abs/1702.07007
    Inputs:
        x_array: the input data; the 3-way unpack below implies shape
                 (variables, samples, time points) — NOTE(review): the original docstring
                 said (variables, time points); confirm the expected shape with callers
        cond_indep_test: the conditional independence test (a tigramite CondIndTest instance)
    Returns:
        parents: dict mapping each variable to its significant lagged parents
        p_matrix: matrix of p-values from the PCMCI run
        val_matrix: matrix of test statistic values from the PCMCI run
    """
    n_nodes, n_samples, n_obs = x_array.shape
    # prepare the data: tigramite expects time as the leading axis
    # NOTE(review): .T reverses all three axes of a 3D array — verify this matches
    # what dp.DataFrame(analysis_mode='multiple') expects
    x_array_T = x_array.T
    dataframe = dp.DataFrame(x_array_T, analysis_mode='multiple')
    # initialise the class
    pcmci = PCMCI(dataframe=dataframe, cond_ind_test=cond_indep_test)
    # find the optimal lag as the mean first mutual-information minimum over variables
    # NOTE(review): x_array[i] is a 2D slice here — confirm MI_for_delay accepts it
    x_lags = [MI_for_delay(x_array[i]) for i in range(n_nodes)]
    lag = int(np.sum(x_lags) / n_nodes)
    # perform the PCMCI method (pc_alpha=None lets tigramite optimise the PC-step threshold)
    results = pcmci.run_pcmci(tau_max=lag, pc_alpha=None)
    parents = pcmci.return_parents_dict(results['graph'], results['val_matrix'])
    return parents, results['p_matrix'], results['val_matrix']
# WRAPPER FUNCTION TO CALL ALL CAUSAL DISCOVERY METHODS
def causal_discovery(cd_type, X_array, pred_points, n_intervals, n_neighbours, n_perms, alpha, make_K, lambs, n_pretests,
                     n_steps, analyse, regressor, l_cond, r_opts, init, find_lambda, pd_graph=None):
    """
    Wrapper function to discover causal graph by constraint-based and regression-based approaches (as specified in cd_type).

    Bug fix: the ValueError for an unknown cd_type previously listed only three of the six
    supported options; the message now names all of them.

    Inputs:
        cd_type: approach of causal discovery ('regression', 'constraint', 'combined', 'CCM', 'PCMCI', 'Granger')
        X_array: (n_nodes, n_samples, n_preds) array with data according to dependencies specified in dictionary edges
        pred_points: prediction points
        n_intervals: number of intervals for the historical linear model
        n_neighbours: number of neighbours for kNN regression
        n_perms: number of permutations performed when bootstrapping the null distribution of the joint and conditional independence tests
        alpha: rejection threshold of the joint and conditional independence tests
        make_K: function called to construct the kernel matrix
        lambs: range of optimal lambda based on pre-tests
        n_pretests: number of tests to find optimal value for lambda
        n_steps: number of MC iterations in the CPT
        analyse: (boolean) whether the regression wants to be analysed for performance; if True, R-squared value is returned
                 with plots of 10 first functional samples and corresponding model predictions
        regressor: 'hist' or 'knn' to choose which regressor function to use
        l_cond: array of optimal lambda values for each size of conditional sets
        r_opts: rejection rate with optimal lambda
        init: with which method the undirected skeleton graph shall be directed ('cond_set', 'cond_indep', 'max_p')
        find_lambda: (boolean) whether lambda values are learnt newly or taken from previous experiments
        pd_graph: given partially directed graph
    Returns:
        sparse_g: learnt skeleton ([] for methods that do not produce one)
        _DAGs: causal graph(s); fully directed for 'regression'/'combined', partially directed for 'constraint',
               and a list of per-sample graphs for 'CCM'/'Granger'
        p_values: p-value(s) of the method's test for the estimated graph(s)
        lamb_cond: array of potential lambda values for each size of conditional sets (0 if unused)
        rejects_opts: rejection rate with optimal lambda (0 if unused)
        lags: lag(s) used by the time-series methods (0 if unused)
        corr_values: correlation/statistic values of the time-series methods (0 if unused)
    """
    if cd_type == 'regression':
        # RESIT: exhaustively score candidate DAGs via regression + joint independence test
        sparse_g = []
        _DAGs, p_values = eval_candidate_DAGs(X_array, pred_points, n_intervals, n_neighbours, n_perms, alpha, make_K, analyse, regressor, pd_graph=pd_graph)
        lamb_cond, rejects_opts, lags, corr_values = 0, 0, 0, 0
    elif cd_type == 'constraint':
        # PC algorithm
        sparse_g, _DAGs, lamb_cond, rejects_opts = PC_alg(X_array, lambs, n_pretests, n_perms, n_steps, alpha, make_K, analyse, l_cond, r_opts, init, find_lambda)
        p_values, lags, corr_values = np.nan, 0, 0
    elif cd_type == 'combined':
        # combined: first PC algorithm, then RESIT on result of PC algorithm (i.e., on Markov equivalence class)
        sparse_g, pd_graph, lamb_cond, rejects_opts = PC_alg(X_array, lambs, n_pretests, n_perms, n_steps, alpha, make_K, analyse, l_cond, r_opts, init, find_lambda)
        _DAGs, p_values = eval_candidate_DAGs(X_array, pred_points, n_intervals, n_neighbours, n_perms, alpha, make_K, analyse, regressor, pd_graph=pd_graph)
        lags, corr_values = 0, 0
    elif cd_type == 'PCMCI':
        # PCMCI method
        sparse_g = []
        _DAGs, p_values, corr_values = pcmci_graph(X_array, cond_indep_test=GPDC())  # Conditional independence test based on Gaussian Process and Distance Correlation
        lags, lamb_cond, rejects_opts = 0, 0, 0
    elif cd_type == 'CCM':
        # convergent cross mapping, applied sample by sample
        sparse_g = []
        _DAGs = []
        p_values = []
        lags = []
        for i in range(X_array.shape[1]):
            dag, corr_coef, p_value, lag = ccm_bivariate(X_array[:, i, :], alpha)
            _DAGs.append(dag)
            p_values.append(p_value)
            lags.append(lag)
        lamb_cond, rejects_opts, corr_values = 0, 0, 0
    elif cd_type == 'Granger':
        # Granger-causality test, applied sample by sample
        sparse_g = []
        _DAGs = []
        p_values = []
        corr_values = []
        lags = []
        lamb_cond, rejects_opts = 0, 0
        for i in range(X_array.shape[1]):
            dag, corr_value, p_value, lag = granger(X_array[:, i, :], alpha)
            _DAGs.append(dag)
            corr_values.append(corr_value)
            p_values.append(p_value)
            lags.append(lag)
    else:
        raise ValueError('Chosen causal structure learning method not implemented. '
                         'Choose between "regression", "constraint", "combined", "CCM", "PCMCI", and "Granger".')
    return sparse_g, _DAGs, p_values, lamb_cond, rejects_opts, lags, corr_values
# EVALUATION METRICS
def precision(true_dag, cpdag):
    """
    Measures the fraction of directed edges that are correctly oriented; if no edges are oriented, return 1.
    A true positive is orienting an edge correctly, and a false positive is orienting an edge incorrectly.
    Inputs:
        true_dag: adjacency matrix of the true DAG
        cpdag: adjacency matrix of the learnt DAG
    Returns:
        precision: metric between 0 and 1
    """
    # an entry equals 2 exactly where true_dag has an edge AND cpdag orients it the same way
    correctly_oriented = np.sum((true_dag + cpdag - cpdag.T) == 2)
    # an entry equals 2 exactly where true_dag has an edge but cpdag orients it the opposite way
    wrongly_oriented = np.sum((true_dag + cpdag.T - cpdag) == 2)
    oriented_total = correctly_oriented + wrongly_oriented
    if oriented_total > 0:
        return correctly_oriented / oriented_total
    # nothing oriented at all counts as perfectly precise
    return 1
def recall(true_dag, cpdag):
    """
    Measures the fraction of the true DAG's edges that are correctly oriented in the learnt graph;
    if no edges are oriented, the result is 0.

    Bug fix: when the true DAG has no edges at all, the original divided by zero
    (yielding NaN / a runtime warning); 0.0 is now returned in that degenerate case.

    Inputs:
        true_dag: adjacency matrix of the true DAG
        cpdag: adjacency matrix of the learnt DAG
    Returns:
        recall: metric between 0 and 1
    """
    # an entry equals 2 exactly where true_dag has an edge AND cpdag orients it the same way
    tp = len(np.argwhere((true_dag + cpdag - cpdag.T) == 2))
    n_true_edges = np.sum(true_dag)
    # guard: an edgeless true DAG would otherwise cause a division by zero
    return tp / n_true_edges if n_true_edges > 0 else 0.0
def f1_score(pre, rec):
    """Return the harmonic mean of precision and recall; 0 when both are 0."""
    denom = pre + rec
    if denom > 0:
        return 2 * pre * rec / denom
    return 0
def shd(true_dag, dag):
    """
    Compute the Structural Hamming Distance between two adjacency matrices.
    Inputs:
        true_dag: the underlying DAG that was taken to generate the data
        dag: the DAG that was learnt by the causal structure learning algorithm
    Returns:
        the number of entries in which the two adjacency matrices disagree
    """
    # element-wise disagreement count between the two adjacency matrices
    return np.abs(true_dag - dag).sum()
def norm_shd(true_dag, dag):
    """
    Compute the Structural Hamming Distance normalised by the maximum number of mistaken edges.
    Inputs:
        true_dag: the underlying DAG that was taken to generate the data
        dag: the DAG that was learnt by the causal structure learning algorithm
    Returns:
        normalised SHD in [0, 1]
    """
    n = true_dag.shape[0]
    # at most n*(n-1) off-diagonal entries can disagree
    max_mistakes = n * (n - 1)
    return np.abs(true_dag - dag).sum() / max_mistakes
def shd_skeleton(true_dag, learnt_dag):
    """
    Computes the difference between the learnt causal skeleton and the undirected skeleton of the true DAG.
    Inputs:
        true_dag: the underlying DAG that was taken to generate the data
        learnt_dag: the DAG that was learnt by the causal structure learning algorithm
    Returns:
        SHD between causal skeletons of true and learnt DAGs
    """
    # symmetrising an adjacency matrix drops edge directions, leaving the skeleton
    sym_true = true_dag + true_dag.T
    sym_learnt = learnt_dag + learnt_dag.T
    return np.sum(np.abs(sym_true - sym_learnt))
def causal_eval(cd_type, X_dict, edges_dict, upper_limit=1, n_preds=100, n_intervals=8, n_neighbours=5, n_trials=200, n_perms=1000,
                alpha=0.05, K='K_ID', lambs=[1e-4, 1e-3], n_pretests=100, n_steps=50, analyse=False, regressor='hist',
                init='cond_set', find_lambda=False):
    """
    Evaluates the causal discovery algorithms based on precision, recall and F1-score
    Inputs:
        cd_type: approach of causal discovery ('regression', 'constraint', 'combined', 'CCM', 'PCMCI', 'Granger')
        X_dict: dictionary of data sets that were generated according to random DAG for each trial
        edges_dict: dictionary of "ground truth" edges for each trial that is taken as a base to generate the data X_dict;
                    each value in the dictionary is a DAG represented by one list for each node, where parents are written
                    behind a "|" sign (for example, "[0|1]" means that node 0 is the descendant and node 1 its parent);
                    each value must offer a .to_nx() conversion to a networkx graph
        upper_limit: upper limit of data
        n_preds: number of prediction points
        n_intervals: number of intervals for the historical linear model
        n_neighbours: number of neighbours for kNN regression
        n_trials: number of trials to compute percentage of rejections over
        n_perms: number of permutations performed when bootstrapping the null distribution of the joint independence test
        alpha: rejection threshold of the joint independence test
        K: name of the function called to construct the kernel matrix ('K_ID', 'K_dft2', 'K_dct', 'K_dwt', 'K_CEXP')
        lambs: range of optimal lambda based on pre-tests
               (NOTE(review): mutable default argument — harmless here only because it is never mutated)
        n_pretests: number of tests to find optimal value for lambda
        n_steps: number of MC iterations in the CPT
        analyse: (boolean) whether the algorithm including regression wants to be analysed for performance;
                 if True, multiple values are printed including the true underlying DAG, the R-squared value
                 and plots of 10 first functional samples with corresponding model predictions
        regressor: 'hist' or 'knn' to choose which regressor function to use
        init: with which method the undirected skeleton graph shall be directed ('cond_set', 'cond_indep', 'max_p')
        find_lambda: (boolean) whether lambda values are learnt newly or taken from previous experiments
    Returns:
        precisions, recalls, f1_scores, SHDs, CPDAG, p_value
    """
    n_vars, n_samples, n_obs = X_dict[0].shape
    pred_points = np.linspace(0, upper_limit, n_preds)
    sparse_graphs = {}  # learnt skeleton per trial
    CPDAG = {}          # learnt (partially) directed graph(s) per trial
    # object dtype: entries may be scalars ('regression') or per-sample lists ('CCM'/'Granger')
    p_value = np.zeros(n_trials, dtype=object)
    corr_values = {}
    precisions = np.zeros(n_trials)
    recalls = np.zeros(n_trials)
    f1_scores = np.zeros(n_trials)
    # one SHD per sample for CCM/Granger; scalar SHDs broadcast across the whole row otherwise
    SHDs = np.zeros((n_trials, n_samples))
    # per-conditional-set-size lambda values / rejection rates, carried over between trials
    l_cond = np.zeros(n_vars - 2)
    r_opts = np.zeros(n_vars - 2)
    for i in range(n_trials):
        # resolve the kernel constructor from its name
        # NOTE(review): this selection is loop-invariant and could be hoisted above the loop
        if K == 'K_ID':
            make_K = K_ID
        elif K == 'K_dft2':
            make_K = K_dft2
        elif K == 'K_dct':
            make_K = K_dct
        elif K == 'K_dwt':
            make_K = K_dwt
        elif K == 'K_CEXP':
            make_K = K_CEXP
        else:
            raise ValueError('Kernel not implemented')
        if (X_dict[i].shape[0] == 2) and (cd_type == 'constraint' or cd_type == 'combined'):
            raise ValueError('Constraint and combined causal structure learning '
                             'only feasible with more than 2 variables.')
        if analyse:
            print('-------------')
            print('True DAG:', edges_dict[i])
        sparse_graphs[i], CPDAG[i], p_value[i], lamb_cond, rejects_opts, lags, corr_values[i] = causal_discovery(cd_type, X_dict[i], pred_points, n_intervals,
                                                                                                                 n_neighbours, n_perms, alpha, make_K, lambs,
                                                                                                                 n_pretests, n_steps, analyse, regressor,
                                                                                                                 l_cond, r_opts, init, find_lambda)
        # updating l_cond and r_opts with learnt optimal lambda values
        l_cond, r_opts = lamb_cond, rejects_opts
        # transform true DAG and learnt CPDAG into adjacency matrix where rows are cause and columns are effect variables
        DAG_adj = np.zeros((len(edges_dict[i].to_nx().nodes()), len(edges_dict[i].to_nx().nodes())))
        for edge in edges_dict[i].to_nx().edges():
            DAG_adj[edge] = 1
        if cd_type == 'CCM' or cd_type == 'Granger':
            SHDs_list = []
            # iterate over all n_samples causal structures that were learnt with CCM or Granger causality
            for cpdag in CPDAG[i]:
                CPDAG_adj = np.zeros((len(edges_dict[i].to_nx().nodes()), len(edges_dict[i].to_nx().nodes())))
                # parent entries may be [] (no parent) or a scalar node index
                for d, p in cpdag.items():
                    CPDAG_adj[p, d] = 1
                # calculate SHD
                SHDs_list.append(shd(DAG_adj, CPDAG_adj))
            SHDs[i] = np.asarray(SHDs_list)
            if analyse:
                print('SHD:', SHDs[i])
        elif cd_type == 'PCMCI':
            CPDAG_adj = np.zeros((len(edges_dict[i].to_nx().nodes()), len(edges_dict[i].to_nx().nodes())))
            # aggregate PCMCI link strengths per (descendant, parent) pair across lags
            strengths = {}
            for d, p in CPDAG[i].items():
                strengths[d] = {}
                # first pass: initialise accumulators for every non-self parent
                for p_i in p:
                    if p_i[0] != d:
                        strengths[d][p_i[0]] = 0
                # second pass: sum the val_matrix entries over all lags of that parent
                for p_i in p:
                    if p_i[0] != d:  # ignore self-loops
                        strengths[d][p_i[0]] += corr_values[i][p_i[0]][d][abs(p_i[1])]
            # orient each pair towards the direction with the larger aggregated strength
            for d_i in strengths.keys():
                for d_j in strengths.keys():
                    if d_i == d_j:
                        continue
                    else:
                        if (d_j in strengths[d_i].keys()) and (d_i in strengths[d_j].keys()):
                            # both directions detected: keep the stronger one, drop ties
                            if strengths[d_i][d_j] > strengths[d_j][d_i]:
                                CPDAG_adj[d_j, d_i] = 1
                            elif strengths[d_j][d_i] > strengths[d_i][d_j]:
                                CPDAG_adj[d_i, d_j] = 1
                            else:
                                CPDAG_adj[d_i, d_j] = 0
                        elif (d_j in strengths[d_i].keys()) and (d_i not in strengths[d_j].keys()):
                            # only d_j -> d_i detected
                            CPDAG_adj[d_j, d_i] = 1
                        elif (d_j not in strengths[d_i].keys()) and (d_i in strengths[d_j].keys()):
                            # only d_i -> d_j detected
                            CPDAG_adj[d_i, d_j] = 1
                        else:
                            # no link detected in either direction
                            CPDAG_adj[d_i, d_j] = 0
                            CPDAG_adj[d_j, d_i] = 0
            # calculate SHD (scalar, broadcast across the trial's row)
            SHDs[i] = shd(DAG_adj, CPDAG_adj)
            if analyse:
                print('SHD:', SHDs[i][0])
        else:
            # 'regression', 'constraint' and 'combined' return a single graph per trial
            CPDAG_adj = np.zeros((len(edges_dict[i].to_nx().nodes()), len(edges_dict[i].to_nx().nodes())))
            for d, p in CPDAG[i].items():
                CPDAG_adj[p, d] = 1
            # calculate SHD (scalar, broadcast across the trial's row)
            SHDs[i] = shd(DAG_adj, CPDAG_adj)
            if analyse:
                if cd_type == 'regression':
                    print('Learned DAG:', CPDAG[i])
                    print('SHD:', SHDs[i][0])
        # calculate precision and recall
        # NOTE(review): for 'CCM'/'Granger' this uses the CPDAG_adj of the LAST sample only — confirm intended
        precisions[i] = precision(DAG_adj, CPDAG_adj)
        recalls[i] = recall(DAG_adj, CPDAG_adj)
        # calculate F1-score
        f1_scores[i] = f1_score(precisions[i], recalls[i])
        if analyse:
            if cd_type == 'constraint':
                print('Precision:', precisions[i])
                print('Recall:', recalls[i])
                print('SHD:', SHDs[i][0])
    return precisions, recalls, f1_scores, SHDs, CPDAG, p_value