-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpermutationtest.pyx
197 lines (155 loc) · 6.58 KB
/
permutationtest.pyx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
from cpython.mem cimport PyMem_Malloc, PyMem_Realloc, PyMem_Free
from cython.view cimport array as cvarray
import numpy as np
import cython
from libc.math cimport fabs
from functools import reduce
import operator as op
def ncr(n, r):
r = min(r, n-r)
numer = reduce(op.mul, range(n, n-r, -1), 1)
denom = reduce(op.mul, range(1, r+1), 1)
return numer / denom
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
@cython.nonecheck(False)
@cython.initializedcheck(False)
cdef inline double mean(double *input_arr, int length):
cdef double sum = 0.0
cdef int i
for i in range(length):
sum += input_arr[i]
return sum/length
# returns raw bitmasks
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
@cython.nonecheck(False)
@cython.initializedcheck(False)
def raw_gosper_bitmask(int A_len, long long K, long long work_divider):
#subsets = []
cdef long long num_combinations = 0
cdef long long N = A_len
cdef int N_int = A_len
# iterate over subsets of size K
cdef long long mask = ((<long long>1)<<K)-1 # 2^K - 1 is always a number having exactly K 1 bits
cdef long long a, b
cdef int n
cdef long long permutations_per_job = ncr(A_len, K) // work_divider
split_masks = []
while mask < ((<long long>1)<<N):
if mask == 0:
break
if num_combinations % permutations_per_job == 0:
split_masks.append(mask)
# determine next mask with Gosper's hack
a = mask & -mask # determine rightmost 1 bit
b = mask + a # determine carry bit
mask = <long long>(((mask^b)>>2)/a) | b # produce block of ones that begins at the least-significant bit
#mask = b +(((b^mask)/a)>>2) # wikipedia version, no difference to above
num_combinations += 1
return (split_masks, permutations_per_job, num_combinations)
# two-sided permutation test of group means
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
@cython.nonecheck(False)
@cython.initializedcheck(False)
def permutation_test_parallel(double[:] A, long long K, double given_mean, long long mask_start, long long mask_end):
#subsets = []
cdef long long num_combinations = 0
cdef long long N = A.size
cdef int N_int = A.size
#subset = [0] * K
cdef int subset_size = <int>K
cdef int subset_remainder_size = <int>(N-K)
cdef double *subset = <double *> PyMem_Malloc(subset_size * sizeof(double))
cdef double *subset_remainder = <double *> PyMem_Malloc(subset_remainder_size * sizeof(double))
cdef int subset_index = 0
cdef int subset_remainder_index = 0
# iterate over subsets of size K
cdef long long mask = mask_start
#((<long long>1)<<K)-1 # 2^K - 1 is always a number having exactly K 1 bits
cdef long long a, b
cdef int n
# two-tailed
cdef double teststatistic_difference = fabs(given_mean)
cdef long long extreme_teststatistic_count = 0
while mask < mask_end:
subset_index = 0
subset_remainder_index = 0
for n in range(N_int):
if ((mask>>n)&1) == 1:
subset[subset_index] = A[n]
subset_index += 1
else:
subset_remainder[subset_remainder_index] = A[n]
subset_remainder_index += 1
#subsets.append(subset)
# catch special case
if mask == 0:
break
# insert your own test
# currently does two-tailed test of means
if fabs(mean(subset, subset_size) - mean(subset_remainder, subset_remainder_size)) >= teststatistic_difference:
extreme_teststatistic_count += 1
# determine next mask with Gosper's hack
a = mask & -mask # determine rightmost 1 bit
b = mask + a # determine carry bit
mask = <long long>(((mask^b)>>2)/a) | b # produce block of ones that begins at the least-significant bit
num_combinations += 1
PyMem_Free(subset)
PyMem_Free(subset_remainder)
return (extreme_teststatistic_count, num_combinations)
# two-sided permutation test of group means
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
@cython.nonecheck(False)
@cython.initializedcheck(False)
def permutation_test(double[:] A, long long K, double given_mean):
#subsets = []
cdef long long num_combinations = 0
cdef long long N = A.size
cdef int N_int = A.size
#subset = [0] * K
cdef int subset_size = <int>K
cdef int subset_remainder_size = <int>(N-K)
cdef double *subset = <double *> PyMem_Malloc(subset_size * sizeof(double))
cdef double *subset_remainder = <double *> PyMem_Malloc(subset_remainder_size * sizeof(double))
cdef int subset_index = 0
cdef int subset_remainder_index = 0
# iterate over subsets of size K
cdef long long mask = ((<long long>1)<<K)-1 # 2^K - 1 is always a number having exactly K 1 bits
cdef long long a, b
cdef int n
# two-tailed
cdef double teststatistic_difference = fabs(given_mean)
cdef long long extreme_teststatistic_count = 0
while mask < ((<long long>1)<<N):
subset_index = 0
subset_remainder_index = 0
for n in range(N_int):
if ((mask>>n)&1) == 1:
subset[subset_index] = A[n]
subset_index += 1
else:
subset_remainder[subset_remainder_index] = A[n]
subset_remainder_index += 1
#subsets.append(subset)
# catch special case
if mask == 0:
break
# insert your own test
# currently does two-tailed test of means
if fabs(mean(subset, subset_size) - mean(subset_remainder, subset_remainder_size)) >= teststatistic_difference:
extreme_teststatistic_count += 1
# determine next mask with Gosper's hack
a = mask & -mask # determine rightmost 1 bit
b = mask + a # determine carry bit
mask = <long long>(((mask^b)>>2)/a) | b # produce block of ones that begins at the least-significant bit
num_combinations += 1
PyMem_Free(subset)
PyMem_Free(subset_remainder)
return (extreme_teststatistic_count, num_combinations)