-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparse_data.py
460 lines (393 loc) · 16 KB
/
parse_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
# parse_data.py
# %%
import utils
from math import sqrt, atan, ceil, pi
import numpy as np
"""debug global property"""
from control import pstatus
# pstatus = "release"
# pstatus = "debug"
""" 主要做的是:1.数据有效化 2.segment每次刺激 """
""" 数据的保存形式全部为np.ndarray """
TBD = -10
def f2sec(n,fps): # 将帧数转化为秒
return n / fps
'''from scipy.signal import butter, filtfilt
def butter_bandpass(lowcut, highcut, fs, order=4):
nyq = 0.5 * fs
low = lowcut / nyq
high = highcut / nyq
b, a = butter(order, [low, high], btype='band')
return b, a
def butter_bandpass_filter(data, lowcut, highcut, fs, order=4):
b, a = butter_bandpass(lowcut, highcut, fs, order=order)
y = filtfilt(b, a, data)
return y'''
"""
# 示例用法
fs = 100 # 采样频率
lowcut = 0.01 # 低频截止频率
highcut = 3 # 高频截止频率
order = 4 # 滤波器阶数
# 应用滤波器
filtered_data = butter_bandpass_filter(y, lowcut, highcut, fs, order=order)
"""
class DataParser:
def __init__(self, before=0.5, after=1.5, fps=60, skip_n=1, light=True):
self.X1 = []
self.Y1 = []
self.X2 = []
self.Y2 = []
self.X_mid = []
self.Y_mid = []
self.K = []
self.D = []
self.Theta = []
self.__available__ = []
self.before = before
self.after = after
self.frames = [] # frame numbers
self.frames_adj = [] # adjacent to next frame
self.has_light = light # if there is light information
self.light_frames = [] # all the frames when the light is on
self.stimulus = [] # all the judged stimulate frame number
self.durings = [] # list of index(data indices) that in each stimulate section
self.datakind = ''
self.timestr = ''
self.fps = fps
self.skip_n = skip_n
""" change all scaler of data """
def data_change_ratio(self, ratio):
assert ratio > 0, "Wrong Value"
if self.datakind == 'fb':
self.X1 = [x*ratio for x in self.X1]
self.X2 = [x*ratio for x in self.X2]
self.Y1 = [y*ratio for y in self.Y1]
self.Y2 = [y*ratio for y in self.Y2]
self.D = [d*ratio for d in self.D]
self.X_mid *= ratio
self.Y_mid *= ratio
# self.K = [] # self.K 不需要修改
# self.Theta = [] # self.Theta 不需要修改
'''
for each in self.__available__:
...
'''
""" return every possible frame number in each segment of stimulus, return list """
def sti_segment(self) -> list:
sti_sections = []
for sti in self.stimulus:
sti_sections.append([])
left = sti - ceil(self.before*self.fps)
right = sti + ceil(self.after *self.fps)
for f in range(left, right+1):
if f in self.frames:
# sti_sections[-1].append(self.frames.index(f))
idx = np.where(self.frames == f)[0][0]
sti_sections[-1].append(idx)
assert len(sti_sections) == len(self.stimulus), "Wrong Value"
sti_sections = [np.array(x) for x in sti_sections] # 转为np.array,好用indice格式取数据
return sti_sections
""" 统一用frame data_format """
def parse_light(self, file_light, fps):
light_data = [int(x) for x in file_light.readlines()]
stimulus = []
for i, t in enumerate(light_data):
if i==0:
stimulus.append(t)
elif f2sec(t-light_data[i-1],fps) > 1: # 间隔0.5s就算不同的刺激
stimulus.append(t)
self.light_frames = light_data
self.stimulus = stimulus
def parse_fbpoints(self,file_f,file_b, fps):
self.datakind = 'fb'
self.timestr = utils.timestr()
# 声明所有可用的数据
self.__available__ = ['X1','Y1','X2','Y2','X_mid','Y_mid','K','D','Theta','frames','frames_adj','durings']
data1 = file_f.readlines()
data2 = file_b.readlines()
self.X1 = {} # {frame: value} 格式
self.Y1 = {}
""" X1, X2因为不是同时采样,对应的frame可能不一样 """
nframe_1 = data1[-1].split()[0]
nframe_1 = int(nframe_1)
for i in data1:
frame, coords = i.split()
y,x = tuple(coords.split(','))
x=float(x)
y=-float(y)
# frame = int(frame)
self.X1[frame] = x
self.Y1[frame] = y
self.X2 = {}
self.Y2 = {}
nframe_2 = data2[-1].split()[0]
nframe_2 = int(nframe_2)
for i in data2:
frame, coords = i.split()
y,x = tuple(coords.split(','))
x=float(x)
y=-float(y)
self.X2[frame] = x
self.Y2[frame] = y
self.num1 = len(self.X1)
self.num2 = len(self.X2)
self.frames = []
self.X_mid = []
self.Y_mid = []
self.K=[]
self.D = []
self.Theta=[]
self.zerot = 0 # 可以计算出来
# 对帧
for f in range(min(nframe_1, nframe_2)):
i = str(f) # key也可以当做一种下标
if i in self.X1 and i in self.X2:
if self.X1[i]!=0 and self.X2[i]!=0:
""" 有效帧 """
self.frames.append(f)
xmid = (self.X1[i]+self.X2[i])/2
ymid = (self.Y1[i]+self.Y2[i])/2
self.X_mid.append(xmid)
self.Y_mid.append(ymid)
dist=sqrt((self.X2[i]-self.X1[i])*(self.X2[i]-self.X1[i]) + (self.Y2[i]-self.Y1[i])*(self.Y2[i]-self.Y1[i]))
if self.X2[i] - self.X1[i]==0:
k=0
else:
k=(self.Y2[i]-self.Y1[i])/(self.X2[i]-self.X1[i])
self.D.append(dist)
self.K.append(k)
self.Theta.append(atan(k)*180/pi)
""" 这里的frames一定是统一的,而且一定是在X1, X2里有值的 """
""" 过滤无效值 """
self.X1 = [float(self.X1[str(f)]) for f in self.frames]
self.Y1 = [float(self.Y1[str(f)]) for f in self.frames]
self.X2 = [float(self.X2[str(f)]) for f in self.frames]
self.Y2 = [float(self.Y2[str(f)]) for f in self.frames]
""" 全部转为np.array """
self.frames = np.array(self.frames)
self.num = len(self.frames)
assert self.num > 0, "Data error"
self.frames_adj= np.zeros((self.num-1,))
for i in range(self.num-1):
if self.frames[i+1] - self.frames[i] <= self.skip_n:
self.frames_adj[i] = 1
self.X1 = np.array(self.X1); self.Y1 = np.array(self.Y1); self.X2 = np.array(self.X2); self.Y2 = np.array(self.Y2)
self.X_mid = np.array(self.X_mid); self.Y_mid = np.array(self.Y_mid)
self.K = np.array(self.K); self.D = np.array(self.D); self.Theta = np.array(self.Theta)
self.smooth_thetas(90)
if self.has_light:
self.durings = self.sti_segment()
def parse_feature_result(self, file_f,file_b, fps):
self.datakind = 'fb'
self.parse_time = utils.timestr()
self.__available__ = ['X1','Y1','X2','Y2','X_mid','Y_mid','K','D','Theta','frames','assist_angle_f','assist_angle_b','frames_adj','frame_interval','durings'] # 所有可供使用的数据
data_f = file_f.readlines()
data_b = file_b.readlines()
self.X1 = {}
self.Y1 = {}
self.assist_angle_f = {}
frame = 0
# 逐行解析
for line in data_f:
items = line.split()
frame = items[0]
if items[1] == 'angle': # if len(items) == 3:
self.assist_angle_f[frame] = items[2]
elif 'black' in items[1] or 'relocate' in items[1]:
continue
else:
y, x = items[1].replace(',',''), items[2]
x = float(x)
y = -float(y)
self.X1[frame] = x
self.Y1[frame] = y
self.X2 = {}
self.Y2 = {}
self.assist_angle_b = {}
# 逐行解析后点
for line in data_b:
items = line.split()
frame = items[0]
if items[1] == 'angle': # if len(items) == 3:
self.assist_angle_b[frame] = items[2]
elif 'black' in items[1] or 'relocate' in items[1]:
continue
else:
y, x = items[1].replace(',',''), items[2]
x = float(x)
y = -float(y)
self.X2[frame] = x
self.Y2[frame] = y
# 对帧
"""nframe表示最大的帧, num表示帧数量"""
nframe_1 = int(data_f[-1].split()[0])
nframe_2 = int(data_b[-1].split()[0])
self.num1 = len(self.X1)
self.num2 = len(self.X2)
'''以下都使用np.array格式'''
self.frames = []
self.zerot = TBD # 无效帧的数量
# 重要: range记得+1
for f in range(min(nframe_1, nframe_2) + 1):
i = str(f) # key should be str
if i in self.X1 and i in self.X2:
self.frames.append(f)
self.frames = np.array(self.frames) # 表示有效帧
# 计算帧特征
self.nframe = np.max(self.frames)
self.num = self.frames.size
self.zerot = min(self.num1, self.num2) - self.num
# print('num:', self.num, 'nframe:', self.nframe, 'zerot:', self.zerot)
# 提取中心点
self.X_mid = []
self.Y_mid = []
for f in self.frames:
i = str(f)
xmid = (self.X1[i]+self.X2[i])/2
ymid = (self.Y1[i]+self.Y2[i])/2
self.X_mid.append(xmid)
self.Y_mid.append(ymid)
self.X_mid = np.array(self.X_mid); self.Y_mid = np.array(self.Y_mid)
# 提取(计算)角度
self.D = []
self.K = []
self.Theta = []
for f in self.frames:
i = str(f)
point_f = np.array([self.X1[i], self.Y1[i]])
point_b = np.array([self.X2[i], self.Y2[i]])
dist = np.linalg.norm(point_b - point_f)
dx, dy = point_b - point_f # numpy.array解包
if abs(dx) < 1e-6: # 竖直
if abs(dy) < 1e-6:
raise Exception("前后点重合")
k = np.inf
theta = 90
else:
k = - dy / dx # 因为之前 y = -y
theta = atan(k)*180/pi
self.D.append(dist)
self.K.append(k)
self.Theta.append(theta)
self.K = np.array(self.K); self.D = np.array(self.D); self.Theta = np.array(self.Theta)
# 注:theta要连续,而atan生成的不连续,所以要处理
self.smooth_thetas(90) # 90度以上认为是跳变
# 过滤出有效值(并转为np.array, dtype=np.float)
"""这里的frames一定是统一的,而且一定是在X1, X2里有值的"""
self.X1 = np.array([float(self.X1[str(f)]) for f in self.frames])
self.Y1 = np.array([float(self.Y1[str(f)]) for f in self.frames])
self.X2 = np.array([float(self.X2[str(f)]) for f in self.frames])
self.Y2 = np.array([float(self.Y2[str(f)]) for f in self.frames])
self.assist_angle_f = np.array([float(self.assist_angle_f[str(f)]) for f in self.frames])
self.assist_angle_b = np.array([float(self.assist_angle_b[str(f)]) for f in self.frames])
# 计算帧近邻
assert self.num > 0, "Data error"
# self.frames_adj = np.zeros((self.num-1,), dtype=np.bool)
# 注意:np.where返回的是tuple
self.frames_adj = np.where(self.frames[1:] - self.frames[:-1] <= self.skip_n)[0]
# 计算帧间隔
# self.frame_interval = np.zeros((self.num-1,), dtype=np.int)
self.frame_interval = self.frames[1:] - self.frames[:-1]
# 分割刺激
self.smooth_thetas(90) # 90度以上认为是跳变
if self.has_light:
self.durings = self.sti_segment()
''' end '''
def smooth_thetas(self,threshold: np.array):
smoothed_thetas = self.Theta.copy()
for i in range(1, len(smoothed_thetas)):
diff = smoothed_thetas[i] - smoothed_thetas[i-1]
if diff > threshold: # 由负跳变为正
smoothed_thetas[i:] -= 180
elif diff < -threshold: # 由正跳变为负
smoothed_thetas[i:] += 180
thetas = smoothed_thetas - smoothed_thetas[0] # 以第一个角度为0
self.Theta = thetas
def parse_center_angle(self, file_center,file_angle,fps):
self.datakind = 'ca'
self.timestr = utils.timestr()
self.__available__ = ['X_mid','Y_mid','Theta','frames']
data1 = file_center.readlines()
data2 = file_angle.readlines()
self.X1 = []
self.Y1 = []
self.X2 = []
self.Y2 = []
self.K = []
self.D = []
""" 重置:防止污染数据 """
""" center和angle是同时记录的 """
self.X_mid = []
self.Y_mid = []
for i in data1:
frame, x, y = i.split()
"""统一以', '分割和' '分割的两种情况"""
if x.endswith(','):
x = x[:-1]
x=float(x)
y=-float(y)
frame = int(frame)
self.X_mid.append(x)
self.Y_mid.append(y)
self.frames.append(frame)
self.num1 = len(self.X_mid)
self.Theta = []
for i in data2:
frame, theta = i.split()
theta = float(theta)
self.Theta.append(theta)
self.num2 = len(self.Theta)
# assert self.num1 == self.num2, "Wrong Data"
self.nframe = self.num1
# 对帧
""" 有indice和mask两种写法,考虑到被过滤掉的数据少于保留的数据,这里采用mask的写法"""
mask = np.ones((self.num1,))
for i in range(self.num1):
if self.X_mid[i] == 0 or self.Y_mid[i] == 0:
mask[i] = 0
# 标记为非法数据,mask记录下标
self.X_mid = np.array(self.X_mid)[mask>0]
self.Y_mid = np.array(self.Y_mid)[mask>0]
self.Theta = np.array(self.Theta)[mask>0]
self.frames = np.array(self.frames)[mask>0]
self.num = len(self.frames)
assert self.num > 0, "Data error"
self.frames_adj = np.zeros((self.num-1,))
for i in range(self.num-1):
if self.frames[i+1] - self.frames[i] <= self.skip_n:
self.frames_adj[i] = 1
self.smooth_thetas(90) # 90度以上认为是跳变
if self.has_light:
self.durings = self.sti_segment()
''' end '''
# %%
if pstatus == "debug":
if __name__ == '__main__':
parser = DataParser(light=False)
parser.parse_light(open('out-light-every.txt','r'), 30)
parser.parse_feature_result(open('out-feature-1.txt','r'),open('out-feature-2.txt','r'),30)
'''import matplotlib.pyplot as plt
fig = plt.figure(figsize=(15, 14))
plt.subplot(2, 1, 1)
plt.plot(parser.frames, parser.Theta)
plt.xlabel('Time [s]')
plt.ylabel('Amplitude')
plt.title('Original Data')
# 示例用法
fs = 60 # 采样频率
lowcut = 0.01 # 低频截止频率
highcut = 3 # 高频截止频率
order = 4 # 滤波器阶数
# 应用滤波器'''
'''y = np.zeros(parser.nframe)
y[parser.frames-1] = parser.Theta
filtered_data = butter_bandpass_filter(y, lowcut, highcut, fs, order=order)
plt.subplot(2, 1, 2)
plt.plot(np.arange(1,parser.nframe+1), filtered_data)
plt.xlabel('Time [s]')
plt.ylabel('Amplitude')
plt.title('filtered')
plt.show()
'''
print('finish')