-
Notifications
You must be signed in to change notification settings - Fork 0
/
dictionary_convert_by_EricEarl.py
83 lines (66 loc) · 2.72 KB
/
dictionary_convert_by_EricEarl.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# imports
import json
from pathlib import Path
import pandas
# TODO: remove hardcoding
# file path handling
INPUT = Path('pine_group/CogQuad Legends - EE update.xlsx')
OUTPUT_DIR = Path('pine_group')

# Suffix removed from a sheet name (case-insensitively) to form the file name.
LEGEND_SUFFIX = ' legend'


def output_filename(sheetname):
    """Return the JSON file name to use for the sheet called *sheetname*.

    A trailing " Legend" (any letter case) is removed as a whole suffix and
    the remaining spaces become underscores, e.g. "Digit Span Legend" ->
    "Digit_Span.json".
    """
    stem = str(sheetname).rstrip()
    # str.rstrip(" Legend") would strip any trailing run of those characters
    # (turning "Digit Span" into "Digit Spa"), so cut the exact suffix instead.
    if stem.lower().endswith(LEGEND_SUFFIX):
        stem = stem[:-len(LEGEND_SUFFIX)].rstrip()
    return stem.replace(' ', '_') + '.json'


def cell_text(value):
    """Return *value* as text, writing integral floats without the ".0".

    pandas stores a numeric column that contains blank cells as float64, so
    a code entered as 1 would otherwise be written as "1.0".
    """
    if isinstance(value, float) and value.is_integer():
        return str(int(value))
    return str(value)


def store_entry(entries, short_name, long_name, description, levels):
    """Record one finished question in *entries* under *short_name*.

    "Description" and "Levels" are only included when they are non-empty.
    """
    entry = {'LongName': long_name}
    if description:
        entry['Description'] = description
    if levels:
        entry['Levels'] = levels
    entries[short_name] = entry


def build_data_dictionary(df):
    """Build the data dictionary for one legend sheet.

    The first row, and every later row with a non-missing QUESTION_TEXT,
    starts a new question; rows in between only contribute CODEVALUE/DISPLAY
    pairs to the current question's "Levels".

    Parameters:
        df: DataFrame whose rows are read through the columns QUESTION_ALIAS,
            QUESTION_NAME, QUESTION_TEXT, CODEVALUE and DISPLAY.

    Returns:
        dict mapping each QUESTION_ALIAS to its entry, preceded by the fixed
        "participant_id" entry.

    Raises:
        KeyError: if a row is read and one of the columns above is absent.
    """
    # TODO: put this in dictionary indexing notation
    # start with a single entry dictionary
    entries = {
        "participant_id": {
            "LongName": "Participant Identifier",
            "Description": "Unique BIDS identifier for the participant in this study."
        }
    }
    started = False  # becomes True once the first row has been seen
    short_name = long_name = description = levels = None
    for _, row in df.iterrows():
        has_text = not pandas.isna(row['QUESTION_TEXT'])
        if not started or has_text:
            if started:
                # a new question begins: store the one collected so far
                store_entry(entries, short_name, long_name, description, levels)
            short_name = row['QUESTION_ALIAS']
            long_name = row['QUESTION_NAME']
            description = row['QUESTION_TEXT'] if has_text else None
            levels = None
            started = True
        if not pandas.isna(row['CODEVALUE']):
            if not levels:
                levels = {}
            levels[cell_text(row['CODEVALUE'])] = cell_text(row['DISPLAY'])
    # Store the final question after the loop rather than by comparing the
    # row label with the row count, which only works for a 0..n-1 index.
    if started:
        store_entry(entries, short_name, long_name, description, levels)
    return entries


def convert_workbook(input_path=INPUT, output_dir=OUTPUT_DIR):
    """Write one JSON data dictionary per sheet of the workbook.

    Parameters:
        input_path: Excel workbook to read (every sheet is loaded).
        output_dir: directory that receives one ``<sheet>.json`` per sheet.
    """
    # TODO: read all the sheets but only parse sheetnames with "Legend/legend" in them
    sheets = pandas.read_excel(input_path, sheet_name=None)
    for sheetname, df in sheets.items():
        # Build the dictionary before opening the file so a parsing error
        # does not leave an empty, truncated output file behind.
        data_dictionary = build_data_dictionary(df)
        output = Path(output_dir).joinpath(output_filename(sheetname))
        with open(output, 'w') as f:
            f.write(json.dumps(data_dictionary, indent=4))


if __name__ == '__main__':
    convert_workbook()