generated from Sage-Bionetworks-Challenges/model-to-data-challenge-workflow
-
Notifications
You must be signed in to change notification settings - Fork 2
/
convert_annotations.cwl
87 lines (74 loc) · 2.85 KB
/
convert_annotations.cwl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#!/usr/bin/env cwl-runner
#
# Convert annotated notes to annotation store annotations
#
cwlVersion: v1.0
class: CommandLineTool
# The command line starts with the interpreter; the script name and its
# options are appended from the `arguments` section.
baseCommand: python

hints:
  # Listed under hints (not requirements): the Python 3.7 image is the
  # preferred runtime for the script.
  DockerRequirement:
    dockerPull: python:3.7
inputs:
  # JSON file holding the annotations to convert; handed to the script
  # through its -j option.
  - id: annotation_json
    type: File
  # Annotator type identifier such as nlpsandbox:date-annotator; handed to
  # the script through its -a option.
  - id: annotator_type
    type: string
# Together with baseCommand these entries produce:
#   python convert_annotations.py -j <annotation_json> -r results.json \
#     -a <annotator_type>
arguments:
  - valueFrom: convert_annotations.py
  - valueFrom: $(inputs.annotation_json)
    prefix: -j
  # Fixed output name; the `outputs` section globs for this same file.
  - valueFrom: results.json
    prefix: -r
  - valueFrom: $(inputs.annotator_type)
    prefix: -a
requirements:
  - class: InlineJavascriptRequirement
  # Stages the conversion script into the working directory so that
  # `python convert_annotations.py` can find it.
  - class: InitialWorkDirRequirement
    listing:
      - entryname: convert_annotations.py
        entry: |
          #!/usr/bin/env python
          """Flatten per-note annotation records into one results file.

          Reads the JSON list given with -j, collects every annotation found
          under the key that matches the annotator type, tags each one with
          the basename of its note's resource name as 'noteId', and writes
          a single-key JSON object to the path given with -r.
          """
          import argparse
          import json
          import os

          # annotator type: (key used in the results file,
          #                  key holding the annotations in each input record)
          ANNOTATOR_KEYS = {
              "nlpsandbox:date-annotator":
                  ("date_annotations", "textDateAnnotations"),
              "nlpsandbox:person-name-annotator":
                  ("person_name_annotations", "textPersonNameAnnotations"),
              "nlpsandbox:location-annotator":
                  ("physical_location_annotations", "textLocationAnnotations"),
              "nlpsandbox:id-annotator":
                  ("id_annotations", "textIdAnnotations"),
              "nlpsandbox:contact-annotator":
                  ("contact_annotations", "textContactAnnotations"),
              "nlpsandbox:covid-symptom-annotator":
                  ("covid_symptom_annotations", "textCovidSymptomAnnotations"),
          }

          parser = argparse.ArgumentParser()
          parser.add_argument("-j", "--annotation_json", required=True, help="Annotation json file")
          parser.add_argument("-r", "--results", required=True, help="Results file")
          parser.add_argument("-a", "--annotator_type", required=True, help="Annotator type")
          args = parser.parse_args()

          # Reject unknown annotator types up front with a clear message
          # instead of failing later on an unbound variable.
          if args.annotator_type not in ANNOTATOR_KEYS:
              parser.error(
                  "Unsupported annotator type: {!r} (expected one of: {})".format(
                      args.annotator_type, ", ".join(sorted(ANNOTATOR_KEYS))
                  )
              )
          annotation_key, post_path = ANNOTATOR_KEYS[args.annotator_type]

          with open(args.annotation_json, "r") as annote_f:
              annotations = json.load(annote_f)

          all_annotations = []
          for annotation in annotations:
              # The note id is the last path component of the resource name;
              # it is the same for every annotation of this record.
              note_id = os.path.basename(
                  annotation['annotationSource']['resourceSource']['name']
              )
              for annots in annotation[post_path]:
                  annots['noteId'] = note_id
                  all_annotations.append(annots)

          with open(args.results, "w") as results_f:
              json.dump({annotation_key: all_annotations}, results_f)
outputs:
  # Collected by name: results.json is the path passed to the script with -r.
  - id: results
    type: File
    outputBinding:
      glob: results.json