-
Notifications
You must be signed in to change notification settings - Fork 0
/
export.py
executable file
·242 lines (183 loc) · 9.49 KB
/
export.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
#!/usr/bin/python
# Start-up banner, emitted before the imports below are resolved.
print("Starting OLS-SQL Export process")
import json
import urllib2
import sys
import MySQLdb
import ConfigParser
# Location of the INI-style settings file.  main() reads the "SQL config"
# section (path, username, password, database) and the "Service URL"
# section (url) from it.
CONFIG_FILE_PATH = r'config.txt'


def flag_to_int(value):
    """Return 1 when *value* represents a true flag, otherwise 0.

    ``json.load`` decodes JSON booleans to ``True``/``False``, so comparing
    them with the string 'false' never matches.  Textual "true"/"false"
    values are accepted as well (case-insensitive) in case the service
    sends strings.
    """
    if hasattr(value, "lower"):
        return 1 if value.lower() == "true" else 0
    return 1 if value else 0


def definition_text(description):
    """Flatten a term description into one string for the definition column.

    ``None`` is returned unchanged (stored as NULL), a list or tuple is
    joined with single spaces, and any other value is returned as is.
    NOTE(review): presumably the service sends a list of strings or null
    here -- confirm against the service documentation.
    """
    if description is None:
        return None
    if isinstance(description, (list, tuple)):
        return " ".join("%s" % part for part in description)
    return description


def paged_url(url, size):
    """Return *url* with a ``size`` query parameter appended.

    Uses "&" instead of "?" when *url* already carries a query string.
    """
    separator = "&" if "?" in url else "?"
    return url + separator + ("size=%i" % size)


def fetch_json(url):
    """Fetch *url* and return the decoded JSON document.

    The HTTP response is closed even when decoding fails.
    """
    response = urllib2.urlopen(url)
    try:
        return json.load(response)
    finally:
        response.close()


def report_term(term, run_number):
    """Print the debugging summary of one term, grouped by target table."""
    print("\n######################### This is run number %s #########################" % run_number)
    print("---Ugly SQL Table Meta-------")
    # meta_id INT UNSIGNED NOT NULL AUTO_INCREMENT,
    # meta_key VARCHAR(64) NOT NULL,
    # meta_value VARCHAR(128),
    # species_id INT UNSIGNED DEFAULT NULL,
    print("---Ugly SQL Table ontology-------")
    # ontology_id INT UNSIGNED NOT NULL AUTO_INCREMENT,
    # name VARCHAR(64) NOT NULL,
    # namespace VARCHAR(64) NOT NULL,
    print("Ontology name? " + term["ontology_name"])
    print("What is namespace supposed to be?")
    print("---Ugly SQL Table Term-------")
    # term_id INT UNSIGNED NOT NULL AUTO_INCREMENT,
    # ontology_id INT UNSIGNED NOT NULL,
    # subsets TEXT,
    # accession VARCHAR(64) NOT NULL,
    # name VARCHAR(255) NOT NULL,
    # definition TEXT,
    # is_root INT NOT NULL DEFAULT 0,
    # is_obsolete INT NOT NULL DEFAULT 0,
    print("(accession?) " + term["iri"])
    print("(name?) " + term["label"])
    print("(definition?) ")
    print(term["description"])
    print("Is Obsolete? %s " % (term["is_obsolete"],))
    print("Is root? %s" % (term["is_root"],))
    print("---Ugly SQL Table Synonyms-------")
    # synonym_id INT UNSIGNED NOT NULL AUTO_INCREMENT,
    # term_id INT UNSIGNED NOT NULL,
    # name TEXT NOT NULL,
    # type ENUM('EXACT', 'BROAD', 'NARROW', 'RELATED'),
    synonyms = term["synonyms"]
    if synonyms is not None:
        print("synonyms name %s " % (synonyms,))
    else:
        print("synonyms: NONE present")


def ontology_id_for(cursor, name):
    """Return the ontology_id stored for *name*, inserting the row if absent.

    Looking the row up first avoids inserting one ontology row per term.
    When several rows already match, the last one returned is used.
    """
    select_sql = "SELECT ontology_id FROM ontology WHERE name = %s"
    params = (name,)
    print("%s %r" % (select_sql, params))
    cursor.execute(select_sql, params)
    rows = cursor.fetchall()
    if rows:
        return rows[-1][0]
    # NOTE(review): "namespace" is a literal placeholder value; the real
    # namespace of an ontology still has to be decided.
    cursor.execute(
        "INSERT INTO ontology(name, namespace) VALUES (%s, %s)",
        (name, "namespace"),
    )
    return cursor.lastrowid


def store_term(db, cursor, term):
    """Insert *term* (and its ontology row when missing) and commit it.

    All values are passed as query parameters so quotes inside labels or
    descriptions cannot break or alter the statement.  A failure is
    reported and only the current term's work is rolled back, so terms
    committed earlier are kept and the export carries on.
    """
    try:
        ontology_id = ontology_id_for(cursor, term["ontology_name"])
        print(ontology_id)
        insert_sql = (
            "INSERT INTO term(ontology_id, subsets, accession, name, "
            "definition, is_root, is_obsolete) "
            "VALUES (%s, %s, %s, %s, %s, %s, %s)"
        )
        params = (
            ontology_id,
            "subset",  # NOTE(review): literal placeholder, subsets are not exported yet
            term["iri"],
            term["label"],
            definition_text(term["description"]),
            flag_to_int(term["is_root"]),
            flag_to_int(term["is_obsolete"]),
        )
        print("%s %r" % (insert_sql, params))
        cursor.execute(insert_sql, params)
        db.commit()
    except (MySQLdb.Error, MySQLdb.Warning, UnicodeError) as err:
        db.rollback()
        print("Could not store term %r: %s" % (term["iri"], err))


def report_relationships(term):
    """Fetch the graph of *term*, print each edge and return the edge count.

    TODO: relationships are only printed, not stored.  Writing them to the
    relation table needs the child/parent term ids and a relation type id,
    none of which are resolved yet.
    """
    # relation_id INT UNSIGNED NOT NULL AUTO_INCREMENT,
    # child_term_id INT UNSIGNED NOT NULL,
    # parent_term_id INT UNSIGNED NOT NULL,
    # relation_type_id INT UNSIGNED NOT NULL,
    # intersection_of TINYINT UNSIGNED NOT NULL DEFAULT 0,
    # ontology_id INT UNSIGNED NOT NULL,
    edge_count = 0
    try:
        graph = fetch_json(term["_links"]["graph"]["href"])
        print("\nSecond Webservice Call - graph - to find relationships")
        for edge in graph["edges"]:
            edge_count += 1
            print("\n")
            print(" PartentTerm(?Source) " + edge["source"])
            print(" ChildTerm(?Target) " + edge["target"])
            print(" RelationShipType " + edge["label"])
            print(" intersection_of - No idea")
            print(" ontology_id " + term["ontology_name"])
    except Exception:
        print("Error within second webservice call")
        raise
    return edge_count


def export_terms(db, service_url):
    """Export every term served at *service_url* into the database.

    A first request reads ``page.totalElements``; a second request asks for
    that many terms in a single page.  Returns ``(terms, edges)``, the
    number of terms and of graph edges processed.  Web-service and data
    errors are reported and re-raised.
    NOTE(review): the service may cap the page size, in which case terms
    beyond the cap are silently missing -- confirm.
    """
    term_count = 0
    edge_count = 0
    cursor = db.cursor()
    try:
        page_size = fetch_json(service_url)["page"]["totalElements"]
        service_url = paged_url(service_url, page_size)
        print("Pagesize and URL")
        print(page_size)
        print(service_url)
        if not page_size:
            # Nothing to export; skip requesting an empty page.
            return term_count, edge_count
        try:
            data = fetch_json(service_url)
            print("\n Webservice called worked, print data \n\n")
            for term in data["_embedded"]["terms"]:
                term_count += 1
                report_term(term, term_count)
                store_term(db, cursor, term)
                print("---Ugly SQL Table Subset-------")
                # subset_id INT UNSIGNED NOT NULL AUTO_INCREMENT,
                # name VARCHAR(64) NOT NULL,
                # definition VARCHAR(128) NOT NULL,
                print("---Ugly SQL Table alt id-------")
                # alt_id INT UNSIGNED NOT NULL AUTO_INCREMENT,
                # term_id INT UNSIGNED NOT NULL,
                # accession VARCHAR(64) NOT NULL,
                print("---Ugly SQL relationship_type-------")
                # relation_type_id INT UNSIGNED NOT NULL AUTO_INCREMENT,
                # name VARCHAR(64) NOT NULL,
                print("---Ugly SQL relationship-------")
                edge_count += report_relationships(term)
                print("\n---Ugly SQL closure-------")
                # closure_id INT UNSIGNED NOT NULL AUTO_INCREMENT,
                # child_term_id INT UNSIGNED NOT NULL,
                # parent_term_id INT UNSIGNED NOT NULL,
                # subparent_term_id INT UNSIGNED,
                # distance TINYINT UNSIGNED NOT NULL,
                # ontology_id INT UNSIGNED NOT NULL,
        except Exception:
            print("Error in a webservice call, that might be ok, we reached the end of the page. %s" % service_url)
            raise
    except Exception:
        print("Some error occured - maybe the webservice is down?! I tried to fetch data from %s" % service_url)
        raise
    return term_count, edge_count


def main():
    """Run the export using the settings in CONFIG_FILE_PATH.

    The script takes no command-line arguments; when any are given it only
    prints a hint and exits.  The database connection is closed even when
    the export fails.
    """
    if len(sys.argv) != 1:
        print("Please define all properties in the config file")
        return

    print("Connecting to SQL Database")
    config = ConfigParser.RawConfigParser()
    config.read(CONFIG_FILE_PATH)
    db = MySQLdb.connect(
        config.get('SQL config', 'path'),
        config.get('SQL config', 'username'),
        config.get('SQL config', 'password'),
        config.get('SQL config', 'database'),
    )
    try:
        # Quick round trip to prove the connection works.
        cursor = db.cursor()
        cursor.execute("SELECT VERSION()")
        print("Database version : %s " % cursor.fetchone())

        service_url = config.get('Service URL', 'url')
        term_count, edge_count = export_terms(db, service_url)
        print("\nWork is done!\nNumber of terms processed %s, Number of edges (relationships) processed %s " % (term_count, edge_count))
        # Terms are committed one by one; this covers anything still pending.
        db.commit()
    finally:
        # disconnect from SQL server
        db.close()


if __name__ == "__main__":
    main()