-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathadd_translator.py
116 lines (106 loc) · 4.57 KB
/
add_translator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# This script adds the connection between translator
# and text, which is kept in table contribution.
# Translators have already been added to table
# contributor, and texts to table publication.
#
# Translators for each text have been registered
# in an Excel-file. The file was simplified
# and then saved as CSV to serve as input for this
# script.
#
# Sample input (CSV) at end of file.
import psycopg2
conn_db = psycopg2.connect(
host="",
database="",
user="",
port="",
password=""
)
cursor = conn_db.cursor()
CSV_IN = "csv/translators_and_texts.csv"
CONTRIBUTION_TYPE = "translation"
# create a list from the original csv file
# replace empty values with None
def create_list_from_csv(filename):
with open(filename, "r", encoding="utf-8-sig") as source_file:
list = []
for line in source_file:
row = line.rstrip()
elements = row.split(";")
for i in range(0,len(elements)):
if elements[i] == "":
elements[i] = None
list.append(elements)
return list
# sort info and populate table contribution
def add_contribution(translation):
collection_id = translation[0]
publication_id = translation[1]
text_language = translation[2]
original_language = translation[3]
translator = translation[4]
# if no translator has been registered, there's nothing to add
# if this text's language is the same as the publication's
# original language, this isn't a translation and someone
# has made a mistake by registering a translator for it
if translator is not None and text_language != original_language:
# there might be several translators to a single text
# registered as Surname_a, Forename_a + Surname_b, Forename_b
# split the translators and their names and insert
# each connection separately
if "+" in translator:
translators = translator.split(" + ")
for translator in translators:
translator = translator.split(", ")
last_name = translator[0]
first_name = translator[1]
contributor_id = fetch_contributor(last_name, first_name)
if contributor_id is None:
print(first_name + " " + last_name + " is not in table contributor!")
else:
create_contribution(collection_id, publication_id, contributor_id, CONTRIBUTION_TYPE, text_language)
else:
# split surname and forename
translator = translator.split(", ")
last_name = translator[0]
first_name = translator[1]
contributor_id = fetch_contributor(last_name, first_name)
if contributor_id is None:
print(first_name + " " + last_name + " is not in table contributor!")
else:
create_contribution(collection_id, publication_id, contributor_id, CONTRIBUTION_TYPE, text_language)
# fetch id of the contributor from table contributor
def fetch_contributor(last_name, first_name):
fetch_query = """SELECT id FROM contributor WHERE last_name = %s AND first_name = %s"""
values_to_insert = (last_name, first_name)
cursor.execute(fetch_query, values_to_insert)
translator_id = cursor.fetchone()
return translator_id
# populate table contribution with connections
# between texts and translators
def create_contribution(collection_id, publication_id, contributor_id, CONTRIBUTION_TYPE, text_language):
# check whether this translator/text/language combo
# already is in the db, i.e. has been added earlier
fetch_query = """SELECT id FROM contribution WHERE publication_id = %s AND contributor_id = %s AND text_language = %s"""
values_to_insert = (publication_id, contributor_id, text_language)
cursor.execute(fetch_query, values_to_insert)
already_exists = cursor.fetchone()
# only add connection that isn't in db
if already_exists is None:
insert_query = """INSERT INTO contribution(publication_collection_id, publication_id, contributor_id, type, text_language) VALUES(%s, %s, %s, %s, %s)"""
values_to_insert = (collection_id, publication_id, contributor_id, CONTRIBUTION_TYPE, text_language)
cursor.execute(insert_query, values_to_insert)
def main():
translated_publications = create_list_from_csv(CSV_IN)
for translation in translated_publications:
add_contribution(translation)
conn_db.commit()
print("Translations added to table contribution.")
conn_db.close()
cursor.close()
main()
'''
sample input:
1;106;fi;fr;Surname, Forename
'''