-
Notifications
You must be signed in to change notification settings - Fork 14
/
dict-demo.py
68 lines (47 loc) · 2.12 KB
/
dict-demo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# coding: utf-8
import sys
##################################################################
#Example usage:
#python dict-demo.py test-text-files/dict-test-ar-input.txt test-text-files/dict-out.txt ar
#python dict-demo.py test-text-files/dict-test-en-input.txt test-text-files/dict-out.txt en
def usage():
    """Print the command-line synopsis and two example invocations to stdout."""
    # Build the synopsis as ONE string. Passing several values inside the
    # parentheses makes Python 2's print statement emit a tuple repr, whereas
    # a single string argument prints the same text on Python 2 and Python 3.
    print(' '.join(('Usage: ', sys.argv[0], '<inputfile> <outputfile> <source language>')))
    print('python dict-demo.py test-text-files/dict-test-ar-input.txt test-text-files/dict-out.txt ar')
    print('python dict-demo.py test-text-files/dict-test-en-input.txt test-text-files/dict-out.txt en')
##################################################################
# Refuse to go any further unless the input file, the output file and the
# source language were all supplied on the command line.
if not len(sys.argv) >= 4:
    usage()
    raise SystemExit(2)
'''
Demo of Arabic-English dictionary translation using Open Multilingual WordNet (OMW)
Dictionaries are obtained from Open Multilingual WordNet website: http://compling.hss.ntu.edu.sg/omw/
# To cite these dictionaries:
# Francis Bond and Kyonghee Paik (2012), A survey of wordnets and their licenses. In Proceedings of the 6th Global WordNet Conference (GWC 2012). Matsue. 64–71.
# Francis Bond and Ryan Foster (2013), Linking and extending an open multilingual wordnet. In 51st Annual Meeting of the Association for Computational Linguistics: ACL-2013. Sofia. 1352–1362.
'''
import importlib
# `importlib` has no `load_source` attribute (that helper belonged to the old
# `imp` module), so the previous call raised AttributeError at startup.
# `importlib.import_module` exists on both Python 2.7 and Python 3; it looks
# the module up on sys.path, whose first entry is this script's directory
# when the file is run as a script.
tp = importlib.import_module('textpro')
def main(argv):
    """Translate the words of a UTF-8 text file and write them to a file.

    The input text is passed through ``tp.process_text``; every resulting
    word is translated with ``tp.translate_ar2en`` (source language ``ar``)
    or ``tp.translate_en2ar`` (source language ``en``). All translations are
    stripped, joined with single spaces and written to the output file as
    one UTF-8 encoded line.

    Args:
        argv: Command-line style argument list. ``argv[1]`` is the input
            file path, ``argv[2]`` the output file path and ``argv[3]`` the
            source language code (``'ar'`` or ``'en'``).

    Exits with status 2, after printing the usage text, when fewer than
    three arguments are given or the source language is not supported.
    """
    # io.open accepts an explicit encoding on both Python 2 and Python 3,
    # which replaces the Python-2-only `.decode()` / `print >>file` pair.
    import io

    if len(argv) < 4:
        usage()
        sys.exit(2)

    # Read from the argv that was passed in rather than the global sys.argv.
    inputfile = argv[1]
    outputfile = argv[2]
    source_language = argv[3].strip()
    # A single formatted string prints the same on Python 2 and Python 3.
    print('source language: %s' % source_language)

    if source_language == 'ar':
        translate = tp.translate_ar2en
    elif source_language == 'en':
        translate = tp.translate_en2ar
    else:
        # Previously an unknown language silently produced an empty output
        # file; report the problem instead.
        sys.stderr.write('Unsupported source language: %s\n' % source_language)
        usage()
        sys.exit(2)

    # The context manager guarantees the input handle is closed.
    with io.open(inputfile, encoding='utf-8') as infile:
        text = infile.read()
    word_list = tp.process_text(text)

    result = []
    for word in word_list:
        result.extend(t.strip() for t in translate(word))

    # The trailing newline mirrors what the original print statement wrote.
    # The u'' literals keep the written value unicode on Python 2 even when
    # there are no translations at all.
    with io.open(outputfile, 'w', encoding='utf-8') as output:
        output.write(u' '.join(result) + u'\n')
##################################################################
# Script entry point: run the translation only when this file is executed
# directly, handing the raw command-line arguments to main().
if __name__ == '__main__':
    main(sys.argv)