Skip to content

Commit

Permalink
bug fixes etc #39
Browse files Browse the repository at this point in the history
  • Loading branch information
leoalenc committed Dec 25, 2021
1 parent 7dbaae6 commit 1241a66
Showing 1 changed file with 48 additions and 14 deletions.
62 changes: 48 additions & 14 deletions etc/valence_script/WriteVerbEntries.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,24 +4,28 @@

import sys, os, re, pickle
import PorGramEntries, ValenceExtractor
from valences import *
USER=os.path.expanduser("~")
DIR1=os.path.join(USER, "hpsg/por")
DIR2=os.path.join(USER, "hpsg/por/tmp")

MAPPING=dict([re.split(r"\s+",line.strip()) for line in open(os.path.join(USER,DIR1,"mapping.txt"),"r").readlines() if line.strip() != ""])
def build_mapping(filename="mapping02.txt"):
return dict([re.split(r"\s+",line.strip()) for line in open(os.path.join(USER,DIR1,filename),"r").readlines() if line.strip() != ""])

MAPPING=build_mapping()

INFILE=os.path.join(USER, DIR2,"verbtypes142256.txt")

LEXICON=PorGramEntries.MakeDictionary(PorGramEntries.ExtractEntries(INFILE))

def save_lexicon(lexicon,filename):
outfile = open(filename,'wb')
def save_lexicon(lexicon,filename,path_to_dir=DIR1):
outfile = open(os.path.join(path_to_dir,filename),'wb')
pickle.dump(lexicon,outfile)
outfile.close()

def load_lexicon(lexicon,filename):
outfile = open(filename,'rb')
lexicon=pickle.load(lexicon,outfile)
def load_lexicon(filename,path_to_dir=DIR1):
outfile = open(os.path.join(path_to_dir,filename),'rb')
lexicon=pickle.load(outfile)
outfile.close()
return lexicon

Expand All @@ -45,27 +49,48 @@ def build_entry(lemma,verb_type,index):

def expand_lexicon(verb,verb_type,new_lexicon):
if not new_lexicon.get(verb.lemma):
new_lexicon[verb.lemma]=[verb_type]
new_lexicon[verb.lemma]={verb_type}
else:
new_lexicon[verb.lemma].append(verb_type)
new_lexicon[verb.lemma].add(verb_type)

def build_lexicon(infile="/home/leonel/hpsg/por/tmp/verbtypes142256.txt"):
frames=list(MAPPING.keys())
def build_lexicon(mapping=MAPPING,dative=True):
frames=list(mapping.keys())
new_lexicon={}
for frame in frames:
framelist=ValenceExtractor.expand_valence(frame,False)
framelist=ValenceExtractor.expand_valence(frame,dative)
verbs=ValenceExtractor.extract_verbs(framelist)
for verb in verbs:
#entry=build_entry(verb,MAPPING[frame])
#if entry:
#print(entry)
expand_lexicon(verb,MAPPING[frame],new_lexicon)
expand_lexicon(verb,mapping[frame],new_lexicon)
return new_lexicon
#outfile=open(f"/home/leonel/hpsg/por/bosque-entries.tdl","w")
#for num,lemma,types in sortedentries:
# print(num,lemma," ".join(types),file=outfile)
#outfile.close()


def join_lexicons(*lexicons):
old_lexicon=lexicons[0]
new_lexicons=lexicons[1:]
updated_lexicon={}
updated_lexicon.update(old_lexicon)
#return new_lexicons
for lexicon in new_lexicons:
for k,v in lexicon.items():
if updated_lexicon.get(k):
updated_lexicon[k].update(v)
else:
updated_lexicon[k]=v
return updated_lexicon

def collapse_dat_goa(lexicon):
dat_goa={'nom-acc-dat-ditransitive-verb-lex','nom-acc-goa-ditransitive-verb-lex'}
for lemma,verbtypes in lexicon.items():
if verbtypes.issuperset(dat_goa):
verbtypes.difference_update(dat_goa)
verbtypes.add('nom-acc-rec-ditransitive-verb-lex')

def write_lexicon(new_lexicon,path_to_dir=DIR1, outfile="bosque-entries.tdl",old_lexicon=LEXICON):
outfile=os.path.join(path_to_dir,outfile)
outfile=open(outfile,"w")
Expand All @@ -85,7 +110,16 @@ def write_lexicon(new_lexicon,path_to_dir=DIR1, outfile="bosque-entries.tdl",old
print(build_entry(lemma,verbtype,index),file=outfile)
index+=1
outfile.close()


def write_lexicons():
filenames=['prep-obj-2ng-arg-verbs','clausal-verbs', 'intrans-trans', 'ditrans']
for filename in filenames:
infile=f"{filename}.txt"
outfile=f"{filename}.pkl"
mapping=build_mapping(infile)
new_lexicon=build_lexicon(mapping)
save_lexicon(new_lexicon,outfile)

if __name__ == "__main__":
if len(sys.argv) == 2:
main(sys.argv[1])
Expand Down

0 comments on commit 1241a66

Please sign in to comment.