From 1241a66105fc4ea1a81887120a9c0a685407656d Mon Sep 17 00:00:00 2001 From: leoalenc Date: Sat, 25 Dec 2021 11:21:55 -0300 Subject: [PATCH] bug fixes etc #39 --- etc/valence_script/WriteVerbEntries.py | 62 ++++++++++++++++++++------ 1 file changed, 48 insertions(+), 14 deletions(-) diff --git a/etc/valence_script/WriteVerbEntries.py b/etc/valence_script/WriteVerbEntries.py index 3e72b6d..aa14bf3 100644 --- a/etc/valence_script/WriteVerbEntries.py +++ b/etc/valence_script/WriteVerbEntries.py @@ -4,24 +4,28 @@ import sys, os, re, pickle import PorGramEntries, ValenceExtractor +from valences import * USER=os.path.expanduser("~") DIR1=os.path.join(USER, "hpsg/por") DIR2=os.path.join(USER, "hpsg/por/tmp") -MAPPING=dict([re.split(r"\s+",line.strip()) for line in open(os.path.join(USER,DIR1,"mapping.txt"),"r").readlines() if line.strip() != ""]) +def build_mapping(filename="mapping02.txt"): + return dict([re.split(r"\s+",line.strip()) for line in open(os.path.join(USER,DIR1,filename),"r").readlines() if line.strip() != ""]) + +MAPPING=build_mapping() INFILE=os.path.join(USER, DIR2,"verbtypes142256.txt") LEXICON=PorGramEntries.MakeDictionary(PorGramEntries.ExtractEntries(INFILE)) -def save_lexicon(lexicon,filename): - outfile = open(filename,'wb') +def save_lexicon(lexicon,filename,path_to_dir=DIR1): + outfile = open(os.path.join(path_to_dir,filename),'wb') pickle.dump(lexicon,outfile) outfile.close() -def load_lexicon(lexicon,filename): - outfile = open(filename,'rb') - lexicon=pickle.load(lexicon,outfile) +def load_lexicon(filename,path_to_dir=DIR1): + outfile = open(os.path.join(path_to_dir,filename),'rb') + lexicon=pickle.load(outfile) outfile.close() return lexicon @@ -45,27 +49,48 @@ def build_entry(lemma,verb_type,index): def expand_lexicon(verb,verb_type,new_lexicon): if not new_lexicon.get(verb.lemma): - new_lexicon[verb.lemma]=[verb_type] + new_lexicon[verb.lemma]={verb_type} else: - new_lexicon[verb.lemma].append(verb_type) + new_lexicon[verb.lemma].add(verb_type) -def build_lexicon(infile="/home/leonel/hpsg/por/tmp/verbtypes142256.txt"): - frames=list(MAPPING.keys()) +def build_lexicon(mapping=MAPPING,dative=True): + frames=list(mapping.keys()) new_lexicon={} for frame in frames: - framelist=ValenceExtractor.expand_valence(frame,False) + framelist=ValenceExtractor.expand_valence(frame,dative) verbs=ValenceExtractor.extract_verbs(framelist) for verb in verbs: #entry=build_entry(verb,MAPPING[frame]) #if entry: #print(entry) - expand_lexicon(verb,MAPPING[frame],new_lexicon) + expand_lexicon(verb,mapping[frame],new_lexicon) return new_lexicon #outfile=open(f"/home/leonel/hpsg/por/bosque-entries.tdl","w") #for num,lemma,types in sortedentries: # print(num,lemma," ".join(types),file=outfile) #outfile.close() - + +def join_lexicons(*lexicons): + old_lexicon=lexicons[0] + new_lexicons=lexicons[1:] + updated_lexicon={} + updated_lexicon.update(old_lexicon) + #return new_lexicons + for lexicon in new_lexicons: + for k,v in lexicon.items(): + if updated_lexicon.get(k): + updated_lexicon[k].update(v) + else: + updated_lexicon[k]=v + return updated_lexicon + +def collapse_dat_goa(lexicon): + dat_goa={'nom-acc-dat-ditransitive-verb-lex','nom-acc-goa-ditransitive-verb-lex'} + for lemma,verbtypes in lexicon.items(): + if verbtypes.issuperset(dat_goa): + verbtypes.difference_update(dat_goa) + verbtypes.add('nom-acc-rec-ditransitive-verb-lex') + def write_lexicon(new_lexicon,path_to_dir=DIR1, outfile="bosque-entries.tdl",old_lexicon=LEXICON): outfile=os.path.join(path_to_dir,outfile) outfile=open(outfile,"w") @@ -85,7 +110,16 @@ def write_lexicon(new_lexicon,path_to_dir=DIR1, outfile="bosque-entries.tdl",old print(build_entry(lemma,verbtype,index),file=outfile) index+=1 outfile.close() - + +def write_lexicons(): + filenames=['prep-obj-2ng-arg-verbs','clausal-verbs', 'intrans-trans', 'ditrans'] + for filename in filenames: + infile=f"{filename}.txt" + outfile=f"{filename}.pkl" + mapping=build_mapping(infile) + new_lexicon=build_lexicon(mapping) + save_lexicon(new_lexicon,outfile) + if __name__ == "__main__": if len(sys.argv) == 2: main(sys.argv[1])