Skip to content

Commit

Permalink
Attempting weighting with kpv
Browse files Browse the repository at this point in the history
SUBSTITUTE. Does not work as in myv.
  • Loading branch information
rueter committed May 2, 2024
1 parent 3f89eaf commit 22e6d9d
Showing 1 changed file with 12 additions and 0 deletions.
12 changes: 12 additions & 0 deletions src/cg3/disambiguator.cg3
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@ LIST Der/ProprietiveMod = Der/ProprietiveMod ;

# Case tags used by the case-related sets below.
LIST CASE = Nom Gen Abl Dat Com Cns Acc Ins Ine Ill Ela Car Egr Tra Apr Ter Abe Prl ;
# Any CASE reading that is not Nom.
# NOTE(review): Nom (and Acc below) are used as set names here, so they must be
# defined as sets elsewhere in the file (not visible in this excerpt).
SET OBLCASE = CASE - Nom ;
# Any CASE reading that is neither Nom nor Acc.
SET NONCORECASE = CASE - Nom - Acc ;
# Single-tag sets, so that each tag can be referred to by name in set expressions and rules.
LIST PrsPtc = PrsPtc ;
LIST PrsPrc = PrsPrc ;
LIST PastPtc = PastPtc ;
Expand Down Expand Up @@ -237,7 +238,10 @@ LIST NOT-PRL = "Конӧд" "Серӧд" "анӧд" "бауӧд" "вӧтӧд" "

LIST KPVACRO = "ААНИИ" "АвтоВАЗ" "АККОР" "АН" "АМР" "АО" "АПК" "АПП" "АсПУр" "АСРОС" "АССР" "АТК" "АТО" "АТОС" "АТП" "АТС" "АУ" "АФУН" "БАМ" "БАО" "БЕАР" "БКП" "БМВ" "БМП" "БНР" "БРИЗ" "БРИКС" "БССР" "БТИ" "БТР" "ВАД" "ВАДА" "ВАЗ" "ВАИ" "ВАК" "ВАСХНИЛ" "ВАСХНИИЛ" "ВВК" "ВВС" "ВГИК" "ВГТРК" "ВДВ" "ВДК" "ВДНХ" "ВДПО" "ВЗУ" "ВИА" "ВКСШ" "ВЛКСМ" "ВМП" "ВМС" "ВМФ" "ВНИГРИ" "ВНИИ" "ВОБ" "ВОВ" "ВОЗ" "ВОИ" "ВОИР" "ВООПИК" "ВОХР" "ВСНХ" "ВСУ" "ВСХВ" "ВТБ" "ВТО" "ВЦИОМ" "ВЦИОМС" "ВУЗ" "ВЦИК" "ВЦСПС" "ВЧК" "ГАИ" "ГАУ" "ГБУ" "ГДР" "ГИБДД" "ГИЗ" "ГИМС" "ГИТИС" "ГИХЛ" "ГКЧП" "ГлавПУР" "ГОМ" "ГПС" "ГПТУ" "ГПУ" "ГРП" "ГРПП" "ГРУ" "ГРЭС" "ГСВГ" "ГСМ" "ГСО" "ГТО" "ГТРК" "ГУ" "ГУЛАГ" "ГУП" "ГУФСИН" "ГЭС" "ДВП" "ДЗОТ" "ДИНА" "ДИП" "ДКБ" "ДНД" "ДНР" "ДНТ" "ДОСААФ" "ДПД" "ДПМК" "ДПП" "ДПС" "ДРБ" "ДРВ" "ДРСУ" "ДСК" "ДСО" "ДСУ" "ДТ" "ДХС" "ДЭУ" "ДЮКФП" "ДЮСШ" "ДЮФКП" "ДЮШС" "ЕС" "ЕГЭ" "ЕТС" "ЖКО" "ЖКХ" "ЖЭС" "ЖЭУ" "ЗАГС" "ЗАО" "ЗЖБИ" "ЗИЛ" "ИВС" "ИГИЛ" "ИГИЛС" "ИК" "ИКП" "ИМЦП" "ИНО" "ИПК" "ИПККНО" "ИТК" "ИТР" "ИУУ" "ИЯЛИ" "КамАЗ" "КАМАЗ" "КАПП" "КАССР" "КБО" "КВЖД" "КВН" "КВЧ" "КГБ" "КГПИ" "КИМ" "КИП" "КИППТ" "КИПТ" "КИР" "ККОВ" "КМЕТ" "КНВ" "КНГ" "КНДР" "КНР" "КНЦ" "КНШ" "КП" "КПГ" "КПДН" "КПЗ" "КПП" "КПРФ" "КПСС" "КПТ" "КПУ" "КР" "КРАГСиУ" "КРАПТ" "КРБ" "КРИРО" "КРИРОиПК" "КРО" "КРОО" "КРТК" "КРЦДОД" "КТП" "КФХ" "КЭБ" "ЛГУ" "ЛДК" "ЛДПР" "ЛЗУ" "ЛНР" "ЛПК" "ЛПО" "ЛПХ" "ЛСГ" "ЛТО" "ЛУАД" "ЛУКОЙЛ" "ЛФК" "МА" "МАГАТЭ" "МАЗ" "МАФУН" "МВД" "МВФ" "МГБ" "МГУ" "МЖКХ" "МИД" "МКС" "ММС" "МОД" "МОК" "МОУ" "МП" "МПС" "МР" "МРСК" "МСНК" "МТС" "МТФ" "МУК" "МУП" "МФЦ" "МХАТ" "МЧС" "МЭИ" "НА" "НАКЮ" "НАО" "НАСА" "НАТО" "НВХО" "НГДУ" "НИИ" "НИИУ" "НИЛ" "НИПТИ" "НКВД" "НКГБ" "НКСД" "НКФ" "НЛО" "НПЗ" "НРБ" "НСШ" "НТВ" "НТР" "НХЛ" "ОАО" "ОАР" "ОАЭ" "ОблОНО" "ОбОНО" "ОБСЕ" "ОБЭП" "ОБХСС" "ОВД" "ОВС" "ОГИПТАД" "ОГПН" "ОГПУ" "ОДВФ" "ОИКК" "ОК" "ОКК-РКИ" "ОЛП" "ОМВД" "ОМОН" "ОМСН" "ОНО" "ОНФ" "ООН" "ООО" "ОРС" "ОСВОД" "ОТК" "ОТФ" "ОУН" "ОУН-УПА" "ОУПД" "ПАК" "ПАО" "ПАТП" "ПВО" "ПДД" "ПечорНИУИ" "ПЛ" "ПМК" "ПО" 
"ППС" "ПСК" "ПТО" "ПТУ" "ПУ" "ПУРП" "ПЭВМ" "ПЭС" "РАЕН" "РАМН" "РАН" "РАПО" "РАПП" "РАФУЖ" "РВА" "РГД" "РГПУ" "РГУ" "РДК" "РДЮСШ" "РЖД" "РИА" "РИК" "РИПКРО" "РК" "РКИ" "РККА" "РКП" "РКСМ" "РКЦ" "РМЗ" "РММ" "РМЦ" "РНА" "РО" "РОА" "РОВД" "РОНО" "РПД" "РПП" "РПЦ" "РРМЦ" "РСДПР" "РСДРП" "РСТ" "РСУ" "РСФРС" "РСФР" "РСФСР" "РТ" "РТПЦ" "РТС" "РУДН" "РУНО" "РФ" "РЭБ" "РЭС" "РЭУ" "СБЕР" "СБУ" "СГУ" "СД" "СЕПГ" "СЗФО" "СИЗО" "СКА" "СКУ" "СЛВЗ" "СЛДК" "СЛИ" "СЛПК" "СМЕРШ" "СМЗ" "СМИ" "СМУ" "СНГ" "СНК" "СОБР" "СОФИН" "СОЦ" "СП" "СПб" "СПбГУ" "СПбГУСЭ" "СПК" "СПОГАТ" "СПТУ" "СС" "ССО" "ССП" "ССР" "СССР" "СТО" "СТФ" "СТЭК" "СУ" "СУАЛ" "СУС" "СЦБТ" "США" "СыктГУ" "СЭВ" "ТАП" "ТАСС" "ТАССР" "ТГУ" "ТИК" "ТКЗХ" "ТКУ" "ТНТ" "ТОЗ" "ТОО" "ТОС" "ТПК" "ТПП" "ТСЖ" "ТУ" "ТЭФИ" "ТЭЦ" "УАЗ" "УАССР" "УБОП" "УВД" "УВД-ОВД" "УГБ" "УГИБДД" "УГТУ" "УдГУ" "УЖД" "УЗИ" "УИИ" "УИК" "УИН" "УКОМ" "УКП" "УКС" "УМВД" "УНА-УПА" "УНКВД" "УОНО" "УПА" "УПН" "УР" "УРБ" "УРС" "УСЕВЛОН" "УССР" "УФМС" "УФПС" "УФСБ" "УФСИН" "УФССП" "УЧК" "ФАП" "ФГУ" "ФГУП" "ФЗД" "ФЗО" "ФЗУ" "ФИФА" "ФЛГР" "ФНС" "ФОМС" "ФРГ" "ФСБ" "ФСКН" "ФССП" "ЦБ" "ЦБТИ" "ЦВР" "ЦГА" "ЦДТ" "ЦЗН" "ЦИК" "ЦК" "ЦНИИМЭ" "ЦРБ" "ЦРМ" "ЦРУ" "ЦСКА" "ЦСУ" "ЦУМ" "ЧК" "ЧП" "ШКМ" "ШСУ" "ХАД" "ЧАЭС" "ЧГТРК" "ЧОН" "ЧОС" "ЧССР" "ШВК" "ШИК" "ЭВМ" "ЭССР" "ЮАР" "ЮПС" "ЮЭС" "ЮКОС" "ЮНЕСКО" ; # acronyms from Komi-language sources

# Readings with the base form "сыв" that are also tagged N and Sg.
LIST SYV = ("сыв" N Sg) ;

# ACR readings whose base form is not listed in KPVACRO.
# NOTE(review): ACR is defined outside this excerpt.
SET NOTACRO = ACR - KPVACRO ;
# SYV readings that match neither Nom nor Acc.
# NOTE(review): no rule in the visible excerpt references SYVNONCORE yet — confirm it is used.
SET SYVNONCORE = SYV - Nom - Acc ;# for work with сійӧ
# Complementary sets

SET Non-Pcle = WORD - Pcle ;
Expand Down Expand Up @@ -271,6 +275,10 @@ SUBSTITUTE (IV) (*) TARGET (*) ;
# Remove the TV tag from every reading; (*) as the replacement list means "replace with nothing".
SUBSTITUTE (TV) (*) TARGET (*) ;

# Replace the weight tag <W:0.0> with <W:1.0> on readings matching NOTACRO.
SUBSTITUTE (<W:0.0>) (<W:1.0>) TARGET NOTACRO ;
# NOTE(review): in the disabled attempt on the next line, SYV and NONCORECASE sit inside
# parentheses, where CG-3 reads names as literal tags rather than as set names; that is
# the likely reason it does not match. A set expression such as
# `TARGET SYV + NONCORECASE` or `TARGET SYVNONCORE` would be the set-based spelling (untested).
#SUBSTITUTE (<W:0.0>) (<W:1.0>) TARGET (SYV NONCORECASE);# does not work
#SUBSTITUTE (<W:0.0>) (<W:1.0>) TARGET ("сыв" N Sg Ela <W:0.0>) ;# (0 ("сійӧ") LINK 0 ("сыв"));# does not work
# Replace <W:0.0> with <W:1.0> on readings of "сыв" tagged N Sg Ela.
# NOTE(review): the rule name mentions Sg1, but the rule has no contextual test — confirm
# whether a condition is still missing.
SUBSTITUTE:WeightSyvWhenSg1 (<W:0.0>) (<W:1.0>) TARGET ("сыв" N Sg Ela);
# Example: сылысь

# Retag a V Neg reading as Aux Neg when the next cohort has a ConNeg reading.
SUBSTITUTE:negv_to_aux (V Neg) (Aux Neg) TARGET (V Neg) IF (1 (ConNeg)) ;
Expand Down Expand Up @@ -351,6 +359,9 @@ SELECT:PoIfCmpl-1 Po IF (-1 Cmpl) ;
# Remove Relat readings when the preceding cohort has a Cmpl reading.
REMOVE Relat IF (-1 Cmpl);
# Example: Та йылысь юӧртліс «Коми му»

# Remove A readings when the preceding cohort has a Nom reading and the current cohort
# also has a reading tagged Relat Sg Ine, Relat Sg Ill, Po Sg Ine or Po Sg Ill.
REMOVE:NoAWhenRelat A IF (-1 (Nom)) (0 (Relat Sg Ine) OR (Relat Sg Ill) OR (Po Sg Ine) OR (Po Sg Ill));
# Example: син водзас воссис

# Remove the Cmpl reading in personal pronouns if it is not followed by a Po.
# The Cmpl reading is used with personal pronouns that have a form which takes a Po:
# ме тэ сы ми ті на
Expand All @@ -359,6 +370,7 @@ REMOVE Relat IF (-1 Cmpl);
# Disabled stricter variant: additionally required a Nom reading in the current cohort.
#REMOVE:prsPrnCmpl Cmpl IF (0 Nom)(1 NotPO) ;
# Remove Cmpl readings when the next cohort matches NotPO.
# NOTE(review): NotPO is defined outside this excerpt.
REMOVE:prsPrnCmpl Cmpl IF (1 NotPO) ;


# Numerals

# Select Nom readings when the current cohort has a reading matching both Num and Card
# and the next cohort has an N reading.
SELECT:CardNomWhenNextNoun Nom IF (0 Num + Card) (1 N);
Expand Down

0 comments on commit 22e6d9d

Please sign in to comment.