Skip to content

Commit

Permalink
full test coverage
Browse files: browse the repository at this point in the history
  • Loading branch information
xrotwang committed Nov 8, 2024
1 parent 4db7e44 commit da61f9b
Show file tree
Hide file tree
Showing 41 changed files with 1,647 additions and 56 deletions.
4 changes: 0 additions & 4 deletions src/linglit/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,6 @@ def iter_examples(d='.', glottolog='glottolog', **dirs): # pragma: no cover
c = collections.Counter()
glottolog = Glottolog(glottolog)
for rid, cls in PROVIDERS.items():
# if rid != 'glossa':
if rid != 'langsci':
continue
sd = dirs.get(rid, d / rid)
bibtex = sd / 'bibtex'
if sd.exists():
Expand All @@ -66,7 +63,6 @@ def iter_examples(d='.', glottolog='glottolog', **dirs): # pragma: no cover
bibtex.joinpath(
'{}.bib'.format(pub.record.ID)).write_text('\n\n'.join(t), encoding='utf8')

continue
pid = '{}-{}'.format(rid, pub.record.ID)
for i, ex in enumerate(pub.iter_examples()):
if ex.Language_ID is None:
Expand Down
2 changes: 1 addition & 1 deletion src/linglit/base/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def __attrs_post_init__(self):
cmt = cmt.strip()
if len(cmt.split()) > 1:
if self.Comment:
self.Comment += ';'
self.Comment += '; '
self.Comment = (self.Comment or '') + cmt
else:
self.Corpus_Ref = cmt
Expand Down
32 changes: 32 additions & 0 deletions src/linglit/cfg/langsci/texfile_titles.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -788,6 +788,7 @@ Book_ID Filename Language Title
280 simik.tex Inherent vs. accidental uniqueness in bare and demonstrative nominals
280 zimmermann.tex The role of the correlate in clause-embedding
280 zivojinovic.tex Torlak Torlak clitic doubling: A cross-linguistic comparison
282 bliss.tex siks1238
282 diessel.tex A typology of demonstrative clause linkers
282 fuchs.tex Referential shift potential of demonstrative pronouns – Evidence from text continuation
282 johannessen.tex Psychologically distal demonstratives in Scandinavian are not “discourse new”
Expand Down Expand Up @@ -871,6 +872,21 @@ Book_ID Filename Language Title
290 08.tex Building bridges between languages: How students develop crosslinguistic awareness in multilingual learning settings
290 09.tex Students’ perceptions of plurilingual nonnative teachers in higher education: An added or a mudded value?
290 prefaceEd.tex Preface
291 02-Cupik.tex cent2127 Constituency in Cup'ik and the problem of holophrasis
291 03-Cherokee.tex cher1273 Constituency in Oklahoma Cherokee
291 04-Kiowa.tex kiow1266 Constituency and Wordhood in Kiowa
291 05-Mazatec.tex ayau1235 Constituency in Ayautla Mazatec
291 06-Mixtec.tex juxt1235 Constituency in Tù'un Ntá'ví (Mixtec) of San Martín Duraznos
291 07-Zapotec.tex teot1238 Words as emergent constituents in Teotitlán del Valle Zapotec
291 08-Chatino.tex zenz1235 Constituency in Zenzontepec Chatino
291 09-Martinican.tex mart1259 Constituency in Martinican (creole, Martinique)
291 10-Hup.tex hupd1244 Constituency in Hup: Synchronic and diachronic perspectives
291 11-Yukuna.tex yucu1253 Constituency in Yukuna
291 12-Mebengokre.tex kaya1330 Constituency in Mẽbêngôkre independent clauses
291 13-Araona.tex arao1248 Graded constituency in the Araona (Takana) verb complex
291 14-Quechua.tex sout2991 Word structure and constituency in Uma Piwra South Bolivian Quechua
291 15-Chorote.tex iyoj1235 Wordhood in Chorote (Mataguayan)
291 16-Mocovi.tex moco1246 Constituency in Northern Chaco Mocoví (Guaycuruan, Argentina)
293 avatime.tex Avatime A note on wh -questions in Avatime
293 barzlai.tex Nobiin Morphologically conditioned phonological variation in Nobiin
293 bukusumu.tex Lubukusu Object marking in Lubukusu: Information structure in the verb phrase
Expand Down Expand Up @@ -1015,3 +1031,19 @@ Book_ID Filename Language Title
329 07.tex Agreement inflection and word order in Viskadalian Swedish
329 08.tex From ‘big’ to ‘much’ From ‘big’ to ‘much’: On the grammaticalization of two gradable adjectives in Swedish
329 prefaceEd.tex Preface with an editor, abstract and citation footer
383 kahigi.tex sumb1240 Verb extensions and morphosyntactic variation in Bantu: The case of Sumbwa
383 lukusa.tex luba1249 A morphosyntactic study of verb object marking in Čilubà
383 lusekelo.tex nyak1261 Concord and agreement in Eastern Bantu: The augment and noun classes in Nyakyusa
383 mallya.tex bosh1240 The morphosyntax of locative expressions in Kiwoso
383 ngwasi.tex hehe1240 The historical development of the reflexive-reciprocal polysemy in Hehe
383 taji.tex yaoo1241 Demonstratives in Chiyao: An analysis of their form, distribution and functions
383 yoneda.tex gand1255 Multiple-object constructions in Ganda
411 01-Arsenijevic.tex sout1528 Specification of telicity in Serbo-Croatian, without null prefixes
411 06-Georgieva.tex bulg1262 Inflectionless adjectives in Bulgarian as a case of nominal predication
411 08-Geist.tex russ1263 Responding to negative biased questions in Russian
411 09-Matushansky.tex russ1263 Responding to negative biased questions in Russian
411 12-Stepanov.tex slov1268 Number mismatch effect and processing cataphora in a \textit{pro}-drop language: The case of Slovenian
440 01.tex Czech LEMUR: A lexicon of Czech multiword expressions
440 02.tex poma1238 Description of Pomak within IDION: Challenges in the representation of verb multiword expressions
440 07.tex Dutch MWE-Finder: Querying for multiword expressions in large Dutch text corpora
440 09.tex Swedish Multiword expressions in Swedish as a second language: Taxonomy, annotation, and initial results
2 changes: 1 addition & 1 deletion src/linglit/cldf/publication.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def languages(self):
def iter_references(self):
sid2langs = collections.defaultdict(set)
if not self.cfg.bib:
return
return # pragma: no cover
s2l = self.cfg.source_to_language
l2gc = {}
for row in self.languages:
Expand Down
2 changes: 1 addition & 1 deletion src/linglit/cldf/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def create(self, verbose=False):
if dldir.exists():
if self.metadata(did)['version'] == rec.version:
continue
shutil.rmtree(dldir)
shutil.rmtree(dldir) # pragma: no cover
print('downloading {} ...'.format(rec.version))
rec.download_dataset(self.dir / did)
print('... done')
Expand Down
4 changes: 2 additions & 2 deletions src/linglit/commands/mergedbib.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ def bibtex(src):
with TemporaryDirectory() as tmp:
for pub in tqdm(repos.iter_publications()):
if pub.id == 'langsci{}'.format(args.drop_until):
do = True
do = True # pragma: no cover
if not do:
continue
continue # pragma: no cover
with tmp.joinpath('{}.bib'.format(pub.id)).open('w') as bib:
bib.write(bibtex(pub.as_source()))
for src in pub.cited_references:
Expand Down
4 changes: 2 additions & 2 deletions src/linglit/glossa/xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,8 +150,8 @@ def iter_igt(d, abbrs):
numbers = [
t(li.xpath('list-item')[0])
for li in gloss.xpath(".//list[@list-type='wordfirst']")]
except IndexError:
continue
except IndexError: # pragma: no cover
continue # Something isn't as expected. We just skip this potential example.
for n in numbers:
m = re.match(r'\(([0-9]+|[iv]+)\)', n)
if m:
Expand Down
19 changes: 10 additions & 9 deletions src/linglit/langsci/examples.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import functools
import re
import typing
import hashlib
import functools

from pyigt.igt import IGT, NON_OVERT_ELEMENT
from pyigt.lgrmorphemes import MORPHEME_SEPARATORS
Expand Down Expand Up @@ -47,7 +47,7 @@ def parse_cmd(cmd, line):
# Cut out the command and its first argument from line:
texcmd = '\\' + cmd + '{' + line.split('\\' + cmd + '{')[-1].split('}')[0]
cmd = getattr(TexSoup(texcmd, tolerance=1), cmd)
except: # noqa: E722
except: # pragma: no cover # noqa: E722
raise ValueError(line) # pragma: no cover
return (cmd.args[0].string.split('!')[-1], '', '')

Expand Down Expand Up @@ -252,7 +252,7 @@ def lines_and_comment(lines):
comment.append(s.jambox.string)
s.jambox.delete()
line = str(s)
except: # noqa: E722
except: # pragma: no cover # noqa: E722
pass # pragma: no cover
if line:
res.append(line)
Expand Down Expand Up @@ -282,7 +282,8 @@ def lines_and_comment(lines):
to_text(res[-1].split('\n')[0])[0].strip())
if m:
if m.groups()[0][0].isalpha() and m.groups()[0][0].islower():
linfo = (m.groups()[0], '', '')
# Hm. Seems to be impossible given the regex.
linfo = (m.groups()[0], '', '') # pragma: no cover
else:
comment.append(m.groups()[0])
res = res[:-1]
Expand Down Expand Up @@ -318,7 +319,7 @@ def make_example(
aligned = [line.strip() for line in re.split(r'\\(?:\\|newline)', aligned) if line.strip()]

# book-specifics:
if pub.record.int_id == 212:
if pub.record.int_id == 212: # pragma: no cover
if len(aligned) > 2:
if 'footnotesize' in aligned[2]:
aligned = aligned[:2]
Expand Down Expand Up @@ -347,7 +348,7 @@ def make_example(
pt, gl = aligned
obj = None
elif len(aligned):
if len(aligned) == 4 and aligned[3].startswith(r'}\\jambox'):
if len(aligned) == 4 and aligned[3].startswith(r'}\jambox'):
obj, pt = aligned[0], aligned[1]
gl = aligned[2] + aligned[3]
else: # Dunno what to do here ...
Expand All @@ -356,7 +357,7 @@ def make_example(
# print('---')
return
else: # ... or here.
return
return # pragma: no cover
if obj:
obj, cmt, _refs = to_text(obj)
if _refs:
Expand All @@ -370,8 +371,8 @@ def make_example(
if len(pt) != len(gl):
if gl and gl[-1] in ['()', '*()']:
gl = gl[:-1]
if len(pt) != len(gl):
return
if len(pt) != len(gl): # Primary text cannot be aligned with glosses.
return # pragma: no cover

obj = obj or IGT(phrase=pt, gloss=gl).primary_text
return Example(
Expand Down
30 changes: 15 additions & 15 deletions src/linglit/langsci/latex.py
Original file line number Diff line number Diff line change
Expand Up @@ -798,7 +798,7 @@
def uppercase_arg(n, l2tobj):
if n.nodeargd:
return l2tobj.nodelist_to_text([n.nodeargd.argnlist[0]]).upper()
return ''
return '' # pragma: no cover


def dot_uppercase_arg(n, l2tobj):
Expand Down Expand Up @@ -829,7 +829,7 @@ def secondarg(n, l2tobj):


def repl(abbr, *args):
return abbr
return abbr # pragma: no cover


def japhug(n, l2tobj):
Expand Down Expand Up @@ -871,19 +871,19 @@ def cite(n, l2tobj):
# n.nodeargd can be empty if e.g. \putinquotes was a single
# token passed as an argument to a macro,
# e.g. \newcommand\putinquotes...
return ''
return '' # pragma: no cover
page = ''
if len(n.nodeargd.argnlist) > 1:
page = _get_optional_arg(n.nodeargd.argnlist[0], '', l2tobj)
key = l2tobj.nodelist_to_text([n.nodeargd.argnlist[-1]]).strip().replace(' ', '&')
if key:
return '<cit page="{}">{}</cit>'.format(page.replace('"', ''), key)
return ''
return '' # pragma: no cover


def langinfo(n, l2tobj):
if not n.nodeargd:
return ''
return '' # pragma: no cover
res = ''
for i, arg in enumerate(n.nodeargd.argnlist):
t = l2tobj.nodelist_to_text([arg]).strip()
Expand Down Expand Up @@ -990,7 +990,7 @@ def custom_latex_to_text(input_latex, parser=lw_context_db, converter=l2t_contex
# convert to text
try:
return l2t_obj.nodelist_to_text(nodelist)
except (IndexError, ValueError):
except (IndexError, ValueError): # pragma: no cover
return input_latex


Expand Down Expand Up @@ -1034,20 +1034,20 @@ def to_text(latex):

# extract citations:
pattern = re.compile(r'<cit page="([^"]*)">([^<]+)</cit>')
for m in pattern.finditer(text):
if m.groups()[1] != '[':
for sid in m.groups()[1].split(','):
if sid.strip():
refs.append((sid.strip(), m.groups()[0]))
if refs:
text = pattern.sub('', text).strip()

for cc in comment:
for m in pattern.finditer(cc):
def find_refs(t):
for m in pattern.finditer(t):
if m.groups()[1] != '[':
for sid in m.groups()[1].split(','):
if sid.strip():
refs.append((sid.strip(), m.groups()[0]))

find_refs(text)
if refs:
text = pattern.sub('', text).strip()

for cc in comment:
find_refs(cc)
comment = [pattern.sub(lambda m: m.groups()[1], cc).strip() for cc in comment]

#
Expand Down
2 changes: 1 addition & 1 deletion src/linglit/langsci/publication.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ def norm_include(s):
p = m.get(p.name.lower(), p)
if not p.exists() and (p.stem in ['preface', 'acknowledgments']):
continue
if not p.exists() and p.stem == 'abbreviations':
if not p.exists() and p.stem == 'abbreviations': # pragma: no cover
if p.parent.parent.joinpath('abbreviations.tex').exists():
p = p.parent.parent.joinpath('abbreviations.tex')
assert p.exists(), str(p)
Expand Down
28 changes: 20 additions & 8 deletions src/linglit/langsci/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,27 @@
CATALOG_NAME = "catalog.tsv"
FILELIST_NAME = "files.json"
MISSING_TEX_SOURCES = [
155, 192, 195, 255, 287, 297, 311, 325, 373, 380,
410,
284, # For the time being ...
292,
155,
192,
195,
255,
287,
297,
311,
325,
373,
380,
# 410,
284, # For the time being ... no main file found
# 292,
438,
]
MISSING_REPOS = [410, 389, 392, 393, 438]
MISSING_REPOS = [ # Some publications don't have a public repository (yet).
410,
389,
# 392,
# 393,
438]
TEX_BRANCH = {187: 'master'}


Expand Down Expand Up @@ -87,10 +101,8 @@ def __getitem__(self, item):

def iter_publications(self):
for item in self.catalog:
# if item.int_id != 22:
# continue
if item.int_id in MISSING_REPOS:
continue
continue # pragma: no cover
if item.int_id not in MISSING_TEX_SOURCES:
yield Publication(item, self.dir / item.ID, self)

Expand Down
3 changes: 2 additions & 1 deletion tests/cldf/catalog.csv
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
id,name,conceptdoi,bib,source_to_language,hhtype,igt,gloss_abbreviations
8,uratyp,10.5281/zenodo.5236365,1,LanguageTable,,1,
8,uratyp,10.5281/zenodo.5236365,1,LanguageTable,grammar,1,ga.csv abbr def
9,petersonsouthasia,10.5281/zenodo.5236365,1,ValueTable,,,
25 changes: 25 additions & 0 deletions tests/cldf/petersonsouthasia.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{
"doi": "10.5281/zenodo.6392555",
"title": "other",
"creators": [
"Vesakoski, Outi"
],
"year": "2022",
"license": "cc-by-4.0",
"download_urls": [
"https://zenodo.org/records/6392555/files/cldf-datasets/uratyp-v1.1.zip/content"
],
"keywords": [
"cldf:StructureDataset",
"linguistics"
],
"communities": [],
"github_repos": {
"org": "cldf-datasets",
"name": "uratyp",
"tag": "v1.1"
},
"closed_access": false,
"version": "v1.1",
"concept_doi": "10.5281/zenodo.5236365"
}
Loading

0 comments on commit da61f9b

Please sign in to comment.