Commit: 6 changed files with 48 additions and 44 deletions.
Diff of the BibTeX source entry for Cihui:

@@ -1,6 +1,6 @@
 @book{Cihui,
     Editor = {北京大学, Běijīng Dàxué},
-    Publisher = {Wénzì Gǎigé 文字改革},
-    Title = {Hànyǔ fāngyán cíhuì},
+    Publisher = {Wénzì Gǎigé 文字改革},
+    Title = {Hànyǔ fāngyán cíhuì 汉语方言词汇},
     Year = {1964}
 }
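The entry above is what cmd_download writes to sources.bib (see the dataset module below), and it is the kind of record pycldf handles through its Source class. A minimal sketch, not part of the commit, of building the corrected record by hand; the constructor-style usage and the "title" key are assumed from clldutils/pycldf conventions:

# Hypothetical illustration, not from this repository: the corrected "Cihui"
# record expressed with pycldf's Source class (genre, id, then fields).
from pycldf.sources import Source

cihui = Source(
    "book", "Cihui",
    editor="北京大学, Běijīng Dàxué",
    publisher="Wénzì Gǎigé 文字改革",
    title="Hànyǔ fāngyán cíhuì 汉语方言词汇",
    year="1964",
)
print(cihui.id, cihui["title"])  # Cihui Hànyǔ fāngyán cíhuì 汉语方言词汇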
Diff of the pylexibank dataset module (class Dataset, id "beidasinitic"):

@@ -1,53 +1,46 @@
 # coding=utf-8
 from __future__ import unicode_literals, print_function
 from itertools import groupby
 
 import attr
 import lingpy
 from pycldf.sources import Source
-
-from lingpy.sequence.sound_classes import syllabify
-
 from clldutils.path import Path
 from clldutils.misc import slug
 from clldutils.misc import lazyproperty
-from pylexibank.dataset import Metadata, Concept
+from lingpy.sequence.sound_classes import syllabify
+from pylexibank.dataset import Concept
 from pylexibank.dataset import Dataset as BaseDataset
 from pylexibank.util import pb, getEvoBibAsBibtex
 
 
 @attr.s
 class BDConcept(Concept):
     Chinese = attr.ib(default=None)
 
 
 class Dataset(BaseDataset):
     dir = Path(__file__).parent
-    id = 'beidasinitic'
+    id = "beidasinitic"
     concept_class = BDConcept
 
     def cmd_download(self, **kw):
-        self.raw.write('sources.bib', getEvoBibAsBibtex('Cihui', **kw))
+        self.raw.write("sources.bib", getEvoBibAsBibtex("Cihui", **kw))
 
     def cmd_install(self, **kw):
-        wl = lingpy.Wordlist(self.raw.posix('words.tsv'),
-                             conf=self.raw.posix('wordlist.rc'))
-
+        wl = lingpy.Wordlist(self.raw.posix("words.tsv"), conf=self.raw.posix("wordlist.rc"))
 
         with self.cldf as ds:
             ds.add_sources(*self.raw.read_bib())
             ds.add_concepts(id_factory=lambda c: c.number)
-            ds.add_languages(id_factory=lambda c: c['ID'])
-            for k in pb(wl, desc='wl-to-cldf', total=len(wl)):
-                if wl[k, 'value']:
+            ds.add_languages(id_factory=lambda c: c["ID"])
+            for k in pb(wl, desc="wl-to-cldf", total=len(wl)):
+                if wl[k, "value"]:
                     ds.add_lexemes(
-                        Language_ID=wl[k, 'doculect'],
-                        Parameter_ID=wl[k, 'beida_id'],
-                        Value=wl[k, 'value'],
-                        Form=wl[k, 'form'],
-                        Segments = syllabify([{'t↑h': 'tʰ', 'ᴇ': 'ᴇ/ɛ̝'}.get(
-                            x, x) for x in self.tokenizer(None,
-                                ''.join(wl[k, 'segments']),
-                                column='IPA')]),
-                        Source='Cihui')
+                        Language_ID=wl[k, "doculect"],
+                        Parameter_ID=wl[k, "beida_id"],
+                        Value=wl[k, "value"],
+                        Form=wl[k, "form"],
+                        Segments=syllabify(
+                            [
+                                {"t↑h": "tʰ", "ᴇ": "ᴇ/ɛ̝"}.get(x, x)
+                                for x in self.tokenizer(
+                                    None, "".join(wl[k, "segments"]), column="IPA"
+                                )
+                            ]
+                        ),
+                        Source="Cihui",
+                    )
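The changes to this module look cosmetic: double quotes, import reordering, black-style wrapping, and an apparently unused Metadata import dropped. The one step worth spelling out is the segment clean-up in cmd_install, where a small replacement map is applied to the tokenizer output before lingpy's syllabify is called. A standalone sketch of that step follows; the token list is a made-up example, while the replacement map and the syllabify call are taken from the code above:

# Standalone sketch of the segment clean-up used in cmd_install above.
# The token list is hypothetical; the replacement map and the syllabify
# call come from the diff.
from lingpy.sequence.sound_classes import syllabify

REPLACEMENTS = {"t↑h": "tʰ", "ᴇ": "ᴇ/ɛ̝"}  # normalise two stray symbols

tokens = ["t↑h", "a", "n", "ᴇ"]  # pretend output of self.tokenizer(..., column="IPA")
cleaned = [REPLACEMENTS.get(x, x) for x in tokens]  # .get(x, x) leaves other tokens untouched
print(syllabify(cleaned))  # segments with syllable boundaries marked

Because dict.get(x, x) falls back to the token itself, only the two listed symbols are rewritten and everything else passes through unchanged.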
Diff hunk @@ -1,7 +1,15 @@ of the CLDF test module; after the change the tests read:

# coding: utf-8
from __future__ import unicode_literals


def test_valid(cldf_dataset, cldf_logger):
    assert cldf_dataset.validate(log=cldf_logger)


def test_languages(cldf_dataset):
    assert len(list(cldf_dataset['LanguageTable'])) == 18


def test_parameters(cldf_dataset):
    assert len(list(cldf_dataset['ParameterTable'])) == 905


def test_sources(cldf_dataset):
    assert len(cldf_dataset.sources) == 1
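These tests run against the built CLDF dataset through the cldf_dataset and cldf_logger pytest fixtures, presumably supplied by the pylexibank test setup. The same checks can be run outside pytest with pycldf; a sketch, assuming the metadata file sits at a conventional cldf/cldf-metadata.json path that this commit does not show:

# Sketch only: re-running the assertions above with pycldf directly.
# The metadata path below is an assumption, not taken from the commit.
from pycldf import Dataset

ds = Dataset.from_metadata("cldf/cldf-metadata.json")
assert ds.validate()
assert len(list(ds["LanguageTable"])) == 18
assert len(list(ds["ParameterTable"])) == 905
assert len(ds.sources) == 1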