Skip to content

Commit

Permalink
Merge pull request #35 from fearful-symmetry/lots-of-bugfixes
Browse files Browse the repository at this point in the history
Number of bugfixes, add historical_metadata
  • Loading branch information
fearful-symmetry authored Oct 27, 2023
2 parents 6010987 + 11597d6 commit 6b2423a
Show file tree
Hide file tree
Showing 11 changed files with 167 additions and 20 deletions.
28 changes: 22 additions & 6 deletions kirum/src/entries.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use std::collections::HashMap;

use anyhow::{anyhow, Result};
use libkirum::{word::{PartOfSpeech, Etymology}, kirum::Lexis, transforms::{TransformFunc, Transform}, matching::LexisMatch, lemma::Lemma};
use serde::{Serialize, Deserialize};
use serde_with::skip_serializing_none;
Expand Down Expand Up @@ -46,8 +47,12 @@ pub struct RawLexicalEntry {
#[serde(default = "default_archaic")]
/// Optional user tagging
pub archaic: bool,
/// Optional tags used for user-filtering
pub tags: Option<Vec<String>>,
/// A tag that tells Kirum to generate the word based on the phonetic rule set specified by the tag
/// Optional metadata values used for filtering and ordering.
/// Unlike tags, historical_metadata will be copied to any derivative words, and can be used for templating, filtering, etc.
pub historical_metadata: Option<HashMap<String, String>>,
/// A key that tells Kirum to generate the word based on the phonetic rule set specified by the tag
pub generate: Option<String>,
/// Words that will be added as a derivative of the enclosing Lexis; any value not specified will be taken from the enclosing entry.
pub derivatives: Option<Vec<Derivative>>
Expand Down Expand Up @@ -82,6 +87,7 @@ impl From<RawLexicalEntry> for Lexis{
definition: source.definition,
archaic: source.archaic,
tags: source.tags.unwrap_or(Vec::new()),
historical_metadata: source.historical_metadata.unwrap_or(HashMap::new()),
word_create: source.generate
}
}
Expand All @@ -97,24 +103,34 @@ impl From<Lexis> for RawLexicalEntry{
etymology: None,
archaic: value.archaic,
tags: if !value.tags.is_empty() {Some(value.tags)} else {None},
historical_metadata: if !value.historical_metadata.is_empty() {Some(value.historical_metadata)} else {None},
derivatives: None,
generate: value.word_create
}
}
}

/// take the output of a call to to_vec_etymons() and structure it like a graph json file structure
pub fn create_json_graph<F>(lex: Vec<(Lexis, Etymology)>,mut key_gen: F) -> WordGraph
/// If render_metadata is false, any historical_metadata fields will not be copied.
/// This is useful in situations where we're writing out derivative values, and don't want metadata that will be
/// re-derived during ingest to get copied over
pub fn create_json_graph<F>(lex: Vec<(Lexis, Etymology)>,mut key_gen: F, render_metadata: bool) -> Result<WordGraph>
where F: FnMut(Lexis) -> String
{
let mut graph: HashMap<String, RawLexicalEntry> = HashMap::new();

for (word, ety) in lex{
let base: RawLexicalEntry = word.clone().into();
let found_ety = if !ety.etymons.is_empty() {Some(ety)} else {None};
let complete = RawLexicalEntry{etymology: found_ety, ..base};
let mut complete = RawLexicalEntry{etymology: found_ety, ..base};
if !render_metadata{
complete.historical_metadata = None
}
let key = key_gen(word);
graph.insert(key, complete);
}
WordGraph { words: graph }
let found = graph.insert(key.clone(), complete.clone());
if let Some(existing) = found{
return Err(anyhow!("Key {} already exists in map; existing: '{}' \n new:' '{}'", key, existing.definition, complete.definition))
}
};
Ok( WordGraph { words: graph })
}
1 change: 1 addition & 0 deletions kirum/src/files.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ pub fn read_tree_files(files: &Vec<PathBuf>) -> Result<HashMap<String, RawLexica
etymons: vec![Edge{etymon: lex_name.to_string(),
transforms: der.transforms.clone(),
agglutination_order: None}] }),
historical_metadata: node.historical_metadata.clone(),
..der.lexis.clone()
};
language_map.insert(der_id, der_lex_raw);
Expand Down
5 changes: 3 additions & 2 deletions kirum/src/generate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ pub fn daughter(daughter_ety: String,
debug!("Creating daughter language '{}' from '{}'", lang_name, ancestor);
computed.generate_daughter_language(lang_name.clone(),
processed_transforms, |l| l.language == ancestor,
|l| Lexis {tags: [l.tags.clone(), ["autogenerated".to_string()].to_vec()].concat(), ..l.clone()});
|l| Lexis { id: format!("daughter-from-{}", l.id), tags: [l.tags.clone(), ["autogenerated".to_string()].to_vec()].concat(), ..l.clone()});

let rendered_dict = computed.to_vec_etymons(|word|word.language == lang_name);

Expand All @@ -47,7 +47,8 @@ pub fn daughter(daughter_ety: String,
}

for (fname, data) in file_map {
let graph = entries::create_json_graph(data, |l| format!("daughter-gen-{}", l.word.unwrap().string_without_sep()));
let graph = entries::create_json_graph(data, |l| l.id, false)
.context("error creating map from new language data")?;

let graph_data = serde_json::to_string_pretty(&graph)
.context("error creating JSON from graph")?;
Expand Down
14 changes: 13 additions & 1 deletion kirum/src/ingest/json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,7 @@ mod tests {
let good_input = WordGraph {
words: HashMap::from([(
"ingest-failure".to_string(), RawLexicalEntry {
historical_metadata: None,
word: None,
word_type: None,
language: None,
Expand All @@ -241,6 +242,7 @@ mod tests {
derivatives: None,
}),
("ingest-grab".to_string(), RawLexicalEntry {
historical_metadata: None,
word: None,
word_type: None,
language: None,
Expand All @@ -253,6 +255,7 @@ mod tests {
derivatives: None,
}),
("ingest-fail".to_string(), RawLexicalEntry {
historical_metadata: None,
word: None,
word_type: None,
language: None,
Expand All @@ -265,6 +268,7 @@ mod tests {
derivatives: None,
}),
("ingest-twistable".to_string(), RawLexicalEntry {
historical_metadata: None,
word: None,
word_type: None,
language: None,
Expand All @@ -291,6 +295,7 @@ mod tests {
derivatives: None,
}),
("ingest-failing".to_string(), RawLexicalEntry {
historical_metadata: None,
word: None,
word_type: None,
language: None,
Expand All @@ -312,7 +317,8 @@ mod tests {
generate: None,
derivatives: None,
}),
("ingest-unretwistable".to_string(), RawLexicalEntry {
("ingest-unretwistable".to_string(), RawLexicalEntry {
historical_metadata: None,
word: None,
word_type: None,
language: None,
Expand All @@ -335,6 +341,7 @@ mod tests {
derivatives: None,
}),
("ingest-untwistable".to_string(), RawLexicalEntry {
historical_metadata: None,
word: None,
word_type: None,
language: None,
Expand All @@ -357,6 +364,7 @@ mod tests {
derivatives: None,
}),
("ingest-twist".to_string(), RawLexicalEntry {
historical_metadata: None,
word: None,
word_type: None,
language: None,
Expand All @@ -369,6 +377,7 @@ mod tests {
derivatives: None,
}),
("ingest-retwistable".to_string(), RawLexicalEntry {
historical_metadata: None,
word: None,
word_type: None,
language: None,
Expand All @@ -391,6 +400,7 @@ mod tests {
derivatives: None,
}),
("ingest-attack".to_string(), RawLexicalEntry {
historical_metadata: None,
word: None,
word_type: None,
language: None,
Expand All @@ -403,6 +413,7 @@ mod tests {
derivatives: None,
}),
("ingest-attacked".to_string(), RawLexicalEntry {
historical_metadata: None,
word: None,
word_type: None,
language: None,
Expand All @@ -425,6 +436,7 @@ mod tests {
derivatives: None,
}),
("ingest-attacking".to_string(), RawLexicalEntry {
historical_metadata: None,
word: None,
word_type: None,
language: None,
Expand Down
5 changes: 3 additions & 2 deletions kirum/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use clap::Parser;
use entries::create_json_graph;
use files::{read_and_compute, apply_def_vars};
use new::create_new_project;
use anyhow::Result;
use anyhow::{Result, Context};
use stat::gen_stats;
use std::{fs::File, io::Write};
//use csv::WriterBuilder;
Expand Down Expand Up @@ -84,7 +84,8 @@ fn main() -> Result<()> {
},
cli::Format::Json => {
let words = computed.to_vec_etymons(|_|true);
let word_data = create_json_graph(words, |l| l.id);
let word_data = create_json_graph(words, |l| l.id, false)
.context("could not create map from language data")?;
serde_json::to_string_pretty(&word_data)?
}

Expand Down
3 changes: 3 additions & 0 deletions kirum/src/new.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,14 @@ pub fn create_new_project(name: &str) -> Result<()> {
etymology: None,
archaic: true,
tags: None,
historical_metadata: None,
derivatives: None,
generate: None,
});
word_map.insert("latin_example".into(), RawLexicalEntry {
word: None,
word_type: Some("word".into()),
historical_metadata: None,
language: Some("Latin".into()),
definition: "an instance, model, example".into(),
part_of_speech: Some(libkirum::word::PartOfSpeech::Noun),
Expand All @@ -71,6 +73,7 @@ pub fn create_new_project(name: &str) -> Result<()> {
etymology: None,
archaic: true,
tags: None,
historical_metadata: None,
derivatives: None,
generate: None,
},
Expand Down
1 change: 1 addition & 0 deletions kirum/src/tmpl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use anyhow::{Result, Context, anyhow};
/// Render a dictionary from a list of words, and a template
pub fn generate_from_tmpl(rendered_lang: Vec<Lexis>, template_file: String, rhai_files: Option<Vec<String>>) -> Result<String> {
let mut reg = Handlebars::new();
reg.register_escape_fn(handlebars::no_escape);
reg.register_helper("string_eq", Box::new(string_eq));
reg.register_template_file("tmpl", &template_file).context(format!("could not add template file {}", template_file))?;
if let Some(files) = rhai_files{
Expand Down
Loading

0 comments on commit 6b2423a

Please sign in to comment.