Skip to content

Commit

Permalink
Arabic positional forms check
Browse files Browse the repository at this point in the history
  • Loading branch information
simoncozens committed Nov 26, 2024
1 parent 52cbfa0 commit 2107a9c
Show file tree
Hide file tree
Showing 4 changed files with 83 additions and 2 deletions.
1 change: 1 addition & 0 deletions shaperglot-lib/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ serde_json = "1.0"
unicode-normalization = "*"
colored = { version = "*", optional = true }
unicode-properties = "0.1.3"
unicode-joining-type = "1.0.0"
indexmap = "2"
log = "0.4"
toml = "0.8.19"
Expand Down
4 changes: 3 additions & 1 deletion shaperglot-lib/src/providers/mod.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
use crate::{checks::Check, language::Language};

// mod african_latin;
mod orthographies;
mod positional;
mod small_caps;
use orthographies::OrthographiesProvider;
use positional::PositionalProvider;
use small_caps::SmallCapsProvider;

pub trait Provider {
Expand All @@ -18,6 +19,7 @@ impl Provider for BaseCheckProvider {
let mut checks: Vec<Check> = vec![];
checks.extend(OrthographiesProvider.checks_for(language));
checks.extend(SmallCapsProvider.checks_for(language));
checks.extend(PositionalProvider.checks_for(language));

// And any manually coded checks

Expand Down
78 changes: 78 additions & 0 deletions shaperglot-lib/src/providers/positional.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
use unicode_joining_type::{get_joining_type, JoiningType};
use unicode_properties::{GeneralCategoryGroup, UnicodeGeneralCategory};

use crate::{
checks::{Check, CheckType, ScoringStrategy, ShapingDiffers},
language::Language,
shaping::ShapingInput,
Provider, ResultCode,
};

/// U+200D ZERO WIDTH JOINER. Placed before and/or after a letter when building
/// test strings so the letter is shaped as if it had a joining neighbour.
const ZWJ: &str = "\u{200D}";

// const MARKS_FOR_LANG: [(&str, &str); 1] = [(
// "ar_Arab",
// "\u{064E}\u{0651} \u{064B}\u{0651} \u{0650}\u{0651} \u{064D}\u{0651} \u{064F}\u{0651} \u{064C}\u{0651}",
// )];

/// Check provider that generates positional-form (`init` / `medi` / `fina`)
/// shaping checks for the joining letters among a language's bases.
pub struct PositionalProvider;

impl Provider for PositionalProvider {
    /// Builds the positional-forms check for `language`.
    ///
    /// Every base that is exactly one character long and belongs to the
    /// `Letter` general-category group is classified by its Unicode joining
    /// type:
    ///
    /// * dual-joining letters contribute an `init`, a `medi` and a `fina` pair;
    /// * right-joining letters contribute a `fina` pair only;
    /// * all other joining types are ignored.
    ///
    /// Returns a single check whose three `ShapingDiffers` implementations
    /// cover the `init`, `medi` and `fina` pairs respectively, or an empty
    /// vector when the language has no joining letters at all.
    fn checks_for(&self, language: &Language) -> Vec<Check> {
        // Disabled draft: collect the language's nonspacing marks, with the
        // dotted-circle placeholder (U+25CC) stripped.
        // let marks = language
        //     .marks
        //     .iter()
        //     .map(|s| s.replace("\u{25CC}", ""))
        //     .filter(|s| {
        //         s.chars()
        //             .all(|c| c.general_category() == GeneralCategory::NonspacingMark)
        //     });
        let mut fina_pairs = vec![];
        let mut medi_pairs = vec![];
        let mut init_pairs = vec![];
        for base in language.bases.iter() {
            // Only single-character letter bases are considered; pulling the
            // character out here avoids a separate filter plus `unwrap()`.
            let mut chars = base.chars();
            let letter = match (chars.next(), chars.next()) {
                (Some(c), None)
                    if c.general_category_group() == GeneralCategoryGroup::Letter =>
                {
                    c
                }
                _ => continue,
            };
            match get_joining_type(letter) {
                JoiningType::DualJoining => {
                    // ZWJ on the side(s) where the letter should connect.
                    init_pairs.push(positional_check("", base, ZWJ, "init"));
                    medi_pairs.push(positional_check(ZWJ, base, ZWJ, "medi"));
                    fina_pairs.push(positional_check(ZWJ, base, "", "fina"));
                }
                JoiningType::RightJoining => {
                    // Right-joining letters only connect to what precedes them.
                    fina_pairs.push(positional_check(ZWJ, base, "", "fina"));
                }
                _ => {}
            }
        }
        // Languages without any joining letters have nothing to test; do not
        // emit a weighted check whose implementations have no inputs.
        if init_pairs.is_empty() && medi_pairs.is_empty() && fina_pairs.is_empty() {
            return vec![];
        }
        let implementations = vec![
            CheckType::ShapingDiffers(ShapingDiffers::new(init_pairs, false)),
            CheckType::ShapingDiffers(ShapingDiffers::new(medi_pairs, false)),
            CheckType::ShapingDiffers(ShapingDiffers::new(fina_pairs, false)),
        ];
        vec![Check {
            name: "Positional forms for Arabic letters".to_string(),
            severity: ResultCode::Fail,
            description: "Arabic letters should form positional forms when the init, medi, and fina features are enabled"
                .to_string(),
            scoring_strategy: ScoringStrategy::Continuous,
            weight: 20,
            implementations,
        }]
    }
}

/// Builds one pair of shaping inputs for a positional-form test.
///
/// The test text is `pre` + `character` + `post`. The first element of the
/// returned pair carries that text together with the feature string
/// `"-" + feature` (presumably a request to turn the feature off; confirm
/// against `ShapingInput::new_with_feature`). The second element carries the
/// same text as a simple input with no explicit feature.
fn positional_check(
    pre: &str,
    character: &str,
    post: &str,
    feature: &str,
) -> (ShapingInput, ShapingInput) {
    let text = [pre, character, post].concat();
    let negated_feature = format!("-{}", feature);
    (
        ShapingInput::new_with_feature(text.clone(), negated_feature),
        ShapingInput::new_simple(text),
    )
}
2 changes: 1 addition & 1 deletion shaperglot-lib/src/shaping.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ use crate::Checker;
pub struct ShapingInput {
pub text: String,
pub features: Vec<String>,
language: Option<String>,
pub language: Option<String>,
}

impl ShapingInput {
Expand Down

0 comments on commit 2107a9c

Please sign in to comment.