Skip to content

Commit

Permalink
Document everything and enforce documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
simoncozens committed Nov 27, 2024
1 parent 7316c4b commit 6722b0a
Show file tree
Hide file tree
Showing 15 changed files with 198 additions and 31 deletions.
10 changes: 10 additions & 0 deletions shaperglot-lib/src/checker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,25 @@ use crate::{
use rustybuzz::Face;
use skrifa::{raw::ReadError, FontRef, GlyphId, MetadataProvider};

/// The context for running font language support checks
///
/// Bundles the parsed font with data about it (glyph names, feature tags and
/// character maps). The lifetime `'a` is that of the font binary which both
/// `font` and `face` borrow from.
pub struct Checker<'a> {
/// The font to check, as a skrifa [`FontRef`]
pub font: FontRef<'a>,
/// The rustybuzz [`Face`] to use for shaping
pub face: Face<'a>,
/// The glyph names in the font
pub glyph_names: Vec<String>,
/// The OpenType feature tags present in the font
pub features: HashSet<String>,
/// The character map of the font, from codepoint to glyph ID
pub cmap: BTreeMap<u32, GlyphId>,
/// The reversed character map of the font, from glyph ID back to codepoint
///
/// This is what [`Checker::codepoint_for`] consults.
reversed_cmap: BTreeMap<GlyphId, u32>,
// full_reversed_cmap: Arc<Mutex<Option<BTreeMap<GlyphId, u32>>>>,
}

impl<'a> Checker<'a> {
/// Create a new font checker
pub fn new(font_binary: &'a [u8]) -> Result<Self, ReadError> {
let face = Face::from_slice(font_binary, 0).expect("Couldn't load font");
let font = FontRef::new(font_binary)?;
Expand All @@ -38,6 +46,7 @@ impl<'a> Checker<'a> {
})
}

/// Get the codepoint for a given glyph ID
pub fn codepoint_for(&self, gid: GlyphId) -> Option<u32> {
// self.reversed_cmap.get(&gid).copied().or_else(||
// if !self.full_reversed_cmap.is_some() {
Expand All @@ -47,6 +56,7 @@ impl<'a> Checker<'a> {
self.reversed_cmap.get(&gid).copied()
}

/// Check a font for language support
pub fn check(&self, language: &Language) -> Reporter {
let mut results = Reporter::default();
for check_object in language.checks.iter() {
Expand Down
4 changes: 4 additions & 0 deletions shaperglot-lib/src/checks/codepoint_coverage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,11 @@ use serde_json::json;
use std::collections::HashSet;

#[derive(Serialize, Deserialize, Debug, Clone)]
/// A check implementation which ensures codepoints are present in a font
pub struct CodepointCoverage {
/// The strings whose codepoints are checked for
///
/// Held as a set, so duplicate strings passed to `new` collapse into one entry.
strings: HashSet<String>,
/// The unique code to return on failure (e.g. "marks-missing")
code: String,
}

Expand Down Expand Up @@ -72,6 +75,7 @@ impl CheckImplementation for CodepointCoverage {
}

impl CodepointCoverage {
/// Create a new `CodepointCoverage` check implementation
pub fn new(test_strings: Vec<String>, code: String) -> Self {
Self {
strings: test_strings.into_iter().collect(),
Expand Down
27 changes: 27 additions & 0 deletions shaperglot-lib/src/checks/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
/// A check implementation which ensures codepoints are present in a font
mod codepoint_coverage;
/// A check implementation which ensures marks are anchored to their respective base characters
mod no_orphaned_marks;
/// A check implementation which ensures that two shaping inputs produce different outputs
mod shaping_differs;

use crate::{
Expand All @@ -14,39 +17,63 @@ use serde::{Deserialize, Serialize};
pub use shaping_differs::ShapingDiffers;

#[delegatable_trait]
/// A check implementation
///
/// This is a sub-unit of a [`Check`]; a `Check` is made up of multiple
/// `CheckImplementation`s. For example, an orthography check will
/// first check bases, then marks, then auxiliary codepoints.
///
/// The trait is marked delegatable so that [`CheckType`] can forward every
/// method to whichever implementation it wraps.
pub trait CheckImplementation {
/// The name of the check implementation
fn name(&self) -> String;
/// A description of the check implementation
fn describe(&self) -> String;
/// Whether the subcheck should be skipped for this font
///
/// NOTE(review): presumably `Some(reason)` means "skip, for this reason" and
/// `None` means "run the check" — confirm against the implementations.
fn should_skip(&self, checker: &Checker) -> Option<String>;
/// Execute the check implementation and return problems found
///
/// NOTE(review): the `usize` in the returned tuple is presumably the number of
/// individual checks that were run — confirm against `Check::execute`.
fn execute(&self, checker: &Checker) -> (Vec<Problem>, usize);
}

#[derive(Serialize, Deserialize, PartialEq, Debug, Clone)]
/// The scoring strategy for a check
///
/// Each [`Check`] selects one through its `scoring_strategy` field.
pub enum ScoringStrategy {
/// A continuous score; the score is the proportion of checks that pass
Continuous,
/// An all-or-nothing score; the score is 1 if all checks pass, 0 otherwise
AllOrNothing,
}

#[derive(Delegate, Serialize, Deserialize, Debug, Clone)]
#[delegate(CheckImplementation)]
#[serde(tag = "type")]
/// Check implementations available to higher-level checks
///
/// Each variant wraps one concrete implementation, and the
/// [`CheckImplementation`] methods are delegated to the wrapped value.
/// When (de)serialized, the variant is identified by a `type` tag.
pub enum CheckType {
/// A check implementation which ensures codepoints are present in a font
CodepointCoverage(CodepointCoverage),
/// A check implementation which ensures marks are anchored to their respective base characters
NoOrphanedMarks(NoOrphanedMarks),
/// A check implementation which ensures that two shaping inputs produce different outputs
ShapingDiffers(ShapingDiffers),
}

#[derive(Serialize, Deserialize, Debug, Clone)]
/// A check to be executed
///
/// A check groups one or more [`CheckType`] implementations under a single
/// name, severity and scoring strategy.
pub struct Check {
/// The name of the check
pub name: String,
/// The severity of the check in terms of how it affects language support
pub severity: ResultCode,
/// A description of the check
pub description: String,
/// The scoring strategy for the check
pub scoring_strategy: ScoringStrategy,
/// The weight of the check
pub weight: u8,
/// Individual implementations to be run
pub implementations: Vec<CheckType>,
}

impl Check {
/// Execute the check and return the results
pub fn execute(&self, checker: &Checker) -> CheckResult {
let mut problems = Vec::new();
let mut total_checks = 0;
Expand Down
8 changes: 8 additions & 0 deletions shaperglot-lib/src/checks/no_orphaned_marks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,14 @@ use serde::{Deserialize, Serialize};
use unicode_properties::{GeneralCategory, UnicodeGeneralCategory};

#[derive(Serialize, Deserialize, Debug, Clone)]
/// A check implementation which ensures marks are anchored to their respective base characters
pub struct NoOrphanedMarks {
/// The strings to shape and check
test_strings: Vec<ShapingInput>,
/// Whether the language has orthography data
///
/// If this is true, we will not report notdefs, as the orthography check will
/// catch them.
has_orthography: bool,
}

Expand Down Expand Up @@ -130,13 +136,15 @@ impl CheckImplementation for NoOrphanedMarks {
}
}

/// Report whether a codepoint is a nonspacing mark
///
/// Values that are not valid Unicode scalar values are never treated as marks.
fn simple_mark_check(c: u32) -> bool {
    match char::from_u32(c) {
        Some(ch) => matches!(ch.general_category(), GeneralCategory::NonspacingMark),
        None => false,
    }
}

impl NoOrphanedMarks {
/// Create a new `NoOrphanedMarks` check implementation
pub fn new(test_strings: Vec<ShapingInput>, has_orthography: bool) -> Self {
Self {
test_strings,
Expand Down
7 changes: 7 additions & 0 deletions shaperglot-lib/src/checks/shaping_differs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,14 @@ use rustybuzz::SerializeFlags;
use serde::{Deserialize, Serialize};

#[derive(Serialize, Deserialize, Debug, Clone)]
/// A check implementation which ensures that two shaping inputs produce different outputs
pub struct ShapingDiffers {
/// The pairs of strings to shape and compare
pairs: Vec<(ShapingInput, ShapingInput)>,
/// Whether the features are optional
///
/// If this is true, the check will only run if the font contains the requested feature;
/// otherwise it will be skipped. If it is false, the check will always run.
features_optional: bool,
}

Expand Down Expand Up @@ -94,6 +100,7 @@ impl CheckImplementation for ShapingDiffers {
}

impl ShapingDiffers {
/// Create a new `ShapingDiffers` check implementation
pub fn new(pairs: Vec<(ShapingInput, ShapingInput)>, features_optional: bool) -> Self {
Self {
pairs,
Expand Down
2 changes: 2 additions & 0 deletions shaperglot-lib/src/font.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use skrifa::{
FontRef,
};

/// Get a list of glyph names for a font
pub(crate) fn glyph_names(font: &FontRef) -> Result<Vec<String>, ReadError> {
#[allow(clippy::unwrap_used)] // Heck, Skrifa does the same
let glyph_count = font.maxp().unwrap().num_glyphs().into();
Expand Down Expand Up @@ -54,6 +55,7 @@ pub(crate) fn glyph_names(font: &FontRef) -> Result<Vec<String>, ReadError> {
Ok(names)
}

/// Get a list of feature tags present in a font
pub(crate) fn feature_tags(font: &FontRef) -> Result<HashSet<String>, ReadError> {
let mut tags = HashSet::new();
if let Some(gsub_featurelist) = font.gsub().ok().and_then(|gsub| gsub.feature_list().ok()) {
Expand Down
33 changes: 18 additions & 15 deletions shaperglot-lib/src/language.rs
Original file line number Diff line number Diff line change
@@ -1,49 +1,49 @@
use google_fonts_languages::{LanguageProto, LANGUAGES, SCRIPTS};
use google_fonts_languages::{LanguageProto, LANGUAGES};
use unicode_normalization::UnicodeNormalization;

use crate::{
checks::Check,
providers::{BaseCheckProvider, Provider},
};

/// A language definition, including checks and exemplar characters
///
/// The `id`, `name` and `script` accessors on this type read from `proto`.
pub struct Language {
/// The underlying language definition from the google-fonts-languages database,
/// as a boxed [`LanguageProto`]
pub proto: Box<LanguageProto>,
/// The checks that apply to this language
pub checks: Vec<Check>,
/// Mandatory base characters for the language
pub bases: Vec<String>,
/// Optional auxiliary characters for the language
pub auxiliaries: Vec<String>,
/// Mandatory mark characters for the language
pub marks: Vec<String>,
}

impl Language {
// NOTE(review): this was documented as "ISO 639-3 code"; IDs in the
// google-fonts-languages database appear to combine language and script
// (e.g. "en_Latn") — confirm before restoring that wording.
/// The language's ID, as given by the underlying language definition
pub fn id(&self) -> &str {
self.proto.id()
}

/// The language's name
pub fn name(&self) -> &str {
self.proto.name()
}

/// The language's full name, including its script
///
/// Formatted as `"<name> in the <script> script"`. The script's name is looked
/// up in `SCRIPTS` by script code and falls back to `"Unknown"` when absent.
pub fn full_name(&self) -> String {
format!(
"{} in the {} script",
self.proto.name(),
SCRIPTS
.get(self.proto.script())
.map(|s| s.name())
.unwrap_or("Unknown")
)
}

/// The language's ISO15924 script code
pub fn script(&self) -> &str {
self.proto.script()
}
/// The language code, as given by the underlying language definition
pub fn language(&self) -> &str {
self.proto.language()
}
}

/// The language database
///
/// A newtype wrapper around a `Vec` of [`Language`] entries.
pub struct Languages(Vec<Language>);

impl Languages {
/// Instantiate a new language database
///
/// This loads the database and fills it with checks.
pub fn new() -> Self {
let mut languages = Vec::new();
for (_id, proto) in LANGUAGES.iter() {
Expand Down Expand Up @@ -85,10 +85,12 @@ impl Languages {
Languages(languages)
}

/// Get an iterator over the languages
///
/// Yields a shared reference to each [`Language`] in the database, in the
/// order of the underlying vector. The returned iterator borrows from
/// `self`, which the explicit `'_` lifetime makes visible in the signature.
pub fn iter(&self) -> std::slice::Iter<'_, Language> {
    self.0.iter()
}

/// Get a single language by ID or name
pub fn get_language(&self, id: &str) -> Option<&Language> {
self.0
.iter()
Expand All @@ -103,6 +105,7 @@ impl Default for Languages {
}
}

/// Split up an exemplars string into individual characters
fn parse_chars(chars: &str) -> Vec<String> {
chars
.split_whitespace()
Expand Down
20 changes: 18 additions & 2 deletions shaperglot-lib/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,27 @@
// #![deny(missing_docs)]
// #![deny(clippy::missing_docs_in_private_items)]
#![deny(missing_docs)]
#![deny(clippy::missing_docs_in_private_items)]
//! Shaperglot is a library for checking a font's language support.
//!
//! Unlike other language coverage tools, shaperglot is based on the idea
//! that the font must not simply cover Unicode codepoints to support a
//! language but must also behave in certain ways. Shaperglot does not
//! dictate particular implementations of language support, in terms of
//! what glyphs or rules are present in the font or how glyphs should be named,
//! but tests a font for its behaviour.
/// The checker object, representing the context of a check
mod checker;
/// Low-level checks and their implementations
mod checks;
/// Utility functions to extract information from a font
mod font;
/// Structures and routines relating to the language database
mod language;
/// Providers turn a language definition into a set of checks
mod providers;
/// The reporter object, representing the results of a language test
mod reporter;
/// Utility functions for text shaping
mod shaping;

pub use crate::{
Expand Down
8 changes: 8 additions & 0 deletions shaperglot-lib/src/providers/mod.rs
Original file line number Diff line number Diff line change
@@ -1,20 +1,28 @@
use crate::{checks::Check, language::Language};

/// Orthographic checks provider
mod orthographies;
/// Arabic positional forms checks provider
mod positional;
/// Latin small caps checks provider
mod small_caps;
/// Manually-coded checks provider
mod toml;

use orthographies::OrthographiesProvider;
use positional::PositionalProvider;
use small_caps::SmallCapsProvider;
use toml::TomlProvider;

/// A provider of checks for a language
///
/// Implemented by [`BaseCheckProvider`] and by the individual providers
/// such as `OrthographiesProvider`.
pub trait Provider {
/// Given a language, return a list of checks that apply to it
fn checks_for(&self, language: &Language) -> Vec<Check>;
}

/// The base check provider provides all checks for a language
///
/// It calls all other known providers to get their checks.
///
/// NOTE(review): this module imports `OrthographiesProvider`,
/// `PositionalProvider`, `SmallCapsProvider` and `TomlProvider`; presumably
/// those are the providers consulted — confirm against the `Provider` impl.
pub struct BaseCheckProvider;

impl Provider for BaseCheckProvider {
Expand Down
9 changes: 7 additions & 2 deletions shaperglot-lib/src/providers/orthographies.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,16 @@ use crate::{
use itertools::Itertools;
use unicode_properties::{GeneralCategoryGroup, UnicodeGeneralCategory};

/// Report whether a base character (in NFC) contains a mark
///
/// True when at least one `char` of `base` belongs to the Mark general
/// category group.
fn has_complex_decomposed_base(base: &str) -> bool {
    base.chars()
        .map(|ch| ch.general_category_group())
        .any(|group| group == GeneralCategoryGroup::Mark)
}

/// Check that the font covers the basic codepoints for the language's orthography
///
/// This check is mandatory for all languages. Base and mark codepoints are required,
/// and auxiliary codepoints are optional: no auxiliaries check is built for a
/// language whose auxiliaries list is empty.
pub struct OrthographiesProvider;

impl Provider for OrthographiesProvider {
Expand All @@ -29,7 +34,7 @@ impl Provider for OrthographiesProvider {
}
}

// Orthography check. We MUST have all bases and marks.
/// Orthography check. We MUST have all bases and marks.
fn mandatory_orthography(language: &Language) -> Check {
let mut mandatory_orthography = Check {
name: "Mandatory orthography codepoints".to_string(),
Expand Down Expand Up @@ -83,7 +88,7 @@ fn mandatory_orthography(language: &Language) -> Check {
mandatory_orthography
}

// We SHOULD have auxiliaries
/// We SHOULD have auxiliaries
fn auxiliaries_check(language: &Language) -> Option<Check> {
if language.auxiliaries.is_empty() {
return None;
Expand Down
Loading

0 comments on commit 6722b0a

Please sign in to comment.