Add support for biblatex extended name format default nameparts (#48)
quachpas authored Dec 12, 2023
1 parent 7333873 commit 66f2cf7
Showing 4 changed files with 187 additions and 16 deletions.
8 changes: 8 additions & 0 deletions src/chunk.rs
@@ -474,6 +474,14 @@ pub(crate) fn split_values(
(src, new)
}

/// Returns the number of occurrences of the character `c` within the
/// normal chunks. Characters in verbatim (brace-protected) chunks are
/// not counted.
pub(crate) fn count_num_char(chunks: ChunksRef, c: char) -> usize {
chunks
.iter()
.map(|val| if let Chunk::Normal(s) = &val.v { s.matches(c).count() } else { 0 })
.sum()
}
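
For intuition, here is a self-contained sketch of the counting rule, using a simplified stand-in for the crate's `Chunk` type (the `MiniChunk` enum below is an assumption for illustration): separators inside brace-protected, i.e. verbatim, text such as `{Robert and Sons, Inc.}` must not count as name separators.

enum MiniChunk {
    Normal(String),   // unprotected text: separators count
    Verbatim(String), // brace-protected text: separators are ignored
}

fn count_commas(chunks: &[MiniChunk]) -> usize {
    chunks
        .iter()
        .map(|chunk| match chunk {
            MiniChunk::Normal(s) => s.matches(',').count(),
            MiniChunk::Verbatim(_) => 0,
        })
        .sum()
}

fn main() {
    let chunks = vec![
        MiniChunk::Normal("family=".into()),
        MiniChunk::Verbatim("Robert and Sons, Inc.".into()),
    ];
    // The protected comma is not counted, so name splitting ignores it.
    assert_eq!(count_commas(&chunks), 0);
}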

#[cfg(test)]
#[allow(non_snake_case)]
pub(crate) mod tests {
5 changes: 5 additions & 0 deletions src/lib.rs
@@ -1026,6 +1026,11 @@ mod tests {
dump_author_title("tests/polaritons.bib");
}

#[test]
fn test_extended_name_format() {
dump_author_title("tests/extended_name_format.bib");
}

#[test]
fn test_alias() {
let contents = fs::read_to_string("tests/cross.bib").unwrap();
152 changes: 136 additions & 16 deletions src/types/person.rs
@@ -1,3 +1,4 @@
use std::collections::HashMap;
use std::fmt::{self, Display, Formatter};

use crate::{chunk::*, Spanned};
@@ -21,34 +22,58 @@ pub struct Person {
}

impl Person {
/// Constructs a new person from a chunk vector:
/// 1. according to the specs of
///    [Nicolas Markey in "Tame the BeaST"][taming], pp. 23-24;
/// 2. according to the biblatex extended name format, per the
///    [biblatex documentation][biblatex], §3.4 pp. 80-81 and
///    §4.2.3 pp. 164-165. Support is limited to the default
///    `nameparts`: prefix, family, suffix, given.
///
/// [taming]: https://ftp.rrze.uni-erlangen.de/ctan/info/bibtex/tamethebeast/ttb_en.pdf
/// [biblatex]: https://ctan.gutenberg-asso.fr/macros/latex/contrib/biblatex/doc/biblatex.pdf
pub fn parse(chunks: ChunksRef) -> Self {
    // Extended Name Format
    if chunks.iter().any(|val| val.v.get().contains("=")) {
        return Self::parse_extended_name_format(chunks);
    }

    // BibTeX
    Self::parse_bibtex(chunks)
}

/// Constructs a new person from chunks in biblatex's extended name
/// format, e.g. `given=Hans, family=Harman`.
fn parse_extended_name_format(chunks: ChunksRef) -> Self {
    let mut person = HashMap::new();
    for chunk in split_token_lists(chunks, ",") {
        let (key, value) = split_at_normal_char(&chunk, '=', true);
        let key = key.format_verbatim();
        let value = value.format_verbatim();
        person.insert(key, value);
    }

    let name = person.remove("family").unwrap_or_default();
    let given_name = person.remove("given").unwrap_or_default();
    let prefix = person.remove("prefix").unwrap_or_default();
    let suffix = person.remove("suffix").unwrap_or_default();

    Self { name, given_name, prefix, suffix }
}
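
To make the split above concrete, here is a plain-string sketch of the namepart mapping (the `nameparts` helper is illustrative, not part of the crate, and it assumes no brace-protected commas or equals signs; the chunk-based code above exists precisely to handle those):

use std::collections::HashMap;

fn nameparts(name: &str) -> HashMap<&str, &str> {
    name.split(',')
        .filter_map(|part| part.split_once('='))
        .map(|(key, value)| (key.trim(), value.trim()))
        .collect()
}

fn main() {
    let parts = nameparts("given=Jean, prefix=de la, family=Rousse");
    assert_eq!(parts.get("family"), Some(&"Rousse"));
    assert_eq!(parts.get("prefix"), Some(&"de la"));
    assert_eq!(parts.get("given"), Some(&"Jean"));
    // Non-default keys such as `prefix-i` would simply remain in the map
    // and be ignored by the default-namepart extraction.
}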

fn parse_bibtex(chunks: ChunksRef) -> Self {
    let num_commas = count_num_char(chunks, ',');

    match num_commas {
        0 => Self::parse_unified(chunks), // `<First> <Prefix> <Last>`
        1 => {
            let (v1, v2) = split_at_normal_char(chunks, ',', true);
            Self::parse_single_comma(&v1, &v2)
        } // `<Prefix> <Last>, <First>`
        _ => {
            let (v1, v2) = split_at_normal_char(chunks, ',', true);
            let (v2, v3) = split_at_normal_char(&v2, ',', true);
            Self::parse_two_commas(&v1, &v2, &v3)
        } // `<Prefix> <Last>, <Suffix>, <First>`
    }
}

@@ -588,4 +613,99 @@ Claude Garamond",
assert_eq!(p.suffix, "Sr.");
assert_eq!(p.given_name, "Harcourt Fenton");
}

#[test]
fn test_person_extended_name_format() {
// AUTHOR = {given=Hans, family=Harman}
let p = Person::parse(&[Spanned::zero(N("given=Hans, family=Harman"))]);
assert_eq!(p.name, "Harman");
assert_eq!(p.prefix, "");
assert_eq!(p.suffix, "");
assert_eq!(p.given_name, "Hans");

// AUTHOR = {given={Jean Pierre}}
let p =
Person::parse(&[Spanned::zero(N("given=")), Spanned::zero(V("Jean Pierre"))]);
assert_eq!(p.name, "");
assert_eq!(p.prefix, "");
assert_eq!(p.suffix, "");
assert_eq!(p.given_name, "Jean Pierre");

// AUTHOR = {given={Jean Pierre Simon}, given-i=JPS}
let p = Person::parse(&[
Spanned::zero(N("given=")),
Spanned::zero(V("Jean Pierre Simon")),
Spanned::zero(N(", given-i=JPS")),
]);
assert_eq!(p.name, "");
assert_eq!(p.prefix, "");
assert_eq!(p.suffix, "");
assert_eq!(p.given_name, "Jean Pierre Simon");

// AUTHOR = {given=Jean, prefix=de la, prefix-i=d, family=Rousse}
let p = Person::parse(&[Spanned::zero(N(
"given=Jean, prefix=de la, prefix-i=d, family=Rousse",
))]);
assert_eq!(p.name, "Rousse");
assert_eq!(p.prefix, "de la");
assert_eq!(p.suffix, "");
assert_eq!(p.given_name, "Jean");

// AUTHOR = {"family={Robert and Sons, Inc.}"}
let p = Person::parse(&[
Spanned::zero(N("family=")),
Spanned::zero(V("Robert and Sons, Inc.")),
]);
assert_eq!(p.name, "Robert and Sons, Inc.");
assert_eq!(p.prefix, "");
assert_eq!(p.suffix, "");
assert_eq!(p.given_name, "");

// AUTHOR = {given = Simon, prefix = de, family = Beumont}
let p = Person::parse(&[Spanned::zero(N(
"given = Simon, prefix = de, family = Beumont",
))]);
assert_eq!(p.name, "Beumont");
assert_eq!(p.prefix, "de");
assert_eq!(p.suffix, "");
assert_eq!(p.given_name, "Simon");

// AUTHOR = {given=Hans, family=Harman and given=Simon, prefix=de, family=Beumont}
let people = &[Spanned::zero(N(
"given=Hans, family=Harman and given=Simon, prefix=de, family=Beumont",
))];
let people: Vec<Person> = Type::from_chunks(people).unwrap();
assert_eq!(people.len(), 2);
assert_eq!(people[0].name, "Harman");
assert_eq!(people[0].prefix, "");
assert_eq!(people[0].suffix, "");
assert_eq!(people[0].given_name, "Hans");
assert_eq!(people[1].name, "Beumont");
assert_eq!(people[1].prefix, "de");
assert_eq!(people[1].suffix, "");
assert_eq!(people[1].given_name, "Simon");

// AUTHOR = {Hans Harman and given=Simon, prefix=de, family=Beumont}
let people =
&[Spanned::zero(N("Hans Harman and given=Simon, prefix=de, family=Beumont"))];
let people: Vec<Person> = Type::from_chunks(people).unwrap();
assert_eq!(people.len(), 2);
assert_eq!(people[0].name, "Harman");
assert_eq!(people[0].prefix, "");
assert_eq!(people[0].suffix, "");
assert_eq!(people[0].given_name, "Hans");
assert_eq!(people[1].name, "Beumont");
assert_eq!(people[1].prefix, "de");
assert_eq!(people[1].suffix, "");
assert_eq!(people[1].given_name, "Simon");

// AUTHOR = {nosortothers=true and Hans Harman and given=Simon, family=Beumont, prefix=de, useprefix=true}
let people = &[Spanned::zero(N("nosortothers=true and Hans Harman and given=Simon, family=Beumont, prefix=de, useprefix=true"))];
let people: Vec<Person> = Type::from_chunks(people).unwrap();
assert_eq!(people.len(), 3);
assert_eq!(people[0].name, "");
assert_eq!(people[0].prefix, "");
assert_eq!(people[0].suffix, "");
assert_eq!(people[0].given_name, "");
}
}
38 changes: 38 additions & 0 deletions tests/extended_name_format.bib
@@ -0,0 +1,38 @@
@article{vanackooij_dec_2018_LargescaleUnitCommitment,
title = {Large-Scale Unit Commitment under Uncertainty: An Updated Literature Survey},
shorttitle = {Large-Scale Unit Commitment under Uncertainty},
author = {family=Ackooij, given=W., prefix=van, useprefix=true and Danti Lopez, I. and Frangioni, A. and Lacalandra, F. and Tahanan, M.},
date = {2018-12-01},
journaltitle = {Annals of Operations Research},
shortjournal = {Ann Oper Res},
volume = {271},
number = {1},
pages = {11--85},
issn = {1572-9338},
doi = {10.1007/s10479-018-3003-z},
}
@article{godoy_sep_2023_JuliaUnifyingEndtoend,
title = {Julia as a Unifying End-to-End Workflow Language on the {{Frontier}} Exascale System},
author = {Godoy, William F. and Valero-Lara, Pedro and Anderson, Caira and Lee, Katrina W. and Gainaru, Ana and family=Silva, given=Rafael Ferreira, prefix=da, useprefix=false and Vetter, Jeffrey S.},
date = {2023-09-24T13:27:44+00:00},
url = {https://hgpu.org/?p=28622},
urldate = {2023-10-27},
abstract = {We evaluate using Julia as a single language and ecosystem paradigm powered by LLVM to develop workflow components for high-performance computing. We run a Gray-Scott, 2-variable diffusion-reaction…},
langid = {american}
}
@article{persson_feb_2022_OutsideSafeOperating,
title = {Outside the {{Safe Operating Space}} of the {{Planetary Boundary}} for {{Novel Entities}}},
author = {Persson, Linn and Carney Almroth, Bethanie M. and Collins, Christopher D. and Cornell, Sarah and family=Wit, given=Cynthia A., prefix=de, useprefix=true and Diamond, Miriam L. and Fantke, Peter and Hassellöv, Martin and MacLeod, Matthew and Ryberg, Morten W. and Søgaard Jørgensen, Peter and Villarrubia-Gómez, Patricia and Wang, Zhanyun and Hauschild, Michael Zwicky},
date = {2022-02-01},
journaltitle = {Environmental Science \& Technology},
shortjournal = {Environ. Sci. Technol.},
volume = {56},
number = {3},
pages = {1510--1521},
publisher = {{American Chemical Society}},
issn = {0013-936X},
doi = {10.1021/acs.est.1c04158},
url = {https://doi.org/10.1021/acs.est.1c04158},
urldate = {2023-12-04},
abstract = {We submit that the safe operating space of the planetary boundary of novel entities is exceeded since annual production and releases are increasing at a pace that outstrips the global capacity for assessment and monitoring. The novel entities boundary in the planetary boundaries framework refers to entities that are novel in a geological sense and that could have large-scale impacts that threaten the integrity of Earth system processes. We review the scientific literature relevant to quantifying the boundary for novel entities and highlight plastic pollution as a particular aspect of high concern. An impact pathway from production of novel entities to impacts on Earth system processes is presented. We define and apply three criteria for assessment of the suitability of control variables for the boundary: feasibility, relevance, and comprehensiveness. We propose several complementary control variables to capture the complexity of this boundary, while acknowledging major data limitations. We conclude that humanity is currently operating outside the planetary boundary based on the weight-of-evidence for several of these control variables. The increasing rate of production and releases of larger volumes and higher numbers of novel entities with diverse risk potentials exceed societies’ ability to conduct safety related assessments and monitoring. We recommend taking urgent action to reduce the harm associated with exceeding the boundary by reducing the production and releases of novel entities, noting that even so, the persistence of many novel entities and/or their associated effects will continue to pose a threat.}
}
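
As a quick usage sketch against this fixture (hedged: `Bibliography::parse`, `Bibliography::get`, and `Entry::author` are assumed to match the API the crate's test suite exercises via `dump_author_title`), the parsed nameparts could be read back like this:

use biblatex::Bibliography;

fn main() {
    let src = std::fs::read_to_string("tests/extended_name_format.bib").unwrap();
    let bibliography = Bibliography::parse(&src).unwrap();
    let entry = bibliography
        .get("vanackooij_dec_2018_LargescaleUnitCommitment")
        .unwrap();
    for person in entry.author().unwrap() {
        // The first author should come out as given "W.", prefix "van",
        // family "Ackooij".
        println!("{} | {} | {}", person.given_name, person.prefix, person.name);
    }
}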
