From 94e9acedeac4ec21def043663a3273344ab59338 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20Kleinb=C3=B6lting?= Date: Wed, 24 Jul 2024 11:06:36 +0200 Subject: [PATCH] Add valid draft model for existing json metadata --- dsp-meta/src/api/convert/serde/draft_model.rs | 0 dsp-meta/src/api/convert/serde/mod.rs | 0 dsp-meta/tests/draft_schema_test.rs | 116 ++++++ dsp-meta/tests/hcl_learning.rs | 114 ------ dsp-meta/tests/json-toml.rs | 367 ------------------ dsp-meta/tests/toml_learning.rs | 141 ------- 6 files changed, 116 insertions(+), 622 deletions(-) create mode 100644 dsp-meta/src/api/convert/serde/draft_model.rs create mode 100644 dsp-meta/src/api/convert/serde/mod.rs create mode 100644 dsp-meta/tests/draft_schema_test.rs delete mode 100644 dsp-meta/tests/hcl_learning.rs delete mode 100644 dsp-meta/tests/json-toml.rs delete mode 100644 dsp-meta/tests/toml_learning.rs diff --git a/dsp-meta/src/api/convert/serde/draft_model.rs b/dsp-meta/src/api/convert/serde/draft_model.rs new file mode 100644 index 00000000..e69de29b diff --git a/dsp-meta/src/api/convert/serde/mod.rs b/dsp-meta/src/api/convert/serde/mod.rs new file mode 100644 index 00000000..e69de29b diff --git a/dsp-meta/tests/draft_schema_test.rs b/dsp-meta/tests/draft_schema_test.rs new file mode 100644 index 00000000..99f24efe --- /dev/null +++ b/dsp-meta/tests/draft_schema_test.rs @@ -0,0 +1,116 @@ +use std::fs; +use std::fs::File; +use std::path::PathBuf; + +use serde_json::Value; +use valico::json_schema; +use api::convert::serde::draft_model::*; +use dsp_meta::api; + +#[test] +fn test_json_and_yaml_serialization_are_equal() { + let path = "/Users/christian/git/dasch/dsp-meta/data/examples/sgv.json"; + let contents_json = fs::read_to_string(path).expect("Read JSON"); + let metadata_json = serde_json::from_str::(&*contents_json).expect("From JSON"); + let contents_yaml = fs::read_to_string("/Users/christian/git/dasch/dsp-meta/data/examples/sgv.yaml").expect("Read YML"); + let metadata_yaml = serde_yaml::from_str(&*contents_yaml).expect("From YAML"); + assert_eq!(metadata_json, metadata_yaml); +} + +#[test] +fn test_json_and_toml_serialization_are_equal() { + let path = "/Users/christian/git/dasch/dsp-meta/data/examples/sgv.json"; + let contents_json = fs::read_to_string(path).expect("Read JSON"); + let metadata_json = serde_json::from_str::(&*contents_json).expect("From JSON"); + let contents_toml = fs::read_to_string("/Users/christian/git/dasch/dsp-meta/data/examples/sgv.toml").expect("Read TOML"); + let metadata_toml = toml::from_str::(&*contents_toml).expect("From TOML"); + assert_eq!(metadata_json, metadata_toml); +} + +#[test] +fn test_deserialization_data() { + let paths = fs::read_dir("/Users/christian/git/dasch/dsp-meta/data/json") + .expect("Directory not found") + .filter_map( + |entry| { + let entry = entry.ok()?; + let path = entry.path(); + if path.is_file() && path.extension().and_then(|s| s.to_str()) == Some("json") { + Some(path) + } else { + None + } + } + ).collect::>(); + let mut success: usize = 0; + let mut error: usize = 0; + + for path in paths { + let path = path.as_path(); + if path.extension().and_then(|s| s.to_str()) == Some("json") { + println!("Checking {}:", path.to_str().get_or_insert("")); + let contents = fs::read_to_string(path) + .expect("Should have been able to read the file"); + let metadata = serde_json::from_str::(&*contents); + match metadata { + Ok(_data) => { + success = success + 1; + println!("SUCCESS\n") // println!("DATA:\n {:?}\n", data), + } + Err(err) => { + error = error + 1; + println!("ERROR:\n {:?}\n", err) + } + }; + } + } + println!("Success: {}, Error: {}, Total: {}", success, error, success + error) +} + +#[test] +fn test_draft_json_schema() { + verify_all_json_files_in_directory_jsonschema("/Users/christian/git/dasch/dsp-meta/data/json/"); + assert!(true) +} + +fn verify_all_json_files_in_directory_jsonschema(directory: &str) { + let paths = fs::read_dir(directory).unwrap(); + let mut success: usize = 0; + let mut error: usize = 0; + let json_schema: Value = serde_json::from_reader(File::open("/Users/christian/git/dasch/dsp-meta/data/schema-metadata-draft.json").unwrap()).unwrap(); + let mut scope = json_schema::Scope::new(); + let schema = scope.compile_and_return(json_schema, false).unwrap(); + let mut valid: Vec = Vec::new(); + let mut invalid: Vec = Vec::new(); + + for path in paths { + let path = path.unwrap().path(); + if path.extension().and_then(|s| s.to_str()) == Some("json") { + let file = (*path.to_str().get_or_insert("")).to_string(); + println!("Checking {}:", file); + let contents = fs::read_to_string(&path) + .expect("Should have been able to read the file"); + let metadata = serde_json::from_str::(&*contents).expect("parsed data as json"); + let result = schema.validate(&metadata); + let filename = file["/Users/christian/git/dasch/dsp-meta/data/json/".len()..].to_string(); + if result.is_valid() { + success = success + 1; + valid.push(filename); + println!("VALID\n") // println!("DATA:\n {:?}\n", data), + } else { + error = error + 1; + invalid.push(filename); + println!("INVALID: {:?}\n", result) // println!("DATA:\n {:?}\n", data), + } + } + } + println!("Success: {}, Error: {}, Total: {}", success, error, success + error); + println!(); + + println!("VALID files:\n{}", valid.join("\n")); + println!(); + + println!("INVALID files:\n{}", invalid.join("\n")); + + assert!(invalid.is_empty()); +} \ No newline at end of file diff --git a/dsp-meta/tests/hcl_learning.rs b/dsp-meta/tests/hcl_learning.rs deleted file mode 100644 index 77cc3bff..00000000 --- a/dsp-meta/tests/hcl_learning.rs +++ /dev/null @@ -1,114 +0,0 @@ -use std::collections::HashMap; - -use hcl::Body; -use serde::Deserialize; - -#[derive(Deserialize, Debug, PartialEq)] -pub struct User { - name: String, - email: String, -} - -#[derive(Deserialize, Debug, PartialEq)] -pub struct Discipline { - skos: Option, - snf: Vec, -} - -#[derive(Deserialize, Debug, PartialEq)] -pub struct Skos(RefData); - -#[derive(Deserialize, Debug, PartialEq)] -pub struct Snf(RefData); - -#[derive(Deserialize, Debug, PartialEq)] -pub struct RefData { - ref_id: String, -} - -#[derive(Deserialize, Debug, PartialEq)] -pub struct ProjectMetadata { - version: Version, - discipline: Discipline, - user: User, - #[serde(rename = "keyword")] - keywords: Vec, -} - -#[derive(Deserialize, Debug, PartialEq)] -struct Version(usize); - -#[derive(Deserialize, Debug, PartialEq)] -struct Keyword(HashMap); - -#[test] -fn test_hcl() { - let input = r#" - version = 1 - discipline skos { - ref_id = "foo" - } - discipline snf { - ref_id = "snf1" - } - discipline snf { - ref_id = "snf2" - } - user { - name = "John Doe" - email = "john@doe.tld" - } - keyword { - en ="word" - de = "wort" - } - keyword { - en ="word2" - } - "#; - - let body: Body = hcl::from_str(input).expect("Failed to parse"); - - let metadata: ProjectMetadata = hcl::from_body(body).expect("Failed to parse"); - - let exp = Discipline { - skos: Some(Skos(RefData { - ref_id: "foo".to_string(), - })), - snf: vec![ - Snf(RefData { - ref_id: "snf1".to_string(), - }), - Snf(RefData { - ref_id: "snf2".to_string(), - }), - ], - }; - - let mut exp_keywords = Vec::new(); - let keyw1 = Keyword({ - let mut map = HashMap::new(); - map.insert("en".to_string(), "word".to_string()); - map.insert("de".to_string(), "wort".to_string()); - map - }); - let kew2 = Keyword({ - let mut map = HashMap::new(); - map.insert("en".to_string(), "word2".to_string()); - map - }); - exp_keywords.append(&mut vec![keyw1, kew2]); - - assert_eq!( - metadata, - ProjectMetadata { - version: Version(1), - discipline: exp, - keywords: exp_keywords, - user: User { - name: "John Doe".to_string(), - email: "john@doe.tld".to_string() - } - } - ); -} diff --git a/dsp-meta/tests/json-toml.rs b/dsp-meta/tests/json-toml.rs deleted file mode 100644 index 17cffcc1..00000000 --- a/dsp-meta/tests/json-toml.rs +++ /dev/null @@ -1,367 +0,0 @@ -use std::collections::HashMap; -use std::fs; -use std::fs::File; -use std::path::PathBuf; - -use nonempty::NonEmpty; -use serde::{Deserialize, Serialize}; -use serde_json::Value; -use valico::json_schema; - -#[derive(Debug, Serialize, Deserialize, PartialEq)] -#[serde(rename_all = "camelCase")] -pub struct Metadata { - pub project: Project, - pub datasets: Option>, - pub persons: Option>, - pub organizations: Option>, - pub grants: Option>, -} - -#[derive(Debug, Serialize, Deserialize, PartialEq)] -#[serde(rename_all = "camelCase")] -pub struct Project { - #[serde(rename = "__id")] - pub id: String, - #[serde(rename = "__createdAt")] - pub created_at: Option, - #[serde(rename = "__createdBy")] - pub created_by: Option, - pub shortcode: String, - pub name: String, - pub description: Option, - pub start_date: Date, - pub teaser_text: String, - pub datasets: NonEmpty, - pub keywords: NonEmpty, - pub disciplines: NonEmpty, - pub temporal_coverage: Option>, - pub spatial_coverage: Option>, - pub funders: Option>, - pub url: Option, - pub secondary_url: Option, - pub data_management_plan: Option, - pub end_date: Option, - pub contact_point: Option, - pub how_to_cite: Option, - pub publications: Option>, - pub grants: Option>, - pub alternative_names: Option>, -} - -#[derive(Debug, Serialize, Deserialize, PartialEq)] -pub struct Publication { - pub text: String, - pub url: Option>, -} - -#[derive(Debug, Serialize, Deserialize, PartialEq)] -#[serde(rename_all = "camelCase")] -pub struct Dataset { - #[serde(rename = "__id")] - pub id: String, - #[serde(rename = "__createdAt")] - pub created_at: Option, - #[serde(rename = "__createdBy")] - pub created_by: Option, - pub abstracts: Option>, - pub access_conditions: Option, - pub additional: Option>, - pub alternative_titles: Option>, - pub attributions: Option>, - pub date_created: Option, - pub date_modified: Option, - pub date_published: Option, - pub distribution: Option, - pub how_to_cite: Option, - pub languages: Option>, - pub licenses: Option>, - pub status: Option, - pub title: Option, - pub type_of_data: Option>, - pub urls: Option>, -} - -#[derive(Debug, Serialize, Deserialize, PartialEq)] -#[serde(rename_all = "lowercase")] -pub enum AccessCondition { - Open, - Restricted, - Closed, -} - -#[derive(Debug, Serialize, Deserialize, PartialEq)] -pub enum Status { - #[serde(rename = "In planning")] - InPlanning, - #[serde(rename = "Ongoing")] - OnGoing, - #[serde(rename = "On hold")] - OnHold, - Finished, -} - -#[derive(Debug, Serialize, Deserialize, PartialEq)] -pub enum TypeOfData { - XML, - Text, - Image, - Video, - Audio, -} - -#[derive(Debug, Serialize, Deserialize, PartialEq)] -#[serde(rename_all = "camelCase")] -pub struct Person { - #[serde(rename = "__id")] - pub id: String, - #[serde(rename = "__createdAt")] - pub created_at: Option, - #[serde(rename = "__createdBy")] - pub created_by: Option, - pub job_titles: Option>, - pub given_names: NonEmpty, - pub family_names: NonEmpty, - pub affiliation: Option>, - pub address: Option
, - pub email: Option, - pub secondary_email: Option, - pub authority_refs: Option>, -} - -#[derive(Debug, Serialize, Deserialize, PartialEq)] -#[serde(rename_all = "camelCase")] -pub struct Organization { - #[serde(rename = "__id")] - pub id: String, - #[serde(rename = "__createdAt")] - pub created_at: Option, - #[serde(rename = "__createdBy")] - pub created_by: Option, - pub name: String, - pub url: Option, - pub address: Option
, - pub email: Option, - pub alternative_names: Option>, - pub authority_refs: Option>, -} - -#[derive(Debug, Serialize, Deserialize, PartialEq)] -#[serde(rename_all = "camelCase")] -pub struct Grant { - #[serde(rename = "__id")] - pub id: String, - #[serde(rename = "__createdAt")] - pub created_at: Option, - #[serde(rename = "__createdBy")] - pub created_by: Option, - pub funders: NonEmpty, - pub number: Option, - pub name: Option, - pub url: Option, -} - -#[derive(Debug, Serialize, Deserialize, PartialEq)] -pub struct Text(HashMap); - -#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash)] -pub struct IsoCode(String); - -#[derive(Debug, Serialize, Deserialize, PartialEq)] -pub struct Date(String); - -#[derive(Debug, Serialize, Deserialize, PartialEq)] -#[serde(rename_all = "camelCase")] -pub struct Url { - pub url: String, - pub text: Option, - #[serde(rename = "type")] - #[serde(default = "UrlType::default")] - pub url_type: UrlType, -} - -#[derive(Debug, Serialize, Deserialize, PartialEq)] -pub enum UrlType { - URL, - Geonames, - Pleiades, - Skos, - Periodo, - Chronontology, - GND, - VIAF, - Grid, - ORCID, - #[serde(rename = "Creative Commons")] - CreativeCommons, - DOI, - ARK, -} -impl UrlType { - fn default() -> Self { UrlType::URL } -} - -#[derive(Debug, Serialize, Deserialize, PartialEq)] -#[serde(rename_all = "camelCase")] -pub struct Address { - pub street: String, - pub postal_code: String, - pub locality: Option, - pub country: String, - pub canton: Option, - pub additional: Option, -} - -#[derive(Debug, Serialize, Deserialize, PartialEq)] -#[serde(rename_all = "camelCase")] -pub struct DataManagementPlan { - pub available: bool, - pub url: Option, -} - -#[derive(Debug, Serialize, Deserialize, PartialEq)] -#[serde(rename_all = "camelCase")] -pub struct Attribution { - pub agent: String, - pub roles: NonEmpty, -} - -#[derive(Debug, Serialize, Deserialize, PartialEq)] -#[serde(rename_all = "camelCase")] -pub struct License { - pub license: Url, - pub date: Date, - pub details: Option, -} - -#[derive(Debug, Serialize, Deserialize, PartialEq)] -#[serde(untagged)] -pub enum TextOrUrl { - TextValue(Text), - UrlValue(Url), -} - -#[test] -fn test_as_toml_and_yaml() { - let path = "/Users/christian/git/dasch/dsp-meta/data/examples/sgv.json"; - let contents = fs::read_to_string(path) - .expect("Should have been able to read the file"); - let metadata = serde_json::from_str::(&*contents).expect("From JSON"); - let toml = toml::to_string(&metadata).expect("To TOML"); - fs::write("/Users/christian/git/dasch/dsp-meta/data/examples/sgv.toml", &toml).expect("Write TOML"); - println!("As TOML:"); - println!("{}", toml); - println!(); - - let yaml = serde_yaml::to_string(&metadata).expect("To YAML"); - fs::write("/Users/christian/git/dasch/dsp-meta/data/examples/sgv.yaml", &yaml).expect("Write YAML"); - println!("As YAML:"); - println!("{}", yaml); - println!("As YAML:"); -} - -#[test] -fn test_json_and_yaml_serialization_are_equal() { - let path = "/Users/christian/git/dasch/dsp-meta/data/examples/sgv.json"; - let contents_json = fs::read_to_string(path).expect("Read JSON"); - let metadata_json = serde_json::from_str::(&*contents_json).expect("From JSON"); - let contents_yaml = fs::read_to_string("/Users/christian/git/dasch/dsp-meta/data/examples/sgv.yaml").expect("Read YML"); - let metadata_yaml = serde_yaml::from_str(&*contents_yaml).expect("From YAML"); - assert_eq!(metadata_json, metadata_yaml); -} - -#[test] -fn test_json_and_toml_serialization_are_equal() { - let path = "/Users/christian/git/dasch/dsp-meta/data/examples/sgv.json"; - let contents_json = fs::read_to_string(path).expect("Read JSON"); - let metadata_json = serde_json::from_str::(&*contents_json).expect("From JSON"); - let contents_toml = fs::read_to_string("/Users/christian/git/dasch/dsp-meta/data/examples/sgv.toml").expect("Read TOML"); - let metadata_toml = toml::from_str::(&*contents_toml).expect("From TOML"); - assert_eq!(metadata_json, metadata_toml); -} - -#[test] -fn test_deserialization() { - let paths = fs::read_dir("/Users/christian/git/dasch/dsp-meta/data/json") - .expect("Directory not found") - .filter_map( - |entry| { - let entry = entry.ok()?; - let path = entry.path(); - if path.is_file() && path.extension().and_then(|s| s.to_str()) == Some("json") { - Some(path) - } else { - None - } - } - ).collect::>(); - let mut success: usize = 0; - let mut error: usize = 0; - - for path in paths { - let path = path.as_path(); - if path.extension().and_then(|s| s.to_str()) == Some("json") { - println!("Checking {}:", path.to_str().get_or_insert("")); - let contents = fs::read_to_string(path) - .expect("Should have been able to read the file"); - let metadata = serde_json::from_str::(&*contents); - match metadata { - Ok(_data) => { - success = success + 1; - println!("SUCCESS\n") // println!("DATA:\n {:?}\n", data), - } - Err(err) => { - error = error + 1; - println!("ERROR:\n {:?}\n", err) - } - }; - } - } - println!("Success: {}, Error: {}, Total: {}", success, error, success + error) -} - -#[test] -fn test_jsonschema() { - verify_all_json_files_in_directory_jsonschema("/Users/christian/git/dasch/dsp-meta/data/json/"); - assert!(true) -} - -fn verify_all_json_files_in_directory_jsonschema(directory: &str) { - let paths = fs::read_dir(directory).unwrap(); - let mut success: usize = 0; - let mut error: usize = 0; - let json_schema: Value = serde_json::from_reader(File::open("/Users/christian/git/dasch/dsp-meta/data/schema-metadata-draft.json").unwrap()).unwrap(); - let mut scope = json_schema::Scope::new(); - let schema = scope.compile_and_return(json_schema, false).unwrap(); - let mut valid: Vec = Vec::new(); - let mut invalid: Vec = Vec::new(); - - for path in paths { - let path = path.unwrap().path(); - if path.extension().and_then(|s| s.to_str()) == Some("json") { - let file = (*path.to_str().get_or_insert("")).to_string(); - println!("Checking {}:", file); - let contents = fs::read_to_string(&path) - .expect("Should have been able to read the file"); - let metadata = serde_json::from_str::(&*contents).expect("parsed data as json"); - let result = schema.validate(&metadata); - let filename = file["/Users/christian/git/dasch/dsp-meta/data/json/".len()..].to_string(); - if result.is_valid() { - success = success + 1; - valid.push(filename); - println!("VALID\n") // println!("DATA:\n {:?}\n", data), - } else { - error = error + 1; - invalid.push(filename); - println!("INVALID: {:?}\n", result) // println!("DATA:\n {:?}\n", data), - } - } - } - println!("Success: {}, Error: {}, Total: {}", success, error, success + error); - println!(); - - println!("VALID files:\n{}", valid.join("\n")); - println!(); - - println!("INVALID files:\n{}", invalid.join("\n")); -} \ No newline at end of file diff --git a/dsp-meta/tests/toml_learning.rs b/dsp-meta/tests/toml_learning.rs deleted file mode 100644 index ff96cb5d..00000000 --- a/dsp-meta/tests/toml_learning.rs +++ /dev/null @@ -1,141 +0,0 @@ -use std::collections::HashMap; - -use serde::Deserialize; -use url::Url; - -#[derive(Deserialize, Debug, PartialEq)] -struct ProjectMetadata { - version: Version, - project: Project, -} - -#[derive(Deserialize, Debug, PartialEq)] -struct Project { - keyword: Option>, - #[serde(rename = "discipline")] - disciplines: Vec, -} - -#[derive(Deserialize, Debug, PartialEq)] -struct Version(usize); - -#[derive(Deserialize, Debug, PartialEq)] -struct Keyword(HashMap); - -#[derive(Deserialize, Debug, PartialEq)] -struct Discipline { - skos: Option>, - snf: Option>, - text: Option>, -} - -#[derive(Deserialize, Debug, PartialEq)] -struct RefData { - ref_id: String, - description: Option, - url: Option, -} - -#[derive(Deserialize, Debug, PartialEq)] -struct LangTextData(HashMap); - -#[test] -fn test_toml() { - let input = r#" - version = 1 - - [project] - [[project.keyword]] - en = "English" - de = "German" - - [[project.keyword]] - fr = "French" - - [[project.discipline]] - [[project.discipline.skos]] - ref_id = "foo" - description = "foo description" - url = "http://example.com/foo" - - [[project.discipline.skos]] - ref_id = "bar" - description = "bar description" - - [[project.discipline.text]] - age = "old" - "#; - let actual = toml::from_str::(input).unwrap(); - - let input2 = r#" - version = 1 - - [project] - keyword = [ - { en = "English", de = "German" }, - { fr = "French" } - ] - - [[project.discipline]] - skos = [ - { ref_id = "foo", description = "foo description", url = "http://example.com/foo" }, - { ref_id = "bar", description = "bar description" } - ] - - [[project.discipline.text]] - age = "old" - "#; - - let actual2 = toml::from_str::(input2).unwrap(); - - let exp_keywords = Some(vec![ - Keyword( - [ - ("en".to_string(), "English".to_string()), - ("de".to_string(), "German".to_string()), - ] - .iter() - .cloned() - .collect(), - ), - Keyword( - [("fr".to_string(), "French".to_string())] - .iter() - .cloned() - .collect(), - ), - ]); - - let exp_discipline = vec![Discipline { - skos: Some(vec![ - RefData { - ref_id: "foo".to_string(), - description: Some("foo description".to_string()), - url: Some(Url::parse("http://example.com/foo").unwrap()), - }, - RefData { - ref_id: "bar".to_string(), - description: Some("bar description".to_string()), - url: None - }, - ]), - snf: None, - text: Some(vec![LangTextData( - [("age".to_string(), "old".to_string())] - .iter() - .cloned() - .collect(), - )]), - }]; - - let exp_project = Project { - keyword: exp_keywords, - disciplines: exp_discipline, - }; - let expected_metadata = ProjectMetadata { - version: Version(1), - project: exp_project, - }; - assert_eq!(actual, expected_metadata); - assert_eq!(actual2, expected_metadata); -}