From 6d5ca46fcb932e4bbae9e7e7a33ae6b8743e319d Mon Sep 17 00:00:00 2001 From: Matthew Donoughe Date: Tue, 27 Feb 2024 09:32:18 -0500 Subject: [PATCH] better align parsing with spec --- purl/src/format.rs | 14 +------- purl/src/lib.rs | 1 - purl/src/parse.rs | 26 +++++++------- purl_test/src/lib.rs | 36 +++++++++++++++++++ .../phylum-test-suite-data.json | 9 +++++ 5 files changed, 60 insertions(+), 26 deletions(-) diff --git a/purl/src/format.rs b/purl/src/format.rs index c6c1bce..a0fece0 100644 --- a/purl/src/format.rs +++ b/purl/src/format.rs @@ -56,7 +56,7 @@ where if let Some(version) = self.version() { // The version is a continuation of the same path segment. - write!(f, "@{}", utf8_percent_encode(version, PURL_PATH_SEGMENT))?; + write!(f, "@{}", utf8_percent_encode(version, PURL_PATH))?; } if !self.parts.qualifiers.is_empty() { @@ -162,18 +162,6 @@ mod tests { ); } - #[test] - fn display_encodes_version_correctly() { - assert_eq!( - "pkg:generic/name@a%23%2Fb%3F%2Fc%40", - &GenericPurlBuilder::new(Cow::Borrowed("generic"), "name") - .with_version("a#/b?/c@") - .build() - .expect("Could not build PURL") - .to_string(), - ); - } - #[test] fn display_encodes_qualifiers_correctly() { assert_eq!( diff --git a/purl/src/lib.rs b/purl/src/lib.rs index 806c777..142aa3b 100644 --- a/purl/src/lib.rs +++ b/purl/src/lib.rs @@ -4,7 +4,6 @@ use std::borrow::Cow; pub use builder::*; -pub use format::*; #[cfg(feature = "package-type")] pub use package_type::*; pub use parse::*; diff --git a/purl/src/parse.rs b/purl/src/parse.rs index 168a114..bf9d859 100644 --- a/purl/src/parse.rs +++ b/purl/src/parse.rs @@ -167,6 +167,8 @@ where fn from_str(s: &str) -> Result { // This mostly follows the procedure documented in the PURL spec. // https://github.com/package-url/purl-spec/blob/master/PURL-SPECIFICATION.rst#how-to-parse-a-purl-string-in-its-components + + // Check for `pkg:` first to quickly reject non-PURLs. let s = s.strip_prefix("pkg:").ok_or(ParseError::UnsupportedUrlScheme)?; // PURLs are not supposed to have any leading slashes, but the spec says that @@ -177,7 +179,7 @@ where // Remove subpath and qualifiers from the end now because they have higher // precedence than the path separater. - let s = match s.split_once('#') { + let s = match s.rsplit_once('#') { Some((s, subpath)) => { parts.subpath = decode_subpath(subpath)?; s @@ -185,7 +187,7 @@ where None => s, }; - let s = match s.split_once('?') { + let s = match s.rsplit_once('?') { Some((s, qualifiers)) => { decode_qualifiers(qualifiers, &mut parts)?; s @@ -206,8 +208,16 @@ where let package_type = T::from_str(package_type)?; + let s = match s.rsplit_once('@') { + Some((s, version)) => { + parts.version = decode(version)?.into(); + s + }, + None => s, + }; + // The namespace is optional so we may not have any more slashes. - let name_and_version = match s.rsplit_once('/') { + let name = match s.rsplit_once('/') { Some((namespace, s)) => { parts.namespace = decode_namespace(namespace)?; s @@ -215,15 +225,7 @@ where None => s, }; - match name_and_version.rsplit_once('@') { - Some((name, version)) => { - parts.name = decode(name)?.into(); - parts.version = decode(version)?.into(); - }, - None => { - parts.name = decode(name_and_version)?.into(); - }, - }; + parts.name = decode(name)?.into(); GenericPurlBuilder { package_type, parts }.build() } diff --git a/purl_test/src/lib.rs b/purl_test/src/lib.rs index c819ecd..d9bc5cc 100644 --- a/purl_test/src/lib.rs +++ b/purl_test/src/lib.rs @@ -1296,3 +1296,39 @@ fn unsupported_percent_signs_are_properly_encoded_and_decoded() { "Incorrect string representation" ); } +#[test] +/// unsupported: version encoding +fn unsupported_version_encoding() { + assert!( + matches!( + Purl::from_str("pkg:generic/name@a%23%2Fb%3F%2Fc%40"), + Err(PackageError::UnsupportedType) + ), + "Type {} is not supported", + "generic" + ); + let parsed = match GenericPurl::::from_str("pkg:generic/name@a%23%2Fb%3F%2Fc%40") { + Ok(purl) => purl, + Err(error) => { + panic!( + "Failed to parse valid purl {:?}: {}", + "pkg:generic/name@a%23%2Fb%3F%2Fc%40", error + ) + }, + }; + assert_eq!("generic", parsed.package_type(), "Incorrect package type"); + assert_eq!(None, parsed.namespace(), "Incorrect namespace"); + assert_eq!("name", parsed.name(), "Incorrect name"); + assert_eq!(Some("a#/b?/c@"), parsed.version(), "Incorrect version"); + assert_eq!(None, parsed.subpath(), "Incorrect subpath"); + let expected_qualifiers: HashMap<&str, &str> = HashMap::new(); + assert_eq!( + expected_qualifiers, + parsed.qualifiers().iter().map(|(k, v)| (k.as_str(), v)).collect::>() + ); + assert_eq!( + "pkg:generic/name@a%23/b%3F/c%40", + &parsed.to_string(), + "Incorrect string representation" + ); +} diff --git a/xtask/src/generate_tests/phylum-test-suite-data.json b/xtask/src/generate_tests/phylum-test-suite-data.json index 8528201..62ebd91 100644 --- a/xtask/src/generate_tests/phylum-test-suite-data.json +++ b/xtask/src/generate_tests/phylum-test-suite-data.json @@ -134,5 +134,14 @@ }, "subpath": "100%", "is_invalid": false + }, + { + "description": "version encoding", + "purl": "pkg:generic/name@a%23%2Fb%3F%2Fc%40", + "canonical_purl": "pkg:generic/name@a%23/b%3F/c%40", + "type": "generic", + "name": "name", + "version": "a#/b?/c@", + "is_invalid": false } ]