From 8d6a19de3e7d4b7d8b6491cf48bf2d94689cd5be Mon Sep 17 00:00:00 2001 From: Semyon Uchvatov Date: Thu, 28 Nov 2024 11:55:06 +0300 Subject: [PATCH] Add cbor support --- Cargo.lock | 21 +++--- Cargo.toml | 2 + LICENSE-3rdparty.csv | 2 + changelog.d/1152.feature.md | 1 + src/stdlib/mod.rs | 3 + src/stdlib/parse_cbor.rs | 128 +++++++++++++++++++++++++++++++++++ tests/data/cbor/complex.cbor | 1 + tests/data/cbor/simple.cbor | 1 + 8 files changed, 151 insertions(+), 8 deletions(-) create mode 100644 changelog.d/1152.feature.md create mode 100644 src/stdlib/parse_cbor.rs create mode 100644 tests/data/cbor/complex.cbor create mode 100644 tests/data/cbor/simple.cbor diff --git a/Cargo.lock b/Cargo.lock index 4efe4689bf..70733a52d9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -539,9 +539,9 @@ dependencies = [ [[package]] name = "ciborium" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "effd91f6c78e5a4ace8a5d3c0b6bfaec9e2baaef55f3efc00e45fb2e477ee926" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" dependencies = [ "ciborium-io", "ciborium-ll", @@ -550,15 +550,15 @@ dependencies = [ [[package]] name = "ciborium-io" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdf919175532b369853f5d5e20b26b43112613fd6fe7aee757e35f7a44642656" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" [[package]] name = "ciborium-ll" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "defaa24ecc093c77630e6c15e17c51f5e187bf35ee514f4e2d67baaa96dae22b" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" dependencies = [ "ciborium-io", "half", @@ -1269,9 +1269,13 @@ dependencies = [ [[package]] name = "half" -version = "1.8.2" +version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" +checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" +dependencies = [ + "cfg-if", + "crunchy", +] [[package]] name = "hashbrown" @@ -3727,6 +3731,7 @@ dependencies = [ "charset", "chrono", "chrono-tz", + "ciborium", "cidr-utils", "clap", "codespan-reporting", diff --git a/Cargo.toml b/Cargo.toml index 591f828d44..341c4cef9c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -68,6 +68,7 @@ stdlib = [ "dep:chacha20poly1305", "dep:charset", "dep:convert_case", + "dep:ciborium", "dep:cidr-utils", "dep:community-id", "dep:crypto_secretbox", @@ -129,6 +130,7 @@ bytes = { version = "1", default-features = false, optional = true } charset = { version = "0.1", optional = true } chrono = { version = "0.4", default-features = false, features = ["clock", "serde", "wasmbind"], optional = true } chrono-tz = { version = "0.10", default-features = false, optional = true } +ciborium = { version = "0.2.2", default-features = false, optional = true } cidr-utils = { version = "0.6", optional = true } csv = { version = "1", optional = true } clap = { version = "4", features = ["derive"], optional = true } diff --git a/LICENSE-3rdparty.csv b/LICENSE-3rdparty.csv index cdf98d3e2a..40b9438d66 100644 --- a/LICENSE-3rdparty.csv +++ b/LICENSE-3rdparty.csv @@ -44,6 +44,7 @@ chacha20poly1305,https://github.com/RustCrypto/AEADs/tree/master/chacha20poly130 charset,https://github.com/hsivonen/charset,Apache-2.0 OR MIT,Henri Sivonen chrono,https://github.com/chronotope/chrono,MIT OR Apache-2.0,The chrono Authors chrono-tz,https://github.com/chronotope/chrono-tz,MIT OR Apache-2.0,The chrono-tz Authors +ciborium,https://github.com/enarx/ciborium,Apache-2.0,Nathaniel McCallum cidr,https://github.com/stbuehler/rust-cidr,MIT,Stefan Bühler cidr-utils,https://github.com/magiclen/cidr-utils,MIT,Magic Len cipher,https://github.com/RustCrypto/traits,MIT OR Apache-2.0,RustCrypto Developers @@ -102,6 +103,7 @@ 
generic-array,https://github.com/fizyk20/generic-array,MIT,"Bartłomiej Kamińsk getrandom,https://github.com/rust-random/getrandom,MIT OR Apache-2.0,The Rand Project Developers gimli,https://github.com/gimli-rs/gimli,MIT OR Apache-2.0,The gimli Authors grok,https://github.com/daschl/grok,Apache-2.0,Michael Nitschinger +half,https://github.com/starkat99/half-rs,MIT OR Apache-2.0,Kathryn Long hashbrown,https://github.com/rust-lang/hashbrown,MIT OR Apache-2.0,Amanieu d'Antras heck,https://github.com/withoutboats/heck,MIT OR Apache-2.0,The heck Authors heck,https://github.com/withoutboats/heck,MIT OR Apache-2.0,Without Boats diff --git a/changelog.d/1152.feature.md b/changelog.d/1152.feature.md new file mode 100644 index 0000000000..42ca00d855 --- /dev/null +++ b/changelog.d/1152.feature.md @@ -0,0 +1 @@ +Add `parse_cbor` function diff --git a/src/stdlib/mod.rs b/src/stdlib/mod.rs index acc5f96538..c4dc5b6ae0 100644 --- a/src/stdlib/mod.rs +++ b/src/stdlib/mod.rs @@ -137,6 +137,7 @@ cfg_if::cfg_if! { mod parse_aws_cloudwatch_log_subscription_message; mod parse_aws_vpc_flow_log; mod parse_cef; + mod parse_cbor; mod parse_common_log; mod parse_csv; mod parse_duration; @@ -316,6 +317,7 @@ cfg_if::cfg_if! 
{
         pub use parse_aws_alb_log::ParseAwsAlbLog;
         pub use parse_aws_cloudwatch_log_subscription_message::ParseAwsCloudWatchLogSubscriptionMessage;
         pub use parse_aws_vpc_flow_log::ParseAwsVpcFlowLog;
+        pub use parse_cbor::ParseCbor;
         pub use parse_cef::ParseCef;
         pub use parse_common_log::ParseCommonLog;
         pub use parse_csv::ParseCsv;
@@ -500,6 +502,7 @@ pub fn all() -> Vec<Box<dyn Function>> {
         Box::new(ParseAwsAlbLog),
         Box::new(ParseAwsCloudWatchLogSubscriptionMessage),
         Box::new(ParseAwsVpcFlowLog),
+        Box::new(ParseCbor),
         Box::new(ParseCef),
         Box::new(ParseCommonLog),
         Box::new(ParseCsv),
diff --git a/src/stdlib/parse_cbor.rs b/src/stdlib/parse_cbor.rs
new file mode 100644
index 0000000000..833a18c207
--- /dev/null
+++ b/src/stdlib/parse_cbor.rs
@@ -0,0 +1,136 @@
+use crate::compiler::prelude::*;
+use ciborium::de::from_reader;
+
+/// Decodes `value` as a single CBOR data item.
+///
+/// Fails when `value` is not a byte string or when its content cannot be decoded as CBOR.
+fn parse_cbor(value: Value) -> Resolved {
+    let bytes = value.try_bytes()?;
+    // Borrow the buffer as a plain `&[u8]`; `from_reader` accepts a byte slice as its
+    // reader, so no slice helper trait from another crate has to be in scope.
+    let value = from_reader(&bytes[..]).map_err(|e| format!("unable to parse cbor: {e}"))?;
+    Ok(value)
+}
+
+/// VRL function `parse_cbor`: decodes CBOR-encoded bytes into a value.
+#[derive(Clone, Copy, Debug)]
+pub struct ParseCbor;
+
+impl Function for ParseCbor {
+    fn identifier(&self) -> &'static str {
+        "parse_cbor"
+    }
+
+    fn summary(&self) -> &'static str {
+        "parse CBOR-encoded bytes into a value"
+    }
+
+    fn usage(&self) -> &'static str {
+        indoc! {"
+            Parses the provided `value` as CBOR.
+
+            Only JSON types are returned. If you need to convert a `string` into a `timestamp`,
+            consider the `parse_timestamp` function.
+        "}
+    }
+
+    fn parameters(&self) -> &'static [Parameter] {
+        &[Parameter {
+            keyword: "value",
+            kind: kind::BYTES,
+            required: true,
+        }]
+    }
+
+    fn examples(&self) -> &'static [Example] {
+        &[Example {
+            title: "object",
+            source: r#"parse_cbor!(decode_base64!("oWVmaWVsZGV2YWx1ZQ=="))"#,
+            result: Ok(r#"{ "field": "value" }"#),
+        }]
+    }
+
+    fn compile(
+        &self,
+        _state: &state::TypeState,
+        _ctx: &mut FunctionCompileContext,
+        arguments: ArgumentList,
+    ) -> Compiled {
+        let value = arguments.required("value");
+        Ok(ParseCborFn { value }.as_expr())
+    }
+}
+
+/// Compiled form of a `parse_cbor` call; holds the expression that yields the input.
+#[derive(Debug, Clone)]
+struct ParseCborFn {
+    value: Box<dyn Expression>,
+}
+
+impl FunctionExpression for ParseCborFn {
+    fn resolve(&self, ctx: &mut Context) -> Resolved {
+        let value = self.value.resolve(ctx)?;
+        parse_cbor(value)
+    }
+
+    fn type_def(&self, _: &state::TypeState) -> TypeDef {
+        type_def()
+    }
+}
+
+/// Kinds allowed for the elements of a decoded array or object.
+fn inner_kind() -> Kind {
+    Kind::null()
+        | Kind::bytes()
+        | Kind::integer()
+        | Kind::float()
+        | Kind::boolean()
+        | Kind::array(Collection::any())
+        | Kind::object(Collection::any())
+}
+
+/// Type definition of the function result: fallible, and one of bytes, boolean,
+/// integer, float, null, array or object.
+fn type_def() -> TypeDef {
+    TypeDef::bytes()
+        .fallible()
+        .or_boolean()
+        .or_integer()
+        .or_float()
+        .add_null()
+        .or_array(Collection::from_unknown(inner_kind()))
+        .or_object(Collection::from_unknown(inner_kind()))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::value;
+    use std::env;
+    use std::fs;
+    use std::path::PathBuf;
+
+    fn test_data_dir() -> PathBuf {
+        PathBuf::from(env::var_os("CARGO_MANIFEST_DIR").unwrap()).join("tests/data/cbor")
+    }
+
+    fn read_cbor_file(cbor_bin_message_path: &str) -> Vec<u8> {
+        fs::read(test_data_dir().join(cbor_bin_message_path)).unwrap()
+    }
+
+    test_function![
+        parse_cbor => ParseCbor;
+
+        parses {
+            args: func_args![ value: value!(read_cbor_file("simple.cbor").as_slice()) ],
+            want: Ok(value!({ field: "value" })),
+            tdef: type_def(),
+        }
+
+        complex_cbor {
+            args: func_args![ value: value!(read_cbor_file("complex.cbor").as_slice()) ],
+            want: Ok(value!({ object: {string: "value", number: 42, array: ["hello", "world"], boolean: false} })),
+            tdef: type_def(),
+        }
+    ];
+}
diff --git a/tests/data/cbor/complex.cbor b/tests/data/cbor/complex.cbor
new file mode 100644
index 0000000000..6f4bfb247c
--- /dev/null
+++ b/tests/data/cbor/complex.cbor
@@ -0,0 +1 @@
+¡fobject¤fstringevaluefnumber*earray‚ehelloeworldgbooleanô
\ No newline at end of file
diff --git a/tests/data/cbor/simple.cbor b/tests/data/cbor/simple.cbor
new file mode 100644
index 0000000000..4381f17dc5
--- /dev/null
+++ b/tests/data/cbor/simple.cbor
@@ -0,0 +1 @@
+¡efieldevalue
\ No newline at end of file