From 5a58efb9cc03cf1b4840f2c93e718cb9fd30587a Mon Sep 17 00:00:00 2001 From: David Herberth Date: Mon, 6 Nov 2023 15:04:45 +0100 Subject: [PATCH] feat(py): Add validate_pii_selector (#2687) Expose a function to validate PII selectors directly, so we can use it to validate safe fields in Sentry. --- CHANGELOG.md | 1 + py/CHANGELOG.md | 4 +++ py/sentry_relay/processing.py | 12 ++++++++ py/tests/test_processing.py | 18 +++++++++++ relay-cabi/include/relay.h | 5 ++++ relay-cabi/src/processing.rs | 56 +++++++++++++++++++++++++---------- 6 files changed, 80 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ab5de5bc54..ce644e37b2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ - Allow advanced scrubbing expressions for datascrubbing safe fields. ([#2670](https://github.com/getsentry/relay/pull/2670)) - Track when a span was received. ([#2688](https://github.com/getsentry/relay/pull/2688)) - Add context for NEL (Network Error Logging) reports to the event schema. ([#2421](https://github.com/getsentry/relay/pull/2421)) +- Add `validate_pii_selector` to CABI for safe fields validation. ([#2687](https://github.com/getsentry/relay/pull/2687)) **Bug Fixes**: diff --git a/py/CHANGELOG.md b/py/CHANGELOG.md index ceb33ecb47..b52fa8013b 100644 --- a/py/CHANGELOG.md +++ b/py/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## Unreleased + +- Add `validate_pii_selector` to validate safe fields. ([#2687](https://github.com/getsentry/relay/pull/2687)) + ## 0.8.34 - Add context for NEL (Network Error Logging) reports to the event schema. ([#2421](https://github.com/getsentry/relay/pull/2421)) diff --git a/py/sentry_relay/processing.py b/py/sentry_relay/processing.py index 0914c1f811..7716ce816e 100644 --- a/py/sentry_relay/processing.py +++ b/py/sentry_relay/processing.py @@ -21,6 +21,7 @@ "is_glob_match", "is_codeowners_path_match", "parse_release", + "validate_pii_selector", "validate_pii_config", "convert_datascrubbing_config", "pii_strip_event", @@ -166,6 +167,17 @@ def is_codeowners_path_match(value, pattern): ) +def validate_pii_selector(selector): + """ + Validate a PII selector spec. Used to validate datascrubbing safe fields. + """ + assert isinstance(selector, str) + raw_error = rustcall(lib.relay_validate_pii_selector, encode_str(selector)) + error = decode_str(raw_error, free=True) + if error: + raise ValueError(error) + + def validate_pii_config(config): """ Validate a PII config against the schema. Used in project options UI. diff --git a/py/tests/test_processing.py b/py/tests/test_processing.py index 452d98dd25..ae0f9cafc2 100644 --- a/py/tests/test_processing.py +++ b/py/tests/test_processing.py @@ -128,6 +128,24 @@ def test_normalize_user_agent(must_normalize): assert "contexts" not in event +def test_validate_pii_selector(): + sentry_relay.validate_pii_selector("test") + sentry_relay.validate_pii_selector("$user.id") + sentry_relay.validate_pii_selector("extra.'sys.argv'.**") + + with pytest.raises(ValueError) as e: + sentry_relay.validate_pii_selector("no_spaces allowed") + assert str(e.value) == 'invalid syntax near "no_spaces allowed"' + + with pytest.raises(ValueError) as e: + sentry_relay.validate_pii_selector("unterminated.'string") + assert str(e.value) == 'invalid syntax near "unterminated.\'string"' + + with pytest.raises(ValueError) as e: + sentry_relay.validate_pii_selector("double.**.wildcard.**") + assert str(e.value) == "deep wildcard used more than once" + + def test_validate_pii_config(): sentry_relay.validate_pii_config("{}") sentry_relay.validate_pii_config('{"applications": {}}') diff --git a/relay-cabi/include/relay.h b/relay-cabi/include/relay.h index 55a1ea451e..8700128cba 100644 --- a/relay-cabi/include/relay.h +++ b/relay-cabi/include/relay.h @@ -552,6 +552,11 @@ struct RelayStr relay_store_normalizer_normalize_event(struct RelayStoreNormaliz */ bool relay_translate_legacy_python_json(struct RelayStr *event); +/** + * Validates a PII selector spec. Used to validate datascrubbing safe fields. + */ +struct RelayStr relay_validate_pii_selector(const struct RelayStr *value); + /** * Validate a PII config against the schema. Used in project options UI. */ diff --git a/relay-cabi/src/processing.rs b/relay-cabi/src/processing.rs index 78dfd3e9cb..f13b63ae10 100644 --- a/relay-cabi/src/processing.rs +++ b/relay-cabi/src/processing.rs @@ -18,7 +18,8 @@ use relay_event_normalization::{ use relay_event_schema::processor::{process_value, split_chunks, ProcessingState}; use relay_event_schema::protocol::{Event, VALID_PLATFORMS}; use relay_pii::{ - selector_suggestions_from_value, DataScrubbingConfig, PiiConfig, PiiConfigError, PiiProcessor, + selector_suggestions_from_value, DataScrubbingConfig, InvalidSelectorError, PiiConfig, + PiiConfigError, PiiProcessor, SelectorSpec, }; use relay_protocol::{Annotated, Remark, RuleCondition}; use relay_sampling::SamplingConfig; @@ -150,6 +151,24 @@ pub unsafe extern "C" fn relay_translate_legacy_python_json(event: *mut RelayStr true } +/// Validates a PII selector spec. Used to validate datascrubbing safe fields. +#[no_mangle] +#[relay_ffi::catch_unwind] +pub unsafe extern "C" fn relay_validate_pii_selector(value: *const RelayStr) -> RelayStr { + let value = (*value).as_str(); + match value.parse::() { + Ok(_) => RelayStr::new(""), + Err(err) => match err { + InvalidSelectorError::ParseError(_) => { + // Change the error to something more concise we can show in an UI. + // Error message follows the same format used for fingerprinting rules. + RelayStr::from_string(format!("invalid syntax near {value:?}")) + } + err => RelayStr::from_string(err.to_string()), + }, + } +} + /// Validate a PII config against the schema. Used in project options UI. #[no_mangle] #[relay_ffi::catch_unwind] @@ -336,9 +355,13 @@ pub unsafe extern "C" fn normalize_global_config(value: *const RelayStr) -> Rela } } -#[test] -fn pii_config_validation_invalid_regex() { - let config = r#" +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn pii_config_validation_invalid_regex() { + let config = r#" { "rules": { "strip-fields": { @@ -355,15 +378,15 @@ fn pii_config_validation_invalid_regex() { } } "#; - assert_eq!( - unsafe { relay_validate_pii_config(&RelayStr::from(config)).as_str() }, - "regex parse error:\n (not valid regex\n ^\nerror: unclosed group" - ); -} + assert_eq!( + unsafe { relay_validate_pii_config(&RelayStr::from(config)).as_str() }, + "regex parse error:\n (not valid regex\n ^\nerror: unclosed group" + ); + } -#[test] -fn pii_config_validation_valid_regex() { - let config = r#" + #[test] + fn pii_config_validation_valid_regex() { + let config = r#" { "rules": { "strip-fields": { @@ -380,8 +403,9 @@ fn pii_config_validation_valid_regex() { } } "#; - assert_eq!( - unsafe { relay_validate_pii_config(&RelayStr::from(config)).as_str() }, - "" - ); + assert_eq!( + unsafe { relay_validate_pii_config(&RelayStr::from(config)).as_str() }, + "" + ); + } }