From c9ba0b2f6469c720639f2daedc40ed0762c60b92 Mon Sep 17 00:00:00 2001 From: Vincent Ruello <5345986+vruello@users.noreply.github.com> Date: Sat, 30 Sep 2023 23:57:33 +0200 Subject: [PATCH] Try to recover malformed events --- CHANGELOG.md | 1 + server/src/event.rs | 294 ++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 271 insertions(+), 24 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 66ebfb1..f1decfb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Remove `OperationID` from responses because we don't support "Robust Connection" (#37) - Clear in-memory subscriptions when a SIGHUP signal is received, resulting in all file descriptors used by subscriptions being closed (#37) - `heartbeats_queue_size` now defaults to 2048 instead of 32 (#37) +- A malformed event will no longer stop the event stream anymore with the json formatter. Some work is done to try to recover the raw data of the event, and an `OpenWEC.Error` field is added to help catch the problem (#47) ## [0.1.0] - 2023-05-30 diff --git a/server/src/event.rs b/server/src/event.rs index feee3b0..0cddadc 100644 --- a/server/src/event.rs +++ b/server/src/event.rs @@ -1,6 +1,6 @@ use anyhow::{anyhow, bail, Context, Result}; use chrono::{DateTime, Utc}; -use log::{info, trace}; +use log::{debug, info, trace, warn}; use roxmltree::{Document, Node}; use serde::Serialize; use std::{collections::HashMap, net::SocketAddr}; @@ -67,10 +67,40 @@ impl DataType { } } +#[derive(Serialize, Debug, Clone, Default)] +#[serde(tag = "Type")] +pub enum ErrorType { + /// Initial XML parsing failed but Raw content could be recovered + RawContentRecovered { + #[serde(rename = "Message")] + message: String, + }, + /// Initial XML parsing failed and recovering failed again + FailedToRecoverRawContent { + #[serde(rename = "Message")] + message: String, + }, + /// Initial XML parsing failed and no recovering strategy was usable + Unrecoverable { + #[serde(rename = "Message")] + message: String, + }, + #[default] + Unknown, +} + +#[derive(Debug, Default, Serialize, Clone)] +struct ErrorInfo { + #[serde(rename = "OriginalContent")] + original_content: String, + #[serde(flatten)] + _type: ErrorType, +} + #[derive(Debug, Default, Serialize, Clone)] pub struct Event { - #[serde(rename = "System")] - system: System, + #[serde(rename = "System", skip_serializing_if = "Option::is_none")] + system: Option, #[serde(flatten, skip_serializing_if = "DataType::is_unknown")] data: DataType, #[serde(rename = "RenderingInfo", skip_serializing_if = "Option::is_none")] @@ -80,25 +110,16 @@ pub struct Event { } impl Event { - pub fn from_str(metadata: &EventMetadata, content: &str) -> Result { - let doc = Document::parse(content).context("Failed to parse event XML")?; - let mut event = Event::default(); - event.additional = Additional { - addr: metadata.addr().ip().to_string(), - principal: metadata.principal().to_owned(), // TODO : change to something that works for TLS as well (modify db and output) - node: metadata.node_name().cloned(), - time_received: metadata.time_received().to_rfc3339(), - subscription: SubscriptionType { - uuid: metadata.subscription_uuid().to_owned(), - version: metadata.subscription_version().to_owned(), - name: metadata.subscription_name().to_owned(), - uri: metadata.subscription_uri().cloned(), - }, - }; + /// Reads a parsed XML document and feeds an Event struct + fn feed_event_from_document( + event: &mut Event, + doc: &Document<'_>, + content: &str, // Only used for logging + ) -> Result<()> { let root = doc.root_element(); for node in root.children() { if node.tag_name().name() == "System" { - event.system = System::from(&node).context("Parsing failure in System")? + event.system = Some(System::from(&node).context("Parsing failure in System")?) } else if node.tag_name().name() == "EventData" { event.data = parse_event_data(&node).context("Parsing failure in EventData")? } else if node.tag_name().name() == "UserData" { @@ -120,7 +141,92 @@ impl Event { trace!("Event was: {}", content); } } + Ok(()) + } + pub fn from_str(metadata: &EventMetadata, content: &str) -> Result { + let mut event = Event { + additional: Additional { + addr: metadata.addr().ip().to_string(), + principal: metadata.principal().to_owned(), // TODO : change to something that works for TLS as well (modify db and output) + node: metadata.node_name().cloned(), + time_received: metadata.time_received().to_rfc3339(), + subscription: SubscriptionType { + uuid: metadata.subscription_uuid().to_owned(), + version: metadata.subscription_version().to_owned(), + name: metadata.subscription_name().to_owned(), + uri: metadata.subscription_uri().cloned(), + }, + error: None, + }, + ..Default::default() + }; + + let doc_parse_attempt = Document::parse(content); + match doc_parse_attempt { + Ok(doc) => Event::feed_event_from_document(&mut event, &doc, content)?, + Err(initial_error) => { + // Sometimes, `RenderingInfo` content is malformed, meaning that + // the event content is cut off in the middle without closing tags, + // resulting in invalid XML (see issue #46 for more details). + // + // When this problem occurs, we try to remove the RenderingInfo + // element and recover the Raw event content. Such an operation should + // be valid because RenderingInfo is the last "specified" child node + // of the Event element according to the Event schema. + // + // See tests: + // - test_serialize_recoverable_rendering_info_malformed + // - test_serialize_malformed_rendering_info_1 + // - test_serialize_malformed_rendering_info_2 + // - test_serialize_malformed_rendering_info_3 + match content.rsplit_once(" { + let clean_content = beginning.to_string() + ""; + match Document::parse(&clean_content) { + Ok(doc) => { + Event::feed_event_from_document(&mut event, &doc, &clean_content)?; + let message = format!( + "Failed to parse event XML ({}) but Raw content could be recovered.", + initial_error + ); + event.additional.error = Some(ErrorInfo { + original_content: content.to_string(), + _type: ErrorType::RawContentRecovered { + message: message.clone(), + }, + }); + debug!("{}. Context: {:?}", message, event.additional); + } + Err(recovering_error) => { + let message = format!( + "Failed to parse event XML ({}) and Raw content recovering failed ({})", + initial_error, + recovering_error + ); + event.additional.error = Some(ErrorInfo { + original_content: content.to_string(), + _type: ErrorType::FailedToRecoverRawContent { + message: message.clone(), + }, + }); + warn!("{}. Context: {:?}", message, event.additional); + } + } + } + None => { + let message = format!("Failed to parse event XML: {}", initial_error); + event.additional.error = Some(ErrorInfo { + original_content: content.to_string(), + _type: ErrorType::Unrecoverable { + message: message.clone(), + }, + }); + warn!("{}. Context: {:?}", message, event.additional); + } + }; + } + } Ok(event) } } @@ -213,6 +319,8 @@ struct Additional { subscription: SubscriptionType, #[serde(rename = "Node", skip_serializing_if = "Option::is_none")] node: Option, + #[serde(rename = "Error", skip_serializing_if = "Option::is_none")] + error: Option, } #[derive(Debug, Default, Serialize, Clone)] @@ -427,12 +535,14 @@ struct RenderingInfo { impl RenderingInfo { fn from(rendering_info_node: &Node) -> Result { - let mut rendering_info = RenderingInfo::default(); + let mut rendering_info = RenderingInfo { + culture: rendering_info_node + .attribute("Culture") + .unwrap_or_default() + .to_owned(), + ..Default::default() + }; - rendering_info.culture = rendering_info_node - .attribute("Culture") - .unwrap_or_default() - .to_owned(); for node in rendering_info_node.children() { let tag = node.tag_name(); if tag.name() == "Message" { @@ -1119,4 +1229,140 @@ If this computer is a domain controller for the specified domain, it sets up the assert_eq!(event_json_value, expected_value); } + + const RECOVERABLE_RENDERING_INFO_MALFORMED: &str = r#"4798001382400x8020000000000000980236Securitydvas0004_xpsdavevxxxxx_xpsS-1-5-21-1604529354-1295832394-4197355770-1001S-1-5-18xxxxx_XPS$WORKGROUP0x3e70x28d4C:\\Windows\\System32\\svchost.exeA user's local group membership was enumerated. Subject: "#; + const RENDERING_INFO_MALFORMED_RECOVERED: &str = r#"{"System":{ "Provider":{ "Name":"Microsoft-Windows-Security-Auditing", "Guid":"{54849625-5478-4994-a5ba-3e3b0328c30d}" }, "EventID":4798, "Version":0, "Level":0, "Task":13824, "Opcode":0, "Keywords":"0x8020000000000000", "TimeCreated":"2023-09-29T13:39:08.7234692Z", "EventRecordID":980236, "Correlation":{ "ActivityID":"{f59bb999-ec5b-0008-f6b9-9bf55becd901}" }, "Execution":{ "ProcessID":1440, "ThreadID":16952 }, "Channel":"Security", "Computer":"dvas0004_xps" }, "EventData":{ "SubjectLogonId":"0x3e7", "TargetDomainName":"xxxxx_xps", "CallerProcessId":"0x28d4", "CallerProcessName":"C:\\\\Windows\\\\System32\\\\svchost.exe", "TargetUserName":"davev", "SubjectDomainName":"WORKGROUP", "SubjectUserName":"xxxxx_XPS$", "TargetSid":"S-1-5-21-1604529354-1295832394-4197355770-1001", "SubjectUserSid":"S-1-5-18" }, "OpenWEC":{ "IpAddress":"127.0.0.1", "TimeReceived":"2023-09-29T14:33:12.574363325+00:00", "Principal":"demo-client", "Subscription":{ "Uuid":"91E05B32-F8F6-48CF-8AB4-4038233B83AC", "Version":"523D1886-E73E-4A96-A95D-F0326CB282F0", "Name":"my-test-subscription" }, "Error":{ "OriginalContent":"4798001382400x8020000000000000980236Securitydvas0004_xpsdavevxxxxx_xpsS-1-5-21-1604529354-1295832394-4197355770-1001S-1-5-18xxxxx_XPS$WORKGROUP0x3e70x28d4C:\\\\Windows\\\\System32\\\\svchost.exeA user's local group membership was enumerated. Subject: ", "Type": "RawContentRecovered", "Message":"Failed to parse event XML (the root node was opened but never closed) but Raw content could be recovered." } } }"#; + + #[test] + fn test_serialize_recoverable_rendering_info_malformed() { + // Try to serialize a malformed event, and use the recovering strategy to + // recover its Raw content + let event = Event::from_str( + &EventMetadata { + addr: SocketAddr::from_str("127.0.0.1:5985").unwrap(), + principal: "demo-client".to_string(), + node_name: None, + time_received: chrono::DateTime::parse_from_rfc3339( + "2023-09-29T14:33:12.574363325+00:00", + ) + .unwrap() + .with_timezone(&Utc), + subscription_name: "my-test-subscription".to_string(), + subscription_uuid: "91E05B32-F8F6-48CF-8AB4-4038233B83AC".to_string(), + subscription_version: "523D1886-E73E-4A96-A95D-F0326CB282F0".to_string(), + subscription_uri: None, + }, + RECOVERABLE_RENDERING_INFO_MALFORMED, + ) + .expect("Failed to parse Event"); + + let event_json = serde_json::to_string(&event).expect("Failed to serialize event"); + + let event_json_value: Value = serde_json::from_str(&event_json).unwrap(); + let expected_value: Value = + serde_json::from_str(RENDERING_INFO_MALFORMED_RECOVERED).unwrap(); + + assert_eq!(event_json_value, expected_value); + } + + const RENDERING_INFO_MALFORMED_1: &str = r#"4798001382400x8020000000000000980236Securitydvas0004_xpsdavevxxxxx_xpsS-1-5-21-1604529354-1295832394-4197355770-1001S-1-5-18xxxxx_XPS$WORKGROUP0x3e70x28d4C:\\Windows\\System32\\svchost.exe"#; + const RENDERING_INFO_MALFORMED_1_ERROR: &str = r#"{"OpenWEC":{ "IpAddress":"127.0.0.1", "TimeReceived":"2023-09-29T14:33:12.574363325+00:00", "Principal":"demo-client", "Subscription":{ "Uuid":"91E05B32-F8F6-48CF-8AB4-4038233B83AC", "Version":"523D1886-E73E-4A96-A95D-F0326CB282F0", "Name":"my-test-subscription" }, "Error":{ "Type": "Unrecoverable", "OriginalContent":"4798001382400x8020000000000000980236Securitydvas0004_xpsdavevxxxxx_xpsS-1-5-21-1604529354-1295832394-4197355770-1001S-1-5-18xxxxx_XPS$WORKGROUP0x3e70x28d4C:\\\\Windows\\\\System32\\\\svchost.exe", "Message":"Failed to parse event XML: the root node was opened but never closed" } } }"#; + + #[test] + fn test_serialize_malformed_rendering_info_1() { + // Try to serialize an event for which there is no recovering strategy + let event = Event::from_str( + &EventMetadata { + addr: SocketAddr::from_str("127.0.0.1:5985").unwrap(), + principal: "demo-client".to_string(), + node_name: None, + time_received: chrono::DateTime::parse_from_rfc3339( + "2023-09-29T14:33:12.574363325+00:00", + ) + .unwrap() + .with_timezone(&Utc), + subscription_name: "my-test-subscription".to_string(), + subscription_uuid: "91E05B32-F8F6-48CF-8AB4-4038233B83AC".to_string(), + subscription_version: "523D1886-E73E-4A96-A95D-F0326CB282F0".to_string(), + subscription_uri: None, + }, + RENDERING_INFO_MALFORMED_1, + ) + .expect("Failed to parse Event"); + + let event_json = serde_json::to_string(&event).expect("Failed to serialize event"); + + let event_json_value: Value = serde_json::from_str(&event_json).unwrap(); + let expected_value: Value = serde_json::from_str(RENDERING_INFO_MALFORMED_1_ERROR).unwrap(); + + assert_eq!(event_json_value, expected_value); + } + + const RENDERING_INFO_MALFORMED_2: &str = r#"4798001382400x8020000000000000980236Securitydvas0004_xpsdavevxxxxx_xpsS-1-5-21-1604529354-1295832394-4197355770-1001S-1-5-18xxxxx_XPS$WORKGROUP0x3e70x28d4C:\\Windows\\System32\\svchost.exe4798001382400x8020000000000000980236Securitydvas0004_xpsdavevxxxxx_xpsS-1-5-21-1604529354-1295832394-4197355770-1001S-1-5-18xxxxx_XPS$WORKGROUP0x3e70x28d4C:\\\\Windows\\\\System32\\\\svchost.exe