From 7def59759a7ad68843047947101b6e4194541276 Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Tue, 27 Aug 2024 13:39:14 +0200 Subject: [PATCH] refactor: large refactoring of how compilation errors are handled (#180) The changes in this PR include: * Redesign how the compilation errors are exposed in the Rust API. With this change users of the API have access to more details about the error, including the structure of error reports (report title, individual labels with their corresponding text and code spans, etc). These changes are backward-incompatible, though. * The CLI now shows multiple compilation errors at a time, instead of showing only the first error found. * All the error details exposed in the Rust API are also exposed in the C API and the Golang API. --- Cargo.lock | 12 +- capi/Cargo.toml | 1 + capi/include/yara_x.h | 40 + capi/src/compiler.rs | 84 +- capi/src/lib.rs | 36 +- capi/src/scanner.rs | 31 +- capi/src/tests.rs | 77 +- cli/src/commands/mod.rs | 20 +- cli/src/commands/scan.rs | 3 +- cli/src/main.rs | 24 +- go/compiler.go | 59 ++ go/compiler_test.go | 36 +- lib/Cargo.toml | 4 +- lib/src/compiler/context.rs | 10 +- lib/src/compiler/errors.rs | 827 ++++++++++-------- lib/src/compiler/ir/ast2ir.rs | 227 +++-- lib/src/compiler/ir/hex2hir.rs | 23 +- lib/src/compiler/ir/mod.rs | 11 +- lib/src/compiler/mod.rs | 323 +++++-- lib/src/compiler/report.rs | 321 +++++-- lib/src/compiler/rules.rs | 7 +- lib/src/compiler/tests/mod.rs | 65 +- lib/src/compiler/tests/testdata/errors/1.out | 2 +- .../compiler/tests/testdata/errors/115.out | 2 +- lib/src/compiler/tests/testdata/errors/46.out | 2 +- lib/src/compiler/warnings.rs | 557 ++++++++---- lib/src/lib.rs | 26 +- lib/src/scanner/context.rs | 2 +- lib/src/scanner/mod.rs | 2 +- lib/src/variables.rs | 2 +- lib/src/wasm/mod.rs | 3 +- macros/Cargo.toml | 3 +- macros/src/error.rs | 609 ++++++------- macros/src/lib.rs | 164 ++-- parser/Cargo.toml | 5 +- parser/src/lib.rs | 4 + py/src/lib.rs | 4 +- 37 files 
changed, 2248 insertions(+), 1380 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f9aef61c1..56da37cf3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -670,15 +670,6 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f7144d30dcf0fafbce74250a3963025d8d52177934239851c917d29f1df280c2" -[[package]] -name = "convert_case" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec182b0ca2f35d8fc196cf3404988fd8b8c739a4d270ff118a398feb0cbec1ca" -dependencies = [ - "unicode-segmentation", -] - [[package]] name = "core-foundation-sys" version = "0.8.7" @@ -4827,6 +4818,7 @@ name = "yara-x-capi" version = "0.6.0" dependencies = [ "cbindgen", + "serde_json", "yara-x", ] @@ -4879,7 +4871,6 @@ dependencies = [ name = "yara-x-macros" version = "0.6.0" dependencies = [ - "convert_case", "darling", "proc-macro2", "quote", @@ -4906,6 +4897,7 @@ dependencies = [ "rayon", "rowan", "rustc-hash 2.0.0", + "serde", "yansi 1.0.1", ] diff --git a/capi/Cargo.toml b/capi/Cargo.toml index 4a1cf4072..b5f896f8b 100644 --- a/capi/Cargo.toml +++ b/capi/Cargo.toml @@ -22,6 +22,7 @@ name = "yara_x_capi" crate-type = ["staticlib", "cdylib"] [dependencies] +serde_json = { workspace = true } yara-x = { workspace = true } [build-dependencies] diff --git a/capi/include/yara_x.h b/capi/include/yara_x.h index 57c74c499..c4ef3180a 100644 --- a/capi/include/yara_x.h +++ b/capi/include/yara_x.h @@ -315,6 +315,46 @@ enum YRX_RESULT yrx_compiler_define_global_float(struct YRX_COMPILER *compiler, const char *ident, double value); +// Returns the errors encountered during the compilation in JSON format. +// +// In the address indicated by the `buf` pointer, the function will copy a +// `YRX_BUFFER*` pointer. The `YRX_BUFFER` structure represents a buffer +// that contains the JSON representation of the compilation errors. +// +// The JSON consists on an array of objects, each object representing a +// compilation error. 
The object has the following fields: +// +// * type: A string that describes the type of error. +// * code: Error code (e.g: "E009"). +// * title: Error title (e.g: ""unknown identifier `foo`"). +// * labels: Array of labels. +// * text: The full text of the error report, as shown by the command-line tool. +// +// Here is an example: +// +// ```json +// [ +// { +// "type": "UnknownIdentifier", +// "code": "E009", +// "title": "unknown identifier `foo`", +// "labels": [ +// { +// "level": "error", +// "code_origin": null, +// "span": {"start":25,"end":28}, +// "text": "this identifier has not been declared" +// } +// ], +// "text": "... ..." +// } +// ] +// ``` +// +// The [`YRX_BUFFER`] must be destroyed with [`yrx_buffer_destroy`]. +enum YRX_RESULT yrx_compiler_errors_json(struct YRX_COMPILER *compiler, + struct YRX_BUFFER **buf); + // Builds the source code previously added to the compiler. // // After calling this function the compiler is reset to its initial state, diff --git a/capi/src/compiler.rs b/capi/src/compiler.rs index b62110895..b3254980f 100644 --- a/capi/src/compiler.rs +++ b/capi/src/compiler.rs @@ -1,6 +1,10 @@ -use crate::{LAST_ERROR, YRX_RESULT, YRX_RULES}; -use std::ffi::{c_char, CStr, CString}; +use std::ffi::{c_char, CStr}; use std::mem; +use std::mem::ManuallyDrop; + +use yara_x::errors::{CompileError, SerializationError, VariableError}; + +use crate::{_yrx_set_last_error, YRX_BUFFER, YRX_RESULT, YRX_RULES}; /// A compiler that takes YARA source code and produces compiled rules. 
pub struct YRX_COMPILER<'a> { @@ -83,11 +87,11 @@ pub unsafe extern "C" fn yrx_compiler_add_source( match compiler.inner.add_source(src.to_bytes()) { Ok(_) => { - LAST_ERROR.set(None); + _yrx_set_last_error::(None); YRX_RESULT::SUCCESS } Err(err) => { - LAST_ERROR.set(Some(CString::new(err.to_string()).unwrap())); + _yrx_set_last_error(Some(err)); YRX_RESULT::SYNTAX_ERROR } } @@ -158,7 +162,7 @@ pub unsafe extern "C" fn yrx_compiler_new_namespace( /// scanning data, however each scanner can change the variable’s initial /// value by calling `yrx_scanner_set_global`. unsafe fn yrx_compiler_define_global< - T: TryInto, + T: TryInto, >( compiler: *mut YRX_COMPILER, ident: *const c_char, @@ -178,11 +182,11 @@ unsafe fn yrx_compiler_define_global< match compiler.inner.define_global(ident, value) { Ok(_) => { - LAST_ERROR.set(None); + _yrx_set_last_error::(None); YRX_RESULT::SUCCESS } Err(err) => { - LAST_ERROR.set(Some(CString::new(err.to_string()).unwrap())); + _yrx_set_last_error(Some(err)); YRX_RESULT::VARIABLE_ERROR } } @@ -234,6 +238,72 @@ pub unsafe extern "C" fn yrx_compiler_define_global_float( yrx_compiler_define_global(compiler, ident, value) } +/// Returns the errors encountered during the compilation in JSON format. +/// +/// In the address indicated by the `buf` pointer, the function will copy a +/// `YRX_BUFFER*` pointer. The `YRX_BUFFER` structure represents a buffer +/// that contains the JSON representation of the compilation errors. +/// +/// The JSON consists on an array of objects, each object representing a +/// compilation error. The object has the following fields: +/// +/// * type: A string that describes the type of error. +/// * code: Error code (e.g: "E009"). +/// * title: Error title (e.g: ""unknown identifier `foo`"). +/// * labels: Array of labels. +/// * text: The full text of the error report, as shown by the command-line tool. 
+/// +/// Here is an example: +/// +/// ```json +/// [ +/// { +/// "type": "UnknownIdentifier", +/// "code": "E009", +/// "title": "unknown identifier `foo`", +/// "labels": [ +/// { +/// "level": "error", +/// "code_origin": null, +/// "span": {"start":25,"end":28}, +/// "text": "this identifier has not been declared" +/// } +/// ], +/// "text": "... ..." +/// } +/// ] +/// ``` +/// +/// The [`YRX_BUFFER`] must be destroyed with [`yrx_buffer_destroy`]. +#[no_mangle] +pub unsafe extern "C" fn yrx_compiler_errors_json( + compiler: *mut YRX_COMPILER, + buf: &mut *mut YRX_BUFFER, +) -> YRX_RESULT { + let compiler = if let Some(compiler) = compiler.as_mut() { + compiler + } else { + return YRX_RESULT::INVALID_ARGUMENT; + }; + + match serde_json::to_vec(compiler.inner.errors()) { + Ok(json) => { + let json = json.into_boxed_slice(); + let mut json = ManuallyDrop::new(json); + *buf = Box::into_raw(Box::new(YRX_BUFFER { + data: json.as_mut_ptr(), + length: json.len(), + })); + _yrx_set_last_error::(None); + YRX_RESULT::SUCCESS + } + Err(err) => { + _yrx_set_last_error(Some(err)); + YRX_RESULT::SERIALIZATION_ERROR + } + } +} + /// Builds the source code previously added to the compiler. /// /// After calling this function the compiler is reset to its initial state, diff --git a/capi/src/lib.rs b/capi/src/lib.rs index 5d972b247..676361c73 100644 --- a/capi/src/lib.rs +++ b/capi/src/lib.rs @@ -99,19 +99,29 @@ use std::mem::ManuallyDrop; use std::ptr::slice_from_raw_parts_mut; use std::slice; +use yara_x::errors::{CompileError, SerializationError}; + +pub use scanner::*; + mod compiler; mod scanner; #[cfg(test)] mod tests; -pub use scanner::*; - thread_local! { static LAST_ERROR: RefCell> = const { RefCell::new(None) }; } +fn _yrx_set_last_error(err: Option) +where + E: ToString, +{ + LAST_ERROR.set(err.map(|err| CString::new(err.to_string()).unwrap())) +} + /// Error codes returned by functions in this API. 
+#[derive(PartialEq, Debug)] #[repr(C)] pub enum YRX_RESULT { /// Everything was OK. @@ -318,11 +328,11 @@ pub unsafe extern "C" fn yrx_compile( match yara_x::compile(c_str.to_bytes()) { Ok(r) => { *rules = Box::into_raw(Box::new(YRX_RULES(r))); - LAST_ERROR.set(None); + _yrx_set_last_error::(None); YRX_RESULT::SUCCESS } Err(err) => { - LAST_ERROR.set(Some(CString::new(err.to_string()).unwrap())); + _yrx_set_last_error(Some(err)); YRX_RESULT::SYNTAX_ERROR } } @@ -350,11 +360,11 @@ pub unsafe extern "C" fn yrx_rules_serialize( data: serialized.as_mut_ptr(), length: serialized.len(), })); - LAST_ERROR.set(None); + _yrx_set_last_error::(None); YRX_RESULT::SUCCESS } Err(err) => { - LAST_ERROR.set(Some(CString::new(err.to_string()).unwrap())); + _yrx_set_last_error(Some(err)); YRX_RESULT::SERIALIZATION_ERROR } } @@ -375,11 +385,11 @@ pub unsafe extern "C" fn yrx_rules_deserialize( match yara_x::Rules::deserialize(slice::from_raw_parts(data, len)) { Ok(r) => { *rules = Box::into_raw(Box::new(YRX_RULES(r))); - LAST_ERROR.set(None); + _yrx_set_last_error::(None); YRX_RESULT::SUCCESS } Err(err) => { - LAST_ERROR.set(Some(CString::new(err.to_string()).unwrap())); + _yrx_set_last_error(Some(err)); YRX_RESULT::SERIALIZATION_ERROR } } @@ -408,7 +418,7 @@ pub unsafe extern "C" fn yrx_rule_identifier( if let Some(rule) = rule.as_ref() { *ident = rule.0.identifier().as_ptr(); *len = rule.0.identifier().len(); - LAST_ERROR.set(None); + _yrx_set_last_error::(None); YRX_RESULT::SUCCESS } else { YRX_RESULT::INVALID_ARGUMENT @@ -432,7 +442,7 @@ pub unsafe extern "C" fn yrx_rule_namespace( if let Some(rule) = rule.as_ref() { *ns = rule.0.namespace().as_ptr(); *len = rule.0.namespace().len(); - LAST_ERROR.set(None); + _yrx_set_last_error::(None); YRX_RESULT::SUCCESS } else { YRX_RESULT::INVALID_ARGUMENT @@ -599,9 +609,9 @@ pub unsafe extern "C" fn yrx_buffer_destroy(buf: *mut YRX_BUFFER) { /// the most recent function was successfully. 
#[no_mangle] pub unsafe extern "C" fn yrx_last_error() -> *const c_char { - LAST_ERROR.with_borrow(|last_error| { - if let Some(last_error) = last_error { - last_error.as_ptr() + LAST_ERROR.with_borrow(|err| { + if let Some(err) = err { + err.as_ptr() } else { std::ptr::null() } diff --git a/capi/src/scanner.rs b/capi/src/scanner.rs index 4c94a5dd7..2947bacc4 100644 --- a/capi/src/scanner.rs +++ b/capi/src/scanner.rs @@ -1,9 +1,10 @@ -use std::ffi::{c_char, CStr, CString}; +use std::ffi::{c_char, CStr}; use std::slice; use std::time::Duration; -use yara_x::ScanError; -use crate::{LAST_ERROR, YRX_RESULT, YRX_RULE, YRX_RULES}; +use yara_x::errors::ScanError; + +use crate::{_yrx_set_last_error, YRX_RESULT, YRX_RULE, YRX_RULES}; /// A scanner that scans data with a set of compiled YARA rules. pub struct YRX_SCANNER<'s> { @@ -77,6 +78,8 @@ pub unsafe extern "C" fn yrx_scanner_scan( data: *const u8, len: usize, ) -> YRX_RESULT { + _yrx_set_last_error::(None); + if scanner.is_null() { return YRX_RESULT::INVALID_ARGUMENT; } @@ -90,11 +93,12 @@ pub unsafe extern "C" fn yrx_scanner_scan( let scan_results = scanner.inner.scan(data); if let Err(err) = scan_results { - LAST_ERROR.set(Some(CString::new(err.to_string()).unwrap())); - return match err { + let result = match err { ScanError::Timeout => YRX_RESULT::SCAN_TIMEOUT, _ => YRX_RESULT::SCAN_ERROR, }; + _yrx_set_last_error(Some(err)); + return result; } let scan_results = scan_results.unwrap(); @@ -106,7 +110,6 @@ pub unsafe extern "C" fn yrx_scanner_scan( } } - LAST_ERROR.set(None); YRX_RESULT::SUCCESS } @@ -195,7 +198,7 @@ pub unsafe extern "C" fn yrx_scanner_set_module_output( let module_name = match CStr::from_ptr(name).to_str() { Ok(name) => name, Err(err) => { - LAST_ERROR.set(Some(CString::new(err.to_string()).unwrap())); + _yrx_set_last_error(Some(err)); return YRX_RESULT::INVALID_UTF8; } }; @@ -209,18 +212,18 @@ pub unsafe extern "C" fn yrx_scanner_set_module_output( match 
scanner.inner.set_module_output_raw(module_name, data) { Ok(_) => { - LAST_ERROR.set(None); + _yrx_set_last_error::(None); YRX_RESULT::SUCCESS } Err(err) => { - LAST_ERROR.set(Some(CString::new(err.to_string()).unwrap())); + _yrx_set_last_error(Some(err)); YRX_RESULT::SCAN_ERROR } } } unsafe extern "C" fn yrx_scanner_set_global< - T: TryInto, + T: TryInto, >( scanner: *mut YRX_SCANNER, ident: *const c_char, @@ -233,7 +236,7 @@ unsafe extern "C" fn yrx_scanner_set_global< let ident = match CStr::from_ptr(ident).to_str() { Ok(ident) => ident, Err(err) => { - LAST_ERROR.set(Some(CString::new(err.to_string()).unwrap())); + _yrx_set_last_error(Some(err)); return YRX_RESULT::INVALID_UTF8; } }; @@ -242,11 +245,11 @@ unsafe extern "C" fn yrx_scanner_set_global< match scanner.inner.set_global(ident, value) { Ok(_) => { - LAST_ERROR.set(None); + _yrx_set_last_error::(None); YRX_RESULT::SUCCESS } Err(err) => { - LAST_ERROR.set(Some(CString::new(err.to_string()).unwrap())); + _yrx_set_last_error(Some(err)); YRX_RESULT::VARIABLE_ERROR } } @@ -262,7 +265,7 @@ pub unsafe extern "C" fn yrx_scanner_set_global_str( match CStr::from_ptr(value).to_str() { Ok(value) => yrx_scanner_set_global(scanner, ident, value), Err(err) => { - LAST_ERROR.set(Some(CString::new(err.to_string()).unwrap())); + _yrx_set_last_error(Some(err)); YRX_RESULT::INVALID_UTF8 } } diff --git a/capi/src/tests.rs b/capi/src/tests.rs index c020b3035..acc6636fc 100644 --- a/capi/src/tests.rs +++ b/capi/src/tests.rs @@ -12,9 +12,10 @@ use crate::{ yrx_scanner_destroy, yrx_scanner_on_matching_rule, yrx_scanner_scan, yrx_scanner_set_global_bool, yrx_scanner_set_global_float, yrx_scanner_set_global_int, yrx_scanner_set_global_str, - yrx_scanner_set_timeout, YRX_BUFFER, YRX_RULE, + yrx_scanner_set_timeout, YRX_BUFFER, YRX_RESULT, YRX_RULE, }; -use std::ffi::{c_void, CString}; + +use std::ffi::{c_void, CStr, CString}; extern "C" fn callback(rule: *const YRX_RULE, user_data: *mut c_void) { let mut ptr = std::ptr::null(); @@ 
-45,29 +46,31 @@ fn capi() { let mut compiler = std::ptr::null_mut(); yrx_compiler_create(0, &mut compiler); + // TODO: Use c-string literals cr#"rule test ..."# when we MSRV + // is bumped to 1.77. + // https://doc.rust-lang.org/edition-guide/rust-2021/c-string-literals.html let src = CString::new( - b"rule test {\ - meta: \ - some_int = 1 \ - some_string = \"foo\" \ - some_bytes = \"\\x01\\x00\\x02\" \ - strings: \ - $foo = \"foo\" \ - condition: \ - $foo or ( \ - some_bool and \ - some_str == \"some_str\" and \ - some_int == 1 and \ - some_float == 1.5) \ - }" - .to_vec(), + br#"rule test { + meta: + some_int = 1 + some_string = "foo" + some_bytes = "\x01\x00\x02" + strings: + $foo = "foo" + condition: + $foo or ( + some_bool and + some_str == "some_str" and + some_int == 1 and + some_float == 1.5) + }"#, ) .unwrap(); - let some_bool = CString::new(b"some_bool".to_vec()).unwrap(); - let some_str = CString::new(b"some_str".to_vec()).unwrap(); - let some_int = CString::new(b"some_int".to_vec()).unwrap(); - let some_float = CString::new(b"some_float".to_vec()).unwrap(); + let some_bool = CString::new(b"some_bool").unwrap(); + let some_str = CString::new(b"some_str").unwrap(); + let some_int = CString::new(b"some_int").unwrap(); + let some_float = CString::new(b"some_float").unwrap(); yrx_compiler_define_global_int(compiler, some_int.as_ptr(), 1); yrx_compiler_define_global_float(compiler, some_float.as_ptr(), 1.5); @@ -78,7 +81,7 @@ fn capi() { some_str.as_ptr(), ); - let namespace = CString::new(b"foo".to_vec()).unwrap(); + let namespace = CString::new(b"foo").unwrap(); yrx_compiler_new_namespace(compiler, namespace.as_ptr()); yrx_compiler_add_source(compiler, src.as_ptr()); @@ -135,3 +138,33 @@ fn capi() { yrx_rules_destroy(rules); } } + +#[test] +fn capi_errors() { + unsafe { + let mut compiler = std::ptr::null_mut(); + yrx_compiler_create(0, &mut compiler); + + let src = CString::new(b"rule test { condition: foo }").unwrap(); + + assert_eq!( + 
yrx_compiler_add_source(compiler, src.as_ptr()), + YRX_RESULT::SYNTAX_ERROR + ); + + assert_eq!( + CStr::from_ptr(yrx_last_error()), + CStr::from_bytes_with_nul( + b"error[E009]: unknown identifier `foo` + --> line:1:24 + | +1 | rule test { condition: foo } + | ^^^ this identifier has not been declared + |\0" + ) + .unwrap() + ); + + yrx_compiler_destroy(compiler); + } +} diff --git a/cli/src/commands/mod.rs b/cli/src/commands/mod.rs index 92d90a599..4664f06bf 100644 --- a/cli/src/commands/mod.rs +++ b/cli/src/commands/mod.rs @@ -22,7 +22,7 @@ use std::fs; use std::io::stdout; use std::path::PathBuf; -use anyhow::{anyhow, Context}; +use anyhow::{anyhow, bail, Context}; use clap::{command, crate_authors, Command}; use crossterm::tty::IsTty; use serde_json::Value; @@ -147,12 +147,10 @@ where .new_namespace(file_path.to_string_lossy().as_ref()); } - let result = compiler.add_source(src); + let _ = compiler.add_source(src); state.file_in_progress = None; - result?; - state.num_compiled_files = state.num_compiled_files.saturating_add(1); @@ -168,16 +166,24 @@ where } } - let rules = compiler.build(); - if let Some(console) = console { console.finalize(&state).unwrap(); } - for warning in rules.warnings() { + for error in compiler.errors() { + eprintln!("{}", error); + } + + for warning in compiler.warnings() { eprintln!("{}", warning); } + if !compiler.errors().is_empty() { + bail!("{} errors found", compiler.errors().len()); + } + + let rules = compiler.build(); + Ok(rules) } diff --git a/cli/src/commands/scan.rs b/cli/src/commands/scan.rs index cd712af05..eb3749c76 100644 --- a/cli/src/commands/scan.rs +++ b/cli/src/commands/scan.rs @@ -14,7 +14,8 @@ use superconsole::style::Stylize; use superconsole::{Component, Line, Lines, Span}; use yansi::Color::{Cyan, Red, Yellow}; use yansi::Paint; -use yara_x::{MetaValue, Rule, Rules, ScanError, ScanResults, Scanner}; +use yara_x::errors::ScanError; +use yara_x::{MetaValue, Rule, Rules, ScanResults, Scanner}; use 
crate::commands::{ compile_rules, external_var_parser, truncate_with_ellipsis, diff --git a/cli/src/main.rs b/cli/src/main.rs index ab26c0838..ec3d97ba1 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -76,26 +76,10 @@ fn main() -> anyhow::Result<()> { }; if let Err(err) = result { - match err.downcast_ref::() { - // Errors produced by the compiler already have colors and start - // with "error:", in such cases the error is printed as is. - Some(yara_x::Error::CompileError(_)) => { - eprintln!("{}", err); - } - // In all other cases imitate the style of compiler errors, so that - // they all look in the same way. - _ => { - if let Some(source) = err.source() { - eprintln!( - "{} {}: {}", - "error:".paint(Red).bold(), - err, - source - ); - } else { - eprintln!("{} {}", "error:".paint(Red).bold(), err); - } - } + if let Some(source) = err.source() { + eprintln!("{} {}: {}", "error:".paint(Red).bold(), err, source); + } else { + eprintln!("{} {}", "error:".paint(Red).bold(), err); } process::exit(1); } diff --git a/go/compiler.go b/go/compiler.go index 2e7e9ee6e..e84344b6d 100644 --- a/go/compiler.go +++ b/go/compiler.go @@ -3,6 +3,7 @@ package yara_x // #include import "C" import ( + "encoding/json" "errors" "fmt" "runtime" @@ -80,6 +81,41 @@ func ErrorOnSlowPattern(yes bool) CompileOption { } } +// CompileError represents each of the errors returned by [Compiler.Errors]. +type CompileError struct { + // Error code (e.g: "E001"). + Code string + // Error title (e.g: "unknown identifier `foo`"). + Title string + // Each of the labels in the error report. + Labels []Label + // The error's full report, as shown by the command-line tool. + Text string +} + +// Label represents a label in a [CompileError]. +type Label struct { + // Label's level (e.g: "error", "warning", "info", "note", "help"). + Level string + CodeOrigin string + // The code span covered by the label. + Span Span + // Text associated to the label. 
+ Text string +} + +// Span represents the starting and ending point of some piece of source +// code. +type Span struct { + Start int + End int +} + +// Error returns the error's full report. +func (c CompileError) Error() string { + return c.Text +} + // Compiler represent a YARA compiler. type Compiler struct { cCompiler *C.YRX_COMPILER @@ -254,6 +290,29 @@ func (c *Compiler) DefineGlobal(ident string, value interface{}) error { return nil } + +// Errors that occurred during the compilation, across multiple calls to +// [Compiler.AddSource]. +func (c *Compiler) Errors() []CompileError { + var buf *C.YRX_BUFFER + if C.yrx_compiler_errors_json(c.cCompiler, &buf) != C.SUCCESS { + panic("yrx_compiler_errors_json failed") + } + + defer C.yrx_buffer_destroy(buf) + runtime.KeepAlive(c) + + jsonErrors := C.GoBytes(unsafe.Pointer(buf.data), C.int(buf.length)) + + var result []CompileError + + if err := json.Unmarshal(jsonErrors, &result); err != nil { + panic(err) + } + + return result +} + // Build creates a [Rules] object containing a compiled version of all the // YARA rules previously added to the compiler. 
// diff --git a/go/compiler_test.go b/go/compiler_test.go index 826a2581e..c3c931011 100644 --- a/go/compiler_test.go +++ b/go/compiler_test.go @@ -107,10 +107,42 @@ func TestVariables(t *testing.T) { func TestError(t *testing.T) { _, err := Compile("rule test { condition: foo }") - assert.EqualError(t, err, `error[E009]: unknown identifier `+"`foo`"+` + expected := `error[E009]: unknown identifier `+"`foo`"+` --> line:1:24 | 1 | rule test { condition: foo } | ^^^ this identifier has not been declared - |`) + |` + assert.EqualError(t, err, expected) +} + + +func TestErrors(t *testing.T) { + c, err := NewCompiler() + assert.NoError(t, err) + + c.AddSource("rule test_1 { condition: true }") + assert.Equal(t, []CompileError{}, c.Errors()) + + c.AddSource("rule test_2 { condition: foo }") + assert.Equal(t, []CompileError{ + { + Code: "E009", + Title: "unknown identifier `foo`", + Labels: []Label{ + { + Level: "error", + CodeOrigin: "", + Span: Span { Start: 25, End: 28 }, + Text: "this identifier has not been declared", + }, + }, + Text: `error[E009]: unknown identifier `+"`foo`"+` + --> line:1:26 + | +1 | rule test_2 { condition: foo } + | ^^^ this identifier has not been declared + |`, + }, + }, c.Errors()) } diff --git a/lib/Cargo.toml b/lib/Cargo.toml index f5f281d75..567c75843 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -207,7 +207,7 @@ roxmltree = { workspace = true, optional = true } rsa = { workspace = true, optional = true } smallvec = { workspace = true, features = ["serde"] } serde = { workspace = true, features = ["rc"] } -serde_json = { workspace = true } +serde_json = { workspace = true, features = ["preserve_order"] } thiserror = { workspace = true } tlsh-fixed = { workspace = true, optional = true } uuid = { workspace = true, optional = true, features = ["v4"] } @@ -216,7 +216,7 @@ wasmtime = { workspace = true, features = ["cranelift", "parallel-compilation"] x509-parser = { workspace = true, optional = true } yansi = { workspace = true } 
yara-x-macros = { workspace = true } -yara-x-parser = { workspace = true } +yara-x-parser = { workspace = true, features = ["serde"] } lingua = { version = "1.6.0", optional = true, default-features = false, features = ["english", "german", "french", "spanish"] } diff --git a/lib/src/compiler/context.rs b/lib/src/compiler/context.rs index 95ef2da84..0c667b94d 100644 --- a/lib/src/compiler/context.rs +++ b/lib/src/compiler/context.rs @@ -4,12 +4,13 @@ use std::rc::Rc; use yara_x_parser::ast::{Ident, WithSpan}; +use crate::compiler::errors::{CompileError, UnknownPattern}; use crate::compiler::ir::PatternIdx; use crate::compiler::report::ReportBuilder; use crate::compiler::{ir, Warnings}; use crate::symbols::{StackedSymbolTable, SymbolLookup}; use crate::types::Type; -use crate::{wasm, CompileError}; +use crate::wasm; /// Structure that contains information and data structures required during the /// current compilation process. @@ -55,8 +56,7 @@ impl<'a, 'src, 'sym> CompileContext<'a, 'src, 'sym> { pub fn get_pattern_mut( &mut self, ident: &Ident, - ) -> Result<(PatternIdx, &mut ir::PatternInRule<'src>), Box> - { + ) -> Result<(PatternIdx, &mut ir::PatternInRule<'src>), CompileError> { // Make sure that identifier starts with `$`, `#`, `@` or `!`. debug_assert!("$#@!".contains( ident @@ -71,11 +71,11 @@ impl<'a, 'src, 'sym> CompileContext<'a, 'src, 'sym> { .find_position(|p| p.identifier().name[1..] 
== ident.name[1..]) .map(|(pos, pattern)| (PatternIdx::from(pos), pattern)) .ok_or_else(|| { - Box::new(CompileError::unknown_pattern( + UnknownPattern::build( self.report_builder, ident.name.to_string(), ident.span().into(), - )) + ) }) } } diff --git a/lib/src/compiler/errors.rs b/lib/src/compiler/errors.rs index 73af7c136..a713fdc2a 100644 --- a/lib/src/compiler/errors.rs +++ b/lib/src/compiler/errors.rs @@ -1,16 +1,18 @@ +#![cfg_attr(any(), rustfmt::skip)] + use std::fmt::{Debug, Display, Formatter}; use std::io; +use serde::Serialize; use thiserror::Error; -use yara_x_macros::Error as DeriveError; +use yara_x_macros::ErrorEnum; +use yara_x_macros::ErrorStruct; use yara_x_parser::ast; -use crate::compiler::report::{Level, ReportBuilder, SourceRef}; -use crate::compiler::warnings::InvalidWarningCode; -use crate::VariableError; +use crate::compiler::report::{Level, Report, ReportBuilder, CodeLoc, Label}; -/// Errors returned while serializing/deserializing compiled rules. +/// Error returned while serializing/deserializing compiled rules. #[derive(Error, Debug)] pub enum SerializationError { /// The data being deserialized doesn't contain YARA-X serialized rules. @@ -33,326 +35,46 @@ pub enum SerializationError { #[doc(hidden)] pub struct EmitWasmError(#[from] anyhow::Error); -/// Errors returned by the compiler. -#[derive(Error, Debug, Eq, PartialEq)] -#[allow(missing_docs)] -pub enum Error { - #[error(transparent)] - CompileError(#[from] Box), - - #[error(transparent)] - VariableError(#[from] VariableError), - - #[error(transparent)] - InvalidWarningCode(#[from] InvalidWarningCode), -} - -/// An error occurred during the compilation process. -#[derive(DeriveError, Eq, PartialEq)] +/// Error returned when rule compilation fails. 
#[allow(missing_docs)] #[non_exhaustive] +#[derive(ErrorEnum, Error, Clone, PartialEq, Eq)] +#[derive(Serialize)] +#[serde(tag = "type")] pub enum CompileError { - #[error("E001", "syntax error")] - #[label_error("{error_msg}", error_span)] - SyntaxError { - detailed_report: String, - error_msg: String, - error_span: SourceRef, - }, - - #[error("E002", "wrong type")] - #[label_error( - "expression should be {expected_types}, but is `{actual_type}`", - expression_span - )] - WrongType { - detailed_report: String, - expected_types: String, - actual_type: String, - expression_span: SourceRef, - }, - - #[error("E003", "mismatching types")] - #[label_error("this expression is `{type1}`", type1_span)] - #[label_error("this expression is `{type2}`", type2_span)] - MismatchingTypes { - detailed_report: String, - type1: String, - type2: String, - type1_span: SourceRef, - type2_span: SourceRef, - }, - - #[error("E004", "wrong arguments")] - #[label_error("wrong arguments in this call", args_span)] - #[note(note)] - WrongArguments { - detailed_report: String, - args_span: SourceRef, - note: Option, - }, - - #[error("E005", "assignment mismatch")] - #[label_error("this expects {expected_values} value(s)", error_span)] - #[label_error("this produces {actual_values} value(s)", iterable_span)] - AssignmentMismatch { - detailed_report: String, - expected_values: u8, - actual_values: u8, - iterable_span: SourceRef, - error_span: SourceRef, - }, - - #[error("E006", "unexpected negative number")] - #[label_error("this number can not be negative", span)] - UnexpectedNegativeNumber { detailed_report: String, span: SourceRef }, - - #[error("E007", "number out of range")] - #[label_error( - "this number is out of the allowed range [{min}-{max}]", - span - )] - NumberOutOfRange { - detailed_report: String, - min: i64, - max: i64, - span: SourceRef, - }, - - #[error("E008", "unknown field or method `{identifier}`")] - #[label_error("this field or method doesn't exist", span)] - 
UnknownField { - detailed_report: String, - identifier: String, - span: SourceRef, - }, - - #[error("E009", "unknown identifier `{identifier}`")] - #[label_error("this identifier has not been declared", span)] - #[note(note)] - UnknownIdentifier { - detailed_report: String, - identifier: String, - span: SourceRef, - note: Option, - }, - - #[error("E010", "unknown module `{identifier}`")] - #[label_error("module `{identifier}` not found", span)] - UnknownModule { - detailed_report: String, - identifier: String, - span: SourceRef, - }, - - #[error("E011", "invalid range")] - #[label_error("{error_msg}", span)] - InvalidRange { - detailed_report: String, - error_msg: String, - span: SourceRef, - }, - - #[error("E012", "duplicate rule `{new_rule}`")] - #[label_note( - "`{new_rule}` declared here for the first time", - existing_rule_span - )] - #[label_error("duplicate declaration of `{new_rule}`", new_rule_span)] - DuplicateRule { - detailed_report: String, - new_rule: String, - new_rule_span: SourceRef, - existing_rule_span: SourceRef, - }, - - #[error("E013", "rule `{ident}` conflicts with an existing identifier")] - #[label_error( - "identifier already in use by a module or global variable", - ident_span - )] - ConflictingRuleIdentifier { - detailed_report: String, - ident: String, - ident_span: SourceRef, - }, - - #[error("E014", "invalid regular expression")] - #[label_error("{error}", span)] - #[note(note)] - InvalidRegexp { - detailed_report: String, - error: String, - span: SourceRef, - note: Option, - }, - - #[error( - "E015", - "mixing greedy and non-greedy quantifiers in regular expression" - )] - #[label_error("this is {quantifier1_greediness}", quantifier1_span)] - #[label_error("this is {quantifier2_greediness}", quantifier2_span)] - MixedGreediness { - detailed_report: String, - quantifier1_greediness: String, - quantifier2_greediness: String, - quantifier1_span: SourceRef, - quantifier2_span: SourceRef, - }, - - #[error("E016", "no matching patterns")] 
- #[label_error("there's no pattern in this set", span)] - #[note(note)] - EmptyPatternSet { - detailed_report: String, - span: SourceRef, - note: Option, - }, - - #[error("E017", "`entrypoint` is unsupported`")] - #[label_error("the `entrypoint` keyword is not supported anymore", span)] - #[label_help( - "use `pe.entry_point` or `elf.entry_point` or `macho.entry_point`", - span - )] - EntrypointUnsupported { detailed_report: String, span: SourceRef }, - - #[error("E018", "slow pattern")] - #[label_error("this pattern may slow down the scan", span)] - SlowPattern { detailed_report: String, span: SourceRef }, - - #[error("E117", "invalid pattern modifier")] - #[label_error("{error_msg}", error_span)] - InvalidModifier { - detailed_report: String, - error_msg: String, - error_span: SourceRef, - }, - - #[error( - "E019", - "invalid modifier combination: `{modifier1}` `{modifier2}`" - )] - #[label_error("`{modifier1}` modifier used here", modifier1_span)] - #[label_error("`{modifier2}` modifier used here", modifier2_span)] - #[note(note)] - InvalidModifierCombination { - detailed_report: String, - modifier1: String, - modifier2: String, - modifier1_span: SourceRef, - modifier2_span: SourceRef, - note: Option, - }, - - #[error("E020", "duplicate pattern modifier")] - #[label_error("duplicate modifier", modifier_span)] - DuplicateModifier { detailed_report: String, modifier_span: SourceRef }, - - #[error("E021", "duplicate tag `{tag}`")] - #[label_error("duplicate tag", tag_span)] - DuplicateTag { detailed_report: String, tag: String, tag_span: SourceRef }, - - #[error("E022", "unused pattern `{pattern_ident}`")] - #[label_error( - "this pattern was not used in the condition", - pattern_ident_span - )] - UnusedPattern { - detailed_report: String, - pattern_ident: String, - pattern_ident_span: SourceRef, - }, - - #[error("E023", "duplicate pattern `{pattern_ident}`")] - #[label_error( - "duplicate declaration of `{pattern_ident}`", - new_pattern_span - )] - #[label_note( 
- "`{pattern_ident}` declared here for the first time", - existing_pattern_span - )] - DuplicatePattern { - detailed_report: String, - pattern_ident: String, - new_pattern_span: SourceRef, - existing_pattern_span: SourceRef, - }, - - #[error("E024", "invalid pattern `{pattern_ident}`")] - #[label_error("{error_msg}", error_span)] - #[note(note)] - InvalidPattern { - detailed_report: String, - pattern_ident: String, - error_msg: String, - error_span: SourceRef, - note: Option, - }, - - #[error("E025", "unknown pattern `{pattern_ident}`")] - #[label_error( - "this pattern is not declared in the `strings` section", - pattern_ident_span - )] - UnknownPattern { - detailed_report: String, - pattern_ident: String, - pattern_ident_span: SourceRef, - }, - - #[error("E026", "invalid base64 alphabet")] - #[label_error("{error_msg}", error_span)] - InvalidBase64Alphabet { - detailed_report: String, - error_msg: String, - error_span: SourceRef, - }, - - #[error("E027", "invalid integer")] - #[label_error("{error_msg}", error_span)] - InvalidInteger { - detailed_report: String, - error_msg: String, - error_span: SourceRef, - }, - - #[error("E028", "invalid float")] - #[label_error("{error_msg}", error_span)] - InvalidFloat { - detailed_report: String, - error_msg: String, - error_span: SourceRef, - }, - - #[error("E029", "invalid escape sequence")] - #[label_error("{error_msg}", error_span)] - InvalidEscapeSequence { - detailed_report: String, - error_msg: String, - error_span: SourceRef, - }, - - #[error("E030", "invalid regexp modifier `{modifier}`")] - #[label_error("invalid modifier", error_span)] - InvalidRegexpModifier { - detailed_report: String, - modifier: String, - error_span: SourceRef, - }, - - #[error("E031", "unexpected escape sequence")] - #[label_error( - "escape sequences are not allowed in this string", - error_span - )] - UnexpectedEscapeSequence { detailed_report: String, error_span: SourceRef }, - - #[error("E032", "invalid UTF-8")] - #[label_error("invalid 
UTF-8 character", error_span)] - InvalidUTF8 { detailed_report: String, error_span: SourceRef }, + AssignmentMismatch(Box), + ConflictingRuleIdentifier(Box), + DuplicateModifier(Box), + DuplicatePattern(Box), + DuplicateRule(Box), + DuplicateTag(Box), + EmptyPatternSet(Box), + EntrypointUnsupported(Box), + InvalidBase64Alphabet(Box), + InvalidEscapeSequence(Box), + InvalidFloat(Box), + InvalidInteger(Box), + InvalidModifier(Box), + InvalidModifierCombination(Box), + InvalidPattern(Box), + InvalidRange(Box), + InvalidRegexp(Box), + InvalidRegexpModifier(Box), + InvalidUTF8(Box), + MismatchingTypes(Box), + MixedGreediness(Box), + NumberOutOfRange(Box), + SlowPattern(Box), + SyntaxError(Box), + UnexpectedEscapeSequence(Box), + UnexpectedNegativeNumber(Box), + UnknownField(Box), + UnknownIdentifier(Box), + UnknownModule(Box), + UnknownPattern(Box), + UnusedPattern(Box), + WrongArguments(Box), + WrongType(Box), } impl CompileError { @@ -362,54 +84,37 @@ impl CompileError { ) -> Self { match err { ast::Error::SyntaxError { message, span } => { - CompileError::syntax_error( - report_builder, - message, - span.into(), - ) + SyntaxError::build(report_builder, message, span.into()) } ast::Error::InvalidInteger { message, span } => { - CompileError::invalid_integer( - report_builder, - message, - span.into(), - ) + InvalidInteger::build(report_builder, message, span.into()) } ast::Error::InvalidFloat { message, span } => { - CompileError::invalid_float( - report_builder, - message, - span.into(), - ) + InvalidFloat::build(report_builder, message, span.into()) } ast::Error::InvalidRegexpModifier { message, span } => { - CompileError::invalid_regexp_modifier( + InvalidRegexpModifier::build( report_builder, message, span.into(), ) } ast::Error::InvalidEscapeSequence { message, span } => { - CompileError::invalid_escape_sequence( + InvalidEscapeSequence::build( report_builder, message, span.into(), ) } ast::Error::UnexpectedEscapeSequence(span) => { - 
CompileError::unexpected_escape_sequence( - report_builder, - span.into(), - ) + UnexpectedEscapeSequence::build(report_builder, span.into()) } ast::Error::InvalidUTF8(span) => { - CompileError::invalid_utf_8(report_builder, span.into()) + InvalidUTF8::build(report_builder, span.into()) } } } -} -impl CompileError { /// Utility function that receives an array of strings and joins them /// together separated by commas and with "or" before the last one. /// For example, if input is `["s1", "s2", "s3"]` the result is: @@ -452,3 +157,433 @@ impl CompileError { } } } + +/// A syntax error was found in the rule. +#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E001", title = "syntax error")] +#[label("{error}", error_loc)] +pub struct SyntaxError { + report: Report, + error: String, + error_loc: CodeLoc, +} + +/// Some expression has an unexpected type. +#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E002", title = "wrong type")] +#[label( + "expression should be {expected_types}, but is `{actual_type}`", + error_loc +)] +pub struct WrongType { + report: Report, + expected_types: String, + actual_type: String, + error_loc: CodeLoc, +} + +/// Operands have mismatching types. +#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E003", title = "mismatching types")] +#[label("this expression is `{type1}`", type1_loc)] +#[label("this expression is `{type2}`", type2_loc)] +pub struct MismatchingTypes { + report: Report, + type1: String, + type2: String, + type1_loc: CodeLoc, + type2_loc: CodeLoc, +} + +/// Wrong arguments when calling a function. 
+#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E004", title = "wrong arguments")] +#[label("wrong arguments in this call", error_loc)] +#[note(note)] +pub struct WrongArguments { + report: Report, + error_loc: CodeLoc, + note: Option, +} + +/// Mismatch between number of variables and number of values in a loop +/// expression. +#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E005", title = "assignment mismatch")] +#[label("this expects {expected_values} value(s)", error_loc)] +#[label("this produces {actual_values} value(s)", iterable_loc)] +pub struct AssignmentMismatch { + report: Report, + expected_values: u8, + actual_values: u8, + iterable_loc: CodeLoc, + error_loc: CodeLoc, +} + +/// Negative number used where positive number was expected. +#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E006", title = "unexpected negative number")] +#[label("this number can not be negative", error_loc)] +pub struct UnexpectedNegativeNumber { + report: Report, + error_loc: CodeLoc, +} + +/// A number is out of the allowed range. +#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E007", title = "number out of range")] +#[label("this number is out of the allowed range [{min}-{max}]", error_loc)] +pub struct NumberOutOfRange { + report: Report, + min: i64, + max: i64, + error_loc: CodeLoc, +} + +/// Unknown field or method name. +#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E008", title = "unknown field or method `{identifier}`")] +#[label("this field or method doesn't exist", error_loc)] +pub struct UnknownField { + report: Report, + identifier: String, + error_loc: CodeLoc, +} + +/// Unknown identifier. 
+#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E009", title = "unknown identifier `{identifier}`")] +#[label("this identifier has not been declared", identifier_loc)] +#[note(note)] +pub struct UnknownIdentifier { + report: Report, + identifier: String, + identifier_loc: CodeLoc, + note: Option, +} + +impl UnknownIdentifier { + /// Name of the unknown identifier. + #[inline] + pub fn identifier(&self) -> &str { + self.identifier.as_str() + } + /// Location of the unknown identifier. + pub(crate) fn identifier_location(&self) -> &CodeLoc { + &self.identifier_loc + } +} + +/// Unknown module. +#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E010", title = "unknown module `{identifier}`")] +#[label("module `{identifier}` not found", error_loc)] +pub struct UnknownModule { + report: Report, + identifier: String, + error_loc: CodeLoc, +} + +/// Invalid range. +#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E011", title = "invalid range")] +#[label("{error}", error_loc)] +pub struct InvalidRange { + report: Report, + error: String, + error_loc: CodeLoc, +} + +/// Two rules have the same name. +#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E012", title = "duplicate rule `{new_rule}`")] +#[label( + "duplicate declaration of `{new_rule}`", + duplicate_rule_loc, + Level::Error +)] +#[label( + "`{new_rule}` declared here for the first time", + existing_rule_loc, + Level::Note +)] +pub struct DuplicateRule { + report: Report, + new_rule: String, + duplicate_rule_loc: CodeLoc, + existing_rule_loc: CodeLoc, +} + + +/// A rule has the same name as a module or global variable. 
+#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error( + code = "E013", + title = "rule `{identifier}` conflicts with an existing identifier" +)] +#[label("identifier already in use by a module or global variable", error_loc)] +pub struct ConflictingRuleIdentifier { + report: Report, + identifier: String, + error_loc: CodeLoc, +} + +/// A regular expression is invalid. +#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E014", title = "invalid regular expression")] +#[label("{error}", error_loc)] +#[note(note)] +pub struct InvalidRegexp { + report: Report, + error: String, + error_loc: CodeLoc, + note: Option, +} + +/// A regular expression contains a mixture of greedy and non-greedy quantifiers. +#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error( + code = "E015", + title = "mixing greedy and non-greedy quantifiers in regular expression" +)] +#[label("this is {quantifier1_greediness}", quantifier1_loc)] +#[label("this is {quantifier2_greediness}", quantifier2_loc)] +pub struct MixedGreediness { + report: Report, + quantifier1_greediness: String, + quantifier2_greediness: String, + quantifier1_loc: CodeLoc, + quantifier2_loc: CodeLoc, +} + +/// A set of patterns doesn't contain any patterns. +#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E016", title = "no matching patterns")] +#[label("there's no pattern in this set", error_loc)] +#[note(note)] +pub struct EmptyPatternSet { + report: Report, + error_loc: CodeLoc, + note: Option, +} + +/// The `entrypoint` keyword is not supported. 
+#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E017", title = "`entrypoint` is unsupported")] +#[label("the `entrypoint` keyword is not supported anymore", error_loc)] +#[label( + "use `pe.entry_point` or `elf.entry_point` or `macho.entry_point`", + error_loc, + Level::Help +)] +pub struct EntrypointUnsupported { + report: Report, + error_loc: CodeLoc, +} + +/// Some pattern may be potentially slow. +#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E018", title = "slow pattern")] +#[label("this pattern may slow down the scan", error_loc)] +pub struct SlowPattern { + report: Report, + error_loc: CodeLoc, +} + +/// A pattern has modifiers that can't be used together. +#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error( + code = "E019", + title = "invalid modifier combination: `{modifier1}` `{modifier2}`" +)] +#[label("`{modifier1}` modifier used here", modifier1_loc)] +#[label("`{modifier2}` modifier used here", modifier2_loc)] +#[note(note)] +pub struct InvalidModifierCombination { + report: Report, + modifier1: String, + modifier2: String, + modifier1_loc: CodeLoc, + modifier2_loc: CodeLoc, + note: Option, +} + +/// A pattern has duplicate modifiers. +#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E020", title = "duplicate pattern modifier")] +#[label("duplicate modifier", error_loc)] +pub struct DuplicateModifier { + report: Report, + error_loc: CodeLoc, +} + +/// A rule has duplicate tags. +#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E021", title = "duplicate tag `{tag}`")] +#[label("duplicate tag", error_loc)] +pub struct DuplicateTag { + report: Report, + tag: String, + error_loc: CodeLoc, +} + +/// A rule defines a pattern that is not used in the condition. 
+#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E022", title = "unused pattern `{pattern_ident}`")] +#[label("this pattern was not used in the condition", error_loc)] +pub struct UnusedPattern { + report: Report, + pattern_ident: String, + error_loc: CodeLoc, +} + +/// A rule has two patterns with the same identifier. +#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E023", title = "duplicate pattern `{pattern_ident}`")] +#[label("duplicate declaration of `{pattern_ident}`", error_loc)] +#[label( + "`{pattern_ident}` declared here for the first time", + note_loc, + Level::Note +)] +pub struct DuplicatePattern { + report: Report, + pattern_ident: String, + error_loc: CodeLoc, + note_loc: CodeLoc, +} + +/// A rule has an invalid pattern. +#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E024", title = "invalid pattern `{pattern_ident}`")] +#[label("{error}", error_loc)] +#[note(note)] +pub struct InvalidPattern { + report: Report, + pattern_ident: String, + error: String, + error_loc: CodeLoc, + note: Option, +} + +/// Some rule condition uses a pattern that was not defined. +#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E025", title = "unknown pattern `{pattern_ident}`")] +#[label("this pattern is not declared in the `strings` section", error_loc)] +pub struct UnknownPattern { + report: Report, + pattern_ident: String, + error_loc: CodeLoc, +} + +/// Wrong alphabet for the `base64` or `base64wide` modifiers. +#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E026", title = "invalid base64 alphabet")] +#[label("{error}", error_loc)] +pub struct InvalidBase64Alphabet { + report: Report, + error: String, + error_loc: CodeLoc, +} + +/// Invalid integer. 
+#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E027", title = "invalid integer")] +#[label("{error}", error_loc)] +pub struct InvalidInteger { + report: Report, + error: String, + error_loc: CodeLoc, +} + +/// Invalid float. +#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E028", title = "invalid float")] +#[label("{error}", error_loc)] +pub struct InvalidFloat { + report: Report, + error: String, + error_loc: CodeLoc, +} + +/// A text pattern contains an invalid escape sequence. +#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E029", title = "invalid escape sequence")] +#[label("{error}", error_loc)] +pub struct InvalidEscapeSequence { + report: Report, + error: String, + error_loc: CodeLoc, +} + +/// Invalid modifier for a regular expression. +#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E030", title = "invalid regexp modifier `{modifier}`")] +#[label("invalid modifier", error_loc)] +pub struct InvalidRegexpModifier { + report: Report, + modifier: String, + error_loc: CodeLoc, +} + +/// A string literal contains escaped sequences and it shouldn't. +#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E031", title = "unexpected escape sequence")] +#[label("escape sequences are not allowed in this string", error_loc)] +pub struct UnexpectedEscapeSequence { + report: Report, + error_loc: CodeLoc, +} + + +/// Source code contains invalid UTF-8 characters. +#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E032", title = "invalid UTF-8")] +#[label("invalid UTF-8 character", error_loc)] +pub struct InvalidUTF8 { + report: Report, + error_loc: CodeLoc, +} + +/// Some pattern has an invalid modifier. 
+#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E033", title = "invalid pattern modifier")] +#[label("{error}", error_loc)] +pub struct InvalidModifier { + report: Report, + error: String, + error_loc: CodeLoc, +} diff --git a/lib/src/compiler/ir/ast2ir.rs b/lib/src/compiler/ir/ast2ir.rs index 6d84bbda8..41f834955 100644 --- a/lib/src/compiler/ir/ast2ir.rs +++ b/lib/src/compiler/ir/ast2ir.rs @@ -12,6 +12,14 @@ use yara_x_parser::ast; use yara_x_parser::ast::WithSpan; use yara_x_parser::Span; +use crate::compiler::errors::{ + AssignmentMismatch, DuplicateModifier, DuplicatePattern, EmptyPatternSet, + EntrypointUnsupported, InvalidBase64Alphabet, InvalidModifier, + InvalidModifierCombination, InvalidPattern, InvalidRange, InvalidRegexp, + MismatchingTypes, MixedGreediness, NumberOutOfRange, SyntaxError, + UnexpectedNegativeNumber, UnknownField, UnknownIdentifier, WrongArguments, + WrongType, +}; use crate::compiler::ir::hex2hir::hex_pattern_hir_from_ast; use crate::compiler::ir::{ Expr, ForIn, ForOf, FuncCall, Iterable, LiteralPattern, Lookup, @@ -19,8 +27,7 @@ use crate::compiler::ir::{ PatternIdx, PatternInRule, Quantifier, Range, RegexpPattern, }; use crate::compiler::report::ReportBuilder; -use crate::compiler::warnings::Warning; -use crate::compiler::{CompileContext, CompileError}; +use crate::compiler::{warnings, CompileContext, CompileError}; use crate::modules::BUILTIN_MODULES; use crate::re; use crate::re::parser::Error; @@ -30,7 +37,7 @@ use crate::types::{Map, Regexp, Type, TypeValue, Value}; pub(in crate::compiler) fn patterns_from_ast<'src>( ctx: &mut CompileContext<'_, 'src, '_>, patterns: Option<&Vec>>, -) -> Result<(), Box> { +) -> Result<(), CompileError> { for pattern_ast in patterns.into_iter().flatten() { let pattern = pattern_from_ast(ctx, pattern_ast)?; @@ -40,12 +47,12 @@ pub(in crate::compiler) fn patterns_from_ast<'src>( .iter() .find(|p| p.identifier.name == pattern.identifier.name) { - 
return Err(Box::new(CompileError::duplicate_pattern( + return Err(DuplicatePattern::build( ctx.report_builder, pattern.identifier().name.to_string(), pattern.identifier().span().into(), existing.identifier.span().into(), - ))); + )); } } @@ -57,15 +64,15 @@ pub(in crate::compiler) fn patterns_from_ast<'src>( fn pattern_from_ast<'src>( ctx: &mut CompileContext, pattern: &ast::Pattern<'src>, -) -> Result, Box> { +) -> Result, CompileError> { // Check for duplicate pattern modifiers. let mut modifiers = BTreeSet::new(); for modifier in pattern.modifiers().iter() { if !modifiers.insert(modifier.as_text()) { - return Err(Box::new(CompileError::duplicate_modifier( + return Err(DuplicateModifier::build( ctx.report_builder, modifier.span().into(), - ))); + )); } } @@ -83,7 +90,7 @@ fn pattern_from_ast<'src>( pub(in crate::compiler) fn text_pattern_from_ast<'src>( ctx: &mut CompileContext, pattern: &ast::TextPattern<'src>, -) -> Result, Box> { +) -> Result, CompileError> { let ascii = pattern.modifiers.ascii(); let xor = pattern.modifiers.xor(); let nocase = pattern.modifiers.nocase(); @@ -104,14 +111,14 @@ pub(in crate::compiler) fn text_pattern_from_ast<'src>( for (name1, modifier1, name2, modifier2) in invalid_combinations { if let (Some(modifier1), Some(modifier2)) = (modifier1, modifier2) { - return Err(Box::new(CompileError::invalid_modifier_combination( + return Err(InvalidModifierCombination::build( ctx.report_builder, name1.to_string(), name2.to_string(), modifier1.span().into(), modifier2.span().into(), Some("these two modifiers can't be used together".to_string()), - ))); + )); }; } @@ -136,14 +143,14 @@ pub(in crate::compiler) fn text_pattern_from_ast<'src>( let xor_range = match xor { Some(modifier @ ast::PatternModifier::Xor { start, end, .. 
}) => { if *end < *start { - return Err(Box::new(CompileError::invalid_range( + return Err(InvalidRange::build( ctx.report_builder, format!( "lower bound ({}) is greater than upper bound ({})", start, end ), modifier.span().into(), - ))); + )); } flags.set(PatternFlags::Xor); Some(*start..=*end) @@ -159,11 +166,11 @@ pub(in crate::compiler) fn text_pattern_from_ast<'src>( let alphabet_str = alphabet.as_str().unwrap(); match base64::alphabet::Alphabet::new(alphabet_str) { Ok(_) => Ok(Some(String::from(alphabet_str))), - Err(err) => Err(Box::new(CompileError::invalid_base_64_alphabet( + Err(err) => Err(InvalidBase64Alphabet::build( ctx.report_builder, err.to_string().to_lowercase(), alphabet.span().into(), - ))), + )), } }; @@ -206,13 +213,13 @@ pub(in crate::compiler) fn text_pattern_from_ast<'src>( let text: BString = pattern.text.value.as_ref().into(); if text.len() < min_len { - return Err(Box::new(CompileError::invalid_pattern( + return Err(InvalidPattern::build( ctx.report_builder, pattern.identifier.name.to_string(), "this pattern is too short".to_string(), pattern.text.span().into(), note, - ))); + )); } Ok(PatternInRule { @@ -232,18 +239,18 @@ pub(in crate::compiler) fn text_pattern_from_ast<'src>( pub(in crate::compiler) fn hex_pattern_from_ast<'src>( ctx: &mut CompileContext, pattern: &ast::HexPattern<'src>, -) -> Result, Box> { +) -> Result, CompileError> { // The only modifier accepted by hex patterns is `private`. for modifier in pattern.modifiers.iter() { match modifier { ast::PatternModifier::Private { .. 
} => {} _ => { - return Err(Box::new(CompileError::invalid_modifier( + return Err(InvalidModifier::build( ctx.report_builder, "this modifier can't be applied to a hex pattern" .to_string(), modifier.span().into(), - ))); + )); } } } @@ -262,7 +269,7 @@ pub(in crate::compiler) fn hex_pattern_from_ast<'src>( pub(in crate::compiler) fn regexp_pattern_from_ast<'src>( ctx: &mut CompileContext, pattern: &ast::RegexpPattern<'src>, -) -> Result, Box> { +) -> Result, CompileError> { // Regular expressions don't accept `base64`, `base64wide` and `xor` // modifiers. for modifier in pattern.modifiers.iter() { @@ -270,11 +277,11 @@ pub(in crate::compiler) fn regexp_pattern_from_ast<'src>( ast::PatternModifier::Base64 { .. } | ast::PatternModifier::Base64Wide { .. } | ast::PatternModifier::Xor { .. } => { - return Err(Box::new(CompileError::invalid_modifier( + return Err(InvalidModifier::build( ctx.report_builder, "this modifier can't be applied to a regexp".to_string(), modifier.span().into(), - ))); + )); } _ => {} } @@ -310,7 +317,7 @@ pub(in crate::compiler) fn regexp_pattern_from_ast<'src>( let i_pos = pattern.regexp.literal.rfind('i').unwrap(); ctx.warnings.add(|| { - Warning::redundant_case_modifier( + warnings::RedundantCaseModifier::build( ctx.report_builder, pattern.modifiers.nocase().unwrap().span().into(), pattern.regexp.span().subspan(i_pos, i_pos + 1).into(), @@ -371,13 +378,10 @@ pub(in crate::compiler) fn regexp_pattern_from_ast<'src>( pub(in crate::compiler) fn expr_from_ast( ctx: &mut CompileContext, expr: &ast::Expr, -) -> Result> { +) -> Result { match expr { ast::Expr::Entrypoint { span } => { - Err(Box::new(CompileError::entrypoint_unsupported( - ctx.report_builder, - span.into(), - ))) + Err(EntrypointUnsupported::build(ctx.report_builder, span.into())) } ast::Expr::Filesize { .. 
} => Ok(Expr::Filesize), @@ -489,10 +493,10 @@ pub(in crate::compiler) fn expr_from_ast( if let Some((expr, msg)) = replacement { ctx.warnings.add(|| { - Warning::boolean_integer_comparison( + warnings::BooleanIntegerComparison::build( ctx.report_builder, - span.into(), msg, + span.into(), ) }); Ok(expr) @@ -571,7 +575,7 @@ pub(in crate::compiler) fn expr_from_ast( // If the current symbol table is `None` it means that the // identifier is not a field or method of some structure. return if current_symbol_table.is_none() { - Err(Box::new(CompileError::unknown_identifier( + Err(UnknownIdentifier::build( ctx.report_builder, ident.name.to_string(), ident.span().into(), @@ -586,13 +590,13 @@ pub(in crate::compiler) fn expr_from_ast( } else { None }, - ))) + )) } else { - Err(Box::new(CompileError::unknown_field( + Err(UnknownField::build( ctx.report_builder, ident.name.to_string(), ident.span().into(), - ))) + )) }; } @@ -616,11 +620,11 @@ pub(in crate::compiler) fn expr_from_ast( // If the identifier is just `$`, and we are not inside a // loop, that's an error. if ctx.for_of_depth == 0 { - return Err(Box::new(CompileError::syntax_error( + return Err(SyntaxError::build( ctx.report_builder, "this `$` is outside of the condition of a `for .. of` statement".to_string(), p.identifier.span().into(), - ))); + )); } // If we are inside a loop, we don't know which is the // PatternId because `$` refers to a different pattern on @@ -653,11 +657,11 @@ pub(in crate::compiler) fn expr_from_ast( // If the identifier is just `#`, and we are not inside a loop, // that's an error. if p.ident.name == "#" && ctx.for_of_depth == 0 { - return Err(Box::new(CompileError::syntax_error( + return Err(SyntaxError::build( ctx.report_builder, "this `#` is outside of the condition of a `for .. of` statement".to_string(), p.ident.span().into(), - ))); + )); } match (p.ident.name, &p.range) { // Cases where the identifier is `#`. 
@@ -695,11 +699,11 @@ pub(in crate::compiler) fn expr_from_ast( // If the identifier is just `@`, and we are not inside a loop, // that's an error. if p.ident.name == "@" && ctx.for_of_depth == 0 { - return Err(Box::new(CompileError::syntax_error( + return Err(SyntaxError::build( ctx.report_builder, "this `@` is outside of the condition of a `for .. of` statement".to_string(), p.ident.span().into(), - ))); + )); } match (p.ident.name, &p.index) { // Cases where the identifier is `@`. @@ -745,11 +749,11 @@ pub(in crate::compiler) fn expr_from_ast( // If the identifier is just `!`, and we are not inside a loop, // that's an error. if p.ident.name == "!" && ctx.for_of_depth == 0 { - return Err(Box::new(CompileError::syntax_error( + return Err(SyntaxError::build( ctx.report_builder, "this `!` is outside of the condition of a `for .. of` statement".to_string(), p.ident.span().into(), - ))); + )); } match (p.ident.name, &p.index) { // Cases where the identifier is `!`. @@ -824,12 +828,12 @@ pub(in crate::compiler) fn expr_from_ast( // The type of the key/index expression should correspond // with the type of the map's keys. 
if key_ty != ty { - return Err(Box::new(CompileError::wrong_type( + return Err(WrongType::build( ctx.report_builder, format!("`{}`", key_ty), ty.to_string(), expr.index.span().into(), - ))); + )); } Ok(Expr::Lookup(Box::new(Lookup { @@ -838,12 +842,12 @@ pub(in crate::compiler) fn expr_from_ast( index, }))) } - type_value => Err(Box::new(CompileError::wrong_type( + type_value => Err(WrongType::build( ctx.report_builder, format!("`{}` or `{}`", Type::Array, Type::Map), type_value.ty().to_string(), expr.primary.span().into(), - ))), + )), } } } @@ -852,7 +856,7 @@ pub(in crate::compiler) fn expr_from_ast( pub(in crate::compiler) fn bool_expr_from_ast( ctx: &mut CompileContext, ast: &ast::Expr, -) -> Result> { +) -> Result { let expr = expr_from_ast(ctx, ast)?; warn_if_not_bool(ctx, expr.ty(), ast.span()); Ok(expr) @@ -861,7 +865,7 @@ pub(in crate::compiler) fn bool_expr_from_ast( fn of_expr_from_ast( ctx: &mut CompileContext, of: &ast::Of, -) -> Result> { +) -> Result { let quantifier = quantifier_from_ast(ctx, &of.quantifier)?; // Create new stack frame with 5 slots: // 1 slot for the loop variable, a bool in this case. 
@@ -878,7 +882,7 @@ fn of_expr_from_ast( check_type(ctx, expr.ty(), e.span(), &[Type::Bool])?; Ok(expr) }) - .collect::, Box>>()?; + .collect::, CompileError>>()?; let num_items = tuple.len(); (OfItems::BoolExprTuple(tuple), num_items) @@ -896,7 +900,7 @@ fn of_expr_from_ast( if let Quantifier::Expr(expr) = &quantifier { if let TypeValue::Integer(Value::Const(value)) = expr.type_value() { if value > num_items.try_into().unwrap() { - ctx.warnings.add(|| Warning::invariant_boolean_expression( + ctx.warnings.add(|| warnings::InvariantBooleanExpression::build( ctx.report_builder, false, of.span().into(), @@ -949,7 +953,7 @@ fn of_expr_from_ast( if raise_warning { ctx.warnings.add(|| { - Warning::potentially_unsatisfiable_expression( + warnings::PotentiallyUnsatisfiableExpression::build( ctx.report_builder, of.quantifier.span().into(), of.anchor.as_ref().unwrap().span().into(), @@ -968,7 +972,7 @@ fn of_expr_from_ast( fn for_of_expr_from_ast( ctx: &mut CompileContext, for_of: &ast::ForOf, -) -> Result> { +) -> Result { let quantifier = quantifier_from_ast(ctx, &for_of.quantifier)?; let pattern_set = pattern_set_from_ast(ctx, &for_of.pattern_set)?; // Create new stack frame with 5 slots: @@ -1007,7 +1011,7 @@ fn for_of_expr_from_ast( fn for_in_expr_from_ast( ctx: &mut CompileContext, for_in: &ast::ForIn, -) -> Result> { +) -> Result { let quantifier = quantifier_from_ast(ctx, &for_in.quantifier)?; let iterable = iterable_from_ast(ctx, &for_in.iterable)?; @@ -1050,13 +1054,13 @@ fn for_in_expr_from_ast( if loop_vars.len() != expected_vars.len() { let span = loop_vars.first().unwrap().span(); let span = span.combine(&loop_vars.last().unwrap().span()); - return Err(Box::new(CompileError::assignment_mismatch( + return Err(AssignmentMismatch::build( ctx.report_builder, loop_vars.len() as u8, expected_vars.len() as u8, for_in.iterable.span().into(), span.into(), - ))); + )); } // Create stack frame with capacity for the loop variables, plus 4 @@ -1100,7 +1104,7 @@ fn 
for_in_expr_from_ast( fn iterable_from_ast( ctx: &mut CompileContext, iter: &ast::Iterable, -) -> Result> { +) -> Result { match iter { ast::Iterable::Range(range) => { Ok(Iterable::Range(range_from_ast(ctx, range)?)) @@ -1132,14 +1136,12 @@ fn iterable_from_ast( // type mismatch. if let Some((prev_ty, prev_span)) = prev { if prev_ty != ty { - return Err(Box::new( - CompileError::mismatching_types( - ctx.report_builder, - prev_ty.to_string(), - ty.to_string(), - prev_span.into(), - span.into(), - ), + return Err(MismatchingTypes::build( + ctx.report_builder, + prev_ty.to_string(), + ty.to_string(), + prev_span.into(), + span.into(), )); } } @@ -1154,7 +1156,7 @@ fn iterable_from_ast( fn anchor_from_ast( ctx: &mut CompileContext, anchor: &Option, -) -> Result> { +) -> Result { match anchor { Some(ast::MatchAnchor::At(at_)) => Ok(MatchAnchor::At(Box::new( non_negative_integer_from_ast(ctx, &at_.expr)?, @@ -1169,7 +1171,7 @@ fn anchor_from_ast( fn range_from_ast( ctx: &mut CompileContext, range: &ast::Range, -) -> Result> { +) -> Result { let lower_bound = Box::new(non_negative_integer_from_ast(ctx, &range.lower_bound)?); @@ -1186,14 +1188,14 @@ fn range_from_ast( ) = (lower_bound.type_value(), upper_bound.type_value()) { if lower_bound > upper_bound { - return Err(Box::new(CompileError::invalid_range( + return Err(InvalidRange::build( ctx.report_builder, format!( "lower bound ({}) is greater than upper bound ({})", lower_bound, upper_bound ), range.span().into(), - ))); + )); } } @@ -1203,7 +1205,7 @@ fn range_from_ast( fn non_negative_integer_from_ast( ctx: &mut CompileContext, expr: &ast::Expr, -) -> Result> { +) -> Result { let span = expr.span(); let expr = expr_from_ast(ctx, expr)?; let type_value = expr.type_value(); @@ -1212,10 +1214,10 @@ fn non_negative_integer_from_ast( if let TypeValue::Integer(Value::Const(value)) = type_value { if value < 0 { - return Err(Box::new(CompileError::unexpected_negative_number( + return Err(UnexpectedNegativeNumber::build( 
ctx.report_builder, span.into(), - ))); + )); } } @@ -1226,7 +1228,7 @@ fn integer_in_range_from_ast( ctx: &mut CompileContext, expr: &ast::Expr, range: RangeInclusive, -) -> Result> { +) -> Result { let span = expr.span(); let expr = expr_from_ast(ctx, expr)?; let type_value = expr.type_value(); @@ -1237,12 +1239,12 @@ fn integer_in_range_from_ast( // the given range. if let TypeValue::Integer(Value::Const(value)) = type_value { if !range.contains(&value) { - return Err(Box::new(CompileError::number_out_of_range( + return Err(NumberOutOfRange::build( ctx.report_builder, *range.start(), *range.end(), span.into(), - ))); + )); } } @@ -1252,7 +1254,7 @@ fn integer_in_range_from_ast( fn quantifier_from_ast( ctx: &mut CompileContext, quantifier: &ast::Quantifier, -) -> Result> { +) -> Result { match quantifier { ast::Quantifier::None { .. } => Ok(Quantifier::None), ast::Quantifier::All { .. } => Ok(Quantifier::All), @@ -1274,7 +1276,7 @@ fn quantifier_from_ast( fn pattern_set_from_ast( ctx: &mut CompileContext, pattern_set: &ast::PatternSet, -) -> Result, Box> { +) -> Result, CompileError> { let pattern_indexes = match pattern_set { // `x of them` ast::PatternSet::Them { span } => { @@ -1284,11 +1286,11 @@ fn pattern_set_from_ast( .collect(); if pattern_indexes.is_empty() { - return Err(Box::new(CompileError::empty_pattern_set( + return Err(EmptyPatternSet::build( ctx.report_builder, span.into(), Some("this rule doesn't define any patterns".to_string()), - ))); + )); } // Make all the patterns in the set non-anchorable and mark them @@ -1307,7 +1309,7 @@ fn pattern_set_from_ast( .iter() .any(|pattern| item.matches(pattern.identifier())) { - return Err(Box::new(CompileError::empty_pattern_set( + return Err(EmptyPatternSet::build( ctx.report_builder, item.span().into(), Some(if item.wildcard { @@ -1321,7 +1323,7 @@ fn pattern_set_from_ast( item.identifier, ) }), - ))); + )); } } let mut pattern_indexes = Vec::new(); @@ -1347,7 +1349,7 @@ fn pattern_set_from_ast( fn 
func_call_from_ast( ctx: &mut CompileContext, func_call: &ast::FuncCall, -) -> Result> { +) -> Result { let callable = expr_from_ast(ctx, &func_call.callable)?; let type_value = callable.type_value(); @@ -1362,7 +1364,7 @@ fn func_call_from_ast( .args .iter() .map(|arg| expr_from_ast(ctx, arg)) - .collect::, Box>>()?; + .collect::, CompileError>>()?; let arg_types: Vec = args.iter().map(|arg| arg.ty()).collect(); @@ -1394,7 +1396,7 @@ fn func_call_from_ast( // No matching signature was found, that means that the arguments // provided were incorrect. if matching_signature.is_none() { - return Err(Box::new(CompileError::wrong_arguments( + return Err(WrongArguments::build( ctx.report_builder, func_call.args_span().into(), Some(format!( @@ -1413,7 +1415,7 @@ fn func_call_from_ast( .collect::>() .join("\n") )), - ))); + )); } let (signature_index, type_value) = matching_signature.unwrap(); @@ -1429,7 +1431,7 @@ fn func_call_from_ast( fn matches_expr_from_ast( ctx: &mut CompileContext, expr: &ast::BinaryExpr, -) -> Result> { +) -> Result { let span = expr.span(); let lhs_span = expr.lhs.span(); let rhs_span = expr.rhs.span(); @@ -1454,16 +1456,16 @@ fn check_type( ty: Type, span: Span, accepted_types: &[Type], -) -> Result<(), Box> { +) -> Result<(), CompileError> { if accepted_types.contains(&ty) { Ok(()) } else { - Err(Box::new(CompileError::wrong_type( + Err(WrongType::build( ctx.report_builder, CompileError::join_with_or(accepted_types, true), ty.to_string(), span.into(), - ))) + )) } } @@ -1475,7 +1477,7 @@ fn check_operands( rhs_span: Span, accepted_types: &[Type], compatible_types: &[Type], -) -> Result<(), Box> { +) -> Result<(), CompileError> { // Both types must be known. 
assert!(!matches!(lhs_ty, Type::Unknown)); assert!(!matches!(rhs_ty, Type::Unknown)); @@ -1495,13 +1497,13 @@ fn check_operands( }; if !types_are_compatible { - return Err(Box::new(CompileError::mismatching_types( + return Err(MismatchingTypes::build( ctx.report_builder, lhs_ty.to_string(), rhs_ty.to_string(), lhs_span.into(), rhs_span.into(), - ))); + )); } Ok(()) @@ -1514,7 +1516,7 @@ fn re_error_to_compile_error( ) -> CompileError { match err { Error::SyntaxError { msg, span, note } => { - CompileError::invalid_regexp( + InvalidRegexp::build( report_builder, msg, // The error span is relative to the start of the regexp, not to @@ -1539,7 +1541,7 @@ fn re_error_to_compile_error( is_greedy_2, span_1, span_2, - } => CompileError::mixed_greediness( + } => MixedGreediness::build( report_builder, if is_greedy_1 { "greedy" } else { "non-greedy" }.to_string(), if is_greedy_2 { "greedy" } else { "non-greedy" }.to_string(), @@ -1580,7 +1582,7 @@ pub(in crate::compiler) fn warn_if_not_bool( ), _ => None, }; - Warning::non_boolean_as_boolean( + warnings::NonBooleanAsBoolean::build( ctx.report_builder, ty.to_string(), span.into(), @@ -1595,7 +1597,7 @@ macro_rules! gen_unary_op { fn $name( ctx: &mut CompileContext, expr: &ast::UnaryExpr, - ) -> Result> { + ) -> Result { let span = expr.span(); let operand = expr_from_ast(ctx, &expr.operand)?; @@ -1607,7 +1609,7 @@ macro_rules! gen_unary_op { )?; let check_fn: - Option Result<(), Box>> + Option Result<(), CompileError>> = $check_fn; if let Some(check_fn) = check_fn { @@ -1630,7 +1632,7 @@ macro_rules! gen_binary_op { fn $name( ctx: &mut CompileContext, expr: &ast::BinaryExpr, - ) -> Result> { + ) -> Result { let span = expr.span(); let lhs_span = expr.lhs.span(); let rhs_span = expr.rhs.span(); @@ -1649,7 +1651,7 @@ macro_rules! gen_binary_op { )?; let check_fn: - Option Result<(), Box>> + Option Result<(), CompileError>> = $check_fn; if let Some(check_fn) = check_fn { @@ -1672,7 +1674,7 @@ macro_rules! 
gen_string_op { fn $name( ctx: &mut CompileContext, expr: &ast::BinaryExpr, - ) -> Result> { + ) -> Result { let span = expr.span(); let lhs_span = expr.lhs.span(); let rhs_span = expr.rhs.span(); @@ -1706,7 +1708,7 @@ macro_rules! gen_n_ary_operation { fn $name( ctx: &mut CompileContext, expr: &ast::NAryExpr, - ) -> Result> { + ) -> Result { let span = expr.span(); let accepted_types = &[$( $accepted_types ),+]; let compatible_types = &[$( $compatible_types ),+]; @@ -1714,10 +1716,10 @@ macro_rules! gen_n_ary_operation { let operands_hir: Vec = expr .operands() .map(|expr| expr_from_ast(ctx, expr)) - .collect::, Box>>()?; + .collect::, CompileError>>()?; let check_fn: - Option Result<(), Box>> + Option Result<(), CompileError>> = $check_fn; // Make sure that all operands have one of the accepted types. @@ -1748,13 +1750,12 @@ macro_rules! gen_n_ary_operation { }; if !types_are_compatible { - return Err(Box::new(CompileError::mismatching_types( + return Err(MismatchingTypes::build( ctx.report_builder, lhs_ty.to_string(), rhs_ty.to_string(), expr.first().span().combine(&lhs_ast.span()).into(), rhs_ast.span().into(), - ), )); } } @@ -1870,11 +1871,9 @@ gen_binary_op!( Some(|ctx, _lhs, rhs, _lhs_span, rhs_span| { if let TypeValue::Integer(Value::Const(value)) = rhs.type_value() { if value < 0 { - return Err(Box::new( - CompileError::unexpected_negative_number( - ctx.report_builder, - rhs_span.into(), - ), + return Err(UnexpectedNegativeNumber::build( + ctx.report_builder, + rhs_span.into(), )); } } @@ -1890,11 +1889,9 @@ gen_binary_op!( Some(|ctx, _lhs, rhs, _lhs_span, rhs_span| { if let TypeValue::Integer(Value::Const(value)) = rhs.type_value() { if value < 0 { - return Err(Box::new( - CompileError::unexpected_negative_number( - ctx.report_builder, - rhs_span.into(), - ), + return Err(UnexpectedNegativeNumber::build( + ctx.report_builder, + rhs_span.into(), )); } } diff --git a/lib/src/compiler/ir/hex2hir.rs b/lib/src/compiler/ir/hex2hir.rs index 474bc9957..cfd1dcbd9 
100644 --- a/lib/src/compiler/ir/hex2hir.rs +++ b/lib/src/compiler/ir/hex2hir.rs @@ -1,18 +1,17 @@ /*! Functions for converting a hex pattern AST into a HIR. */ -use crate::CompileError; use regex_syntax::hir; use yara_x_parser::ast; use yara_x_parser::ast::WithSpan; use crate::compiler::context::CompileContext; -use crate::compiler::warnings::Warning; -use crate::compiler::ByteMaskCombinator; +use crate::compiler::errors::{CompileError, InvalidPattern}; +use crate::compiler::{warnings, ByteMaskCombinator}; pub(in crate::compiler) fn hex_pattern_hir_from_ast( ctx: &mut CompileContext, pattern: &ast::HexPattern, -) -> Result> { +) -> Result { hex_tokens_hir_from_ast(ctx, &pattern.identifier, &pattern.tokens) } @@ -20,7 +19,7 @@ fn hex_tokens_hir_from_ast( ctx: &mut CompileContext, pattern_ident: &ast::Ident, tokens: &ast::HexTokens, -) -> Result> { +) -> Result { let mut hir_tokens = Vec::with_capacity(tokens.tokens.len()); let mut ast_tokens = tokens.tokens.iter().peekable(); @@ -32,13 +31,13 @@ fn hex_tokens_hir_from_ast( ast::HexToken::NotByte(byte) => { // ~?? is not allowed. 
if byte.mask == 0 { - return Err(Box::new(CompileError::invalid_pattern( + return Err(InvalidPattern::build( ctx.report_builder, pattern_ident.name.to_string(), "negation of `??` is not allowed".to_string(), token.span().into(), None, - ))); + )); } let class = match hex_byte_hir_from_ast(byte).into_kind() { @@ -102,7 +101,7 @@ fn hex_tokens_hir_from_ast( if coalesced { ctx.warnings.add(|| { - Warning::consecutive_jumps( + warnings::ConsecutiveJumps::build( ctx.report_builder, pattern_ident.name.to_string(), format!("{jump}"), @@ -113,17 +112,17 @@ fn hex_tokens_hir_from_ast( match (jump.start, jump.end) { (Some(0), Some(0)) => { - return Err(Box::new(CompileError::invalid_pattern( + return Err(InvalidPattern::build( ctx.report_builder, pattern_ident.name.to_string(), "zero-length jumps are useless, remove it" .to_string(), span.into(), None, - ))); + )); } (Some(start), Some(end)) if start > end => { - return Err(Box::new(CompileError::invalid_pattern( + return Err(InvalidPattern::build( ctx.report_builder, pattern_ident.name.to_string(), format!( @@ -134,7 +133,7 @@ fn hex_tokens_hir_from_ast( } else { None } - ))); + )); } _ => {} } diff --git a/lib/src/compiler/ir/mod.rs b/lib/src/compiler/ir/mod.rs index 9e6bc2bed..7708c324b 100644 --- a/lib/src/compiler/ir/mod.rs +++ b/lib/src/compiler/ir/mod.rs @@ -47,8 +47,9 @@ pub(in crate::compiler) use ast2ir::patterns_from_ast; use yara_x_parser::ast::Ident; use yara_x_parser::Span; +use crate::compiler::errors::{CompileError, NumberOutOfRange}; use crate::compiler::ir::dfs::{DepthFirstSearch, Event}; -use crate::{re, CompileError}; +use crate::re; mod ast2ir; mod dfs; @@ -968,7 +969,7 @@ impl Expr { self, ctx: &mut CompileContext, span: Span, - ) -> Result> { + ) -> Result { match self { Expr::Minus { ref operand } => match operand.type_value() { TypeValue::Integer(Value::Const(v)) => { @@ -1062,7 +1063,7 @@ impl Expr { span: Span, operands: Vec, f: F, - ) -> Result> + ) -> Result where F: FnMut(f64, f64) -> f64, { @@ 
-1090,12 +1091,12 @@ impl Expr { } else if result >= i64::MIN as f64 && result <= i64::MAX as f64 { Ok(Expr::Const(TypeValue::const_integer_from(result as i64))) } else { - Err(Box::new(CompileError::number_out_of_range( + Err(NumberOutOfRange::build( ctx.report_builder, i64::MIN, i64::MAX, span.into(), - ))) + )) } } } diff --git a/lib/src/compiler/mod.rs b/lib/src/compiler/mod.rs index 292080538..3bd01189d 100644 --- a/lib/src/compiler/mod.rs +++ b/lib/src/compiler/mod.rs @@ -23,8 +23,9 @@ use itertools::izip; #[cfg(feature = "logging")] use log::*; use regex_syntax::hir; -use rustc_hash::FxHashMap; +use rustc_hash::{FxHashMap, FxHashSet}; use serde::{Deserialize, Serialize}; +use thiserror::Error; use walrus::FunctionId; use yara_x_parser::ast; @@ -33,7 +34,11 @@ use yara_x_parser::{Parser, Span}; use crate::compiler::base64::base64_patterns; use crate::compiler::emit::{emit_rule_condition, EmitContext}; -use crate::compiler::report::{ReportBuilder, SourceRef}; +use crate::compiler::errors::{ + CompileError, ConflictingRuleIdentifier, DuplicateRule, DuplicateTag, + EmitWasmError, InvalidRegexp, InvalidUTF8, UnknownModule, UnusedPattern, +}; +use crate::compiler::report::{CodeLoc, ReportBuilder}; use crate::compiler::{CompileContext, VarStack}; use crate::modules::BUILTIN_MODULES; use crate::re; @@ -52,9 +57,6 @@ pub(crate) use crate::compiler::atoms::*; pub(crate) use crate::compiler::context::*; pub(crate) use crate::compiler::ir::*; -#[doc(inline)] -pub use crate::compiler::errors::*; - #[doc(inline)] pub use crate::compiler::rules::*; @@ -64,16 +66,17 @@ pub use crate::compiler::warnings::*; mod atoms; mod context; mod emit; -mod errors; mod ir; mod report; mod rules; -mod warnings; -pub mod base64; #[cfg(test)] mod tests; +pub mod base64; +pub mod errors; +pub mod warnings; + /// A structure that describes some YARA source code. 
/// /// This structure contains a `&str` pointing to the code itself, and an @@ -182,7 +185,7 @@ impl<'src> From<&'src [u8]> for SourceCode<'src> { /// let results = scanner.scan("Lorem ipsum".as_bytes()).unwrap(); /// assert_eq!(results.matching_rules().len(), 1); /// ``` -pub fn compile<'src, S>(src: S) -> Result +pub fn compile<'src, S>(src: S) -> Result where S: Into>, { @@ -340,7 +343,7 @@ pub struct Compiler<'a> { /// without causing an error, but a warning is raised to let the user know /// that the module is not supported. Any rule that depends on an unsupported /// module is ignored. - ignored_modules: Vec, + ignored_modules: FxHashSet, /// Keys in this map are the name of rules that will be ignored because they /// depend on unsupported modules, either directly or indirectly. Values are @@ -355,6 +358,9 @@ pub struct Compiler<'a> { /// Warnings generated while compiling the rules. warnings: Warnings, + /// Errors generated while compiling the rules. + errors: Vec, + /// Optional writer where the compiler writes the IR produced by each rule. /// This is used for test cases and debugging. #[cfg(test)] @@ -423,13 +429,14 @@ impl<'a> Compiler<'a> { current_pattern_id: PatternId(0), current_namespace: default_namespace, warnings: Warnings::default(), + errors: Vec::new(), rules: Vec::new(), sub_patterns: Vec::new(), anchored_sub_patterns: Vec::new(), atoms: Vec::new(), re_code: Vec::new(), imported_modules: Vec::new(), - ignored_modules: Vec::new(), + ignored_modules: FxHashSet::default(), ignored_rules: FxHashMap::default(), root_struct: Struct::new().make_root(), report_builder: ReportBuilder::new(), @@ -441,10 +448,26 @@ impl<'a> Compiler<'a> { } } - /// Adds a YARA source code to be compiled. + /// Adds YARA rules in source form for compilation. + /// + /// The `src` parameter accepts any type that implements [`Into`], + /// such as `&str`, `&[u8]`, and naturally, [`SourceCode`] itself. This input + /// can include one or more YARA rules. 
+ /// + /// This function may be invoked multiple times to add several sets of YARA + /// rules. If the rules provided in `src` contain errors that prevent + /// compilation, the function will return the first error encountered. + /// Additionally, the compiler will store this error, along with any others + /// discovered during compilation, which can be accessed using + /// [`Compiler::errors`]. /// - /// This function can be called multiple times. - pub fn add_source<'src, S>(&mut self, src: S) -> Result<&mut Self, Error> + /// Even if a previous invocation resulted in a compilation error, you can + /// continue calling this function. In such cases, any rules that failed to + /// compile will not be included in the final compiled set. + pub fn add_source<'src, S>( + &mut self, + src: S, + ) -> Result<&mut Self, CompileError> where S: Into>, { @@ -477,21 +500,16 @@ impl<'a> Compiler<'a> { } else { span_start }; - return Err(Error::CompileError(Box::new( - CompileError::invalid_utf_8( - &self.report_builder, - Span(span_start as u32..span_end as u32).into(), - ), - ))); + return Err(InvalidUTF8::build( + &self.report_builder, + Span(span_start as u32..span_end as u32).into(), + )); } }; - if !ast.errors().is_empty() { - return Err(Error::CompileError(Box::new(CompileError::from( - &self.report_builder, - ast.into_errors().remove(0), - )))); - } + // Store the current length of the `errors` vector, so that we can + // know if more errors were added. + let existing_errors = self.errors.len(); let mut already_imported = FxHashMap::default(); @@ -504,7 +522,7 @@ impl<'a> Compiler<'a> { already_imported.insert(&import.module_name, import.span()) { self.warnings.add(|| { - Warning::duplicate_import( + warnings::DuplicateImport::build( &self.report_builder, import.module_name.to_string(), import.span().into(), @@ -512,17 +530,31 @@ impl<'a> Compiler<'a> { ) }) } - // Import the module. This updates `self.root_struct` if // necessary. 
- self.c_import(import)?; + if let Err(err) = self.c_import(import) { + self.errors.push(err); + } } // Iterate over the list of declared rules and verify that their // conditions are semantically valid. For each rule add a symbol // to the current namespace. for rule in ast.rules() { - self.c_rule(rule)?; + if let Err(err) = self.c_rule(rule) { + self.errors.push(err); + } + } + + self.errors.extend( + ast.into_errors() + .into_iter() + .map(|err| CompileError::from(&self.report_builder, err)), + ); + + // More errors were added? Return the first error that was added. + if self.errors.len() > existing_errors { + return Err(self.errors[existing_errors].clone()); } Ok(self) @@ -553,21 +585,19 @@ impl<'a> Compiler<'a> { &mut self, ident: &str, value: T, - ) -> Result<&mut Self, Error> + ) -> Result<&mut Self, VariableError> where - Error: From<>::Error>, + VariableError: From<>::Error>, { if !is_valid_identifier(ident) { - return Err( - VariableError::InvalidIdentifier(ident.to_string()).into() - ); + return Err(VariableError::InvalidIdentifier(ident.to_string())); } let var: Variable = value.try_into()?; let type_value: TypeValue = var.into(); if self.root_struct.add_field(ident, type_value).is_some() { - return Err(VariableError::AlreadyExists(ident.to_string()).into()); + return Err(VariableError::AlreadyExists(ident.to_string())); } self.global_symbols @@ -695,7 +725,7 @@ impl<'a> Compiler<'a> { /// ignored module will be ignored, while the rest of rules that /// don't rely on that module will be correctly compiled. 
pub fn ignore_module>(&mut self, module: M) -> &mut Self { - self.ignored_modules.push(module.into()); + self.ignored_modules.insert(module.into()); self } @@ -719,7 +749,7 @@ impl<'a> Compiler<'a> { &mut self, code: &str, enabled: bool, - ) -> Result<&mut Self, Error> { + ) -> Result<&mut Self, InvalidWarningCode> { self.warnings.switch_warning(code, enabled)?; Ok(self) } @@ -764,7 +794,19 @@ impl<'a> Compiler<'a> { self } + /// Retrieves all errors generated by the compiler. + /// + /// This method returns every error encountered during the compilation, + /// across all invocations of [`Compiler::add_source`]. + #[inline] + pub fn errors(&self) -> &[CompileError] { + self.errors.as_slice() + } + /// Returns the warnings emitted by the compiler. + /// + /// This method returns every warning issued during the compilation, + /// across all invocations of [`Compiler::add_source`]. #[inline] pub fn warnings(&self) -> &[Warning] { self.warnings.as_slice() @@ -816,29 +858,27 @@ impl<'a> Compiler<'a> { fn check_for_existing_identifier( &self, ident: &Ident, - ) -> Result<(), Box> { + ) -> Result<(), CompileError> { if let Some(symbol) = self.symbol_table.lookup(ident.name) { return match symbol.kind() { // Found another rule with the same name. - SymbolKind::Rule(rule_id) => { - Err(Box::new(CompileError::duplicate_rule( - &self.report_builder, - ident.name.to_string(), - ident.span().into(), - self.rules - .get(rule_id.0 as usize) - .unwrap() - .ident_ref - .clone(), - ))) - } + SymbolKind::Rule(rule_id) => Err(DuplicateRule::build( + &self.report_builder, + ident.name.to_string(), + ident.span().into(), + self.rules + .get(rule_id.0 as usize) + .unwrap() + .ident_ref + .clone(), + )), // Found another symbol that is not a rule, but has the same // name. 
- _ => Err(Box::new(CompileError::conflicting_rule_identifier( + _ => Err(ConflictingRuleIdentifier::build( &self.report_builder, ident.name.to_string(), ident.span().into(), - ))), + )), }; } Ok(()) @@ -848,15 +888,15 @@ impl<'a> Compiler<'a> { fn check_for_duplicate_tags( &self, tags: &[Ident], - ) -> Result<(), Box> { + ) -> Result<(), CompileError> { let mut s = HashSet::new(); for tag in tags { if !s.insert(tag.name) { - return Err(Box::new(CompileError::duplicate_tag( + return Err(DuplicateTag::build( &self.report_builder, tag.name.to_string(), tag.span().into(), - ))); + )); } } Ok(()) @@ -922,7 +962,7 @@ impl<'a> Compiler<'a> { } impl<'a> Compiler<'a> { - fn c_rule(&mut self, rule: &ast::Rule) -> Result<(), Box> { + fn c_rule(&mut self, rule: &ast::Rule) -> Result<(), CompileError> { // Check if another rule, module or variable has the same identifier // and return an error in that case. self.check_for_existing_identifier(&rule.identifier)?; @@ -988,7 +1028,7 @@ impl<'a> Compiler<'a> { namespace_id: self.current_namespace.id, namespace_ident_id: self.current_namespace.ident_id, ident_id: self.ident_pool.get_or_intern(rule.identifier.name), - ident_ref: SourceRef::new( + ident_ref: CodeLoc::new( self.report_builder.current_source_id(), rule.identifier.span(), ), @@ -1017,7 +1057,7 @@ impl<'a> Compiler<'a> { if let Err(err) = patterns_from_ast(&mut ctx, rule.patterns.as_ref()) { drop(ctx); self.restore_snapshot(snapshot); - return Err(Box::new(*err)); + return Err(err); }; // Convert the rule condition's AST to the intermediate representation @@ -1031,26 +1071,23 @@ impl<'a> Compiler<'a> { // entering this function. Also, if the error is due to an unknown // identifier, but the identifier is one of the unsupported modules, // the error is tolerated and a warning is issued instead. 
- let mut condition = match condition.map_err(|err| *err) { + let mut condition = match condition { Ok(condition) => condition, - Err(CompileError::UnknownIdentifier { - identifier, - span: identifier_ref, - .. - }) if self.ignored_modules.contains(&identifier) - || self.ignored_rules.contains_key(&identifier) => + Err(CompileError::UnknownIdentifier(unknown)) + if self.ignored_rules.contains_key(unknown.identifier()) + || self.ignored_modules.contains(unknown.identifier()) => { self.restore_snapshot(snapshot); - if let Some(module_name) = self.ignored_rules.get(&identifier) + if let Some(module_name) = + self.ignored_rules.get(unknown.identifier()) { self.warnings.add(|| { - Warning::ignored_rule( + warnings::IgnoredRule::build( &self.report_builder, - rule.identifier.name.to_string(), - identifier, module_name.clone(), - identifier_ref, + rule.identifier.name.to_string(), + unknown.identifier_location().clone(), ) }); self.ignored_rules.insert( @@ -1059,25 +1096,27 @@ impl<'a> Compiler<'a> { ); } else { self.warnings.add(|| { - Warning::ignored_module( + warnings::IgnoredModule::build( &self.report_builder, - identifier.clone(), - identifier_ref, + unknown.identifier().to_string(), + unknown.identifier_location().clone(), Some(format!( "the whole rule `{}` will be ignored", rule.identifier.name )), ) }); - self.ignored_rules - .insert(rule.identifier.name.to_string(), identifier); + self.ignored_rules.insert( + rule.identifier.name.to_string(), + unknown.identifier().to_string(), + ); } return Ok(()); } Err(err) => { self.restore_snapshot(snapshot); - return Err(Box::new(err)); + return Err(err); } }; @@ -1094,7 +1133,7 @@ impl<'a> Compiler<'a> { condition.type_value().cast_to_bool().try_as_bool() { self.warnings.add(|| { - Warning::invariant_boolean_expression( + warnings::InvariantBooleanExpression::build( &self.report_builder, value, rule.condition.span().into(), @@ -1133,11 +1172,11 @@ impl<'a> Compiler<'a> { // Raise error is some pattern was not used, 
except if the pattern // identifier starts with underscore. if !pattern.in_use() && !pattern.identifier().starts_with("$_") { - return Err(Box::new(CompileError::unused_pattern( + return Err(UnusedPattern::build( &self.report_builder, pattern.identifier().name.to_string(), pattern.identifier().span().into(), - ))); + )); } // Check if this pattern has been declared before, in this rule or @@ -1224,7 +1263,7 @@ impl<'a> Compiler<'a> { Ok(()) } - fn c_import(&mut self, import: &Import) -> Result<(), Box> { + fn c_import(&mut self, import: &Import) -> Result<(), CompileError> { let module_name = import.module_name; let module = BUILTIN_MODULES.get(module_name); @@ -1235,7 +1274,7 @@ impl<'a> Compiler<'a> { // only a warning. return if self.ignored_modules.iter().any(|m| m == module_name) { self.warnings.add(|| { - Warning::ignored_module( + warnings::IgnoredModule::build( &self.report_builder, module_name.to_string(), import.span().into(), @@ -1246,11 +1285,11 @@ impl<'a> Compiler<'a> { } else { // The module does not exist, and is not explicitly added to // the list of unsupported modules, that's an error. - Err(Box::new(CompileError::unknown_module( + Err(UnknownModule::build( &self.report_builder, module_name.to_string(), import.span().into(), - ))) + )) }; } @@ -1502,7 +1541,7 @@ impl<'a> Compiler<'a> { pattern: RegexpPattern, anchored_at: Option, span: Span, - ) -> Result<(), Box> { + ) -> Result<(), CompileError> { // Try splitting the regexp into multiple chained sub-patterns if it // contains large gaps. 
For example, `{ 01 02 03 [-] 04 05 06 }` is // split into `{ 01 02 03 }` and `{ 04 05 06 }`, where `{ 04 05 06 }` @@ -1578,7 +1617,7 @@ impl<'a> Compiler<'a> { hir: re::hir::Hir, anchored_at: Option, flags: PatternFlagSet, - ) -> Result<(), Box> { + ) -> Result<(), CompileError> { let ascii = flags.contains(PatternFlags::Ascii); let wide = flags.contains(PatternFlags::Wide); let case_insensitive = flags.contains(PatternFlags::Nocase); @@ -1670,7 +1709,7 @@ impl<'a> Compiler<'a> { trailing: &[ChainedPattern], flags: PatternFlagSet, span: Span, - ) -> Result<(), Box> { + ) -> Result<(), CompileError> { let ascii = flags.contains(PatternFlags::Ascii); let wide = flags.contains(PatternFlags::Wide); let case_insensitive = flags.contains(PatternFlags::Nocase); @@ -1816,7 +1855,7 @@ impl<'a> Compiler<'a> { &mut self, hir: &re::hir::Hir, span: Span, - ) -> Result<(Vec, bool), Box> { + ) -> Result<(Vec, bool), CompileError> { // When the `fast-regexp` feature is enabled, try to compile the regexp // for `FastVM` first, if it fails with `Error::FastIncompatible`, the // regexp is not compatible for `FastVM` and `PikeVM` must be used @@ -1839,22 +1878,22 @@ impl<'a> Compiler<'a> { ); let mut atoms = result.map_err(|err| match err { - re::Error::TooLarge => Box::new(CompileError::invalid_regexp( + re::Error::TooLarge => InvalidRegexp::build( &self.report_builder, "regexp is too large".to_string(), (&span).into(), None, - )), + ), _ => unreachable!(), })?; if matches!(hir.minimum_len(), Some(0)) { - return Err(Box::new(CompileError::invalid_regexp( + return Err(InvalidRegexp::build( &self.report_builder, "this regexp can match empty strings".to_string(), (&span).into(), None, - ))); + )); } let mut slow_pattern = false; @@ -1867,13 +1906,16 @@ impl<'a> Compiler<'a> { if slow_pattern { if self.error_on_slow_pattern { - return Err(Box::new(CompileError::slow_pattern( + return Err(errors::SlowPattern::build( &self.report_builder, span.into(), - ))); + )); } else { 
self.warnings.add(|| { - Warning::slow_pattern(&self.report_builder, span.into()) + warnings::SlowPattern::build( + &self.report_builder, + span.into(), + ) }); } } @@ -2269,3 +2311,94 @@ struct Snapshot { sub_patterns_len: usize, symbol_table_len: usize, } + +/// Error returned by [`Compiler::switch_warning`] when the warning +/// code is not valid. +#[derive(Error, Debug, Eq, PartialEq)] +#[error("`{0}` is not a valid warning code")] +pub struct InvalidWarningCode(String); + +/// Represents a list of warnings. +/// +/// This is a wrapper around a `Vec` that contains additional logic +/// for limiting the number of warnings stored in the vector and silencing some +/// warnings types. +pub(crate) struct Warnings { + warnings: Vec, + max_warnings: usize, + disabled_warnings: HashSet, +} + +impl Default for Warnings { + fn default() -> Self { + Self { + warnings: Vec::new(), + max_warnings: 100, + disabled_warnings: HashSet::default(), + } + } +} + +impl Warnings { + /// Adds the warning returned by `f` to the list. + /// + /// If the maximum number of warnings has been reached the warning is not + /// added. + #[inline] + pub fn add(&mut self, f: impl FnOnce() -> Warning) { + if self.warnings.len() < self.max_warnings { + let warning = f(); + if !self.disabled_warnings.contains(warning.code()) { + self.warnings.push(warning); + } + } + } + + /// Returns true if the given code is a valid warning code. + pub fn is_valid_code(code: &str) -> bool { + Warning::all_codes().iter().any(|c| *c == code) + } + + /// Enables or disables a specific warning identified by `code`. + /// + /// Returns `true` if the warning was previously enabled, or `false` if + /// otherwise. Returns an error if the code doesn't correspond to any + /// of the existing warnings. 
+ #[inline] + pub fn switch_warning( + &mut self, + code: &str, + enabled: bool, + ) -> Result { + if !Self::is_valid_code(code) { + return Err(InvalidWarningCode(code.to_string())); + } + if enabled { + Ok(!self.disabled_warnings.remove(code)) + } else { + Ok(self.disabled_warnings.insert(code.to_string())) + } + } + + /// Enable or disables all warnings. + pub fn switch_all_warnings(&mut self, enabled: bool) { + if enabled { + self.disabled_warnings.clear(); + } else { + for c in Warning::all_codes() { + self.disabled_warnings.insert(c.to_string()); + } + } + } + + #[inline] + pub fn as_slice(&self) -> &[Warning] { + self.warnings.as_slice() + } +} + +impl From for Vec { + fn from(value: Warnings) -> Self { + value.warnings + } +} diff --git a/lib/src/compiler/report.rs b/lib/src/compiler/report.rs index 852f38a4a..61e119c1f 100644 --- a/lib/src/compiler/report.rs +++ b/lib/src/compiler/report.rs @@ -1,7 +1,10 @@ +use serde::ser::SerializeStruct; +use serde::{Serialize, Serializer}; use std::borrow::Cow; -use std::cell::{Cell, RefCell}; +use std::cell::Cell; use std::collections::HashMap; -use std::fmt::Debug; +use std::fmt::{Debug, Display, Formatter}; +use std::sync::{Arc, RwLock, RwLockReadGuard, RwLockWriteGuard}; use yara_x_parser::Span; @@ -15,7 +18,7 @@ pub type Level = annotate_snippets::Level; #[derive(Hash, Eq, PartialEq, Clone, Copy, Debug, Default)] pub struct SourceId(u32); -/// A `SourceRef` points to a fragment of source code. +/// A `CodeLoc` points to a fragment of source code. /// /// It consists of a [`SourceId`] and a [`Span`], where the former identifies /// the source file, and the latter a span of text within that source file. @@ -23,31 +26,213 @@ pub struct SourceId(u32); /// The [`SourceId`] is optional, if it is [`None`] it means that the [`Span`] /// is relative to the current source file. 
#[derive(PartialEq, Debug, Clone, Eq, Default)] -pub struct SourceRef { +pub struct CodeLoc { source_id: Option, span: Span, } -impl SourceRef { +impl CodeLoc { pub(crate) fn new(source_id: Option, span: Span) -> Self { Self { source_id, span } } + + /// Returns the span within the source code. + #[inline] + pub fn span(&self) -> &Span { + &self.span + } } -impl From<&Span> for SourceRef { - /// Creates a [`SourceRef`] from a reference to a [`Span`]. +impl From<&Span> for CodeLoc { + /// Creates a [`CodeLoc`] from a reference to a [`Span`]. fn from(span: &Span) -> Self { Self { source_id: None, span: span.clone() } } } -impl From for SourceRef { - /// Creates a [`SourceRef`] from a [`Span`]. +impl From for CodeLoc { + /// Creates a [`CodeLoc`] from a [`Span`]. fn from(span: Span) -> Self { Self { source_id: None, span } } } +/// Represents an error or warning report. +/// +/// This structure represents the message displayed to the user when an error +/// or warning occurs. It implements the [`Display`] trait, ensuring that when +/// printed, it reflects the standard error format used by YARA-X. For example: +/// +/// ```text +/// error[E006]: unexpected negative number +/// --> line:6:12 +/// | +/// 6 | $a in (-1..0) +/// | ^^ this number can not be negative +/// | +/// ``` +/// +/// In addition to generating the report, this type provides access to the +/// individual components of the report, which include: +/// +/// - `level`: Indicates the severity, either `Level::Error` or `Level::Warning`. +/// - `code`: A unique code that identifies the specific error or warning +/// (e.g., "E006"). +/// - `title`: The title of the report (e.g., "unexpected negative number"). +/// - `labels`: A collection of labels included in the report. Each label +/// contains a level, a span, and associated text. 
+#[derive(Clone)] +pub(crate) struct Report { + code_cache: Arc, + default_source_id: SourceId, + with_colors: bool, + level: Level, + code: &'static str, + title: String, + labels: Vec<(Level, CodeLoc, String)>, + note: Option, +} + +impl Report { + /// Returns the report's title. + #[inline] + pub(crate) fn title(&self) -> &str { + self.title.as_str() + } + + /// Returns the report's labels. + pub(crate) fn labels(&self) -> impl Iterator { + self.labels.iter().map(|(level, code_loc, text)| { + let source_id = + code_loc.source_id.unwrap_or(self.default_source_id); + + let code_cache = self.code_cache.read(); + let code_origin = + code_cache.get(&source_id).unwrap().origin.clone(); + + let level = match level { + Level::Error => "error", + Level::Warning => "warning", + Level::Info => "info", + Level::Note => "note", + Level::Help => "help", + }; + + Label { level, code_origin, span: code_loc.span.clone(), text } + }) + } + + /// Returns the report's note. + #[inline] + pub(crate) fn note(&self) -> Option<&str> { + self.note.as_deref() + } +} + +impl Serialize for Report { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + let mut s = serializer.serialize_struct("report", 4)?; + s.serialize_field("code", &self.code)?; + s.serialize_field("title", &self.title)?; + s.serialize_field("labels", &self.labels().collect::>())?; + s.serialize_field("note", &self.note)?; + s.serialize_field("text", &self.to_string())?; + s.end() + } +} + +impl PartialEq for Report { + fn eq(&self, other: &Self) -> bool { + self.level.eq(&other.level) + && self.code.eq(other.code) + && self.title.eq(&other.title) + && self.labels.eq(&other.labels) + && self.note.eq(&other.note) + } +} + +impl Eq for Report {} + +impl Debug for Report { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self) + } +} + +impl Display for Report { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + // Use the SourceId indicated by the first 
label, or the one + // corresponding to the current source file (i.e: the most + // recently registered). + let source_id = self + .labels + .first() + .and_then(|label| label.1.source_id) + .unwrap_or(self.default_source_id); + + let code_cache = self.code_cache.read(); + let mut cache_entry = code_cache.get(&source_id).unwrap(); + let mut src = cache_entry.code.as_str(); + + let mut message = self.level.title(self.title.as_str()).id(self.code); + let mut snippet = annotate_snippets::Snippet::source(src) + .origin(cache_entry.origin.as_deref().unwrap_or("line")) + .fold(true); + + for (level, label_ref, label) in &self.labels { + let label_source_id = + label_ref.source_id.unwrap_or(self.default_source_id); + + // If the current label doesn't belong to the same source file + // finish the current snippet, add it to the error message and + // start a new snippet for the label's source file. + if label_source_id != source_id { + cache_entry = code_cache.get(&label_source_id).unwrap(); + src = cache_entry.code.as_str(); + message = message.snippet(snippet); + snippet = annotate_snippets::Snippet::source(src) + .origin(cache_entry.origin.as_deref().unwrap_or("line")) + .fold(true) + } + + let span_start = label_ref.span.start(); + let span_end = label_ref.span.end(); + + snippet = snippet.annotation( + level.span(span_start..span_end).label(label.as_str()), + ); + } + + message = message.snippet(snippet); + + if let Some(note) = &self.note { + message = message.footer(Level::Note.title(note.as_str())); + } + + let renderer = if self.with_colors { + annotate_snippets::Renderer::styled() + } else { + annotate_snippets::Renderer::plain() + }; + + let text = renderer.render(message); + + write!(f, "{}", text) + } +} + +/// Represents a label in an error or warning report. +#[derive(Serialize)] +pub struct Label<'a> { + level: &'a str, + code_origin: Option, + span: Span, + text: &'a str, +} + /// Builds error and warning reports. 
/// /// `ReportBuilder` helps to create error and warning reports. It stores a copy @@ -60,18 +245,34 @@ pub struct ReportBuilder { with_colors: bool, current_source_id: Cell>, next_source_id: Cell, - // RefCell allows getting a mutable reference to the cache, even if we have - // an immutable reference to the report builder. - cache: RefCell, + code_cache: Arc, } /// A cache containing source files registered in a [`ReportBuilder`]. -struct Cache { - data: HashMap, +struct CodeCache { + data: RwLock>, } -/// Each of the entries stored in [`Cache`]. -struct CacheEntry { +impl CodeCache { + fn new() -> Self { + Self { data: RwLock::new(HashMap::new()) } + } + + pub fn read( + &self, + ) -> RwLockReadGuard<'_, HashMap> { + self.data.read().unwrap() + } + + pub fn write( + &self, + ) -> RwLockWriteGuard<'_, HashMap> { + self.data.write().unwrap() + } +} + +/// Each of the entries stored in [`CodeCache`]. +struct CodeCacheEntry { code: String, origin: Option, } @@ -89,7 +290,7 @@ impl ReportBuilder { with_colors: false, current_source_id: Cell::new(None), next_source_id: Cell::new(SourceId(0)), - cache: RefCell::new(Cache { data: HashMap::new() }), + code_cache: Arc::new(CodeCache::new()), } } @@ -115,17 +316,17 @@ impl ReportBuilder { /// replaces the invalid characters with the UTF-8 replacement character. pub fn register_source(&self, src: &SourceCode) -> &Self { let source_id = self.next_source_id.get(); + self.next_source_id.set(SourceId(source_id.0 + 1)); self.current_source_id.set(Some(source_id)); - let map = &mut self.cache.borrow_mut().data; - map.entry(source_id).or_insert_with(|| { + self.code_cache.write().entry(source_id).or_insert_with(|| { let s = if let Some(s) = src.valid { Cow::Borrowed(s) } else { String::from_utf8_lossy(src.raw.as_ref()) }; - CacheEntry { + CodeCacheEntry { // Replace tab characters with a single space. 
This doesn't // affect code spans, because the number of characters remain // the same, but prevents error messages from being wrongly @@ -134,18 +335,19 @@ impl ReportBuilder { origin: src.origin.clone(), } }); + self } /// Returns the fragment of source code indicated by `source_ref`. - pub fn get_snippet(&self, source_ref: &SourceRef) -> String { + pub fn get_snippet(&self, source_ref: &CodeLoc) -> String { let source_id = source_ref .source_id .or_else(|| self.current_source_id()) .expect("create_report without registering any source code"); - let cache = self.cache.borrow(); - let cache_entry = cache.data.get(&source_id).unwrap(); + let code_cache = self.code_cache.read(); + let cache_entry = code_cache.get(&source_id).unwrap(); let src = cache_entry.code.as_str(); src[source_ref.span.range()].to_string() @@ -157,70 +359,23 @@ impl ReportBuilder { level: Level, code: &'static str, title: String, - labels: Vec<(SourceRef, String, Level)>, + labels: Vec<(Level, CodeLoc, String)>, note: Option, - ) -> String { + ) -> Report { // Make sure there's at least one label. assert!(!labels.is_empty()); - // Use the SourceId indicated by the first label, or the one - // corresponding to the current source file (i.e: the most - // recently registered). 
- let source_id = labels - .first() - .and_then(|label| label.0.source_id) - .or_else(|| self.current_source_id()) - .expect("create_report without registering any source code"); - - let cache = self.cache.borrow(); - let mut cache_entry = cache.data.get(&source_id).unwrap(); - let mut src = cache_entry.code.as_str(); - - let mut message = level.title(title.as_str()).id(code); - let mut snippet = annotate_snippets::Snippet::source(src) - .origin(cache_entry.origin.as_deref().unwrap_or("line")) - .fold(true); - - for (label_ref, label, level) in &labels { - let label_source_id = label_ref - .source_id - .or_else(|| self.current_source_id()) - .unwrap(); - - // If the current label doesn't belong to the same source file - // finish the current snippet, add it to the error message and - // start a new snippet for the label's source file. - if label_source_id != source_id { - cache_entry = cache.data.get(&label_source_id).unwrap(); - src = cache_entry.code.as_str(); - message = message.snippet(snippet); - snippet = annotate_snippets::Snippet::source(src) - .origin(cache_entry.origin.as_deref().unwrap_or("line")) - .fold(true) - } - - let span_start = label_ref.span.start(); - let span_end = label_ref.span.end(); - - snippet = snippet.annotation( - level.span(span_start..span_end).label(label.as_str()), - ); - } - - message = message.snippet(snippet); - - if let Some(note) = ¬e { - message = message.footer(Level::Note.title(note.as_str())); + Report { + code_cache: self.code_cache.clone(), + with_colors: self.with_colors, + default_source_id: self.current_source_id().expect( + "`create_report` called without registering any source", + ), + level, + code, + title, + labels, + note, } - - let renderer = if self.with_colors { - annotate_snippets::Renderer::styled() - } else { - annotate_snippets::Renderer::plain() - }; - - let message = renderer.render(message); - - message.to_string() } } diff --git a/lib/src/compiler/rules.rs b/lib/src/compiler/rules.rs index 
b8dccb8cb..e6d8cd0c4 100644 --- a/lib/src/compiler/rules.rs +++ b/lib/src/compiler/rules.rs @@ -11,7 +11,8 @@ use regex_automata::meta::Regex; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use crate::compiler::atoms::Atom; -use crate::compiler::report::SourceRef; +use crate::compiler::errors::SerializationError; +use crate::compiler::report::CodeLoc; use crate::compiler::warnings::Warning; use crate::compiler::{ IdentId, Imports, LiteralId, NamespaceId, PatternId, RegexpId, RuleId, @@ -19,7 +20,7 @@ use crate::compiler::{ }; use crate::re::{BckCodeLoc, FwdCodeLoc, RegexpAtom}; use crate::string_pool::{BStringPool, StringPool}; -use crate::{re, types, SerializationError}; +use crate::{re, types}; /// A set of YARA rules in compiled form. /// @@ -478,7 +479,7 @@ pub(crate) struct RuleInfo { /// is used only during the compilation phase, but not during the scan /// phase. #[serde(skip)] - pub(crate) ident_ref: SourceRef, + pub(crate) ident_ref: CodeLoc, /// Metadata associated to the rule. pub(crate) metadata: Vec<(IdentId, MetaValue)>, /// Vector with all the patterns defined by this rule. 
diff --git a/lib/src/compiler/tests/mod.rs b/lib/src/compiler/tests/mod.rs index d8c79036e..16ff5854f 100644 --- a/lib/src/compiler/tests/mod.rs +++ b/lib/src/compiler/tests/mod.rs @@ -4,11 +4,10 @@ use std::fs; use std::io::Write; use std::mem::size_of; -use crate::compiler::{ - SerializationError, SubPattern, Var, VarStack, VariableError, -}; +use crate::compiler::{SubPattern, Var, VarStack}; +use crate::errors::{SerializationError, VariableError}; use crate::types::Type; -use crate::{compile, Compiler, Error, Rules, Scanner}; +use crate::{compile, Compiler, Rules, Scanner, SourceCode}; #[test] fn serialization() { @@ -122,9 +121,7 @@ fn globals() { assert_eq!( compiler.define_global("#invalid", true).err().unwrap(), - Error::VariableError(VariableError::InvalidIdentifier( - "#invalid".to_string() - )) + VariableError::InvalidIdentifier("#invalid".to_string()) ); let mut compiler = Compiler::new(); @@ -136,7 +133,7 @@ fn globals() { .define_global("a", false) .err() .unwrap(), - Error::VariableError(VariableError::AlreadyExists("a".to_string())) + VariableError::AlreadyExists("a".to_string()) ); let mut compiler = Compiler::new(); @@ -464,28 +461,28 @@ fn globals_json() { Compiler::new() .define_global("invalid_array", json!([1, "foo", 3])) .unwrap_err(), - Error::VariableError(VariableError::InvalidArray) + VariableError::InvalidArray ); assert_eq!( Compiler::new() .define_global("invalid_array", json!([1, [2, 3], 4])) .unwrap_err(), - Error::VariableError(VariableError::InvalidArray) + VariableError::InvalidArray ); assert_eq!( Compiler::new() .define_global("invalid_array", json!([1, null])) .unwrap_err(), - Error::VariableError(VariableError::InvalidArray) + VariableError::InvalidArray ); assert_eq!( Compiler::new() .define_global("invalid_array", json!({ "foo": null })) .unwrap_err(), - Error::VariableError(VariableError::UnexpectedNull) + VariableError::UnexpectedNull ); } @@ -671,13 +668,13 @@ fn errors_2() { "error[E012]: duplicate rule `foo` --> line:1:6 
| -1 | rule foo : first {condition: true} - | --- note: `foo` declared here for the first time +1 | rule foo : second {condition: true} + | ^^^ duplicate declaration of `foo` | ::: line:1:6 | -1 | rule foo : second {condition: true} - | ^^^ duplicate declaration of `foo` +1 | rule foo : first {condition: true} + | --- note: `foo` declared here for the first time |" ); @@ -723,6 +720,42 @@ fn utf8_errors() { ); } +#[test] +fn errors_serialization() { + let err = Compiler::new() + .add_source( + SourceCode::from("rule test {condition: foo}") + .with_origin("test.yar"), + ) + .err() + .unwrap(); + + let json_error = serde_json::to_string(&err).unwrap(); + + let expected = json!({ + "type": "UnknownIdentifier", + "code": "E009", + "title": "unknown identifier `foo`", + "labels":[ + { + "level": "error", + "code_origin": "test.yar", + "span": { "start": 22, "end": 25 }, + "text": "this identifier has not been declared" + } + ], + "note": null, + "text": r#"error[E009]: unknown identifier `foo` + --> test.yar:1:23 + | +1 | rule test {condition: foo} + | ^^^ this identifier has not been declared + |"# + }); + + assert_eq!(json_error, expected.to_string()); +} + #[test] fn test_errors() { let mut mint = goldenfile::Mint::new("."); diff --git a/lib/src/compiler/tests/testdata/errors/1.out b/lib/src/compiler/tests/testdata/errors/1.out index 3d9536104..19ebd92b9 100644 --- a/lib/src/compiler/tests/testdata/errors/1.out +++ b/lib/src/compiler/tests/testdata/errors/1.out @@ -1,5 +1,5 @@ error[E012]: duplicate rule `test` - --> line:1:6 + --> line:5:6 | 1 | rule test { | ---- note: `test` declared here for the first time diff --git a/lib/src/compiler/tests/testdata/errors/115.out b/lib/src/compiler/tests/testdata/errors/115.out index 52f62fc10..28622be8f 100644 --- a/lib/src/compiler/tests/testdata/errors/115.out +++ b/lib/src/compiler/tests/testdata/errors/115.out @@ -1,4 +1,4 @@ -error[E117]: invalid pattern modifier +error[E033]: invalid pattern modifier --> line:3:20 | 3 | 
$a = { 01 02 } nocase diff --git a/lib/src/compiler/tests/testdata/errors/46.out b/lib/src/compiler/tests/testdata/errors/46.out index e07c0a140..a8ce46ff5 100644 --- a/lib/src/compiler/tests/testdata/errors/46.out +++ b/lib/src/compiler/tests/testdata/errors/46.out @@ -1,4 +1,4 @@ -error[E017]: `entrypoint` is unsupported` +error[E017]: `entrypoint` is unsupported --> line:3:5 | 3 | entrypoint == 0x1000 diff --git a/lib/src/compiler/warnings.rs b/lib/src/compiler/warnings.rs index 618c3852e..835297e03 100644 --- a/lib/src/compiler/warnings.rs +++ b/lib/src/compiler/warnings.rs @@ -1,213 +1,394 @@ -use std::collections::HashSet; +#![cfg_attr(any(), rustfmt::skip)] + use std::fmt::{Debug, Display, Formatter}; +use serde::Serialize; use thiserror::Error; -use yara_x_macros::Error as DeriveError; +use yara_x_macros::ErrorEnum; +use yara_x_macros::ErrorStruct; -use crate::compiler::report::Level; -use crate::compiler::report::{ReportBuilder, SourceRef}; +use crate::compiler::report::{Level, Report, ReportBuilder, CodeLoc, Label}; /// A warning raised while compiling YARA rules. 
-#[rustfmt::skip] #[allow(missing_docs)] -#[derive(DeriveError)] +#[non_exhaustive] +#[derive(ErrorEnum, Error, PartialEq, Eq)] +#[derive(Serialize)] +#[serde(tag = "type")] pub enum Warning { - #[warning("consecutive_jumps", "consecutive jumps in hex pattern `{pattern_ident}`")] - #[label_warn("these consecutive jumps will be treated as {coalesced_jump}", jumps_span)] - ConsecutiveJumps { - detailed_report: String, - pattern_ident: String, - coalesced_jump: String, - jumps_span: SourceRef , - }, - - #[warning("unsatisfiable_expr", "potentially unsatisfiable expression")] - #[label_warn("this implies that multiple patterns must match", quantifier_span)] - #[label_warn("but they must match at the same offset", at_span)] - PotentiallyUnsatisfiableExpression { - detailed_report: String, - quantifier_span: SourceRef, - at_span: SourceRef, - }, - - #[warning("invariant_expr", "invariant boolean expression")] - #[label_warn("this expression is always {value}", span)] - #[note(note)] - InvariantBooleanExpression { - detailed_report: String, - value: bool, - span: SourceRef, - note: Option, - }, - - #[warning("non_bool_expr", "non-boolean expression used as boolean")] - #[label_warn("this expression is `{expression_type}` but is being used as `bool`", span)] - #[note(note)] - NonBooleanAsBoolean { - detailed_report: String, - expression_type: String, - span: SourceRef, - note: Option, - }, - - #[warning("bool_int_comparison", "comparison between boolean and integer")] - #[label_warn("this comparison can be replaced with: `{replacement}`", span)] - BooleanIntegerComparison { - detailed_report: String, - span: SourceRef, - replacement: String, - }, - - #[warning("duplicate_import", "duplicate import statement")] - #[label_warn( - "duplicate import", - new_import_span - )] - #[label_note( - "`{module_name}` imported here for the first time", - existing_import_span - )] - DuplicateImport { - detailed_report: String, - module_name: String, - new_import_span: SourceRef, - 
existing_import_span: SourceRef, - }, - - #[warning("redundant_modifier", "redundant case-insensitive modifier")] - #[label_warn("the `i` suffix indicates that the pattern is case-insensitive", i_span)] - #[label_warn("the `nocase` modifier does the same", nocase_span)] - RedundantCaseModifier { - detailed_report: String, - nocase_span: SourceRef, - i_span: SourceRef, - }, - - #[warning("slow_pattern", "slow pattern")] - #[label_warn("this pattern may slow down the scan", span)] - SlowPattern { - detailed_report: String, - span: SourceRef, - }, - - #[warning("unsupported_module", "module `{module_name}` is not supported")] - #[label_warn("module `{module_name}` used here", span)] - #[note(note)] - IgnoredModule { - detailed_report: String, - module_name: String, - span: SourceRef, - note: Option, - }, - - #[warning( - "ignored_rule", - "rule `{ignored_rule}` will be ignored due to an indirect dependency on module `{module_name}`" - )] - #[label_warn("this other rule depends on module `{module_name}`, which is unsupported", span)] - IgnoredRule { - detailed_report: String, - ignored_rule: String, - dependency: String, - module_name: String, - span: SourceRef, - }, + ConsecutiveJumps(Box), + PotentiallyUnsatisfiableExpression(Box), + InvariantBooleanExpression(Box), + NonBooleanAsBoolean(Box), + BooleanIntegerComparison(Box), + DuplicateImport(Box), + RedundantCaseModifier(Box), + SlowPattern(Box), + IgnoredModule(Box), + IgnoredRule(Box), } -/// Error returned by [`Warnings::switch_warning`] when the warning code is -/// not valid. -#[derive(Error, Debug, Eq, PartialEq)] -#[error("`{0}` is not a valid warning code")] -pub struct InvalidWarningCode(String); - -/// Represents a list of warnings. -pub struct Warnings { - warnings: Vec, - max_warnings: usize, - disabled_warnings: HashSet, +/// A hex pattern contains two or more consecutive jumps. +/// +/// For instance, in `{01 02 [0-2] [1-3] 03 04 }` the jumps `[0-2]` and `[1-3]` +/// appear one after the other. 
Consecutive jumps are useless, and they can be +/// folded into a single one. In this case they can be replaced by `[1-5]`. +/// +/// ## Example +/// +/// ```text +/// warning[consecutive_jumps]: consecutive jumps in hex pattern `$a` +/// --> line:3:18 +/// | +/// 3 | $a = { 0F 84 [4] [0-7] 8D } +/// | --------- these consecutive jumps will be treated as [4-11] +/// | +/// ``` +#[derive(ErrorStruct, Debug, PartialEq, Eq)] +#[associated_enum(Warning)] +#[warning( + code = "consecutive_jumps", + title = "consecutive jumps in hex pattern `{pattern_ident}`", +)] +#[label( + "these consecutive jumps will be treated as {coalesced_jump}", + coalesced_jump_loc +)] +pub struct ConsecutiveJumps { + report: Report, + pattern_ident: String, + coalesced_jump: String, + coalesced_jump_loc: CodeLoc, } -impl Default for Warnings { - fn default() -> Self { - Self { - warnings: Vec::new(), - max_warnings: 100, - disabled_warnings: HashSet::default(), - } +impl ConsecutiveJumps { + /// Identifier of the pattern containing the consecutive jumps. + #[inline] + pub fn pattern(&self) -> &str { + self.pattern_ident.as_str() } } -impl Warnings { - #[inline] - pub fn is_empty(&self) -> bool { - self.warnings.is_empty() - } +/// A boolean expression may be impossible to match. +/// +/// For instance, the condition `2 of ($a, $b) at 0` is impossible +/// to match, unless that both `$a` and `$b` are the same pattern, +/// or one is a prefix of the other. In most cases this expression +/// is unsatisfiable because two different matches can match at the +/// same file offset. 
+/// +/// ## Example +/// +/// ```text +/// warning[unsatisfiable_expr]: potentially unsatisfiable expression +/// --> line:6:5 +/// | +/// 6 | 2 of ($*) at 0 +/// | - this implies that multiple patterns must match +/// | ---- but they must match at the same offset +/// | +/// ``` +#[derive(ErrorStruct, Debug, PartialEq, Eq)] +#[associated_enum(Warning)] +#[warning( + code = "unsatisfiable_expr", + title = "potentially unsatisfiable expression" +)] +#[label( + "this implies that multiple patterns must match", + quantifier_loc +)] +#[label( + "but they must match at the same offset", + at_loc +)] +pub struct PotentiallyUnsatisfiableExpression { + report: Report, + quantifier_loc: CodeLoc, + at_loc: CodeLoc, +} - #[inline] - pub fn len(&self) -> usize { - self.warnings.len() - } - #[inline] - pub fn add(&mut self, f: impl FnOnce() -> Warning) { - if self.warnings.len() < self.max_warnings { - let warning = f(); - if !self.disabled_warnings.contains(warning.code()) { - self.warnings.push(warning); - } - } - } +/// A boolean expression always has the same value. +/// +/// This warning indicates that some boolean expression is always true or false, +/// regardless of the data being scanned. +/// +/// ## Example +/// +/// ```text +/// warning[invariant_expr]: invariant boolean expression +/// --> line:6:5 +/// | +/// 6 | 3 of them +/// | --------- this expression is always false +/// | +/// = note: the expression requires 3 matching patterns out of 2 +/// ``` +#[derive(ErrorStruct, Debug, PartialEq, Eq)] +#[associated_enum(Warning)] +#[warning( + code = "invariant_expr", + title = "invariant boolean expression" +)] +#[label( + "this expression is always {expr_value}", + expr_loc +)] +#[note(note)] +pub struct InvariantBooleanExpression { + report: Report, + expr_value: bool, + expr_loc: CodeLoc, + note: Option, +} - /// Enables or disables a specific warning identified by `code`. 
- /// - /// Returns `true` if the warning was previously enabled, or `false` if - /// otherwise. Returns an error if the code doesn't correspond to any - /// of the existing warnings. - #[inline] - pub fn switch_warning( - &mut self, - code: &str, - enabled: bool, - ) -> Result { - if !Warning::is_valid_code(code) { - return Err(InvalidWarningCode(code.to_string())); - } - if enabled { - Ok(!self.disabled_warnings.remove(code)) - } else { - Ok(self.disabled_warnings.insert(code.to_string())) - } - } +/// A non-boolean expression is being used as a boolean. +/// +/// ## Example +/// +/// ```text +/// warning[non_bool_expr]: non-boolean expression used as boolean +/// --> line:3:14 +/// | +/// 3 | condition: 2 and 3 +/// | - this expression is `integer` but is being used as `bool` +/// | +/// = note: non-zero integers are considered `true`, while zero is `false` +/// ``` +#[derive(ErrorStruct, Debug, PartialEq, Eq)] +#[associated_enum(Warning)] +#[warning( + code = "non_bool_expr", + title = "non-boolean expression used as boolean" +)] +#[label( + "this expression is `{expr_type}` but is being used as `bool`", + expr_loc +)] +#[note(note)] +pub struct NonBooleanAsBoolean { + report: Report, + expr_type: String, + expr_loc: CodeLoc, + note: Option, +} - /// Enable or disables all warnings. - pub fn switch_all_warnings(&mut self, enabled: bool) { - if enabled { - self.disabled_warnings.clear(); - } else { - for c in Warning::all_codes() { - self.disabled_warnings.insert(c.to_string()); - } - } - } +/// Comparison between boolean and integer. +/// +/// This warning indicates that some expression is a comparison between +/// boolean and integer values. 
+/// +/// ## Example +/// +/// ```text +/// warning[bool_int_comparison]: comparison between boolean and integer +/// --> line:4:13 +/// | +/// 4 | condition: test_proto2.array_bool[0] == 1 +/// | ------------------------------ this comparison can be replaced with: `test_proto2.array_bool[0]` +/// | +/// ``` +#[derive(ErrorStruct, Debug, PartialEq, Eq)] +#[associated_enum(Warning)] +#[warning( + code = "bool_int_comparison", + title = "comparison between boolean and integer" +)] +#[label( + "this comparison can be replaced with: `{replacement}`", + expr_loc +)] +pub struct BooleanIntegerComparison { + report: Report, + replacement: String, + expr_loc: CodeLoc, +} - #[inline] - pub fn as_slice(&self) -> &[Warning] { - self.warnings.as_slice() - } +/// Duplicate import statement. +/// +/// This warning indicates that some module has been imported multiple times. +/// +/// ## Example +/// +/// ```text +/// warning[duplicate_import]: duplicate import statement +/// --> line:1:21 +/// | +/// 1 | import "test_proto2" +/// | -------------------- note: `test_proto2` imported here for the first time +/// 2 | import "test_proto2" +/// | -------------------- duplicate import +/// | +/// ``` +#[derive(ErrorStruct, Debug, PartialEq, Eq)] +#[associated_enum(Warning)] +#[warning( + code = "duplicate_import", + title = "duplicate import statement" +)] +#[label( + "duplicate import", + new_import_loc +)] +#[label( + "`{module_name}` imported here for the first time", + existing_import_loc, + Level::Note +)] +pub struct DuplicateImport { + report: Report, + module_name: String, + new_import_loc: CodeLoc, + existing_import_loc: CodeLoc, +} - pub fn append(&mut self, mut warnings: Self) { - for w in warnings.warnings.drain(0..) { - if self.warnings.len() == self.max_warnings { - break; - } - self.warnings.push(w) - } - } + +/// Redundant case-insensitive modifier for a regular expression. 
+/// +/// A regular expression can be made case-insensitive in two ways: by using the +/// `nocase` modifier or by appending the `i` suffix to the pattern. Both +/// methods achieve the same result, making it redundant to use them +/// simultaneously. +/// +/// For example, the following patterns are equivalent: +/// +/// ```text +/// $re = /some regexp/i +/// $re = /some regexp/ nocase +/// ``` +/// +/// ## Example +/// +/// ```text +/// warning[redundant_modifier]: redundant case-insensitive modifier +/// --> line:3:15 +/// | +/// 3 | $a = /foo/i nocase +/// | - the `i` suffix indicates that the pattern is case-insensitive +/// | ------ the `nocase` modifier does the same +/// | +/// ``` +#[derive(ErrorStruct, Debug, PartialEq, Eq)] +#[associated_enum(Warning)] +#[warning( + code = "redundant_modifier", + title = "redundant case-insensitive modifier" +)] +#[label( + "the `i` suffix indicates that the pattern is case-insensitive", + i_loc +)] +#[label( + "the `nocase` modifier does the same", + nocase_loc +)] +pub struct RedundantCaseModifier { + report: Report, + nocase_loc: CodeLoc, + i_loc: CodeLoc, } -impl From for Vec { - fn from(value: Warnings) -> Self { - value.warnings - } +/// Some pattern may be potentially slow. +/// +/// This warning indicates that a pattern may be very slow to match, and can +/// degrade the rule's performance. In most cases this is caused by patterns +/// that don't contain any large fixed sub-pattern that can be used for speeding +/// up the scan. For example, `{00 [1-10] 01}` is very slow because the only +/// fixed sub-patterns (`00` and `01`) are only one byte long.
+/// +/// ## Example +/// +/// ```text +/// warning[slow_pattern]: slow pattern +/// --> line:3:5 +/// | +/// 3 | $a = {00 [1-10] 01} +/// | ------------------ this pattern may slow down the scan +/// | +/// ``` +#[derive(ErrorStruct, Debug, PartialEq, Eq)] +#[associated_enum(Warning)] +#[warning( + code = "slow_pattern", + title = "slow pattern" +)] +#[label( + "this pattern may slow down the scan", + pattern_loc +)] +pub struct SlowPattern { + report: Report, + pattern_loc: CodeLoc, } + +/// An unsupported module has been used. +/// +/// If you use [`crate::Compiler::ignore_module`] for telling the compiler +/// that some module is not supported, the compiler will raise this warning +/// when the module is used in some of your rules. +/// +/// ## Example +/// +/// ```text +/// warning[unsupported_module]: module `magic` is not supported +/// --> line:4:5 +/// | +/// 4 | magic.type() +/// | ----- module `magic` used here +/// | +/// = note: the whole rule `foo` will be ignored +/// ``` +#[derive(ErrorStruct, Debug, PartialEq, Eq)] +#[associated_enum(Warning)] +#[warning( + code = "unsupported_module", + title = "module `{module_name}` is not supported" +)] +#[label( + "module `{module_name}` used here", + module_name_loc +)] +#[note(note)] +pub struct IgnoredModule { + report: Report, + module_name: String, + module_name_loc: CodeLoc, + note: Option, +} + +/// A rule indirectly depends on some unsupported module. +/// +/// If you use [`crate::Compiler::ignore_module`] for telling the compiler +/// that some module is not supported, the compiler will raise this warning +/// when a rule `A` uses some rule `B` that uses the module. 
+/// +/// ## Example +/// +/// ```text +/// warning[ignored_rule]: rule `foo` will be ignored due to an indirect dependency on module `magic` +/// --> line:9:5 +/// | +/// 9 | bar +/// | --- this other rule depends on module `magic`, which is unsupported +/// | +/// ``` +#[derive(ErrorStruct, Debug, PartialEq, Eq)] +#[associated_enum(Warning)] +#[warning( + code = "ignored_rule", + title = "rule `{ignored_rule}` will be ignored due to an indirect dependency on module `{module_name}`" +)] +#[label( + "this other rule depends on module `{module_name}`, which is unsupported", + ignored_rule_loc +)] +pub struct IgnoredRule { + report: Report, + module_name: String, + ignored_rule: String, + ignored_rule_loc: CodeLoc, +} + + diff --git a/lib/src/lib.rs b/lib/src/lib.rs index 36573f149..01a752ec3 100644 --- a/lib/src/lib.rs +++ b/lib/src/lib.rs @@ -44,14 +44,11 @@ assert_eq!(results.matching_rules().len(), 1); #![deny(missing_docs)] pub use compiler::compile; -pub use compiler::CompileError; pub use compiler::Compiler; -pub use compiler::Error; pub use compiler::Rules; -pub use compiler::SerializationError; pub use compiler::SourceCode; -pub use compiler::Warning; +pub use modules::mods; pub use scanner::Match; pub use scanner::Matches; pub use scanner::MatchingRules; @@ -62,14 +59,9 @@ pub use scanner::NonMatchingRules; pub use scanner::Pattern; pub use scanner::Patterns; pub use scanner::Rule; -pub use scanner::ScanError; pub use scanner::ScanResults; pub use scanner::Scanner; - -pub use modules::mods; - pub use variables::Variable; -pub use variables::VariableError; mod compiler; mod modules; @@ -84,6 +76,22 @@ mod wasm; #[cfg(test)] mod tests; +pub mod errors { + //! Errors returned by this crate. + //! + //! This module contains the definitions for all error types returned by this + //! crate. 
+ pub use crate::compiler::errors::*; + pub use crate::compiler::InvalidWarningCode; + pub use crate::scanner::ScanError; + pub use crate::variables::VariableError; +} + +pub mod warnings { + //! Warnings returned while compiling rules. + pub use crate::compiler::warnings::*; +} + mod utils { /// Tries to match `target` as the enum variant `pat`. Returns the /// inner value contained in the variant, or panics if `target` does diff --git a/lib/src/scanner/context.rs b/lib/src/scanner/context.rs index 6bfa05ff5..ed4b8e3bb 100644 --- a/lib/src/scanner/context.rs +++ b/lib/src/scanner/context.rs @@ -30,10 +30,10 @@ use crate::re::fast::FastVM; use crate::re::thompson::PikeVM; use crate::re::Action; use crate::scanner::matches::{Match, PatternMatches, UnconfirmedMatch}; +use crate::scanner::ScanError; use crate::scanner::HEARTBEAT_COUNTER; use crate::types::{Array, Map, Struct}; use crate::wasm::MATCHING_RULES_BITMAP_BASE; -use crate::ScanError; /// Structure that holds information about the current scan. pub(crate) struct ScanContext<'r> { diff --git a/lib/src/scanner/mod.rs b/lib/src/scanner/mod.rs index b3f9c1e50..fe4313dd2 100644 --- a/lib/src/scanner/mod.rs +++ b/lib/src/scanner/mod.rs @@ -48,7 +48,7 @@ mod matches; #[cfg(test)] mod tests; -/// Error returned by [`Scanner::scan`] and [`Scanner::scan_file`]. +/// Error returned when a scan operation fails. #[derive(Error, Debug)] pub enum ScanError { /// The scan was aborted after the timeout period. diff --git a/lib/src/variables.rs b/lib/src/variables.rs index e56c62f86..904b888c6 100644 --- a/lib/src/variables.rs +++ b/lib/src/variables.rs @@ -22,7 +22,7 @@ use crate::types::{Array, TypeValue, Value}; /// implement [`Into`]. pub struct Variable(TypeValue); -/// Errors returned while defining or setting variables. +/// Error returned while defining or setting variables. #[derive(Error, Debug, Eq, PartialEq)] pub enum VariableError { /// The variable has not being defined. 
Before calling diff --git a/lib/src/wasm/mod.rs b/lib/src/wasm/mod.rs index 0ae5934bb..83ed643bc 100644 --- a/lib/src/wasm/mod.rs +++ b/lib/src/wasm/mod.rs @@ -94,12 +94,11 @@ use yara_x_macros::wasm_export; use crate::compiler::{LiteralId, PatternId, RegexpId, RuleId}; use crate::modules::BUILTIN_MODULES; -use crate::scanner::{RuntimeObjectHandle, ScanContext}; +use crate::scanner::{RuntimeObjectHandle, ScanContext, ScanError}; use crate::types::{ Array, Func, FuncSignature, Map, Struct, TypeValue, Value, }; use crate::wasm::string::RuntimeString; -use crate::ScanError; pub(crate) mod builder; pub(crate) mod string; diff --git a/macros/Cargo.toml b/macros/Cargo.toml index eb826d51f..194a3a2e3 100644 --- a/macros/Cargo.toml +++ b/macros/Cargo.toml @@ -15,5 +15,4 @@ proc-macro = true darling = "0.20.10" syn = { version = "2.0.74", features = ["full", "derive", "parsing", "visit"] } quote = "1.0" -proc-macro2 = "1.0.86" -convert_case = "0.6.0" +proc-macro2 = "1.0.86" \ No newline at end of file diff --git a/macros/src/error.rs b/macros/src/error.rs index e8723f82e..9e44728af 100644 --- a/macros/src/error.rs +++ b/macros/src/error.rs @@ -1,375 +1,346 @@ extern crate proc_macro; -use convert_case::{Case, Casing}; -use proc_macro2::{Span, TokenStream}; -use quote::{quote, TokenStreamExt}; +use proc_macro2::TokenStream; +use quote::quote; use syn::parse::{Parse, ParseStream}; -use syn::punctuated::Punctuated; +use syn::spanned::Spanned; use syn::token::Comma; -use syn::{ - Attribute, Data, DataEnum, DeriveInput, Error, Expr, Fields, Ident, - LitStr, Result, Variant, -}; +use syn::{Data, DeriveInput, Error, Expr, Field, Ident, LitStr, Result}; -pub(crate) fn impl_error_macro(input: DeriveInput) -> Result { - let name = &input.ident; +/// Describes a label in an error/warning message. 
+#[derive(Debug)] +struct Label { + label_fmt: LitStr, + label_ref: Ident, + level: Option, +} - let (codes, variants, funcs) = match &input.data { - Data::Struct(_) | Data::Union(_) => { - return Err(Error::new( - name.span(), - "macros macro Error can be used with only with enum types" +impl Parse for Label { + /// Parses a label like the one below. + /// + /// ```text + /// #[label("{error_msg}", error_ref, Level::Info)] + /// ``` + /// + /// The last argument is optional, the default value is `Level::Error`. + fn parse(input: ParseStream) -> Result { + let label_fmt: LitStr = input.parse()?; + let _ = input.parse::()?; + let label_ref: Ident = input.parse()?; + let mut level = None; + if input.peek(Comma) { + input.parse::()?; + level = Some(input.parse::()?); + } + Ok(Label { label_fmt, label_ref, level }) + } +} + +pub(crate) fn impl_error_struct_macro( + input: DeriveInput, +) -> Result { + let fields = + match &input.data { + Data::Struct(s) => &s.fields, + Data::Enum(_) | Data::Union(_) => return Err(Error::new( + input.ident.span(), + "macro ErrorStruct can be used with only with struct types" + .to_string(), - )) + )), + }; + + let mut level = None; + let mut code = None; + let mut title = None; + let mut note = None; + let mut associated_enum = None; + let mut labels = Vec::new(); + + for attr in input.attrs { + if attr.path().is_ident("doc") { + // `doc` attributes are ignored, they are actually the + // documentation comments added in front of structures. + continue; + } else if attr.path().is_ident("associated_enum") { + associated_enum = Some(attr.parse_args::()?); + } else if attr.path().is_ident("label") { + labels.push(attr.parse_args::