diff --git a/Cargo.lock b/Cargo.lock index f9aef61c1..56da37cf3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -670,15 +670,6 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f7144d30dcf0fafbce74250a3963025d8d52177934239851c917d29f1df280c2" -[[package]] -name = "convert_case" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec182b0ca2f35d8fc196cf3404988fd8b8c739a4d270ff118a398feb0cbec1ca" -dependencies = [ - "unicode-segmentation", -] - [[package]] name = "core-foundation-sys" version = "0.8.7" @@ -4827,6 +4818,7 @@ name = "yara-x-capi" version = "0.6.0" dependencies = [ "cbindgen", + "serde_json", "yara-x", ] @@ -4879,7 +4871,6 @@ dependencies = [ name = "yara-x-macros" version = "0.6.0" dependencies = [ - "convert_case", "darling", "proc-macro2", "quote", @@ -4906,6 +4897,7 @@ dependencies = [ "rayon", "rowan", "rustc-hash 2.0.0", + "serde", "yansi 1.0.1", ] diff --git a/capi/Cargo.toml b/capi/Cargo.toml index 4a1cf4072..b5f896f8b 100644 --- a/capi/Cargo.toml +++ b/capi/Cargo.toml @@ -22,6 +22,7 @@ name = "yara_x_capi" crate-type = ["staticlib", "cdylib"] [dependencies] +serde_json = { workspace = true } yara-x = { workspace = true } [build-dependencies] diff --git a/capi/include/yara_x.h b/capi/include/yara_x.h index 57c74c499..c4ef3180a 100644 --- a/capi/include/yara_x.h +++ b/capi/include/yara_x.h @@ -315,6 +315,46 @@ enum YRX_RESULT yrx_compiler_define_global_float(struct YRX_COMPILER *compiler, const char *ident, double value); +// Returns the errors encountered during the compilation in JSON format. +// +// In the address indicated by the `buf` pointer, the function will copy a +// `YRX_BUFFER*` pointer. The `YRX_BUFFER` structure represents a buffer +// that contains the JSON representation of the compilation errors. +// +// The JSON consists on an array of objects, each object representing a +// compilation error. 
The object has the following fields: +// +// * type: A string that describes the type of error. +// * code: Error code (e.g: "E009"). +// * title: Error title (e.g: "unknown identifier `foo`"). +// * labels: Array of labels. +// * text: The full text of the error report, as shown by the command-line tool. +// +// Here is an example: +// +// ```json +// [ +// { +// "type": "UnknownIdentifier", +// "code": "E009", +// "title": "unknown identifier `foo`", +// "labels": [ +// { +// "level": "error", +// "code_origin": null, +// "span": {"start":25,"end":28}, +// "text": "this identifier has not been declared" +// } +// ], +// "text": "... ..." +// } +// ] +// ``` +// +// The [`YRX_BUFFER`] must be destroyed with [`yrx_buffer_destroy`]. +enum YRX_RESULT yrx_compiler_errors_json(struct YRX_COMPILER *compiler, + struct YRX_BUFFER **buf); + // Builds the source code previously added to the compiler. // // After calling this function the compiler is reset to its initial state, diff --git a/capi/src/compiler.rs b/capi/src/compiler.rs index b62110895..b3254980f 100644 --- a/capi/src/compiler.rs +++ b/capi/src/compiler.rs @@ -1,6 +1,10 @@ -use crate::{LAST_ERROR, YRX_RESULT, YRX_RULES}; -use std::ffi::{c_char, CStr, CString}; +use std::ffi::{c_char, CStr}; use std::mem; +use std::mem::ManuallyDrop; + +use yara_x::errors::{CompileError, SerializationError, VariableError}; + +use crate::{_yrx_set_last_error, YRX_BUFFER, YRX_RESULT, YRX_RULES}; /// A compiler that takes YARA source code and produces compiled rules. 
pub struct YRX_COMPILER<'a> { @@ -83,11 +87,11 @@ pub unsafe extern "C" fn yrx_compiler_add_source( match compiler.inner.add_source(src.to_bytes()) { Ok(_) => { - LAST_ERROR.set(None); + _yrx_set_last_error::(None); YRX_RESULT::SUCCESS } Err(err) => { - LAST_ERROR.set(Some(CString::new(err.to_string()).unwrap())); + _yrx_set_last_error(Some(err)); YRX_RESULT::SYNTAX_ERROR } } @@ -158,7 +162,7 @@ pub unsafe extern "C" fn yrx_compiler_new_namespace( /// scanning data, however each scanner can change the variable’s initial /// value by calling `yrx_scanner_set_global`. unsafe fn yrx_compiler_define_global< - T: TryInto, + T: TryInto, >( compiler: *mut YRX_COMPILER, ident: *const c_char, @@ -178,11 +182,11 @@ unsafe fn yrx_compiler_define_global< match compiler.inner.define_global(ident, value) { Ok(_) => { - LAST_ERROR.set(None); + _yrx_set_last_error::(None); YRX_RESULT::SUCCESS } Err(err) => { - LAST_ERROR.set(Some(CString::new(err.to_string()).unwrap())); + _yrx_set_last_error(Some(err)); YRX_RESULT::VARIABLE_ERROR } } @@ -234,6 +238,72 @@ pub unsafe extern "C" fn yrx_compiler_define_global_float( yrx_compiler_define_global(compiler, ident, value) } +/// Returns the errors encountered during the compilation in JSON format. +/// +/// In the address indicated by the `buf` pointer, the function will copy a +/// `YRX_BUFFER*` pointer. The `YRX_BUFFER` structure represents a buffer +/// that contains the JSON representation of the compilation errors. +/// +/// The JSON consists of an array of objects, each object representing a +/// compilation error. The object has the following fields: +/// +/// * type: A string that describes the type of error. +/// * code: Error code (e.g: "E009"). +/// * title: Error title (e.g: "unknown identifier `foo`"). +/// * labels: Array of labels. +/// * text: The full text of the error report, as shown by the command-line tool. 
+/// +/// Here is an example: +/// +/// ```json +/// [ +/// { +/// "type": "UnknownIdentifier", +/// "code": "E009", +/// "title": "unknown identifier `foo`", +/// "labels": [ +/// { +/// "level": "error", +/// "code_origin": null, +/// "span": {"start":25,"end":28}, +/// "text": "this identifier has not been declared" +/// } +/// ], +/// "text": "... ..." +/// } +/// ] +/// ``` +/// +/// The [`YRX_BUFFER`] must be destroyed with [`yrx_buffer_destroy`]. +#[no_mangle] +pub unsafe extern "C" fn yrx_compiler_errors_json( + compiler: *mut YRX_COMPILER, + buf: &mut *mut YRX_BUFFER, +) -> YRX_RESULT { + let compiler = if let Some(compiler) = compiler.as_mut() { + compiler + } else { + return YRX_RESULT::INVALID_ARGUMENT; + }; + + match serde_json::to_vec(compiler.inner.errors()) { + Ok(json) => { + let json = json.into_boxed_slice(); + let mut json = ManuallyDrop::new(json); + *buf = Box::into_raw(Box::new(YRX_BUFFER { + data: json.as_mut_ptr(), + length: json.len(), + })); + _yrx_set_last_error::(None); + YRX_RESULT::SUCCESS + } + Err(err) => { + _yrx_set_last_error(Some(err)); + YRX_RESULT::SERIALIZATION_ERROR + } + } +} + /// Builds the source code previously added to the compiler. /// /// After calling this function the compiler is reset to its initial state, diff --git a/capi/src/lib.rs b/capi/src/lib.rs index 5d972b247..676361c73 100644 --- a/capi/src/lib.rs +++ b/capi/src/lib.rs @@ -99,19 +99,29 @@ use std::mem::ManuallyDrop; use std::ptr::slice_from_raw_parts_mut; use std::slice; +use yara_x::errors::{CompileError, SerializationError}; + +pub use scanner::*; + mod compiler; mod scanner; #[cfg(test)] mod tests; -pub use scanner::*; - thread_local! { static LAST_ERROR: RefCell> = const { RefCell::new(None) }; } +fn _yrx_set_last_error(err: Option) +where + E: ToString, +{ + LAST_ERROR.set(err.map(|err| CString::new(err.to_string()).unwrap())) +} + /// Error codes returned by functions in this API. 
+#[derive(PartialEq, Debug)] #[repr(C)] pub enum YRX_RESULT { /// Everything was OK. @@ -318,11 +328,11 @@ pub unsafe extern "C" fn yrx_compile( match yara_x::compile(c_str.to_bytes()) { Ok(r) => { *rules = Box::into_raw(Box::new(YRX_RULES(r))); - LAST_ERROR.set(None); + _yrx_set_last_error::(None); YRX_RESULT::SUCCESS } Err(err) => { - LAST_ERROR.set(Some(CString::new(err.to_string()).unwrap())); + _yrx_set_last_error(Some(err)); YRX_RESULT::SYNTAX_ERROR } } @@ -350,11 +360,11 @@ pub unsafe extern "C" fn yrx_rules_serialize( data: serialized.as_mut_ptr(), length: serialized.len(), })); - LAST_ERROR.set(None); + _yrx_set_last_error::(None); YRX_RESULT::SUCCESS } Err(err) => { - LAST_ERROR.set(Some(CString::new(err.to_string()).unwrap())); + _yrx_set_last_error(Some(err)); YRX_RESULT::SERIALIZATION_ERROR } } @@ -375,11 +385,11 @@ pub unsafe extern "C" fn yrx_rules_deserialize( match yara_x::Rules::deserialize(slice::from_raw_parts(data, len)) { Ok(r) => { *rules = Box::into_raw(Box::new(YRX_RULES(r))); - LAST_ERROR.set(None); + _yrx_set_last_error::(None); YRX_RESULT::SUCCESS } Err(err) => { - LAST_ERROR.set(Some(CString::new(err.to_string()).unwrap())); + _yrx_set_last_error(Some(err)); YRX_RESULT::SERIALIZATION_ERROR } } @@ -408,7 +418,7 @@ pub unsafe extern "C" fn yrx_rule_identifier( if let Some(rule) = rule.as_ref() { *ident = rule.0.identifier().as_ptr(); *len = rule.0.identifier().len(); - LAST_ERROR.set(None); + _yrx_set_last_error::(None); YRX_RESULT::SUCCESS } else { YRX_RESULT::INVALID_ARGUMENT @@ -432,7 +442,7 @@ pub unsafe extern "C" fn yrx_rule_namespace( if let Some(rule) = rule.as_ref() { *ns = rule.0.namespace().as_ptr(); *len = rule.0.namespace().len(); - LAST_ERROR.set(None); + _yrx_set_last_error::(None); YRX_RESULT::SUCCESS } else { YRX_RESULT::INVALID_ARGUMENT @@ -599,9 +609,9 @@ pub unsafe extern "C" fn yrx_buffer_destroy(buf: *mut YRX_BUFFER) { /// the most recent function was successfully. 
#[no_mangle] pub unsafe extern "C" fn yrx_last_error() -> *const c_char { - LAST_ERROR.with_borrow(|last_error| { - if let Some(last_error) = last_error { - last_error.as_ptr() + LAST_ERROR.with_borrow(|err| { + if let Some(err) = err { + err.as_ptr() } else { std::ptr::null() } diff --git a/capi/src/scanner.rs b/capi/src/scanner.rs index 4c94a5dd7..2947bacc4 100644 --- a/capi/src/scanner.rs +++ b/capi/src/scanner.rs @@ -1,9 +1,10 @@ -use std::ffi::{c_char, CStr, CString}; +use std::ffi::{c_char, CStr}; use std::slice; use std::time::Duration; -use yara_x::ScanError; -use crate::{LAST_ERROR, YRX_RESULT, YRX_RULE, YRX_RULES}; +use yara_x::errors::ScanError; + +use crate::{_yrx_set_last_error, YRX_RESULT, YRX_RULE, YRX_RULES}; /// A scanner that scans data with a set of compiled YARA rules. pub struct YRX_SCANNER<'s> { @@ -77,6 +78,8 @@ pub unsafe extern "C" fn yrx_scanner_scan( data: *const u8, len: usize, ) -> YRX_RESULT { + _yrx_set_last_error::(None); + if scanner.is_null() { return YRX_RESULT::INVALID_ARGUMENT; } @@ -90,11 +93,12 @@ pub unsafe extern "C" fn yrx_scanner_scan( let scan_results = scanner.inner.scan(data); if let Err(err) = scan_results { - LAST_ERROR.set(Some(CString::new(err.to_string()).unwrap())); - return match err { + let result = match err { ScanError::Timeout => YRX_RESULT::SCAN_TIMEOUT, _ => YRX_RESULT::SCAN_ERROR, }; + _yrx_set_last_error(Some(err)); + return result; } let scan_results = scan_results.unwrap(); @@ -106,7 +110,6 @@ pub unsafe extern "C" fn yrx_scanner_scan( } } - LAST_ERROR.set(None); YRX_RESULT::SUCCESS } @@ -195,7 +198,7 @@ pub unsafe extern "C" fn yrx_scanner_set_module_output( let module_name = match CStr::from_ptr(name).to_str() { Ok(name) => name, Err(err) => { - LAST_ERROR.set(Some(CString::new(err.to_string()).unwrap())); + _yrx_set_last_error(Some(err)); return YRX_RESULT::INVALID_UTF8; } }; @@ -209,18 +212,18 @@ pub unsafe extern "C" fn yrx_scanner_set_module_output( match 
scanner.inner.set_module_output_raw(module_name, data) { Ok(_) => { - LAST_ERROR.set(None); + _yrx_set_last_error::(None); YRX_RESULT::SUCCESS } Err(err) => { - LAST_ERROR.set(Some(CString::new(err.to_string()).unwrap())); + _yrx_set_last_error(Some(err)); YRX_RESULT::SCAN_ERROR } } } unsafe extern "C" fn yrx_scanner_set_global< - T: TryInto, + T: TryInto, >( scanner: *mut YRX_SCANNER, ident: *const c_char, @@ -233,7 +236,7 @@ unsafe extern "C" fn yrx_scanner_set_global< let ident = match CStr::from_ptr(ident).to_str() { Ok(ident) => ident, Err(err) => { - LAST_ERROR.set(Some(CString::new(err.to_string()).unwrap())); + _yrx_set_last_error(Some(err)); return YRX_RESULT::INVALID_UTF8; } }; @@ -242,11 +245,11 @@ unsafe extern "C" fn yrx_scanner_set_global< match scanner.inner.set_global(ident, value) { Ok(_) => { - LAST_ERROR.set(None); + _yrx_set_last_error::(None); YRX_RESULT::SUCCESS } Err(err) => { - LAST_ERROR.set(Some(CString::new(err.to_string()).unwrap())); + _yrx_set_last_error(Some(err)); YRX_RESULT::VARIABLE_ERROR } } @@ -262,7 +265,7 @@ pub unsafe extern "C" fn yrx_scanner_set_global_str( match CStr::from_ptr(value).to_str() { Ok(value) => yrx_scanner_set_global(scanner, ident, value), Err(err) => { - LAST_ERROR.set(Some(CString::new(err.to_string()).unwrap())); + _yrx_set_last_error(Some(err)); YRX_RESULT::INVALID_UTF8 } } diff --git a/capi/src/tests.rs b/capi/src/tests.rs index c020b3035..acc6636fc 100644 --- a/capi/src/tests.rs +++ b/capi/src/tests.rs @@ -12,9 +12,10 @@ use crate::{ yrx_scanner_destroy, yrx_scanner_on_matching_rule, yrx_scanner_scan, yrx_scanner_set_global_bool, yrx_scanner_set_global_float, yrx_scanner_set_global_int, yrx_scanner_set_global_str, - yrx_scanner_set_timeout, YRX_BUFFER, YRX_RULE, + yrx_scanner_set_timeout, YRX_BUFFER, YRX_RESULT, YRX_RULE, }; -use std::ffi::{c_void, CString}; + +use std::ffi::{c_void, CStr, CString}; extern "C" fn callback(rule: *const YRX_RULE, user_data: *mut c_void) { let mut ptr = std::ptr::null(); @@ 
-45,29 +46,31 @@ fn capi() { let mut compiler = std::ptr::null_mut(); yrx_compiler_create(0, &mut compiler); + // TODO: Use c-string literals cr#"rule test ..."# when the MSRV + // is bumped to 1.77. + // https://doc.rust-lang.org/edition-guide/rust-2021/c-string-literals.html let src = CString::new( - b"rule test {\ - meta: \ - some_int = 1 \ - some_string = \"foo\" \ - some_bytes = \"\\x01\\x00\\x02\" \ - strings: \ - $foo = \"foo\" \ - condition: \ - $foo or ( \ - some_bool and \ - some_str == \"some_str\" and \ - some_int == 1 and \ - some_float == 1.5) \ - }" - .to_vec(), + br#"rule test { + meta: + some_int = 1 + some_string = "foo" + some_bytes = "\x01\x00\x02" + strings: + $foo = "foo" + condition: + $foo or ( + some_bool and + some_str == "some_str" and + some_int == 1 and + some_float == 1.5) + }"#, ) .unwrap(); - let some_bool = CString::new(b"some_bool".to_vec()).unwrap(); - let some_str = CString::new(b"some_str".to_vec()).unwrap(); - let some_int = CString::new(b"some_int".to_vec()).unwrap(); - let some_float = CString::new(b"some_float".to_vec()).unwrap(); + let some_bool = CString::new(b"some_bool").unwrap(); + let some_str = CString::new(b"some_str").unwrap(); + let some_int = CString::new(b"some_int").unwrap(); + let some_float = CString::new(b"some_float").unwrap(); yrx_compiler_define_global_int(compiler, some_int.as_ptr(), 1); yrx_compiler_define_global_float(compiler, some_float.as_ptr(), 1.5); @@ -78,7 +81,7 @@ fn capi() { some_str.as_ptr(), ); - let namespace = CString::new(b"foo".to_vec()).unwrap(); + let namespace = CString::new(b"foo").unwrap(); yrx_compiler_new_namespace(compiler, namespace.as_ptr()); yrx_compiler_add_source(compiler, src.as_ptr()); @@ -135,3 +138,33 @@ fn capi() { yrx_rules_destroy(rules); } } + +#[test] +fn capi_errors() { + unsafe { + let mut compiler = std::ptr::null_mut(); + yrx_compiler_create(0, &mut compiler); + + let src = CString::new(b"rule test { condition: foo }").unwrap(); + + assert_eq!( + 
yrx_compiler_add_source(compiler, src.as_ptr()), + YRX_RESULT::SYNTAX_ERROR + ); + + assert_eq!( + CStr::from_ptr(yrx_last_error()), + CStr::from_bytes_with_nul( + b"error[E009]: unknown identifier `foo` + --> line:1:24 + | +1 | rule test { condition: foo } + | ^^^ this identifier has not been declared + |\0" + ) + .unwrap() + ); + + yrx_compiler_destroy(compiler); + } +} diff --git a/cli/src/commands/mod.rs b/cli/src/commands/mod.rs index 92d90a599..4664f06bf 100644 --- a/cli/src/commands/mod.rs +++ b/cli/src/commands/mod.rs @@ -22,7 +22,7 @@ use std::fs; use std::io::stdout; use std::path::PathBuf; -use anyhow::{anyhow, Context}; +use anyhow::{anyhow, bail, Context}; use clap::{command, crate_authors, Command}; use crossterm::tty::IsTty; use serde_json::Value; @@ -147,12 +147,10 @@ where .new_namespace(file_path.to_string_lossy().as_ref()); } - let result = compiler.add_source(src); + let _ = compiler.add_source(src); state.file_in_progress = None; - result?; - state.num_compiled_files = state.num_compiled_files.saturating_add(1); @@ -168,16 +166,24 @@ where } } - let rules = compiler.build(); - if let Some(console) = console { console.finalize(&state).unwrap(); } - for warning in rules.warnings() { + for error in compiler.errors() { + eprintln!("{}", error); + } + + for warning in compiler.warnings() { eprintln!("{}", warning); } + if !compiler.errors().is_empty() { + bail!("{} errors found", compiler.errors().len()); + } + + let rules = compiler.build(); + Ok(rules) } diff --git a/cli/src/commands/scan.rs b/cli/src/commands/scan.rs index cd712af05..eb3749c76 100644 --- a/cli/src/commands/scan.rs +++ b/cli/src/commands/scan.rs @@ -14,7 +14,8 @@ use superconsole::style::Stylize; use superconsole::{Component, Line, Lines, Span}; use yansi::Color::{Cyan, Red, Yellow}; use yansi::Paint; -use yara_x::{MetaValue, Rule, Rules, ScanError, ScanResults, Scanner}; +use yara_x::errors::ScanError; +use yara_x::{MetaValue, Rule, Rules, ScanResults, Scanner}; use 
crate::commands::{ compile_rules, external_var_parser, truncate_with_ellipsis, diff --git a/cli/src/main.rs b/cli/src/main.rs index ab26c0838..ec3d97ba1 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -76,26 +76,10 @@ fn main() -> anyhow::Result<()> { }; if let Err(err) = result { - match err.downcast_ref::() { - // Errors produced by the compiler already have colors and start - // with "error:", in such cases the error is printed as is. - Some(yara_x::Error::CompileError(_)) => { - eprintln!("{}", err); - } - // In all other cases imitate the style of compiler errors, so that - // they all look in the same way. - _ => { - if let Some(source) = err.source() { - eprintln!( - "{} {}: {}", - "error:".paint(Red).bold(), - err, - source - ); - } else { - eprintln!("{} {}", "error:".paint(Red).bold(), err); - } - } + if let Some(source) = err.source() { + eprintln!("{} {}: {}", "error:".paint(Red).bold(), err, source); + } else { + eprintln!("{} {}", "error:".paint(Red).bold(), err); } process::exit(1); } diff --git a/go/compiler.go b/go/compiler.go index 2e7e9ee6e..e84344b6d 100644 --- a/go/compiler.go +++ b/go/compiler.go @@ -3,6 +3,7 @@ package yara_x // #include import "C" import ( + "encoding/json" "errors" "fmt" "runtime" @@ -80,6 +81,41 @@ func ErrorOnSlowPattern(yes bool) CompileOption { } } +// CompileError represents each of the errors returned by [Compiler.Errors]. +type CompileError struct { + // Error code (e.g: "E001"). + Code string + // Error title (e.g: "unknown identifier `foo`"). + Title string + // Each of the labels in the error report. + Labels []Label + // The error's full report, as shown by the command-line tool. + Text string +} + +// Label represents a label in a [CompileError]. +type Label struct { + // Label's level (e.g: "error", "warning", "info", "note", "help"). + Level string + CodeOrigin string + // The code span covered by the label. + Span Span + // Text associated to the label. 
+ Text string +} + +// Span represents the starting and ending point of some piece of source +// code. +type Span struct { + Start int + End int +} + +// Error returns the error's full report. +func (c CompileError) Error() string { + return c.Text +} + // Compiler represent a YARA compiler. type Compiler struct { cCompiler *C.YRX_COMPILER @@ -254,6 +290,29 @@ func (c *Compiler) DefineGlobal(ident string, value interface{}) error { return nil } + +// Errors that occurred during the compilation, across multiple calls to +// [Compiler.AddSource]. +func (c *Compiler) Errors() []CompileError { + var buf *C.YRX_BUFFER + if C.yrx_compiler_errors_json(c.cCompiler, &buf) != C.SUCCESS { + panic("yrx_compiler_errors_json failed") + } + + defer C.yrx_buffer_destroy(buf) + runtime.KeepAlive(c) + + jsonErrors := C.GoBytes(unsafe.Pointer(buf.data), C.int(buf.length)) + + var result []CompileError + + if err := json.Unmarshal(jsonErrors, &result); err != nil { + panic(err) + } + + return result +} + // Build creates a [Rules] object containing a compiled version of all the // YARA rules previously added to the compiler. 
// diff --git a/go/compiler_test.go b/go/compiler_test.go index 826a2581e..c3c931011 100644 --- a/go/compiler_test.go +++ b/go/compiler_test.go @@ -107,10 +107,42 @@ func TestVariables(t *testing.T) { func TestError(t *testing.T) { _, err := Compile("rule test { condition: foo }") - assert.EqualError(t, err, `error[E009]: unknown identifier `+"`foo`"+` + expected := `error[E009]: unknown identifier `+"`foo`"+` --> line:1:24 | 1 | rule test { condition: foo } | ^^^ this identifier has not been declared - |`) + |` + assert.EqualError(t, err, expected) +} + + +func TestErrors(t *testing.T) { + c, err := NewCompiler() + assert.NoError(t, err) + + c.AddSource("rule test_1 { condition: true }") + assert.Equal(t, []CompileError{}, c.Errors()) + + c.AddSource("rule test_2 { condition: foo }") + assert.Equal(t, []CompileError{ + { + Code: "E009", + Title: "unknown identifier `foo`", + Labels: []Label{ + { + Level: "error", + CodeOrigin: "", + Span: Span { Start: 25, End: 28 }, + Text: "this identifier has not been declared", + }, + }, + Text: `error[E009]: unknown identifier `+"`foo`"+` + --> line:1:26 + | +1 | rule test_2 { condition: foo } + | ^^^ this identifier has not been declared + |`, + }, + }, c.Errors()) } diff --git a/lib/Cargo.toml b/lib/Cargo.toml index f5f281d75..567c75843 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -207,7 +207,7 @@ roxmltree = { workspace = true, optional = true } rsa = { workspace = true, optional = true } smallvec = { workspace = true, features = ["serde"] } serde = { workspace = true, features = ["rc"] } -serde_json = { workspace = true } +serde_json = { workspace = true, features = ["preserve_order"] } thiserror = { workspace = true } tlsh-fixed = { workspace = true, optional = true } uuid = { workspace = true, optional = true, features = ["v4"] } @@ -216,7 +216,7 @@ wasmtime = { workspace = true, features = ["cranelift", "parallel-compilation"] x509-parser = { workspace = true, optional = true } yansi = { workspace = true } 
yara-x-macros = { workspace = true } -yara-x-parser = { workspace = true } +yara-x-parser = { workspace = true, features = ["serde"] } lingua = { version = "1.6.0", optional = true, default-features = false, features = ["english", "german", "french", "spanish"] } diff --git a/lib/src/compiler/context.rs b/lib/src/compiler/context.rs index 95ef2da84..0c667b94d 100644 --- a/lib/src/compiler/context.rs +++ b/lib/src/compiler/context.rs @@ -4,12 +4,13 @@ use std::rc::Rc; use yara_x_parser::ast::{Ident, WithSpan}; +use crate::compiler::errors::{CompileError, UnknownPattern}; use crate::compiler::ir::PatternIdx; use crate::compiler::report::ReportBuilder; use crate::compiler::{ir, Warnings}; use crate::symbols::{StackedSymbolTable, SymbolLookup}; use crate::types::Type; -use crate::{wasm, CompileError}; +use crate::wasm; /// Structure that contains information and data structures required during the /// current compilation process. @@ -55,8 +56,7 @@ impl<'a, 'src, 'sym> CompileContext<'a, 'src, 'sym> { pub fn get_pattern_mut( &mut self, ident: &Ident, - ) -> Result<(PatternIdx, &mut ir::PatternInRule<'src>), Box> - { + ) -> Result<(PatternIdx, &mut ir::PatternInRule<'src>), CompileError> { // Make sure that identifier starts with `$`, `#`, `@` or `!`. debug_assert!("$#@!".contains( ident @@ -71,11 +71,11 @@ impl<'a, 'src, 'sym> CompileContext<'a, 'src, 'sym> { .find_position(|p| p.identifier().name[1..] 
== ident.name[1..]) .map(|(pos, pattern)| (PatternIdx::from(pos), pattern)) .ok_or_else(|| { - Box::new(CompileError::unknown_pattern( + UnknownPattern::build( self.report_builder, ident.name.to_string(), ident.span().into(), - )) + ) }) } } diff --git a/lib/src/compiler/errors.rs b/lib/src/compiler/errors.rs index 73af7c136..a713fdc2a 100644 --- a/lib/src/compiler/errors.rs +++ b/lib/src/compiler/errors.rs @@ -1,16 +1,18 @@ +#![cfg_attr(any(), rustfmt::skip)] + use std::fmt::{Debug, Display, Formatter}; use std::io; +use serde::Serialize; use thiserror::Error; -use yara_x_macros::Error as DeriveError; +use yara_x_macros::ErrorEnum; +use yara_x_macros::ErrorStruct; use yara_x_parser::ast; -use crate::compiler::report::{Level, ReportBuilder, SourceRef}; -use crate::compiler::warnings::InvalidWarningCode; -use crate::VariableError; +use crate::compiler::report::{Level, Report, ReportBuilder, CodeLoc, Label}; -/// Errors returned while serializing/deserializing compiled rules. +/// Error returned while serializing/deserializing compiled rules. #[derive(Error, Debug)] pub enum SerializationError { /// The data being deserialized doesn't contain YARA-X serialized rules. @@ -33,326 +35,46 @@ pub enum SerializationError { #[doc(hidden)] pub struct EmitWasmError(#[from] anyhow::Error); -/// Errors returned by the compiler. -#[derive(Error, Debug, Eq, PartialEq)] -#[allow(missing_docs)] -pub enum Error { - #[error(transparent)] - CompileError(#[from] Box), - - #[error(transparent)] - VariableError(#[from] VariableError), - - #[error(transparent)] - InvalidWarningCode(#[from] InvalidWarningCode), -} - -/// An error occurred during the compilation process. -#[derive(DeriveError, Eq, PartialEq)] +/// Error returned when rule compilation fails. 
#[allow(missing_docs)] #[non_exhaustive] +#[derive(ErrorEnum, Error, Clone, PartialEq, Eq)] +#[derive(Serialize)] +#[serde(tag = "type")] pub enum CompileError { - #[error("E001", "syntax error")] - #[label_error("{error_msg}", error_span)] - SyntaxError { - detailed_report: String, - error_msg: String, - error_span: SourceRef, - }, - - #[error("E002", "wrong type")] - #[label_error( - "expression should be {expected_types}, but is `{actual_type}`", - expression_span - )] - WrongType { - detailed_report: String, - expected_types: String, - actual_type: String, - expression_span: SourceRef, - }, - - #[error("E003", "mismatching types")] - #[label_error("this expression is `{type1}`", type1_span)] - #[label_error("this expression is `{type2}`", type2_span)] - MismatchingTypes { - detailed_report: String, - type1: String, - type2: String, - type1_span: SourceRef, - type2_span: SourceRef, - }, - - #[error("E004", "wrong arguments")] - #[label_error("wrong arguments in this call", args_span)] - #[note(note)] - WrongArguments { - detailed_report: String, - args_span: SourceRef, - note: Option, - }, - - #[error("E005", "assignment mismatch")] - #[label_error("this expects {expected_values} value(s)", error_span)] - #[label_error("this produces {actual_values} value(s)", iterable_span)] - AssignmentMismatch { - detailed_report: String, - expected_values: u8, - actual_values: u8, - iterable_span: SourceRef, - error_span: SourceRef, - }, - - #[error("E006", "unexpected negative number")] - #[label_error("this number can not be negative", span)] - UnexpectedNegativeNumber { detailed_report: String, span: SourceRef }, - - #[error("E007", "number out of range")] - #[label_error( - "this number is out of the allowed range [{min}-{max}]", - span - )] - NumberOutOfRange { - detailed_report: String, - min: i64, - max: i64, - span: SourceRef, - }, - - #[error("E008", "unknown field or method `{identifier}`")] - #[label_error("this field or method doesn't exist", span)] - 
UnknownField { - detailed_report: String, - identifier: String, - span: SourceRef, - }, - - #[error("E009", "unknown identifier `{identifier}`")] - #[label_error("this identifier has not been declared", span)] - #[note(note)] - UnknownIdentifier { - detailed_report: String, - identifier: String, - span: SourceRef, - note: Option, - }, - - #[error("E010", "unknown module `{identifier}`")] - #[label_error("module `{identifier}` not found", span)] - UnknownModule { - detailed_report: String, - identifier: String, - span: SourceRef, - }, - - #[error("E011", "invalid range")] - #[label_error("{error_msg}", span)] - InvalidRange { - detailed_report: String, - error_msg: String, - span: SourceRef, - }, - - #[error("E012", "duplicate rule `{new_rule}`")] - #[label_note( - "`{new_rule}` declared here for the first time", - existing_rule_span - )] - #[label_error("duplicate declaration of `{new_rule}`", new_rule_span)] - DuplicateRule { - detailed_report: String, - new_rule: String, - new_rule_span: SourceRef, - existing_rule_span: SourceRef, - }, - - #[error("E013", "rule `{ident}` conflicts with an existing identifier")] - #[label_error( - "identifier already in use by a module or global variable", - ident_span - )] - ConflictingRuleIdentifier { - detailed_report: String, - ident: String, - ident_span: SourceRef, - }, - - #[error("E014", "invalid regular expression")] - #[label_error("{error}", span)] - #[note(note)] - InvalidRegexp { - detailed_report: String, - error: String, - span: SourceRef, - note: Option, - }, - - #[error( - "E015", - "mixing greedy and non-greedy quantifiers in regular expression" - )] - #[label_error("this is {quantifier1_greediness}", quantifier1_span)] - #[label_error("this is {quantifier2_greediness}", quantifier2_span)] - MixedGreediness { - detailed_report: String, - quantifier1_greediness: String, - quantifier2_greediness: String, - quantifier1_span: SourceRef, - quantifier2_span: SourceRef, - }, - - #[error("E016", "no matching patterns")] 
- #[label_error("there's no pattern in this set", span)] - #[note(note)] - EmptyPatternSet { - detailed_report: String, - span: SourceRef, - note: Option, - }, - - #[error("E017", "`entrypoint` is unsupported`")] - #[label_error("the `entrypoint` keyword is not supported anymore", span)] - #[label_help( - "use `pe.entry_point` or `elf.entry_point` or `macho.entry_point`", - span - )] - EntrypointUnsupported { detailed_report: String, span: SourceRef }, - - #[error("E018", "slow pattern")] - #[label_error("this pattern may slow down the scan", span)] - SlowPattern { detailed_report: String, span: SourceRef }, - - #[error("E117", "invalid pattern modifier")] - #[label_error("{error_msg}", error_span)] - InvalidModifier { - detailed_report: String, - error_msg: String, - error_span: SourceRef, - }, - - #[error( - "E019", - "invalid modifier combination: `{modifier1}` `{modifier2}`" - )] - #[label_error("`{modifier1}` modifier used here", modifier1_span)] - #[label_error("`{modifier2}` modifier used here", modifier2_span)] - #[note(note)] - InvalidModifierCombination { - detailed_report: String, - modifier1: String, - modifier2: String, - modifier1_span: SourceRef, - modifier2_span: SourceRef, - note: Option, - }, - - #[error("E020", "duplicate pattern modifier")] - #[label_error("duplicate modifier", modifier_span)] - DuplicateModifier { detailed_report: String, modifier_span: SourceRef }, - - #[error("E021", "duplicate tag `{tag}`")] - #[label_error("duplicate tag", tag_span)] - DuplicateTag { detailed_report: String, tag: String, tag_span: SourceRef }, - - #[error("E022", "unused pattern `{pattern_ident}`")] - #[label_error( - "this pattern was not used in the condition", - pattern_ident_span - )] - UnusedPattern { - detailed_report: String, - pattern_ident: String, - pattern_ident_span: SourceRef, - }, - - #[error("E023", "duplicate pattern `{pattern_ident}`")] - #[label_error( - "duplicate declaration of `{pattern_ident}`", - new_pattern_span - )] - #[label_note( 
- "`{pattern_ident}` declared here for the first time", - existing_pattern_span - )] - DuplicatePattern { - detailed_report: String, - pattern_ident: String, - new_pattern_span: SourceRef, - existing_pattern_span: SourceRef, - }, - - #[error("E024", "invalid pattern `{pattern_ident}`")] - #[label_error("{error_msg}", error_span)] - #[note(note)] - InvalidPattern { - detailed_report: String, - pattern_ident: String, - error_msg: String, - error_span: SourceRef, - note: Option, - }, - - #[error("E025", "unknown pattern `{pattern_ident}`")] - #[label_error( - "this pattern is not declared in the `strings` section", - pattern_ident_span - )] - UnknownPattern { - detailed_report: String, - pattern_ident: String, - pattern_ident_span: SourceRef, - }, - - #[error("E026", "invalid base64 alphabet")] - #[label_error("{error_msg}", error_span)] - InvalidBase64Alphabet { - detailed_report: String, - error_msg: String, - error_span: SourceRef, - }, - - #[error("E027", "invalid integer")] - #[label_error("{error_msg}", error_span)] - InvalidInteger { - detailed_report: String, - error_msg: String, - error_span: SourceRef, - }, - - #[error("E028", "invalid float")] - #[label_error("{error_msg}", error_span)] - InvalidFloat { - detailed_report: String, - error_msg: String, - error_span: SourceRef, - }, - - #[error("E029", "invalid escape sequence")] - #[label_error("{error_msg}", error_span)] - InvalidEscapeSequence { - detailed_report: String, - error_msg: String, - error_span: SourceRef, - }, - - #[error("E030", "invalid regexp modifier `{modifier}`")] - #[label_error("invalid modifier", error_span)] - InvalidRegexpModifier { - detailed_report: String, - modifier: String, - error_span: SourceRef, - }, - - #[error("E031", "unexpected escape sequence")] - #[label_error( - "escape sequences are not allowed in this string", - error_span - )] - UnexpectedEscapeSequence { detailed_report: String, error_span: SourceRef }, - - #[error("E032", "invalid UTF-8")] - #[label_error("invalid 
UTF-8 character", error_span)] - InvalidUTF8 { detailed_report: String, error_span: SourceRef }, + AssignmentMismatch(Box), + ConflictingRuleIdentifier(Box), + DuplicateModifier(Box), + DuplicatePattern(Box), + DuplicateRule(Box), + DuplicateTag(Box), + EmptyPatternSet(Box), + EntrypointUnsupported(Box), + InvalidBase64Alphabet(Box), + InvalidEscapeSequence(Box), + InvalidFloat(Box), + InvalidInteger(Box), + InvalidModifier(Box), + InvalidModifierCombination(Box), + InvalidPattern(Box), + InvalidRange(Box), + InvalidRegexp(Box), + InvalidRegexpModifier(Box), + InvalidUTF8(Box), + MismatchingTypes(Box), + MixedGreediness(Box), + NumberOutOfRange(Box), + SlowPattern(Box), + SyntaxError(Box), + UnexpectedEscapeSequence(Box), + UnexpectedNegativeNumber(Box), + UnknownField(Box), + UnknownIdentifier(Box), + UnknownModule(Box), + UnknownPattern(Box), + UnusedPattern(Box), + WrongArguments(Box), + WrongType(Box), } impl CompileError { @@ -362,54 +84,37 @@ impl CompileError { ) -> Self { match err { ast::Error::SyntaxError { message, span } => { - CompileError::syntax_error( - report_builder, - message, - span.into(), - ) + SyntaxError::build(report_builder, message, span.into()) } ast::Error::InvalidInteger { message, span } => { - CompileError::invalid_integer( - report_builder, - message, - span.into(), - ) + InvalidInteger::build(report_builder, message, span.into()) } ast::Error::InvalidFloat { message, span } => { - CompileError::invalid_float( - report_builder, - message, - span.into(), - ) + InvalidFloat::build(report_builder, message, span.into()) } ast::Error::InvalidRegexpModifier { message, span } => { - CompileError::invalid_regexp_modifier( + InvalidRegexpModifier::build( report_builder, message, span.into(), ) } ast::Error::InvalidEscapeSequence { message, span } => { - CompileError::invalid_escape_sequence( + InvalidEscapeSequence::build( report_builder, message, span.into(), ) } ast::Error::UnexpectedEscapeSequence(span) => { - 
CompileError::unexpected_escape_sequence( - report_builder, - span.into(), - ) + UnexpectedEscapeSequence::build(report_builder, span.into()) } ast::Error::InvalidUTF8(span) => { - CompileError::invalid_utf_8(report_builder, span.into()) + InvalidUTF8::build(report_builder, span.into()) } } } -} -impl CompileError { /// Utility function that receives an array of strings and joins them /// together separated by commas and with "or" before the last one. /// For example, if input is `["s1", "s2", "s3"]` the result is: @@ -452,3 +157,433 @@ impl CompileError { } } } + +/// A syntax error was found in the rule. +#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E001", title = "syntax error")] +#[label("{error}", error_loc)] +pub struct SyntaxError { + report: Report, + error: String, + error_loc: CodeLoc, +} + +/// Some expression has an unexpected type. +#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E002", title = "wrong type")] +#[label( + "expression should be {expected_types}, but is `{actual_type}`", + error_loc +)] +pub struct WrongType { + report: Report, + expected_types: String, + actual_type: String, + error_loc: CodeLoc, +} + +/// Operands have mismatching types. +#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E003", title = "mismatching types")] +#[label("this expression is `{type1}`", type1_loc)] +#[label("this expression is `{type2}`", type2_loc)] +pub struct MismatchingTypes { + report: Report, + type1: String, + type2: String, + type1_loc: CodeLoc, + type2_loc: CodeLoc, +} + +/// Wrong arguments when calling a function. 
+#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E004", title = "wrong arguments")] +#[label("wrong arguments in this call", error_loc)] +#[note(note)] +pub struct WrongArguments { + report: Report, + error_loc: CodeLoc, + note: Option, +} + +/// Mismatch between number of variables and number of values in a loop +/// expression. +#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E005", title = "assignment mismatch")] +#[label("this expects {expected_values} value(s)", error_loc)] +#[label("this produces {actual_values} value(s)", iterable_loc)] +pub struct AssignmentMismatch { + report: Report, + expected_values: u8, + actual_values: u8, + iterable_loc: CodeLoc, + error_loc: CodeLoc, +} + +/// Negative number used where positive number was expected. +#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E006", title = "unexpected negative number")] +#[label("this number can not be negative", error_loc)] +pub struct UnexpectedNegativeNumber { + report: Report, + error_loc: CodeLoc, +} + +/// A number is out of the allowed range. +#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E007", title = "number out of range")] +#[label("this number is out of the allowed range [{min}-{max}]", error_loc)] +pub struct NumberOutOfRange { + report: Report, + min: i64, + max: i64, + error_loc: CodeLoc, +} + +/// Unknown field or method name. +#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E008", title = "unknown field or method `{identifier}`")] +#[label("this field or method doesn't exist", error_loc)] +pub struct UnknownField { + report: Report, + identifier: String, + error_loc: CodeLoc, +} + +/// Unknown identifier. 
+#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E009", title = "unknown identifier `{identifier}`")] +#[label("this identifier has not been declared", identifier_loc)] +#[note(note)] +pub struct UnknownIdentifier { + report: Report, + identifier: String, + identifier_loc: CodeLoc, + note: Option, +} + +impl UnknownIdentifier { + /// Name of the unknown identifier. + #[inline] + pub fn identifier(&self) -> &str { + self.identifier.as_str() + } + /// Location of the unknown identifier. + pub(crate) fn identifier_location(&self) -> &CodeLoc { + &self.identifier_loc + } +} + +/// Unknown module. +#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E010", title = "unknown module `{identifier}`")] +#[label("module `{identifier}` not found", error_loc)] +pub struct UnknownModule { + report: Report, + identifier: String, + error_loc: CodeLoc, +} + +/// Invalid range. +#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E011", title = "invalid range")] +#[label("{error}", error_loc)] +pub struct InvalidRange { + report: Report, + error: String, + error_loc: CodeLoc, +} + +/// Two rules have the same name. +#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E012", title = "duplicate rule `{new_rule}`")] +#[label( + "duplicate declaration of `{new_rule}`", + duplicate_rule_loc, + Level::Error +)] +#[label( + "`{new_rule}` declared here for the first time", + existing_rule_loc, + Level::Note +)] +pub struct DuplicateRule { + report: Report, + new_rule: String, + duplicate_rule_loc: CodeLoc, + existing_rule_loc: CodeLoc, +} + + +/// A rule has the same name as a module or global variable. 
+#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error( + code = "E013", + title = "rule `{identifier}` conflicts with an existing identifier" +)] +#[label("identifier already in use by a module or global variable", error_loc)] +pub struct ConflictingRuleIdentifier { + report: Report, + identifier: String, + error_loc: CodeLoc, +} + +/// A regular expression is invalid. +#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E014", title = "invalid regular expression")] +#[label("{error}", error_loc)] +#[note(note)] +pub struct InvalidRegexp { + report: Report, + error: String, + error_loc: CodeLoc, + note: Option, +} + +/// A regular expression contains a mixture of greedy and non-greedy quantifiers. +#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error( + code = "E015", + title = "mixing greedy and non-greedy quantifiers in regular expression" +)] +#[label("this is {quantifier1_greediness}", quantifier1_loc)] +#[label("this is {quantifier2_greediness}", quantifier2_loc)] +pub struct MixedGreediness { + report: Report, + quantifier1_greediness: String, + quantifier2_greediness: String, + quantifier1_loc: CodeLoc, + quantifier2_loc: CodeLoc, +} + +/// A set of patterns doesn't contain any patterns. +#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E016", title = "no matching patterns")] +#[label("there's no pattern in this set", error_loc)] +#[note(note)] +pub struct EmptyPatternSet { + report: Report, + error_loc: CodeLoc, + note: Option, +} + +/// The `entrypoint` keyword is not supported. 
+#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E017", title = "`entrypoint` is unsupported")] +#[label("the `entrypoint` keyword is not supported anymore", error_loc)] +#[label( + "use `pe.entry_point` or `elf.entry_point` or `macho.entry_point`", + error_loc, + Level::Help +)] +pub struct EntrypointUnsupported { + report: Report, + error_loc: CodeLoc, +} + +/// Some pattern may be potentially slow. +#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E018", title = "slow pattern")] +#[label("this pattern may slow down the scan", error_loc)] +pub struct SlowPattern { + report: Report, + error_loc: CodeLoc, +} + +/// A pattern has modifiers that can't be used together. +#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error( + code = "E019", + title = "invalid modifier combination: `{modifier1}` `{modifier2}`" +)] +#[label("`{modifier1}` modifier used here", modifier1_loc)] +#[label("`{modifier2}` modifier used here", modifier2_loc)] +#[note(note)] +pub struct InvalidModifierCombination { + report: Report, + modifier1: String, + modifier2: String, + modifier1_loc: CodeLoc, + modifier2_loc: CodeLoc, + note: Option, +} + +/// A pattern has duplicate modifiers. +#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E020", title = "duplicate pattern modifier")] +#[label("duplicate modifier", error_loc)] +pub struct DuplicateModifier { + report: Report, + error_loc: CodeLoc, +} + +/// A rule has duplicate tags. +#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E021", title = "duplicate tag `{tag}`")] +#[label("duplicate tag", error_loc)] +pub struct DuplicateTag { + report: Report, + tag: String, + error_loc: CodeLoc, +} + +/// A rule defines a pattern that is not used in the condition. 
+#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E022", title = "unused pattern `{pattern_ident}`")] +#[label("this pattern was not used in the condition", error_loc)] +pub struct UnusedPattern { + report: Report, + pattern_ident: String, + error_loc: CodeLoc, +} + +/// A rule has two patterns with the same identifier. +#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E023", title = "duplicate pattern `{pattern_ident}`")] +#[label("duplicate declaration of `{pattern_ident}`", error_loc)] +#[label( + "`{pattern_ident}` declared here for the first time", + note_loc, + Level::Note +)] +pub struct DuplicatePattern { + report: Report, + pattern_ident: String, + error_loc: CodeLoc, + note_loc: CodeLoc, +} + +/// A rule has an invalid pattern. +#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E024", title = "invalid pattern `{pattern_ident}`")] +#[label("{error}", error_loc)] +#[note(note)] +pub struct InvalidPattern { + report: Report, + pattern_ident: String, + error: String, + error_loc: CodeLoc, + note: Option, +} + +/// Some rule condition uses a pattern that was not defined. +#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E025", title = "unknown pattern `{pattern_ident}`")] +#[label("this pattern is not declared in the `strings` section", error_loc)] +pub struct UnknownPattern { + report: Report, + pattern_ident: String, + error_loc: CodeLoc, +} + +/// Wrong alphabet for the `base64` or `base64wide` modifiers. +#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E026", title = "invalid base64 alphabet")] +#[label("{error}", error_loc)] +pub struct InvalidBase64Alphabet { + report: Report, + error: String, + error_loc: CodeLoc, +} + +/// Invalid integer. 
+#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E027", title = "invalid integer")] +#[label("{error}", error_loc)] +pub struct InvalidInteger { + report: Report, + error: String, + error_loc: CodeLoc, +} + +/// Invalid float. +#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E028", title = "invalid float")] +#[label("{error}", error_loc)] +pub struct InvalidFloat { + report: Report, + error: String, + error_loc: CodeLoc, +} + +/// A text pattern contains an invalid escape sequence. +#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E029", title = "invalid escape sequence")] +#[label("{error}", error_loc)] +pub struct InvalidEscapeSequence { + report: Report, + error: String, + error_loc: CodeLoc, +} + +/// Invalid modifier for a regular expression. +#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E030", title = "invalid regexp modifier `{modifier}`")] +#[label("invalid modifier", error_loc)] +pub struct InvalidRegexpModifier { + report: Report, + modifier: String, + error_loc: CodeLoc, +} + +/// A string literal contains escaped sequences and it shouldn't. +#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E031", title = "unexpected escape sequence")] +#[label("escape sequences are not allowed in this string", error_loc)] +pub struct UnexpectedEscapeSequence { + report: Report, + error_loc: CodeLoc, +} + + +/// Source code contains invalid UTF-8 characters. +#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E032", title = "invalid UTF-8")] +#[label("invalid UTF-8 character", error_loc)] +pub struct InvalidUTF8 { + report: Report, + error_loc: CodeLoc, +} + +/// Some pattern has an invalid modifier. 
+#[derive(ErrorStruct, Clone, Debug, PartialEq, Eq)] +#[associated_enum(CompileError)] +#[error(code = "E033", title = "invalid pattern modifier")] +#[label("{error}", error_loc)] +pub struct InvalidModifier { + report: Report, + error: String, + error_loc: CodeLoc, +} diff --git a/lib/src/compiler/ir/ast2ir.rs b/lib/src/compiler/ir/ast2ir.rs index 6d84bbda8..41f834955 100644 --- a/lib/src/compiler/ir/ast2ir.rs +++ b/lib/src/compiler/ir/ast2ir.rs @@ -12,6 +12,14 @@ use yara_x_parser::ast; use yara_x_parser::ast::WithSpan; use yara_x_parser::Span; +use crate::compiler::errors::{ + AssignmentMismatch, DuplicateModifier, DuplicatePattern, EmptyPatternSet, + EntrypointUnsupported, InvalidBase64Alphabet, InvalidModifier, + InvalidModifierCombination, InvalidPattern, InvalidRange, InvalidRegexp, + MismatchingTypes, MixedGreediness, NumberOutOfRange, SyntaxError, + UnexpectedNegativeNumber, UnknownField, UnknownIdentifier, WrongArguments, + WrongType, +}; use crate::compiler::ir::hex2hir::hex_pattern_hir_from_ast; use crate::compiler::ir::{ Expr, ForIn, ForOf, FuncCall, Iterable, LiteralPattern, Lookup, @@ -19,8 +27,7 @@ use crate::compiler::ir::{ PatternIdx, PatternInRule, Quantifier, Range, RegexpPattern, }; use crate::compiler::report::ReportBuilder; -use crate::compiler::warnings::Warning; -use crate::compiler::{CompileContext, CompileError}; +use crate::compiler::{warnings, CompileContext, CompileError}; use crate::modules::BUILTIN_MODULES; use crate::re; use crate::re::parser::Error; @@ -30,7 +37,7 @@ use crate::types::{Map, Regexp, Type, TypeValue, Value}; pub(in crate::compiler) fn patterns_from_ast<'src>( ctx: &mut CompileContext<'_, 'src, '_>, patterns: Option<&Vec>>, -) -> Result<(), Box> { +) -> Result<(), CompileError> { for pattern_ast in patterns.into_iter().flatten() { let pattern = pattern_from_ast(ctx, pattern_ast)?; @@ -40,12 +47,12 @@ pub(in crate::compiler) fn patterns_from_ast<'src>( .iter() .find(|p| p.identifier.name == pattern.identifier.name) { - 
return Err(Box::new(CompileError::duplicate_pattern( + return Err(DuplicatePattern::build( ctx.report_builder, pattern.identifier().name.to_string(), pattern.identifier().span().into(), existing.identifier.span().into(), - ))); + )); } } @@ -57,15 +64,15 @@ pub(in crate::compiler) fn patterns_from_ast<'src>( fn pattern_from_ast<'src>( ctx: &mut CompileContext, pattern: &ast::Pattern<'src>, -) -> Result, Box> { +) -> Result, CompileError> { // Check for duplicate pattern modifiers. let mut modifiers = BTreeSet::new(); for modifier in pattern.modifiers().iter() { if !modifiers.insert(modifier.as_text()) { - return Err(Box::new(CompileError::duplicate_modifier( + return Err(DuplicateModifier::build( ctx.report_builder, modifier.span().into(), - ))); + )); } } @@ -83,7 +90,7 @@ fn pattern_from_ast<'src>( pub(in crate::compiler) fn text_pattern_from_ast<'src>( ctx: &mut CompileContext, pattern: &ast::TextPattern<'src>, -) -> Result, Box> { +) -> Result, CompileError> { let ascii = pattern.modifiers.ascii(); let xor = pattern.modifiers.xor(); let nocase = pattern.modifiers.nocase(); @@ -104,14 +111,14 @@ pub(in crate::compiler) fn text_pattern_from_ast<'src>( for (name1, modifier1, name2, modifier2) in invalid_combinations { if let (Some(modifier1), Some(modifier2)) = (modifier1, modifier2) { - return Err(Box::new(CompileError::invalid_modifier_combination( + return Err(InvalidModifierCombination::build( ctx.report_builder, name1.to_string(), name2.to_string(), modifier1.span().into(), modifier2.span().into(), Some("these two modifiers can't be used together".to_string()), - ))); + )); }; } @@ -136,14 +143,14 @@ pub(in crate::compiler) fn text_pattern_from_ast<'src>( let xor_range = match xor { Some(modifier @ ast::PatternModifier::Xor { start, end, .. 
}) => { if *end < *start { - return Err(Box::new(CompileError::invalid_range( + return Err(InvalidRange::build( ctx.report_builder, format!( "lower bound ({}) is greater than upper bound ({})", start, end ), modifier.span().into(), - ))); + )); } flags.set(PatternFlags::Xor); Some(*start..=*end) @@ -159,11 +166,11 @@ pub(in crate::compiler) fn text_pattern_from_ast<'src>( let alphabet_str = alphabet.as_str().unwrap(); match base64::alphabet::Alphabet::new(alphabet_str) { Ok(_) => Ok(Some(String::from(alphabet_str))), - Err(err) => Err(Box::new(CompileError::invalid_base_64_alphabet( + Err(err) => Err(InvalidBase64Alphabet::build( ctx.report_builder, err.to_string().to_lowercase(), alphabet.span().into(), - ))), + )), } }; @@ -206,13 +213,13 @@ pub(in crate::compiler) fn text_pattern_from_ast<'src>( let text: BString = pattern.text.value.as_ref().into(); if text.len() < min_len { - return Err(Box::new(CompileError::invalid_pattern( + return Err(InvalidPattern::build( ctx.report_builder, pattern.identifier.name.to_string(), "this pattern is too short".to_string(), pattern.text.span().into(), note, - ))); + )); } Ok(PatternInRule { @@ -232,18 +239,18 @@ pub(in crate::compiler) fn text_pattern_from_ast<'src>( pub(in crate::compiler) fn hex_pattern_from_ast<'src>( ctx: &mut CompileContext, pattern: &ast::HexPattern<'src>, -) -> Result, Box> { +) -> Result, CompileError> { // The only modifier accepted by hex patterns is `private`. for modifier in pattern.modifiers.iter() { match modifier { ast::PatternModifier::Private { .. 
} => {} _ => { - return Err(Box::new(CompileError::invalid_modifier( + return Err(InvalidModifier::build( ctx.report_builder, "this modifier can't be applied to a hex pattern" .to_string(), modifier.span().into(), - ))); + )); } } } @@ -262,7 +269,7 @@ pub(in crate::compiler) fn hex_pattern_from_ast<'src>( pub(in crate::compiler) fn regexp_pattern_from_ast<'src>( ctx: &mut CompileContext, pattern: &ast::RegexpPattern<'src>, -) -> Result, Box> { +) -> Result, CompileError> { // Regular expressions don't accept `base64`, `base64wide` and `xor` // modifiers. for modifier in pattern.modifiers.iter() { @@ -270,11 +277,11 @@ pub(in crate::compiler) fn regexp_pattern_from_ast<'src>( ast::PatternModifier::Base64 { .. } | ast::PatternModifier::Base64Wide { .. } | ast::PatternModifier::Xor { .. } => { - return Err(Box::new(CompileError::invalid_modifier( + return Err(InvalidModifier::build( ctx.report_builder, "this modifier can't be applied to a regexp".to_string(), modifier.span().into(), - ))); + )); } _ => {} } @@ -310,7 +317,7 @@ pub(in crate::compiler) fn regexp_pattern_from_ast<'src>( let i_pos = pattern.regexp.literal.rfind('i').unwrap(); ctx.warnings.add(|| { - Warning::redundant_case_modifier( + warnings::RedundantCaseModifier::build( ctx.report_builder, pattern.modifiers.nocase().unwrap().span().into(), pattern.regexp.span().subspan(i_pos, i_pos + 1).into(), @@ -371,13 +378,10 @@ pub(in crate::compiler) fn regexp_pattern_from_ast<'src>( pub(in crate::compiler) fn expr_from_ast( ctx: &mut CompileContext, expr: &ast::Expr, -) -> Result> { +) -> Result { match expr { ast::Expr::Entrypoint { span } => { - Err(Box::new(CompileError::entrypoint_unsupported( - ctx.report_builder, - span.into(), - ))) + Err(EntrypointUnsupported::build(ctx.report_builder, span.into())) } ast::Expr::Filesize { .. 
} => Ok(Expr::Filesize), @@ -489,10 +493,10 @@ pub(in crate::compiler) fn expr_from_ast( if let Some((expr, msg)) = replacement { ctx.warnings.add(|| { - Warning::boolean_integer_comparison( + warnings::BooleanIntegerComparison::build( ctx.report_builder, - span.into(), msg, + span.into(), ) }); Ok(expr) @@ -571,7 +575,7 @@ pub(in crate::compiler) fn expr_from_ast( // If the current symbol table is `None` it means that the // identifier is not a field or method of some structure. return if current_symbol_table.is_none() { - Err(Box::new(CompileError::unknown_identifier( + Err(UnknownIdentifier::build( ctx.report_builder, ident.name.to_string(), ident.span().into(), @@ -586,13 +590,13 @@ pub(in crate::compiler) fn expr_from_ast( } else { None }, - ))) + )) } else { - Err(Box::new(CompileError::unknown_field( + Err(UnknownField::build( ctx.report_builder, ident.name.to_string(), ident.span().into(), - ))) + )) }; } @@ -616,11 +620,11 @@ pub(in crate::compiler) fn expr_from_ast( // If the identifier is just `$`, and we are not inside a // loop, that's an error. if ctx.for_of_depth == 0 { - return Err(Box::new(CompileError::syntax_error( + return Err(SyntaxError::build( ctx.report_builder, "this `$` is outside of the condition of a `for .. of` statement".to_string(), p.identifier.span().into(), - ))); + )); } // If we are inside a loop, we don't know which is the // PatternId because `$` refers to a different pattern on @@ -653,11 +657,11 @@ pub(in crate::compiler) fn expr_from_ast( // If the identifier is just `#`, and we are not inside a loop, // that's an error. if p.ident.name == "#" && ctx.for_of_depth == 0 { - return Err(Box::new(CompileError::syntax_error( + return Err(SyntaxError::build( ctx.report_builder, "this `#` is outside of the condition of a `for .. of` statement".to_string(), p.ident.span().into(), - ))); + )); } match (p.ident.name, &p.range) { // Cases where the identifier is `#`. 
@@ -695,11 +699,11 @@ pub(in crate::compiler) fn expr_from_ast( // If the identifier is just `@`, and we are not inside a loop, // that's an error. if p.ident.name == "@" && ctx.for_of_depth == 0 { - return Err(Box::new(CompileError::syntax_error( + return Err(SyntaxError::build( ctx.report_builder, "this `@` is outside of the condition of a `for .. of` statement".to_string(), p.ident.span().into(), - ))); + )); } match (p.ident.name, &p.index) { // Cases where the identifier is `@`. @@ -745,11 +749,11 @@ pub(in crate::compiler) fn expr_from_ast( // If the identifier is just `!`, and we are not inside a loop, // that's an error. if p.ident.name == "!" && ctx.for_of_depth == 0 { - return Err(Box::new(CompileError::syntax_error( + return Err(SyntaxError::build( ctx.report_builder, "this `!` is outside of the condition of a `for .. of` statement".to_string(), p.ident.span().into(), - ))); + )); } match (p.ident.name, &p.index) { // Cases where the identifier is `!`. @@ -824,12 +828,12 @@ pub(in crate::compiler) fn expr_from_ast( // The type of the key/index expression should correspond // with the type of the map's keys. 
if key_ty != ty { - return Err(Box::new(CompileError::wrong_type( + return Err(WrongType::build( ctx.report_builder, format!("`{}`", key_ty), ty.to_string(), expr.index.span().into(), - ))); + )); } Ok(Expr::Lookup(Box::new(Lookup { @@ -838,12 +842,12 @@ pub(in crate::compiler) fn expr_from_ast( index, }))) } - type_value => Err(Box::new(CompileError::wrong_type( + type_value => Err(WrongType::build( ctx.report_builder, format!("`{}` or `{}`", Type::Array, Type::Map), type_value.ty().to_string(), expr.primary.span().into(), - ))), + )), } } } @@ -852,7 +856,7 @@ pub(in crate::compiler) fn expr_from_ast( pub(in crate::compiler) fn bool_expr_from_ast( ctx: &mut CompileContext, ast: &ast::Expr, -) -> Result> { +) -> Result { let expr = expr_from_ast(ctx, ast)?; warn_if_not_bool(ctx, expr.ty(), ast.span()); Ok(expr) @@ -861,7 +865,7 @@ pub(in crate::compiler) fn bool_expr_from_ast( fn of_expr_from_ast( ctx: &mut CompileContext, of: &ast::Of, -) -> Result> { +) -> Result { let quantifier = quantifier_from_ast(ctx, &of.quantifier)?; // Create new stack frame with 5 slots: // 1 slot for the loop variable, a bool in this case. 
@@ -878,7 +882,7 @@ fn of_expr_from_ast( check_type(ctx, expr.ty(), e.span(), &[Type::Bool])?; Ok(expr) }) - .collect::, Box>>()?; + .collect::, CompileError>>()?; let num_items = tuple.len(); (OfItems::BoolExprTuple(tuple), num_items) @@ -896,7 +900,7 @@ fn of_expr_from_ast( if let Quantifier::Expr(expr) = &quantifier { if let TypeValue::Integer(Value::Const(value)) = expr.type_value() { if value > num_items.try_into().unwrap() { - ctx.warnings.add(|| Warning::invariant_boolean_expression( + ctx.warnings.add(|| warnings::InvariantBooleanExpression::build( ctx.report_builder, false, of.span().into(), @@ -949,7 +953,7 @@ fn of_expr_from_ast( if raise_warning { ctx.warnings.add(|| { - Warning::potentially_unsatisfiable_expression( + warnings::PotentiallyUnsatisfiableExpression::build( ctx.report_builder, of.quantifier.span().into(), of.anchor.as_ref().unwrap().span().into(), @@ -968,7 +972,7 @@ fn of_expr_from_ast( fn for_of_expr_from_ast( ctx: &mut CompileContext, for_of: &ast::ForOf, -) -> Result> { +) -> Result { let quantifier = quantifier_from_ast(ctx, &for_of.quantifier)?; let pattern_set = pattern_set_from_ast(ctx, &for_of.pattern_set)?; // Create new stack frame with 5 slots: @@ -1007,7 +1011,7 @@ fn for_of_expr_from_ast( fn for_in_expr_from_ast( ctx: &mut CompileContext, for_in: &ast::ForIn, -) -> Result> { +) -> Result { let quantifier = quantifier_from_ast(ctx, &for_in.quantifier)?; let iterable = iterable_from_ast(ctx, &for_in.iterable)?; @@ -1050,13 +1054,13 @@ fn for_in_expr_from_ast( if loop_vars.len() != expected_vars.len() { let span = loop_vars.first().unwrap().span(); let span = span.combine(&loop_vars.last().unwrap().span()); - return Err(Box::new(CompileError::assignment_mismatch( + return Err(AssignmentMismatch::build( ctx.report_builder, loop_vars.len() as u8, expected_vars.len() as u8, for_in.iterable.span().into(), span.into(), - ))); + )); } // Create stack frame with capacity for the loop variables, plus 4 @@ -1100,7 +1104,7 @@ fn 
for_in_expr_from_ast( fn iterable_from_ast( ctx: &mut CompileContext, iter: &ast::Iterable, -) -> Result> { +) -> Result { match iter { ast::Iterable::Range(range) => { Ok(Iterable::Range(range_from_ast(ctx, range)?)) @@ -1132,14 +1136,12 @@ fn iterable_from_ast( // type mismatch. if let Some((prev_ty, prev_span)) = prev { if prev_ty != ty { - return Err(Box::new( - CompileError::mismatching_types( - ctx.report_builder, - prev_ty.to_string(), - ty.to_string(), - prev_span.into(), - span.into(), - ), + return Err(MismatchingTypes::build( + ctx.report_builder, + prev_ty.to_string(), + ty.to_string(), + prev_span.into(), + span.into(), )); } } @@ -1154,7 +1156,7 @@ fn iterable_from_ast( fn anchor_from_ast( ctx: &mut CompileContext, anchor: &Option, -) -> Result> { +) -> Result { match anchor { Some(ast::MatchAnchor::At(at_)) => Ok(MatchAnchor::At(Box::new( non_negative_integer_from_ast(ctx, &at_.expr)?, @@ -1169,7 +1171,7 @@ fn anchor_from_ast( fn range_from_ast( ctx: &mut CompileContext, range: &ast::Range, -) -> Result> { +) -> Result { let lower_bound = Box::new(non_negative_integer_from_ast(ctx, &range.lower_bound)?); @@ -1186,14 +1188,14 @@ fn range_from_ast( ) = (lower_bound.type_value(), upper_bound.type_value()) { if lower_bound > upper_bound { - return Err(Box::new(CompileError::invalid_range( + return Err(InvalidRange::build( ctx.report_builder, format!( "lower bound ({}) is greater than upper bound ({})", lower_bound, upper_bound ), range.span().into(), - ))); + )); } } @@ -1203,7 +1205,7 @@ fn range_from_ast( fn non_negative_integer_from_ast( ctx: &mut CompileContext, expr: &ast::Expr, -) -> Result> { +) -> Result { let span = expr.span(); let expr = expr_from_ast(ctx, expr)?; let type_value = expr.type_value(); @@ -1212,10 +1214,10 @@ fn non_negative_integer_from_ast( if let TypeValue::Integer(Value::Const(value)) = type_value { if value < 0 { - return Err(Box::new(CompileError::unexpected_negative_number( + return Err(UnexpectedNegativeNumber::build( 
ctx.report_builder, span.into(), - ))); + )); } } @@ -1226,7 +1228,7 @@ fn integer_in_range_from_ast( ctx: &mut CompileContext, expr: &ast::Expr, range: RangeInclusive, -) -> Result> { +) -> Result { let span = expr.span(); let expr = expr_from_ast(ctx, expr)?; let type_value = expr.type_value(); @@ -1237,12 +1239,12 @@ fn integer_in_range_from_ast( // the given range. if let TypeValue::Integer(Value::Const(value)) = type_value { if !range.contains(&value) { - return Err(Box::new(CompileError::number_out_of_range( + return Err(NumberOutOfRange::build( ctx.report_builder, *range.start(), *range.end(), span.into(), - ))); + )); } } @@ -1252,7 +1254,7 @@ fn integer_in_range_from_ast( fn quantifier_from_ast( ctx: &mut CompileContext, quantifier: &ast::Quantifier, -) -> Result> { +) -> Result { match quantifier { ast::Quantifier::None { .. } => Ok(Quantifier::None), ast::Quantifier::All { .. } => Ok(Quantifier::All), @@ -1274,7 +1276,7 @@ fn quantifier_from_ast( fn pattern_set_from_ast( ctx: &mut CompileContext, pattern_set: &ast::PatternSet, -) -> Result, Box> { +) -> Result, CompileError> { let pattern_indexes = match pattern_set { // `x of them` ast::PatternSet::Them { span } => { @@ -1284,11 +1286,11 @@ fn pattern_set_from_ast( .collect(); if pattern_indexes.is_empty() { - return Err(Box::new(CompileError::empty_pattern_set( + return Err(EmptyPatternSet::build( ctx.report_builder, span.into(), Some("this rule doesn't define any patterns".to_string()), - ))); + )); } // Make all the patterns in the set non-anchorable and mark them @@ -1307,7 +1309,7 @@ fn pattern_set_from_ast( .iter() .any(|pattern| item.matches(pattern.identifier())) { - return Err(Box::new(CompileError::empty_pattern_set( + return Err(EmptyPatternSet::build( ctx.report_builder, item.span().into(), Some(if item.wildcard { @@ -1321,7 +1323,7 @@ fn pattern_set_from_ast( item.identifier, ) }), - ))); + )); } } let mut pattern_indexes = Vec::new(); @@ -1347,7 +1349,7 @@ fn pattern_set_from_ast( fn 
func_call_from_ast( ctx: &mut CompileContext, func_call: &ast::FuncCall, -) -> Result> { +) -> Result { let callable = expr_from_ast(ctx, &func_call.callable)?; let type_value = callable.type_value(); @@ -1362,7 +1364,7 @@ fn func_call_from_ast( .args .iter() .map(|arg| expr_from_ast(ctx, arg)) - .collect::, Box>>()?; + .collect::, CompileError>>()?; let arg_types: Vec = args.iter().map(|arg| arg.ty()).collect(); @@ -1394,7 +1396,7 @@ fn func_call_from_ast( // No matching signature was found, that means that the arguments // provided were incorrect. if matching_signature.is_none() { - return Err(Box::new(CompileError::wrong_arguments( + return Err(WrongArguments::build( ctx.report_builder, func_call.args_span().into(), Some(format!( @@ -1413,7 +1415,7 @@ fn func_call_from_ast( .collect::>() .join("\n") )), - ))); + )); } let (signature_index, type_value) = matching_signature.unwrap(); @@ -1429,7 +1431,7 @@ fn func_call_from_ast( fn matches_expr_from_ast( ctx: &mut CompileContext, expr: &ast::BinaryExpr, -) -> Result> { +) -> Result { let span = expr.span(); let lhs_span = expr.lhs.span(); let rhs_span = expr.rhs.span(); @@ -1454,16 +1456,16 @@ fn check_type( ty: Type, span: Span, accepted_types: &[Type], -) -> Result<(), Box> { +) -> Result<(), CompileError> { if accepted_types.contains(&ty) { Ok(()) } else { - Err(Box::new(CompileError::wrong_type( + Err(WrongType::build( ctx.report_builder, CompileError::join_with_or(accepted_types, true), ty.to_string(), span.into(), - ))) + )) } } @@ -1475,7 +1477,7 @@ fn check_operands( rhs_span: Span, accepted_types: &[Type], compatible_types: &[Type], -) -> Result<(), Box> { +) -> Result<(), CompileError> { // Both types must be known. 
assert!(!matches!(lhs_ty, Type::Unknown)); assert!(!matches!(rhs_ty, Type::Unknown)); @@ -1495,13 +1497,13 @@ fn check_operands( }; if !types_are_compatible { - return Err(Box::new(CompileError::mismatching_types( + return Err(MismatchingTypes::build( ctx.report_builder, lhs_ty.to_string(), rhs_ty.to_string(), lhs_span.into(), rhs_span.into(), - ))); + )); } Ok(()) @@ -1514,7 +1516,7 @@ fn re_error_to_compile_error( ) -> CompileError { match err { Error::SyntaxError { msg, span, note } => { - CompileError::invalid_regexp( + InvalidRegexp::build( report_builder, msg, // The error span is relative to the start of the regexp, not to @@ -1539,7 +1541,7 @@ fn re_error_to_compile_error( is_greedy_2, span_1, span_2, - } => CompileError::mixed_greediness( + } => MixedGreediness::build( report_builder, if is_greedy_1 { "greedy" } else { "non-greedy" }.to_string(), if is_greedy_2 { "greedy" } else { "non-greedy" }.to_string(), @@ -1580,7 +1582,7 @@ pub(in crate::compiler) fn warn_if_not_bool( ), _ => None, }; - Warning::non_boolean_as_boolean( + warnings::NonBooleanAsBoolean::build( ctx.report_builder, ty.to_string(), span.into(), @@ -1595,7 +1597,7 @@ macro_rules! gen_unary_op { fn $name( ctx: &mut CompileContext, expr: &ast::UnaryExpr, - ) -> Result> { + ) -> Result { let span = expr.span(); let operand = expr_from_ast(ctx, &expr.operand)?; @@ -1607,7 +1609,7 @@ macro_rules! gen_unary_op { )?; let check_fn: - Option Result<(), Box>> + Option Result<(), CompileError>> = $check_fn; if let Some(check_fn) = check_fn { @@ -1630,7 +1632,7 @@ macro_rules! gen_binary_op { fn $name( ctx: &mut CompileContext, expr: &ast::BinaryExpr, - ) -> Result> { + ) -> Result { let span = expr.span(); let lhs_span = expr.lhs.span(); let rhs_span = expr.rhs.span(); @@ -1649,7 +1651,7 @@ macro_rules! gen_binary_op { )?; let check_fn: - Option Result<(), Box>> + Option Result<(), CompileError>> = $check_fn; if let Some(check_fn) = check_fn { @@ -1672,7 +1674,7 @@ macro_rules! 
gen_string_op { fn $name( ctx: &mut CompileContext, expr: &ast::BinaryExpr, - ) -> Result> { + ) -> Result { let span = expr.span(); let lhs_span = expr.lhs.span(); let rhs_span = expr.rhs.span(); @@ -1706,7 +1708,7 @@ macro_rules! gen_n_ary_operation { fn $name( ctx: &mut CompileContext, expr: &ast::NAryExpr, - ) -> Result> { + ) -> Result { let span = expr.span(); let accepted_types = &[$( $accepted_types ),+]; let compatible_types = &[$( $compatible_types ),+]; @@ -1714,10 +1716,10 @@ macro_rules! gen_n_ary_operation { let operands_hir: Vec = expr .operands() .map(|expr| expr_from_ast(ctx, expr)) - .collect::, Box>>()?; + .collect::, CompileError>>()?; let check_fn: - Option Result<(), Box>> + Option Result<(), CompileError>> = $check_fn; // Make sure that all operands have one of the accepted types. @@ -1748,13 +1750,12 @@ macro_rules! gen_n_ary_operation { }; if !types_are_compatible { - return Err(Box::new(CompileError::mismatching_types( + return Err(MismatchingTypes::build( ctx.report_builder, lhs_ty.to_string(), rhs_ty.to_string(), expr.first().span().combine(&lhs_ast.span()).into(), rhs_ast.span().into(), - ), )); } } @@ -1870,11 +1871,9 @@ gen_binary_op!( Some(|ctx, _lhs, rhs, _lhs_span, rhs_span| { if let TypeValue::Integer(Value::Const(value)) = rhs.type_value() { if value < 0 { - return Err(Box::new( - CompileError::unexpected_negative_number( - ctx.report_builder, - rhs_span.into(), - ), + return Err(UnexpectedNegativeNumber::build( + ctx.report_builder, + rhs_span.into(), )); } } @@ -1890,11 +1889,9 @@ gen_binary_op!( Some(|ctx, _lhs, rhs, _lhs_span, rhs_span| { if let TypeValue::Integer(Value::Const(value)) = rhs.type_value() { if value < 0 { - return Err(Box::new( - CompileError::unexpected_negative_number( - ctx.report_builder, - rhs_span.into(), - ), + return Err(UnexpectedNegativeNumber::build( + ctx.report_builder, + rhs_span.into(), )); } } diff --git a/lib/src/compiler/ir/hex2hir.rs b/lib/src/compiler/ir/hex2hir.rs index 474bc9957..cfd1dcbd9 
100644 --- a/lib/src/compiler/ir/hex2hir.rs +++ b/lib/src/compiler/ir/hex2hir.rs @@ -1,18 +1,17 @@ /*! Functions for converting a hex pattern AST into a HIR. */ -use crate::CompileError; use regex_syntax::hir; use yara_x_parser::ast; use yara_x_parser::ast::WithSpan; use crate::compiler::context::CompileContext; -use crate::compiler::warnings::Warning; -use crate::compiler::ByteMaskCombinator; +use crate::compiler::errors::{CompileError, InvalidPattern}; +use crate::compiler::{warnings, ByteMaskCombinator}; pub(in crate::compiler) fn hex_pattern_hir_from_ast( ctx: &mut CompileContext, pattern: &ast::HexPattern, -) -> Result> { +) -> Result { hex_tokens_hir_from_ast(ctx, &pattern.identifier, &pattern.tokens) } @@ -20,7 +19,7 @@ fn hex_tokens_hir_from_ast( ctx: &mut CompileContext, pattern_ident: &ast::Ident, tokens: &ast::HexTokens, -) -> Result> { +) -> Result { let mut hir_tokens = Vec::with_capacity(tokens.tokens.len()); let mut ast_tokens = tokens.tokens.iter().peekable(); @@ -32,13 +31,13 @@ fn hex_tokens_hir_from_ast( ast::HexToken::NotByte(byte) => { // ~?? is not allowed. 
if byte.mask == 0 { - return Err(Box::new(CompileError::invalid_pattern( + return Err(InvalidPattern::build( ctx.report_builder, pattern_ident.name.to_string(), "negation of `??` is not allowed".to_string(), token.span().into(), None, - ))); + )); } let class = match hex_byte_hir_from_ast(byte).into_kind() { @@ -102,7 +101,7 @@ fn hex_tokens_hir_from_ast( if coalesced { ctx.warnings.add(|| { - Warning::consecutive_jumps( + warnings::ConsecutiveJumps::build( ctx.report_builder, pattern_ident.name.to_string(), format!("{jump}"), @@ -113,17 +112,17 @@ fn hex_tokens_hir_from_ast( match (jump.start, jump.end) { (Some(0), Some(0)) => { - return Err(Box::new(CompileError::invalid_pattern( + return Err(InvalidPattern::build( ctx.report_builder, pattern_ident.name.to_string(), "zero-length jumps are useless, remove it" .to_string(), span.into(), None, - ))); + )); } (Some(start), Some(end)) if start > end => { - return Err(Box::new(CompileError::invalid_pattern( + return Err(InvalidPattern::build( ctx.report_builder, pattern_ident.name.to_string(), format!( @@ -134,7 +133,7 @@ fn hex_tokens_hir_from_ast( } else { None } - ))); + )); } _ => {} } diff --git a/lib/src/compiler/ir/mod.rs b/lib/src/compiler/ir/mod.rs index 9e6bc2bed..7708c324b 100644 --- a/lib/src/compiler/ir/mod.rs +++ b/lib/src/compiler/ir/mod.rs @@ -47,8 +47,9 @@ pub(in crate::compiler) use ast2ir::patterns_from_ast; use yara_x_parser::ast::Ident; use yara_x_parser::Span; +use crate::compiler::errors::{CompileError, NumberOutOfRange}; use crate::compiler::ir::dfs::{DepthFirstSearch, Event}; -use crate::{re, CompileError}; +use crate::re; mod ast2ir; mod dfs; @@ -968,7 +969,7 @@ impl Expr { self, ctx: &mut CompileContext, span: Span, - ) -> Result> { + ) -> Result { match self { Expr::Minus { ref operand } => match operand.type_value() { TypeValue::Integer(Value::Const(v)) => { @@ -1062,7 +1063,7 @@ impl Expr { span: Span, operands: Vec, f: F, - ) -> Result> + ) -> Result where F: FnMut(f64, f64) -> f64, { @@ 
-1090,12 +1091,12 @@ impl Expr { } else if result >= i64::MIN as f64 && result <= i64::MAX as f64 { Ok(Expr::Const(TypeValue::const_integer_from(result as i64))) } else { - Err(Box::new(CompileError::number_out_of_range( + Err(NumberOutOfRange::build( ctx.report_builder, i64::MIN, i64::MAX, span.into(), - ))) + )) } } } diff --git a/lib/src/compiler/mod.rs b/lib/src/compiler/mod.rs index 292080538..3bd01189d 100644 --- a/lib/src/compiler/mod.rs +++ b/lib/src/compiler/mod.rs @@ -23,8 +23,9 @@ use itertools::izip; #[cfg(feature = "logging")] use log::*; use regex_syntax::hir; -use rustc_hash::FxHashMap; +use rustc_hash::{FxHashMap, FxHashSet}; use serde::{Deserialize, Serialize}; +use thiserror::Error; use walrus::FunctionId; use yara_x_parser::ast; @@ -33,7 +34,11 @@ use yara_x_parser::{Parser, Span}; use crate::compiler::base64::base64_patterns; use crate::compiler::emit::{emit_rule_condition, EmitContext}; -use crate::compiler::report::{ReportBuilder, SourceRef}; +use crate::compiler::errors::{ + CompileError, ConflictingRuleIdentifier, DuplicateRule, DuplicateTag, + EmitWasmError, InvalidRegexp, InvalidUTF8, UnknownModule, UnusedPattern, +}; +use crate::compiler::report::{CodeLoc, ReportBuilder}; use crate::compiler::{CompileContext, VarStack}; use crate::modules::BUILTIN_MODULES; use crate::re; @@ -52,9 +57,6 @@ pub(crate) use crate::compiler::atoms::*; pub(crate) use crate::compiler::context::*; pub(crate) use crate::compiler::ir::*; -#[doc(inline)] -pub use crate::compiler::errors::*; - #[doc(inline)] pub use crate::compiler::rules::*; @@ -64,16 +66,17 @@ pub use crate::compiler::warnings::*; mod atoms; mod context; mod emit; -mod errors; mod ir; mod report; mod rules; -mod warnings; -pub mod base64; #[cfg(test)] mod tests; +pub mod base64; +pub mod errors; +pub mod warnings; + /// A structure that describes some YARA source code. 
/// /// This structure contains a `&str` pointing to the code itself, and an @@ -182,7 +185,7 @@ impl<'src> From<&'src [u8]> for SourceCode<'src> { /// let results = scanner.scan("Lorem ipsum".as_bytes()).unwrap(); /// assert_eq!(results.matching_rules().len(), 1); /// ``` -pub fn compile<'src, S>(src: S) -> Result +pub fn compile<'src, S>(src: S) -> Result where S: Into>, { @@ -340,7 +343,7 @@ pub struct Compiler<'a> { /// without causing an error, but a warning is raised to let the user know /// that the module is not supported. Any rule that depends on an unsupported /// module is ignored. - ignored_modules: Vec, + ignored_modules: FxHashSet, /// Keys in this map are the name of rules that will be ignored because they /// depend on unsupported modules, either directly or indirectly. Values are @@ -355,6 +358,9 @@ pub struct Compiler<'a> { /// Warnings generated while compiling the rules. warnings: Warnings, + /// Errors generated while compiling the rules. + errors: Vec, + /// Optional writer where the compiler writes the IR produced by each rule. /// This is used for test cases and debugging. #[cfg(test)] @@ -423,13 +429,14 @@ impl<'a> Compiler<'a> { current_pattern_id: PatternId(0), current_namespace: default_namespace, warnings: Warnings::default(), + errors: Vec::new(), rules: Vec::new(), sub_patterns: Vec::new(), anchored_sub_patterns: Vec::new(), atoms: Vec::new(), re_code: Vec::new(), imported_modules: Vec::new(), - ignored_modules: Vec::new(), + ignored_modules: FxHashSet::default(), ignored_rules: FxHashMap::default(), root_struct: Struct::new().make_root(), report_builder: ReportBuilder::new(), @@ -441,10 +448,26 @@ impl<'a> Compiler<'a> { } } - /// Adds a YARA source code to be compiled. + /// Adds YARA rules in source form for compilation. + /// + /// The `src` parameter accepts any type that implements [`Into`], + /// such as `&str`, `&[u8]`, and naturally, [`SourceCode`] itself. This input + /// can include one or more YARA rules. 
+ /// + /// This function may be invoked multiple times to add several sets of YARA + /// rules. If the rules provided in `src` contain errors that prevent + /// compilation, the function will return the first error encountered. + /// Additionally, the compiler will store this error, along with any others + /// discovered during compilation, which can be accessed using + /// [`Compiler::errors`]. /// - /// This function can be called multiple times. - pub fn add_source<'src, S>(&mut self, src: S) -> Result<&mut Self, Error> + /// Even if a previous invocation resulted in a compilation error, you can + /// continue calling this function. In such cases, any rules that failed to + /// compile will not be included in the final compiled set. + pub fn add_source<'src, S>( + &mut self, + src: S, + ) -> Result<&mut Self, CompileError> where S: Into>, { @@ -477,21 +500,16 @@ impl<'a> Compiler<'a> { } else { span_start }; - return Err(Error::CompileError(Box::new( - CompileError::invalid_utf_8( - &self.report_builder, - Span(span_start as u32..span_end as u32).into(), - ), - ))); + return Err(InvalidUTF8::build( + &self.report_builder, + Span(span_start as u32..span_end as u32).into(), + )); } }; - if !ast.errors().is_empty() { - return Err(Error::CompileError(Box::new(CompileError::from( - &self.report_builder, - ast.into_errors().remove(0), - )))); - } + // Store the current length of the `errors` vector, so that we can + // know if more errors were added. + let existing_errors = self.errors.len(); let mut already_imported = FxHashMap::default(); @@ -504,7 +522,7 @@ impl<'a> Compiler<'a> { already_imported.insert(&import.module_name, import.span()) { self.warnings.add(|| { - Warning::duplicate_import( + warnings::DuplicateImport::build( &self.report_builder, import.module_name.to_string(), import.span().into(), @@ -512,17 +530,31 @@ impl<'a> Compiler<'a> { ) }) } - // Import the module. This updates `self.root_struct` if // necessary. 
- self.c_import(import)?; + if let Err(err) = self.c_import(import) { + self.errors.push(err); + } } // Iterate over the list of declared rules and verify that their // conditions are semantically valid. For each rule add a symbol // to the current namespace. for rule in ast.rules() { - self.c_rule(rule)?; + if let Err(err) = self.c_rule(rule) { + self.errors.push(err); + } + } + + self.errors.extend( + ast.into_errors() + .into_iter() + .map(|err| CompileError::from(&self.report_builder, err)), + ); + + // More errors were added? Return the first error that was added. + if self.errors.len() > existing_errors { + return Err(self.errors[existing_errors].clone()); } Ok(self) @@ -553,21 +585,19 @@ impl<'a> Compiler<'a> { &mut self, ident: &str, value: T, - ) -> Result<&mut Self, Error> + ) -> Result<&mut Self, VariableError> where - Error: From<>::Error>, + VariableError: From<>::Error>, { if !is_valid_identifier(ident) { - return Err( - VariableError::InvalidIdentifier(ident.to_string()).into() - ); + return Err(VariableError::InvalidIdentifier(ident.to_string())); } let var: Variable = value.try_into()?; let type_value: TypeValue = var.into(); if self.root_struct.add_field(ident, type_value).is_some() { - return Err(VariableError::AlreadyExists(ident.to_string()).into()); + return Err(VariableError::AlreadyExists(ident.to_string())); } self.global_symbols @@ -695,7 +725,7 @@ impl<'a> Compiler<'a> { /// ignored module will be ignored, while the rest of rules that /// don't rely on that module will be correctly compiled. 
pub fn ignore_module>(&mut self, module: M) -> &mut Self { - self.ignored_modules.push(module.into()); + self.ignored_modules.insert(module.into()); self } @@ -719,7 +749,7 @@ impl<'a> Compiler<'a> { &mut self, code: &str, enabled: bool, - ) -> Result<&mut Self, Error> { + ) -> Result<&mut Self, InvalidWarningCode> { self.warnings.switch_warning(code, enabled)?; Ok(self) } @@ -764,7 +794,19 @@ impl<'a> Compiler<'a> { self } + /// Retrieves all errors generated by the compiler. + /// + /// This method returns every error encountered during the compilation, + /// across all invocations of [`Compiler::add_source`]. + #[inline] + pub fn errors(&self) -> &[CompileError] { + self.errors.as_slice() + } + /// Returns the warnings emitted by the compiler. + /// + /// This method returns every warning issued during the compilation, + /// across all invocations of [`Compiler::add_source`]. #[inline] pub fn warnings(&self) -> &[Warning] { self.warnings.as_slice() @@ -816,29 +858,27 @@ impl<'a> Compiler<'a> { fn check_for_existing_identifier( &self, ident: &Ident, - ) -> Result<(), Box> { + ) -> Result<(), CompileError> { if let Some(symbol) = self.symbol_table.lookup(ident.name) { return match symbol.kind() { // Found another rule with the same name. - SymbolKind::Rule(rule_id) => { - Err(Box::new(CompileError::duplicate_rule( - &self.report_builder, - ident.name.to_string(), - ident.span().into(), - self.rules - .get(rule_id.0 as usize) - .unwrap() - .ident_ref - .clone(), - ))) - } + SymbolKind::Rule(rule_id) => Err(DuplicateRule::build( + &self.report_builder, + ident.name.to_string(), + ident.span().into(), + self.rules + .get(rule_id.0 as usize) + .unwrap() + .ident_ref + .clone(), + )), // Found another symbol that is not a rule, but has the same // name. 
- _ => Err(Box::new(CompileError::conflicting_rule_identifier( + _ => Err(ConflictingRuleIdentifier::build( &self.report_builder, ident.name.to_string(), ident.span().into(), - ))), + )), }; } Ok(()) @@ -848,15 +888,15 @@ impl<'a> Compiler<'a> { fn check_for_duplicate_tags( &self, tags: &[Ident], - ) -> Result<(), Box> { + ) -> Result<(), CompileError> { let mut s = HashSet::new(); for tag in tags { if !s.insert(tag.name) { - return Err(Box::new(CompileError::duplicate_tag( + return Err(DuplicateTag::build( &self.report_builder, tag.name.to_string(), tag.span().into(), - ))); + )); } } Ok(()) @@ -922,7 +962,7 @@ impl<'a> Compiler<'a> { } impl<'a> Compiler<'a> { - fn c_rule(&mut self, rule: &ast::Rule) -> Result<(), Box> { + fn c_rule(&mut self, rule: &ast::Rule) -> Result<(), CompileError> { // Check if another rule, module or variable has the same identifier // and return an error in that case. self.check_for_existing_identifier(&rule.identifier)?; @@ -988,7 +1028,7 @@ impl<'a> Compiler<'a> { namespace_id: self.current_namespace.id, namespace_ident_id: self.current_namespace.ident_id, ident_id: self.ident_pool.get_or_intern(rule.identifier.name), - ident_ref: SourceRef::new( + ident_ref: CodeLoc::new( self.report_builder.current_source_id(), rule.identifier.span(), ), @@ -1017,7 +1057,7 @@ impl<'a> Compiler<'a> { if let Err(err) = patterns_from_ast(&mut ctx, rule.patterns.as_ref()) { drop(ctx); self.restore_snapshot(snapshot); - return Err(Box::new(*err)); + return Err(err); }; // Convert the rule condition's AST to the intermediate representation @@ -1031,26 +1071,23 @@ impl<'a> Compiler<'a> { // entering this function. Also, if the error is due to an unknown // identifier, but the identifier is one of the unsupported modules, // the error is tolerated and a warning is issued instead. 
- let mut condition = match condition.map_err(|err| *err) { + let mut condition = match condition { Ok(condition) => condition, - Err(CompileError::UnknownIdentifier { - identifier, - span: identifier_ref, - .. - }) if self.ignored_modules.contains(&identifier) - || self.ignored_rules.contains_key(&identifier) => + Err(CompileError::UnknownIdentifier(unknown)) + if self.ignored_rules.contains_key(unknown.identifier()) + || self.ignored_modules.contains(unknown.identifier()) => { self.restore_snapshot(snapshot); - if let Some(module_name) = self.ignored_rules.get(&identifier) + if let Some(module_name) = + self.ignored_rules.get(unknown.identifier()) { self.warnings.add(|| { - Warning::ignored_rule( + warnings::IgnoredRule::build( &self.report_builder, - rule.identifier.name.to_string(), - identifier, module_name.clone(), - identifier_ref, + rule.identifier.name.to_string(), + unknown.identifier_location().clone(), ) }); self.ignored_rules.insert( @@ -1059,25 +1096,27 @@ impl<'a> Compiler<'a> { ); } else { self.warnings.add(|| { - Warning::ignored_module( + warnings::IgnoredModule::build( &self.report_builder, - identifier.clone(), - identifier_ref, + unknown.identifier().to_string(), + unknown.identifier_location().clone(), Some(format!( "the whole rule `{}` will be ignored", rule.identifier.name )), ) }); - self.ignored_rules - .insert(rule.identifier.name.to_string(), identifier); + self.ignored_rules.insert( + rule.identifier.name.to_string(), + unknown.identifier().to_string(), + ); } return Ok(()); } Err(err) => { self.restore_snapshot(snapshot); - return Err(Box::new(err)); + return Err(err); } }; @@ -1094,7 +1133,7 @@ impl<'a> Compiler<'a> { condition.type_value().cast_to_bool().try_as_bool() { self.warnings.add(|| { - Warning::invariant_boolean_expression( + warnings::InvariantBooleanExpression::build( &self.report_builder, value, rule.condition.span().into(), @@ -1133,11 +1172,11 @@ impl<'a> Compiler<'a> { // Raise error is some pattern was not used, 
except if the pattern // identifier starts with underscore. if !pattern.in_use() && !pattern.identifier().starts_with("$_") { - return Err(Box::new(CompileError::unused_pattern( + return Err(UnusedPattern::build( &self.report_builder, pattern.identifier().name.to_string(), pattern.identifier().span().into(), - ))); + )); } // Check if this pattern has been declared before, in this rule or @@ -1224,7 +1263,7 @@ impl<'a> Compiler<'a> { Ok(()) } - fn c_import(&mut self, import: &Import) -> Result<(), Box> { + fn c_import(&mut self, import: &Import) -> Result<(), CompileError> { let module_name = import.module_name; let module = BUILTIN_MODULES.get(module_name); @@ -1235,7 +1274,7 @@ impl<'a> Compiler<'a> { // only a warning. return if self.ignored_modules.iter().any(|m| m == module_name) { self.warnings.add(|| { - Warning::ignored_module( + warnings::IgnoredModule::build( &self.report_builder, module_name.to_string(), import.span().into(), @@ -1246,11 +1285,11 @@ impl<'a> Compiler<'a> { } else { // The module does not exist, and is not explicitly added to // the list of unsupported modules, that's an error. - Err(Box::new(CompileError::unknown_module( + Err(UnknownModule::build( &self.report_builder, module_name.to_string(), import.span().into(), - ))) + )) }; } @@ -1502,7 +1541,7 @@ impl<'a> Compiler<'a> { pattern: RegexpPattern, anchored_at: Option, span: Span, - ) -> Result<(), Box> { + ) -> Result<(), CompileError> { // Try splitting the regexp into multiple chained sub-patterns if it // contains large gaps. 
For example, `{ 01 02 03 [-] 04 05 06 }` is // split into `{ 01 02 03 }` and `{ 04 05 06 }`, where `{ 04 05 06 }` @@ -1578,7 +1617,7 @@ impl<'a> Compiler<'a> { hir: re::hir::Hir, anchored_at: Option, flags: PatternFlagSet, - ) -> Result<(), Box> { + ) -> Result<(), CompileError> { let ascii = flags.contains(PatternFlags::Ascii); let wide = flags.contains(PatternFlags::Wide); let case_insensitive = flags.contains(PatternFlags::Nocase); @@ -1670,7 +1709,7 @@ impl<'a> Compiler<'a> { trailing: &[ChainedPattern], flags: PatternFlagSet, span: Span, - ) -> Result<(), Box> { + ) -> Result<(), CompileError> { let ascii = flags.contains(PatternFlags::Ascii); let wide = flags.contains(PatternFlags::Wide); let case_insensitive = flags.contains(PatternFlags::Nocase); @@ -1816,7 +1855,7 @@ impl<'a> Compiler<'a> { &mut self, hir: &re::hir::Hir, span: Span, - ) -> Result<(Vec, bool), Box> { + ) -> Result<(Vec, bool), CompileError> { // When the `fast-regexp` feature is enabled, try to compile the regexp // for `FastVM` first, if it fails with `Error::FastIncompatible`, the // regexp is not compatible for `FastVM` and `PikeVM` must be used @@ -1839,22 +1878,22 @@ impl<'a> Compiler<'a> { ); let mut atoms = result.map_err(|err| match err { - re::Error::TooLarge => Box::new(CompileError::invalid_regexp( + re::Error::TooLarge => InvalidRegexp::build( &self.report_builder, "regexp is too large".to_string(), (&span).into(), None, - )), + ), _ => unreachable!(), })?; if matches!(hir.minimum_len(), Some(0)) { - return Err(Box::new(CompileError::invalid_regexp( + return Err(InvalidRegexp::build( &self.report_builder, "this regexp can match empty strings".to_string(), (&span).into(), None, - ))); + )); } let mut slow_pattern = false; @@ -1867,13 +1906,16 @@ impl<'a> Compiler<'a> { if slow_pattern { if self.error_on_slow_pattern { - return Err(Box::new(CompileError::slow_pattern( + return Err(errors::SlowPattern::build( &self.report_builder, span.into(), - ))); + )); } else { 
self.warnings.add(|| { - Warning::slow_pattern(&self.report_builder, span.into()) + warnings::SlowPattern::build( + &self.report_builder, + span.into(), + ) }); } } @@ -2269,3 +2311,94 @@ struct Snapshot { sub_patterns_len: usize, symbol_table_len: usize, } + +/// Error returned by [`Compiler::switch_warning`] when the warning +/// code is not valid. +#[derive(Error, Debug, Eq, PartialEq)] +#[error("`{0}` is not a valid warning code")] +pub struct InvalidWarningCode(String); + +/// Represents a list of warnings. +/// +/// This is a wrapper around a `Vec` that contains additional logic +/// for limiting the number of warnings stored in the vector and silencing some +/// warning types. +pub(crate) struct Warnings { + warnings: Vec, + max_warnings: usize, + disabled_warnings: HashSet, +} + +impl Default for Warnings { + fn default() -> Self { + Self { + warnings: Vec::new(), + max_warnings: 100, + disabled_warnings: HashSet::default(), + } + } +} + +impl Warnings { + /// Adds the warning returned by `f` to the list. + /// + /// If the maximum number of warnings has been reached the warning is not + /// added. + #[inline] + pub fn add(&mut self, f: impl FnOnce() -> Warning) { + if self.warnings.len() < self.max_warnings { + let warning = f(); + if !self.disabled_warnings.contains(warning.code()) { + self.warnings.push(warning); + } + } + } + + /// Returns true if the given code is a valid warning code. + pub fn is_valid_code(code: &str) -> bool { + Warning::all_codes().iter().any(|c| *c == code) + } + + /// Enables or disables a specific warning identified by `code`. + /// + /// Returns `true` if the warning was previously enabled, or `false` + /// otherwise. Returns an error if the code doesn't correspond to any + /// of the existing warnings.
+ #[inline] + pub fn switch_warning( + &mut self, + code: &str, + enabled: bool, + ) -> Result { + if !Self::is_valid_code(code) { + return Err(InvalidWarningCode(code.to_string())); + } + if enabled { + Ok(!self.disabled_warnings.remove(code)) + } else { + Ok(self.disabled_warnings.insert(code.to_string())) + } + } + + /// Enables or disables all warnings. + pub fn switch_all_warnings(&mut self, enabled: bool) { + if enabled { + self.disabled_warnings.clear(); + } else { + for c in Warning::all_codes() { + self.disabled_warnings.insert(c.to_string()); + } + } + } + + #[inline] + pub fn as_slice(&self) -> &[Warning] { + self.warnings.as_slice() + } +} + +impl From for Vec { + fn from(value: Warnings) -> Self { + value.warnings + } +} diff --git a/lib/src/compiler/report.rs b/lib/src/compiler/report.rs index 852f38a4a..61e119c1f 100644 --- a/lib/src/compiler/report.rs +++ b/lib/src/compiler/report.rs @@ -1,7 +1,10 @@ +use serde::ser::SerializeStruct; +use serde::{Serialize, Serializer}; use std::borrow::Cow; -use std::cell::{Cell, RefCell}; +use std::cell::Cell; use std::collections::HashMap; -use std::fmt::Debug; +use std::fmt::{Debug, Display, Formatter}; +use std::sync::{Arc, RwLock, RwLockReadGuard, RwLockWriteGuard}; use yara_x_parser::Span; @@ -15,7 +18,7 @@ pub type Level = annotate_snippets::Level; #[derive(Hash, Eq, PartialEq, Clone, Copy, Debug, Default)] pub struct SourceId(u32); -/// A `SourceRef` points to a fragment of source code. +/// A `CodeLoc` points to a fragment of source code. /// /// It consists of a [`SourceId`] and a [`Span`], where the former identifies /// the source file, and the latter a span of text within that source file. @@ -23,31 +26,213 @@ pub struct SourceId(u32); /// The [`SourceId`] is optional, if it is [`None`] it means that the [`Span`] /// is relative to the current source file. 
#[derive(PartialEq, Debug, Clone, Eq, Default)] -pub struct SourceRef { +pub struct CodeLoc { source_id: Option, span: Span, } -impl SourceRef { +impl CodeLoc { pub(crate) fn new(source_id: Option, span: Span) -> Self { Self { source_id, span } } + + /// Returns the span within the source code. + #[inline] + pub fn span(&self) -> &Span { + &self.span + } } -impl From<&Span> for SourceRef { - /// Creates a [`SourceRef`] from a reference to a [`Span`]. +impl From<&Span> for CodeLoc { + /// Creates a [`CodeLoc`] from a reference to a [`Span`]. fn from(span: &Span) -> Self { Self { source_id: None, span: span.clone() } } } -impl From for SourceRef { - /// Creates a [`SourceRef`] from a [`Span`]. +impl From for CodeLoc { + /// Creates a [`CodeLoc`] from a [`Span`]. fn from(span: Span) -> Self { Self { source_id: None, span } } } +/// Represents an error or warning report. +/// +/// This structure represents the message displayed to the user when an error +/// or warning occurs. It implements the [`Display`] trait, ensuring that when +/// printed, it reflects the standard error format used by YARA-X. For example: +/// +/// ```text +/// error[E006]: unexpected negative number +/// --> line:6:12 +/// | +/// 6 | $a in (-1..0) +/// | ^^ this number can not be negative +/// | +/// ``` +/// +/// In addition to generating the report, this type provides access to the +/// individual components of the report, which include: +/// +/// - `level`: Indicates the severity, either `Level::Error` or `Level::Warning`. +/// - `code`: A unique code that identifies the specific error or warning +/// (e.g., "E006"). +/// - `title`: The title of the report (e.g., "unexpected negative number"). +/// - `labels`: A collection of labels included in the report. Each label +/// contains a level, a span, and associated text. 
+#[derive(Clone)] +pub(crate) struct Report { + code_cache: Arc, + default_source_id: SourceId, + with_colors: bool, + level: Level, + code: &'static str, + title: String, + labels: Vec<(Level, CodeLoc, String)>, + note: Option, +} + +impl Report { + /// Returns the report's title. + #[inline] + pub(crate) fn title(&self) -> &str { + self.title.as_str() + } + + /// Returns the report's labels. + pub(crate) fn labels(&self) -> impl Iterator { + self.labels.iter().map(|(level, code_loc, text)| { + let source_id = + code_loc.source_id.unwrap_or(self.default_source_id); + + let code_cache = self.code_cache.read(); + let code_origin = + code_cache.get(&source_id).unwrap().origin.clone(); + + let level = match level { + Level::Error => "error", + Level::Warning => "warning", + Level::Info => "info", + Level::Note => "note", + Level::Help => "help", + }; + + Label { level, code_origin, span: code_loc.span.clone(), text } + }) + } + + /// Returns the report's note. + #[inline] + pub(crate) fn note(&self) -> Option<&str> { + self.note.as_deref() + } +} + +impl Serialize for Report { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + let mut s = serializer.serialize_struct("report", 4)?; + s.serialize_field("code", &self.code)?; + s.serialize_field("title", &self.title)?; + s.serialize_field("labels", &self.labels().collect::>())?; + s.serialize_field("note", &self.note)?; + s.serialize_field("text", &self.to_string())?; + s.end() + } +} + +impl PartialEq for Report { + fn eq(&self, other: &Self) -> bool { + self.level.eq(&other.level) + && self.code.eq(other.code) + && self.title.eq(&other.title) + && self.labels.eq(&other.labels) + && self.note.eq(&other.note) + } +} + +impl Eq for Report {} + +impl Debug for Report { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self) + } +} + +impl Display for Report { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + // Use the SourceId indicated by the first 
label, or the one + // corresponding to the current source file (i.e: the most + // recently registered). + let source_id = self + .labels + .first() + .and_then(|label| label.1.source_id) + .unwrap_or(self.default_source_id); + + let code_cache = self.code_cache.read(); + let mut cache_entry = code_cache.get(&source_id).unwrap(); + let mut src = cache_entry.code.as_str(); + + let mut message = self.level.title(self.title.as_str()).id(self.code); + let mut snippet = annotate_snippets::Snippet::source(src) + .origin(cache_entry.origin.as_deref().unwrap_or("line")) + .fold(true); + + for (level, label_ref, label) in &self.labels { + let label_source_id = + label_ref.source_id.unwrap_or(self.default_source_id); + + // If the current label doesn't belong to the same source file + // finish the current snippet, add it to the error message and + // start a new snippet for the label's source file. + if label_source_id != source_id { + cache_entry = code_cache.get(&label_source_id).unwrap(); + src = cache_entry.code.as_str(); + message = message.snippet(snippet); + snippet = annotate_snippets::Snippet::source(src) + .origin(cache_entry.origin.as_deref().unwrap_or("line")) + .fold(true) + } + + let span_start = label_ref.span.start(); + let span_end = label_ref.span.end(); + + snippet = snippet.annotation( + level.span(span_start..span_end).label(label.as_str()), + ); + } + + message = message.snippet(snippet); + + if let Some(note) = &self.note { + message = message.footer(Level::Note.title(note.as_str())); + } + + let renderer = if self.with_colors { + annotate_snippets::Renderer::styled() + } else { + annotate_snippets::Renderer::plain() + }; + + let text = renderer.render(message); + + write!(f, "{}", text) + } +} + +/// Represents a label in an error or warning report. +#[derive(Serialize)] +pub struct Label<'a> { + level: &'a str, + code_origin: Option, + span: Span, + text: &'a str, +} + /// Builds error and warning reports. 
/// /// `ReportBuilder` helps to create error and warning reports. It stores a copy @@ -60,18 +245,34 @@ pub struct ReportBuilder { with_colors: bool, current_source_id: Cell>, next_source_id: Cell, - // RefCell allows getting a mutable reference to the cache, even if we have - // an immutable reference to the report builder. - cache: RefCell, + code_cache: Arc, } /// A cache containing source files registered in a [`ReportBuilder`]. -struct Cache { - data: HashMap, +struct CodeCache { + data: RwLock>, } -/// Each of the entries stored in [`Cache`]. -struct CacheEntry { +impl CodeCache { + fn new() -> Self { + Self { data: RwLock::new(HashMap::new()) } + } + + pub fn read( + &self, + ) -> RwLockReadGuard<'_, HashMap> { + self.data.read().unwrap() + } + + pub fn write( + &self, + ) -> RwLockWriteGuard<'_, HashMap> { + self.data.write().unwrap() + } +} + +/// Each of the entries stored in [`CodeCache`]. +struct CodeCacheEntry { code: String, origin: Option, } @@ -89,7 +290,7 @@ impl ReportBuilder { with_colors: false, current_source_id: Cell::new(None), next_source_id: Cell::new(SourceId(0)), - cache: RefCell::new(Cache { data: HashMap::new() }), + code_cache: Arc::new(CodeCache::new()), } } @@ -115,17 +316,17 @@ impl ReportBuilder { /// replaces the invalid characters with the UTF-8 replacement character. pub fn register_source(&self, src: &SourceCode) -> &Self { let source_id = self.next_source_id.get(); + self.next_source_id.set(SourceId(source_id.0 + 1)); self.current_source_id.set(Some(source_id)); - let map = &mut self.cache.borrow_mut().data; - map.entry(source_id).or_insert_with(|| { + self.code_cache.write().entry(source_id).or_insert_with(|| { let s = if let Some(s) = src.valid { Cow::Borrowed(s) } else { String::from_utf8_lossy(src.raw.as_ref()) }; - CacheEntry { + CodeCacheEntry { // Replace tab characters with a single space. 
This doesn't // affect code spans, because the number of characters remain // the same, but prevents error messages from being wrongly @@ -134,18 +335,19 @@ impl ReportBuilder { origin: src.origin.clone(), } }); + self } /// Returns the fragment of source code indicated by `source_ref`. - pub fn get_snippet(&self, source_ref: &SourceRef) -> String { + pub fn get_snippet(&self, source_ref: &CodeLoc) -> String { let source_id = source_ref .source_id .or_else(|| self.current_source_id()) .expect("create_report without registering any source code"); - let cache = self.cache.borrow(); - let cache_entry = cache.data.get(&source_id).unwrap(); + let code_cache = self.code_cache.read(); + let cache_entry = code_cache.get(&source_id).unwrap(); let src = cache_entry.code.as_str(); src[source_ref.span.range()].to_string() @@ -157,70 +359,23 @@ impl ReportBuilder { level: Level, code: &'static str, title: String, - labels: Vec<(SourceRef, String, Level)>, + labels: Vec<(Level, CodeLoc, String)>, note: Option, - ) -> String { + ) -> Report { // Make sure there's at least one label. assert!(!labels.is_empty()); - // Use the SourceId indicated by the first label, or the one - // corresponding to the current source file (i.e: the most - // recently registered). 
- let source_id = labels - .first() - .and_then(|label| label.0.source_id) - .or_else(|| self.current_source_id()) - .expect("create_report without registering any source code"); - - let cache = self.cache.borrow(); - let mut cache_entry = cache.data.get(&source_id).unwrap(); - let mut src = cache_entry.code.as_str(); - - let mut message = level.title(title.as_str()).id(code); - let mut snippet = annotate_snippets::Snippet::source(src) - .origin(cache_entry.origin.as_deref().unwrap_or("line")) - .fold(true); - - for (label_ref, label, level) in &labels { - let label_source_id = label_ref - .source_id - .or_else(|| self.current_source_id()) - .unwrap(); - - // If the current label doesn't belong to the same source file - // finish the current snippet, add it to the error message and - // start a new snippet for the label's source file. - if label_source_id != source_id { - cache_entry = cache.data.get(&label_source_id).unwrap(); - src = cache_entry.code.as_str(); - message = message.snippet(snippet); - snippet = annotate_snippets::Snippet::source(src) - .origin(cache_entry.origin.as_deref().unwrap_or("line")) - .fold(true) - } - - let span_start = label_ref.span.start(); - let span_end = label_ref.span.end(); - - snippet = snippet.annotation( - level.span(span_start..span_end).label(label.as_str()), - ); - } - - message = message.snippet(snippet); - - if let Some(note) = ¬e { - message = message.footer(Level::Note.title(note.as_str())); + Report { + code_cache: self.code_cache.clone(), + with_colors: self.with_colors, + default_source_id: self.current_source_id().expect( + "`create_report` called without registering any source", + ), + level, + code, + title, + labels, + note, } - - let renderer = if self.with_colors { - annotate_snippets::Renderer::styled() - } else { - annotate_snippets::Renderer::plain() - }; - - let message = renderer.render(message); - - message.to_string() } } diff --git a/lib/src/compiler/rules.rs b/lib/src/compiler/rules.rs index 
b8dccb8cb..e6d8cd0c4 100644 --- a/lib/src/compiler/rules.rs +++ b/lib/src/compiler/rules.rs @@ -11,7 +11,8 @@ use regex_automata::meta::Regex; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use crate::compiler::atoms::Atom; -use crate::compiler::report::SourceRef; +use crate::compiler::errors::SerializationError; +use crate::compiler::report::CodeLoc; use crate::compiler::warnings::Warning; use crate::compiler::{ IdentId, Imports, LiteralId, NamespaceId, PatternId, RegexpId, RuleId, @@ -19,7 +20,7 @@ use crate::compiler::{ }; use crate::re::{BckCodeLoc, FwdCodeLoc, RegexpAtom}; use crate::string_pool::{BStringPool, StringPool}; -use crate::{re, types, SerializationError}; +use crate::{re, types}; /// A set of YARA rules in compiled form. /// @@ -478,7 +479,7 @@ pub(crate) struct RuleInfo { /// is used only during the compilation phase, but not during the scan /// phase. #[serde(skip)] - pub(crate) ident_ref: SourceRef, + pub(crate) ident_ref: CodeLoc, /// Metadata associated to the rule. pub(crate) metadata: Vec<(IdentId, MetaValue)>, /// Vector with all the patterns defined by this rule. 
diff --git a/lib/src/compiler/tests/mod.rs b/lib/src/compiler/tests/mod.rs index d8c79036e..16ff5854f 100644 --- a/lib/src/compiler/tests/mod.rs +++ b/lib/src/compiler/tests/mod.rs @@ -4,11 +4,10 @@ use std::fs; use std::io::Write; use std::mem::size_of; -use crate::compiler::{ - SerializationError, SubPattern, Var, VarStack, VariableError, -}; +use crate::compiler::{SubPattern, Var, VarStack}; +use crate::errors::{SerializationError, VariableError}; use crate::types::Type; -use crate::{compile, Compiler, Error, Rules, Scanner}; +use crate::{compile, Compiler, Rules, Scanner, SourceCode}; #[test] fn serialization() { @@ -122,9 +121,7 @@ fn globals() { assert_eq!( compiler.define_global("#invalid", true).err().unwrap(), - Error::VariableError(VariableError::InvalidIdentifier( - "#invalid".to_string() - )) + VariableError::InvalidIdentifier("#invalid".to_string()) ); let mut compiler = Compiler::new(); @@ -136,7 +133,7 @@ fn globals() { .define_global("a", false) .err() .unwrap(), - Error::VariableError(VariableError::AlreadyExists("a".to_string())) + VariableError::AlreadyExists("a".to_string()) ); let mut compiler = Compiler::new(); @@ -464,28 +461,28 @@ fn globals_json() { Compiler::new() .define_global("invalid_array", json!([1, "foo", 3])) .unwrap_err(), - Error::VariableError(VariableError::InvalidArray) + VariableError::InvalidArray ); assert_eq!( Compiler::new() .define_global("invalid_array", json!([1, [2, 3], 4])) .unwrap_err(), - Error::VariableError(VariableError::InvalidArray) + VariableError::InvalidArray ); assert_eq!( Compiler::new() .define_global("invalid_array", json!([1, null])) .unwrap_err(), - Error::VariableError(VariableError::InvalidArray) + VariableError::InvalidArray ); assert_eq!( Compiler::new() .define_global("invalid_array", json!({ "foo": null })) .unwrap_err(), - Error::VariableError(VariableError::UnexpectedNull) + VariableError::UnexpectedNull ); } @@ -671,13 +668,13 @@ fn errors_2() { "error[E012]: duplicate rule `foo` --> line:1:6 
| -1 | rule foo : first {condition: true} - | --- note: `foo` declared here for the first time +1 | rule foo : second {condition: true} + | ^^^ duplicate declaration of `foo` | ::: line:1:6 | -1 | rule foo : second {condition: true} - | ^^^ duplicate declaration of `foo` +1 | rule foo : first {condition: true} + | --- note: `foo` declared here for the first time |" ); @@ -723,6 +720,42 @@ fn utf8_errors() { ); } +#[test] +fn errors_serialization() { + let err = Compiler::new() + .add_source( + SourceCode::from("rule test {condition: foo}") + .with_origin("test.yar"), + ) + .err() + .unwrap(); + + let json_error = serde_json::to_string(&err).unwrap(); + + let expected = json!({ + "type": "UnknownIdentifier", + "code": "E009", + "title": "unknown identifier `foo`", + "labels":[ + { + "level": "error", + "code_origin": "test.yar", + "span": { "start": 22, "end": 25 }, + "text": "this identifier has not been declared" + } + ], + "note": null, + "text": r#"error[E009]: unknown identifier `foo` + --> test.yar:1:23 + | +1 | rule test {condition: foo} + | ^^^ this identifier has not been declared + |"# + }); + + assert_eq!(json_error, expected.to_string()); +} + #[test] fn test_errors() { let mut mint = goldenfile::Mint::new("."); diff --git a/lib/src/compiler/tests/testdata/errors/1.out b/lib/src/compiler/tests/testdata/errors/1.out index 3d9536104..19ebd92b9 100644 --- a/lib/src/compiler/tests/testdata/errors/1.out +++ b/lib/src/compiler/tests/testdata/errors/1.out @@ -1,5 +1,5 @@ error[E012]: duplicate rule `test` - --> line:1:6 + --> line:5:6 | 1 | rule test { | ---- note: `test` declared here for the first time diff --git a/lib/src/compiler/tests/testdata/errors/115.out b/lib/src/compiler/tests/testdata/errors/115.out index 52f62fc10..28622be8f 100644 --- a/lib/src/compiler/tests/testdata/errors/115.out +++ b/lib/src/compiler/tests/testdata/errors/115.out @@ -1,4 +1,4 @@ -error[E117]: invalid pattern modifier +error[E033]: invalid pattern modifier --> line:3:20 | 3 | 
$a = { 01 02 } nocase diff --git a/lib/src/compiler/tests/testdata/errors/46.out b/lib/src/compiler/tests/testdata/errors/46.out index e07c0a140..a8ce46ff5 100644 --- a/lib/src/compiler/tests/testdata/errors/46.out +++ b/lib/src/compiler/tests/testdata/errors/46.out @@ -1,4 +1,4 @@ -error[E017]: `entrypoint` is unsupported` +error[E017]: `entrypoint` is unsupported --> line:3:5 | 3 | entrypoint == 0x1000 diff --git a/lib/src/compiler/warnings.rs b/lib/src/compiler/warnings.rs index 618c3852e..835297e03 100644 --- a/lib/src/compiler/warnings.rs +++ b/lib/src/compiler/warnings.rs @@ -1,213 +1,394 @@ -use std::collections::HashSet; +#![cfg_attr(any(), rustfmt::skip)] + use std::fmt::{Debug, Display, Formatter}; +use serde::Serialize; use thiserror::Error; -use yara_x_macros::Error as DeriveError; +use yara_x_macros::ErrorEnum; +use yara_x_macros::ErrorStruct; -use crate::compiler::report::Level; -use crate::compiler::report::{ReportBuilder, SourceRef}; +use crate::compiler::report::{Level, Report, ReportBuilder, CodeLoc, Label}; /// A warning raised while compiling YARA rules. 
-#[rustfmt::skip] #[allow(missing_docs)] -#[derive(DeriveError)] +#[non_exhaustive] +#[derive(ErrorEnum, Error, PartialEq, Eq)] +#[derive(Serialize)] +#[serde(tag = "type")] pub enum Warning { - #[warning("consecutive_jumps", "consecutive jumps in hex pattern `{pattern_ident}`")] - #[label_warn("these consecutive jumps will be treated as {coalesced_jump}", jumps_span)] - ConsecutiveJumps { - detailed_report: String, - pattern_ident: String, - coalesced_jump: String, - jumps_span: SourceRef , - }, - - #[warning("unsatisfiable_expr", "potentially unsatisfiable expression")] - #[label_warn("this implies that multiple patterns must match", quantifier_span)] - #[label_warn("but they must match at the same offset", at_span)] - PotentiallyUnsatisfiableExpression { - detailed_report: String, - quantifier_span: SourceRef, - at_span: SourceRef, - }, - - #[warning("invariant_expr", "invariant boolean expression")] - #[label_warn("this expression is always {value}", span)] - #[note(note)] - InvariantBooleanExpression { - detailed_report: String, - value: bool, - span: SourceRef, - note: Option, - }, - - #[warning("non_bool_expr", "non-boolean expression used as boolean")] - #[label_warn("this expression is `{expression_type}` but is being used as `bool`", span)] - #[note(note)] - NonBooleanAsBoolean { - detailed_report: String, - expression_type: String, - span: SourceRef, - note: Option, - }, - - #[warning("bool_int_comparison", "comparison between boolean and integer")] - #[label_warn("this comparison can be replaced with: `{replacement}`", span)] - BooleanIntegerComparison { - detailed_report: String, - span: SourceRef, - replacement: String, - }, - - #[warning("duplicate_import", "duplicate import statement")] - #[label_warn( - "duplicate import", - new_import_span - )] - #[label_note( - "`{module_name}` imported here for the first time", - existing_import_span - )] - DuplicateImport { - detailed_report: String, - module_name: String, - new_import_span: SourceRef, - 
existing_import_span: SourceRef, - }, - - #[warning("redundant_modifier", "redundant case-insensitive modifier")] - #[label_warn("the `i` suffix indicates that the pattern is case-insensitive", i_span)] - #[label_warn("the `nocase` modifier does the same", nocase_span)] - RedundantCaseModifier { - detailed_report: String, - nocase_span: SourceRef, - i_span: SourceRef, - }, - - #[warning("slow_pattern", "slow pattern")] - #[label_warn("this pattern may slow down the scan", span)] - SlowPattern { - detailed_report: String, - span: SourceRef, - }, - - #[warning("unsupported_module", "module `{module_name}` is not supported")] - #[label_warn("module `{module_name}` used here", span)] - #[note(note)] - IgnoredModule { - detailed_report: String, - module_name: String, - span: SourceRef, - note: Option, - }, - - #[warning( - "ignored_rule", - "rule `{ignored_rule}` will be ignored due to an indirect dependency on module `{module_name}`" - )] - #[label_warn("this other rule depends on module `{module_name}`, which is unsupported", span)] - IgnoredRule { - detailed_report: String, - ignored_rule: String, - dependency: String, - module_name: String, - span: SourceRef, - }, + ConsecutiveJumps(Box), + PotentiallyUnsatisfiableExpression(Box), + InvariantBooleanExpression(Box), + NonBooleanAsBoolean(Box), + BooleanIntegerComparison(Box), + DuplicateImport(Box), + RedundantCaseModifier(Box), + SlowPattern(Box), + IgnoredModule(Box), + IgnoredRule(Box), } -/// Error returned by [`Warnings::switch_warning`] when the warning code is -/// not valid. -#[derive(Error, Debug, Eq, PartialEq)] -#[error("`{0}` is not a valid warning code")] -pub struct InvalidWarningCode(String); - -/// Represents a list of warnings. -pub struct Warnings { - warnings: Vec, - max_warnings: usize, - disabled_warnings: HashSet, +/// A hex pattern contains two or more consecutive jumps. +/// +/// For instance, in `{01 02 [0-2] [1-3] 03 04 }` the jumps `[0-2]` and `[1-3]` +/// appear one after the other. 
Consecutive jumps are useless, and they can be +/// folded into a single one. In this case they can be replaced by `[1-5]`. +/// +/// ## Example +/// +/// ```text +/// warning[consecutive_jumps]: consecutive jumps in hex pattern `$a` +/// --> line:3:18 +/// | +/// 3 | $a = { 0F 84 [4] [0-7] 8D } +/// | --------- these consecutive jumps will be treated as [4-11] +/// | +/// ``` +#[derive(ErrorStruct, Debug, PartialEq, Eq)] +#[associated_enum(Warning)] +#[warning( + code = "consecutive_jumps", + title = "consecutive jumps in hex pattern `{pattern_ident}`", +)] +#[label( + "these consecutive jumps will be treated as {coalesced_jump}", + coalesced_jump_loc +)] +pub struct ConsecutiveJumps { + report: Report, + pattern_ident: String, + coalesced_jump: String, + coalesced_jump_loc: CodeLoc, } -impl Default for Warnings { - fn default() -> Self { - Self { - warnings: Vec::new(), - max_warnings: 100, - disabled_warnings: HashSet::default(), - } +impl ConsecutiveJumps { + /// Identifier of the pattern containing the consecutive jumps. + #[inline] + pub fn pattern(&self) -> &str { + self.pattern_ident.as_str() } } -impl Warnings { - #[inline] - pub fn is_empty(&self) -> bool { - self.warnings.is_empty() - } +/// A boolean expression may be impossible to match. +/// +/// For instance, the condition `2 of ($a, $b) at 0` is impossible +/// to match, unless both `$a` and `$b` are the same pattern, +/// or one is a prefix of the other. In most cases this expression +/// is unsatisfiable because two different patterns can't match at the +/// same file offset. 
+/// +/// ## Example +/// +/// ```text +/// warning[unsatisfiable_expr]: potentially unsatisfiable expression +/// --> line:6:5 +/// | +/// 6 | 2 of ($*) at 0 +/// | - this implies that multiple patterns must match +/// | ---- but they must match at the same offset +/// | +/// ``` +#[derive(ErrorStruct, Debug, PartialEq, Eq)] +#[associated_enum(Warning)] +#[warning( + code = "unsatisfiable_expr", + title = "potentially unsatisfiable expression" +)] +#[label( + "this implies that multiple patterns must match", + quantifier_loc +)] +#[label( + "but they must match at the same offset", + at_loc +)] +pub struct PotentiallyUnsatisfiableExpression { + report: Report, + quantifier_loc: CodeLoc, + at_loc: CodeLoc, +} - #[inline] - pub fn len(&self) -> usize { - self.warnings.len() - } - #[inline] - pub fn add(&mut self, f: impl FnOnce() -> Warning) { - if self.warnings.len() < self.max_warnings { - let warning = f(); - if !self.disabled_warnings.contains(warning.code()) { - self.warnings.push(warning); - } - } - } +/// A boolean expression always has the same value. +/// +/// This warning indicates that some boolean expression is always true or false, +/// regardless of the data being scanned. +/// +/// ## Example +/// +/// ```text +/// warning[invariant_expr]: invariant boolean expression +/// --> line:6:5 +/// | +/// 6 | 3 of them +/// | --------- this expression is always false +/// | +/// = note: the expression requires 3 matching patterns out of 2 +/// ``` +#[derive(ErrorStruct, Debug, PartialEq, Eq)] +#[associated_enum(Warning)] +#[warning( + code = "invariant_expr", + title = "invariant boolean expression" +)] +#[label( + "this expression is always {expr_value}", + expr_loc +)] +#[note(note)] +pub struct InvariantBooleanExpression { + report: Report, + expr_value: bool, + expr_loc: CodeLoc, + note: Option, +} - /// Enables or disables a specific warning identified by `code`. 
- /// - /// Returns `true` if the warning was previously enabled, or `false` if - /// otherwise. Returns an error if the code doesn't correspond to any - /// of the existing warnings. - #[inline] - pub fn switch_warning( - &mut self, - code: &str, - enabled: bool, - ) -> Result { - if !Warning::is_valid_code(code) { - return Err(InvalidWarningCode(code.to_string())); - } - if enabled { - Ok(!self.disabled_warnings.remove(code)) - } else { - Ok(self.disabled_warnings.insert(code.to_string())) - } - } +/// A non-boolean expression is being used as a boolean. +/// +/// ## Example +/// +/// ```text +/// warning[non_bool_expr]: non-boolean expression used as boolean +/// --> line:3:14 +/// | +/// 3 | condition: 2 and 3 +/// | - this expression is `integer` but is being used as `bool` +/// | +/// = note: non-zero integers are considered `true`, while zero is `false` +/// ``` +#[derive(ErrorStruct, Debug, PartialEq, Eq)] +#[associated_enum(Warning)] +#[warning( + code = "non_bool_expr", + title = "non-boolean expression used as boolean" +)] +#[label( + "this expression is `{expr_type}` but is being used as `bool`", + expr_loc +)] +#[note(note)] +pub struct NonBooleanAsBoolean { + report: Report, + expr_type: String, + expr_loc: CodeLoc, + note: Option, +} - /// Enable or disables all warnings. - pub fn switch_all_warnings(&mut self, enabled: bool) { - if enabled { - self.disabled_warnings.clear(); - } else { - for c in Warning::all_codes() { - self.disabled_warnings.insert(c.to_string()); - } - } - } +/// Comparison between boolean and integer. +/// +/// This warning indicates that some expression is a comparison between +/// boolean and integer values. 
+/// +/// ## Example +/// +/// ```text +/// warning[bool_int_comparison]: comparison between boolean and integer +/// --> line:4:13 +/// | +/// 4 | condition: test_proto2.array_bool[0] == 1 +/// | ------------------------------ this comparison can be replaced with: `test_proto2.array_bool[0]` +/// | +/// ``` +#[derive(ErrorStruct, Debug, PartialEq, Eq)] +#[associated_enum(Warning)] +#[warning( + code = "bool_int_comparison", + title = "comparison between boolean and integer" +)] +#[label( + "this comparison can be replaced with: `{replacement}`", + expr_loc +)] +pub struct BooleanIntegerComparison { + report: Report, + replacement: String, + expr_loc: CodeLoc, +} - #[inline] - pub fn as_slice(&self) -> &[Warning] { - self.warnings.as_slice() - } +/// Duplicate import statement. +/// +/// This warning indicates that some module has been imported multiple times. +/// +/// ## Example +/// +/// ```text +/// warning[duplicate_import]: duplicate import statement +/// --> line:1:21 +/// | +/// 1 | import "test_proto2" +/// | -------------------- note: `test_proto2` imported here for the first time +/// 2 | import "test_proto2" +/// | -------------------- duplicate import +/// | +/// ``` +#[derive(ErrorStruct, Debug, PartialEq, Eq)] +#[associated_enum(Warning)] +#[warning( + code = "duplicate_import", + title = "duplicate import statement" +)] +#[label( + "duplicate import", + new_import_loc +)] +#[label( + "`{module_name}` imported here for the first time", + existing_import_loc, + Level::Note +)] +pub struct DuplicateImport { + report: Report, + module_name: String, + new_import_loc: CodeLoc, + existing_import_loc: CodeLoc, +} - pub fn append(&mut self, mut warnings: Self) { - for w in warnings.warnings.drain(0..) { - if self.warnings.len() == self.max_warnings { - break; - } - self.warnings.push(w) - } - } + +/// Redundant case-insensitive modifier for a regular expression. 
+/// +/// A regular expression can be made case-insensitive in two ways: by using the +/// `nocase` modifier or by appending the `i` suffix to the pattern. Both +/// methods achieve the same result, making it redundant to use them +/// simultaneously. +/// +/// For example, the following patterns are equivalent: +/// +/// ```text +/// $re = /some regexp/i +/// $re = /some regexp/ nocase +/// ``` +/// +/// ## Example +/// +/// ```text +/// warning[redundant_modifier]: redundant case-insensitive modifier +/// --> line:3:15 +/// | +/// 3 | $a = /foo/i nocase +/// | - the `i` suffix indicates that the pattern is case-insensitive +/// | ------ the `nocase` modifier does the same +/// | +/// ``` +#[derive(ErrorStruct, Debug, PartialEq, Eq)] +#[associated_enum(Warning)] +#[warning( + code = "redundant_modifier", + title = "redundant case-insensitive modifier" +)] +#[label( + "the `i` suffix indicates that the pattern is case-insensitive", + i_loc +)] +#[label( + "the `nocase` modifier does the same", + nocase_loc +)] +pub struct RedundantCaseModifier { + report: Report, + nocase_loc: CodeLoc, + i_loc: CodeLoc, } -impl From for Vec { - fn from(value: Warnings) -> Self { - value.warnings - } +/// Some pattern may be potentially slow. +/// +/// This warning indicates that a pattern may be very slow to match, and can +/// degrade the rule's performance. In most cases this is caused by patterns +/// that don't contain any large fixed sub-pattern that can be used for speeding +/// up the scan. For example, `{00 [1-10] 01}` is very slow because the only +/// fixed sub-patterns (`00` and `01`) are only one byte long. 
+/// +/// ## Example +/// +/// ```text +/// warning[slow_pattern]: slow pattern +/// --> line:3:5 +/// | +/// 3 | $a = {00 [1-10] 01} +/// | ------------------ this pattern may slow down the scan +/// | +/// ``` +#[derive(ErrorStruct, Debug, PartialEq, Eq)] +#[associated_enum(Warning)] +#[warning( + code = "slow_pattern", + title = "slow pattern" +)] +#[label( + "this pattern may slow down the scan", + pattern_loc +)] +pub struct SlowPattern { + report: Report, + pattern_loc: CodeLoc, } + +/// An unsupported module has been used. +/// +/// If you use [`crate::Compiler::ignore_module`] for telling the compiler +/// that some module is not supported, the compiler will raise this warning +/// when the module is used in some of your rules. +/// +/// ## Example +/// +/// ```text +/// warning[unsupported_module]: module `magic` is not supported +/// --> line:4:5 +/// | +/// 4 | magic.type() +/// | ----- module `magic` used here +/// | +/// = note: the whole rule `foo` will be ignored +/// ``` +#[derive(ErrorStruct, Debug, PartialEq, Eq)] +#[associated_enum(Warning)] +#[warning( + code = "unsupported_module", + title = "module `{module_name}` is not supported" +)] +#[label( + "module `{module_name}` used here", + module_name_loc +)] +#[note(note)] +pub struct IgnoredModule { + report: Report, + module_name: String, + module_name_loc: CodeLoc, + note: Option, +} + +/// A rule indirectly depends on some unsupported module. +/// +/// If you use [`crate::Compiler::ignore_module`] for telling the compiler +/// that some module is not supported, the compiler will raise this warning +/// when a rule `A` uses some rule `B` that uses the module. 
+/// +/// ## Example +/// +/// ```text +/// warning[ignored_rule]: rule `foo` will be ignored due to an indirect dependency on module `magic` +/// --> line:9:5 +/// | +/// 9 | bar +/// | --- this other rule depends on module `magic`, which is unsupported +/// | +/// ``` +#[derive(ErrorStruct, Debug, PartialEq, Eq)] +#[associated_enum(Warning)] +#[warning( + code = "ignored_rule", + title = "rule `{ignored_rule}` will be ignored due to an indirect dependency on module `{module_name}`" +)] +#[label( + "this other rule depends on module `{module_name}`, which is unsupported", + ignored_rule_loc +)] +pub struct IgnoredRule { + report: Report, + module_name: String, + ignored_rule: String, + ignored_rule_loc: CodeLoc, +} + + diff --git a/lib/src/lib.rs b/lib/src/lib.rs index 36573f149..01a752ec3 100644 --- a/lib/src/lib.rs +++ b/lib/src/lib.rs @@ -44,14 +44,11 @@ assert_eq!(results.matching_rules().len(), 1); #![deny(missing_docs)] pub use compiler::compile; -pub use compiler::CompileError; pub use compiler::Compiler; -pub use compiler::Error; pub use compiler::Rules; -pub use compiler::SerializationError; pub use compiler::SourceCode; -pub use compiler::Warning; +pub use modules::mods; pub use scanner::Match; pub use scanner::Matches; pub use scanner::MatchingRules; @@ -62,14 +59,9 @@ pub use scanner::NonMatchingRules; pub use scanner::Pattern; pub use scanner::Patterns; pub use scanner::Rule; -pub use scanner::ScanError; pub use scanner::ScanResults; pub use scanner::Scanner; - -pub use modules::mods; - pub use variables::Variable; -pub use variables::VariableError; mod compiler; mod modules; @@ -84,6 +76,22 @@ mod wasm; #[cfg(test)] mod tests; +pub mod errors { + //! Errors returned by this crate. + //! + //! This module contains the definitions for all error types returned by this + //! crate. 
+ pub use crate::compiler::errors::*; + pub use crate::compiler::InvalidWarningCode; + pub use crate::scanner::ScanError; + pub use crate::variables::VariableError; +} + +pub mod warnings { + //! Warnings returned while compiling rules. + pub use crate::compiler::warnings::*; +} + mod utils { /// Tries to match `target` as the enum variant `pat`. Returns the /// inner value contained in the variant, or panics if `target` does diff --git a/lib/src/scanner/context.rs b/lib/src/scanner/context.rs index 6bfa05ff5..ed4b8e3bb 100644 --- a/lib/src/scanner/context.rs +++ b/lib/src/scanner/context.rs @@ -30,10 +30,10 @@ use crate::re::fast::FastVM; use crate::re::thompson::PikeVM; use crate::re::Action; use crate::scanner::matches::{Match, PatternMatches, UnconfirmedMatch}; +use crate::scanner::ScanError; use crate::scanner::HEARTBEAT_COUNTER; use crate::types::{Array, Map, Struct}; use crate::wasm::MATCHING_RULES_BITMAP_BASE; -use crate::ScanError; /// Structure that holds information about the current scan. pub(crate) struct ScanContext<'r> { diff --git a/lib/src/scanner/mod.rs b/lib/src/scanner/mod.rs index b3f9c1e50..fe4313dd2 100644 --- a/lib/src/scanner/mod.rs +++ b/lib/src/scanner/mod.rs @@ -48,7 +48,7 @@ mod matches; #[cfg(test)] mod tests; -/// Error returned by [`Scanner::scan`] and [`Scanner::scan_file`]. +/// Error returned when a scan operation fails. #[derive(Error, Debug)] pub enum ScanError { /// The scan was aborted after the timeout period. diff --git a/lib/src/variables.rs b/lib/src/variables.rs index e56c62f86..904b888c6 100644 --- a/lib/src/variables.rs +++ b/lib/src/variables.rs @@ -22,7 +22,7 @@ use crate::types::{Array, TypeValue, Value}; /// implement [`Into`]. pub struct Variable(TypeValue); -/// Errors returned while defining or setting variables. +/// Error returned while defining or setting variables. #[derive(Error, Debug, Eq, PartialEq)] pub enum VariableError { /// The variable has not being defined. 
Before calling diff --git a/lib/src/wasm/mod.rs b/lib/src/wasm/mod.rs index 0ae5934bb..83ed643bc 100644 --- a/lib/src/wasm/mod.rs +++ b/lib/src/wasm/mod.rs @@ -94,12 +94,11 @@ use yara_x_macros::wasm_export; use crate::compiler::{LiteralId, PatternId, RegexpId, RuleId}; use crate::modules::BUILTIN_MODULES; -use crate::scanner::{RuntimeObjectHandle, ScanContext}; +use crate::scanner::{RuntimeObjectHandle, ScanContext, ScanError}; use crate::types::{ Array, Func, FuncSignature, Map, Struct, TypeValue, Value, }; use crate::wasm::string::RuntimeString; -use crate::ScanError; pub(crate) mod builder; pub(crate) mod string; diff --git a/macros/Cargo.toml b/macros/Cargo.toml index eb826d51f..194a3a2e3 100644 --- a/macros/Cargo.toml +++ b/macros/Cargo.toml @@ -15,5 +15,4 @@ proc-macro = true darling = "0.20.10" syn = { version = "2.0.74", features = ["full", "derive", "parsing", "visit"] } quote = "1.0" -proc-macro2 = "1.0.86" -convert_case = "0.6.0" +proc-macro2 = "1.0.86" \ No newline at end of file diff --git a/macros/src/error.rs b/macros/src/error.rs index e8723f82e..9e44728af 100644 --- a/macros/src/error.rs +++ b/macros/src/error.rs @@ -1,375 +1,346 @@ extern crate proc_macro; -use convert_case::{Case, Casing}; -use proc_macro2::{Span, TokenStream}; -use quote::{quote, TokenStreamExt}; +use proc_macro2::TokenStream; +use quote::quote; use syn::parse::{Parse, ParseStream}; -use syn::punctuated::Punctuated; +use syn::spanned::Spanned; use syn::token::Comma; -use syn::{ - Attribute, Data, DataEnum, DeriveInput, Error, Expr, Fields, Ident, - LitStr, Result, Variant, -}; +use syn::{Data, DeriveInput, Error, Expr, Field, Ident, LitStr, Result}; -pub(crate) fn impl_error_macro(input: DeriveInput) -> Result { - let name = &input.ident; +/// Describes a label in an error/warning message. 
+#[derive(Debug)] +struct Label { + label_fmt: LitStr, + label_ref: Ident, + level: Option, +} - let (codes, variants, funcs) = match &input.data { - Data::Struct(_) | Data::Union(_) => { - return Err(Error::new( - name.span(), - "macros macro Error can be used with only with enum types" +impl Parse for Label { + /// Parses a label like the one below. + /// + /// ```text + /// #[label("{error_msg}", error_ref, Level::Info)] + /// ``` + /// + /// The last argument is optional; the default value is `Level::Error`. + fn parse(input: ParseStream) -> Result { + let label_fmt: LitStr = input.parse()?; + let _ = input.parse::()?; + let label_ref: Ident = input.parse()?; + let mut level = None; + if input.peek(Comma) { + input.parse::()?; + level = Some(input.parse::()?); + } + Ok(Label { label_fmt, label_ref, level }) + } +} + +pub(crate) fn impl_error_struct_macro( + input: DeriveInput, +) -> Result { + let fields = + match &input.data { + Data::Struct(s) => &s.fields, + Data::Enum(_) | Data::Union(_) => return Err(Error::new( + input.ident.span(), + "macro ErrorStruct can be used with only with struct types" .to_string(), - )) + )), + }; + + let mut level = None; + let mut code = None; + let mut title = None; + let mut note = None; + let mut associated_enum = None; + let mut labels = Vec::new(); + + for attr in input.attrs { + if attr.path().is_ident("doc") { + // `doc` attributes are ignored, they are actually the + // documentation comments added in front of structures. + continue; + } else if attr.path().is_ident("associated_enum") { + associated_enum = Some(attr.parse_args::()?); + } else if attr.path().is_ident("label") { + labels.push(attr.parse_args::