Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Next skip #84

Merged
merged 10 commits into from
Apr 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 18 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,23 @@ jobs:

- run: cargo fuzz run --fuzz-dir crates/fuzz compare_to_serde --release -- -max_total_time=300s

fuzz-skip:
name: fuzz skip
# we only run this on ubuntu since architecture should make no difference

runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v3

- uses: moonrepo/setup-rust@v1
with:
channel: nightly
cache-target: release
bins: cargo-fuzz

- run: cargo fuzz run --fuzz-dir crates/fuzz compare_skip --release -- -max_total_time=300s

lint:
runs-on: ubuntu-latest
steps:
Expand All @@ -166,7 +183,7 @@ jobs:
# https://github.com/marketplace/actions/alls-green#why used for branch protection checks
check:
if: always()
needs: [test-linux, test-macos, bench, fuzz, lint]
needs: [test-linux, test-macos, bench, fuzz, fuzz-skip, lint]
runs-on: ubuntu-latest
steps:
- name: Decide whether the needed jobs succeeded or failed
Expand Down
6 changes: 6 additions & 0 deletions crates/fuzz/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,9 @@ name = "compare_to_serde"
path = "fuzz_targets/compare_to_serde.rs"
test = false
doc = false

[[bin]]
name = "compare_skip"
path = "fuzz_targets/compare_skip.rs"
test = false
doc = false
32 changes: 32 additions & 0 deletions crates/fuzz/fuzz_targets/compare_skip.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#![no_main]

use jiter::{Jiter, JiterError, JiterErrorType, JsonError, JsonValue};

use libfuzzer_sys::fuzz_target;
fn errors_equal(value_error: &JsonError, jiter_error: &JiterError) {
let jiter_error_type = match &jiter_error.error_type {
JiterErrorType::JsonError(json_error_type) => json_error_type,
JiterErrorType::WrongType { .. } => panic!("Expected JsonError, found WrongType"),
};
assert_eq!(&value_error.error_type, jiter_error_type);
assert_eq!(value_error.index, jiter_error.index);
}

fuzz_target!(|json: String| {
let json_data = json.as_bytes();
match JsonValue::parse(json_data, false) {
Ok(_) => {
let mut jiter = Jiter::new(json_data, false);
jiter.next_skip().unwrap();
jiter.finish().unwrap();
}
Err(json_error) => {
let mut jiter = Jiter::new(json_data, false);
let jiter_error = match jiter.next_skip() {
Ok(_) => jiter.finish().unwrap_err(),
Err(e) => e,
};
errors_equal(&json_error, &jiter_error);
}
};
});
27 changes: 27 additions & 0 deletions crates/jiter/benches/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,15 @@ fn jiter_value(path: &str, bench: &mut Bencher) {
})
}

fn jiter_skip(path: &str, bench: &mut Bencher) {
let json = read_file(path);
let json_data = black_box(json.as_bytes());
bench.iter(|| {
let mut jiter = Jiter::new(json_data, false);
jiter.next_skip().unwrap();
})
}

fn jiter_iter_big(path: &str, bench: &mut Bencher) {
let json = read_file(path);
let json_data = black_box(json.as_bytes());
Expand Down Expand Up @@ -211,6 +220,10 @@ macro_rules! test_cases {
jiter_string(&file_path, bench);
}
}
fn [< $file_name _jiter_skip >](bench: &mut Bencher) {
let file_path = format!("./benches/{}.json", stringify!($file_name));
jiter_skip(&file_path, bench);
}

fn [< $file_name _serde_value >](bench: &mut Bencher) {
let file_path = format!("./benches/{}.json", stringify!($file_name));
Expand Down Expand Up @@ -293,51 +306,65 @@ fn lazy_map_lookup_3_50(bench: &mut Bencher) {
benchmark_group!(
benches,
big_jiter_iter,
big_jiter_skip,
big_jiter_value,
big_serde_value,
bigints_array_jiter_iter,
bigints_array_jiter_skip,
bigints_array_jiter_value,
bigints_array_serde_value,
floats_array_jiter_iter,
floats_array_jiter_skip,
floats_array_jiter_value,
floats_array_serde_value,
massive_ints_array_jiter_iter,
massive_ints_array_jiter_skip,
massive_ints_array_jiter_value,
massive_ints_array_serde_value,
medium_response_jiter_iter,
medium_response_jiter_skip,
medium_response_jiter_value,
medium_response_jiter_value_owned,
medium_response_serde_value,
x100_jiter_iter,
x100_jiter_skip,
x100_jiter_value,
x100_serde_iter,
x100_serde_value,
sentence_jiter_iter,
sentence_jiter_skip,
sentence_jiter_value,
sentence_serde_value,
unicode_jiter_iter,
unicode_jiter_skip,
unicode_jiter_value,
unicode_serde_value,
pass1_jiter_iter,
pass1_jiter_skip,
pass1_jiter_value,
pass1_serde_value,
pass2_jiter_iter,
pass2_jiter_skip,
pass2_jiter_value,
pass2_serde_value,
string_array_jiter_iter,
string_array_jiter_skip,
string_array_jiter_value,
string_array_jiter_value_owned,
string_array_serde_value,
true_array_jiter_iter,
true_array_jiter_skip,
true_array_jiter_value,
true_array_serde_value,
true_object_jiter_iter,
true_object_jiter_skip,
true_object_jiter_value,
true_object_serde_value,
lazy_map_lookup_1_10,
lazy_map_lookup_2_20,
lazy_map_lookup_3_50,
short_numbers_jiter_iter,
short_numbers_jiter_skip,
short_numbers_jiter_value,
short_numbers_serde_value,
);
Expand Down
5 changes: 0 additions & 5 deletions crates/jiter/src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,6 @@ use std::fmt;
/// those expected from `serde_json`.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum JsonErrorType {
/// string escape sequences are not supported in this method, usize here is the position within the string
/// that is invalid
StringEscapeNotSupported,

/// float value was found where an int was expected
FloatExpectingInt,

Expand Down Expand Up @@ -82,7 +78,6 @@ impl std::fmt::Display for JsonErrorType {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
// Messages for enum members copied from serde_json are unchanged
match self {
Self::StringEscapeNotSupported => f.write_str("string escape sequences are not supported"),
Self::FloatExpectingInt => f.write_str("float value was found where an int was expected"),
Self::EofWhileParsingList => f.write_str("EOF while parsing a list"),
Self::EofWhileParsingObject => f.write_str("EOF while parsing an object"),
Expand Down
37 changes: 36 additions & 1 deletion crates/jiter/src/jiter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use crate::errors::{json_error, JiterError, JsonType, LinePosition, DEFAULT_RECU
use crate::number_decoder::{NumberAny, NumberFloat, NumberInt, NumberRange};
use crate::parse::{Parser, Peek};
use crate::string_decoder::{StringDecoder, StringDecoderRange, Tape};
use crate::value::{take_value_borrowed, take_value_owned, JsonValue};
use crate::value::{take_value_borrowed, take_value_owned, take_value_skip, JsonValue};
use crate::{JsonError, JsonErrorType};

pub type JiterResult<T> = Result<T, JiterError>;
Expand Down Expand Up @@ -48,6 +48,16 @@ impl<'j> Jiter<'j> {
self.parser.current_position()
}

/// Get the current index of the parser.
pub fn current_index(&self) -> usize {
self.parser.index
}

/// Get a slice of the underlying JSON data from `start` to `current_index`.
pub fn slice_to_current(&self, start: usize) -> &'j [u8] {
&self.data[start..self.current_index()]
}

/// Convert an error index to a [LinePosition].
///
/// # Arguments
Expand Down Expand Up @@ -218,6 +228,31 @@ impl<'j> Jiter<'j> {
.map_err(Into::into)
}

/// Parse the next JSON value, but don't return it.
/// This should be faster than returning the value, useful when you don't care about this value.
/// Error if it is invalid JSON.
///
/// *WARNING:* For performance reasons, this method does not check that strings would be valid UTF-8.
pub fn next_skip(&mut self) -> JiterResult<()> {
let peek = self.peek()?;
self.known_skip(peek)
}

/// Parse the next JSON value, but don't return it. Error if it is invalid JSON.
///
/// # Arguments
/// - `peek`: The [Peek] of the next JSON value.
pub fn known_skip(&mut self, peek: Peek) -> JiterResult<()> {
take_value_skip(
peek,
&mut self.parser,
&mut self.tape,
DEFAULT_RECURSION_LIMIT,
self.allow_inf_nan,
)
.map_err(Into::into)
}

/// Parse the next JSON value and return it as a [JsonValue] with static lifetime. Error if it is invalid JSON.
pub fn next_value_owned(&mut self) -> JiterResult<JsonValue<'static>> {
let peek = self.peek()?;
Expand Down
2 changes: 1 addition & 1 deletion crates/jiter/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ mod simd_aarch64;
mod string_decoder;
mod value;

pub use errors::{JiterErrorType, JsonError, JsonErrorType, JsonResult, JsonType, LinePosition};
pub use errors::{JiterError, JiterErrorType, JsonError, JsonErrorType, JsonResult, JsonType, LinePosition};
pub use jiter::{Jiter, JiterResult};
pub use lazy_index_map::LazyIndexMap;
pub use number_decoder::{NumberAny, NumberInt};
Expand Down
48 changes: 36 additions & 12 deletions crates/jiter/src/number_decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -384,8 +384,8 @@ impl AbstractNumberDecoder for NumberRange {
let end = consume_exponential(data, index)?;
Ok((start..end, end))
}
Some(_) => return json_err!(InvalidNumber, index),
None => return Ok((start..index, index)),
Some(digit) if digit.is_ascii_digit() => json_err!(InvalidNumber, index),
samuelcolvin marked this conversation as resolved.
Show resolved Hide resolved
_ => return Ok((start..index, index)),
};
}
Some(b'I') => {
Expand All @@ -398,25 +398,49 @@ impl AbstractNumberDecoder for NumberRange {
};

index += 1;
while let Some(next) = data.get(index) {
match next {
b'0'..=b'9' => (),
b'.' => {
for _ in 0..18 {
if let Some(digit) = data.get(index) {
if INT_CHAR_MAP[*digit as usize] {
index += 1;
continue;
} else if matches!(digit, b'.') {
index += 1;
let end = consume_decimal(data, index)?;
return Ok((start..end, end));
}
b'e' | b'E' => {
} else if matches!(digit, b'e' | b'E') {
index += 1;
let end = consume_exponential(data, index)?;
return Ok((start..end, end));
}
_ => break,
}
index += 1;
return Ok((start..index, index));
}
loop {
let (chunk, new_index) = IntChunk::parse_big(data, index);
samuelcolvin marked this conversation as resolved.
Show resolved Hide resolved
if (new_index - start) > 4300 {
return json_err!(NumberOutOfRange, start + 4301);
}
match chunk {
IntChunk::Ongoing(_) => {
index = new_index;
}
IntChunk::Done(_) => return Ok((start..new_index, new_index)),
IntChunk::Float => {
return match data.get(new_index) {
Some(b'.') => {
index = new_index + 1;
let end = consume_decimal(data, index)?;
Ok((start..end, end))
}
_ => {
index = new_index + 1;
let end = consume_exponential(data, index)?;
Ok((start..end, end))
}
}
}
}
}

Ok((start..index, index))
}
}

Expand Down
2 changes: 1 addition & 1 deletion crates/jiter/src/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ impl Peek {
}

impl Peek {
const fn new(next: u8) -> Self {
pub const fn new(next: u8) -> Self {
samuelcolvin marked this conversation as resolved.
Show resolved Hide resolved
Self(next)
}

Expand Down
44 changes: 22 additions & 22 deletions crates/jiter/src/string_decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,9 @@ fn parse_u4(data: &[u8], mut index: usize) -> JsonResult<(u16, usize)> {
Ok((n, index))
}

/// A string decoder that returns the range of the string.
///
/// *WARNING:* For performance reasons, this decoder does not check that the string would be valid UTF-8.
pub struct StringDecoderRange;

impl<'t, 'j> AbstractStringDecoder<'t, 'j> for StringDecoderRange
Expand All @@ -338,33 +341,30 @@ where
fn decode(data: &'j [u8], mut index: usize, _tape: &'t mut Tape) -> JsonResult<(Self::Output, usize)> {
index += 1;
let start = index;
while let Some(next) = data.get(index) {
match next {
b'"' => {

loop {
index = match decode_chunk(data, index, true)? {
(StringChunk::Quote, _, index) => {
let r = start..index;
index += 1;
return Ok((r, index));
return Ok((r, index + 1));
}
b'\\' => {
index += 1;
if let Some(next_inner) = data.get(index) {
match next_inner {
// these escapes are easy to validate
b'"' | b'\\' | b'/' | b'b' | b'f' | b'n' | b'r' | b't' => (),
// unicode escapes are harder to validate, we just prevent them here
b'u' => return json_err!(StringEscapeNotSupported, index),
_ => return json_err!(InvalidEscape, index),
}
} else {
return json_err!(EofWhileParsingString, index);
(StringChunk::Backslash, _, index) => index,
};
index += 1;
if let Some(next_inner) = data.get(index) {
match next_inner {
// these escapes are easy to validate
b'"' | b'\\' | b'/' | b'b' | b'f' | b'n' | b'r' | b't' => (),
b'u' => {
let (_, new_index) = parse_escape(data, index)?;
index = new_index;
}
index += 1;
}
_ => {
index += 1;
_ => return json_err!(InvalidEscape, index),
}
index += 1;
} else {
return json_err!(EofWhileParsingString, index);
}
}
json_err!(EofWhileParsingString, index)
}
}
Loading
Loading