Skip to content

Commit

Permalink
Improve rules of srt parser
Browse files Browse the repository at this point in the history
  • Loading branch information
mochi-neko committed Feb 19, 2024
1 parent a249f3a commit 41408a8
Show file tree
Hide file tree
Showing 4 changed files with 155 additions and 54 deletions.
1 change: 0 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ pub mod vtt;

// Internal modules.
mod error;
mod general;
mod result;
mod str_parser;
mod vtt_parser;
122 changes: 121 additions & 1 deletion src/srt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
use std::cmp::Ordering;
use std::fmt::{Display, Formatter};
use std::ops::{Add, Sub};
use std::time::Duration;

use crate::str_parser;
use crate::ParseResult;
Expand Down Expand Up @@ -269,7 +270,7 @@ pub struct SrtSubtitle {
pub start: SrtTimestamp,
/// The end timestamp.
pub end: SrtTimestamp,
/// The text.
/// The subtitle text.
pub text: Vec<String>,
}

Expand Down Expand Up @@ -356,6 +357,11 @@ impl Display for SrtSubtitle {
/// seconds: 1,
/// ..Default::default()
/// };
///
/// assert_eq!(
/// timestamp.to_string(),
/// "00:00:01,000".to_string()
/// );
/// ```
#[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct SrtTimestamp {
Expand Down Expand Up @@ -466,6 +472,35 @@ impl Sub for SrtTimestamp {
}
}

impl From<Duration> for SrtTimestamp {
    /// Converts a [`Duration`] into a timestamp, dropping any sub-millisecond
    /// precision.
    ///
    /// The `hours` field is a `u8`, so a duration of 256 hours or more cannot
    /// be represented. Instead of silently wrapping the hour count, such
    /// durations saturate to the largest in-range timestamp,
    /// `255:59:59,999`.
    fn from(duration: Duration) -> Self {
        let total_seconds = duration.as_secs();
        let total_hours = total_seconds / 3600;

        // Saturate rather than wrap: a plain `as u8` would turn 256 hours
        // into 0 hours.
        if total_hours > u64::from(u8::MAX) {
            return Self {
                hours: u8::MAX,
                minutes: 59,
                seconds: 59,
                milliseconds: 999,
            };
        }

        Self {
            // Checked above to fit in `u8`.
            hours: total_hours as u8,
            // Always in 0..60, so the narrowing casts cannot truncate.
            minutes: (total_seconds % 3600 / 60) as u8,
            seconds: (total_seconds % 60) as u8,
            // `subsec_millis` is always in 0..1000.
            milliseconds: duration.subsec_millis() as u16,
        }
    }
}

impl Into<Duration> for SrtTimestamp {
fn into(self) -> Duration {
Duration::new(
u64::from(self.hours) * 3600
+ u64::from(self.minutes) * 60
+ u64::from(self.seconds),
self.milliseconds as u32 * 1_000_000,
)
}
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down Expand Up @@ -765,4 +800,89 @@ This is a test.
};
assert!(subtitle1 < subtitle2);
}

#[test]
fn display_timestamp() {
    // A timestamp of exactly one second must render as `00:00:01,000`.
    let one_second = SrtTimestamp {
        hours: 0,
        minutes: 0,
        seconds: 1,
        milliseconds: 0,
    };

    assert_eq!(one_second.to_string(), "00:00:01,000");
}

#[test]
fn from_duration_to_timestamp() {
    // Each case: (whole seconds, nanoseconds) of the input `Duration`,
    // followed by the expected (hours, minutes, seconds, milliseconds).
    let cases = [
        ((1, 0), (0, 0, 1, 0)),
        ((3661, 0), (1, 1, 1, 0)),
        ((3661, 500 * 1_000_000), (1, 1, 1, 500)),
    ];

    for &((secs, nanos), (hours, minutes, seconds, milliseconds)) in &cases {
        let converted: SrtTimestamp = Duration::new(secs, nanos).into();
        let wanted = SrtTimestamp {
            hours,
            minutes,
            seconds,
            milliseconds,
        };
        assert_eq!(converted, wanted);
    }
}

#[test]
fn from_timestamp_to_duration() {
    // Each case: the timestamp's (hours, minutes, seconds, milliseconds),
    // followed by the (whole seconds, nanoseconds) of the expected `Duration`.
    let cases = [
        ((0, 0, 1, 0), (1, 0)),
        ((1, 1, 1, 0), (3661, 0)),
        ((1, 1, 1, 500), (3661, 500 * 1_000_000)),
    ];

    for &((hours, minutes, seconds, milliseconds), (secs, nanos)) in &cases {
        let source = SrtTimestamp {
            hours,
            minutes,
            seconds,
            milliseconds,
        };
        let converted: Duration = source.into();
        assert_eq!(converted, Duration::new(secs, nanos));
    }
}
}
84 changes: 33 additions & 51 deletions src/str_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,65 +12,38 @@ peg::parser! {
/// Whitespace.
rule whitespace() = [' ' | '\t']

/// Zero or more whitespaces.
pub(crate) rule whitespaces() = quiet!{ whitespace()* }

/// One or more whitespaces.
pub(crate) rule some_whitespaces() = whitespace()+

/// Newline.
pub(crate) rule newline() = "\r\n" / "\n" / "\r"

/// Zero or more newlines.
pub(crate) rule newlines() = quiet!{ newline()* }

/// One or more newlines.
pub(crate) rule some_newlines() = newline()+

/// Whitespace or newline.
pub(crate) rule whitespace_or_newline() = [' ' | '\t' | '\r' | '\n']

/// Zero or more whitespaces or newlines.
pub(crate) rule whitespaces_or_newlines() = quiet!{ whitespace_or_newline()* }
rule newline() = "\r\n" / "\n" / "\r"

/// One or more whitespaces or one newline.
pub(crate) rule some_whitespaces_or_newline() = some_whitespaces() / newline()

/// One or more whitespaces or newlines.
pub(crate) rule some_whitespaces_or_newlines() = whitespace_or_newline()+
/// Whitespaces and/or newline without two or more newlines.
rule separator() = !(newline() newline()) (whitespace() / newline())+

/// Any-digit number.
pub(crate) rule number() -> u32
rule number() -> u32
= n:$(['0'..='9']+) {?
n.parse().or(Err("number"))
n.parse().or(Err("number in u32"))
}

/// Two-digit number.
pub(crate) rule two_number() -> u8
rule two_number() -> u8
= n:$(['0'..='9']['0'..='9']) {?
n.parse().or(Err("two-digit number"))
}

/// Three-digit number.
pub(crate) rule three_number() -> u16
rule three_number() -> u16
= n:$(['0'..='9']['0'..='9']['0'..='9']) {?
n.parse().or(Err("three-digit number"))
}

/// Multiple lines block of text.
pub(crate) rule multiline() -> Vec<String>
= !whitespace_or_newline() lines:$((!newline() [_])+ newline()) ** ()
{?
let lines = lines
rule multiline() -> Vec<String>
= !((whitespace() / newline())+) lines:$((!newline() [_])+ newline()) ++ ()
{
lines
.iter()
.map(|l| l.to_string().trim().to_string())
.collect::<Vec<String>>();

if !lines.is_empty() {
Ok(lines)
} else {
Err("Empty multiline")
}
.collect::<Vec<String>>()
}

/// Timestamp.
Expand All @@ -87,18 +60,18 @@ peg::parser! {

/// Single subtitle entry.
pub(crate) rule subtitle() -> SrtSubtitle
= whitespaces() sequence:number() whitespaces() newline()
whitespaces() start:timestamp() whitespaces() "-->" whitespaces() end:timestamp() whitespaces() newline()
whitespaces() text:multiline()
= sequence:number() separator()
start:timestamp() whitespace()* "-->" whitespace()* end:timestamp() separator()
text:multiline()
{
SrtSubtitle { sequence, start, end, text }
}

/// The entire SRT.
pub(crate) rule srt() -> SubRip
= whitespaces_or_newlines()
subtitles:subtitle() ** some_whitespaces_or_newlines()
whitespaces_or_newlines()
= (whitespace() / newline())*
subtitles:subtitle() ** (newline()+)
(whitespace() / newline())*
{
SubRip { subtitles, }
}
Expand Down Expand Up @@ -211,7 +184,7 @@ mod test {
// Allow whitespaces.
assert_eq!(
srt_parser::subtitle(
" 1 \n 00:00:00,000 --> 00:00:01,000 \n \tHello, world! \n"
"1 \n 00:00:00,000 --> 00:00:01,000 \n \tHello, world! \n"
)
.unwrap(),
subtitle
Expand All @@ -226,16 +199,20 @@ mod test {
subtitle
);

// Allow separator with whitespaces.
assert_eq!(
srt_parser::subtitle(
"1 00:00:00,000 --> 00:00:01,000 Hello, world!\n"
)
.unwrap(),
subtitle
);

// Prohibit spaces or new lines in header.
assert!(srt_parser::subtitle(
"\n1\n00:00:00,000 --> 00:00:01,000\nHello, world!\n"
)
.is_err());
// Must be separated by newlines.
assert!(srt_parser::subtitle(
"1 00:00:00,000 --> 00:00:01,000 Hello, world!\n"
)
.is_err());
// Prohibit two or more newlines.
assert!(srt_parser::subtitle(
"1\n\n00:00:00,000 --> 00:00:01,000\nHello, world!\n"
Expand All @@ -249,6 +226,11 @@ mod test {
"1\n00:00:00,000 --> 00:00:01,000\nHello, world!\n\n"
)
.is_err());
// Prohibit empty text.
assert!(
srt_parser::subtitle("1\n00:00:00,000 --> 00:00:01,000\n\n")
.is_err()
);
}

#[test]
Expand Down
2 changes: 1 addition & 1 deletion src/general.rs → tests/general.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ peg::parser! {
/// Any-digit number.
pub(crate) rule number() -> u32
= n:$(['0'..='9']+) {?
n.parse().or(Err("number"))
n.parse().or(Err("number in u32"))
}

/// Signed integer.
Expand Down

0 comments on commit 41408a8

Please sign in to comment.