From 264282b374d312f1c90feba39600b9565e7160ef Mon Sep 17 00:00:00 2001 From: Rob Emery Date: Tue, 11 Apr 2017 04:02:06 +0100 Subject: [PATCH] DateRole: Handling 'of' in textual date formats (#3982) * DateRole: Adding failing test for `$nth of $month $year` * DateRole: Now parses "7th of march 2017" * DateRole: Adding failing test for '5 of jan 1993' (no suffix and short month) * DateRole: Handles 5 of march * DateRole: Now also handle "march the 30th, 2017" --- lib/DDG/GoodieRole/Dates.pm | 5 +++-- t/00-roles.t | 3 +++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/DDG/GoodieRole/Dates.pm b/lib/DDG/GoodieRole/Dates.pm index 4724154cd7e..cf8308777b8 100755 --- a/lib/DDG/GoodieRole/Dates.pm +++ b/lib/DDG/GoodieRole/Dates.pm @@ -347,12 +347,12 @@ sub build_datestring_regex { # month-first date formats push @regexes, qr#$date_number$date_delim$short_month$date_delim$full_year#i; push @regexes, qr#$date_number$date_delim$full_month$date_delim$full_year#i; - push @regexes, qr#(?:$short_month|$full_month) $date_number(?: ?$number_suffixes)?[,]? $full_year#i; + push @regexes, qr#(?:$short_month|$full_month) (?:the )?$date_number(?: ?$number_suffixes)?[,]? $full_year#i; # day-first date formats push @regexes, qr#$short_month$date_delim$date_number$date_delim$full_year#i; push @regexes, qr#$full_month$date_delim$date_number$date_delim$full_year#i; - push @regexes, qr#$date_number[,]?(?: ?$number_suffixes)? (?:$short_month|$full_month)[,]? $full_year#i; + push @regexes, qr#$date_number[,]?(?: ?$number_suffixes)? (?:of )?(?:$short_month|$full_month)[,]? $full_year#i; ## Ambiguous, but potentially valid date formats push @regexes, $ambiguous_dates; @@ -391,6 +391,7 @@ sub parse_formatted_datestring_to_date { } $d =~ s/(\d+)\s?$number_suffixes/$1/i; # Strip ordinal text. + $d =~ s/(\sof\s)|(\sthe\s)/ /i; # Strip "of" for "4th of march" and "the" for "march the 4th" $d =~ s/,//i; # Strip any random commas. $d =~ s/($full_month)/$full_month_to_short{lc $1}/i; # Parser deals better with the shorter month names. $d =~ s/^($short_month)$date_delim(\d{1,2})/$2-$short_month_fix{lc $1}/i; # Switching Jun-01-2012 to 01 Jun 2012 diff --git a/t/00-roles.t b/t/00-roles.t index 336572dee5f..bfd18d564bc 100755 --- a/t/00-roles.t +++ b/t/00-roles.t @@ -138,6 +138,9 @@ subtest 'Dates' => sub { '29 feb, 2012' => 1330473600, '2038-01-20' => 2147558400, # 32-bit signed int UNIX epoch ends 2038-01-19 '1780-01-20' => -5994172800, # Way before 32-bit signed int epoch + '5th of january 1993' => 726192000, + '5 of jan 1993' => 726192000, + 'june the 1st 2012' => 1338508800, ); foreach my $test_date (sort keys %dates_to_match) {