From d9f5ae50195819cb9e3c70315d9c52612d114a58 Mon Sep 17 00:00:00 2001 From: Vasyl Khrystiuk Date: Sat, 30 Nov 2024 03:14:52 +0200 Subject: [PATCH] [WIP] --- .../liqp/filters/date/FuzzyDateParser.java | 157 +++++++++++++++--- .../filters/date/FuzzyDateParserTest.java | 30 ++-- 2 files changed, 155 insertions(+), 32 deletions(-) diff --git a/src/main/java/liqp/filters/date/FuzzyDateParser.java b/src/main/java/liqp/filters/date/FuzzyDateParser.java index d8ac31b7..d4bb95b6 100644 --- a/src/main/java/liqp/filters/date/FuzzyDateParser.java +++ b/src/main/java/liqp/filters/date/FuzzyDateParser.java @@ -1,9 +1,11 @@ package liqp.filters.date; +import java.text.DateFormatSymbols; import java.time.ZoneId; import java.time.ZonedDateTime; import java.time.temporal.TemporalAccessor; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.Locale; import java.util.regex.Matcher; @@ -20,7 +22,7 @@ public ZonedDateTime parse(String valAsString, Locale locale, ZoneId defaultZone return zonedDateTime; } - String pattern = guessPattern(normalized); + String pattern = guessPattern(normalized, locale); TemporalAccessor temporalAccessor = parseUsingPattern(normalized, pattern, locale); if (temporalAccessor == null) { @@ -30,10 +32,10 @@ public ZonedDateTime parse(String valAsString, Locale locale, ZoneId defaultZone return getZonedDateTimeFromTemporalAccessor(temporalAccessor, defaultZone); } - String guessPattern(String normalized) { + String guessPattern(String normalized, Locale locale) { List parts = new ArrayList<>(); // we start as one big single unparsed part - DateParseContext ctx = new DateParseContext(); + DateParseContext ctx = new DateParseContext(locale); parts.add(new UnparsedPart(0, normalized.length(), normalized)); while (haveUnparsed(parts)) { @@ -55,19 +57,108 @@ private String reconstructPattern(List parts) { } static class DateParseContext { + + /** Locale whose month names the extractors search for. */ private final Locale locale; Boolean hasYear; - } + Boolean hasMonthName; + Boolean 
hasTime; - static class PatternPair { - final Pattern pattern; - final String formatterPattern; + public DateParseContext(Locale locale) { + this.locale = locale; + } + } + /** Outcome of one extraction attempt; start and end are offsets into the searched string and are only set when found is true. */ static class PartExtractorResult { + boolean found; + int start; + int end; + } + /** Locates one date component inside a string and names the formatter pattern letters that stand for it. */ interface PartExtractor { + PartExtractorResult extract(String source); + String formatterPattern(); + } + /** Extractor backed by a regular expression whose capturing group 1 delimits the component. */ static class RegexPartExtractor implements PartExtractor { + private final Pattern pattern; + private final String formatterPattern; - PatternPair(Pattern pattern, String formatterPattern) { - this.pattern = pattern; + RegexPartExtractor(String regex, String formatterPattern) { + this.pattern = Pattern.compile(regex); this.formatterPattern = formatterPattern; } + + @Override + public PartExtractorResult extract(String source) { + Matcher matcher = pattern.matcher(source); + if (matcher.find()) { + PartExtractorResult result = new PartExtractorResult(); + result.found = true; + result.start = matcher.start(1); + result.end = matcher.end(1); + return result; + } + return new PartExtractorResult(); + } + + @Override + public String formatterPattern() { + return formatterPattern; + } + } + /** Year as a run of exactly four digits that is not adjacent to another digit; compiled once and shared. */ static final PartExtractor plainYearExtractor = new RegexPartExtractor("(?<!\\d)(\\d{4})(?!\\d)", "yyyy"); + + /** Forwards both operations to a delegate that the subclass constructor assigns. */ static class PartExtractorDelegate implements PartExtractor { + private PartExtractor delegate; + + @Override + public PartExtractorResult extract(String source) { + return delegate.extract(source); + } + + @Override + public String formatterPattern() { + return delegate.formatterPattern(); + } + } + /** Matches any month name that DateFormatSymbols reports for the locale; a null or ROOT locale falls back to Locale.US. */ static class FullMonthExtractor extends PartExtractorDelegate { + public FullMonthExtractor(Locale locale, String formatterPattern) { + if (locale == null || Locale.ROOT.equals(locale)) { + locale = Locale.US; + } + String[] months = withoutNulls(getMonthsNamesFromLocale(locale)); + /* quote each name so regex metacharacters in it (for example '.') match literally */ String monthPattern = Arrays.stream(months).map(Pattern::quote).collect(java.util.stream.Collectors.joining("|")); + /* letter lookarounds keep a month name from matching inside a longer word */ super.delegate = new RegexPartExtractor("(?<!\\p{L})(" + monthPattern + ")(?!\\p{L})", formatterPattern); + } + + 
/* called from the constructor, so overrides must not rely on subclass state */ protected String[] getMonthsNamesFromLocale(Locale locale) { + return new DateFormatSymbols(locale).getMonths(); + } + + /* drops null and empty entries from the names array */ private static String[] withoutNulls(String[] shortMonths) { + return Arrays.stream(shortMonths) + .filter(month -> month != null && !month.isEmpty()) + .toArray(String[]::new); + } } - static final PatternPair plainYearPair = new PatternPair(Pattern.compile(".*\\b?(\\d{4})\\b?.*"), "yyyy"); + + private PartExtractor fullMonthExtractor(Locale locale) { + return new FullMonthExtractor(locale, "MMMM"); + } + + /** Same matching as FullMonthExtractor but over the locale's short month names, reported as MMM. */ static class ShortMonthExtractor extends FullMonthExtractor { + public ShortMonthExtractor(Locale locale) { + super(locale, "MMM"); + } + + @Override + protected String[] getMonthsNamesFromLocale(Locale locale) { + return new DateFormatSymbols(locale).getShortMonths(); + } + } + + private PartExtractor shortMonthExtractor(Locale locale) { + return new ShortMonthExtractor(locale); + } + + static class LookupResult { final List parts; final boolean found; @@ -78,17 +169,41 @@ static class LookupResult { } private List parsePart(List parts, DateParseContext ctx) { if (notSet(ctx.hasYear)) { - LookupResult result = lookup(parts, plainYearPair); + LookupResult result = lookup(parts, plainYearExtractor); if (result.found) { ctx.hasYear = true; return result.parts; - } else { - ctx.hasYear = false; } + ctx.hasYear = false; + } + if (notSet(ctx.hasMonthName)) { + /* full names are tried before short names */ LookupResult result = lookup(parts, fullMonthExtractor(ctx.locale)); + if (result.found) { + ctx.hasMonthName = true; + return result.parts; + } + + result = lookup(parts, shortMonthExtractor(ctx.locale)); + if (result.found) { + ctx.hasMonthName = true; + return result.parts; + } + + ctx.hasMonthName = false; + } + + if (notSet(ctx.hasTime)) { + /* NOTE(review): no extractor is consulted here; looks like a placeholder until time parsing exists - confirm */ LookupResult result = new LookupResult(parts, false); + if (result.found) { + ctx.hasTime = true; + return result.parts; + } + ctx.hasTime = false; } return markAsUnrecognized(parts); } + private List markAsUnrecognized(List parts) { return 
parts.stream().map(p -> { if (p.state() == PartState.UNPARSED) { @@ -104,26 +219,26 @@ private boolean notSet(Boolean val) { } - private LookupResult lookup(List parts, PatternPair patternPair) { + private LookupResult lookup(List parts, PartExtractor partExtractor) { for (int i = 0; i < parts.size(); i++) { Part part = parts.get(i); if (part.state() == PartState.UNPARSED) { String source = part.source(); - Matcher matcher = patternPair.pattern.matcher(source); - if (matcher.find()) { + PartExtractorResult per = partExtractor.extract(source); + if (per.found) { parts.remove(i); - if (matcher.end(1) != source.length()) { - UnparsedPart after = new UnparsedPart(part.start() + matcher.end(1), part.end(), source.substring(matcher.end(1))); + if (per.end != source.length()) { + UnparsedPart after = new UnparsedPart(part.start() + per.end, part.end(), source.substring(per.end)); parts.add(i, after); } - ParsedPart parsed = new ParsedPart(part.start() + matcher.start(1), part.start() + matcher.end(1), patternPair.formatterPattern); + ParsedPart parsed = new ParsedPart(part.start() + per.start, part.start() + per.end, partExtractor.formatterPattern()); parts.add(i, parsed); - if (matcher.start(1) != 0) { - UnparsedPart before = new UnparsedPart(part.start(), part.start() + matcher.start(1), source.substring(0, matcher.start(1))); + if (per.start != 0) { + UnparsedPart before = new UnparsedPart(part.start(), part.start() + per.start, source.substring(0, per.start)); parts.add(i, before); } diff --git a/src/test/java/liqp/filters/date/FuzzyDateParserTest.java b/src/test/java/liqp/filters/date/FuzzyDateParserTest.java index 0ecf0f91..8d74e658 100644 --- a/src/test/java/liqp/filters/date/FuzzyDateParserTest.java +++ b/src/test/java/liqp/filters/date/FuzzyDateParserTest.java @@ -1,5 +1,6 @@ package liqp.filters.date; +import java.util.Locale; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; @@ -11,32 +12,39 @@ 
@RunWith(Parameterized.class) public class FuzzyDateParserTest { - private FuzzyDateParser parser = new FuzzyDateParser(); - private final String input; private final String expectedPattern; - - + private final Locale locale; @Parameterized.Parameters public static Collection data() { return Arrays.asList(new Object[][] { - { "1995", "yyyy" }, - { " 1995 ", " yyyy "}, - { " 1995", " yyyy"}, - { "1995 ", "yyyy "}, + {null, "1995", "yyyy" }, + {null, " 1995 ", " yyyy "}, + {null, " 1995", " yyyy"}, + {null, "1995 ", "yyyy "}, + {null, "January 1995", "MMMM yyyy"}, + {null, "January 1995 ", "MMMM yyyy "}, + {null, " January 1995", " MMMM yyyy"}, + {null, " 1995 January", " yyyy MMMM"}, + {null, "Jan 1995", "MMM yyyy"}, + {null, "1995 Jan ", "yyyy MMM "}, + {Locale.GERMAN, "1995 Mai", "yyyy MMMM"}, + {Locale.GERMAN, "??1995-----Dez!", "??yyyy-----MMM!"}, }); } - public FuzzyDateParserTest(String input, String expectedPattern) { + public FuzzyDateParserTest(Locale locale, String input, String expectedPattern) { + this.locale = locale; this.input = input; this.expectedPattern = expectedPattern; } @Test public void shouldParse() { - String pattern = parser.guessPattern(input); + final FuzzyDateParser parser = new FuzzyDateParser(); + String pattern = parser.guessPattern(input, locale); assertEquals(expectedPattern, pattern); } -} \ No newline at end of file +}