Skip to content

Commit

Permalink
[WIP]
Browse files Browse the repository at this point in the history
  • Loading branch information
vkhrystiuk-ks committed Nov 30, 2024
1 parent 7f542c4 commit d9f5ae5
Show file tree
Hide file tree
Showing 2 changed files with 155 additions and 32 deletions.
157 changes: 136 additions & 21 deletions src/main/java/liqp/filters/date/FuzzyDateParser.java
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
package liqp.filters.date;

import java.text.DateFormatSymbols;
import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.time.temporal.TemporalAccessor;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
Expand All @@ -20,7 +22,7 @@ public ZonedDateTime parse(String valAsString, Locale locale, ZoneId defaultZone
return zonedDateTime;
}

String pattern = guessPattern(normalized);
String pattern = guessPattern(normalized, locale);

TemporalAccessor temporalAccessor = parseUsingPattern(normalized, pattern, locale);
if (temporalAccessor == null) {
Expand All @@ -30,10 +32,10 @@ public ZonedDateTime parse(String valAsString, Locale locale, ZoneId defaultZone
return getZonedDateTimeFromTemporalAccessor(temporalAccessor, defaultZone);
}

String guessPattern(String normalized) {
String guessPattern(String normalized, Locale locale) {
List<Part> parts = new ArrayList<>();
// we start as one big single unparsed part
DateParseContext ctx = new DateParseContext();
DateParseContext ctx = new DateParseContext(locale);
parts.add(new UnparsedPart(0, normalized.length(), normalized));

while (haveUnparsed(parts)) {
Expand All @@ -55,19 +57,108 @@ private String reconstructPattern(List<Part> parts) {
}

static class DateParseContext {

private final Locale locale;
Boolean hasYear;
}
Boolean hasMonthName;
Boolean hasTime;

static class PatternPair {
final Pattern pattern;
final String formatterPattern;
public DateParseContext(Locale locale) {
this.locale = locale;
}
}
static class PartExtractorResult {
boolean found;
int start;
int end;
}
interface PartExtractor {
PartExtractorResult extract(String source);
String formatterPattern();
}
static class RegexPartExtractor implements PartExtractor {
private final Pattern pattern;
private final String formatterPattern;

PatternPair(Pattern pattern, String formatterPattern) {
this.pattern = pattern;
RegexPartExtractor(String regex, String formatterPattern) {
this.pattern = Pattern.compile(regex);
this.formatterPattern = formatterPattern;
}

@Override
public PartExtractorResult extract(String source) {
Matcher matcher = pattern.matcher(source);
if (matcher.find()) {
PartExtractorResult result = new PartExtractorResult();
result.found = true;
result.start = matcher.start(1);
result.end = matcher.end(1);
return result;
}
return new PartExtractorResult();
}

@Override
public String formatterPattern() {
return formatterPattern;
}
}
// Extractor for a run of four digits, reported as the "yyyy" formatter pattern.
// Declared static final so the regex is compiled once per class instead of once
// per parser instance, and so the reference cannot be reassigned.
static final PartExtractor plainYearExtractor = new RegexPartExtractor(".*\\b?(\\d{4})\\b?.*", "yyyy");

/**
 * {@link PartExtractor} that forwards every call to another extractor.
 * The target is not set here; a subclass has to assign {@code delegate}
 * before either method is invoked.
 */
static class PartExtractorDelegate implements PartExtractor {
    private PartExtractor delegate;

    /** Forwards to the wrapped extractor's formatter pattern. */
    @Override
    public String formatterPattern() {
        return this.delegate.formatterPattern();
    }

    /** Forwards the search to the wrapped extractor and returns its result untouched. */
    @Override
    public PartExtractorResult extract(String source) {
        final PartExtractorResult forwarded = this.delegate.extract(source);
        return forwarded;
    }
}
/**
 * Extractor that recognises a month name of the given locale.
 *
 * <p>The names are turned into a regex alternation. Each name is quoted first,
 * so a name containing regex metacharacters (for example an abbreviation that
 * ends in {@code '.'}) is matched literally rather than as a pattern.
 */
static class FullMonthExtractor extends PartExtractorDelegate {
    /**
     * @param locale locale whose month names are used; {@code null} and
     *               {@link Locale#ROOT} fall back to {@link Locale#US}
     * @param formatterPattern formatter pattern reported for a matched month
     */
    public FullMonthExtractor(Locale locale, String formatterPattern) {
        if (locale == null || Locale.ROOT.equals(locale)) {
            locale = Locale.US;
        }
        // Deliberately dispatches to the subclass override so that
        // ShortMonthExtractor can supply abbreviated names instead.
        String[] months = withoutNulls(getMonthsNamesFromLocale(locale));
        String monthPattern = String.join("|", quoteAll(months));
        super.delegate = new RegexPartExtractor(".*\\b?(" + monthPattern + ")\\b?.*", formatterPattern);
    }

    /**
     * Supplies the month names to recognise.
     *
     * @param locale locale to read the names from
     * @return month names as reported by {@link DateFormatSymbols#getMonths()}
     */
    protected String[] getMonthsNamesFromLocale(Locale locale) {
        return new DateFormatSymbols(locale).getMonths();
    }

    /** Drops {@code null} and empty entries so they cannot become empty regex alternatives. */
    private static String[] withoutNulls(String[] names) {
        return Arrays.stream(names)
                .filter(month -> month != null && !month.isEmpty())
                .toArray(String[]::new);
    }

    /** Escapes every name so it is matched as literal text inside the alternation. */
    private static String[] quoteAll(String[] names) {
        return Arrays.stream(names)
                .map(Pattern::quote)
                .toArray(String[]::new);
    }
}
static final PatternPair plainYearPair = new PatternPair(Pattern.compile(".*\\b?(\\d{4})\\b?.*"), "yyyy");

/**
 * Builds an extractor for full month names of {@code locale}, reported as {@code "MMMM"}.
 * A new extractor is created on every call.
 */
private PartExtractor fullMonthExtractor(Locale locale) {
    final PartExtractor byFullName = new FullMonthExtractor(locale, "MMMM");
    return byFullName;
}

/**
 * Variant of {@link FullMonthExtractor} that recognises abbreviated month names
 * and reports them as {@code "MMM"}.
 */
static class ShortMonthExtractor extends FullMonthExtractor {

    /** @param locale locale whose abbreviated month names are used */
    public ShortMonthExtractor(Locale locale) {
        super(locale, "MMM");
    }

    /** Supplies {@link DateFormatSymbols#getShortMonths()} instead of the full names. */
    @Override
    protected String[] getMonthsNamesFromLocale(Locale locale) {
        final DateFormatSymbols symbols = new DateFormatSymbols(locale);
        return symbols.getShortMonths();
    }
}

/**
 * Builds an extractor for abbreviated month names of {@code locale}.
 * A new extractor is created on every call.
 */
private PartExtractor shortMonthExtractor(Locale locale) {
    final PartExtractor byShortName = new ShortMonthExtractor(locale);
    return byShortName;
}


static class LookupResult {
final List<Part> parts;
final boolean found;
Expand All @@ -78,17 +169,41 @@ static class LookupResult {
}
/**
 * Tries to recognise one more date component among the unparsed parts.
 *
 * <p>Components are attempted in a fixed order: year, then month name (full
 * before abbreviated), then time. Each attempt is recorded in {@code ctx}, so a
 * component that was already found or ruled out is not searched for again. As
 * soon as one lookup succeeds its part list is returned; when nothing is left
 * to try, the remaining unparsed parts are marked as unrecognized.
 *
 * @param parts current partition of the input
 * @param ctx   per-parse state recording which components were already attempted
 * @return the part list after this step
 */
private List<Part> parsePart(List<Part> parts, DateParseContext ctx) {
    if (notSet(ctx.hasYear)) {
        LookupResult result = lookup(parts, plainYearExtractor);
        if (result.found) {
            ctx.hasYear = true;
            return result.parts;
        }
        ctx.hasYear = false;
    }

    if (notSet(ctx.hasMonthName)) {
        // Full names go first so a long name is not claimed by its abbreviation.
        LookupResult result = lookup(parts, fullMonthExtractor(ctx.locale));
        if (result.found) {
            ctx.hasMonthName = true;
            return result.parts;
        }

        result = lookup(parts, shortMonthExtractor(ctx.locale));
        if (result.found) {
            ctx.hasMonthName = true;
            return result.parts;
        }

        ctx.hasMonthName = false;
    }

    if (notSet(ctx.hasTime)) {
        // NOTE(review): placeholder - no time extractor is wired in yet, so this
        // result is built as "not found" and the branch below looks unreachable.
        LookupResult result = new LookupResult(parts, false);
        if (result.found) {
            ctx.hasTime = true;
            return result.parts;
        }
        ctx.hasTime = false;
    }

    return markAsUnrecognized(parts);
}


private List<Part> markAsUnrecognized(List<Part> parts) {
return parts.stream().map(p -> {
if (p.state() == PartState.UNPARSED) {
Expand All @@ -104,26 +219,26 @@ private boolean notSet(Boolean val) {
}


private LookupResult lookup(List<Part> parts, PatternPair patternPair) {
private LookupResult lookup(List<Part> parts, PartExtractor partExtractor) {
for (int i = 0; i < parts.size(); i++) {
Part part = parts.get(i);

if (part.state() == PartState.UNPARSED) {
String source = part.source();
Matcher matcher = patternPair.pattern.matcher(source);
if (matcher.find()) {
PartExtractorResult per = partExtractor.extract(source);
if (per.found) {
parts.remove(i);

if (matcher.end(1) != source.length()) {
UnparsedPart after = new UnparsedPart(part.start() + matcher.end(1), part.end(), source.substring(matcher.end(1)));
if (per.end != source.length()) {
UnparsedPart after = new UnparsedPart(part.start() + per.end, part.end(), source.substring(per.end));
parts.add(i, after);
}

ParsedPart parsed = new ParsedPart(part.start() + matcher.start(1), part.start() + matcher.end(1), patternPair.formatterPattern);
ParsedPart parsed = new ParsedPart(part.start() + per.start, part.start() + per.end, partExtractor.formatterPattern());
parts.add(i, parsed);

if (matcher.start(1) != 0) {
UnparsedPart before = new UnparsedPart(part.start(), part.start() + matcher.start(1), source.substring(0, matcher.start(1)));
if (per.start != 0) {
UnparsedPart before = new UnparsedPart(part.start(), part.start() + per.start, source.substring(0, per.start));
parts.add(i, before);
}

Expand Down
30 changes: 19 additions & 11 deletions src/test/java/liqp/filters/date/FuzzyDateParserTest.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package liqp.filters.date;

import java.util.Locale;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
Expand All @@ -11,32 +12,39 @@

@RunWith(Parameterized.class)
public class FuzzyDateParserTest {
private FuzzyDateParser parser = new FuzzyDateParser();

private final String input;
private final String expectedPattern;


private final Locale locale;

@Parameterized.Parameters
public static Collection<Object[]> data() {
return Arrays.asList(new Object[][] {
{ "1995", "yyyy" },
{ " 1995 ", " yyyy "},
{ " 1995", " yyyy"},
{ "1995 ", "yyyy "},
{null, "1995", "yyyy" },
{null, " 1995 ", " yyyy "},
{null, " 1995", " yyyy"},
{null, "1995 ", "yyyy "},
{null, "January 1995", "MMMM yyyy"},
{null, "January 1995 ", "MMMM yyyy "},
{null, " January 1995", " MMMM yyyy"},
{null, " 1995 January", " yyyy MMMM"},
{null, "Jan 1995", "MMM yyyy"},
{null, "1995 Jan ", "yyyy MMM "},
{Locale.GERMAN, "1995 Mai", "yyyy MMMM"},
{Locale.GERMAN, "??1995-----Dez!", "??yyyy-----MMM!"},
});
}

public FuzzyDateParserTest(String input, String expectedPattern) {
public FuzzyDateParserTest(Locale locale, String input, String expectedPattern) {
this.locale = locale;
this.input = input;
this.expectedPattern = expectedPattern;
}

@Test
public void shouldParse() {
String pattern = parser.guessPattern(input);
final FuzzyDateParser parser = new FuzzyDateParser();
String pattern = parser.guessPattern(input, locale);
assertEquals(expectedPattern, pattern);
}

}
}

0 comments on commit d9f5ae5

Please sign in to comment.