diff --git a/languagetool-commandline/src/test/java/org/languagetool/commandline/AbstractSecurityTestCase.java b/languagetool-commandline/src/test/java/org/languagetool/commandline/AbstractSecurityTestCase.java deleted file mode 100644 index 11e0ed1c7888..000000000000 --- a/languagetool-commandline/src/test/java/org/languagetool/commandline/AbstractSecurityTestCase.java +++ /dev/null @@ -1,76 +0,0 @@ -/* LanguageTool, a natural language style checker - * Copyright (C) 2009 Daniel Naber (http://www.danielnaber.de) - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 - * USA - */ -package org.languagetool.commandline; - -import org.junit.After; -import org.junit.Before; -import org.junit.Test; - -import java.security.Permission; - -/** - * @author Charlie Collins (Maven Test Example from - * http://www.screaming-penguin.com/node/7570) - */ -public class AbstractSecurityTestCase { - - protected static class ExitException extends SecurityException { - private static final long serialVersionUID = 1L; - public final int status; - public ExitException(int status) { - super("There is no escape!"); - this.status = status; - } - } - - private static class NoExitSecurityManager extends SecurityManager { - @Override - public void checkPermission(@SuppressWarnings("unused") Permission perm) { - // allow anything. - } - - @Override - @SuppressWarnings("unused") - public void checkPermission(Permission perm, Object context) { - // allow anything. 
- } - - @Override - public void checkExit(int status) { - super.checkExit(status); - throw new ExitException(status); - } - } - - @Before - public void setUp() throws Exception { - System.setSecurityManager(new NoExitSecurityManager()); - } - - @After - public void tearDown() throws Exception { - System.setSecurityManager(null); - } - - //get rid of JUnit warning for this helper class - @Test - public void testSomething() { - } - -} diff --git a/languagetool-commandline/src/test/java/org/languagetool/commandline/MainTest.java b/languagetool-commandline/src/test/java/org/languagetool/commandline/MainTest.java index f9e6c3f87917..24b2e4a0b966 100644 --- a/languagetool-commandline/src/test/java/org/languagetool/commandline/MainTest.java +++ b/languagetool-commandline/src/test/java/org/languagetool/commandline/MainTest.java @@ -23,15 +23,7 @@ import org.junit.Before; import org.junit.Test; -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.OutputStreamWriter; -import java.io.PrintStream; -import java.io.PrintWriter; +import java.io.*; import java.nio.charset.StandardCharsets; import static org.hamcrest.CoreMatchers.is; @@ -42,7 +34,7 @@ * * @author Marcin Miłkowski */ -public class MainTest extends AbstractSecurityTestCase { +public class MainTest { private final File enTestFile; private final File xxRuleFile; @@ -113,8 +105,7 @@ public MainTest() throws IOException { } @Before - public void setUp() throws Exception { - super.setUp(); + public void setUp() { this.stdout = System.out; this.stderr = System.err; this.out = new ByteArrayOutputStream(); @@ -124,38 +115,33 @@ public void setUp() throws Exception { } @After - public void tearDown() throws Exception { + public void tearDown() { System.setOut(this.stdout); System.setErr(this.stderr); - super.tearDown(); } @Test public void testUsageMessage() throws 
Exception { - try { - String[] args = {"-h"}; - Main.main(args); - fail("LT should have exited with status 0!"); - } catch (ExitException e) { - String output = new String(this.out.toByteArray()); - assertTrue(output.contains("Usage: java -jar languagetool-commandline.jar")); - assertEquals("Exit status", 1, e.status); - } + Process process = new ProcessBuilder( + "java", "-cp", System.getProperty("java.class.path"), "org.languagetool.commandline.Main", "-h" + ).start(); + String output = readProcessOutput(process); // drain stdout before waiting: a child blocked on a full pipe buffer would never exit + int exitCode = process.waitFor(); + assertTrue(output.contains("Usage: java -jar languagetool-commandline.jar")); + assertEquals("Exit status", 1, exitCode); } @Test public void testPrintLanguages() throws Exception { - try { - String[] args = {"--list"}; - Main.main(args); - fail("LT should have exited with status 0!"); - } catch (ExitException e) { - String output = new String(this.out.toByteArray()); - assertTrue(output.contains("German")); - assertTrue(output.contains("de-DE")); - assertTrue(output.contains("English")); - assertEquals("Exit status", 0, e.status); - } + Process process = new ProcessBuilder( + "java", "-cp", System.getProperty("java.class.path"), "org.languagetool.commandline.Main", "--list" + ).start(); + String output = readProcessOutput(process); // drain stdout before waiting: a child blocked on a full pipe buffer would never exit + int exitCode = process.waitFor(); + assertTrue(output.contains("German")); + assertTrue(output.contains("de-DE")); + assertTrue(output.contains("English")); + assertEquals("Exit status", 0, exitCode); } @Test @@ -670,4 +656,15 @@ private String getExternalFalseFriends() { return xxFalseFriendFile.getAbsolutePath(); } + /** Reads the child's stdout line by line until EOF; call this before {@code waitFor()} so the pipe is drained while the child is still running. */ + private String readProcessOutput(Process process) throws IOException { + try (BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream()))) { + StringBuilder output = new StringBuilder(); + String line; + while ((line = reader.readLine()) != null) { + output.append(line).append(System.lineSeparator()); + } + return output.toString(); + } +
} } diff --git a/languagetool-core/src/main/java/org/languagetool/rules/AbstractUnitConversionRule.java b/languagetool-core/src/main/java/org/languagetool/rules/AbstractUnitConversionRule.java index c69679c5b158..1fb4d08e284c 100644 --- a/languagetool-core/src/main/java/org/languagetool/rules/AbstractUnitConversionRule.java +++ b/languagetool-core/src/main/java/org/languagetool/rules/AbstractUnitConversionRule.java @@ -80,7 +80,7 @@ public abstract class AbstractUnitConversionRule extends Rule { protected static final String NUMBER_REGEX = "(-?[0-9]{1,32}[0-9,.]{0,32})"; protected static final String NUMBER_REGEX_WITH_BOUNDARY = "(-?\\b[0-9]{1,32}[0-9,.]{0,32})"; - protected final Pattern numberRangePart = Pattern.compile(NUMBER_REGEX_WITH_BOUNDARY + "$"); + protected final Pattern numberRangePart = Pattern.compile(NUMBER_REGEX_WITH_BOUNDARY + "$", Pattern.UNICODE_CHARACTER_CLASS); private static final double DELTA = 1e-2; private static final double ROUNDING_DELTA = 0.05; @@ -196,7 +196,7 @@ protected String formatRounded(String s) { */ protected void addUnit(String pattern, Unit base, String symbol, double factor, boolean metric) { Unit unit = base.multiply(factor); - unitPatterns.put(Pattern.compile(NUMBER_REGEX_WITH_BOUNDARY + "[\\s\u00A0]{0," + WHITESPACE_LIMIT + "}" + pattern + "\\b"), unit); + unitPatterns.put(Pattern.compile(NUMBER_REGEX_WITH_BOUNDARY + "[\\s\u00A0]{0," + WHITESPACE_LIMIT + "}" + pattern + "\\b", Pattern.UNICODE_CHARACTER_CLASS), unit); unitSymbols.putIfAbsent(unit, new ArrayList<>()); unitSymbols.get(unit).add(symbol); if (metric && !metricUnits.contains(unit)) { diff --git a/languagetool-core/src/main/java/org/languagetool/rules/patterns/PatternRuleHandler.java b/languagetool-core/src/main/java/org/languagetool/rules/patterns/PatternRuleHandler.java index 2bd150784472..04f43cf8d5c1 100644 --- a/languagetool-core/src/main/java/org/languagetool/rules/patterns/PatternRuleHandler.java +++ 
b/languagetool-core/src/main/java/org/languagetool/rules/patterns/PatternRuleHandler.java @@ -776,7 +776,8 @@ private void createRules(List elemList, rule.setDistanceTokens(distanceTokens); rule.setXmlLineNumber(xmlLineNumber); } else if (regex.length() > 0) { - int flags = regexCaseSensitive ? 0 : Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CASE; + // UNICODE_CASE is no longer OR-ed in: per the Pattern javadoc, UNICODE_CHARACTER_CLASS implies it, so case-insensitive matching stays Unicode-aware + int flags = regexCaseSensitive ? Pattern.UNICODE_CHARACTER_CLASS : Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CHARACTER_CLASS; String regexStr = regex.toString(); if (regexMode == RegexpMode.SMART) { // Note: it's not that easy to add \b because the regex might look like '(foo)' or '\d' so we cannot just look at the last character diff --git a/languagetool-core/src/main/java/org/languagetool/rules/patterns/RegexAntiPatternFilter.java b/languagetool-core/src/main/java/org/languagetool/rules/patterns/RegexAntiPatternFilter.java index d7e7592e8500..06c4910344d6 100644 --- a/languagetool-core/src/main/java/org/languagetool/rules/patterns/RegexAntiPatternFilter.java +++ b/languagetool-core/src/main/java/org/languagetool/rules/patterns/RegexAntiPatternFilter.java @@ -42,7 +42,7 @@ public RuleMatch acceptRuleMatch(RuleMatch match, Map arguments, } String[] antiPatterns = antiPatternStr.split("\\|"); for (String antiPattern : antiPatterns) { - Pattern p = Pattern.compile(antiPattern); + Pattern p = Pattern.compile(antiPattern, Pattern.UNICODE_CHARACTER_CLASS); Matcher matcher = p.matcher(sentenceObj.getText()); while (matcher.find()) { // partial overlap is enough to filter out a match: diff --git a/languagetool-core/src/main/java/org/languagetool/rules/patterns/XMLRuleHandler.java b/languagetool-core/src/main/java/org/languagetool/rules/patterns/XMLRuleHandler.java index 2dc66dbbf6c8..9a0fa22038a6 100644 --- a/languagetool-core/src/main/java/org/languagetool/rules/patterns/XMLRuleHandler.java +++
b/languagetool-core/src/main/java/org/languagetool/rules/patterns/XMLRuleHandler.java @@ -18,11 +18,18 @@ */ package org.languagetool.rules.patterns; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.ResourceBundle; +import java.util.function.Function; + import org.apache.commons.lang3.ObjectUtils; import org.apache.commons.lang3.tuple.Triple; import org.jetbrains.annotations.Nullable; import org.languagetool.Language; -import org.languagetool.ResourceBundleTools; import org.languagetool.chunking.ChunkTag; import org.languagetool.rules.CorrectExample; import org.languagetool.rules.ErrorTriggeringExample; @@ -35,9 +42,6 @@ import org.xml.sax.SAXParseException; import org.xml.sax.helpers.DefaultHandler; -import java.util.*; -import java.util.function.Function; - /** * XML rule handler that loads rules from XML and throws * exceptions on errors and warnings. diff --git a/languagetool-core/src/main/java/org/languagetool/tokenizers/SrxTools.java b/languagetool-core/src/main/java/org/languagetool/tokenizers/SrxTools.java index 96d7e9c8c146..d5331052bb65 100644 --- a/languagetool-core/src/main/java/org/languagetool/tokenizers/SrxTools.java +++ b/languagetool-core/src/main/java/org/languagetool/tokenizers/SrxTools.java @@ -31,6 +31,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.regex.Pattern; /** * Tools for loading an SRX tokenizer file. 
@@ -59,7 +60,8 @@ static SrxDocument createSrxDocument(String path) { static List tokenize(String text, SrxDocument srxDocument, String code) { List segments = new ArrayList<>(); - TextIterator textIterator = new SrxTextIterator(srxDocument, code, text); + Map parserParameters = Map.of(SrxTextIterator.DEFAULT_PATTERN_FLAGS_PARAMETER, Pattern.UNICODE_CHARACTER_CLASS); + TextIterator textIterator = new SrxTextIterator(srxDocument, code, text, parserParameters); while (textIterator.hasNext()) { segments.add(textIterator.next()); } diff --git a/languagetool-core/src/main/resources/org/languagetool/resource/segment.srx b/languagetool-core/src/main/resources/org/languagetool/resource/segment.srx index 78d9dc15c81c..bd6beff2aa38 100644 --- a/languagetool-core/src/main/resources/org/languagetool/resource/segment.srx +++ b/languagetool-core/src/main/resources/org/languagetool/resource/segment.srx @@ -5744,7 +5744,7 @@ -(?U)\b(в|у|на|за|з|із|зі|зо)(\.\.\.|…)[\h\v]* +\b(в|у|на|за|з|із|зі|зо)(\.\.\.|…)[\h\v]* \p{Lu} @@ -5757,12 +5757,12 @@ -(?U)\b\d{1,3}\.[\h]+ +\b\d{1,3}\.[\h]+ \p{Ll}|\p{Lu}{2,} -(?U)\b\p{Ll}+[.!?][\h\v]* +\b\p{Ll}+[.!?][\h\v]* \h*(([\(«]|[\[‐-―-][\h\v]*)?\p{Ll}) @@ -5771,17 +5771,17 @@ -(?U)\b\p{L}{1,2}\. +\b\p{L}{1,2}\. \p{L}{1,2}\. -(?U)\b[\u00A0\u202F]?[A-Z]\.[\h\v]? +\b[\u00A0\u202F]?[A-Z]\.[\h\v]? [A-Z][a-zA-Z'’.-]|[А-ЯІЇЄҐ]\. -(?U)(^[\h\v]*|\([\h\v]*|[«„"]|(\b[А-ЯІЇЄҐACEIHOPX]\.-))[А-ЯІЇЄҐA-Z]\.[\h\v]* +(^[\h\v]*|\([\h\v]*|[«„"]|(\b[А-ЯІЇЄҐACEIHOPX]\.-))[А-ЯІЇЄҐA-Z]\.[\h\v]* @@ -5805,12 +5805,12 @@ а до лютого 2020 р. 
— затвердити --> -(?U)\b([0-9]{2}|[0-9]{4})[\h\v]+р\.[\h\v]+ +\b([0-9]{2}|[0-9]{4})[\h\v]+р\.[\h\v]+ [\h\v]*[№0-9‐-―-] -(?U)(?<!\d[\h]*)\bр\.[\h\v]* +(?<!\d[\h]*)\bр\.[\h\v]* [\h]*(?!(На|Але|Так?)[\h\v]+)[А-ЯІЇЄҐA-Z][^\h] @@ -5825,29 +5825,29 @@ -(?U)\b([0-9]0|[0-9]{3}0)(-[мх])?рр\.[\h\v]* +\b([0-9]0|[0-9]{3}0)(-[мх])?рр\.[\h\v]* -(?U)\b(тис|млн|млрд|грн)\.[\h\v]* +\b(тис|млн|млрд|грн)\.[\h\v]* [\h\v]*(\d|[КМ]Вт) -(?U)\b(укр|рос|англ?|амер|італ|ісп|нім|фр(анц)?|лат|грец(ьк)?)\.[\h\v]* +\b(укр|рос|англ?|амер|італ|ісп|нім|фр(анц)?|лат|грец(ьк)?)\.[\h\v]* -(?U)\b(абз|арк|ауд|бл|буд|бульв|вул|держ|дод|зав|зб|зв|зовн|екон|к|кв|канд|кн|напр|нпр|нац|обл|оп|пл|пол|поч|пп|пор|просп|розд|стор|табл|[Тт]]ел|ч|част)\.[\h\v]* +\b(абз|арк|ауд|бл|буд|бульв|вул|держ|дод|зав|зб|зв|зовн|екон|к|кв|канд|кн|напр|нпр|нац|обл|оп|пл|пол|поч|пп|пор|просп|розд|стор|табл|[Тт]ел|ч|част)\.[\h\v]* -(?U)\b(кін)\.[\h\v]* +\b(кін)\.[\h\v]* [а-яіїєґ0-9IXV]|[ІХ]+\b -(?U)\b[сС]т\.[\h\v] +\b[сС]т\.[\h\v] [\h]*(?!([АВУОІЄ]|На|Але|Так?)[\h\v]) @@ -5856,21 +5856,21 @@ -(?U)\bнар\.[\h\v]* +\bнар\.[\h\v]* ([0-9]|бл\.|арт\.) -(?U)\bдол\.[\h\v]* +\bдол\.[\h\v]* США -(?U)(?<!т\.[\h\v]?)\b[пд]\.[\h\v]* +(?<!т\.[\h\v]?)\b[пд]\.[\h\v]* -(?U)\b(див)\.[\h\v] +\b(див)\.[\h\v] [\h\v]*[^А-ЯІЇЄҐ] @@ -5882,20 +5882,20 @@ України (див. 
Зимові походи) --> -(?U)(\([^)]*|\[[^\]]*|,[\h\v]*)\b(див)\.[\h\v]* +(\([^)]*|\[[^\]]*|,[\h\v]*)\b(див)\.[\h\v]* -(?U)\b(ап|[Аа]кад|[Пп]роф|[Дд]оц|[Аа]сист|[Рр]еж|[Аа]рх|[Сс]вв?|о|оз|ім|інж|дир|тов|упоряд|тт|чл\.-кор|[Пп]реп|[сС]вт)\.[\h\v]* +\b(ап|[Аа]кад|[Пп]роф|[Дд]оц|[Аа]сист|[Рр]еж|[Аа]рх|[Сс]вв?|о|оз|ім|інж|дир|тов|упоряд|тт|чл\.-кор|[Пп]реп|[сС]вт)\.[\h\v]* [\h\v]*[А-ЯІЇЄҐA-Z] -(?U)(?<![іи]\s+)\bдр\.[\h\v]* +(?<![іи]\s+)\bдр\.[\h\v]* [\h\v]*[А-ЯІЇЄҐ] -(?U)\bМан\.[\h\v]* +\bМан\.[\h\v]* [\h\v]*([Сс]іті|[Юю]н) @@ -5905,7 +5905,7 @@ -(?U)\b([Аа]рт|[Мм]ал|[Рр]ис|[Сс]пр)\.[\h\v]* +\b([Аа]рт|[Мм]ал|[Рр]ис|[Сс]пр)\.[\h\v]* [\h\v]*(№[\h\v]*)?[0-9] @@ -5915,7 +5915,7 @@ -(?U)(?<!\d[\h\v]*)\bм\.[\h\v]* +(?<!\d[\h\v]*)\bм\.[\h\v]* [А-ЯІЇЄҐ][а-яіїєґ'] @@ -5939,7 +5939,7 @@ [\h\v]*[‐-―-][\h\v]*([Рр]ед|[Аа]вт)\.[\h\v]*[\)\]] -(?U)\b([Рр]ед)\.[\h\v]* +\b([Рр]ед)\.[\h\v]* [А-ЯІЇЄҐ] diff --git a/languagetool-language-modules/ca/src/main/java/org/languagetool/language/Catalan.java b/languagetool-language-modules/ca/src/main/java/org/languagetool/language/Catalan.java index ef481fff1f57..6b2c6cd5498c 100644 --- a/languagetool-language-modules/ca/src/main/java/org/languagetool/language/Catalan.java +++ b/languagetool-language-modules/ca/src/main/java/org/languagetool/language/Catalan.java @@ -45,9 +45,9 @@ public class Catalan extends Language { - private static final Pattern PATTERN_1 = compile("(\\b[lmnstdLMNSTD])'"); - private static final Pattern PATTERN_2 = compile("(\\b[lmnstdLMNSTD])’\""); - private static final Pattern PATTERN_3 = compile("(\\b[lmnstdLMNSTD])’'"); + private static final Pattern PATTERN_1 = compile("(\\b[lmnstdLMNSTD])'", Pattern.UNICODE_CHARACTER_CLASS); + private static final Pattern PATTERN_2 = compile("(\\b[lmnstdLMNSTD])’\"", Pattern.UNICODE_CHARACTER_CLASS); + private static final Pattern PATTERN_3 = compile("(\\b[lmnstdLMNSTD])’'", Pattern.UNICODE_CHARACTER_CLASS); @Override public String getName() { @@ -345,7 +345,7 @@ public SpellingCheckRule 
createDefaultSpellingRule(ResourceBundle messages) thro return new MorfologikCatalanSpellerRule(messages, this, null, Collections.emptyList()); } - private static final Pattern CA_OLD_DIACRITICS = compile(".*\\b(sóc|dóna|dónes|vénen|véns|fóra)\\b.*",Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CASE); + private static final Pattern CA_OLD_DIACRITICS = compile(".*\\b(sóc|dóna|dónes|vénen|véns|fóra)\\b.*",Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CHARACTER_CLASS); private RuleMatch adjustCatalanMatch(RuleMatch ruleMatch, Set enabledRules) { String errorStr = ruleMatch.getOriginalErrorStr(); @@ -419,27 +419,27 @@ private String removeOldDiacritics(String s) { .replace("Fóra", "Fora"); } - private static final Pattern CA_CONTRACTIONS = compile("\\b([Aa]|[Dd]e) e(ls?)\\b"); - private static final Pattern CA_APOSTROPHES1 = compile("\\b([LDNSTMldnstm]['’]) "); + private static final Pattern CA_CONTRACTIONS = compile("\\b([Aa]|[Dd]e) e(ls?)\\b", Pattern.UNICODE_CHARACTER_CLASS); + private static final Pattern CA_APOSTROPHES1 = compile("\\b([LDNSTMldnstm]['’]) ", Pattern.UNICODE_CHARACTER_CLASS); // exceptions: l'FBI, l'statu quo - private static final Pattern CA_APOSTROPHES2 = compile("\\b([mtlsn])['’]([^1haeiouáàèéíòóúA-ZÀÈÉÍÒÓÚ“«\"])"); + private static final Pattern CA_APOSTROPHES2 = compile("\\b([mtlsn])['’]([^1haeiouáàèéíòóúA-ZÀÈÉÍÒÓÚ“«\"])", Pattern.UNICODE_CHARACTER_CLASS); // exceptions: el iogurt, la essa private static final Pattern CA_APOSTROPHES3 = compile("\\be?([mtsldn])e? 
(h?[aeiouàèéíòóú])", - Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE); + Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CHARACTER_CLASS); private static final Pattern CA_APOSTROPHES4 = compile("\\b(l)a ([aeoàúèéí][^ ])", - Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE); + Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CHARACTER_CLASS); private static final Pattern CA_APOSTROPHES5 = compile("\\b([mts]e) (['’])", - Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE); + Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CHARACTER_CLASS); private static final Pattern CA_APOSTROPHES6 = compile("\\bs'e(ns|ls)\\b", - Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE); + Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CHARACTER_CLASS); private static final Pattern CA_APOSTROPHES7 = compile("\\b(de|a)l (h?[aeoàúèéí][^ ])", - Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE); + Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CHARACTER_CLASS); private static final Pattern POSSESSIUS_v = compile("\\b([mtsMTS]e)v(a|es)\\b", - Pattern.UNICODE_CASE); + Pattern.UNICODE_CHARACTER_CLASS); private static final Pattern POSSESSIUS_V = compile("\\b([MTS]E)V(A|ES)\\b", - Pattern.UNICODE_CASE); + Pattern.UNICODE_CHARACTER_CLASS); private static final Pattern CA_REMOVE_SPACES = compile("\\b(a|de|pe) (ls? 
)", - Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE); + Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CHARACTER_CLASS); @Override public String adaptSuggestion(String s) { diff --git a/languagetool-language-modules/ca/src/main/java/org/languagetool/rules/ca/PronomsFeblesHelper.java b/languagetool-language-modules/ca/src/main/java/org/languagetool/rules/ca/PronomsFeblesHelper.java index fa5d8439a699..169c1f10b937 100644 --- a/languagetool-language-modules/ca/src/main/java/org/languagetool/rules/ca/PronomsFeblesHelper.java +++ b/languagetool-language-modules/ca/src/main/java/org/languagetool/rules/ca/PronomsFeblesHelper.java @@ -553,7 +553,7 @@ public static String convertPronounsForIntransitiveVerb(String s) { private static Pattern pronoun_wrong_apostrophation = Pattern.compile("([mts])'([^aeiouh].*)", Pattern.CASE_INSENSITIVE); private static Pattern pronoun_missing_apostrophation = Pattern.compile("(.*)\\be([stm]) (h?[aeiouh].*)", - Pattern.CASE_INSENSITIVE); + Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CHARACTER_CLASS); private static Pattern pronoun_wrong_hypphen = Pattern.compile("(.*)(-[stm])e-(h[oi])", Pattern.CASE_INSENSITIVE); diff --git a/languagetool-language-modules/de/src/main/java/org/languagetool/language/German.java b/languagetool-language-modules/de/src/main/java/org/languagetool/language/German.java index a9e70b6dc6c9..287c770c37b7 100644 --- a/languagetool-language-modules/de/src/main/java/org/languagetool/language/German.java +++ b/languagetool-language-modules/de/src/main/java/org/languagetool/language/German.java @@ -55,7 +55,7 @@ */ public class German extends Language implements AutoCloseable { - private static final Pattern TYPOGRAPHY_PATTERN = compile("\\b([a-zA-Z]\\.)([a-zA-Z]\\.)"); + private static final Pattern TYPOGRAPHY_PATTERN = compile("\\b([a-zA-Z]\\.)([a-zA-Z]\\.)", Pattern.UNICODE_CHARACTER_CLASS); private static final Pattern AI_DE_GGEC_MISSING_PUNCT = 
compile("AI_DE_GGEC_MISSING_PUNCTUATION_\\d+_DASH_J(_|AE)HRIG|AI_DE_GGEC_REPLACEMENT_CONFUSION", Pattern.CASE_INSENSITIVE); diff --git a/languagetool-language-modules/de/src/main/resources/org/languagetool/rules/de/grammar.xml b/languagetool-language-modules/de/src/main/resources/org/languagetool/rules/de/grammar.xml index 70eabb69ccb8..82f94c6d2fd7 100644 --- a/languagetool-language-modules/de/src/main/resources/org/languagetool/rules/de/grammar.xml +++ b/languagetool-language-modules/de/src/main/resources/org/languagetool/rules/de/grammar.xml @@ -79455,7 +79455,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA Dr.phil. Hans Müller - (s|S)\.\s?(o|u)\. + (s|S)\.[ \t]?(o|u)\. &glalong; \1. \2. @@ -79464,7 +79464,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA Das ist auch nicht gut, s. o. - (d|D)\.\s?h\. + (d|D)\.[ \t]?h\. &glalong; \1. h. @@ -79472,7 +79472,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA Das ist falsch, d.h. schlecht. - (m|M)\.\s?E\. + (m|M)\.[ \t]?E\. &glalong; \1. E. @@ -79488,7 +79488,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA Hans Müller, M.A. - (z|Z)\.\s?(B|K|T|Zt|Hd?)\. + (z|Z)\.[ \t]?(B|K|T|Zt|Hd?)\. &glalong; \1. \2. @@ -79498,7 +79498,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA Das ist z. B. auch nicht gut. - (v|n)\.\s?Chr\. + (v|n)\.[ \t]?Chr\. &glalong; \1. Chr. @@ -79514,7 +79514,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA Das Seminar beginnt um 16 Uhr c.t. - (u|o|i)\.\s?(ö|ä|a|dgl)\. + (u|o|i)\.[ \t]?(ö|ä|a|dgl)\. &glalong; \1. \2. @@ -79525,7 +79525,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA Das hier u. dgl. ist auch nicht gut. - (e|i|n)\.\s?V\. + (e|i|n)\.[ \t]?V\. &glalong; \1. V. @@ -79534,7 +79534,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA Sprechstunde n.V. 
- (u)\.\s?(v)\.\s?(m|a)\. + (u)\.\s?(v)\.[ \t]?(m|a)\. &glalong; \1. \2. \3. https://languagetool.org/insights/de/beitrag/vielmehr-viel-mehr/ @@ -79569,7 +79569,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA Vgl. Radebrecher, a.a.O. - \bp\.\s?a\. + \bp\.[ \t]?a\. &glalong; p. a. https://de.wiktionary.org/wiki/p._a. @@ -79577,7 +79577,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA Hier gibt es eine Rendite von 2 % p.a. - Dr\.\s?med\.\s?(dent|vet)\. + Dr\.\s?med\.[ \t]?(dent|vet)\. &glalong; Dr. med. \1. @@ -79587,7 +79587,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA Dr. med.vet. Hans Müller - Dr\.\s?rer\.\s?(nat|pol)\. + Dr\.\s?rer\.[ \t]?(nat|pol)\. &glalong; Dr. rer. \1. diff --git a/languagetool-language-modules/es/src/main/java/org/languagetool/language/Spanish.java b/languagetool-language-modules/es/src/main/java/org/languagetool/language/Spanish.java index 58bb5bfda4a2..2cfc141199d4 100644 --- a/languagetool-language-modules/es/src/main/java/org/languagetool/language/Spanish.java +++ b/languagetool-language-modules/es/src/main/java/org/languagetool/language/Spanish.java @@ -315,7 +315,7 @@ public boolean hasMinMatchesRules() { return true; } - private static final Pattern ES_CONTRACTIONS = Pattern.compile("\\b([Aa]|[Dd]e) e(l)\\b"); + private static final Pattern ES_CONTRACTIONS = Pattern.compile("\\b([Aa]|[Dd]e) e(l)\\b", Pattern.UNICODE_CHARACTER_CLASS); @Override public String adaptSuggestion(String replacement) { diff --git a/languagetool-language-modules/es/src/main/java/org/languagetool/tokenizers/es/SpanishWordTokenizer.java b/languagetool-language-modules/es/src/main/java/org/languagetool/tokenizers/es/SpanishWordTokenizer.java index 7b7a114f356f..0bd7594ecec2 100644 --- a/languagetool-language-modules/es/src/main/java/org/languagetool/tokenizers/es/SpanishWordTokenizer.java +++ 
b/languagetool-language-modules/es/src/main/java/org/languagetool/tokenizers/es/SpanishWordTokenizer.java @@ -43,7 +43,7 @@ public class SpanishWordTokenizer extends WordTokenizer { // decimal comma between digits private static final Pattern DECIMAL_COMMA= Pattern.compile("([\\d]),([\\d])",Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CASE); // ordinals - private static final Pattern ORDINAL_POINT= Pattern.compile("\\b([\\d]+)\\.(º|ª|o|a|er|os|as)\\b",Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CASE); + private static final Pattern ORDINAL_POINT= Pattern.compile("\\b([\\d]+)\\.(º|ª|o|a|er|os|as)\\b",Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CHARACTER_CLASS); private static final Pattern PATTERN_1 = Pattern.compile("xxES_DECIMAL_POINTxx", Pattern.LITERAL); private static final Pattern PATTERN_2 = Pattern.compile("xxES_DECIMAL_COMMAxx", Pattern.LITERAL); private static final Pattern PATTERN_3 = Pattern.compile("xxES_ORDINAL_POINTxx", Pattern.LITERAL); diff --git a/languagetool-language-modules/fr/src/main/java/org/languagetool/language/French.java b/languagetool-language-modules/fr/src/main/java/org/languagetool/language/French.java index ceebef3a2005..3d431644aa81 100644 --- a/languagetool-language-modules/fr/src/main/java/org/languagetool/language/French.java +++ b/languagetool-language-modules/fr/src/main/java/org/languagetool/language/French.java @@ -50,9 +50,9 @@ public class French extends Language implements AutoCloseable { private static final String BEFORE_APOS = "([cjnmtsldCJNMTSLD]|qu|jusqu|lorsqu|puisqu|quoiqu|Qu|Jusqu|Lorsqu|Puisqu|Quoiqu|QU|JUSQU|LORSQU|PUISQU|QUOIQU)"; - private static final Pattern BEFORE_APOS_PATTERN_1 = compile("(\\b" + BEFORE_APOS + ")'"); - private static final Pattern BEFORE_APOS_PATTERN_2 = compile("(\\b" + BEFORE_APOS + ")’\""); - private static final Pattern BEFORE_APOS_PATTERN_3 = compile("(\\b" + BEFORE_APOS + ")’'"); + private static final Pattern BEFORE_APOS_PATTERN_1 = compile("(\\b" + BEFORE_APOS + ")'", 
Pattern.UNICODE_CHARACTER_CLASS); + private static final Pattern BEFORE_APOS_PATTERN_2 = compile("(\\b" + BEFORE_APOS + ")’\"", Pattern.UNICODE_CHARACTER_CLASS); + private static final Pattern BEFORE_APOS_PATTERN_3 = compile("(\\b" + BEFORE_APOS + ")’'", Pattern.UNICODE_CHARACTER_CLASS); private static final Pattern TYPOGRAPHY_PATTERN_1 = compile("\u00a0;"); private static final Pattern TYPOGRAPHY_PATTERN_2 = compile("\u00a0!"); diff --git a/languagetool-language-modules/fr/src/main/java/org/languagetool/rules/fr/MakeContractionsFilter.java b/languagetool-language-modules/fr/src/main/java/org/languagetool/rules/fr/MakeContractionsFilter.java index 3837ddd01816..740de82b848e 100644 --- a/languagetool-language-modules/fr/src/main/java/org/languagetool/rules/fr/MakeContractionsFilter.java +++ b/languagetool-language-modules/fr/src/main/java/org/languagetool/rules/fr/MakeContractionsFilter.java @@ -26,10 +26,10 @@ public class MakeContractionsFilter extends AbstractMakeContractionsFilter { - private static final Pattern DE_LE = Pattern.compile("\\bde le\\b", Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE); - private static final Pattern A_LE = Pattern.compile("\\bà le\\b", Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE); - private static final Pattern DE_LES = Pattern.compile("\\bde les\\b", Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE); - private static final Pattern A_LES = Pattern.compile("\\bà les\\b", Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE); + private static final Pattern DE_LE = Pattern.compile("\\bde le\\b", Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CHARACTER_CLASS); + private static final Pattern A_LE = Pattern.compile("\\bà le\\b", Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CHARACTER_CLASS); + private static final Pattern DE_LES = Pattern.compile("\\bde les\\b", Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CHARACTER_CLASS); + private static final Pattern A_LES = Pattern.compile("\\bà les\\b", Pattern.CASE_INSENSITIVE | 
Pattern.UNICODE_CHARACTER_CLASS); protected String fixContractions(String suggestion) { Matcher matcher = DE_LE.matcher(suggestion); diff --git a/languagetool-language-modules/fr/src/main/java/org/languagetool/tokenizers/fr/FrenchWordTokenizer.java b/languagetool-language-modules/fr/src/main/java/org/languagetool/tokenizers/fr/FrenchWordTokenizer.java index 7331cd1d9d70..fed9ca788a57 100644 --- a/languagetool-language-modules/fr/src/main/java/org/languagetool/tokenizers/fr/FrenchWordTokenizer.java +++ b/languagetool-language-modules/fr/src/main/java/org/languagetool/tokenizers/fr/FrenchWordTokenizer.java @@ -64,9 +64,9 @@ public class FrenchWordTokenizer extends WordTokenizer { private static final Pattern SPACE_DIGITS0 = Pattern.compile("([\\d]{4}) ", Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE); private static final Pattern SPACE_DIGITS = Pattern.compile("([\\d]) ([\\d][\\d][\\d])\\b", - Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE); + Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CHARACTER_CLASS); private static final Pattern SPACE_DIGITS2 = Pattern.compile("([\\d]) ([\\d][\\d][\\d]) ([\\d][\\d][\\d])\\b", - Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE); + Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CHARACTER_CLASS); private static final List doNotSplit = Arrays.asList("mers-cov", "mcgraw-hill", "sars-cov-2", "sars-cov", "ph-metre", "ph-metres", "anti-ivg", "anti-uv", "anti-vih", "al-qaïda", "c'est-à-dire", "add-on", "add-ons", diff --git a/languagetool-language-modules/pt/src/main/resources/org/languagetool/resource/pt/entities/hyphenised.ent b/languagetool-language-modules/pt/src/main/resources/org/languagetool/resource/pt/entities/hyphenised.ent index 8b17a479a3a0..fcc36aa2bbc3 100644 --- a/languagetool-language-modules/pt/src/main/resources/org/languagetool/resource/pt/entities/hyphenised.ent +++ b/languagetool-language-modules/pt/src/main/resources/org/languagetool/resource/pt/entities/hyphenised.ent @@ -1,2 +1,2 @@ - + diff --git 
a/languagetool-language-modules/pt/src/main/resources/org/languagetool/resource/pt/pt.sor b/languagetool-language-modules/pt/src/main/resources/org/languagetool/resource/pt/pt.sor index 60eae1dc4509..f9f2cf948051 100644 --- a/languagetool-language-modules/pt/src/main/resources/org/languagetool/resource/pt/pt.sor +++ b/languagetool-language-modules/pt/src/main/resources/org/languagetool/resource/pt/pt.sor @@ -178,7 +178,7 @@ f:(.*),(.*) \1\2 == ordinal-feminine == ([-−]?\d+) $(ordinal-feminine $(ordinal-masculine \1)) -(.*)o\b(.*) $(ordinal-feminine \1a\2) +(?U)(.*)o\b(.*) $(ordinal-feminine \1a\2) (.*) \1 == (ordinal)-number(-feminine|-masculine)? == diff --git a/languagetool-standalone/src/test/java/org/languagetool/SentenceRangeTest.java b/languagetool-standalone/src/test/java/org/languagetool/SentenceRangeTest.java index a181e58db9f0..2f4039ba94c0 100644 --- a/languagetool-standalone/src/test/java/org/languagetool/SentenceRangeTest.java +++ b/languagetool-standalone/src/test/java/org/languagetool/SentenceRangeTest.java @@ -451,4 +451,181 @@ public void testCorrectSentenceRange() { } assertEquals("Hallo,Das ist ein neuer Satz.Ein Satz mit \uFEFFSonderzeichen.Satz mehreren Leerzeichen.Hier sind die Zeichen mal am Ende.\uFEFFNoch ein Satz.", sb.toString()); } + + @Test + public void testSpecialCase() throws IOException { + JLanguageTool jLanguageTool = new JLanguageTool(new NoRulesEnglish()); + String text = "\"This\"+is+Mr.+Pigfat+calling.\n\n\n\n \n\n\n\n \n\n\n\n \n\n\n\n \n\n\n\n \n\n\n\n \n\n\n\n \n\n\n\n \n\n\n\n \n\n\n\n \n\n\n\n \n\n\n\n \n\n\n\n \n\n\n\n \n\n\n\n \n\n\n\n \n\n\n\n \n\n\n\n \n\n\n\n \n\n\n\n \n\n\n\n \n\n\n\n \n\n\n\n \n\n\n\n \n\n\n\nThis+is+an+\"test\".\n\n\n\n \n\n\n\n \n\n\n\n \n\n\n\n \n\n\n\n \n\n\n\n \n\n\n\n \n\n\n\n \n\n\n\n \n\n\n\n \n\n\n\n \n\n\n\n \n\n\n\n \n\n\n\n \n\n\n\n \n\n\n\n \n\n\n\n \n\n\n\n \n\n\n\n \n\n\n\n \n\n\n\n \n\n\n\n \n\n\n\n \n\n\n\n \n\n\n\nHe+was+very+\"afraid\"+of+the+consequeces  "; + AnnotatedText 
annotatedText = new AnnotatedTextBuilder().addText(text).build(); + + CheckResults checkResults = jLanguageTool.check2(annotatedText, + true, + JLanguageTool.ParagraphHandling.NORMAL, + ruleMatch -> { + }, + JLanguageTool.Mode.ALL_BUT_TEXTLEVEL_ONLY, + JLanguageTool.Level.PICKY, + null); + List sentenceRanges = checkResults.getSentenceRanges(); + assertEquals(51, sentenceRanges.size()); + + SentenceRange testSentence1 = sentenceRanges.get(0); + assertEquals(0, testSentence1.getFromPos()); + assertEquals(29, testSentence1.getToPos()); + assertEquals("\"This\"+is+Mr.+Pigfat+calling.", text.substring(testSentence1.getFromPos(), testSentence1.getToPos())); + + assertEquals(33, sentenceRanges.get(1).getFromPos()); + assertEquals(34, sentenceRanges.get(1).getToPos()); + + assertEquals(38, sentenceRanges.get(2).getFromPos()); + assertEquals(39, sentenceRanges.get(2).getToPos()); + + assertEquals(43, sentenceRanges.get(3).getFromPos()); + assertEquals(44, sentenceRanges.get(3).getToPos()); + + assertEquals(48, sentenceRanges.get(4).getFromPos()); + assertEquals(49, sentenceRanges.get(4).getToPos()); + + assertEquals(53, sentenceRanges.get(5).getFromPos()); + assertEquals(54, sentenceRanges.get(5).getToPos()); + + assertEquals(58, sentenceRanges.get(6).getFromPos()); + assertEquals(59, sentenceRanges.get(6).getToPos()); + + assertEquals(63, sentenceRanges.get(7).getFromPos()); + assertEquals(64, sentenceRanges.get(7).getToPos()); + + assertEquals(68, sentenceRanges.get(8).getFromPos()); + assertEquals(69, sentenceRanges.get(8).getToPos()); + + assertEquals(73, sentenceRanges.get(9).getFromPos()); + assertEquals(74, sentenceRanges.get(9).getToPos()); + + assertEquals(78, sentenceRanges.get(10).getFromPos()); + assertEquals(79, sentenceRanges.get(10).getToPos()); + + assertEquals(83, sentenceRanges.get(11).getFromPos()); + assertEquals(84, sentenceRanges.get(11).getToPos()); + + assertEquals(88, sentenceRanges.get(12).getFromPos()); + assertEquals(89, 
sentenceRanges.get(12).getToPos()); + + assertEquals(93, sentenceRanges.get(13).getFromPos()); + assertEquals(94, sentenceRanges.get(13).getToPos()); + + assertEquals(98, sentenceRanges.get(14).getFromPos()); + assertEquals(99, sentenceRanges.get(14).getToPos()); + + assertEquals(103, sentenceRanges.get(15).getFromPos()); + assertEquals(104, sentenceRanges.get(15).getToPos()); + + assertEquals(108, sentenceRanges.get(16).getFromPos()); + assertEquals(109, sentenceRanges.get(16).getToPos()); + + assertEquals(113, sentenceRanges.get(17).getFromPos()); + assertEquals(114, sentenceRanges.get(17).getToPos()); + + assertEquals(118, sentenceRanges.get(18).getFromPos()); + assertEquals(119, sentenceRanges.get(18).getToPos()); + + assertEquals(123, sentenceRanges.get(19).getFromPos()); + assertEquals(124, sentenceRanges.get(19).getToPos()); + + assertEquals(128, sentenceRanges.get(20).getFromPos()); + assertEquals(129, sentenceRanges.get(20).getToPos()); + + assertEquals(133, sentenceRanges.get(21).getFromPos()); + assertEquals(134, sentenceRanges.get(21).getToPos()); + + assertEquals(138, sentenceRanges.get(22).getFromPos()); + assertEquals(139, sentenceRanges.get(22).getToPos()); + + assertEquals(143, sentenceRanges.get(23).getFromPos()); + assertEquals(144, sentenceRanges.get(23).getToPos()); + + assertEquals(148, sentenceRanges.get(24).getFromPos()); + assertEquals(149, sentenceRanges.get(24).getToPos()); + + SentenceRange testSentence2 = sentenceRanges.get(25); + assertEquals(153, testSentence2.getFromPos()); + assertEquals(171, testSentence2.getToPos()); + assertEquals("This+is+an+\"test\".", text.substring(testSentence2.getFromPos(), testSentence2.getToPos())); + + assertEquals(175, sentenceRanges.get(26).getFromPos()); + assertEquals(176, sentenceRanges.get(26).getToPos()); + + assertEquals(180, sentenceRanges.get(27).getFromPos()); + assertEquals(181, sentenceRanges.get(27).getToPos()); + + assertEquals(185, sentenceRanges.get(28).getFromPos()); + assertEquals(186, 
sentenceRanges.get(28).getToPos()); + + assertEquals(190, sentenceRanges.get(29).getFromPos()); + assertEquals(191, sentenceRanges.get(29).getToPos()); + + assertEquals(195, sentenceRanges.get(30).getFromPos()); + assertEquals(196, sentenceRanges.get(30).getToPos()); + + assertEquals(200, sentenceRanges.get(31).getFromPos()); + assertEquals(201, sentenceRanges.get(31).getToPos()); + + assertEquals(205, sentenceRanges.get(32).getFromPos()); + assertEquals(206, sentenceRanges.get(32).getToPos()); + + assertEquals(210, sentenceRanges.get(33).getFromPos()); + assertEquals(211, sentenceRanges.get(33).getToPos()); + + assertEquals(215, sentenceRanges.get(34).getFromPos()); + assertEquals(216, sentenceRanges.get(34).getToPos()); + + assertEquals(220, sentenceRanges.get(35).getFromPos()); + assertEquals(221, sentenceRanges.get(35).getToPos()); + + assertEquals(225, sentenceRanges.get(36).getFromPos()); + assertEquals(226, sentenceRanges.get(36).getToPos()); + + assertEquals(230, sentenceRanges.get(37).getFromPos()); + assertEquals(231, sentenceRanges.get(37).getToPos()); + + assertEquals(235, sentenceRanges.get(38).getFromPos()); + assertEquals(236, sentenceRanges.get(38).getToPos()); + + assertEquals(240, sentenceRanges.get(39).getFromPos()); + assertEquals(241, sentenceRanges.get(39).getToPos()); + + assertEquals(245, sentenceRanges.get(40).getFromPos()); + assertEquals(246, sentenceRanges.get(40).getToPos()); + + assertEquals(250, sentenceRanges.get(41).getFromPos()); + assertEquals(251, sentenceRanges.get(41).getToPos()); + + assertEquals(255, sentenceRanges.get(42).getFromPos()); + assertEquals(256, sentenceRanges.get(42).getToPos()); + + assertEquals(260, sentenceRanges.get(43).getFromPos()); + assertEquals(261, sentenceRanges.get(43).getToPos()); + + assertEquals(265, sentenceRanges.get(44).getFromPos()); + assertEquals(266, sentenceRanges.get(44).getToPos()); + + assertEquals(270, sentenceRanges.get(45).getFromPos()); + assertEquals(271, 
sentenceRanges.get(45).getToPos()); + + assertEquals(275, sentenceRanges.get(46).getFromPos()); + assertEquals(276, sentenceRanges.get(46).getToPos()); + + assertEquals(280, sentenceRanges.get(47).getFromPos()); + assertEquals(281, sentenceRanges.get(47).getToPos()); + + assertEquals(285, sentenceRanges.get(48).getFromPos()); + assertEquals(286, sentenceRanges.get(48).getToPos()); + + assertEquals(290, sentenceRanges.get(49).getFromPos()); + assertEquals(291, sentenceRanges.get(49).getToPos()); + + SentenceRange testSentence3 = sentenceRanges.get(50); + assertEquals(295, testSentence3.getFromPos()); + assertEquals(336, testSentence3.getToPos()); + assertEquals("He+was+very+\"afraid\"+of+the+consequeces  ", text.substring(testSentence3.getFromPos(), testSentence3.getToPos())); + } } diff --git a/pom.xml b/pom.xml index ae7bdc0d64db..213d0dba67aa 100644 --- a/pom.xml +++ b/pom.xml @@ -191,7 +191,7 @@ 1.0 2.3.1 - 2.0.3 + 2.0.4 4.4 1.12.0