Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Jdk19 regexp fix #11016

Open
wants to merge 13 commits into
base: master
Choose a base branch
from

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,7 @@
import org.junit.Before;
import org.junit.Test;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintStream;
import java.io.PrintWriter;
import java.io.*;
import java.nio.charset.StandardCharsets;

import static org.hamcrest.CoreMatchers.is;
Expand All @@ -42,7 +34,7 @@
*
* @author Marcin Miłkowski
*/
public class MainTest extends AbstractSecurityTestCase {
public class MainTest {

private final File enTestFile;
private final File xxRuleFile;
Expand Down Expand Up @@ -113,8 +105,7 @@ public MainTest() throws IOException {
}

@Before
public void setUp() throws Exception {
super.setUp();
public void setUp() {
this.stdout = System.out;
this.stderr = System.err;
this.out = new ByteArrayOutputStream();
Expand All @@ -124,38 +115,33 @@ public void setUp() throws Exception {
}

@After
public void tearDown() throws Exception {
public void tearDown() {
System.setOut(this.stdout);
System.setErr(this.stderr);
super.tearDown();
}

@Test
public void testUsageMessage() throws Exception {
try {
String[] args = {"-h"};
Main.main(args);
fail("LT should have exited with status 0!");
} catch (ExitException e) {
String output = new String(this.out.toByteArray());
assertTrue(output.contains("Usage: java -jar languagetool-commandline.jar"));
assertEquals("Exit status", 1, e.status);
}
Process process = new ProcessBuilder(
"java", "-cp", System.getProperty("java.class.path"), "org.languagetool.commandline.Main", "-h"
).start();
int exitCode = process.waitFor();
String output = readProcessOutput(process);
assertTrue(output.contains("Usage: java -jar languagetool-commandline.jar"));
assertEquals("Exit status", 1, exitCode);
}

@Test
public void testPrintLanguages() throws Exception {
try {
String[] args = {"--list"};
Main.main(args);
fail("LT should have exited with status 0!");
} catch (ExitException e) {
String output = new String(this.out.toByteArray());
assertTrue(output.contains("German"));
assertTrue(output.contains("de-DE"));
assertTrue(output.contains("English"));
assertEquals("Exit status", 0, e.status);
}
Process process = new ProcessBuilder(
"java", "-cp", System.getProperty("java.class.path"), "org.languagetool.commandline.Main", "--list"
).start();
int exitCode = process.waitFor();
String output = readProcessOutput(process);
assertTrue(output.contains("German"));
assertTrue(output.contains("de-DE"));
assertTrue(output.contains("English"));
assertEquals("Exit status", 0, exitCode);
}

@Test
Expand Down Expand Up @@ -670,4 +656,14 @@ private String getExternalFalseFriends() {
return xxFalseFriendFile.getAbsolutePath();
}

private String readProcessOutput(Process process) throws IOException {
try (BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream()))) {
StringBuilder output = new StringBuilder();
String line;
while ((line = reader.readLine()) != null) {
output.append(line).append(System.lineSeparator());
}
return output.toString();
}
}
Comment on lines +659 to +668
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

⚠️ Potential issue

Prevent potential deadlocks by handling both standard and error streams

In the readProcessOutput method, only the process's standard output stream is read. If the process writes to its error stream and the buffer fills up, it could cause the process to hang due to unhandled data. It's important to read both the standard output and error streams to prevent this issue.

Apply this diff to read both the standard output and error streams:

 private String readProcessOutput(Process process) throws IOException {
-    try (BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream()))) {
-      StringBuilder output = new StringBuilder();
-      String line;
-      while ((line = reader.readLine()) != null) {
-        output.append(line).append(System.lineSeparator());
-      }
-      return output.toString();
-    }
+    StringBuilder output = new StringBuilder();
+    StringBuilder errorOutput = new StringBuilder();
+    try (
+      BufferedReader stdOutReader = new BufferedReader(new InputStreamReader(process.getInputStream()));
+      BufferedReader stdErrReader = new BufferedReader(new InputStreamReader(process.getErrorStream()))
+    ) {
+      String line;
+      while ((line = stdOutReader.readLine()) != null) {
+        output.append(line).append(System.lineSeparator());
+      }
+      while ((line = stdErrReader.readLine()) != null) {
+        errorOutput.append(line).append(System.lineSeparator());
+      }
+    }
+    // Optionally, you can assert on errorOutput if needed
+    return output.toString();
 }

This ensures that both output streams are consumed, preventing the process from hanging due to full buffers.

📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
private String readProcessOutput(Process process) throws IOException {
try (BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream()))) {
StringBuilder output = new StringBuilder();
String line;
while ((line = reader.readLine()) != null) {
output.append(line).append(System.lineSeparator());
}
return output.toString();
}
}
private String readProcessOutput(Process process) throws IOException {
StringBuilder output = new StringBuilder();
StringBuilder errorOutput = new StringBuilder();
try (
BufferedReader stdOutReader = new BufferedReader(new InputStreamReader(process.getInputStream()));
BufferedReader stdErrReader = new BufferedReader(new InputStreamReader(process.getErrorStream()))
) {
String line;
while ((line = stdOutReader.readLine()) != null) {
output.append(line).append(System.lineSeparator());
}
while ((line = stdErrReader.readLine()) != null) {
errorOutput.append(line).append(System.lineSeparator());
}
}
// Optionally, you can assert on errorOutput if needed
return output.toString();
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ public abstract class AbstractUnitConversionRule extends Rule {
protected static final String NUMBER_REGEX = "(-?[0-9]{1,32}[0-9,.]{0,32})";
protected static final String NUMBER_REGEX_WITH_BOUNDARY = "(-?\\b[0-9]{1,32}[0-9,.]{0,32})";

protected final Pattern numberRangePart = Pattern.compile(NUMBER_REGEX_WITH_BOUNDARY + "$");
protected final Pattern numberRangePart = Pattern.compile(NUMBER_REGEX_WITH_BOUNDARY + "$", Pattern.UNICODE_CHARACTER_CLASS);

private static final double DELTA = 1e-2;
private static final double ROUNDING_DELTA = 0.05;
Expand Down Expand Up @@ -196,7 +196,7 @@ protected String formatRounded(String s) {
*/
protected void addUnit(String pattern, Unit base, String symbol, double factor, boolean metric) {
Unit unit = base.multiply(factor);
unitPatterns.put(Pattern.compile(NUMBER_REGEX_WITH_BOUNDARY + "[\\s\u00A0]{0," + WHITESPACE_LIMIT + "}" + pattern + "\\b"), unit);
unitPatterns.put(Pattern.compile(NUMBER_REGEX_WITH_BOUNDARY + "[\\s\u00A0]{0," + WHITESPACE_LIMIT + "}" + pattern + "\\b", Pattern.UNICODE_CHARACTER_CLASS), unit);
unitSymbols.putIfAbsent(unit, new ArrayList<>());
unitSymbols.get(unit).add(symbol);
if (metric && !metricUnits.contains(unit)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -776,7 +776,8 @@ private void createRules(List<PatternToken> elemList,
rule.setDistanceTokens(distanceTokens);
rule.setXmlLineNumber(xmlLineNumber);
} else if (regex.length() > 0) {
int flags = regexCaseSensitive ? 0 : Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CASE;
// int flags = regexCaseSensitive ? 0 : Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CASE;
int flags = regexCaseSensitive ? Pattern.UNICODE_CHARACTER_CLASS : Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CHARACTER_CLASS;
Comment on lines +779 to +780
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Confirm the replacement of Pattern.UNICODE_CASE with Pattern.UNICODE_CHARACTER_CLASS

In the previous implementation (now commented out), when regexCaseSensitive was false, the flags included Pattern.UNICODE_CASE to enable case-insensitive matching of Unicode characters. The new code replaces Pattern.UNICODE_CASE with Pattern.UNICODE_CHARACTER_CLASS, which extends character classes like \w, \d, and \s to match Unicode characters but does not affect case-insensitive matching.

If the intent is to support both Unicode character classes and case-insensitive matching for Unicode characters when regexCaseSensitive is false, you should include both Pattern.UNICODE_CASE and Pattern.UNICODE_CHARACTER_CLASS in the flags. Otherwise, case-insensitive matching may only affect ASCII characters.

Consider applying the following change to include both flags:

-int flags = regexCaseSensitive ? Pattern.UNICODE_CHARACTER_CLASS : Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CHARACTER_CLASS;
+int flags = regexCaseSensitive ? Pattern.UNICODE_CHARACTER_CLASS : Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE | Pattern.UNICODE_CHARACTER_CLASS;

This ensures that both case-insensitive matching and Unicode character classes are enabled when regexCaseSensitive is false.

📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
// int flags = regexCaseSensitive ? 0 : Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CASE;
int flags = regexCaseSensitive ? Pattern.UNICODE_CHARACTER_CLASS : Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CHARACTER_CLASS;
// int flags = regexCaseSensitive ? 0 : Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CASE;
int flags = regexCaseSensitive ? Pattern.UNICODE_CHARACTER_CLASS : Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE | Pattern.UNICODE_CHARACTER_CLASS;

String regexStr = regex.toString();
if (regexMode == RegexpMode.SMART) {
// Note: it's not that easy to add \b because the regex might look like '(foo)' or '\d' so we cannot just look at the last character
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ public RuleMatch acceptRuleMatch(RuleMatch match, Map<String, String> arguments,
}
String[] antiPatterns = antiPatternStr.split("\\|");
for (String antiPattern : antiPatterns) {
Pattern p = Pattern.compile(antiPattern);
Pattern p = Pattern.compile(antiPattern, Pattern.UNICODE_CHARACTER_CLASS);
Matcher matcher = p.matcher(sentenceObj.getText());
while (matcher.find()) {
// partial overlap is enough to filter out a match:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,18 @@
*/
package org.languagetool.rules.patterns;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.ResourceBundle;
import java.util.function.Function;

import org.apache.commons.lang3.ObjectUtils;
import org.apache.commons.lang3.tuple.Triple;
import org.jetbrains.annotations.Nullable;
import org.languagetool.Language;
import org.languagetool.ResourceBundleTools;
import org.languagetool.chunking.ChunkTag;
import org.languagetool.rules.CorrectExample;
import org.languagetool.rules.ErrorTriggeringExample;
Expand All @@ -35,9 +42,6 @@
import org.xml.sax.SAXParseException;
import org.xml.sax.helpers.DefaultHandler;

import java.util.*;
import java.util.function.Function;

/**
* XML rule handler that loads rules from XML and throws
* exceptions on errors and warnings.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;

/**
* Tools for loading an SRX tokenizer file.
Expand Down Expand Up @@ -59,7 +60,8 @@ static SrxDocument createSrxDocument(String path) {

static List<String> tokenize(String text, SrxDocument srxDocument, String code) {
List<String> segments = new ArrayList<>();
TextIterator textIterator = new SrxTextIterator(srxDocument, code, text);
Map<String, Object> parserParameters = Map.of(SrxTextIterator.DEFAULT_PATTERN_FLAGS_PARAMETER, Pattern.UNICODE_CHARACTER_CLASS);
TextIterator textIterator = new SrxTextIterator(srxDocument, code, text, parserParameters);
while (textIterator.hasNext()) {
segments.add(textIterator.next());
}
Expand Down
Loading
Loading