Skip to content

Commit

Permalink
AVRO-4053: Add test case
Browse files Browse the repository at this point in the history
  • Loading branch information
opwvhk committed Sep 6, 2024
1 parent ca41764 commit 3dd0954
Show file tree
Hide file tree
Showing 2 changed files with 111 additions and 49 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,27 @@
*/
package org.apache.avro.compiler.specific;

import org.apache.avro.Conversion;
import org.apache.avro.Conversions;
import org.apache.avro.JsonProperties;
import org.apache.avro.LogicalType;
import org.apache.avro.LogicalTypes;
import org.apache.avro.Protocol;
import org.apache.avro.Protocol.Message;
import org.apache.avro.Schema;
import org.apache.avro.Schema.Field;
import org.apache.avro.SchemaNormalization;
import org.apache.avro.SchemaParser;
import org.apache.avro.data.TimeConversions;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericData.StringType;
import org.apache.avro.specific.SpecificData;
import org.apache.velocity.Template;
import org.apache.velocity.VelocityContext;
import org.apache.velocity.app.VelocityEngine;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
Expand All @@ -36,30 +57,10 @@
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import org.apache.avro.Conversion;
import org.apache.avro.Conversions;
import org.apache.avro.JsonProperties;
import org.apache.avro.LogicalType;
import org.apache.avro.LogicalTypes;
import org.apache.avro.Protocol;
import org.apache.avro.Protocol.Message;
import org.apache.avro.Schema;
import org.apache.avro.Schema.Field;
import org.apache.avro.SchemaNormalization;
import org.apache.avro.SchemaParser;
import org.apache.avro.data.TimeConversions;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericData.StringType;
import org.apache.avro.specific.SpecificData;
import org.apache.velocity.Template;
import org.apache.velocity.VelocityContext;
import org.apache.velocity.app.VelocityEngine;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static java.nio.charset.StandardCharsets.UTF_8;

/**
Expand Down Expand Up @@ -1004,21 +1005,35 @@ public String conversionInstance(Schema schema) {
*/
public String[] javaAnnotations(JsonProperties props) {
  // The "javaAnnotation" property is either a single string or a list of
  // values. Only entries accepted by isValidAsAnnotation are returned, so that
  // text which is not a plain annotation never reaches the caller.
  final Object value = props.getObjectProp("javaAnnotation");

  if (value instanceof String) {
    final String annotation = (String) value;
    if (isValidAsAnnotation(annotation)) {
      return new String[] { annotation };
    }
    return new String[0];
  }

  if (value instanceof List) {
    final List<?> list = (List<?>) value;
    final List<String> annots = new ArrayList<>(list.size());
    for (Object o : list) {
      // Defensive: skip null elements instead of failing on toString().
      if (o == null) {
        continue;
      }
      // Validate BEFORE adding: every entry is added at most once, and only
      // when it looks like an annotation.
      final String annotation = o.toString();
      if (isValidAsAnnotation(annotation)) {
        annots.add(annotation);
      }
    }
    return annots.toArray(new String[0]);
  }

  // Property absent (null) or of an unsupported type: no annotations.
  return new String[0];
}

// Building blocks of the regular expression that decides whether a string may
// be used as an annotation: an identifier, optionally followed by a
// parenthesised parameter list that contains only literal values.

// A Java identifier (unqualified: no dots).
private static final String PATTERN_IDENTIFIER = "\\p{javaJavaIdentifierStart}\\p{javaJavaIdentifierPart}*";
// A double-quoted string literal. Its content is a sequence of either one of
// the escapes \\ \" \n \t \f \b, or any single character that is not a quote,
// a backslash or a line terminator. Excluding bare quotes and backslashes is
// essential: otherwise one "literal" could span from the first quote of the
// value to the last one and hide arbitrary code in between.
private static final String PATTERN_STRING = "\"(?:\\\\[\\\\\"ntfb]|(?![\"\\\\]).)*\"";
// An optionally cast number. Deliberately loose: every part is optional, so
// this also matches text that is not a valid Java number (even the empty
// string), but it can only ever contain the characters x, 0-9, _, ., f and l.
private static final String PATTERN_NUMBER = "(?:\\((?:byte|char|short|int|long|float|double)\\))?[x0-9_.]*[fl]?";
// Any supported literal: string, number or boolean.
private static final String PATTERN_LITERAL_VALUE = String.format("(?:%s|%s|true|false)", PATTERN_STRING,
    PATTERN_NUMBER);
// "( )", "( literal )" or "( name = literal, name = literal, ... )".
private static final String PATTERN_PARAMETER_LIST = String.format(
    "\\(\\s*(?:%s|%s\\s*=\\s*%s(?:\\s*,\\s*%s\\s*=\\s*%s)*)?\\s*\\)", PATTERN_LITERAL_VALUE, PATTERN_IDENTIFIER,
    PATTERN_LITERAL_VALUE, PATTERN_IDENTIFIER, PATTERN_LITERAL_VALUE);
// The complete annotation (without the leading '@'): a name plus optional
// parameter list. Compiled once; used with a full match.
private static final Pattern VALID_AS_ANNOTATION = Pattern
    .compile(String.format("%s(?:%s)?", PATTERN_IDENTIFIER, PATTERN_PARAMETER_LIST));

private boolean isValidAsAnnotation(String value) {
  // Leading and trailing whitespace is ignored; what remains must match the
  // annotation pattern in its entirety (matches(), not find()).
  final String candidate = value.strip();
  final boolean accepted = VALID_AS_ANNOTATION.matcher(candidate).matches();
  return accepted;
}

// Maximum size for string constants, to avoid javac limits. Defaults to 8192.
// NOTE(review): presumably counted in chars (per the field name) and applied
// to strings emitted into generated code -- confirm against the users of this
// field.
int maxStringChars = 8192;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,19 @@
*/
package org.apache.avro.compiler.specific;

import static org.hamcrest.CoreMatchers.equalTo;
import static org.hamcrest.CoreMatchers.hasItem;
import static org.hamcrest.CoreMatchers.not;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import org.apache.avro.AvroTypeException;
import org.apache.avro.LogicalType;
import org.apache.avro.LogicalTypes;
import org.apache.avro.Protocol;
import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.avro.generic.GenericData.StringType;
import org.apache.avro.specific.SpecificData;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedReader;
import java.io.File;
Expand All @@ -37,29 +42,26 @@
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.tools.Diagnostic;
import javax.tools.DiagnosticListener;
import javax.tools.JavaCompiler;
import javax.tools.JavaFileObject;
import javax.tools.StandardJavaFileManager;
import javax.tools.ToolProvider;
import org.apache.avro.AvroTypeException;

import java.util.Locale;
import java.util.Map;
import java.util.Set;

import org.apache.avro.LogicalType;
import org.apache.avro.LogicalTypes;
import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.avro.generic.GenericData.StringType;
import org.apache.avro.specific.SpecificData;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.hamcrest.CoreMatchers.equalTo;
import static org.hamcrest.CoreMatchers.hasItem;
import static org.hamcrest.CoreMatchers.not;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;

public class TestSpecificCompiler {
private static final Logger LOG = LoggerFactory.getLogger(TestSpecificCompiler.class);
Expand Down Expand Up @@ -990,4 +992,49 @@ void fieldWithUnderscore_avro3826() {
assertFalse(outputFile4.contents.contains("$3"));
}

@Test
void docsAreEscaped_avro() {
  // A protocol in which every "doc" attribute tries to close and reopen a
  // comment ("*/ ... /*"), and whose "javaAnnotation" list mixes well-formed
  // annotations with entries that attempt to smuggle in code.
  final String protocolJson = "{\n" + " \"protocol\": \"DummyProtocol\",\n"
      + " \"doc\": \"*/\\nTest escaping <threats>\\n/*\",\n" + " \"types\" : [\n"
      + " {\"type\": \"fixed\", \"name\": \"Hash\", \"size\": 16, \"doc\": \"*/\\nTest escaping <threats>\\n/*\""
      + "},\n"
      + " {\"type\": \"enum\", \"name\": \"Status\", \"symbols\": [\"ON\", \"OFF\"], \"doc\": \"*/\\nTest escaping <threats>\\n/*\"},\n"
      + " " + " {\"type\": \"record\", \"name\": \"Message\", \"fields\" : [\n"
      + " {\"name\": \"content\", \"type\": \"string\", \"doc\": \"*/\\nTest escaping <threats>\\n/*\"},\n"
      + " {\"name\": \"status\", \"type\": \"Status\", \"doc\": \"*/\\nTest escaping <threats>\\n/*\"},\n"
      + " {\"name\": \"signature\", \"type\": \"Hash\", \"doc\": \"*/\\nTest escaping <threats>\\n/*\"}\n"
      + " ]}\n" + " ],\n" + " \"messages\" : {\n" + " \"echo\": {\"request\": ["
      + "{\"name\": \"msg\", \"type\": \"Message\"}"
      + "], \"response\": \"Message\", \"doc\": \"*/\\nTest escaping <threats>\\n/*\"}\n" + " },\n"
      + " \"javaAnnotation\": [\n" + " \"Deprecated(forRemoval = true, since = \\\"forever\\\")\",\n"
      + " \"SuppressWarnings(\\\"ALL\\\")\",\n" + " \"SuppressWarnings(\\\"CodeInjection\\\")/*\",\n"
      + " \" This is inside a comment as each line is prefixed with @\",\n"
      + " \" and the next bit is really dangerous... */ static { System.exit(); }\"\n" + " ]\n" + "}";

  // The patterns do not depend on the generated file, so build them once.
  // Every occurrence of the doc text, in whatever form:
  final Pattern anyDocText = Pattern.compile("Test escaping", Pattern.LITERAL);
  // ... the form expected inside JavaDoc (HTML-escaped):
  final Pattern docTextInJavaDoc = Pattern.compile("\\*&#47;\\s*Test escaping &lt;threats&gt;\\s*/\\*");
  // ... and the form expected inside a Java string holding the schema JSON:
  // noinspection RegExpRedundantEscape
  final Pattern docTextInDocString = Pattern
      .compile("\\\"doc\\\":\\\"*/\\\\nTest escaping <threats>\\\\n/*\\\"", Pattern.LITERAL);
  // The injected static initializer, unless directly followed by an escaped
  // quote (i.e. the text sits at the end of a string literal).
  final Pattern injectedCode = Pattern.compile("\\{ System.exit\\(\\); }(?!\\\\\")");

  final Protocol protocol = Protocol.parse(protocolJson);
  for (SpecificCompiler.OutputFile generated : new SpecificCompiler(protocol).compile()) {
    final String generatedSource = generated.contents;
    assertFalse(generatedSource.contains("*/\\nTest escaping <threats>\\n/*"), "Threats present?");

    // Each occurrence of the doc text must be in one of the two escaped forms.
    final int totalCount = countOccurrences(anyDocText, generatedSource);
    final int javaDocCount = countOccurrences(docTextInJavaDoc, generatedSource);
    final int docStringCount = countOccurrences(docTextInDocString, generatedSource);
    assertEquals(totalCount, javaDocCount + docStringCount, "Escaped threats in " + generated.path);

    assertFalse(injectedCode.matcher(generatedSource).find(), "Code injection present? " + generatedSource);
  }
}

// Counts the non-overlapping matches of pattern in textToSearch, scanning from
// the start of the text.
private int countOccurrences(Pattern pattern, String textToSearch) {
  final Matcher scanner = pattern.matcher(textToSearch);
  int occurrences = 0;
  while (scanner.find()) {
    occurrences += 1;
  }
  return occurrences;
}
}

0 comments on commit 3dd0954

Please sign in to comment.