Skip to content

Commit

Permalink
fixes #15 SKIP_BLANK_LINES feature for csv parser
Browse files Browse the repository at this point in the history
  • Loading branch information
vboulaye authored and cowtowncoder committed Oct 8, 2019
1 parent f44a320 commit 6afa34f
Show file tree
Hide file tree
Showing 5 changed files with 70 additions and 37 deletions.
4 changes: 2 additions & 2 deletions csv/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -236,8 +236,8 @@ Jackson supports the following extension or variations:
* Linefeed character: when generating content, the default linefeed String used is "`\n`" but this may be changed
* Null value: by default, null values are serialized as empty Strings (""), but any other String value may be configured to be used instead (for example, "null", "N/A" etc)
* Use of first row as a set of column names: as explained earlier, it is possible to configure `CsvSchema` to indicate that the contents of the first (non-comment) document row is taken to mean the set of column names to use
* Comments
* When enabled (via `CsvSchema`, or enabling `JsonParser.Feature.ALLOW_YAML_COMMENTS`), if a row starts with a `#` character, it will be considered a comment and skipped
* Comments: when enabled (via `CsvSchema`, or enabling `CsvParser.Feature.ALLOW_COMMENTS`), if a row starts with a `#` character, it will be considered a comment and skipped
* Blank lines: when enabled (using `CsvParser.Feature.SKIP_BLANK_LINES`), rows that are empty or composed only of whitespace are skipped

# Limitations

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,13 +73,13 @@ public enum Feature
IGNORE_TRAILING_UNMAPPABLE(false),

/**
* Feature that allows skipping input lines that are completely empty, instead
* of being decoded as lines of just a single column with empty String value (or,
* Feature that allows skipping input lines that are completely empty or blank (composed only of whitespace),
* instead of being decoded as lines of just a single column with an empty/blank String value (or,
* depending on binding, `null`).
*<p>
* Feature is disabled by default.
*/
SKIP_EMPTY_LINES(false),
SKIP_BLANK_LINES(false),

/**
* Feature that allows there to be a trailing single extraneous data
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ public class CsvDecoder {

protected boolean _allowComments;

protected boolean _skipEmptyLines;
protected boolean _skipBlankLines;

/**
* Maximum of quote character, linefeeds (\r and \n), escape character.
Expand Down Expand Up @@ -269,11 +269,10 @@ public CsvDecoder(IOContext ctxt, CsvParser owner, Reader r,
_textBuffer = textBuffer;
_autoCloseInput = StreamReadFeature.AUTO_CLOSE_SOURCE.enabledIn(stdFeatures);
_allowComments = CsvParser.Feature.ALLOW_COMMENTS.enabledIn(csvFeatures);
_skipEmptyLines = CsvParser.Feature.SKIP_EMPTY_LINES.enabledIn(csvFeatures);
_skipBlankLines = CsvParser.Feature.SKIP_BLANK_LINES.enabledIn(csvFeatures);
_trimSpaces = CsvParser.Feature.TRIM_SPACES.enabledIn(csvFeatures);
_inputBuffer = ctxt.allocTokenBuffer();
_bufferRecyclable = true; // since we allocated it
_inputSource = r;
_tokenInputRow = -1;
_tokenInputCol = -1;
setSchema(schema);
Expand Down Expand Up @@ -480,7 +479,7 @@ public boolean startNewLine() throws IOException {
}

public boolean skipLinesWhenNeeded() throws IOException {
if (!(_allowComments || _skipEmptyLines)) {
if (!(_allowComments || _skipBlankLines)) {
return hasMoreInput();
}
int firstCharacterPtr = _inputPtr;
Expand All @@ -493,15 +492,22 @@ public boolean skipLinesWhenNeeded() throws IOException {
firstCharacterPtr = _inputPtr;
continue;
}
if (_skipEmptyLines && ch == ' ') {
// skip all blanks
if (ch == ' ') {
// skip all blanks (in both comments/blanks skip mode)
continue;
}
if (_allowComments && _inputBuffer[firstCharacterPtr] == '#') {
// this line is commented, skip everything
continue;
if (_allowComments) {
if (_inputBuffer[firstCharacterPtr] == '#') {
// on a commented line, skip everything
continue;
}
if (ch == '#') {
// we reach this point when whitespace precedes the hash character;
// move firstCharacterPtr to the '#' location in order to skip the line completely
firstCharacterPtr = _inputPtr-1;
continue;
}
}

// we reached a non skippable character, this line needs to be parsed
// rollback the input pointer to the beginning of the line
_inputPtr = firstCharacterPtr;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
package com.fasterxml.jackson.dataformat.csv.deser;

import java.util.Map;

import com.fasterxml.jackson.databind.MappingIterator;
import com.fasterxml.jackson.dataformat.csv.*;
import com.fasterxml.jackson.dataformat.csv.CsvMapper;
import com.fasterxml.jackson.dataformat.csv.CsvParser;
import com.fasterxml.jackson.dataformat.csv.ModuleTestBase;

import java.util.Map;

// Tests for [csv#56]
public class CommentsTest extends ModuleTestBase
{
final String CSV_WITH_COMMENTS = "x,y\n# comment!\na,b\n# another...\n";
final String CSV_WITH_COMMENTS = "x,y\n# comment!\na,b\n # another...\n";

public void testWithoutComments() throws Exception
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,15 @@
import static org.junit.Assert.assertArrayEquals;

// for [dataformats-text#15]: Allow skipping of empty lines
public class SkipEmptyLines15Test extends ModuleTestBase {
public class SkipBlankLines15Test extends ModuleTestBase {

private static final String CSV_WITH_EMPTY_LINE = "1,\"xyz\"\n\ntrue,\n";
private static final String CSV_WITH_BLANK_LINE = "1,\"xyz\"\n \ntrue,\n";
private static final String CSV_WITH_BLANK_LINE_AND_COMMENT = "1,\"xyz\"\n \n#comment\n\ntrue,\n";
private static final String CSV_WITH_BLANK_LINE_AND_COMMENT = "1,\"xyz\"\n \n #comment\n\ntrue,\n";
private static final String CSV_WITH_FIRST_BLANK_LINE = "\n1,\"xyz\"\ntrue,\n";
private static final String CSV_WITH_TRAILING_BLANK_LINES = "1,\"xyz\"\ntrue,\n \n\n";

public void testCsvWithEmptyLineSkipEmptyLinesFeatureDisabled() throws Exception {
public void testCsvWithEmptyLineSkipBlankLinesFeatureDisabled() throws Exception {
String[][] rows = mapperForCsvAsArray().readValue(CSV_WITH_EMPTY_LINE);
// First, verify default behavior:
assertArrayEquals(expected(
Expand All @@ -24,9 +25,9 @@ public void testCsvWithEmptyLineSkipEmptyLinesFeatureDisabled() throws Exception
), rows);
}

public void testCsvWithEmptyLineSkipEmptyLinesFeatureEnabled() throws Exception {
public void testCsvWithEmptyLineSkipBlankLinesFeatureEnabled() throws Exception {
String[][] rows = mapperForCsvAsArray()
.with(CsvParser.Feature.SKIP_EMPTY_LINES)
.with(CsvParser.Feature.SKIP_BLANK_LINES)
.readValue(CSV_WITH_EMPTY_LINE);
// empty line is skipped
assertArrayEquals(expected(
Expand All @@ -36,7 +37,7 @@ public void testCsvWithEmptyLineSkipEmptyLinesFeatureEnabled() throws Exception
}


public void testCsvWithBlankLineSkipEmptyLinesFeatureDisabled() throws Exception {
public void testCsvWithBlankLineSkipBlankLinesFeatureDisabled() throws Exception {
String[][] rows = mapperForCsvAsArray()
.readValue(CSV_WITH_BLANK_LINE);
// First, verify default behavior:
Expand All @@ -47,9 +48,9 @@ public void testCsvWithBlankLineSkipEmptyLinesFeatureDisabled() throws Exception
), rows);
}

public void testCsvWithBlankLineSkipEmptyLinesFeatureEnabled() throws Exception {
public void testCsvWithBlankLineSkipBlankLinesFeatureEnabled() throws Exception {
String[][] rows = mapperForCsvAsArray()
.with(CsvParser.Feature.SKIP_EMPTY_LINES)
.with(CsvParser.Feature.SKIP_BLANK_LINES)
.readValue(CSV_WITH_BLANK_LINE);
// blank line is skipped
assertArrayEquals(expected(
Expand All @@ -58,34 +59,34 @@ public void testCsvWithBlankLineSkipEmptyLinesFeatureEnabled() throws Exception
), rows);
}

public void testCsvWithBlankLineAndCommentSkipEmptyLinesFeatureDisabled() throws Exception {
public void testCsvWithBlankLineAndCommentSkipBlankLinesFeatureDisabled() throws Exception {
String[][] rows = mapperForCsvAsArray()
.readValue(CSV_WITH_BLANK_LINE_AND_COMMENT);
// First, verify default behavior:
assertArrayEquals(expected(
row("1", "xyz"),
row(" "),
row("#comment"),
row(" #comment"),
row(""),
row("true", "")
), rows);
}

public void testCsvWithBlankLineAndCommentSkipEmptyLinesFeatureEnabled() throws Exception {
public void testCsvWithBlankLineAndCommentSkipBlankLinesFeatureEnabled() throws Exception {
String[][] rows = mapperForCsvAsArray()
.with(CsvParser.Feature.SKIP_EMPTY_LINES)
.with(CsvParser.Feature.SKIP_BLANK_LINES)
.readValue(CSV_WITH_BLANK_LINE_AND_COMMENT);
// blank/empty lines are skipped
assertArrayEquals(expected(
row("1", "xyz"),
row("#comment"),
row(" #comment"),
row("true", "")
), rows);
}

public void testCsvWithBlankLineAndCommentSkipEmptyLinesFeatureEnabledAndAllowComments() throws Exception {
public void testCsvWithBlankLineAndCommentSkipBlankLinesFeatureEnabledAndAllowComments() throws Exception {
String[][] rows = mapperForCsvAsArray()
.with(CsvParser.Feature.SKIP_EMPTY_LINES)
.with(CsvParser.Feature.SKIP_BLANK_LINES)
.with(CsvParser.Feature.ALLOW_COMMENTS)
.readValue(CSV_WITH_BLANK_LINE_AND_COMMENT);
// blank/empty/comment lines are skipped
Expand All @@ -95,7 +96,7 @@ public void testCsvWithBlankLineAndCommentSkipEmptyLinesFeatureEnabledAndAllowCo
), rows);
}

public void testCsvWithFirstBlankLineSkipEmptyLinesFeatureDisabled() throws Exception {
public void testCsvWithFirstBlankLineSkipBlankLinesFeatureDisabled() throws Exception {
String[][] rows = mapperForCsvAsArray()
.readValue(CSV_WITH_FIRST_BLANK_LINE);
// First, verify default behavior:
Expand All @@ -106,9 +107,9 @@ public void testCsvWithFirstBlankLineSkipEmptyLinesFeatureDisabled() throws Exce
), rows);
}

public void testCsvWithFirstBlankLineSkipEmptyLinesFeatureEnabled() throws Exception {
public void testCsvWithFirstBlankLineSkipBlankLinesFeatureEnabled() throws Exception {
String[][] rows = mapperForCsvAsArray()
.with(CsvParser.Feature.SKIP_EMPTY_LINES)
.with(CsvParser.Feature.SKIP_BLANK_LINES)
.readValue(CSV_WITH_FIRST_BLANK_LINE);
// blank line is skipped
assertArrayEquals(expected(
Expand All @@ -117,6 +118,30 @@ public void testCsvWithFirstBlankLineSkipEmptyLinesFeatureEnabled() throws Excep
), rows);
}


/**
 * Baseline check for a document ending in a blank line followed by an empty line:
 * with no extra parser features enabled, both trailing lines are reported as rows.
 */
public void testCsvWithTrailingBlankLineSkipBlankLinesFeatureDisabled() throws Exception {
    final ObjectReader reader = mapperForCsvAsArray();
    final String[][] parsed = reader.readValue(CSV_WITH_TRAILING_BLANK_LINES);
    // Default behavior: the trailing " " and "" lines each decode as a single-column row
    assertArrayEquals(
            expected(row("1", "xyz"), row("true", ""), row(" "), row("")),
            parsed);
}

/**
 * Verifies that, with {@code CsvParser.Feature.SKIP_BLANK_LINES} enabled, the blank
 * line and the empty line at the end of the document are skipped, so only the two
 * data rows are returned.
 */
public void testCsvWithTrailingBlankLineSkipBlankLinesFeatureEnabled() throws Exception {
    // Must read the trailing-blank-lines fixture; reading CSV_WITH_FIRST_BLANK_LINE here
    // would only repeat the "first blank line" test and leave this case uncovered.
    String[][] rows = mapperForCsvAsArray()
            .with(CsvParser.Feature.SKIP_BLANK_LINES)
            .readValue(CSV_WITH_TRAILING_BLANK_LINES);
    // trailing blank (" ") and empty lines are skipped
    assertArrayEquals(expected(
            row("1", "xyz"),
            row("true", "")
    ), rows);
}

private ObjectReader mapperForCsvAsArray() {
// when wrapped as an array, we'll get array of Lists:
return mapperForCsv()
Expand Down

0 comments on commit 6afa34f

Please sign in to comment.