From be14fe27ef167d79f60b676aab97fd8784544da3 Mon Sep 17 00:00:00 2001 From: Sasindu Alahakoon Date: Wed, 10 Jul 2024 13:23:12 +0530 Subject: [PATCH] Fix new line character related issue in Windows build --- .../user_config_with_parser_options_test.bal | 47 ++++++------- .../user-config-tests/tests/user_configs.bal | 4 +- ballerina/types.bal | 3 +- .../stdlib/data/csvdata/csv/CsvParser.java | 68 +++++++++++++++---- .../stdlib/data/csvdata/utils/Constants.java | 6 ++ .../stdlib/data/csvdata/utils/CsvUtils.java | 49 +++++++++---- 6 files changed, 123 insertions(+), 54 deletions(-) diff --git a/ballerina-tests/user-config-tests/tests/user_config_with_parser_options_test.bal b/ballerina-tests/user-config-tests/tests/user_config_with_parser_options_test.bal index 06148a9..5649d3d 100644 --- a/ballerina-tests/user-config-tests/tests/user_config_with_parser_options_test.bal +++ b/ballerina-tests/user-config-tests/tests/user_config_with_parser_options_test.bal @@ -153,11 +153,11 @@ function testHeaderOption() { function testNullConfigOption() { string csvValue1 = string `a ()`; - string csvValue2 = string `a + string csvValue2 = string `a null`; - string csvValue3 = string `c, a + string csvValue3 = string `c, a true, e`; - string csvValue4 = string `a + string csvValue4 = string `a Null`; string csvValue5 = string `b, a bN/Aa,N/A`; @@ -224,9 +224,9 @@ function testCommentConfigOption() { 1`; string csvValue2 = string `a # comment 1`; - string csvValue3 = string `a #, c + string csvValue3 = string `a #, c 1#, e`; - string csvValue4 = string `a + string csvValue4 = string `a # comment 1`; string csvValue5 = string `a, b @@ -286,21 +286,21 @@ function testCommentConfigOption2() { 1`; string csvValue2 = string `a & comment 1`; - string csvValue3 = string `a &, c + string csvValue3 = string `a &, c 1&, e`; - string csvValue4 = string `a - - - + string csvValue4 = string `a + + + & comment 1`; string csvValue5 = string `a&, b 1, 2 & comment - - + + & comment`; string csvValue6 
= string ` - + a,& b 1 ,&2 & comment @@ -677,7 +677,7 @@ function testTextQuotesWithParserOptions() { 1, "2", "3" "1", 2, 3 1, "2", 3 - + "1", "2", "3"`; string csvValue2 = string ` @@ -740,7 +740,7 @@ function testHeaderQuotesWithParserOptions() { 1, "2", "3" "1", 2, 3 1, "2", 3 - + "1", "2", "3"`; string csvValue2 = string ` @@ -775,7 +775,7 @@ function testEscapeCharactersWithParserOptions() { 1, "2a\t", "3b\n" "1c\n", 2, 3 1, "2a\"", 3 - + "1a\\", "2b\\"", "3"`; string csvValue2 = string ` @@ -783,7 +783,7 @@ function testEscapeCharactersWithParserOptions() { 1, "2a\t", "3b\n" "1c\n", "/2/", 3 1, "2a\"", "3" - + "1a\\", "2b\\"", "3"`; string csvValue3 = string ` @@ -791,7 +791,7 @@ function testEscapeCharactersWithParserOptions() { 1, "2\t", "3\n" "1\n", 2, 3 1, "2\"", 3 - + "1\\", "2\\"", "3"`; record {}[]|csv:Error cn = csv:parseStringToRecord(csvValue1, {header: 1}); @@ -836,21 +836,22 @@ function testDelimiterWithParserOptions() { @test:Config function testLineTerminatorWithParserOptions() { - string csvValue = string `a,b${"\n"} 1,"2\n3"`; + string csvValue = string `a,b + 1,"2\n3"`; - record {}[]|csv:Error cn = csv:parseStringToRecord(csvValue, {header: 0, lineTerminator: csv:LF}); + record {}[]|csv:Error cn = csv:parseStringToRecord(csvValue, {header: 0, lineTerminator: [csv:CRLF, csv:LF]}); test:assertEquals(cn, [{a: 1, b: "2\n3"}]); - cn = csv:parseStringToRecord(csvValue, {header: 0, lineTerminator: [csv:LF]}); + cn = csv:parseStringToRecord(csvValue, {header: 0, lineTerminator: [csv:CRLF, csv:LF]}); test:assertEquals(cn, [{a: 1, b: "2\n3"}]); cn = csv:parseStringToRecord(csvValue, {header: 0, lineTerminator: [csv:CRLF, csv:LF]}); test:assertEquals(cn, [{a: 1, b: "2\n3"}]); - anydata[][]|csv:Error cn2 = csv:parseStringToList(csvValue, {header: 0, lineTerminator: csv:LF}); + anydata[][]|csv:Error cn2 = csv:parseStringToList(csvValue, {header: 0, lineTerminator: [csv:CRLF, csv:LF]}); test:assertEquals(cn2, [[1, "2\n3"]]); - cn2 = 
csv:parseStringToList(csvValue, {header: 0, lineTerminator: [csv:LF]}); + cn2 = csv:parseStringToList(csvValue, {header: 0, lineTerminator: [csv:CRLF, csv:LF]}); test:assertEquals(cn2, [[1, "2\n3"]]); cn2 = csv:parseStringToList(csvValue, {header: 0, lineTerminator: [csv:CRLF, csv:LF]}); diff --git a/ballerina-tests/user-config-tests/tests/user_configs.bal b/ballerina-tests/user-config-tests/tests/user_configs.bal index c5f08a9..8e85183 100644 --- a/ballerina-tests/user-config-tests/tests/user_configs.bal +++ b/ballerina-tests/user-config-tests/tests/user_configs.bal @@ -2,7 +2,7 @@ import ballerina/data.csv as csv; // Valid parser options csv:ParseOption option1 = {delimiter: "@", nilValue: "null", lineTerminator: [csv:LF]}; -csv:ParseOption option2 = {nilValue: "N/A", lineTerminator: [csv:CR, csv:LF], comment: "/"}; +csv:ParseOption option2 = {nilValue: "N/A", lineTerminator: [csv:CRLF, csv:LF], comment: "/"}; csv:ParseOption option3 = {nilValue: "()", header: 1, skipLines: [1, 2]}; csv:ParseOption option4 = {nilValue: "", header: 4, skipLines: "1-5"}; csv:ParseOption option5 = {nilValue: "", header: 4, skipLines: "1-1"}; @@ -17,7 +17,7 @@ csv:parseToRecordOption ptOption5 = {header: false, skipLines: [-1, -2, 5, 3]}; // Invalid parser options csv:ParseOption invalidParserOptions1 = {header: 4}; csv:ParseOption invalidParserOptions2 = {comment: "$"}; -csv:ParseOption invalidParserOptions3 = {lineTerminator: csv:CR}; +csv:ParseOption invalidParserOptions3 = {lineTerminator: csv:CRLF}; csv:ParseOption invalidParserOptions4 = {skipLines: [1000, 1001]}; csv:ParseOption invalidParserOptions5 = {skipLines: "a-b"}; csv:ParseOption invalidParserOptions6 = {skipLines: "3-1"}; diff --git a/ballerina/types.bal b/ballerina/types.bal index 0cc44fc..1bee776 100644 --- a/ballerina/types.bal +++ b/ballerina/types.bal @@ -47,7 +47,7 @@ public type ParseOption record {| # The character used for escaping. string:Char escapeChar = "\\"; # The line terminator(s) used in the data. 
- LineTerminator|LineTerminator[] lineTerminator = [CR, LF, CRLF]; + LineTerminator|LineTerminator[] lineTerminator = [LF, CRLF]; # The value to represent nil. NilValue? nilValue = (); # The character used to indicate comments in the data. @@ -90,7 +90,6 @@ public type ListAsRecordOption record {| # Enum representing possible line terminators. public enum LineTerminator { - CR = "\r", LF = "\n", CRLF = "\r\n" }; diff --git a/native/src/main/java/io/ballerina/stdlib/data/csvdata/csv/CsvParser.java b/native/src/main/java/io/ballerina/stdlib/data/csvdata/csv/CsvParser.java index 777b954..3f026c5 100644 --- a/native/src/main/java/io/ballerina/stdlib/data/csvdata/csv/CsvParser.java +++ b/native/src/main/java/io/ballerina/stdlib/data/csvdata/csv/CsvParser.java @@ -34,6 +34,7 @@ import io.ballerina.runtime.api.values.BArray; import io.ballerina.runtime.api.values.BError; import io.ballerina.runtime.api.values.BTypedesc; +import io.ballerina.stdlib.data.csvdata.utils.Constants; import io.ballerina.stdlib.data.csvdata.utils.CsvConfig; import io.ballerina.stdlib.data.csvdata.utils.CsvUtils; import io.ballerina.stdlib.data.csvdata.utils.DataUtils; @@ -141,33 +142,32 @@ static class StateMachine { } public void reset() { - currentCsvNode = null; - currentEscapeCharacters.clear(); - headers.clear(); - rootCsvNode = null; - fieldHierarchy.clear(); - updatedRecordFieldNames.clear(); - fields.clear(); - fieldNames.clear(); - charBuff = new char[1024]; - charBuffIndex = 0; index = 0; + currentCsvNode = null; line = 1; column = 0; restType = null; - expectedArrayElementType = null; + rootCsvNode = null; columnIndex = 0; rowIndex = 1; - lineNumber = 0; + fieldHierarchy.clear(); + updatedRecordFieldNames.clear(); + fields.clear(); + fieldNames.clear(); rootArrayType = null; config = null; + lineNumber = 0; + expectedArrayElementType = null; + headers = new ArrayList<>(); + currentEscapeCharacters = new Stack<>(); + charBuff = new char[1024]; + charBuffIndex = 0; skipTheRow = false; - 
insideComment = false; isCurrentCsvNodeEmpty = true; isHeaderConfigExceedLineNumber = false; + hexBuilder = new StringBuilder(4); isQuoteClosed = false; isIntersectionElementType = false; - hexBuilder = new StringBuilder(4); } private static boolean isWhitespace(char ch, Object lineTerminator) { @@ -345,6 +345,13 @@ public State transition(StateMachine sm, char[] buff, int i, int count) throws C for (; i < count; i++) { ch = buff[i]; sm.processLocation(ch); + if (ch == Constants.LineTerminator.CR) { + CsvUtils.setCarriageTokenPresent(true); + continue; + } else if (!(CsvUtils.isCarriageTokenPresent && ch == Constants.LineTerminator.LF)) { + CsvUtils.setCarriageTokenPresent(false); + } + if (sm.lineNumber < headerStartRowNumber) { sm.isHeaderConfigExceedLineNumber = true; if (sm.isNewLineOrEof(ch)) { @@ -481,6 +488,13 @@ public State transition(StateMachine sm, char[] buff, int i, int count) throws C for (; i < count; i++) { ch = buff[i]; sm.processLocation(ch); + if (ch == Constants.LineTerminator.CR) { + CsvUtils.setCarriageTokenPresent(true); + continue; + } else if (!(CsvUtils.isCarriageTokenPresent && ch == Constants.LineTerminator.LF)) { + CsvUtils.setCarriageTokenPresent(false); + } + if (sm.skipTheRow) { if (sm.isEndOfTheRowAndValueIsNotEmpty(sm, ch)) { sm.insideComment = false; @@ -690,6 +704,13 @@ public State transition(StateMachine sm, char[] buff, int i, int count) for (; i < count; i++) { ch = buff[i]; sm.processLocation(ch); + if (ch == Constants.LineTerminator.CR) { + CsvUtils.setCarriageTokenPresent(true); + continue; + } else if (!(CsvUtils.isCarriageTokenPresent && ch == Constants.LineTerminator.LF)) { + CsvUtils.setCarriageTokenPresent(false); + } + if (ch == sm.config.textEnclosure) { if (sm.isQuoteClosed) { sm.append(ch); @@ -742,6 +763,13 @@ public State transition(StateMachine sm, char[] buff, int i, int count) for (; i < count; i++) { ch = buff[i]; sm.processLocation(ch); + if (ch == Constants.LineTerminator.CR) { + 
CsvUtils.setCarriageTokenPresent(true); + continue; + } else if (!(CsvUtils.isCarriageTokenPresent && ch == Constants.LineTerminator.LF)) { + CsvUtils.setCarriageTokenPresent(false); + } + if (ch == sm.config.textEnclosure) { sm.isQuoteClosed = true; } else if (ch == sm.config.delimiter && sm.isQuoteClosed) { @@ -814,6 +842,13 @@ public State transition(StateMachine sm, char[] buff, int i, int count) throws C for (; i < count; i++) { ch = buff[i]; sm.processLocation(ch); + if (ch == Constants.LineTerminator.CR) { + CsvUtils.setCarriageTokenPresent(true); + continue; + } else if (!(CsvUtils.isCarriageTokenPresent && ch == Constants.LineTerminator.LF)) { + CsvUtils.setCarriageTokenPresent(false); + } + if ((ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'F') || (ch >= 'a' && ch <= 'f')) { sm.hexBuilder.append(ch); if (sm.hexBuilder.length() >= 4) { @@ -871,6 +906,11 @@ public State transition(StateMachine sm, char[] buff, int i, int count) throws C if (i < count) { ch = buff[i]; sm.processLocation(ch); + if (ch == Constants.LineTerminator.CR) { + CsvUtils.setCarriageTokenPresent(true); + } else if (!(CsvUtils.isCarriageTokenPresent && ch == Constants.LineTerminator.LF)) { + CsvUtils.setCarriageTokenPresent(false); + } switch (ch) { case '"': sm.append(QUOTES); diff --git a/native/src/main/java/io/ballerina/stdlib/data/csvdata/utils/Constants.java b/native/src/main/java/io/ballerina/stdlib/data/csvdata/utils/Constants.java index 3e39494..ef16c39 100644 --- a/native/src/main/java/io/ballerina/stdlib/data/csvdata/utils/Constants.java +++ b/native/src/main/java/io/ballerina/stdlib/data/csvdata/utils/Constants.java @@ -29,6 +29,12 @@ public static class Values { public static final String BALLERINA_NULL = "()"; } + public static class LineTerminator { + public static final char LF = '\n'; + public static final char CR = '\r'; + public static final String CRLF = "\r\n"; + } + public static final String SKIP_LINE_RANGE_SEP = "-"; public static final String FIELD = 
"$field$."; public static final String NAME = "Name"; diff --git a/native/src/main/java/io/ballerina/stdlib/data/csvdata/utils/CsvUtils.java b/native/src/main/java/io/ballerina/stdlib/data/csvdata/utils/CsvUtils.java index 2769ff2..3f67f06 100644 --- a/native/src/main/java/io/ballerina/stdlib/data/csvdata/utils/CsvUtils.java +++ b/native/src/main/java/io/ballerina/stdlib/data/csvdata/utils/CsvUtils.java @@ -23,6 +23,12 @@ import static io.ballerina.stdlib.data.csvdata.utils.Constants.SKIP_LINE_RANGE_SEP; public class CsvUtils { + public static boolean isCarriageTokenPresent = false; + + public static void setCarriageTokenPresent(boolean isCarriageTokenPresent) { + CsvUtils.isCarriageTokenPresent = isCarriageTokenPresent; + } + public static void validateExpectedArraySize(int size, int currentSize) { if (size != -1 && size > currentSize) { throw DiagnosticLog.error(DiagnosticErrorCode.INVALID_EXPECTED_ARRAY_SIZE, currentSize); @@ -117,17 +123,17 @@ public static boolean isHeaderFieldsEmpty(Map currentField) { public static boolean checkTypeCompatibility(Type constraintType, Object csv, boolean stringConversion) { int tag = constraintType.getTag(); if ((csv instanceof BString && (stringConversion || tag == TypeTags.STRING_TAG - || tag == TypeTags.CHAR_STRING_TAG || isJsonOrAnyDataOrAny(tag))) + || tag == TypeTags.CHAR_STRING_TAG || isJsonOrAnyDataOrAny(tag))) || (csv instanceof Long && (tag == TypeTags.INT_TAG - || tag == TypeTags.FLOAT_TAG || tag == TypeTags.DECIMAL_TAG || tag == TypeTags.BYTE_TAG - || tag == TypeTags.SIGNED8_INT_TAG || tag == TypeTags.SIGNED16_INT_TAG - || tag == TypeTags.SIGNED32_INT_TAG || tag == TypeTags.UNSIGNED8_INT_TAG - || tag == TypeTags.UNSIGNED16_INT_TAG || tag == TypeTags.UNSIGNED32_INT_TAG - || isJsonOrAnyDataOrAny(tag))) + || tag == TypeTags.FLOAT_TAG || tag == TypeTags.DECIMAL_TAG || tag == TypeTags.BYTE_TAG + || tag == TypeTags.SIGNED8_INT_TAG || tag == TypeTags.SIGNED16_INT_TAG + || tag == TypeTags.SIGNED32_INT_TAG || tag == 
TypeTags.UNSIGNED8_INT_TAG + || tag == TypeTags.UNSIGNED16_INT_TAG || tag == TypeTags.UNSIGNED32_INT_TAG + || isJsonOrAnyDataOrAny(tag))) || (csv instanceof BDecimal && ((tag == TypeTags.DECIMAL_TAG - || tag == TypeTags.FLOAT_TAG || tag == TypeTags.INT_TAG) || isJsonOrAnyDataOrAny(tag))) + || tag == TypeTags.FLOAT_TAG || tag == TypeTags.INT_TAG) || isJsonOrAnyDataOrAny(tag))) || (csv instanceof Double && ((tag == TypeTags.FLOAT_TAG - || tag == TypeTags.DECIMAL_TAG || tag == TypeTags.INT_TAG) || isJsonOrAnyDataOrAny(tag))) + || tag == TypeTags.DECIMAL_TAG || tag == TypeTags.INT_TAG) || isJsonOrAnyDataOrAny(tag))) || (Boolean.class.isInstance(csv) && (tag == TypeTags.BOOLEAN_TAG || isJsonOrAnyDataOrAny(tag))) || (csv == null && (tag == TypeTags.NULL_TAG || isJsonOrAnyDataOrAny(tag)))) { return true; @@ -153,8 +159,8 @@ public static int getTheActualExpectedType(Type type) { } public static HashMap - processNameAnnotationsAndBuildCustomFieldMap(RecordType recordType, - Map fieldHierarchy) { + processNameAnnotationsAndBuildCustomFieldMap(RecordType recordType, + Map fieldHierarchy) { BMap annotations = recordType.getAnnotations(); HashMap updatedRecordFieldNames = new HashMap<>(); HashSet updatedFields = new HashSet<>(); @@ -267,11 +273,17 @@ public static boolean isNullValue(Object nullValue, Object value) { } public static boolean isCharContainsInLineTerminatorUserConfig(char c, Object lineTerminatorObj) { - String stringValue = Character.toString(c); if (lineTerminatorObj instanceof BArray) { Object[] lineTerminators = ((BArray) lineTerminatorObj).getValues(); for (Object lineTerminator: lineTerminators) { - if (lineTerminator != null && lineTerminator.toString().equals(stringValue)) { + if (lineTerminator != null && c == Constants.LineTerminator.LF) { + String lineTerminatorString = lineTerminator.toString(); + if (isCarriageTokenPresent) { + if (lineTerminatorString.equals(Constants.LineTerminator.CRLF)) { + return true; + } + continue; + } return true; } } @@ 
-279,7 +291,18 @@ public static boolean isCharContainsInLineTerminatorUserConfig(char c, Object li } String lineTerminator = StringUtils.getStringValue(StringUtils.fromString(lineTerminatorObj.toString())); - return lineTerminator.equals(stringValue); + if (c == Constants.LineTerminator.LF) { + if (lineTerminator != null) { + if (lineTerminator.equals(Constants.LineTerminator.CRLF)) { + if (isCarriageTokenPresent) { + return true; + } + return false; + } + return true; + } + } + return false; } public static class SortConfigurations {