From 19288040f48177559b43d2e600276c7acc9de19e Mon Sep 17 00:00:00 2001 From: SasinduDilshara Date: Mon, 12 Aug 2024 10:19:13 +0530 Subject: [PATCH] Allow empty strings to represent nil values using configs --- .../tests/parse_string_to_array_test.bal | 10 ++-- .../tests/test_data_values.bal | 6 ++ .../tests/csv_content_2.txt | 4 ++ .../tests/nill_type_test.bal | 58 +++++++++++++++++++ .../tests/parse_string_compatibality_test.bal | 33 ++++++----- .../tests/test_data_values.bal | 25 ++++++++ .../user_config_with_parser_options_test.bal | 50 +++++++++++++--- .../lib/data/csvdata/csv/CsvCreator.java | 9 ++- .../lib/data/csvdata/csv/CsvParser.java | 21 ++++--- .../csvdata/utils/DiagnosticErrorCode.java | 4 +- .../src/main/resources/csv_error.properties | 8 ++- 11 files changed, 187 insertions(+), 41 deletions(-) create mode 100644 ballerina-tests/type-compatible-tests/tests/csv_content_2.txt create mode 100644 ballerina-tests/type-compatible-tests/tests/nill_type_test.bal diff --git a/ballerina-tests/parse-string-array-types-tests/tests/parse_string_to_array_test.bal b/ballerina-tests/parse-string-array-types-tests/tests/parse_string_to_array_test.bal index 46a49c0..dfeaf58 100644 --- a/ballerina-tests/parse-string-array-types-tests/tests/parse_string_to_array_test.bal +++ b/ballerina-tests/parse-string-array-types-tests/tests/parse_string_to_array_test.bal @@ -437,14 +437,12 @@ function testParseStringArrayAsExpectedTypeWithOutputHeaders() { ]); string[][]|csv:Error cv1baa_5 = csv:parseString(csvStringWithBooleanValues1, {outputWithHeaders: true, header: 2}); - test:assertEquals(cv1baa_5, [ - ["true", "false", "true", "false"], - ["true", "false", "true", "false"] - ]); + test:assertTrue(cv1baa_5 is csv:Error); + test:assertEquals((cv1baa_5).message(), "Duplicate header found: 'true'"); - string[][]|csv:Error cv1baa_6 = csv:parseString(csvStringWithBooleanValues1, {outputWithHeaders: false, header: 2}); + string[][]|csv:Error cv1baa_6 = csv:parseString(csvStringWithBooleanValues8, {outputWithHeaders: false, header: 2}); test:assertEquals(cv1baa_6, [ - ["true", "false", "true", "false"] + ["true", "false", "true1", "false1"] ]); [string, string, string, string, string][]|csv:Error cv2baa_7 = csv:parseString(csvStringWithBooleanValues2, {outputWithHeaders: true}); diff --git a/ballerina-tests/parse-string-array-types-tests/tests/test_data_values.bal b/ballerina-tests/parse-string-array-types-tests/tests/test_data_values.bal index 4060a59..e4d152b 100644 --- a/ballerina-tests/parse-string-array-types-tests/tests/test_data_values.bal +++ b/ballerina-tests/parse-string-array-types-tests/tests/test_data_values.bal @@ -60,3 +60,9 @@ string csvStringWithBooleanValues6 = string `b2,b3 string csvStringWithBooleanValues7 = string `b1,b2,b3,b4 ${b1},${b2},(),${b4} `; + +string csvStringWithBooleanValues8 = string `b1,b2,b3,b4 +true,false,true1,false1 +true,false, true1,false1 +true,false,true1,false1 +`; diff --git a/ballerina-tests/type-compatible-tests/tests/csv_content_2.txt b/ballerina-tests/type-compatible-tests/tests/csv_content_2.txt new file mode 100644 index 0000000..c2d88ba --- /dev/null +++ b/ballerina-tests/type-compatible-tests/tests/csv_content_2.txt @@ -0,0 +1,4 @@ +a, b, c d, e +"Hello World1", \"Hello World2\", Hello World3, 21 +"Hello World1", \"Hello World2\", Hello World3, 22 +"Hello World1", \"Hello World2\", Hello World3, 23 \ No newline at end of file diff --git a/ballerina-tests/type-compatible-tests/tests/nill_type_test.bal b/ballerina-tests/type-compatible-tests/tests/nill_type_test.bal new file mode 100644 index 0000000..728d683 --- /dev/null +++ b/ballerina-tests/type-compatible-tests/tests/nill_type_test.bal @@ -0,0 +1,58 @@ +import ballerina/data.csv; +import ballerina/test; + +type Book1 record { + ()|string name; + ()|string author; + ()|string year; +}; + +string csvString1 = string `name,author,year + ,b,c + a,,c + a,b,`; + +@test:Config{} +function testEmptyStringWithNilConfig() { + Book1[]|error books1 = csv:parseString(csvString1, {nilValue: ""}); + test:assertEquals(books1, [ + {name: null, author: "b", year: "c"}, + {name: "a", author: null, year: "c"}, + {name: "a", author: "b", year: null} + ]); + + Book1[]|error books2 = csv:parseString(csvString1); + test:assertEquals(books2, [ + {name: "", author: "b", year: "c"}, + {name: "a", author: "", year: "c"}, + {name: "a", author: "b", year: ""} + ]); + + (boolean|()|string|int)[][]|error arrbooks1 = csv:parseString(csvString1, {nilValue: ""}); + test:assertEquals(arrbooks1, [ + [null, "b", "c"], + ["a", null, "c"], + ["a", "b", null] + ]); + + (boolean|()|string|int)[][2]|error arrbooks2 = csv:parseString(csvString1, {nilValue: ""}); + test:assertEquals(arrbooks2, [ + [null, "b"], + ["a", null], + ["a", "b"] + ]); + + (boolean|()|string|int)[][]|error arrbooks3 = csv:parseString(csvString1); + test:assertEquals(arrbooks3, [ + ["", "b", "c"], + ["a", "", "c"], + ["a", "b", ""] + ]); + + (boolean|()|string|int)[][2]|error arrbooks4 = csv:parseString(csvString1); + test:assertEquals(arrbooks4, [ + ["", "b"], + ["a", ""], + ["a", "b"] + ]); +} diff --git a/ballerina-tests/type-compatible-tests/tests/parse_string_compatibality_test.bal b/ballerina-tests/type-compatible-tests/tests/parse_string_compatibality_test.bal index bacada9..3aca53a 100644 --- a/ballerina-tests/type-compatible-tests/tests/parse_string_compatibality_test.bal +++ b/ballerina-tests/type-compatible-tests/tests/parse_string_compatibality_test.bal @@ -20,6 +20,7 @@ import ballerina/io; import ballerina/test; const string filepath = "tests/csv_content.txt"; +const string filepath2 = "tests/csv_content_2.txt"; const string errorFilepath = "tests/csv_error_content.txt"; @test:Config @@ -215,6 +216,7 @@ function testSpaceBetweendData() { @test:Config function testParseBytes() returns error? { byte[] csvBytes = check io:fileReadBytes(filepath); + byte[] csvBytes2 = check io:fileReadBytes(filepath2); record{}[]|csv:Error rec = csv:parseBytes(csvBytes, {}); test:assertEquals(rec, [ @@ -245,17 +247,17 @@ function testParseBytes() returns error? { ["Hello World", "\"Hello World\"", "Hello World", "2"] ]); - rec2 = csv:parseBytes(csvBytes, {outputWithHeaders: true, header: 1}); + rec2 = csv:parseBytes(csvBytes2, {outputWithHeaders: true, header: 1}); test:assertEquals(rec2, [ - ["Hello World", "\"Hello World\"", "Hello World", "2"], - ["Hello World", "\"Hello World\"", "Hello World", "2"], - ["Hello World", "\"Hello World\"", "Hello World", "2"] + ["Hello World1", "\"Hello World2\"", "Hello World3", "21"], + ["Hello World1", "\"Hello World2\"", "Hello World3", "22"], + ["Hello World1", "\"Hello World2\"", "Hello World3", "23"] ]); - rec2 = csv:parseBytes(csvBytes, { header: 1}); + rec2 = csv:parseBytes(csvBytes2, { header: 1}); test:assertEquals(rec2, [ - ["Hello World", "\"Hello World\"", "Hello World", "2"], - ["Hello World", "\"Hello World\"", "Hello World", "2"] + ["Hello World1", "\"Hello World2\"", "Hello World3", "22"], + ["Hello World1", "\"Hello World2\"", "Hello World3", "23"] ]); int[][]|csv:Error rec3 = csv:parseBytes(csvBytes, {}); @@ -270,6 +272,8 @@ function testParseBytes() returns error? { @test:Config function testParseStream() returns error? { stream csvByteStream = check io:fileReadBlocksAsStream(filepath); + stream csvByteStream2 = check io:fileReadBlocksAsStream(filepath2); + record{}[]|csv:Error rec = csv:parseStream(csvByteStream, {}); test:assertEquals(rec, [ {"a":"Hello World","b":"\"Hello World\"","c d":"Hello World","e":2}, @@ -293,12 +297,11 @@ function testParseStream() returns error? { ["Hello World", "\"Hello World\"", "Hello World", "2"] ]); - csvByteStream = check io:fileReadBlocksAsStream(filepath); - rec2 = csv:parseStream(csvByteStream, {header: 1, outputWithHeaders: true}); + rec2 = csv:parseStream(csvByteStream2, {header: 1, outputWithHeaders: true}); test:assertEquals(rec2, [ - ["Hello World", "\"Hello World\"", "Hello World", "2"], - ["Hello World", "\"Hello World\"", "Hello World", "2"], - ["Hello World", "\"Hello World\"", "Hello World", "2"] + ["Hello World1", "\"Hello World2\"", "Hello World3", "21"], + ["Hello World1", "\"Hello World2\"", "Hello World3", "22"], + ["Hello World1", "\"Hello World2\"", "Hello World3", "23"] ]); csvByteStream = check io:fileReadBlocksAsStream(filepath); @@ -310,11 +313,11 @@ function testParseStream() returns error? { ["Hello World", "\"Hello World\"", "Hello World", "2"] ]); - csvByteStream = check io:fileReadBlocksAsStream(filepath); + csvByteStream = check io:fileReadBlocksAsStream(filepath2); rec2 = csv:parseStream(csvByteStream, {header: 1}); test:assertEquals(rec2, [ - ["Hello World", "\"Hello World\"", "Hello World", "2"], - ["Hello World", "\"Hello World\"", "Hello World", "2"] + ["Hello World1", "\"Hello World2\"", "Hello World3", "22"], + ["Hello World1", "\"Hello World2\"", "Hello World3", "23"] ]); csvByteStream = check io:fileReadBlocksAsStream(filepath); diff --git a/ballerina-tests/user-config-tests/tests/test_data_values.bal b/ballerina-tests/user-config-tests/tests/test_data_values.bal index f067976..bf329ff 100644 --- a/ballerina-tests/user-config-tests/tests/test_data_values.bal +++ b/ballerina-tests/user-config-tests/tests/test_data_values.bal @@ -121,3 +121,28 @@ string csvStringData9 = string ` 4@ string@ true@ 2.234@ ()@-3.21 5@ string@ true@ 2.234@ -3.21@ null`; + +string csvStringData10 = string ` + hello, hello, (), 12, true, 12.34 + // comment + + a, b, c, d, e, f + + + 1, string1, true, 2.234, 2.234, () + 2, string2, false, 0, 0, null + 3, string3, false, 1.23, 1.23, () + 4, string4, true, -6.51, -6.52, () + 5, string5, true, 3, 31, ()`; + +string csvStringData11 = string ` + a, b, c, d, e, f + + + + 1, string1, true, 2.234, 2.234, () + 2, string2, false, 0, 0, null + 3, string3, false, 1.23, 1.23, () + + 4, string4, true, -6.51, -6.52, () + 5, string5, true, 3, 3, ()`; diff --git a/ballerina-tests/user-config-tests/tests/user_config_with_parser_options_test.bal b/ballerina-tests/user-config-tests/tests/user_config_with_parser_options_test.bal index 10a83a9..6be4c25 100644 --- a/ballerina-tests/user-config-tests/tests/user_config_with_parser_options_test.bal +++ b/ballerina-tests/user-config-tests/tests/user_config_with_parser_options_test.bal @@ -125,9 +125,9 @@ function testFromCsvStringWithParserOptions() { {a: 5, b: "string5", c: true, d: 3, e: 3, f: "()"} ]); - record {}[]|csv:Error csv3op3_4 = csv:parseString(csvStringData3, {header: 9, skipLines: "2-10"}); + record {}[]|csv:Error csv3op3_4 = csv:parseString(csvStringData11, {header: 9, skipLines: "2-10"}); test:assertEquals(csv3op3_4, [ - {'4: 5, string4: "string5", "true": true, "-6.51": 3, "()": null} + {'4: 5, string4: "string5", "true": true, "-6.51": 3, "-6.52": 3, "()": null} ]); } @@ -203,15 +203,19 @@ function testHeaderOption() { {a: 5, b: "string5", c: true, d: 3, e: 3, f: ()} ]); - record {}[]|csv:Error csv2cop2 = csv:parseString(csvStringData2, {header: 100}); + record {}[]|csv:Error csv2cop2 = csv:parseString(csvStringData10, {header: 100}); test:assertTrue(csv2cop2 is csv:Error); test:assertEquals((csv2cop2).message(), "The provided header row is empty"); - record {}[]|csv:Error csv2cop3 = csv:parseString(csvStringData2, {header: 11}); + record {}[]|csv:Error csv2cop3 = csv:parseString(csvStringData10, {header: 11}); test:assertEquals(csv2cop3, []); - record {}[]|csv:Error csv2cop4 = csv:parseString(csvStringData2, {header: 10}); - test:assertEquals(csv2cop4, [{'4: 5, string4: "string5", "true": true, "-6.51": 3, "()": ()}]); + record {}[]|csv:Error csv2cop3_2 = csv:parseString(csvStringData10, {header: 9}); + test:assertTrue(csv2cop3_2 is csv:Error); + test:assertEquals((csv2cop3_2).message(), "Duplicate header found: '1.23'"); + + record {}[]|csv:Error csv2cop4 = csv:parseString(csvStringData10, {header: 10}); + test:assertEquals(csv2cop4, [{'4: 5, string4: "string5", "true": true, "-6.51": 3, "-6.52": 31, "()": ()}]); record {}[]|csv:Error csv1cop5 = csv:parseString(csvStringData1, {}); test:assertTrue(csv1cop5 is csv:Error); @@ -313,6 +317,9 @@ function testCommentConfigOption() { string csvValue9 = string `a,# b 1 ,#2 # comment # comment`; + string csvValue10 = string `a# b + 1 ,#2 # comment + # comment`; record {int a;}[]|csv:Error cn; @@ -345,8 +352,13 @@ function testCommentConfigOption() { // TODO:Fix the error message // test:assertEquals(( cn).message(), common:generateErrorMessageForInvalidCast("1, 2", "int")); + cn = csv:parseString(csvValue10); + test:assertTrue(cn is csv:Error); + test:assertEquals(( cn).message(), "Invalid length for the headers"); + cn = csv:parseString(csvValue9); - test:assertEquals(cn, [{a: 1}]); + test:assertTrue(cn is csv:Error); + test:assertEquals(( cn).message(), "The provided header row is empty"); } @test:Config {dependsOn: [testCSVLocale]} @@ -375,6 +387,20 @@ function testCommentConfigOption2() { & comment`; + string csvValue7 = string ` + + a& b + 1 ,&2 & comment + + & comment`; + + string csvValue8 = string ` + + a,e& b + 1 ,&2 & comment + + & comment`; + record {int a; int b;}[]|csv:Error cn; record {int c;}[]|csv:Error cn2; @@ -401,8 +427,16 @@ function testCommentConfigOption2() { cn = csv:parseString(csvValue6, {comment: "&", header: 2}); test:assertTrue(cn is csv:Error); + test:assertEquals((cn).message(), "The provided header row is empty"); + + cn = csv:parseString(csvValue8, {comment: "&", header: 2}); + test:assertTrue(cn is csv:Error); test:assertEquals((cn).message(), common:generateErrorMessageForMissingRequiredField("b")); + cn = csv:parseString(csvValue7, {comment: "&", header: 2}); + test:assertTrue(cn is csv:Error); + test:assertEquals((cn).message(), "Invalid length for the headers"); + cn2 = csv:parseString(csvValue1, {comment: "&"}); test:assertTrue(cn2 is csv:Error); test:assertEquals((cn2).message(), common:generateErrorMessageForMissingRequiredField("c")); @@ -426,7 +460,7 @@ function testCommentConfigOption2() { cn2 = csv:parseString(csvValue6, {header: 2, comment: "&"}); test:assertTrue(cn2 is csv:Error); - test:assertEquals((cn2).message(), common:generateErrorMessageForMissingRequiredField("c")); + test:assertEquals((cn2).message(), "The provided header row is empty"); } @test:Config {dependsOn: [testCSVLocale]} diff --git a/native/src/main/java/io/ballerina/lib/data/csvdata/csv/CsvCreator.java b/native/src/main/java/io/ballerina/lib/data/csvdata/csv/CsvCreator.java index c7a2c4c..174e97c 100644 --- a/native/src/main/java/io/ballerina/lib/data/csvdata/csv/CsvCreator.java +++ b/native/src/main/java/io/ballerina/lib/data/csvdata/csv/CsvCreator.java @@ -66,9 +66,9 @@ static Object initRowValue(Type expectedType) { }; } - static void convertAndUpdateCurrentJsonNode(CsvParser.StateMachine sm, - String value, Type type, CsvConfig config, Type exptype, - Field currentField) { + static void convertAndUpdateCurrentCsvNode(CsvParser.StateMachine sm, + String value, Type type, CsvConfig config, Type exptype, + Field currentField) { Object currentCsv = sm.currentCsvNode; Object nilValue = config.nilValue; if (sm.config.nilAsOptionalField && !type.isNilable() @@ -91,6 +91,7 @@ static void convertAndUpdateCurrentJsonNode(CsvParser.StateMachine sm, case TypeTags.RECORD_TYPE_TAG: ((BMap) currentCsv).put(StringUtils.fromString(getHeaderValueForColumnIndex(sm)), convertedValue); + sm.currentCsvNodeLength++; return; case TypeTags.ARRAY_TAG: ArrayType arrayType = (ArrayType) currentCsvNodeType; @@ -99,9 +100,11 @@ static void convertAndUpdateCurrentJsonNode(CsvParser.StateMachine sm, return; } ((BArray) currentCsv).add(sm.columnIndex, convertedValue); + sm.currentCsvNodeLength++; return; case TypeTags.TUPLE_TAG: ((BArray) currentCsv).add(sm.columnIndex, convertedValue); + sm.currentCsvNodeLength++; return; default: } diff --git a/native/src/main/java/io/ballerina/lib/data/csvdata/csv/CsvParser.java b/native/src/main/java/io/ballerina/lib/data/csvdata/csv/CsvParser.java index d353eb2..cf7aaa8 100644 --- a/native/src/main/java/io/ballerina/lib/data/csvdata/csv/CsvParser.java +++ b/native/src/main/java/io/ballerina/lib/data/csvdata/csv/CsvParser.java @@ -145,6 +145,7 @@ static class StateMachine { State prevState; int arraySize = 0; boolean addHeadersForOutput = false; + int currentCsvNodeLength = 0; StateMachine() { reset(); @@ -180,6 +181,7 @@ public void reset() { prevState = null; arraySize = 0; addHeadersForOutput = false; + currentCsvNodeLength = 0; } private static boolean isWhitespace(char ch, Object lineTerminator) { @@ -438,9 +440,7 @@ private static void handleEndOfTheHeader(StateMachine sm) throws CsvParserExcept private static void handleEndOfTheHeader(StateMachine sm, boolean trim) throws CsvParserException { sm.isValueStart = false; - if (!sm.peek().isBlank()) { - addHeader(sm, trim); - } + addHeader(sm, trim); finalizeHeaders(sm); sm.columnIndex = 0; sm.lineNumber++; @@ -497,6 +497,9 @@ private static void addHeader(StateMachine sm, boolean trim) { if (trim) { value = value.trim(); } + if (value.isEmpty()) { + throw DiagnosticLog.error(DiagnosticErrorCode.HEADER_CANNOT_BE_EMPTY); + } if (sm.expectedArrayElementType instanceof RecordType) { String fieldName = CsvUtils.getUpdatedHeaders( sm.updatedRecordFieldNames, value, sm.fields.contains(value)); @@ -506,6 +509,9 @@ private static void addHeader(StateMachine sm, boolean trim) { sm.fieldHierarchy.remove(fieldName); } } + if (sm.headers.contains(value)) { + throw DiagnosticLog.error(DiagnosticErrorCode.DUPLICATE_HEADER, value); + } sm.headers.add(value); } @@ -631,7 +637,7 @@ private static void handleCsvRow(StateMachine sm, boolean trim) { if (trim) { value = value.trim(); } - if (!value.isBlank()) { + if (!(value.isBlank() && sm.currentCsvNodeLength == 0)) { addRowValue(sm, trim); } if (!sm.isCurrentCsvNodeEmpty) { @@ -652,6 +658,7 @@ private static void updateLineAndColumnIndexesWithoutRowIndexes(StateMachine sm) sm.currentCsvNode = null; sm.isCurrentCsvNodeEmpty = true; sm.columnIndex = 0; + sm.clear(); } private static boolean ignoreRow(long[] skipLines, int lineNumber) { @@ -693,6 +700,7 @@ private static void finalizeTheRow(StateMachine sm) { sm.rootCsvNode.add(sm.arraySize, sm.currentCsvNode); } sm.arraySize++; + sm.currentCsvNodeLength = 0; } private static void addRowValue(StateMachine sm) { @@ -715,7 +723,7 @@ private static void addRowValue(StateMachine sm, boolean trim) { } if (type != null) { - CsvCreator.convertAndUpdateCurrentJsonNode(sm, + CsvCreator.convertAndUpdateCurrentCsvNode(sm, value, type, sm.config, exptype, currentField); } sm.columnIndex++; @@ -731,7 +739,7 @@ private static void addHeaderAsRowValue(StateMachine sm, String value) { } if (type != null) { - CsvCreator.convertAndUpdateCurrentJsonNode(sm, + CsvCreator.convertAndUpdateCurrentCsvNode(sm, value, type, sm.config, exptype, currentField); } sm.columnIndex++; @@ -1023,7 +1031,6 @@ private void reset(StateMachine sm) { private char extractUnicodeChar(StateMachine sm) { return StringEscapeUtils.unescapeJava("\\u" + sm.hexBuilder.toString()).charAt(0); } - } /** diff --git a/native/src/main/java/io/ballerina/lib/data/csvdata/utils/DiagnosticErrorCode.java b/native/src/main/java/io/ballerina/lib/data/csvdata/utils/DiagnosticErrorCode.java index 32e9941..82df3a5 100644 --- a/native/src/main/java/io/ballerina/lib/data/csvdata/utils/DiagnosticErrorCode.java +++ b/native/src/main/java/io/ballerina/lib/data/csvdata/utils/DiagnosticErrorCode.java @@ -53,7 +53,9 @@ public enum DiagnosticErrorCode { SOURCE_CANNOT_CONVERT_INTO_EXP_TYPE("BDE_0026", "cannot.convert.into.exptype"), NO_CUSTOM_HEADER_PROVIDED("BDE_0027", "no.custom.header.provided"), HEADERS_WITH_VARYING_LENGTH_NOT_SUPPORTED("BDE_0027", - "headers.with.varying.length.not.supported"); + "headers.with.varying.length.not.supported"), + HEADER_VALUE_CANNOT_BE_EMPTY("BDE_0028", "header.value.cannot.be.empty"), + DUPLICATE_HEADER("BDE_0029", "duplicate.header"); String diagnosticId; String messageKey; diff --git a/native/src/main/resources/csv_error.properties b/native/src/main/resources/csv_error.properties index d653d87..ff9a22e 100644 --- a/native/src/main/resources/csv_error.properties +++ b/native/src/main/resources/csv_error.properties @@ -103,4 +103,10 @@ error.no.custom.header.provided=\ Custom headers should be provided error.headers.with.varying.length.not.supported=\ - CSV data rows with varying headers are not yet supported \ No newline at end of file + CSV data rows with varying headers are not yet supported + +error.header.value.cannot.be.empty=\ + Header cannot be empty + +error.duplicate.header=\ + Duplicate header found: ''{0}''