Skip to content

Commit

Permalink
Merge branch 'main' into fix-ingestion-aut
Browse files Browse the repository at this point in the history
  • Loading branch information
chirag-madlani authored Oct 1, 2024
2 parents 458aeb8 + 66cd00c commit aa5f199
Show file tree
Hide file tree
Showing 93 changed files with 4,028 additions and 702 deletions.
Empty file.
5 changes: 5 additions & 0 deletions bootstrap/sql/migrations/native/1.5.6/mysql/schemaChanges.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-- Remove the Search Indexing Application.
-- Relationship rows that point at the application (on either side) are deleted first,
-- while the application rows can still be joined on; the application rows follow.
DELETE rel
FROM entity_relationship rel
JOIN installed_apps app ON rel.fromId = app.id OR rel.toId = app.id
WHERE app.name = 'SearchIndexingApplication';

DELETE rel
FROM entity_relationship rel
JOIN apps_marketplace app ON rel.fromId = app.id OR rel.toId = app.id
WHERE app.name = 'SearchIndexingApplication';

DELETE FROM installed_apps WHERE name = 'SearchIndexingApplication';
DELETE FROM apps_marketplace WHERE name = 'SearchIndexingApplication';
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-- Remove the Search Indexing Application.
-- Relationship rows that point at the application (on either side) are deleted first,
-- while the application rows can still be matched; the application rows follow.
DELETE FROM entity_relationship rel
USING installed_apps app
WHERE (rel.fromId = app.id OR rel.toId = app.id)
  AND app.name = 'SearchIndexingApplication';

DELETE FROM entity_relationship rel
USING apps_marketplace app
WHERE (rel.fromId = app.id OR rel.toId = app.id)
  AND app.name = 'SearchIndexingApplication';

DELETE FROM installed_apps WHERE name = 'SearchIndexingApplication';
DELETE FROM apps_marketplace WHERE name = 'SearchIndexingApplication';
149 changes: 149 additions & 0 deletions openmetadata-service/src/main/java/org/openmetadata/csv/CsvUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,20 @@
import static org.openmetadata.common.utils.CommonUtil.listOrEmpty;
import static org.openmetadata.common.utils.CommonUtil.nullOrEmpty;

import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVFormat.Builder;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVPrinter;
import org.apache.commons.csv.CSVRecord;
import org.openmetadata.schema.type.EntityReference;
Expand All @@ -39,6 +45,8 @@ public final class CsvUtil {
// Separator between the two halves of a compound value. In this file it splits an entity field
// (fieldToEntities) and joins key/value, type/fullyQualifiedName and start/end pairs when an
// extension is written out.
public static final String ENTITY_TYPE_SEPARATOR = ":";
// CRLF sequence; presumably terminates each CSV record -- its usage is not visible here, confirm.
public static final String LINE_SEPARATOR = "\r\n";

// Separator between the elements of an array that has been packed into a single field value.
public static final String INTERNAL_ARRAY_SEPARATOR = "|";

/** Private so that no instance of this utility class can be created from outside it. */
private CsvUtil() {
// Utility class hides the constructor
}
Expand Down Expand Up @@ -94,6 +102,62 @@ public static List<String> fieldToEntities(String field) {
return field == null ? null : listOf(field.split(ENTITY_TYPE_SEPARATOR));
}

/**
 * Breaks a packed field value into its individual elements. INTERNAL_ARRAY_SEPARATOR is the
 * delimiter and is matched literally rather than as a regular expression.
 *
 * @param field packed value; may be null
 * @return an empty list when the field is null or blank, otherwise the separated elements
 */
public static List<String> fieldToInternalArray(String field) {
  boolean hasContent = field != null && !field.isBlank();
  if (!hasContent) {
    return Collections.emptyList();
  }
  String[] elements = field.split(Pattern.quote(INTERNAL_ARRAY_SEPARATOR));
  return listOf(elements);
}

// Matches one double-quoted segment; group 1 captures the text between the quotes. Compiled once
// and reused, since the expression never changes between calls.
private static final Pattern QUOTED_SEGMENT = Pattern.compile("\"([^\"]*)\"");

/**
 * Splits a field made of semicolon-separated key-value pairs into the individual pairs, leaving
 * semicolons that appear inside a double-quoted pair untouched. A pair may be enclosed in quotes,
 * especially when it contains a delimiter itself.
 *
 * <p>Input example: {@code key1:value1;key2:value2;"key3:value;with;semicolon"}<br>
 * Output: {@code [key1:value1, key2:value2, key3:value;with;semicolon]}
 *
 * <p>NOTE(review): the literal text {@code __SEMICOLON__} is used internally as a placeholder, so
 * an input that already contains it comes back with a semicolon in its place.
 *
 * @param field raw field value; may be null
 * @return the parsed pairs with one pair of surrounding quotes removed, or an empty list when
 *     the field is null or blank
 * @throws IOException if the CSV parser fails while reading the preprocessed field
 */
public static List<String> fieldToExtensionStrings(String field) throws IOException {
  if (field == null || field.isBlank()) {
    return List.of();
  }

  // Hide semicolons inside quoted segments behind a placeholder so the ';' delimiter below does
  // not split them. The replacement must go through quoteReplacement: replaceAll interprets '$'
  // and '\' in the string returned by the function as group references and escapes, which would
  // otherwise throw on, or silently corrupt, quoted values containing those characters.
  String preprocessedField =
      QUOTED_SEGMENT
          .matcher(field)
          .replaceAll(
              mr ->
                  java.util.regex.Matcher.quoteReplacement(
                      "\"" + mr.group(1).replace(";", "__SEMICOLON__") + "\""));

  // Backslash-escape newlines and quotes; the format below declares '\' as its escape character.
  preprocessedField = preprocessedField.replace("\n", "\\n").replace("\"", "\\\"");

  CSVFormat format =
      CSVFormat.DEFAULT
          .withDelimiter(';')
          .withQuote('"')
          .withRecordSeparator(null)
          .withIgnoreSurroundingSpaces(true)
          .withIgnoreEmptyLines(true)
          .withEscape('\\'); // Use backslash for escaping special characters

  try (CSVParser parser = CSVParser.parse(new StringReader(preprocessedField), format)) {
    return parser.getRecords().stream()
        .flatMap(CSVRecord::stream)
        .map(
            value ->
                value
                    .replace("__SEMICOLON__", ";")
                    .replace("\\n", "\n")) // Restore original semicolons and newlines
        .map(
            value ->
                value.startsWith("\"") && value.endsWith("\"") // Remove outer quotes if present
                    ? value.substring(1, value.length() - 1)
                    : value)
        .toList();
  }
}

/**
 * Wraps the given text in double quotes.
 *
 * @param field text to wrap; a null reference is rendered as the word {@code null}
 * @return the text with one double quote added in front and one at the end
 */
public static String quote(String field) {
  return "\"" + field + "\"";
}
Expand Down Expand Up @@ -205,4 +269,89 @@ private static String quoteCsvField(String str) {
}
return str;
}

// Shared converter for extension objects. Building an ObjectMapper is comparatively expensive and
// a configured instance can be reused, so a single one serves every call instead of creating a
// new mapper per CSV record.
private static final ObjectMapper EXTENSION_MAPPER = new ObjectMapper();

/**
 * Appends an extension object to a CSV record as a single field.
 *
 * <p>The extension is converted to a map. Each entry is rendered as {@code key:value} (the value
 * goes through formatValue), passed through quoteCsvField, and the entries are joined with
 * FIELD_SEPARATOR.
 *
 * @param csvRecord record being built; it is modified in place
 * @param extension extension object to serialise; when null, a null field is appended instead
 * @return the same {@code csvRecord} instance, to allow chaining
 */
public static List<String> addExtension(List<String> csvRecord, Object extension) {
  if (extension == null) {
    csvRecord.add(null);
    return csvRecord;
  }

  Map<String, Object> extensionMap = EXTENSION_MAPPER.convertValue(extension, Map.class);

  String extensionString =
      extensionMap.entrySet().stream()
          .map(
              entry ->
                  CsvUtil.quoteCsvField(
                      entry.getKey() + ENTITY_TYPE_SEPARATOR + formatValue(entry.getValue())))
          .collect(Collectors.joining(FIELD_SEPARATOR));

  csvRecord.add(extensionString);
  return csvRecord;
}

/**
 * Renders one extension value as text: maps and lists are delegated to their dedicated
 * formatters, a null value becomes an empty string, and anything else uses its toString().
 */
private static String formatValue(Object value) {
  if (value == null) {
    return "";
  }
  if (value instanceof Map) {
    return formatMapValue((Map<String, Object>) value);
  }
  return value instanceof List ? formatListValue((List<?>) value) : value.toString();
}

/**
 * Renders a map value. Maps recognised as entity references or time intervals get their compact
 * form (the entity-reference check is made first); any other map falls back to Map.toString().
 */
private static String formatMapValue(Map<String, Object> valueMap) {
  if (isEntityReference(valueMap)) {
    return formatEntityReference(valueMap);
  }
  return isTimeInterval(valueMap) ? formatTimeInterval(valueMap) : valueMap.toString();
}

/**
 * Renders a list value as its elements joined by INTERNAL_ARRAY_SEPARATOR. An empty list yields
 * an empty string.
 *
 * <p>The first element decides how every element is rendered: the "key" entry of each map when
 * the first element is a map with both "key" and "description", formatMapValue when it is any
 * other map, and toString() otherwise.
 */
private static String formatListValue(List<?> list) {
  if (list.isEmpty()) {
    return "";
  }

  Object first = list.get(0);
  boolean mapElements = first instanceof Map;
  boolean enumElements = mapElements && isEnumWithDescriptions((Map<String, Object>) first);

  List<String> rendered = new ArrayList<>(list.size());
  for (Object item : list) {
    String text;
    if (enumElements) {
      text = ((Map<String, Object>) item).get("key").toString();
    } else if (mapElements) {
      text = formatMapValue((Map<String, Object>) item);
    } else {
      text = item.toString();
    }
    rendered.add(text);
  }
  return String.join(INTERNAL_ARRAY_SEPARATOR, rendered);
}

/** Returns true when the map has both a "type" and a "fullyQualifiedName" key. */
private static boolean isEntityReference(Map<String, Object> valueMap) {
  if (!valueMap.containsKey("type")) {
    return false;
  }
  return valueMap.containsKey("fullyQualifiedName");
}

/** Returns true when the map has both a "start" and an "end" key. */
private static boolean isTimeInterval(Map<String, Object> valueMap) {
  return valueMap.containsKey("start") ? valueMap.containsKey("end") : false;
}

/** Returns true when the map has both a "key" and a "description" key. */
private static boolean isEnumWithDescriptions(Map<String, Object> valueMap) {
  boolean hasKey = valueMap.containsKey("key");
  return hasKey && valueMap.containsKey("description");
}

/**
 * Renders an entity-reference map as its "type" value, ENTITY_TYPE_SEPARATOR, and its
 * "fullyQualifiedName" value. A missing or null entry is rendered as the word null.
 */
private static String formatEntityReference(Map<String, Object> valueMap) {
  Object entityType = valueMap.get("type");
  Object fullyQualifiedName = valueMap.get("fullyQualifiedName");
  return entityType + ENTITY_TYPE_SEPARATOR + fullyQualifiedName;
}

/**
 * Renders a time-interval map as its "start" value, ENTITY_TYPE_SEPARATOR, and its "end" value.
 * A missing or null entry is rendered as the word null.
 */
private static String formatTimeInterval(Map<String, Object> valueMap) {
  return new StringBuilder()
      .append(valueMap.get("start"))
      .append(ENTITY_TYPE_SEPARATOR)
      .append(valueMap.get("end"))
      .toString();
}
}
Loading

0 comments on commit aa5f199

Please sign in to comment.