From a70082385ca49e5f60a3502ad7a6bc7c014f1116 Mon Sep 17 00:00:00 2001 From: Gisa Meier Date: Wed, 3 Jun 2020 15:20:58 +0200 Subject: [PATCH] Release RapidMiner Belt Adapter 0.7 --- pom.xml | 6 +- .../belt/table/AbstractTableAccessor.java | 204 + .../rapidminer/belt/table/BeltConverter.java | 1255 +----- .../table/ConvertOnWriteExampleTable.java | 684 ++++ .../belt/table/DatetimeTableWrapper.java | 10 +- .../belt/table/DoubleTableWrapper.java | 27 +- .../belt/table/FromTableConverter.java | 498 +++ .../com/rapidminer/belt/table/LegacyRole.java | 2 +- .../com/rapidminer/belt/table/LegacyType.java | 4 +- .../belt/table/MixedTableAccessor.java | 186 + .../belt/table/NominalMappingAdapter.java | 2 +- .../belt/table/NumericTableAccessor.java | 107 + .../table/RowwiseStatisticsExampleSet.java | 2 +- .../table/ShiftedNominalMappingAdapter.java | 2 +- .../belt/table/TableViewCreator.java | 209 +- .../belt/table/ToTableConverter.java | 1176 ++++++ .../belt/table/BeltConverterTest.java | 3506 +++++++++-------- .../table/ConvertOnWriteExampleTableTest.java | 1411 +++++++ .../belt/table/NominalMappingAdapterTest.java | 2 +- .../ShiftedNominalMappingAdapterTest.java | 2 +- .../belt/table/TableViewCreatorTest.java | 1428 +++++-- .../belt/table/ViewToTableConverterTest.java | 592 +++ 22 files changed, 8116 insertions(+), 3199 deletions(-) create mode 100644 src/main/java/com/rapidminer/belt/table/AbstractTableAccessor.java create mode 100644 src/main/java/com/rapidminer/belt/table/ConvertOnWriteExampleTable.java create mode 100644 src/main/java/com/rapidminer/belt/table/FromTableConverter.java create mode 100644 src/main/java/com/rapidminer/belt/table/MixedTableAccessor.java create mode 100644 src/main/java/com/rapidminer/belt/table/NumericTableAccessor.java create mode 100644 src/main/java/com/rapidminer/belt/table/ToTableConverter.java create mode 100644 src/test/java/com/rapidminer/belt/table/ConvertOnWriteExampleTableTest.java create mode 100644 
src/test/java/com/rapidminer/belt/table/ViewToTableConverterTest.java diff --git a/pom.xml b/pom.xml index e507d83..152dd4c 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ 4.0.0 com.rapidminer belt-adapter - 0.6 + 0.7 jar belt-adapter @@ -46,12 +46,12 @@ com.rapidminer belt - 1.0.0-BETA4 + 1.0.0-BETA5 com.rapidminer.studio rapidminer-studio-core - 9.5.0-BETA4 + 9.7.0-BETA3 diff --git a/src/main/java/com/rapidminer/belt/table/AbstractTableAccessor.java b/src/main/java/com/rapidminer/belt/table/AbstractTableAccessor.java new file mode 100644 index 0000000..32b6087 --- /dev/null +++ b/src/main/java/com/rapidminer/belt/table/AbstractTableAccessor.java @@ -0,0 +1,204 @@ +/** + * Copyright (C) 2001-2020 by RapidMiner and the contributors + * + * Complete list of developers available at our web site: + * + * http://rapidminer.com + * + * This program is free software: you can redistribute it and/or modify it under the terms of the + * GNU Affero General Public License as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without + * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License along with this program. + * If not, see http://www.gnu.org/licenses/. 
+ */ +package com.rapidminer.belt.table; + +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; + +import com.rapidminer.belt.column.CategoricalColumn; +import com.rapidminer.belt.column.Column; +import com.rapidminer.belt.column.ColumnType; +import com.rapidminer.belt.reader.NumericReader; +import com.rapidminer.example.Attribute; +import com.rapidminer.example.Attributes; +import com.rapidminer.operator.OperatorException; +import com.rapidminer.operator.error.AttributeNotFoundError; + + +/** + * Common code for {@link MixedTableAccessor} and {@link NumericTableAccessor} that allows to access the belt {@link + * Table} together with a list of attributes as needed by the {@link ConvertOnWriteExampleTable}. Consists mainly of the + * attribute handling copied from {@link com.rapidminer.example.table.AbstractExampleTable}. Additionally, there are + * methods to create a reader object and access a data point given a reader object. + * + * @author Gisa Meier + * @since 0.7 + */ +abstract class AbstractTableAccessor { + + private static final String EMPTY_STRING = ""; + + protected final Table table; + protected final List attributes; + + + AbstractTableAccessor(Table table, List attributes) { + this.attributes = attributes; + this.table = table; + } + + /** + * Get readers for the case that the whole table is going to be read. + * + * @return readers to use in an iterator over all rows + */ + abstract Object getReaders(); + + /** + * Get readers for that one row is read without an iterator over all rows. + * + * @return readers to use for reading a single row + */ + abstract Object getUnbufferedReaders(); + + /** + * Reads the value at (rowIndex, columnIndex) using the reader object. The reader object is a parameter here so + * that + * it can be cached. 
+ * + * @param rowIndex + * the index of the row to read + * @param columnIndex + * the index of the column to read + * @param readerObject + * the reader object to use for reading + * @return the value at the specified position + */ + abstract double get(int rowIndex, int columnIndex, Object readerObject); + + + // The following 3 methods are copied from {@link AbstractExampleTable} + + /** + * @return the attributes as a new array + */ + Attribute[] getAttributes() { + Attribute[] attribute = new Attribute[attributes.size()]; + attributes.toArray(attribute); + return attribute; + } + + /** + * Returns the attribute of the column number {@code i}. + * + * @param i + * the column index + * @return the attribute with the given index + */ + Attribute getAttribute(int i) { + return attributes.get(i); + } + + /** + * Returns the attribute with the given name. + */ + Attribute findAttribute(String name) throws OperatorException { + if (name == null) { + return null; + } + for (Attribute att : attributes) { + if (att != null && att.getName().equals(name)) { + return att; + } + } + throw new AttributeNotFoundError(null, null, name); + } + + /** + * Gets the numeric value given a row and column index and a numeric reader. Shared code used in {@link + * NumericTableAccessor} and {@link MixedTableAccessor}. 
+ * + * @param rowIndex + * the row index + * @param columnIndex + * the column index + * @param reader + * the reader to use + * @return the value at the specified position + */ + protected double getNumericValue(int rowIndex, int columnIndex, NumericReader reader) { + // always return {@code 0} for advanced columns + if (reader == null) { + return 0; + } + // set the position only if not already at the right position + if (reader.position() != rowIndex - 1) { + reader.setPosition(rowIndex - 1); + } + // need to subtract {@code 1} in case of nominal attributes because of the shifted mapping in belt + Attribute attribute = getAttribute(columnIndex); + if (attribute.isNominal()) { + return reader.read() - 1; + } else { + return reader.read(); + } + } + + /** + * @return the underlying {@link Table} + */ + Table getTable() { + return table; + } + + /** + * Creates a copy of the {@link AbstractTableAccessor} where the unused columns have been replaced by dummy columns + * with minimal memory consumption. + * + * @param attributes + * the used attributes + * @return an accessor with cleaned up columns + */ + abstract AbstractTableAccessor columnCleanupClone(Attributes attributes); + + /** + * Creates a copy of the underlying {@link Table} where the unused columns have been replaced by dummy columns with + * minimal memory consumption. 
+ * + * @param attributes + * the used attributes + * @return a table with cleaned up columns + */ + protected Table columnCleanup(Attributes attributes) { + String[] labels = table.labelArray(); + Column[] oldColumns = table.getColumns(); + Column[] columns = Arrays.copyOf(oldColumns, oldColumns.length); + boolean[] usedIndices = new boolean[table.width()]; + for (Iterator allIterator = attributes.allAttributes(); allIterator.hasNext(); ) { + Attribute attribute = allIterator.next(); + int tableIndex = attribute.getTableIndex(); + if (tableIndex < usedIndices.length) { + usedIndices[tableIndex] = true; + } + } + //column taking minimal memory + CategoricalColumn emptySparseColumn = + ColumnAccessor.get().newSingleValueCategoricalColumn(ColumnType.NOMINAL, EMPTY_STRING, + table.height()); + //replace unused columns by those which take minimal memory + for (int i = 0; i < columns.length; i++) { + if (!usedIndices[i]) { + columns[i] = emptySparseColumn; + } + } + return new Table(columns, labels, table.getMetaData()); + } + +} \ No newline at end of file diff --git a/src/main/java/com/rapidminer/belt/table/BeltConverter.java b/src/main/java/com/rapidminer/belt/table/BeltConverter.java index 41a95b9..e661f28 100644 --- a/src/main/java/com/rapidminer/belt/table/BeltConverter.java +++ b/src/main/java/com/rapidminer/belt/table/BeltConverter.java @@ -1,5 +1,5 @@ /** - * Copyright (C) 2001-2019 by RapidMiner and the contributors + * Copyright (C) 2001-2020 by RapidMiner and the contributors * * Complete list of developers available at our web site: * @@ -18,66 +18,29 @@ */ package com.rapidminer.belt.table; -import java.time.Instant; -import java.util.ArrayList; +import java.util.Collections; +import java.util.EnumSet; import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; import java.util.Map; import java.util.Set; -import java.util.concurrent.Callable; -import java.util.concurrent.ExecutionException; +import 
java.util.regex.Matcher; +import java.util.regex.Pattern; -import com.rapidminer.adaption.belt.ContextAdapter; import com.rapidminer.adaption.belt.IOTable; -import com.rapidminer.belt.buffer.Buffers; -import com.rapidminer.belt.buffer.CategoricalBuffer; -import com.rapidminer.belt.buffer.DateTimeBuffer; -import com.rapidminer.belt.buffer.NumericBuffer; -import com.rapidminer.belt.column.BooleanDictionary; -import com.rapidminer.belt.column.CategoricalColumn; import com.rapidminer.belt.column.Column; import com.rapidminer.belt.column.ColumnType; -import com.rapidminer.belt.column.ColumnTypes; -import com.rapidminer.belt.column.Columns; import com.rapidminer.belt.column.Dictionary; -import com.rapidminer.belt.reader.CategoricalReader; -import com.rapidminer.belt.reader.NumericReader; -import com.rapidminer.belt.reader.ObjectReader; -import com.rapidminer.belt.reader.Readers; -import com.rapidminer.belt.util.ColumnMetaData; import com.rapidminer.belt.util.ColumnReference; import com.rapidminer.belt.util.ColumnRole; -import com.rapidminer.belt.util.IntegerFormats; -import com.rapidminer.belt.util.IntegerFormats.Format; -import com.rapidminer.belt.util.IntegerFormats.PackedIntegers; import com.rapidminer.core.concurrency.ConcurrencyContext; -import com.rapidminer.example.Attribute; -import com.rapidminer.example.AttributeRole; import com.rapidminer.example.Attributes; -import com.rapidminer.example.Example; import com.rapidminer.example.ExampleSet; -import com.rapidminer.example.SimpleAttributes; -import com.rapidminer.example.set.AbstractExampleSet; import com.rapidminer.example.set.HeaderExampleSet; -import com.rapidminer.example.set.SimpleExampleSet; -import com.rapidminer.example.table.AttributeFactory; -import com.rapidminer.example.table.BinominalAttribute; -import com.rapidminer.example.table.BinominalMapping; -import com.rapidminer.example.table.DateAttribute; -import com.rapidminer.example.table.ExampleTable; -import com.rapidminer.example.table.NominalMapping; 
-import com.rapidminer.example.table.NumericalAttribute; -import com.rapidminer.example.table.PolynominalAttribute; -import com.rapidminer.example.table.internal.ColumnarExampleTable; -import com.rapidminer.example.utils.ExampleSets; -import com.rapidminer.tools.LogService; import com.rapidminer.tools.Ontology; /** - * Converts between {@link ExampleSet}s and belt {@link Table}s. For now supports only numeric attributes. + * Converts between {@link ExampleSet}s and belt {@link Table}s. * * Please note that this class is not part of any public API and might be modified or removed in future releases without * prior warning. @@ -86,17 +49,23 @@ */ public final class BeltConverter { + /** + * The standard belt types. + */ + public static final Set STANDARD_TYPES = EnumSet.of(Column.TypeId.REAL, Column.TypeId.INTEGER_53_BIT, + Column.TypeId.NOMINAL, Column.TypeId.DATE_TIME, Column.TypeId.TIME); + /** * Marker exception for conversion failure from belt {@link Table} to {@link ExampleSet}. Happens only if the belt - * table contains custom columns. + * table contains advanced columns. */ public static class ConversionException extends UnsupportedOperationException { private final String columnName; private final transient ColumnType type; - private ConversionException(String columnName, ColumnType type) { - super("Failed to convert Table because of custom column " + columnName + " of type " + type.customTypeID()); + ConversionException(String columnName, ColumnType type) { + super("Failed to convert Table because of advanced column '" + columnName + "' of " + type); this.columnName = columnName; this.type = type; } @@ -117,112 +86,43 @@ public ColumnType getType() { } /** - * Message for when non-supported columns types are encountered - */ - private static final String MESSAGE_NON_SUPPORTED = "Type not supported for now"; - - /** - * Set of primitive attribute types that are known to be thread safe for read accesses. 
+ * key for storing belt column meta data in the user data of an {@link ExampleSet} */ - private static final Set> SAFE_ATTRIBUTES = new HashSet<>(5); + static final String IOOBJECT_USER_DATA_COLUMN_META_DATA_KEY = BeltConverter.class.getName() + ".column_meta_data"; /** - * Number of milli-seconds in a second + * Prefix of the role names of confidence attributes */ - private static final long MILLISECONDS_PER_SECOND = 1_000; + static final String CONFIDENCE_PREFIX = Attributes.CONFIDENCE_NAME + "_"; /** - * Number of nano-seconds in a milli-second + * Pattern used to check if a studio role contains an index that needs to be removed before converting to belt. */ - private static final long NANOS_PER_MILLI_SECOND = 1_000_000; + private static final Pattern INDEX_PATTERN = Pattern.compile("(.+)_[0-9]+"); /** * String into which {@link ColumnRole#METADATA} is converted */ private static final String META_DATA_NAME = "meta_data"; - /** - * Prefix of the role names of confidence attributes - */ - private static final String CONFIDENCE_PREFIX = Attributes.CONFIDENCE_NAME + "_"; - - /** - * The length of the {@link #CONFIDENCE_PREFIX} - */ - private static final int CONFIDENCE_PREFIX_LENGHT = CONFIDENCE_PREFIX.length(); - - static { - SAFE_ATTRIBUTES.add(DateAttribute.class); - SAFE_ATTRIBUTES.add(BinominalAttribute.class); - SAFE_ATTRIBUTES.add(PolynominalAttribute.class); - SAFE_ATTRIBUTES.add(NumericalAttribute.class); - } // Suppress default constructor for noninstantiability - private BeltConverter() {throw new AssertionError();} + private BeltConverter() { + throw new AssertionError(); + } /** - * Creates a belt {@link IOTable} from the given {@link ExampleSet}. This is done in parallel if the - * exampleSet is threadsafe. + * Creates a belt {@link IOTable} from the given {@link ExampleSet}. This is done in parallel if the exampleSet is + * threadsafe. 
* * @param exampleSet - * the exampleSet to convert + * the exampleSet to convert * @param context - * the concurrency context to use for the conversion + * the concurrency context to use for the conversion * @return a belt table */ public static IOTable convert(ExampleSet exampleSet, ConcurrencyContext context) { - if (exampleSet == null) { - throw new IllegalArgumentException("Example set must not be null"); - } - if (context == null) { - throw new IllegalArgumentException("Context must not be null"); - } - - // check example set implementation - boolean threadSafeView = exampleSet instanceof AbstractExampleSet - && ((AbstractExampleSet) exampleSet).isThreadSafeView(); - boolean simpleView = exampleSet.getClass() == SimpleExampleSet.class; - boolean threadSafe = threadSafeView; - - // check example table implementation - if (threadSafe) { - ExampleTable table = exampleSet.getExampleTable(); - threadSafe = table.getClass() == ColumnarExampleTable.class; - } - - // check attribute implementation - if (threadSafe) { - Attributes attributes = exampleSet.getAttributes(); - threadSafe = attributes.getClass() == SimpleAttributes.class; - } - - // check individual attributes and attribute transformations - if (threadSafe) { - Iterator attributes = exampleSet.getAttributes().allAttributes(); - while (threadSafe && attributes.hasNext()) { - Attribute attribute = attributes.next(); - if (!SAFE_ATTRIBUTES.contains(attribute.getClass()) || attribute.getLastTransformation() != null) { - threadSafe = false; - } - } - } - Table table; - if (threadSafe) { - // we can safely read from the input example using multiple threads - if (simpleView) { - // we can ignore the view and read directly from the underlying example table - table = exampleTableConvert(exampleSet, context); - } else { - table = parallelConvert(exampleSet, context); - } - } else { - table = sequentialConvert(exampleSet, context); - } - IOTable tableObject = new IOTable(table); - 
tableObject.getAnnotations().addAll(exampleSet.getAnnotations()); - tableObject.setSource(exampleSet.getSource()); - return tableObject; + return ToTableConverter.convert(exampleSet, context); } /** @@ -234,28 +134,10 @@ public static IOTable convert(ExampleSet exampleSet, ConcurrencyContext context) * the table to extract from * @return a {@link HeaderExampleSet} where the nominal mappings of the attributes are immutable * @throws ConversionException - * if the table cannot be converted because it contains custom columns + * if the table cannot be converted because it contains non-standard columns */ public static HeaderExampleSet convertHeader(Table table) { - Attributes attributes = new SimpleAttributes(); - List labels = table.labels(); - int i = 0; - for (String label : labels) { - Column column = table.column(i); - Attribute attribute = AttributeFactory.createAttribute(label, getValueType(table, label, i)); - attribute.setTableIndex(i); - attributes.add(new AttributeRole(attribute)); - if (attribute.isNominal()) { - List mapping = ColumnAccessor.get().getDictionaryList(column.getDictionary(String.class)); - attribute.setMapping(new NominalMappingAdapter(mapping)); - } - String role = convertRole(table, label); - if (role != null) { - attributes.setSpecialAttribute(attribute, role); - } - i++; - } - return new HeaderExampleSet(attributes); + return FromTableConverter.convertHeader(table); } /** @@ -269,45 +151,10 @@ public static HeaderExampleSet convertHeader(Table table) { * @throws IllegalArgumentException * if table or context is null * @throws ConversionException - * if the table cannot be converted because it contains custom columns + * if the table cannot be converted because it contains non-standard columns */ public static ExampleSet convert(IOTable tableObject, ConcurrencyContext context) { - if (tableObject == null) { - throw new IllegalArgumentException("Table object must not be null"); - } - if (context == null) { - throw new 
IllegalArgumentException("Context must not be null"); - } - - Table table = tableObject.getTable(); - List attributes = new ArrayList<>(); - List labels = table.labels(); - int i = 0; - for (String label : labels) { - int valueType = getValueType(table, label, i); - attributes.add(AttributeFactory.createAttribute(label, valueType)); - i++; - } - - ExampleSet set = ExampleSets.from(attributes).withBlankSize(table.height()).build(); - ExampleTable exampleTable = set.getExampleTable(); - if (exampleTable instanceof ColumnarExampleTable) { - ColumnarExampleTable columnTable = (ColumnarExampleTable) exampleTable; - convertParallel(table, attributes, columnTable, context); - } else { - convertSequentially(table, set); - } - - Attributes allAttributes = set.getAttributes(); - for (String label : labels) { - String studioRole = convertRole(table, label); - if (studioRole != null && checkUnique(allAttributes, studioRole)) { - allAttributes.setSpecialAttribute(allAttributes.get(label), studioRole); - } - } - set.getAnnotations().addAll(tableObject.getAnnotations()); - set.setSource(tableObject.getSource()); - return set; + return FromTableConverter.convert(tableObject, context); } /** @@ -318,91 +165,12 @@ public static ExampleSet convert(IOTable tableObject, ConcurrencyContext context * the table object to convert * @return the example set * @throws ConversionException - * if the table cannot be converted because it contains custom columns + * if the table cannot be converted because it contains non-standard columns */ public static ExampleSet convertSequentially(IOTable tableObject) { - if (tableObject == null) { - throw new IllegalArgumentException("Table object must not be null"); - } - - Table table = tableObject.getTable(); - List attributes = new ArrayList<>(); - List labels = table.labels(); - int i = 0; - for (String label : labels) { - int valueType = getValueType(table, label, i); - attributes.add(AttributeFactory.createAttribute(label, valueType)); - i++; - } - - 
ExampleSet set = ExampleSets.from(attributes).withBlankSize(table.height()).build(); - convertSequentially(table, set); - Attributes allAttributes = set.getAttributes(); - for (String label : labels) { - String studioRole = convertRole(table, label); - if (studioRole != null && checkUnique(allAttributes, studioRole)) { - allAttributes.setSpecialAttribute(allAttributes.get(label), studioRole); - } - } - set.getAnnotations().addAll(tableObject.getAnnotations()); - set.setSource(tableObject.getSource()); - return set; - } - - /** - * While studio does not explicitly forbid {@code null} values in dictionaries, some places assume that there are - * none, so we adjust all belt dictionaries with this problem. - * - * @param column - * a nominal column - */ - private static Column removeGapsFromDictionary(Column column) { - return Columns.compactDictionary(column); - } - - /** - * Roles for ExampleSets must be unique. If the converted roles are not, we need to make them. For now we ignore - * non-unique roles. - */ - private static boolean checkUnique(Attributes allAttributes, String studioRole) { - boolean unusedRole = allAttributes.findRoleBySpecialName(studioRole) == null; - if (!unusedRole) { - LogService.getRoot().warning(() -> "Second occurence of role '" + studioRole + "' is dropped since roles " + - "in ExampleSets must be unique"); - } - return unusedRole; + return FromTableConverter.convertSequentially(tableObject); } - /** - * Converts attribute roles into belt column roles. 
- */ - private static ColumnRole convert(String studioRole) { - switch (studioRole) { - case Attributes.LABEL_NAME: - return ColumnRole.LABEL; - case Attributes.ID_NAME: - return ColumnRole.ID; - case Attributes.PREDICTION_NAME: - return ColumnRole.PREDICTION; - case Attributes.CONFIDENCE_NAME: - return ColumnRole.SCORE; - case Attributes.CLUSTER_NAME: - return ColumnRole.CLUSTER; - case Attributes.OUTLIER_NAME: - return ColumnRole.OUTLIER; - case Attributes.WEIGHT_NAME: - return ColumnRole.WEIGHT; - case Attributes.BATCH_NAME: - return ColumnRole.BATCH; - default: - if (studioRole.startsWith(Attributes.CONFIDENCE_NAME)) { - return ColumnRole.SCORE; - } - return ColumnRole.METADATA; - } - } - - /** * Converts the belt table role for the given label to an attribute role name. * @@ -458,7 +226,6 @@ public static String convertRole(Table table, String label) { } else { return Attributes.CONFIDENCE_NAME; } - } else if (role == ColumnRole.METADATA) { return META_DATA_NAME; } @@ -466,199 +233,25 @@ public static String convertRole(Table table, String label) { return convertedRole; } - /** - * Gets the value type from the meta data if present or from the table otherwise. - * - * @throws ConversionException - * if the column cannot be converted because it is a custom column - */ - static int getValueType(Table table, String label, int columnIndex) { - Column column = table.column(columnIndex); - int derivedOntology = convertToOntology(column, label); - LegacyType legacyType = table.getFirstMetaData(label, LegacyType.class); - if (legacyType != null) { - int legacyOntology = legacyType.ontology(); - if (isAssignable(legacyOntology, derivedOntology, column)) { - return legacyOntology; - } - } - return derivedOntology; - } - - /** - * Check if conversion to the legacy type is possible. 
- */ - private static boolean isAssignable(int legacyOntology, int derivedOntology, Column column) { - // legacy ontology is super type or the same - if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(derivedOntology, legacyOntology)) { - return true; - } - // if binominal is requested for a polynominal derived type, check dictionary size and if only positive - if (legacyOntology == Ontology.BINOMINAL && derivedOntology == Ontology.POLYNOMINAL) { - Dictionary dictionary = column.getDictionary(String.class); - return dictionary.size() <= 2 && - //BinominalMapping can have no positive but not no negative - !(dictionary.isBoolean() && dictionary.hasPositive() && !dictionary.hasNegative()); - } - // derived ontology is a nominal subtype and legacy ontology, too - if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(derivedOntology, Ontology.NOMINAL) && Ontology.ATTRIBUTE_VALUE_TYPE - .isA(legacyOntology, Ontology.NOMINAL)) { - return true; - } - // for legacy support we allow conversion from date-time to time - if (legacyOntology == Ontology.TIME && derivedOntology == Ontology.DATE_TIME) { - return true; - } - // date-time can be shown as date - return legacyOntology == Ontology.DATE && derivedOntology == Ontology.DATE_TIME; - } - - /** - * Copies the data from the table into the set sequentially. 
- */ - private static void convertSequentially(Table table, ExampleSet set) { - int i = 0; - for (Attribute attribute : set.getAttributes()) { - Column column = table.column(i++); - switch (attribute.getValueType()) { - case Ontology.STRING: - case Ontology.FILE_PATH: - case Ontology.NOMINAL: - case Ontology.POLYNOMINAL: - copyToNominal(set, attribute, column); - break; - case Ontology.BINOMINAL: - copyToBinominal(set, attribute, column); - break; - case Ontology.NUMERICAL: - case Ontology.REAL: - case Ontology.INTEGER: - NumericReader reader = Readers.numericReader(column, column.size()); - for (Example example : set) { - example.setValue(attribute, reader.read()); - } - break; - case Ontology.TIME: - case Ontology.DATE_TIME: - case Ontology.DATE: - copyToDateTime(set, attribute, column); - break; - default: - throw new UnsupportedOperationException(MESSAGE_NON_SUPPORTED); - } - } - } - - private static void copyToDateTime(ExampleSet set, Attribute attribute, Column column) { - ObjectReader reader = - Readers.objectReader(column, Instant.class); - for (Example example : set) { - Instant read = reader.read(); - if (read == null) { - example.setValue(attribute, Double.NaN); - } else { - example.setValue(attribute, read.toEpochMilli()); - } - } - } - - private static void copyToNominal(ExampleSet set, Attribute attribute, Column column) { - column = removeGapsFromDictionary(column); - - copyNewToOldMapping(attribute, column); - CategoricalReader reader = Readers.categoricalReader(column); - for (Example example : set) { - int read = reader.read(); - if (read == CategoricalReader.MISSING_CATEGORY) { - example.setValue(attribute, Double.NaN); - } else { - example.setValue(attribute, read - 1d); - } - } - } - - private static void copyToBinominal(ExampleSet set, Attribute attribute, Column column) { - column = removeGapsFromDictionary(column); - - Dictionary dictionary = column.getDictionary(String.class); - List mapping = 
ColumnAccessor.get().getDictionaryList(dictionary); - if (dictionary.isBoolean()) { - // check if last value is positive - if (dictionary.getPositiveIndex() == 2 || !dictionary.hasPositive()) { - copyNegativePositive(set, attribute, column, dictionary); - } else { - copyPositiveNegative(set, attribute, column, mapping); - } - }else{ - copyToNominal(set, attribute, column); - } - } - - /** - * Copy binominals from table to mapping in case the mapping contains first the positive, then the negative value. - */ - private static void copyPositiveNegative(ExampleSet set, Attribute attribute, Column column, List - mapping) { - //the second mapped value is negative, we have to swap indices - int positiveIndex = mapping.size() - 2; - int negativeIndex = mapping.size() - 1; - NominalMapping legacyMapping = attribute.getMapping(); - //the first mapped value is negative - legacyMapping.mapString(mapping.get(negativeIndex)); - legacyMapping.mapString(mapping.get(positiveIndex)); - CategoricalReader reader = Readers.categoricalReader(column); - for (Example example : set) { - int read = reader.read(); - if (read == negativeIndex) { - example.setValue(attribute, BinominalMapping.NEGATIVE_INDEX); - } else if (read == positiveIndex) { - example.setValue(attribute, BinominalMapping.POSITIVE_INDEX); - } else { - example.setValue(attribute, Double.NaN); - } - } - } - - /** - * Copy binominals from table to mapping in case the mapping contains first the negative, then the positive value. 
- */ - private static void copyNegativePositive(ExampleSet set, Attribute attribute, Column column, Dictionary - mapping) { - NominalMapping legacyMapping = attribute.getMapping(); - //the first mapped value is negative, the order is kept - for (Dictionary.Entry value : mapping) { - legacyMapping.mapString(value.getValue()); - } - CategoricalReader reader = Readers.categoricalReader(column); - for (Example example : set) { - int read = reader.read(); - if (read == CategoricalReader.MISSING_CATEGORY) { - example.setValue(attribute, Double.NaN); - } else { - example.setValue(attribute, read - 1d); - } - } - } - /** * Finds the right {@link Ontology} for a given {@link Column} * * @param column - * the column to convert + * the column to convert * @param columnName - * the name of the column, used for exceptions + * the name of the column, used for exceptions * @return the associated ontology * @throws ConversionException - * if the column cannot be converted because it is a custom column + * if the column cannot be converted because it is a non-standard column */ public static int convertToOntology(Column column, String columnName) { switch (column.type().id()) { - case INTEGER: + case INTEGER_53_BIT: return Ontology.INTEGER; case REAL: return Ontology.REAL; case NOMINAL: - Dictionary dictionary = column.getDictionary(Object.class); + Dictionary dictionary = column.getDictionary(); if (dictionary.isBoolean() && !(dictionary.hasPositive() && !dictionary.hasNegative())) { return Ontology.BINOMINAL; } @@ -674,715 +267,153 @@ public static int convertToOntology(Column column, String columnName) { } /** - * Copies the given table into the given columnTable. Copies each of the given attributes in parallel using the - * given context. 
- */ - private static void convertParallel(Table table, List attributes, - ColumnarExampleTable columnTable, ConcurrencyContext context) { - List> copier = new ArrayList<>(table.width()); - int i = 0; - for (Attribute attribute : attributes) { - Column column = table.column(i++); - switch (attribute.getValueType()) { - case Ontology.STRING: - case Ontology.FILE_PATH: - case Ontology.NOMINAL: - case Ontology.POLYNOMINAL: - copier.add(() -> - copyNominalColumnToRows(columnTable, attribute, column)); - break; - case Ontology.BINOMINAL: - copier.add(() -> - copyBinominalColumnToRows(columnTable, attribute, column)); - break; - case Ontology.NUMERICAL: - case Ontology.REAL: - case Ontology.INTEGER: - copier.add(() -> { - NumericReader reader = - Readers.numericReader(column); - for (int row = 0; row < columnTable.size(); row++) { - columnTable.getDataRow(row).set(attribute, reader.read()); - } - return null; - }); - break; - case Ontology.TIME: - case Ontology.DATE_TIME: - case Ontology.DATE: - copier.add(() -> copyDateTimeColumnToRows(columnTable, attribute, column)); - break; - default: - throw new UnsupportedOperationException(MESSAGE_NON_SUPPORTED); - } - } - try { - context.call(copier); - } catch (ExecutionException e) { - Throwable cause = e.getCause(); - if (cause instanceof RuntimeException) { - throw (RuntimeException) cause; - } else if (cause instanceof Error) { - throw (Error) cause; - } else { - throw new RuntimeException(cause.getMessage(), cause); - } - } - } - - private static Void copyDateTimeColumnToRows(ColumnarExampleTable columnTable, Attribute attribute, Column - column) { - ObjectReader reader = Readers.objectReader(column, Instant.class); - for (int row = 0; row < columnTable.size(); row++) { - Instant read = reader.read(); - if (read == null) { - columnTable.getDataRow(row).set(attribute, Double.NaN); - } else { - columnTable.getDataRow(row).set(attribute, read.toEpochMilli()); - } - } - return null; - } - - private static Void 
copyBinominalColumnToRows(ColumnarExampleTable columnTable, Attribute attribute, - Column column) { - column = removeGapsFromDictionary(column); - - Dictionary dictionary = column.getDictionary(String.class); - if(dictionary.isBoolean()) { - List mapping = ColumnAccessor.get().getDictionaryList(dictionary); - // check if last value is positive - if (dictionary.getPositiveIndex() == 2 || !dictionary.hasPositive()) { - copyNegativePositiveToRows(columnTable, attribute, column, dictionary); - } else { - copyPositiveNegativeToRows(columnTable, attribute, column, mapping); - - } - }else{ - copyNominalColumnToRows(columnTable, attribute, column); - } - return null; - } - - /** - * Copy binominals from table to mapping in case the mapping contains first the positive, then the negative value. - */ - private static void copyPositiveNegativeToRows(ColumnarExampleTable columnTable, Attribute attribute, Column - column, - List mapping) { - //the second mapped value is negative, we have to swap indices - int positiveIndex = mapping.size() - 2; - int negativeIndex = mapping.size() - 1; - - NominalMapping legacyMapping = attribute.getMapping(); - //the first mapped value is negative - legacyMapping.mapString(mapping.get(negativeIndex)); - legacyMapping.mapString(mapping.get(positiveIndex)); - CategoricalReader reader = Readers.categoricalReader(column); - for (int row = 0; row < columnTable.size(); row++) { - int read = reader.read(); - if (read == negativeIndex) { - columnTable.getDataRow(row).set(attribute, BinominalMapping.NEGATIVE_INDEX); - } else if (read == positiveIndex) { - columnTable.getDataRow(row).set(attribute, BinominalMapping.POSITIVE_INDEX); - } else { - columnTable.getDataRow(row).set(attribute, Double.NaN); - } - } - } - - /** - * Copy binominals from table to mapping in case the mapping contains first the negative, then the positive value. 
+ * Checks if the {@link com.rapidminer.example.table.ExampleTable} of the given {@link ExampleSet} wraps a belt + * {@link Table}. In this case the performance may differ, in particular {@link ExampleSet#getExample(int)} might be + * slower than otherwise. + * + * @param exampleSet + * the {@link ExampleSet} to check + * @return {@code true} iff the example table of the example set is on top of a {@link Table} */ - private static void copyNegativePositiveToRows(ColumnarExampleTable columnTable, Attribute attribute, - Column column, Dictionary mapping) { - //the first mapped value is negative, the order is kept - NominalMapping legacyMapping = attribute.getMapping(); - for (Dictionary.Entry value : mapping) { - legacyMapping.mapString(value.getValue()); - } - CategoricalReader reader = Readers.categoricalReader(column); - for (int row = 0; row < columnTable.size(); row++) { - int read = reader.read(); - if (read == CategoricalReader.MISSING_CATEGORY) { - columnTable.getDataRow(row).set(attribute, Double.NaN); - } else { - columnTable.getDataRow(row).set(attribute, read - 1d); - } - } - } - - private static Void copyNominalColumnToRows(ColumnarExampleTable columnTable, Attribute attribute, Column column) { - column = removeGapsFromDictionary(column); - - copyNewToOldMapping(attribute, column); - CategoricalReader reader = Readers.categoricalReader(column); - for (int row = 0; row < columnTable.size(); row++) { - int read = reader.read(); - if (read == CategoricalReader.MISSING_CATEGORY) { - columnTable.getDataRow(row).set(attribute, Double.NaN); - } else { - columnTable.getDataRow(row).set(attribute, read - 1d); - } - } - return null; - } - - private static void copyNewToOldMapping(Attribute attribute, Column column) { - List mapping = ColumnAccessor.get().getDictionaryList(column.getDictionary(String.class)); - NominalMapping legacyMapping = attribute.getMapping(); - for (int j = 1; j < mapping.size(); j++) { - legacyMapping.mapString(mapping.get(j)); - } + public 
static boolean isTableWrapper(ExampleSet exampleSet) { + return ToTableConverter.getExampleTable(exampleSet) instanceof ConvertOnWriteExampleTable; } /** - * Conversion where the exampleSet cannot be accessed in parallel. + * Converts belt roles to studio roles and adds them to the given Attributes. Duplicate roles will be made + * unique by + * adding an index to them. */ - private static Table sequentialConvert(ExampleSet exampleSet, ConcurrencyContext context) { - int size = exampleSet.size(); - TableBuilder builder = Builders.newTableBuilder(size); - Attribute prediction = exampleSet.getAttributes().getPredictedLabel(); - for (Iterator allRoles = exampleSet.getAttributes().allAttributeRoles(); allRoles.hasNext(); ) { - AttributeRole role = allRoles.next(); - Attribute attribute = role.getAttribute(); - copyDataAndType(builder, exampleSet, size, attribute); - if (role.isSpecial()) { - String specialName = role.getSpecialName(); - ColumnRole beltRole = convert(specialName); - builder.addMetaData(attribute.getName(), beltRole); - if (beltRole == ColumnRole.METADATA) { - builder.addMetaData(attribute.getName(), new LegacyRole(specialName)); - } else if (beltRole == ColumnRole.SCORE) { - String predictionName = prediction == null ? 
null : prediction.getName(); - if (specialName.startsWith(CONFIDENCE_PREFIX)) { - builder.addMetaData(attribute.getName(), - new ColumnReference(predictionName, - specialName.substring(CONFIDENCE_PREFIX_LENGHT))); - } else { - builder.addMetaData(attribute.getName(), new ColumnReference(predictionName)); - } + static void convertRoles(Table table, Attributes allAttributes) { + // this map is used in case there are duplicate roles to get indices for the duplicate roles + Map nextRoleIndex = new HashMap<>(); + for (String label : table.labels()) { + String studioRole = convertRole(table, label); + if (studioRole != null) { + // add an index if necessary + String studioRoleWithIndex = studioRole; + while (!checkUnique(allAttributes, studioRoleWithIndex)) { + int index = nextRoleIndex.getOrDefault(studioRole, 2); + studioRoleWithIndex = studioRole + "_" + index; + nextRoleIndex.put(studioRole, index + 1); } + allAttributes.setSpecialAttribute(allAttributes.get(label), studioRoleWithIndex); } } - return builder.build(ContextAdapter.adapt(context)); - } - - /** - * Copies the data from the example set to the builder and adds a legacy type if the type is not determined by the - * data. 
- */ - private static void copyDataAndType(TableBuilder builder, ExampleSet exampleSet, int size, Attribute attribute) { - String name = attribute.getName(); - switch (attribute.getValueType()) { - case Ontology.NUMERICAL: - builder.add(name, getRealColumn(exampleSet, size, attribute)); - builder.addMetaData(name, LegacyType.NUMERICAL); - break; - case Ontology.REAL: - builder.add(name, getRealColumn(exampleSet, size, attribute)); - break; - case Ontology.INTEGER: - builder.add(name, getIntegerColumn(exampleSet, size, attribute)); - break; - case Ontology.BINOMINAL: - CategoricalColumn binominalColumn = getBinominalColumn(exampleSet, size, attribute); - builder.add(name, binominalColumn); - builder.addMetaData(name, LegacyType.BINOMINAL); - break; - case Ontology.NOMINAL: - builder.add(name, getNominalColumn(exampleSet, size, attribute)); - builder.addMetaData(name, LegacyType.NOMINAL); - break; - case Ontology.POLYNOMINAL: - builder.add(name, getNominalColumn(exampleSet, size, attribute)); - break; - case Ontology.STRING: - builder.add(name, getNominalColumn(exampleSet, size, attribute)); - builder.addMetaData(name, LegacyType.STRING); - break; - case Ontology.FILE_PATH: - builder.add(name, getNominalColumn(exampleSet, size, attribute)); - builder.addMetaData(name, LegacyType.FILE_PATH); - break; - case Ontology.DATE: - builder.add(name, getDateColumn(exampleSet, size, attribute)); - builder.addMetaData(name, LegacyType.DATE); - break; - case Ontology.DATE_TIME: - builder.add(name, getDateTimeColumn(exampleSet, size, attribute)); - break; - case Ontology.TIME: - builder.add(name, getDateTimeColumn(exampleSet, size, attribute)); - builder.addMetaData(name, LegacyType.TIME); - break; - default: - throw new UnsupportedOperationException(MESSAGE_NON_SUPPORTED); - } - } - - - private static Column getDateTimeColumn(ExampleSet exampleSet, int size, Attribute attribute) { - DateTimeBuffer buffer = Buffers.dateTimeBuffer(size, true, false); - int i = 0; - for (Example 
example : exampleSet) { - double value = example.getValue(attribute); - if (Double.isNaN(value)) { - buffer.set(i++, null); - } else { - long longValue = (long) value; - buffer.set(i++, Math.floorDiv(longValue, MILLISECONDS_PER_SECOND), - (int) (Math.floorMod(longValue, MILLISECONDS_PER_SECOND) * NANOS_PER_MILLI_SECOND)); - } - } - return buffer.toColumn(); - } - - private static Column getDateColumn(ExampleSet exampleSet, int size, Attribute attribute) { - DateTimeBuffer buffer = Buffers.dateTimeBuffer(size, false, false); - int i = 0; - for (Example example : exampleSet) { - double value = example.getValue(attribute); - if (Double.isNaN(value)) { - buffer.set(i++, null); - } else { - buffer.set(i++, ((long) value) / MILLISECONDS_PER_SECOND); - } - } - return buffer.toColumn(); - } - - private static Column getIntegerColumn(ExampleSet exampleSet, int size, Attribute attribute) { - NumericBuffer intBuffer = Buffers.integerBuffer(size, false); - int j = 0; - for (Example example : exampleSet) { - intBuffer.set(j++, example.getValue(attribute)); - } - return intBuffer.toColumn(); - } - - private static Column getRealColumn(ExampleSet exampleSet, int size, Attribute attribute) { - NumericBuffer buffer = Buffers.realBuffer(size, false); - int i = 0; - for (Example example : exampleSet) { - buffer.set(i++, example.getValue(attribute)); - } - return buffer.toColumn(); } /** - * Copies a binominal column from the example set by copying the mapping and the category data with a fallback in - * case the mapping is broken (contains null). Creates a boolean column if possible. 
- */ - private static CategoricalColumn getBinominalColumn(ExampleSet exampleSet, int size, Attribute attribute) { - NominalMapping legacyMapping = attribute.getMapping(); - if (legacyMapping.getPositiveString() != null && (legacyMapping.getNegativeString() == null - || legacyMapping.getPositiveString().equals(legacyMapping.getNegativeString()))) { - // Incompatible with Belt's 2Bit columns - return getBufferColumn(exampleSet, size, attribute); - } - List mapping = new ArrayList<>(3); - mapping.add(null); - String negativeString = legacyMapping.getNegativeString(); - if (negativeString != null) { - mapping.add(negativeString); - } - String positiveString = legacyMapping.getPositiveString(); - if (positiveString != null) { - mapping.add(positiveString); - } - byte[] data = new byte[size % 4 == 0 ? size / 4 : size / 4 + 1]; - - int i = 0; - for (Example example : exampleSet) { - double value = example.getValue(attribute); - if (!Double.isNaN(value)) { - IntegerFormats.writeUInt2(data, i, (int) (value + 1)); - } - i++; - } - - PackedIntegers packed = new PackedIntegers(data, Format.UNSIGNED_INT2, size); - //convert to a boolean column - int positiveIndex = legacyMapping.getPositiveIndex() + 1; - if (positiveIndex >= mapping.size()) { - //there is no positive value, only a negative one - positiveIndex = BooleanDictionary.NO_ENTRY; - } - return ColumnAccessor.get().newCategoricalColumn(ColumnTypes.NOMINAL, packed, mapping, positiveIndex); - } - - /** - * Copies a nominal column from the example set by copying the mapping and the category data with a fallback in case - * the mapping is broken (contains null or contains a value twice). + * Belt meta data (except for roles) cannot be stored in an ExampleSet. Therefore, we store the belt meta data in + * the ExampleSet's user data. 
+ * + * @param table + * the table holding the belt meta data that will be stored + * @param set + * the belt meta data will be stored to this ExampleSet's user data */ - private static CategoricalColumn getNominalColumn(ExampleSet exampleSet, int size, Attribute attribute) { - NominalMapping legacyMapping = attribute.getMapping(); - List mapping = new ArrayList<>(legacyMapping.size() + 1); - mapping.add(null); - Set controlSet = new HashSet<>(); - controlSet.add(null); - for (String value : legacyMapping.getValues()) { - if (controlSet.add(value)) { - mapping.add(value); - } else { - return getBufferColumn(exampleSet, size, attribute); - } - } - int[] data = new int[size]; - int i = 0; - for (Example example : exampleSet) { - double value = example.getValue(attribute); - if (Double.isNaN(value)) { - data[i++] = 0; - } else { - data[i++] = (int) value + 1; - } - } - return ColumnAccessor.get().newCategoricalColumn(ColumnTypes.NOMINAL, data, mapping); + static void storeBeltMetaDataInExampleSetUserData(Table table, ExampleSet set) { + set.setUserData(IOOBJECT_USER_DATA_COLUMN_META_DATA_KEY, Collections.unmodifiableMap(table.getMetaData())); } /** - * Copies a nominal column from the example set using a nominal buffer. + * Gets the value type from the meta data if present or from the table otherwise. 
+ * + * @throws ConversionException + * if the column cannot be converted because it is a non-standard column */ - private static CategoricalColumn getBufferColumn(ExampleSet exampleSet, int size, Attribute attribute) { - CategoricalBuffer nominalBuffer = BufferAccessor.get().newInt32Buffer(size); - int j = 0; - NominalMapping mapping = attribute.getMapping(); - for (Example example : exampleSet) { - double value = example.getValue(attribute); - if (Double.isNaN(value)) { - nominalBuffer.set(j++, null); - } else { - nominalBuffer.set(j++, mapping.mapIndex((int) value)); + static int getValueType(Table table, String label, int columnIndex) { + Column column = table.column(columnIndex); + int derivedOntology = convertToOntology(column, label); + LegacyType legacyType = table.getFirstMetaData(label, LegacyType.class); + if (legacyType != null) { + int legacyOntology = legacyType.ontology(); + if (useLegacyOntology(legacyOntology, derivedOntology, column)) { + return legacyOntology; } } - return nominalBuffer.toColumn(ColumnTypes.NOMINAL); + return derivedOntology; } - /** - * Conversion where the exampleSet can be accessed in parallel. 
- */ - private static Table parallelConvert(ExampleSet exampleSet, ConcurrencyContext context) { - int size = exampleSet.size(); - List labels = new ArrayList<>(); - List> futureColumns = new ArrayList<>(); - Map> meta = new HashMap<>(); - Attribute predictionAttribute = exampleSet.getAttributes().getPredictedLabel(); - for (Iterator allRoles = exampleSet.getAttributes().allAttributeRoles(); allRoles.hasNext(); ) { - AttributeRole role = allRoles.next(); - - Attribute attribute = role.getAttribute(); - labels.add(attribute.getName()); - - createDataFuturesAndStoreType(exampleSet, size, futureColumns, attribute, meta); - - if (role.isSpecial()) { - storeRole(role, attribute, meta, predictionAttribute); - } - } - return buildTable(futureColumns, labels, meta, context); - } /** - * Converts the column specified by exampleSet and attribute into one entry in futureColumns and meta. + * Converts attribute roles into belt column roles. */ - private static void createDataFuturesAndStoreType(ExampleSet exampleSet, int size, - List> futureColumns, Attribute attribute, - Map> meta) { - switch (attribute.getValueType()) { - case Ontology.NUMERICAL: - storeOntology(meta, attribute); - futureColumns.add(() -> getRealColumn(exampleSet, size, attribute)); - break; - case Ontology.REAL: - futureColumns.add(() -> getRealColumn(exampleSet, size, attribute)); - break; - case Ontology.INTEGER: - futureColumns.add(() -> getIntegerColumn(exampleSet, size, attribute)); - break; - case Ontology.BINOMINAL: - futureColumns.add(() -> getBinominalColumn(exampleSet, size, attribute)); - storeOntology(meta, attribute); - break; - case Ontology.POLYNOMINAL: - futureColumns.add(() -> getNominalColumn(exampleSet, size, attribute)); - break; - case Ontology.NOMINAL: - case Ontology.STRING: - case Ontology.FILE_PATH: - storeOntology(meta, attribute); - futureColumns.add(() -> getNominalColumn(exampleSet, size, attribute)); - break; - case Ontology.DATE: - storeOntology(meta, attribute); - 
futureColumns.add(() -> getDateColumn(exampleSet, size, attribute)); - break; - case Ontology.DATE_TIME: - futureColumns.add(() -> getDateTimeColumn(exampleSet, size, attribute)); - break; - case Ontology.TIME: - storeOntology(meta, attribute); - futureColumns.add(() -> getDateTimeColumn(exampleSet, size, attribute)); - break; + static ColumnRole convert(String studioRole) { + String withOutIndex = removeIndex(studioRole); + switch (withOutIndex) { + case Attributes.LABEL_NAME: + return ColumnRole.LABEL; + case Attributes.ID_NAME: + return ColumnRole.ID; + case Attributes.PREDICTION_NAME: + return ColumnRole.PREDICTION; + case Attributes.CONFIDENCE_NAME: + return ColumnRole.SCORE; + case Attributes.CLUSTER_NAME: + return ColumnRole.CLUSTER; + case Attributes.OUTLIER_NAME: + return ColumnRole.OUTLIER; + case Attributes.WEIGHT_NAME: + return ColumnRole.WEIGHT; + case Attributes.BATCH_NAME: + return ColumnRole.BATCH; default: - throw new UnsupportedOperationException(MESSAGE_NON_SUPPORTED); + if (withOutIndex.startsWith(Attributes.CONFIDENCE_NAME)) { + return ColumnRole.SCORE; + } + return ColumnRole.METADATA; } } /** - * Stores the ontology of the attribute in the meta map under the attribute name. - */ - private static void storeOntology(Map> meta, Attribute attribute) { - List list = new ArrayList<>(3); - list.add(LegacyType.forOntology(attribute.getValueType())); - meta.put(attribute.getName(), list); - } - - /** - * Conversion where the data is read directly from the example table and in parallel. + * If the given String ends with an index suffix this suffix is removed. 
*/ - private static Table exampleTableConvert(ExampleSet exampleSet, ConcurrencyContext context) { - int size = exampleSet.size(); - List labels = new ArrayList<>(); - List> futureColumns = new ArrayList<>(); - Map> meta = new HashMap<>(); - ExampleTable table = exampleSet.getExampleTable(); - Attribute prediction = exampleSet.getAttributes().getPredictedLabel(); - for (Iterator allRoles = exampleSet.getAttributes().allAttributeRoles(); allRoles.hasNext(); ) { - - AttributeRole role = allRoles.next(); - Attribute attribute = role.getAttribute(); - labels.add(attribute.getName()); - - createTableFuturesAndStoreType(size, futureColumns, meta, table, attribute); - if (role.isSpecial()) { - storeRole(role, attribute, meta, prediction); - } + private static String removeIndex(String string) { + Matcher m = INDEX_PATTERN.matcher(string); + if (m.matches()) { + return m.group(1); } - return buildTable(futureColumns, labels, meta, context); + return string; } - /** - * Stores the associated belt role and, if not all the info can be captured by the belt role, stores the original - * role name. - */ - private static void storeRole(AttributeRole role, Attribute attribute, Map> meta, - Attribute prediction) { - String specialName = role.getSpecialName(); - ColumnRole beltRole = convert(specialName); - List columnMeta = - meta.computeIfAbsent(attribute.getName(), s -> new ArrayList<>(2)); - columnMeta.add(beltRole); - if (beltRole == ColumnRole.METADATA) { - columnMeta.add(new LegacyRole(specialName)); - } else if (beltRole == ColumnRole.SCORE) { - String predictionName = prediction == null ? null : prediction.getName(); - if (specialName.startsWith(CONFIDENCE_PREFIX)) { - columnMeta.add(new ColumnReference(predictionName, - specialName.substring(CONFIDENCE_PREFIX_LENGHT))); - } else { - columnMeta.add(new ColumnReference(predictionName)); - } - } - } /** - * Converts the column specified by table and attribute into one entry in futureColumns and meta. 
+ * Checks if the role has already been set. */ - private static void createTableFuturesAndStoreType(int size, List> futureColumns, - Map> meta, ExampleTable table, - Attribute attribute) { - switch (attribute.getValueType()) { - case Ontology.NUMERICAL: - storeOntology(meta, attribute); - futureColumns.add(() -> getRealColumn(size, table, attribute)); - break; - case Ontology.REAL: - futureColumns.add(() -> getRealColumn(size, table, attribute)); - break; - case Ontology.INTEGER: - futureColumns.add(() -> getIntegerColumn(size, table, attribute)); - break; - case Ontology.BINOMINAL: - storeOntology(meta, attribute); - futureColumns.add(() -> getBinominalColumn(table, size, attribute)); - break; - case Ontology.POLYNOMINAL: - futureColumns.add(() -> getNominalColumn(table, size, attribute)); - break; - case Ontology.NOMINAL: - case Ontology.STRING: - case Ontology.FILE_PATH: - storeOntology(meta, attribute); - futureColumns.add(() -> getNominalColumn(table, size, attribute)); - break; - case Ontology.DATE: - storeOntology(meta, attribute); - futureColumns.add(() -> getSecondDateColumn(size, table, attribute)); - break; - case Ontology.DATE_TIME: - futureColumns.add(() -> getNanosecondDateColumn(size, table, attribute)); - break; - case Ontology.TIME: - storeOntology(meta, attribute); - futureColumns.add(() -> getNanosecondDateColumn(size, table, attribute)); - break; - default: - throw new UnsupportedOperationException(MESSAGE_NON_SUPPORTED); - } - } - - private static Column getNanosecondDateColumn(int size, ExampleTable table, Attribute attribute) { - DateTimeBuffer buffer = Buffers.dateTimeBuffer(size, true, false); - for (int i = 0; i < table.size(); i++) { - double value = table.getDataRow(i).get(attribute); - if (Double.isNaN(value)) { - buffer.set(i, null); - } else { - long longValue = (long) value; - buffer.set(i, Math.floorDiv(longValue, MILLISECONDS_PER_SECOND), - (int) (Math.floorMod(longValue, MILLISECONDS_PER_SECOND) * NANOS_PER_MILLI_SECOND)); - } - 
} - return buffer.toColumn(); - } - - private static Column getSecondDateColumn(int size, ExampleTable table, Attribute attribute) { - DateTimeBuffer buffer = Buffers.dateTimeBuffer(size, false, false); - for (int i = 0; i < table.size(); i++) { - double value = table.getDataRow(i).get(attribute); - if (Double.isNaN(value)) { - buffer.set(i, null); - } else { - buffer.set(i, ((long) value) / MILLISECONDS_PER_SECOND); - } - } - return buffer.toColumn(); - } - - private static Column getRealColumn(int size, ExampleTable table, Attribute attribute) { - NumericBuffer buffer = Buffers.realBuffer(size, false); - for (int i = 0; i < table.size(); i++) { - buffer.set(i, table.getDataRow(i).get(attribute)); - } - return buffer.toColumn(); - } - - private static Column getIntegerColumn(int size, ExampleTable table, Attribute attribute) { - NumericBuffer intBuffer = Buffers.integerBuffer(size, false); - for (int i = 0; i < table.size(); i++) { - intBuffer.set(i, table.getDataRow(i).get(attribute)); - } - return intBuffer.toColumn(); + private static boolean checkUnique(Attributes allAttributes, String studioRole) { + return allAttributes.findRoleBySpecialName(studioRole) == null; } /** - * Copies a binominal column from the example table by copying the mapping and the category data with a fallback in - * case the mapping is broken (contains null). + * Checks if the legacy type should be used instead of the given type. 
*/ - private static Column getBinominalColumn(ExampleTable table, int size, Attribute attribute) { - NominalMapping legacyMapping = attribute.getMapping(); - if (legacyMapping.getPositiveString() != null && (legacyMapping.getNegativeString() == null - || legacyMapping.getPositiveString().equals(legacyMapping.getNegativeString()))) { - // Incompatible with Belt's 2Bit columns - return getBufferColumn(table, size, attribute); - } - List mapping = new ArrayList<>(3); - mapping.add(null); - String negativeString = legacyMapping.getNegativeString(); - if (negativeString != null) { - mapping.add(negativeString); - } - String positiveString = legacyMapping.getPositiveString(); - if (positiveString != null) { - mapping.add(positiveString); - } - byte[] data = new byte[size % 4 == 0 ? size / 4 : size / 4 + 1]; - - for (int i = 0; i < size; i++) { - double value = table.getDataRow(i).get(attribute); - if (!Double.isNaN(value)) { - IntegerFormats.writeUInt2(data, i, (int) value + 1); - } + private static boolean useLegacyOntology(int legacyOntology, int derivedOntology, Column column) { + // we never want to fall back to the legacy ontology for these two + if (derivedOntology == Ontology.INTEGER || derivedOntology == Ontology.BINOMINAL) { + return false; } - - PackedIntegers packed = new PackedIntegers(data, Format.UNSIGNED_INT2, size); - // create boolean column - int positiveIndex = legacyMapping.getPositiveIndex() + 1; - if (positiveIndex >= mapping.size()) { - //there is no positive value, only a negative one - positiveIndex = BooleanDictionary.NO_ENTRY; - } - return ColumnAccessor.get().newCategoricalColumn(ColumnTypes.NOMINAL, packed, mapping, positiveIndex); - } - - /** - * Copies a nominal column from the example table by copying the mapping and the category data with a fallback in - * case the mapping is broken (contains null or contains a value twice). 
- */ - private static Column getNominalColumn(ExampleTable table, int size, Attribute attribute) { - NominalMapping legacyMapping = attribute.getMapping(); - List mapping = new ArrayList<>(legacyMapping.size() + 1); - mapping.add(null); - Set controlSet = new HashSet<>(); - controlSet.add(null); - for (String value : legacyMapping.getValues()) { - if (controlSet.add(value)) { - mapping.add(value); - } else { - return getBufferColumn(table, size, attribute); - } + // legacy ontology is super type or the same + if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(derivedOntology, legacyOntology)) { + return true; } - int[] data = new int[size]; - for (int i = 0; i < size; i++) { - double value = table.getDataRow(i).get(attribute); - if (Double.isNaN(value)) { - data[i] = 0; - } else { - data[i] = (int) value + 1; - } + // if binominal is requested for a polynominal derived type, check dictionary size and if only positive + if (legacyOntology == Ontology.BINOMINAL && derivedOntology == Ontology.POLYNOMINAL) { + Dictionary dictionary = column.getDictionary(); + return dictionary.size() <= 2 && + //BinominalMapping can have no positive but not no negative + !(dictionary.isBoolean() && dictionary.hasPositive() && !dictionary.hasNegative()); } - return ColumnAccessor.get().newCategoricalColumn(ColumnTypes.NOMINAL, data, mapping); - } - - /** - * Copies a nominal column from the example table using a nominal buffer. 
- */ - private static Column getBufferColumn(ExampleTable table, int size, Attribute attribute) { - CategoricalBuffer nominalBuffer = BufferAccessor.get().newInt32Buffer(size); - NominalMapping mapping = attribute.getMapping(); - for (int i = 0; i < size; i++) { - double value = table.getDataRow(i).get(attribute); - if (Double.isNaN(value)) { - nominalBuffer.set(i, null); - } else { - nominalBuffer.set(i, mapping.mapIndex((int) value)); - } + // derived ontology is a nominal subtype and legacy ontology, too + if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(derivedOntology, Ontology.NOMINAL) && Ontology.ATTRIBUTE_VALUE_TYPE + .isA(legacyOntology, Ontology.NOMINAL)) { + return true; } - return nominalBuffer.toColumn(ColumnTypes.NOMINAL); - } - - /** - * Builds the table by running the future columns in the given context and creating a table from the results and - * the given labels. - */ - private static Table buildTable(List> futureColumns, List labels, - Map> srcMeta, ConcurrencyContext context) { - try { - List columnList = context.call(futureColumns); - return new Table(columnList.toArray(new Column[0]), - labels.toArray(new String[0]), srcMeta); - } catch (ExecutionException e) { - Throwable cause = e.getCause(); - if (cause instanceof RuntimeException) { - throw (RuntimeException) cause; - } else if (cause instanceof Error) { - throw (Error) cause; - } else { - throw new RuntimeException(cause.getMessage(), cause); - } + // for legacy support we allow conversion from date-time to time + if (legacyOntology == Ontology.TIME && derivedOntology == Ontology.DATE_TIME) { + return true; } + // date-time can be shown as date + return legacyOntology == Ontology.DATE && derivedOntology == Ontology.DATE_TIME; } } diff --git a/src/main/java/com/rapidminer/belt/table/ConvertOnWriteExampleTable.java b/src/main/java/com/rapidminer/belt/table/ConvertOnWriteExampleTable.java new file mode 100644 index 0000000..4af648e --- /dev/null +++ 
b/src/main/java/com/rapidminer/belt/table/ConvertOnWriteExampleTable.java @@ -0,0 +1,684 @@ +/** + * Copyright (C) 2001-2020 by RapidMiner and the contributors + * + * Complete list of developers available at our web site: + * + * http://rapidminer.com + * + * This program is free software: you can redistribute it and/or modify it under the terms of the + * GNU Affero General Public License as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without + * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License along with this program. + * If not, see http://www.gnu.org/licenses/. + */ +package com.rapidminer.belt.table; + +import java.io.IOException; +import java.io.ObjectOutputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.StringJoiner; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReadWriteLock; +import java.util.concurrent.locks.ReentrantReadWriteLock; + +import com.rapidminer.example.Attribute; +import com.rapidminer.example.AttributeRole; +import com.rapidminer.example.Attributes; +import com.rapidminer.example.ExampleSet; +import com.rapidminer.example.SimpleAttributes; +import com.rapidminer.example.set.SimpleExampleSet; +import com.rapidminer.example.table.AttributeFactory; +import com.rapidminer.example.table.DataRow; +import com.rapidminer.example.table.DataRowFactory; +import com.rapidminer.example.table.DataRowReader; +import com.rapidminer.example.table.ExampleTable; +import 
com.rapidminer.example.table.internal.CleanableExampleTable; +import com.rapidminer.example.table.internal.ColumnarExampleTable; +import com.rapidminer.operator.OperatorException; +import com.rapidminer.operator.error.AttributeNotFoundError; +import com.rapidminer.tools.Ontology; +import com.rapidminer.tools.Tools; +import com.rapidminer.tools.att.AttributeSet; + + +/** + * {@link ExampleTable} that wraps either a belt {@link Table} plus a {@link ColumnarExampleTable} of additional columns + * or just a {@link ColumnarExampleTable} containing the converted belt {@link Table} and the additional columns. This + * allows reading the values of the belt table and adding and filling additional columns as for a normal {@link ExampleTable}. + * The conversion is only done if a {@link DataRow#set(int, double, double)} is called for a belt column. This case is a + * bug anyway, since one of the unwritten rules of {@link ExampleSet}s is not to write into columns that have not been + * added. + * + * @author Gisa Meier + * @since 0.7 + */ +class ConvertOnWriteExampleTable implements CleanableExampleTable { + + /** + * the accessor for the belt table + */ + private transient volatile AbstractTableAccessor tableAccessor; + + /** + * Extra table for adding new columns. 
We cannot add those to the belt table since in the belt API they are + * immutable and in the example table API they are added first and then filled + */ + private volatile ColumnarExampleTable newColumns; + + /** + * the whole converted table (converted belt table plus newColumns) + */ + private volatile ColumnarExampleTable convertedTable; + + /** + * the width of the underlying belt table + */ + private final int originalWidth; + + /** + * the table height, could be reconstructed from the convertedTable or the tableAccessor but stored for performance + * reasons + */ + private final int height; + + + /** + * Locks to prevent a convert in parallel to a write of the new columns table + */ + private final transient ReadWriteLock rwLock = new ReentrantReadWriteLock(); + /** + * Used for writing to the new Columns table, arbitrarily many in parallel but not at the same time as convert + */ + private final transient Lock readLock = rwLock.readLock(); + /** + * Used for convert(), only one thread at a time + */ + private final transient Lock writeLock = rwLock.writeLock(); + + /** + * Lock object to prevent several newColumns tables being constructed in parallel. There can only be one. + */ + private final transient Object newColumnsTableLock = new Object(); + + /** + * Creates a new convert on write table based on the given belt table. The attributes in the list are neither cloned + * nor is their table index changed. 
+ * + * @param table + * the belt table to wrap + * @param attributeList + * the list of attributes fitting to the belt table + * @param numberOfDatetime + * the number of date-time columns + */ + ConvertOnWriteExampleTable(Table table, List attributeList, int numberOfDatetime) { + if (numberOfDatetime > 0) { + tableAccessor = new MixedTableAccessor(table, attributeList, numberOfDatetime); + } else { + tableAccessor = new NumericTableAccessor(table, attributeList); + } + originalWidth = table.width(); + height = table.height(); + } + + /** + * Copy-constructor. + */ + private ConvertOnWriteExampleTable(AbstractTableAccessor tableAccessor, ColumnarExampleTable convertedTable, + ColumnarExampleTable newColumns, int originalWidth, int originalHeight) { + this.tableAccessor = tableAccessor; + this.convertedTable = convertedTable; + this.newColumns = newColumns; + this.originalWidth = originalWidth; + this.height = originalHeight; + } + + @Override + public int size() { + return height; + } + + @Override + public DataRowReader getDataRowReader() { + if (convertedTable != null) { + return convertedTable.getDataRowReader(); + } + return new DataRowReader() { + + //holds the reference as long as the reader is alive + private Object beltReader = getReader(tableAccessor); + + private int index = 0; + + @Override + public boolean hasNext() { + return index < height; + } + + @Override + public DataRow next() { + final int currentRow = index++; + if (convertedTable != null) { + return convertedTable.getDataRow(currentRow); + } + return new DataRow() { + + @Override + protected double get(int columnIndex, double defaultValue) { + return getValue(columnIndex, currentRow, beltReader); + } + + @Override + protected void set(int columnIndex, double value, double defaultValue) { + readLock.lock(); + try { + if (newColumns != null && columnIndex >= originalWidth) { + newColumns.getDataRow(currentRow).set(newColumns.getAttribute(columnIndex - originalWidth) + , value); + return; + } + 
} finally { + readLock.unlock(); + } + if (convertedTable == null) { + convert(); + // delete the reader reference + beltReader = null; + } + convertedTable.getDataRow(currentRow).set(convertedTable.getAttribute(columnIndex), value); + } + + @Override + protected void ensureNumberOfColumns(int i) { + //not necessary, converted table and newColumns are {@link ColumnarExampleTable} + } + + @Override + public int getType() { + return DataRowFactory.TYPE_COLUMN_VIEW; + } + + @Override + public String toString() { + StringJoiner result = new StringJoiner(","); + for (int i = 0; i < getNumberOfAttributes(); i++) { + result.add("" + get(i, 0)); + } + return result.toString(); + } + }; + } + + }; + + } + + @Override + public DataRow getDataRow(int rowIndex) { + if (convertedTable != null) { + return convertedTable.getDataRow(rowIndex); + } + return new DataRow() { + + // holds the reader reference as long as the row is alive + private transient Object reader = getUnbufferedReader(tableAccessor); + + @Override + protected double get(int columnIndex, double defaultValue) { + return getValue(columnIndex, rowIndex, reader); + } + + @Override + protected void set(int columnIndex, double value, double defaultValue) { + readLock.lock(); + try { + if (newColumns != null && columnIndex >= originalWidth) { + newColumns.getDataRow(rowIndex).set(newColumns.getAttribute(columnIndex - originalWidth), value); + return; + } + } finally { + readLock.unlock(); + } + + if (convertedTable == null) { + convert(); + // delete the reader reference + reader = null; + } + convertedTable.getDataRow(rowIndex).set(convertedTable.getAttribute(columnIndex), value); + } + + @Override + protected void ensureNumberOfColumns(int i) { + //not necessary, converted table and newColumns are {@link ColumnarExampleTable} + } + + @Override + public int getType() { + return DataRowFactory.TYPE_COLUMN_VIEW; + } + + @Override + public String toString() { + StringJoiner result = new StringJoiner(","); + for (int i = 
0; i < getNumberOfAttributes(); i++) { + result.add("" + get(i, 0)); + } + return result.toString(); + } + }; + } + + @Override + public void addAttributes(Collection collection) { + readLock.lock(); + try { + if (convertedTable != null) { + convertedTable.addAttributes(collection); + return; + } + if (newColumns == null) { + createNewColumns(); + } + newColumns.addAttributes(collection); + for (Attribute attribute : collection) { + int internalIndex = attribute.getTableIndex(); + attribute.setTableIndex(originalWidth + internalIndex); + } + } finally { + readLock.unlock(); + } + } + + @Override + public int addAttribute(Attribute attribute) { + readLock.lock(); + try { + if (convertedTable != null) { + return convertedTable.addAttribute(attribute); + } + if (newColumns == null) { + createNewColumns(); + } + newColumns.addAttribute(attribute); + int tableIndex = attribute.getTableIndex(); + int shiftedIndex = tableIndex + originalWidth; + attribute.setTableIndex(shiftedIndex); + return shiftedIndex; + } finally { + readLock.unlock(); + } + } + + @Override + public void removeAttribute(Attribute attribute) { + if (convertedTable != null) { + convertedTable.removeAttribute(attribute); + return; + } + readLock.lock(); + try { + if (attribute.getTableIndex() >= originalWidth && newColumns != null) { + newColumns.removeAttribute(attribute.getTableIndex() - originalWidth); + return; + } + } finally { + readLock.unlock(); + } + + if (convertedTable == null) { + convert(); + } + convertedTable.removeAttribute(attribute); + + } + + @Override + public void removeAttribute(int i) { + if (convertedTable != null) { + convertedTable.removeAttribute(i); + return; + } + readLock.lock(); + try { + if (i >= originalWidth && newColumns != null) { + newColumns.removeAttribute(i - originalWidth); + return; + } + } finally { + readLock.unlock(); + } + + if (convertedTable == null) { + convert(); + } + convertedTable.removeAttribute(i); + } + + @Override + public Attribute[] 
getAttributes() { + // store references so that they do not change in parallel + ColumnarExampleTable newColumnsRef = this.newColumns; + AbstractTableAccessor tableAccessorRef = this.tableAccessor; + ColumnarExampleTable convertedTableRef = this.convertedTable; + if (convertedTableRef != null) { + return convertedTableRef.getAttributes(); + } + if (newColumnsRef == null) { + return tableAccessorRef.getAttributes(); + } + Attribute[] newAttributes = newColumnsRef.getAttributes(); + Attribute[] tableAttributes = tableAccessorRef.getAttributes(); + Attribute[] attributes = Arrays.copyOf(tableAttributes, tableAttributes.length + newAttributes.length); + for (int i = 0; i < newAttributes.length; i++) { + Attribute newAttribute = newAttributes[i]; + if (newAttribute != null) { + newAttribute = (Attribute) newAttribute.clone(); + newAttribute.setTableIndex(originalWidth + newAttribute.getTableIndex()); + } + attributes[i + originalWidth] = newAttribute; + } + return attributes; + } + + @Override + public Attribute getAttribute(int i) { + // store references so that they do not change in parallel + ColumnarExampleTable newColumnsRef = this.newColumns; + AbstractTableAccessor tableAccessorRef = this.tableAccessor; + ColumnarExampleTable convertedTableRef = this.convertedTable; + if (convertedTableRef != null) { + return convertedTableRef.getAttribute(i); + } + if (newColumnsRef == null || i < originalWidth) { + return tableAccessorRef.getAttribute(i); + } + Attribute attribute = newColumnsRef.getAttribute(i - originalWidth); + attribute = (Attribute) attribute.clone(); + attribute.setTableIndex(attribute.getTableIndex() + originalWidth); + return attribute; + } + + @Override + public Attribute findAttribute(String s) throws OperatorException { + // store references so that they do not change in parallel + ColumnarExampleTable newColumnsRef = this.newColumns; + AbstractTableAccessor tableAccessorRef = this.tableAccessor; + ColumnarExampleTable convertedTableRef = 
this.convertedTable; + if (convertedTableRef != null) { + return convertedTableRef.findAttribute(s); + } + if (newColumnsRef == null) { + return tableAccessorRef.findAttribute(s); + } + try { + return tableAccessorRef.findAttribute(s); + } catch (AttributeNotFoundError e) { + Attribute attribute = newColumnsRef.findAttribute(s); + attribute = (Attribute) attribute.clone(); + attribute.setTableIndex(attribute.getTableIndex() + originalWidth); + return attribute; + } + } + + @Override + public int getNumberOfAttributes() { + // store references so that they do not change in parallel + ColumnarExampleTable newColumnsRef = this.newColumns; + ColumnarExampleTable convertedTableRef = this.convertedTable; + if (convertedTableRef != null) { + return convertedTableRef.getNumberOfAttributes(); + } + if (newColumnsRef == null) { + return originalWidth; + } + return originalWidth + newColumnsRef.getNumberOfAttributes(); + } + + @Override + public int getAttributeCount() { + // store references so that they do not change in parallel + ColumnarExampleTable newColumnsRef = this.newColumns; + ColumnarExampleTable convertedTableRef = this.convertedTable; + if (convertedTableRef != null) { + return convertedTableRef.getAttributeCount(); + } + if (newColumnsRef == null) { + return originalWidth; + } + return originalWidth + newColumnsRef.getAttributeCount(); + } + + // the following 6 methods are the same as in {@link AbstractExampleTable} + @Override + public ExampleSet createExampleSet(Attribute labelAttribute) { + return createExampleSet(labelAttribute, null, null); + } + + @Override + public ExampleSet createExampleSet(Iterator newSpecialAttributes) { + Map specialAttributes = new LinkedHashMap<>(); + while (newSpecialAttributes.hasNext()) { + AttributeRole role = newSpecialAttributes.next(); + specialAttributes.put(role.getAttribute(), role.getSpecialName()); + } + return new SimpleExampleSet(this, specialAttributes); + } + + @Override + public ExampleSet 
createExampleSet(Attribute labelAttribute, Attribute weightAttribute, Attribute idAttribute) { + Map specialAttributes = new LinkedHashMap<>(); + if (labelAttribute != null) { + specialAttributes.put(labelAttribute, Attributes.LABEL_NAME); + } + if (weightAttribute != null) { + specialAttributes.put(weightAttribute, Attributes.WEIGHT_NAME); + } + if (idAttribute != null) { + specialAttributes.put(idAttribute, Attributes.ID_NAME); + } + return new SimpleExampleSet(this, specialAttributes); + } + + @Override + public ExampleSet createExampleSet(AttributeSet attributeSet) { + Map specialAttributes = new LinkedHashMap<>(); + Iterator i = attributeSet.getSpecialNames().iterator(); + while (i.hasNext()) { + String name = i.next(); + specialAttributes.put(attributeSet.getSpecialAttribute(name), name); + } + return createExampleSet(specialAttributes); + } + + @Override + public ExampleSet createExampleSet(Map map) { + return new SimpleExampleSet(this, map); + } + + @Override + public ExampleSet createExampleSet() { + return createExampleSet(Collections.emptyMap()); + } + + @Override + public String toString() { + return "ExampleTable, " + getNumberOfAttributes() + " attributes, " + size() + " data rows," + Tools.getLineSeparator() + + "attributes: " + Arrays.toString(getAttributes()); + } + + @Override + public String toDataString() { + StringBuilder result = new StringBuilder(toString() + Tools.getLineSeparator()); + DataRowReader reader = getDataRowReader(); + while (reader.hasNext()) { + result.append(reader.next().toString()).append(Tools.getLineSeparator()); + } + return result.toString(); + } + + @Override + public ExampleTable columnCleanupClone(Attributes attributes) { + return cleanup(newColumns, tableAccessor, convertedTable, attributes); + } + + /** + * @return the underlying belt table if it still exists + */ + Table getTable() { + AbstractTableAccessor accessor = tableAccessor; + if (accessor != null) { + return accessor.getTable(); + } + return null; + } + + 
/** + * @return the example table of additional columns + */ + ColumnarExampleTable getNewColumns() { + return newColumns; + } + + /** + * Before we can serialize, we need to get rid of the {@link #tableAccessor} since it contains a belt {@link Table} + * which cannot be written with standard java serialization. + */ + private void writeObject(ObjectOutputStream oos) throws IOException { + if (tableAccessor != null) { + convert(); + } + oos.defaultWriteObject(); // Calling the default serialization logic + + } + + /** + * Creates the new columns table + */ + private void createNewColumns() { + synchronized (newColumnsTableLock) { + if (newColumns == null) { + ColumnarExampleTable newColumnsRef = new ColumnarExampleTable(new ArrayList<>()); + newColumnsRef.setExpectedSize(size()); + newColumnsRef.addBlankRows(size()); + this.newColumns = newColumnsRef; + } + } + } + + /** + * Converts to one columnar example table containing the belt table values and the new columns. + */ + private void convert() { + writeLock.lock(); + try { + if (convertedTable == null) { + ColumnarExampleTable newConvertedTable = FromTableConverter.convert(tableAccessor.getTable(), tableAccessor.getAttributes()); + ColumnarExampleTable newColumnsRef = newColumns; + if (newColumnsRef != null) { + List dummyAttributes = new ArrayList<>(); + for (Attribute attribute : newColumnsRef.getAttributes()) { + if (attribute != null) { + Attribute clone = (Attribute) attribute.clone(); + newConvertedTable.addAttribute(clone); + newConvertedTable.fillColumn(clone, j -> newColumnsRef.getDataRow(j).get(attribute)); + } else { + //add dummy attribute to keep table indices + Attribute dummy = AttributeFactory.createAttribute("", Ontology.NUMERICAL); + newConvertedTable.addAttribute(dummy); + dummyAttributes.add(dummy); + } + } + if (!dummyAttributes.isEmpty()) { + for (Attribute dummy : dummyAttributes) { + newConvertedTable.removeAttribute(dummy); + } + } + + } + convertedTable = newConvertedTable; + 
tableAccessor = null; + newColumns = null; + } + } finally { + writeLock.unlock(); + } + } + + /** + * Gets a value either from the convertedTable, or from the belt table using the beltReader or from the newColumns. + */ + private double getValue(int columnIndex, int currentRow, Object beltReader) { + // store references so that they do not change in parallel + ColumnarExampleTable newColumnsRef = this.newColumns; + AbstractTableAccessor tableAccessorRef = this.tableAccessor; + ColumnarExampleTable convertedTableRef = this.convertedTable; + if (convertedTableRef != null) { + return convertedTableRef.getDataRow(currentRow).get(convertedTableRef.getAttribute(columnIndex)); + } + if (columnIndex < originalWidth) { + return tableAccessorRef.get(currentRow, columnIndex, beltReader); + } + return newColumnsRef.getDataRow(currentRow).get(newColumnsRef.getAttribute(columnIndex - originalWidth)); + } + + /** + * Gets a reader or {@code null} if {@link #convert()} has already been called. + */ + private static Object getReader(AbstractTableAccessor tableAccessor) { + if (tableAccessor != null) { + return tableAccessor.getReaders(); + } + return null; + } + + /** + * Gets an unbuffered reader or {@code null} if {@link #convert()} has already been called. + */ + private static Object getUnbufferedReader(AbstractTableAccessor wrapperTable) { + if (wrapperTable != null) { + return wrapperTable.getUnbufferedReaders(); + } + return null; + } + + /** + * Cleans up the columns. Has the volatile variables as parameter so that they do not change in between. 
+ */ + private ExampleTable cleanup(ColumnarExampleTable newColumns, AbstractTableAccessor tableWrapper, ColumnarExampleTable convertedTable, + Attributes attributes) { + if (convertedTable != null) { + ColumnarExampleTable newConvertedTable = convertedTable.columnCleanupClone(attributes); + return new ConvertOnWriteExampleTable(null, newConvertedTable, null, originalWidth, height); + } + + AbstractTableAccessor newTableWrapper = tableWrapper.columnCleanupClone(attributes); + ColumnarExampleTable newNewColumns = null; + if (newColumns != null) { + //clean up new columns table, requires shifting of attributes + Attributes newColumnsAttributes = new SimpleAttributes(); + for (Iterator allIterator = attributes.allAttributes(); allIterator.hasNext(); ) { + Attribute attribute = allIterator.next(); + if (attribute.getTableIndex() >= originalWidth) { + Attribute shiftedAttribute = (Attribute) attribute.clone(); + shiftedAttribute.setTableIndex(shiftedAttribute.getTableIndex() - originalWidth); + newColumnsAttributes.addRegular(shiftedAttribute); + } + } + newNewColumns = newColumns.columnCleanupClone(newColumnsAttributes); + } + return new ConvertOnWriteExampleTable(newTableWrapper, null, newNewColumns, originalWidth, height); + } +} \ No newline at end of file diff --git a/src/main/java/com/rapidminer/belt/table/DatetimeTableWrapper.java b/src/main/java/com/rapidminer/belt/table/DatetimeTableWrapper.java index 2993509..3e8fe62 100644 --- a/src/main/java/com/rapidminer/belt/table/DatetimeTableWrapper.java +++ b/src/main/java/com/rapidminer/belt/table/DatetimeTableWrapper.java @@ -1,5 +1,5 @@ /** - * Copyright (C) 2001-2019 by RapidMiner and the contributors + * Copyright (C) 2001-2020 by RapidMiner and the contributors * * Complete list of developers available at our web site: * @@ -27,6 +27,7 @@ import com.rapidminer.belt.reader.MixedRow; import com.rapidminer.belt.reader.MixedRowReader; import com.rapidminer.belt.reader.Readers; +import 
com.rapidminer.belt.reader.SmallReaders; import com.rapidminer.example.Attributes; import com.rapidminer.example.Example; import com.rapidminer.example.ExampleSet; @@ -48,7 +49,7 @@ public final class DatetimeTableWrapper extends RowwiseStatisticsExampleSet { private static final long serialVersionUID = 548442173952040494L; private enum ReadType { - NUMERIC, NOMINAL, DATETIME; + NUMERIC, NOMINAL, DATETIME } /** @@ -63,7 +64,7 @@ private enum ReadType { * Creates a wrapper for a table containing datetime columns. * * @throws BeltConverter.ConversionException - * it the table contains custom columns + * it the table contains non-standard columns */ DatetimeTableWrapper(Table table) { this.table = table; @@ -80,7 +81,6 @@ private enum ReadType { } } - public DatetimeTableWrapper(DatetimeTableWrapper wrapper) { this.table = wrapper.table; this.readTypes = wrapper.readTypes; @@ -104,7 +104,7 @@ public ExampleTable getExampleTable() { @Override public Example getExample(int index) { - MixedRowReader reader = Readers.unbufferedMixedRowReader(table); + MixedRowReader reader = SmallReaders.unbufferedMixedRowReader(table); reader.setPosition(index - 1); reader.move(); return new Example(new FakeRow(reader, readTypes), header); diff --git a/src/main/java/com/rapidminer/belt/table/DoubleTableWrapper.java b/src/main/java/com/rapidminer/belt/table/DoubleTableWrapper.java index 334a1fa..d78aabf 100644 --- a/src/main/java/com/rapidminer/belt/table/DoubleTableWrapper.java +++ b/src/main/java/com/rapidminer/belt/table/DoubleTableWrapper.java @@ -1,5 +1,5 @@ /** - * Copyright (C) 2001-2019 by RapidMiner and the contributors + * Copyright (C) 2001-2020 by RapidMiner and the contributors * * Complete list of developers available at our web site: * @@ -27,6 +27,7 @@ import com.rapidminer.belt.reader.NumericRow; import com.rapidminer.belt.reader.NumericRowReader; import com.rapidminer.belt.reader.Readers; +import com.rapidminer.belt.reader.SmallReaders; import 
com.rapidminer.example.Attribute; import com.rapidminer.example.AttributeRole; import com.rapidminer.example.Attributes; @@ -38,7 +39,6 @@ import com.rapidminer.example.table.DataRow; import com.rapidminer.example.table.DataRowFactory; import com.rapidminer.example.table.ExampleTable; -import com.rapidminer.tools.LogService; /** @@ -64,7 +64,7 @@ public final class DoubleTableWrapper extends RowwiseStatisticsExampleSet { * Creates a wrapper for a table not containing datetime columns. * * @throws BeltConverter.ConversionException - * it the table contains custom columns + * it the table contains non-standard columns */ DoubleTableWrapper(Table table) { this.table = table; @@ -98,7 +98,7 @@ public ExampleTable getExampleTable() { @Override public Example getExample(int index) { - NumericRowReader reader = Readers.unbufferedNumericRowReader(table); + NumericRowReader reader = SmallReaders.unbufferedNumericRowReader(table); reader.setPosition(index - 1); reader.move(); return new Example(new FakeRow(reader, nominal), header); @@ -185,7 +185,7 @@ private Object writeReplace() throws ObjectStreamException { * the table to convert * @return a header example set * @throws BeltConverter.ConversionException - * it the table contains custom columns + * it the table contains non-standard columns */ static HeaderExampleSet getShiftedHeader(Table table) { Attributes attributes = new SimpleAttributes(); @@ -193,26 +193,17 @@ static HeaderExampleSet getShiftedHeader(Table table) { int i = 0; for (String label : labels) { Column column = table.column(i); - Attribute attribute = AttributeFactory.createAttribute(label, com.rapidminer.belt.table.BeltConverter.getValueType(table, label, i)); + Attribute attribute = AttributeFactory.createAttribute(label, + com.rapidminer.belt.table.BeltConverter.getValueType(table, label, i)); attribute.setTableIndex(i); attributes.add(new AttributeRole(attribute)); if (attribute.isNominal()) { - List mapping = 
ColumnAccessor.get().getDictionaryList(column.getDictionary(String.class)); + List mapping = ColumnAccessor.get().getDictionaryList(column.getDictionary()); attribute.setMapping(new ShiftedNominalMappingAdapter(mapping)); } - String role = BeltConverter.convertRole(table, label); - if (role != null) { - boolean unusedRole = attributes.findRoleBySpecialName(role) == null; - if (unusedRole) { - // only add the role if it does not exist yet - better lose a role than the whole attribute - attributes.setSpecialAttribute(attribute, role); - } else { - LogService.getRoot().warning(() -> "Second occurence of role '" + role + "' is dropped since roles" + - " in ExampleSets must be unique"); - } - } i++; } + BeltConverter.convertRoles(table, attributes); return new HeaderExampleSet(attributes); } diff --git a/src/main/java/com/rapidminer/belt/table/FromTableConverter.java b/src/main/java/com/rapidminer/belt/table/FromTableConverter.java new file mode 100644 index 0000000..e14c40a --- /dev/null +++ b/src/main/java/com/rapidminer/belt/table/FromTableConverter.java @@ -0,0 +1,498 @@ +/** + * Copyright (C) 2001-2020 by RapidMiner and the contributors + * + * Complete list of developers available at our web site: + * + * http://rapidminer.com + * + * This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General + * Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more + * details. + * + * You should have received a copy of the GNU Affero General Public License along with this program. If not, see + * http://www.gnu.org/licenses/. 
+ */ +package com.rapidminer.belt.table; + +import java.time.Instant; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; + +import com.rapidminer.adaption.belt.IOTable; +import com.rapidminer.belt.column.Column; +import com.rapidminer.belt.column.Columns; +import com.rapidminer.belt.column.Dictionary; +import com.rapidminer.belt.reader.CategoricalReader; +import com.rapidminer.belt.reader.NumericReader; +import com.rapidminer.belt.reader.ObjectReader; +import com.rapidminer.belt.reader.Readers; +import com.rapidminer.core.concurrency.ConcurrencyContext; +import com.rapidminer.example.Attribute; +import com.rapidminer.example.AttributeRole; +import com.rapidminer.example.Attributes; +import com.rapidminer.example.Example; +import com.rapidminer.example.ExampleSet; +import com.rapidminer.example.SimpleAttributes; +import com.rapidminer.example.set.HeaderExampleSet; +import com.rapidminer.example.table.AttributeFactory; +import com.rapidminer.example.table.BinominalMapping; +import com.rapidminer.example.table.ExampleTable; +import com.rapidminer.example.table.NominalMapping; +import com.rapidminer.example.table.internal.ColumnarExampleTable; +import com.rapidminer.example.utils.ExampleSets; +import com.rapidminer.tools.Ontology; + + +/** + * Converts from belt {@link Table}s to {@link ExampleSet}s. + * + * @author Gisa Meier + * @since 0.7 + */ +enum FromTableConverter { + + ;//No instance enum + + /** + * Message for when non-supported columns types are encountered + */ + private static final String MESSAGE_NON_SUPPORTED = "Type not supported for now"; + + /** + * Extracts a {@link HeaderExampleSet} from a table. This is useful for creating a {@link + * com.rapidminer.example.set.RemappedExampleSet} or specifying training header of a {@link + * com.rapidminer.operator.Model}. 
+ * + * @param table + * the table to extract from + * @return a {@link HeaderExampleSet} where the nominal mappings of the attributes are immutable + * @throws BeltConverter.ConversionException + * if the table cannot be converted because it contains non-standard columns + */ + static HeaderExampleSet convertHeader(Table table) { + Attributes attributes = new SimpleAttributes(); + List labels = table.labels(); + int i = 0; + for (String label : labels) { + Column column = table.column(i); + Attribute attribute = AttributeFactory.createAttribute(label, BeltConverter.getValueType(table, label, i)); + attribute.setTableIndex(i); + attributes.add(new AttributeRole(attribute)); + if (attribute.isNominal()) { + List mapping = ColumnAccessor.get().getDictionaryList(column.getDictionary()); + attribute.setMapping(new NominalMappingAdapter(mapping)); + } + i++; + } + BeltConverter.convertRoles(table, attributes); + HeaderExampleSet set = new HeaderExampleSet(attributes); + BeltConverter.storeBeltMetaDataInExampleSetUserData(table, set); + return set; + } + + /** + * Converts a belt {@link IOTable} into an {@link ExampleSet}. 
+ * + * @param tableObject + * the table object to convert + * @param context + * the context to use for parallel execution + * @return a new example set containing the values of the table + * @throws IllegalArgumentException + * if table or context is null + * @throws BeltConverter.ConversionException + * if the table cannot be converted because it contains non-standard columns + */ + static ExampleSet convert(IOTable tableObject, ConcurrencyContext context) { + if (tableObject == null) { + throw new IllegalArgumentException("Table object must not be null"); + } + if (context == null) { + throw new IllegalArgumentException("Context must not be null"); + } + + Table table = tableObject.getTable(); + List attributes = new ArrayList<>(); + List labels = table.labels(); + int i = 0; + for (String label : labels) { + int valueType = BeltConverter.getValueType(table, label, i); + attributes.add(AttributeFactory.createAttribute(label, valueType)); + i++; + } + + ExampleSet set = ExampleSets.from(attributes).withBlankSize(table.height()).build(); + BeltConverter.storeBeltMetaDataInExampleSetUserData(table, set); + ExampleTable exampleTable = set.getExampleTable(); + if (exampleTable instanceof ColumnarExampleTable) { + ColumnarExampleTable columnTable = (ColumnarExampleTable) exampleTable; + convertParallel(table, attributes, columnTable, context); + } else { + convertSequentially(table, set); + } + + BeltConverter.convertRoles(table, set.getAttributes()); + set.getAnnotations().addAll(tableObject.getAnnotations()); + set.setSource(tableObject.getSource()); + return set; + } + + /** + * Converts a table object into an example set sequentially in case no operator is known. If possible, {@link + * #convert(IOTable, ConcurrencyContext)} should be preferred. 
+ * + * @param tableObject + * the table object to convert + * @return the example set + * @throws BeltConverter.ConversionException + * if the table cannot be converted because it contains non-standard columns + */ + static ExampleSet convertSequentially(IOTable tableObject) { + if (tableObject == null) { + throw new IllegalArgumentException("Table object must not be null"); + } + + Table table = tableObject.getTable(); + List attributes = new ArrayList<>(); + List labels = table.labels(); + int i = 0; + for (String label : labels) { + int valueType = BeltConverter.getValueType(table, label, i); + attributes.add(AttributeFactory.createAttribute(label, valueType)); + i++; + } + + ExampleSet set = ExampleSets.from(attributes).withBlankSize(table.height()).build(); + BeltConverter.storeBeltMetaDataInExampleSetUserData(table, set); + convertSequentially(table, set); + BeltConverter.convertRoles(table, set.getAttributes()); + set.getAnnotations().addAll(tableObject.getAnnotations()); + set.setSource(tableObject.getSource()); + return set; + } + + /** + * Converts the table for the {@link ConvertOnWriteExampleTable}. 
+ * + * @param table + * the underlying belt {@link Table} + * @param attributes + * the attributes for the table + * @return a new {@link ColumnarExampleTable} + */ + static ColumnarExampleTable convert(Table table, Attribute[] attributes) { + List attributeList = Arrays.asList(attributes); + ColumnarExampleTable columnarExampleTable = new ColumnarExampleTable(attributeList); + columnarExampleTable.addBlankRows(table.height()); + columnarExampleTable.setExpectedSize(table.height()); + ExampleSet exampleSet = columnarExampleTable.createExampleSet(); + // replace the same way as it is displayed in the view + table = TableViewCreator.INSTANCE.replaceAdvancedWithErrorMessage(table, x -> TableViewCreator.CANNOT_DISPLAY_MESSAGE); + convertSequentially(table, exampleSet); + columnarExampleTable.complete(); + return columnarExampleTable; + } + + /** + * While studio does not explicitly forbid {@code null} values in dictionaries, some places assume that there are + * none, so we adjust all belt dictionaries with this problem. + * + * @param column + * a nominal column + */ + private static Column removeGapsFromDictionary(Column column) { + return Columns.compactDictionary(column); + } + + /** + * Copies the data from the table into the set sequentially. 
+ */ + private static void convertSequentially(Table table, ExampleSet set) { + int i = 0; + for (Attribute attribute : set.getAttributes()) { + Column column = table.column(i++); + switch (attribute.getValueType()) { + case Ontology.STRING: + case Ontology.FILE_PATH: + case Ontology.NOMINAL: + case Ontology.POLYNOMINAL: + copyToNominal(set, attribute, column); + break; + case Ontology.BINOMINAL: + copyToBinominal(set, attribute, column); + break; + case Ontology.NUMERICAL: + case Ontology.REAL: + case Ontology.INTEGER: + NumericReader reader = Readers.numericReader(column, column.size()); + for (Example example : set) { + example.setValue(attribute, reader.read()); + } + break; + case Ontology.TIME: + case Ontology.DATE_TIME: + case Ontology.DATE: + copyToDateTime(set, attribute, column); + break; + default: + throw new UnsupportedOperationException(MESSAGE_NON_SUPPORTED); + } + } + } + + private static void copyToDateTime(ExampleSet set, Attribute attribute, Column column) { + ObjectReader reader = + Readers.objectReader(column, Instant.class); + for (Example example : set) { + Instant read = reader.read(); + if (read == null) { + example.setValue(attribute, Double.NaN); + } else { + example.setValue(attribute, read.toEpochMilli()); + } + } + } + + private static void copyToNominal(ExampleSet set, Attribute attribute, Column column) { + column = removeGapsFromDictionary(column); + + copyNewToOldMapping(attribute, column); + CategoricalReader reader = Readers.categoricalReader(column); + for (Example example : set) { + int read = reader.read(); + if (read == CategoricalReader.MISSING_CATEGORY) { + example.setValue(attribute, Double.NaN); + } else { + example.setValue(attribute, read - 1d); + } + } + } + + private static void copyToBinominal(ExampleSet set, Attribute attribute, Column column) { + column = removeGapsFromDictionary(column); + + Dictionary dictionary = column.getDictionary(); + List mapping = ColumnAccessor.get().getDictionaryList(dictionary); + if 
(dictionary.isBoolean()) { + // check if last value is positive + if (dictionary.getPositiveIndex() == 2 || !dictionary.hasPositive()) { + copyNegativePositive(set, attribute, column, dictionary); + } else { + copyPositiveNegative(set, attribute, column, mapping); + } + } else { + copyToNominal(set, attribute, column); + } + } + + /** + * Copy binominals from table to mapping in case the mapping contains first the positive, then the negative value. + */ + private static void copyPositiveNegative(ExampleSet set, Attribute attribute, Column column, List + mapping) { + //the second mapped value is negative, we have to swap indices + int positiveIndex = mapping.size() - 2; + int negativeIndex = mapping.size() - 1; + NominalMapping legacyMapping = attribute.getMapping(); + //the first mapped value is negative + legacyMapping.mapString(mapping.get(negativeIndex)); + legacyMapping.mapString(mapping.get(positiveIndex)); + CategoricalReader reader = Readers.categoricalReader(column); + for (Example example : set) { + int read = reader.read(); + if (read == negativeIndex) { + example.setValue(attribute, BinominalMapping.NEGATIVE_INDEX); + } else if (read == positiveIndex) { + example.setValue(attribute, BinominalMapping.POSITIVE_INDEX); + } else { + example.setValue(attribute, Double.NaN); + } + } + } + + /** + * Copy binominals from table to mapping in case the mapping contains first the negative, then the positive value. 
+ */ + private static void copyNegativePositive(ExampleSet set, Attribute attribute, Column column, Dictionary + mapping) { + NominalMapping legacyMapping = attribute.getMapping(); + //the first mapped value is negative, the order is kept + for (Dictionary.Entry value : mapping) { + legacyMapping.mapString(value.getValue()); + } + CategoricalReader reader = Readers.categoricalReader(column); + for (Example example : set) { + int read = reader.read(); + if (read == CategoricalReader.MISSING_CATEGORY) { + example.setValue(attribute, Double.NaN); + } else { + example.setValue(attribute, read - 1d); + } + } + } + + + /** + * Copies the given table into the given columnTable. Copies each of the given attributes in parallel using the + * given context. + */ + private static void convertParallel(Table table, List attributes, + ColumnarExampleTable columnTable, ConcurrencyContext context) { + List> copier = new ArrayList<>(table.width()); + int i = 0; + for (Attribute attribute : attributes) { + Column column = table.column(i++); + switch (attribute.getValueType()) { + case Ontology.STRING: + case Ontology.FILE_PATH: + case Ontology.NOMINAL: + case Ontology.POLYNOMINAL: + copier.add(() -> + copyNominalColumnToRows(columnTable, attribute, column)); + break; + case Ontology.BINOMINAL: + copier.add(() -> + copyBinominalColumnToRows(columnTable, attribute, column)); + break; + case Ontology.NUMERICAL: + case Ontology.REAL: + case Ontology.INTEGER: + copier.add(() -> { + NumericReader reader = + Readers.numericReader(column); + for (int row = 0; row < columnTable.size(); row++) { + columnTable.getDataRow(row).set(attribute, reader.read()); + } + return null; + }); + break; + case Ontology.TIME: + case Ontology.DATE_TIME: + case Ontology.DATE: + copier.add(() -> copyDateTimeColumnToRows(columnTable, attribute, column)); + break; + default: + throw new UnsupportedOperationException(MESSAGE_NON_SUPPORTED); + } + } + try { + context.call(copier); + } catch (ExecutionException e) { 
+ ToTableConverter.handleExecutionException(e); + } + } + + private static Void copyDateTimeColumnToRows(ColumnarExampleTable columnTable, Attribute attribute, Column + column) { + ObjectReader reader = Readers.objectReader(column, Instant.class); + for (int row = 0; row < columnTable.size(); row++) { + Instant read = reader.read(); + if (read == null) { + columnTable.getDataRow(row).set(attribute, Double.NaN); + } else { + columnTable.getDataRow(row).set(attribute, read.toEpochMilli()); + } + } + return null; + } + + private static Void copyBinominalColumnToRows(ColumnarExampleTable columnTable, Attribute attribute, + Column column) { + column = removeGapsFromDictionary(column); + + Dictionary dictionary = column.getDictionary(); + if (dictionary.isBoolean()) { + List mapping = ColumnAccessor.get().getDictionaryList(dictionary); + // check if last value is positive + if (dictionary.getPositiveIndex() == 2 || !dictionary.hasPositive()) { + copyNegativePositiveToRows(columnTable, attribute, column, dictionary); + } else { + copyPositiveNegativeToRows(columnTable, attribute, column, mapping); + + } + } else { + copyNominalColumnToRows(columnTable, attribute, column); + } + return null; + } + + /** + * Copy binominals from table to mapping in case the mapping contains first the positive, then the negative value. 
+ */ + private static void copyPositiveNegativeToRows(ColumnarExampleTable columnTable, Attribute attribute, Column + column, + List mapping) { + //the second mapped value is negative, we have to swap indices + int positiveIndex = mapping.size() - 2; + int negativeIndex = mapping.size() - 1; + + NominalMapping legacyMapping = attribute.getMapping(); + //the first mapped value is negative + legacyMapping.mapString(mapping.get(negativeIndex)); + legacyMapping.mapString(mapping.get(positiveIndex)); + CategoricalReader reader = Readers.categoricalReader(column); + for (int row = 0; row < columnTable.size(); row++) { + int read = reader.read(); + if (read == negativeIndex) { + columnTable.getDataRow(row).set(attribute, BinominalMapping.NEGATIVE_INDEX); + } else if (read == positiveIndex) { + columnTable.getDataRow(row).set(attribute, BinominalMapping.POSITIVE_INDEX); + } else { + columnTable.getDataRow(row).set(attribute, Double.NaN); + } + } + } + + /** + * Copy binominals from table to mapping in case the mapping contains first the negative, then the positive value. 
+ */ + private static void copyNegativePositiveToRows(ColumnarExampleTable columnTable, Attribute attribute, + Column column, Dictionary mapping) { + //the first mapped value is negative, the order is kept + NominalMapping legacyMapping = attribute.getMapping(); + for (Dictionary.Entry value : mapping) { + legacyMapping.mapString(value.getValue()); + } + CategoricalReader reader = Readers.categoricalReader(column); + for (int row = 0; row < columnTable.size(); row++) { + int read = reader.read(); + if (read == CategoricalReader.MISSING_CATEGORY) { + columnTable.getDataRow(row).set(attribute, Double.NaN); + } else { + columnTable.getDataRow(row).set(attribute, read - 1d); + } + } + } + + private static Void copyNominalColumnToRows(ColumnarExampleTable columnTable, Attribute attribute, Column column) { + column = removeGapsFromDictionary(column); + + copyNewToOldMapping(attribute, column); + CategoricalReader reader = Readers.categoricalReader(column); + for (int row = 0; row < columnTable.size(); row++) { + int read = reader.read(); + if (read == CategoricalReader.MISSING_CATEGORY) { + columnTable.getDataRow(row).set(attribute, Double.NaN); + } else { + columnTable.getDataRow(row).set(attribute, read - 1d); + } + } + return null; + } + + private static void copyNewToOldMapping(Attribute attribute, Column column) { + List mapping = ColumnAccessor.get().getDictionaryList(column.getDictionary()); + NominalMapping legacyMapping = attribute.getMapping(); + for (int j = 1; j < mapping.size(); j++) { + legacyMapping.mapString(mapping.get(j)); + } + } + +} diff --git a/src/main/java/com/rapidminer/belt/table/LegacyRole.java b/src/main/java/com/rapidminer/belt/table/LegacyRole.java index fdbd0a0..816a958 100644 --- a/src/main/java/com/rapidminer/belt/table/LegacyRole.java +++ b/src/main/java/com/rapidminer/belt/table/LegacyRole.java @@ -1,5 +1,5 @@ /** - * Copyright (C) 2001-2019 by RapidMiner and the contributors + * Copyright (C) 2001-2020 by RapidMiner and the 
contributors * * Complete list of developers available at our web site: * diff --git a/src/main/java/com/rapidminer/belt/table/LegacyType.java b/src/main/java/com/rapidminer/belt/table/LegacyType.java index 4474b11..f30bebe 100644 --- a/src/main/java/com/rapidminer/belt/table/LegacyType.java +++ b/src/main/java/com/rapidminer/belt/table/LegacyType.java @@ -1,5 +1,5 @@ /** - * Copyright (C) 2001-2019 by RapidMiner and the contributors + * Copyright (C) 2001-2020 by RapidMiner and the contributors * * Complete list of developers available at our web site: * @@ -28,7 +28,7 @@ * * @author Gisa Meier */ -enum LegacyType implements ColumnMetaData { +public enum LegacyType implements ColumnMetaData { NOMINAL(Ontology.NOMINAL), NUMERICAL(Ontology.NUMERICAL), diff --git a/src/main/java/com/rapidminer/belt/table/MixedTableAccessor.java b/src/main/java/com/rapidminer/belt/table/MixedTableAccessor.java new file mode 100644 index 0000000..9034732 --- /dev/null +++ b/src/main/java/com/rapidminer/belt/table/MixedTableAccessor.java @@ -0,0 +1,186 @@ +/** + * Copyright (C) 2001-2020 by RapidMiner and the contributors + * + * Complete list of developers available at our web site: + * + * http://rapidminer.com + * + * This program is free software: you can redistribute it and/or modify it under the terms of the + * GNU Affero General Public License as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without + * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License along with this program. + * If not, see http://www.gnu.org/licenses/. 
+ */ +package com.rapidminer.belt.table; + +import java.lang.ref.WeakReference; +import java.time.Instant; +import java.util.ArrayList; +import java.util.List; + +import com.rapidminer.belt.column.Column; +import com.rapidminer.belt.reader.NumericReader; +import com.rapidminer.belt.reader.ObjectReader; +import com.rapidminer.belt.reader.SmallReaders; +import com.rapidminer.example.Attribute; +import com.rapidminer.example.Attributes; +import com.rapidminer.tools.container.Pair; + + +/** + * {@link AbstractTableAccessor} for the case that there are date-time columns, i.e. not everything can be read + * numerically. While reading a whole table would normally be a use case for a {@link + * com.rapidminer.belt.reader.MixedRowReader}, benchmarks have shown that using a row reader has horrible performance + * when the table is read column-wise. Therefore, we use an array of column readers which make column-wise reading much + * faster and row-wise reading only slightly slower. + *

+ * Since the date-time columns require an {@link ObjectReader} while the others must be read with a {@link + * NumericReader}, we first create one list of the date-time columns and one for the others together with a twist-array + * which maps the original column index to the new index in the table where date-time columns come last. + * + * @author Gisa Meier + * @since 0.7 + */ +class MixedTableAccessor extends AbstractTableAccessor { + + /** + * {@link ThreadLocal} with a weak reference that holds the pair of numeric and date-time reader arrays. The {@link WeakReference} is + * needed since otherwise the readers and thus all columns are kept as long as the thread is alive even if this + * {@link MixedTableAccessor} is already gone. With the weak reference, we might have to recreate the readers in + * case of garbage collection but this is better than the alternative. See also {@link NumericTableAccessor}. + */ + private final ThreadLocal[]>>> readerReference = + new ThreadLocal<>(); + + /** all non-date-time columns in the order in which they appear in the table */ + private final List numericReadableColumns; + + /** all date-time columns in the order in which they appear in the table */ + private final List dateTimeColumns; + + /** + * map from original column index to the new index in a table when first taking the {@link #numericReadableColumns} + * and then the {@link #dateTimeColumns} + */ + private final int[] twist; + + + MixedTableAccessor(Table table, List attributes, int numberOfDateTime) { + super(table, attributes); + twist = new int[table.width()]; + numericReadableColumns = new ArrayList<>(); + dateTimeColumns = new ArrayList<>(); + int normalCounter = 0; + int dateTimeCounter = attributes.size() - numberOfDateTime; + for (int i = 0; i < table.width(); i++) { + Column column = table.column(i); + // treat the advanced columns as fake numeric-readable columns -> they will get a {@code null} column reader + if (column.type().hasCapability(Column.Capability.NUMERIC_READABLE) || 
!BeltConverter.STANDARD_TYPES.contains(column.type().id())) { + numericReadableColumns.add(column); + twist[i] = normalCounter++; + } else { + dateTimeColumns.add(column); + twist[i] = dateTimeCounter++; + } + } + } + + @Override + Object getReaders() { + NumericReader[] readers = new NumericReader[numericReadableColumns.size()]; + for (int i = 0; i < readers.length; i++) { + Column column = numericReadableColumns.get(i); + // ignore the advanced columns, will always return {@code 0} for them, see {@link AbstractTableAccessor#getNumericValue} + if (BeltConverter.STANDARD_TYPES.contains(column.type().id())) { + // use a small numeric reader instead of a normal one to lower the memory consumption by multiple readers + readers[i] = SmallReaders.smallNumericReader(column); + } + } + @SuppressWarnings("unchecked") + ObjectReader[] dateReaders = new ObjectReader[dateTimeColumns.size()]; + for (int i = 0; i < dateReaders.length; i++) { + // use a small object reader instead of a normal one to lower the memory consumption by multiple readers + dateReaders[i] = SmallReaders.smallObjectReader(dateTimeColumns.get(i), Instant.class); + } + return new Pair<>(readers, dateReaders); + } + + @Override + Object getUnbufferedReaders() { + // if {@link ConvertOnWriteExampleTable#getDataRow} were only called for single rows, the following use of + // threadlocal would not be necessary. 
But if {@link ConvertOnWriteExampleTable#getDataRow} is used to read a + // whole table, this caching improves the performance + WeakReference[]>> pairWeakReference = readerReference.get(); + if (pairWeakReference != null) { + Pair[]> readerPair = pairWeakReference.get(); + if (readerPair != null) { + return readerPair; + } + } + //no cached readers found, create new ones and cache them + NumericReader[] readers = new NumericReader[numericReadableColumns.size()]; + for (int i = 0; i < readers.length; i++) { + Column column = numericReadableColumns.get(i); + if (BeltConverter.STANDARD_TYPES.contains(column.type().id())) { + readers[i] = SmallReaders.unbufferedNumericReader(column); + } + } + @SuppressWarnings("unchecked") + ObjectReader[] dateReaders = new ObjectReader[dateTimeColumns.size()]; + for (int i = 0; i < dateReaders.length; i++) { + dateReaders[i] = SmallReaders.unbufferedObjectReader(dateTimeColumns.get(i), Instant.class); + } + Pair[]> readerPair = new Pair<>(readers, dateReaders); + readerReference.set(new WeakReference<>(readerPair)); + return readerPair; + } + + + @Override + double get(int row, int columnIndex, Object beltReader) { + int indexInTwisted = twist[columnIndex]; + if (indexInTwisted < numericReadableColumns.size()) { + @SuppressWarnings("unchecked") + NumericReader[] firstReaders = ((Pair[]>) beltReader).getFirst(); + return getNumericValue(row, columnIndex, firstReaders[indexInTwisted]); + } else { + @SuppressWarnings("unchecked") + ObjectReader[] secondReaders = + ((Pair[]>) beltReader).getSecond(); + return getDateTime(row, indexInTwisted, secondReaders); + } + } + + @Override + public AbstractTableAccessor columnCleanupClone(Attributes attributes) { + Table newTable = columnCleanup(attributes); + // need to count remaining date-time columns to use constructor + int dateTimeCount = 0; + for (Column column : newTable.getColumns()) { + if (column.type().id() == Column.TypeId.DATE_TIME) { + dateTimeCount++; + } + } + return new 
MixedTableAccessor(newTable, this.attributes, dateTimeCount); + } + + /** + * Get the date-time value at the given position by extracting the epoch millis. + */ + private double getDateTime(int rowIndex, int twistedColumnIndex, ObjectReader[] readers) { + //calculate index in object reader array + int readerIndex = twistedColumnIndex - numericReadableColumns.size(); + ObjectReader reader = readers[readerIndex]; + // set the position only if not already at the right position + if (reader.position() != rowIndex - 1) { + reader.setPosition(rowIndex - 1); + } + Instant instant = reader.read(); + return instant == null ? Double.NaN : instant.toEpochMilli(); + } +} \ No newline at end of file diff --git a/src/main/java/com/rapidminer/belt/table/NominalMappingAdapter.java b/src/main/java/com/rapidminer/belt/table/NominalMappingAdapter.java index b9a6db0..73a294a 100644 --- a/src/main/java/com/rapidminer/belt/table/NominalMappingAdapter.java +++ b/src/main/java/com/rapidminer/belt/table/NominalMappingAdapter.java @@ -1,5 +1,5 @@ /** - * Copyright (C) 2001-2019 by RapidMiner and the contributors + * Copyright (C) 2001-2020 by RapidMiner and the contributors * * Complete list of developers available at our web site: * diff --git a/src/main/java/com/rapidminer/belt/table/NumericTableAccessor.java b/src/main/java/com/rapidminer/belt/table/NumericTableAccessor.java new file mode 100644 index 0000000..84f38d3 --- /dev/null +++ b/src/main/java/com/rapidminer/belt/table/NumericTableAccessor.java @@ -0,0 +1,107 @@ +/** + * Copyright (C) 2001-2020 by RapidMiner and the contributors + * + * Complete list of developers available at our web site: + * + * http://rapidminer.com + * + * This program is free software: you can redistribute it and/or modify it under the terms of the + * GNU Affero General Public License as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without + * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License along with this program. + * If not, see http://www.gnu.org/licenses/. + */ +package com.rapidminer.belt.table; + +import java.lang.ref.WeakReference; +import java.util.List; + +import com.rapidminer.belt.column.Column; +import com.rapidminer.belt.reader.NumericReader; +import com.rapidminer.belt.reader.SmallReaders; +import com.rapidminer.example.Attribute; +import com.rapidminer.example.Attributes; + + +/** + * {@link AbstractTableAccessor} for the case that there are no date-time columns, i.e. everything can be read + * numerically. While reading a whole table numerically would normally be a use case for a {@link + * com.rapidminer.belt.reader.NumericRowReader}, benchmarks have shown that using a row reader has horrible performance + * when the table is read column-wise. Therefore, we use an array of column readers which make column-wise reading much + * faster and row-wise reading only slightly slower. + * + * @author Gisa Meier + * @since 0.7 + */ +class NumericTableAccessor extends AbstractTableAccessor { + + /** + * {@link ThreadLocal} with a weak reference that holds the array of numeric readers. The {@link WeakReference} is + * needed since otherwise the readers and thus all columns are kept as long as the thread is alive even if this + * {@link NumericTableAccessor} is already gone. With the weak reference, we might have to recreate the readers in + * case of garbage collection but this is better than the alternative. 
See also + * https://dzone.com/articles/painless-introduction-javas-threadlocal-storage + */ + private final ThreadLocal> readersReference = new ThreadLocal<>(); + + + NumericTableAccessor(Table table, List attributes) { + super(table, attributes); + } + + @Override + Object getReaders() { + NumericReader[] readers = new NumericReader[table.width()]; + for (int i = 0; i < readers.length; i++) { + Column column = table.column(i); + // ignore the advanced columns, will always return {@code 0} for them, see {@link AbstractTableAccessor#getNumericValue} + if (BeltConverter.STANDARD_TYPES.contains(column.type().id())) { + // use a small numeric reader instead of a normal one to lower the memory consumption by multiple readers + readers[i] = SmallReaders.smallNumericReader(column); + } + } + return readers; + } + + @Override + Object getUnbufferedReaders() { + // if {@link ConvertOnWriteExampleTable#getDataRow} were only called for single rows, the following use of + // threadlocal would not be necessary. 
But if {@link ConvertOnWriteExampleTable#getDataRow} is used to read a + // whole table, this caching improves the performance + WeakReference numericReadersWeakReference = readersReference.get(); + if (numericReadersWeakReference != null) { + NumericReader[] numericReaders = numericReadersWeakReference.get(); + if (numericReaders != null) { + return numericReaders; + } + } + //no cached readers found, create new ones and cache them + NumericReader[] readers = new NumericReader[table.width()]; + for (int i = 0; i < readers.length; i++) { + Column column = table.column(i); + if (BeltConverter.STANDARD_TYPES.contains(column.type().id())) { + readers[i] = SmallReaders.unbufferedNumericReader(column); + } + } + readersReference.set(new WeakReference<>(readers)); + return readers; + } + + + @Override + double get(int rowIndex, int columnIndex, Object readerObject) { + NumericReader[] readers = (NumericReader[]) readerObject; + return getNumericValue(rowIndex, columnIndex, readers[columnIndex]); + } + + @Override + public AbstractTableAccessor columnCleanupClone(Attributes attributes) { + return new NumericTableAccessor(columnCleanup(attributes), this.attributes); + } + +} \ No newline at end of file diff --git a/src/main/java/com/rapidminer/belt/table/RowwiseStatisticsExampleSet.java b/src/main/java/com/rapidminer/belt/table/RowwiseStatisticsExampleSet.java index 75c65d9..8038927 100644 --- a/src/main/java/com/rapidminer/belt/table/RowwiseStatisticsExampleSet.java +++ b/src/main/java/com/rapidminer/belt/table/RowwiseStatisticsExampleSet.java @@ -1,5 +1,5 @@ /** - * Copyright (C) 2001-2019 by RapidMiner and the contributors + * Copyright (C) 2001-2020 by RapidMiner and the contributors * * Complete list of developers available at our web site: * diff --git a/src/main/java/com/rapidminer/belt/table/ShiftedNominalMappingAdapter.java b/src/main/java/com/rapidminer/belt/table/ShiftedNominalMappingAdapter.java index 8b0938d..e677521 100644 --- 
a/src/main/java/com/rapidminer/belt/table/ShiftedNominalMappingAdapter.java +++ b/src/main/java/com/rapidminer/belt/table/ShiftedNominalMappingAdapter.java @@ -1,5 +1,5 @@ /** - * Copyright (C) 2001-2019 by RapidMiner and the contributors + * Copyright (C) 2001-2020 by RapidMiner and the contributors * * Complete list of developers available at our web site: * diff --git a/src/main/java/com/rapidminer/belt/table/TableViewCreator.java b/src/main/java/com/rapidminer/belt/table/TableViewCreator.java index 6f5b305..b2a19e4 100644 --- a/src/main/java/com/rapidminer/belt/table/TableViewCreator.java +++ b/src/main/java/com/rapidminer/belt/table/TableViewCreator.java @@ -1,5 +1,5 @@ /** - * Copyright (C) 2001-2019 by RapidMiner and the contributors + * Copyright (C) 2001-2020 by RapidMiner and the contributors * * Complete list of developers available at our web site: * @@ -18,18 +18,41 @@ */ package com.rapidminer.belt.table; +import static com.rapidminer.belt.table.BeltConverter.ConversionException; +import static com.rapidminer.belt.table.BeltConverter.STANDARD_TYPES; +import static com.rapidminer.belt.table.BeltConverter.convertRoles; +import static com.rapidminer.belt.table.BeltConverter.getValueType; +import static com.rapidminer.belt.table.BeltConverter.storeBeltMetaDataInExampleSetUserData; + +import java.util.ArrayList; import java.util.Arrays; +import java.util.List; import java.util.Objects; +import java.util.function.Function; +import com.rapidminer.adaption.belt.IOTable; +import com.rapidminer.belt.buffer.Buffers; +import com.rapidminer.belt.buffer.NominalBuffer; +import com.rapidminer.belt.column.CategoricalColumn; import com.rapidminer.belt.column.Column; -import com.rapidminer.belt.column.ColumnTypes; +import com.rapidminer.belt.column.ColumnType; import com.rapidminer.belt.column.Columns; import com.rapidminer.belt.column.Dictionary; +import com.rapidminer.example.Attribute; +import com.rapidminer.example.AttributeRole; +import 
com.rapidminer.example.Attributes; import com.rapidminer.example.ExampleSet; +import com.rapidminer.example.SimpleAttributes; +import com.rapidminer.example.table.AttributeFactory; +import com.rapidminer.example.table.BinominalAttribute; +import com.rapidminer.example.table.BinominalMapping; +import com.rapidminer.example.table.NominalMapping; +import com.rapidminer.tools.Ontology; /** - * Creates a view of a {@link Table} that can be used for visualization purposes and reading as an {@link ExampleSet}. + * Creates a view of a {@link Table} that can be used for visualization purposes and reading as an {@link ExampleSet} + * or creates a view that does a conversion on the fly if necessary. * * Please note that this class is not part of any public API and might be modified or removed in future releases without * prior warning. @@ -40,6 +63,16 @@ public enum TableViewCreator{ INSTANCE; + /** + * Message for constant replacement of advanced columns in {@link ConvertOnWriteExampleTable} + */ + static final String CANNOT_DISPLAY_MESSAGE = "Cannot display advanced column"; + + /** + * Constant mapping with only an error message entry + */ + private static final NominalMapping CANNOT_DISPLAY = new ShiftedNominalMappingAdapter(Arrays.asList(null, CANNOT_DISPLAY_MESSAGE)); + /** * Wraps the {@link Table} into an {@link ExampleSet} in order to visualize it. 
* @@ -48,13 +81,13 @@ public enum TableViewCreator{ * @return a view example set * @throws NullPointerException * if table is {@code null} - * @throws BeltConverter.ConversionException - * if the table cannot be converted because it contains custom columns + * @throws ConversionException + * if the table cannot be converted because it contains advanced columns */ public ExampleSet createView(Table table) { Objects.requireNonNull(table, "table must not be null"); - table = removeDictionaryGaps(table); + table = adjustDictionaries(table); for (int i = 0; i < table.width(); i++) { if (table.column(i).type().id() == Column.TypeId.DATE_TIME) { @@ -65,20 +98,95 @@ public ExampleSet createView(Table table) { } /** - * Creates a new table where custom columns are replaced with nominal columns that are constant one error value. + * Wraps the {@link Table} of the {@link IOTable} into an {@link ExampleSet} so that adding additional attributes + * works without conversion. + * + * @param ioTable + * the table to view as an {@link ExampleSet} + * @param throwOnAdvanced + * whether to throw an exception in case of advanced columns. 
If this is {@code false} the advanced column is + * viewed as a nominal column with a constant error message and it is recovered on the conversion back to {@link + * IOTable} + * @return a view of the ioTable that only does a conversion on a write operation into existing table data + * @throws ConversionException + * if the table contains advanced columns and throwOnAdvanced is {@code true} + * @since 0.7 + */ + public ExampleSet convertOnWriteView(IOTable ioTable, boolean throwOnAdvanced) { + Table table = ioTable.getTable(); + table = TableViewCreator.INSTANCE.adjustDictionaries(table); + Attributes attributes = new SimpleAttributes(); + List attributeList = new ArrayList<>(); + List labels = table.labels(); + int numberOfDatetime = 0; + int i = 0; + for (String label : labels) { + Column column = table.column(i); + Attribute attribute; + if (STANDARD_TYPES.contains(column.type().id())) { + attribute = AttributeFactory.createAttribute(label, + getValueType(table, label, i)); + if (attribute.isNominal()) { + setMapping(column, attribute); + } else if (attribute.isDateTime()) { + numberOfDatetime++; + } + } else { + if (throwOnAdvanced) { + throw new ConversionException(label, column.type()); + } else { + attribute = AttributeFactory.createAttribute(label, Ontology.POLYNOMINAL); + attribute.setMapping(CANNOT_DISPLAY); + } + } + attribute.setTableIndex(i); + attributes.add(new AttributeRole(attribute)); + attributeList.add(attribute); + + i++; + } + convertRoles(table, attributes); + ExampleSet set = new ConvertOnWriteExampleTable(table, attributeList, numberOfDatetime).createExampleSet(); + adjustAttributes(attributes, attributeList, set); + set.getAnnotations().addAll(ioTable.getAnnotations()); + set.setSource(ioTable.getSource()); + storeBeltMetaDataInExampleSetUserData(table, set); + return set; + } + + + /** + * Creates a new table where advanced columns are replaced with nominal columns that are constant one error value. 
+ * + * @param table + * the table to adjust + * @return a table without any advanced columns + */ + public Table replacedAdvancedWithError(Table table) { + return replaceAdvancedWithErrorMessage(table, oldColumn -> "Error:" + + " Cannot display advanced column of " + oldColumn.type()); + } + + /** + * Creates a new table where advanced columns are replaced with nominal columns that are constant one error value. * * @param table * the table to adjust - * @return a table without any custom columns + * @param errorMessage + * the error message to use + * @return a table without any advanced columns */ - public Table replacedCustomsWithError(Table table) { + Table replaceAdvancedWithErrorMessage(Table table, Function errorMessage) { + if (table.width() == 0) { + return table; + } Column[] columns = table.getColumns(); Column[] newColumns = Arrays.copyOf(columns, columns.length); for (int i = 0; i < columns.length; i++) { Column oldColumn = columns[i]; - if (oldColumn.type().id() == Column.TypeId.CUSTOM) { - Column newColumn = ColumnAccessor.get().newSingleValueCategoricalColumn(ColumnTypes.NOMINAL, "Error:" + - " Cannot display custom column of type " + oldColumn.type().customTypeID(), oldColumn.size()); + if (!STANDARD_TYPES.contains(oldColumn.type().id())) { + Column newColumn = ColumnAccessor.get().newSingleValueCategoricalColumn(ColumnType.NOMINAL, + errorMessage.apply(oldColumn), oldColumn.size()); newColumns[i] = newColumn; } } @@ -87,19 +195,34 @@ public Table replacedCustomsWithError(Table table) { /** - * Replaces categorical columns with gap containing dictionaries with remapped ones. + * Replaces categorical columns with gap containing dictionaries with remapped ones and remaps columns with + * boolean dictionaries that do not have the negative index as first index. + * + * Package private for tests. 
*/ - private Table removeDictionaryGaps(Table table) { + Table adjustDictionaries(Table table) { Column[] newColumns = null; int index = 0; for (Column column : table.getColumns()) { if (column.type().id() == Column.TypeId.NOMINAL) { - Dictionary dict = column.getDictionary(String.class); + Dictionary dict = column.getDictionary(); if (dict.size() != dict.maximalIndex()) { if (newColumns == null) { newColumns = Arrays.copyOf(table.getColumns(), table.width()); } newColumns[index] = Columns.compactDictionary(column); + dict = newColumns[index].getDictionary(); + + } + if (dict.isBoolean() && dict.getNegativeIndex() != 1 && dict.size() > 0) { + //binominal attributes need to have the first index as negative + + CategoricalColumn rightDictionaryColumn = getColumnWithAdjustedDictionary(dict); + + if (newColumns == null) { + newColumns = Arrays.copyOf(table.getColumns(), table.width()); + } + newColumns[index] = Columns.changeDictionary(column, rightDictionaryColumn); } } index++; @@ -111,4 +234,60 @@ private Table removeDictionaryGaps(Table table) { } } + /** + * Creates a column with a dictionary that has the negative value of the boolean dictionary first. + */ + private CategoricalColumn getColumnWithAdjustedDictionary(Dictionary dict) { + //This is a bit of a hack that uses implementation details of the categorical buffer + NominalBuffer rightDictionaryBuffer = Buffers.nominalBuffer(2, 2); + String negativeValue = dict.get(dict.getNegativeIndex()); + if (negativeValue == null) { + negativeValue = "false"; + } + rightDictionaryBuffer.set(0, negativeValue); + String positiveValue = dict.get(dict.getPositiveIndex()); + rightDictionaryBuffer.set(1, positiveValue); + return rightDictionaryBuffer.toBooleanColumn(positiveValue); + } + + + /** + * Converts the dictionary of the column to a nominal mapping and sets it for the attribute. 
+ */ + private void setMapping(Column column, Attribute attribute) { + List mapping = ColumnAccessor.get().getDictionaryList(column.getDictionary()); + if (attribute instanceof BinominalAttribute) { + BinominalMapping binMapping = new BinominalMapping(); + if (mapping.size() > 1) { + binMapping.mapString(mapping.get(1)); + } + if (mapping.size() > 2) { + binMapping.mapString(mapping.get(2)); + } + attribute.setMapping(binMapping); + } else { + attribute.setMapping(new ShiftedNominalMappingAdapter(mapping)); + } + } + + /** + * in order to keep the order of the attributes and not have specials at the end we add them again in the order of + * the attributeList. + */ + private void adjustAttributes(Attributes attributes, List attributeList, ExampleSet set) { + Attributes orderedAttributes = set.getAttributes(); + orderedAttributes.clearRegular(); + orderedAttributes.clearSpecial(); + for (Attribute attribute : attributeList) { + AttributeRole role = attributes.getRole(attribute); + if (!role.isSpecial()) { + orderedAttributes.addRegular(attribute); + } else { + AttributeRole attributeRole = new AttributeRole(attribute); + attributeRole.setSpecial(role.getSpecialName()); + orderedAttributes.add(attributeRole); + } + } + } + } \ No newline at end of file diff --git a/src/main/java/com/rapidminer/belt/table/ToTableConverter.java b/src/main/java/com/rapidminer/belt/table/ToTableConverter.java new file mode 100644 index 0000000..ea9726b --- /dev/null +++ b/src/main/java/com/rapidminer/belt/table/ToTableConverter.java @@ -0,0 +1,1176 @@ +/** + * Copyright (C) 2001-2020 by RapidMiner and the contributors + * + * Complete list of developers available at our web site: + * + * http://rapidminer.com + * + * This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General + * Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any + * later version. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more + * details. + * + * You should have received a copy of the GNU Affero General Public License along with this program. If not, see + * http://www.gnu.org/licenses/. + */ +package com.rapidminer.belt.table; + +import static com.rapidminer.belt.table.BeltConverter.CONFIDENCE_PREFIX; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; + +import com.rapidminer.adaption.belt.ContextAdapter; +import com.rapidminer.adaption.belt.IOTable; +import com.rapidminer.belt.buffer.Buffers; +import com.rapidminer.belt.buffer.NominalBuffer; +import com.rapidminer.belt.buffer.DateTimeBuffer; +import com.rapidminer.belt.buffer.NumericBuffer; +import com.rapidminer.belt.column.BooleanDictionary; +import com.rapidminer.belt.column.CategoricalColumn; +import com.rapidminer.belt.column.Column; +import com.rapidminer.belt.column.ColumnType; +import com.rapidminer.belt.util.ColumnMetaData; +import com.rapidminer.belt.util.ColumnReference; +import com.rapidminer.belt.util.ColumnRole; +import com.rapidminer.belt.util.IntegerFormats; +import com.rapidminer.belt.util.IntegerFormats.Format; +import com.rapidminer.belt.util.IntegerFormats.PackedIntegers; +import com.rapidminer.core.concurrency.ConcurrencyContext; +import com.rapidminer.example.Attribute; +import com.rapidminer.example.AttributeRole; +import com.rapidminer.example.Attributes; +import com.rapidminer.example.Example; +import com.rapidminer.example.ExampleSet; +import com.rapidminer.example.SimpleAttributes; +import 
com.rapidminer.example.set.AbstractExampleSet; +import com.rapidminer.example.set.MappingBasedExampleSet; +import com.rapidminer.example.set.SimpleExampleSet; +import com.rapidminer.example.table.BinominalAttribute; +import com.rapidminer.example.table.DateAttribute; +import com.rapidminer.example.table.ExampleTable; +import com.rapidminer.example.table.NominalMapping; +import com.rapidminer.example.table.NumericalAttribute; +import com.rapidminer.example.table.PolynominalAttribute; +import com.rapidminer.example.table.internal.ColumnarExampleTable; +import com.rapidminer.tools.Ontology; + + +/** + * Converts from {@link ExampleSet}s to belt {@link Table}s. + * + * @author Gisa Meier + * @since 0.7 + */ +enum ToTableConverter { + ; //No instance enum + + /** + * Message used when an attribute has an unknown ontology (value type) + */ + private static final String MESSAGE_UNKNOWN_TYPE = "Unknown attribute type"; + + /** + * Set of primitive attribute types that are known to be thread safe for read accesses. + */ + private static final Set> SAFE_ATTRIBUTES = new HashSet<>(5); + + /** + * Number of milli-seconds in a second + */ + private static final long MILLISECONDS_PER_SECOND = 1_000; + + /** + * Number of nano-seconds in a milli-second + */ + private static final long NANOS_PER_MILLI_SECOND = 1_000_000; + + /** + * The length of the {@link BeltConverter#CONFIDENCE_PREFIX} + */ + private static final int CONFIDENCE_PREFIX_LENGTH = CONFIDENCE_PREFIX.length(); + + static { + SAFE_ATTRIBUTES.add(DateAttribute.class); + SAFE_ATTRIBUTES.add(BinominalAttribute.class); + SAFE_ATTRIBUTES.add(PolynominalAttribute.class); + SAFE_ATTRIBUTES.add(NumericalAttribute.class); + } + + /** + * Creates a belt {@link IOTable} from the given {@link ExampleSet}. This is done in parallel if the exampleSet is + * threadsafe.
+ * + * @param exampleSet + * the exampleSet to convert + * @param context + * the concurrency context to use for the conversion + * @return a belt table + */ + static IOTable convert(ExampleSet exampleSet, ConcurrencyContext context) { + if (exampleSet == null) { + throw new IllegalArgumentException("Example set must not be null"); + } + if (context == null) { + throw new IllegalArgumentException("Context must not be null"); + } + + // handle the special case where there are no columns, but a height + if (exampleSet.getAttributes().allSize() == 0 && exampleSet.size() > 0) { + return new IOTable(new Table(exampleSet.size())); + } + + // check if the example set is a wrapped belt table with a simple or stacked mapped views on top + if (getExampleTable(exampleSet) instanceof ConvertOnWriteExampleTable) { + if (exampleSet instanceof SimpleExampleSet) { + return convertWrapped(exampleSet, (SimpleExampleSet) exampleSet, context); + } + + if (exampleSet instanceof MappingBasedExampleSet) { + ExampleSet testSet = getHighestParent(exampleSet); + if (testSet instanceof SimpleExampleSet) { + return convertWrapped(exampleSet, (SimpleExampleSet) testSet, context); + } + } + //cannot reuse underlying columns, use default conversion + } + return defaultConvert(exampleSet, context); + + } + + /** + * Iteratively get the parent if the current is mapping based and the next mapping based or simple. + */ + private static ExampleSet getHighestParent(ExampleSet exampleSet) { + ExampleSet testSet = exampleSet; + while (testSet instanceof MappingBasedExampleSet && ((MappingBasedExampleSet) testSet).isParentSimpleOrMapped()) { + testSet = ((MappingBasedExampleSet) testSet).getParentClone(); + } + return testSet; + } + + /** + * Checks if conversion can be done directly on the underlying {@link ExampleTable} or in parallel on the {@link + * ExampleSet} or must be done sequentially. Then does the conversion. 
+ */ + private static IOTable defaultConvert(ExampleSet exampleSet, ConcurrencyContext context) { + // check example set implementation + boolean threadSafe = exampleSet instanceof AbstractExampleSet + && ((AbstractExampleSet) exampleSet).isThreadSafeView(); + + // check example table implementation + if (threadSafe) { + ExampleTable table = getExampleTable(exampleSet); + threadSafe = table instanceof ColumnarExampleTable; + } + + threadSafe = areAttributesThreadsafe(exampleSet, threadSafe); + + Table table = doFittingConversion(exampleSet, threadSafe, context); + return createIOTable(exampleSet, table); + } + + /** + * Does the conversion directly on the underlying {@link ExampleTable} or in parallel on the {@link ExampleSet} or + * sequentially depending on the threadSafe parameter. + */ + private static Table doFittingConversion(ExampleSet exampleSet, boolean threadSafe, ConcurrencyContext context) { + Table table; + if (threadSafe) { + // we can safely read from the input example set using multiple threads + boolean simpleView = exampleSet.getClass() == SimpleExampleSet.class; + if (simpleView) { + // we can ignore the view and read directly from the underlying example table + table = exampleTableConvert(exampleSet, context); + } else { + table = parallelConvert(exampleSet, context); + } + } else { + table = sequentialConvert(exampleSet, context); + } + return table; + } + + /** + * Creates a new {@link IOTable} for the given table with the annotations and source from the {@link ExampleSet}. + */ + private static IOTable createIOTable(ExampleSet exampleSet, Table table) { + IOTable tableObject = new IOTable(table); + tableObject.getAnnotations().addAll(exampleSet.getAnnotations()); + tableObject.setSource(exampleSet.getSource()); + return tableObject; + } + + /** + * Checks the attributes of the example set for thread safety (unless the threadSafe parameter is already {@code false}).
+ */ + private static boolean areAttributesThreadsafe(ExampleSet exampleSet, boolean threadSafe) { + // check attribute implementation + if (threadSafe) { + threadSafe = areAttributesSimple(exampleSet); + } + + // check individual attributes and attribute transformations + if (threadSafe) { + Iterator attributes = exampleSet.getAttributes().allAttributes(); + while (attributes.hasNext()) { + Attribute attribute = attributes.next(); + if (attributeNotSafe(attribute)) { + return false; + } + } + } + return threadSafe; + } + + /** + * Checks whether the attribute is in the list of safe attributes and does not contain transformations. + */ + private static boolean attributeNotSafe(Attribute attribute) { + return !SAFE_ATTRIBUTES.contains(attribute.getClass()) || attribute.getLastTransformation() != null; + } + + /** + * Checks whether the attributes of the exampleSet are {@link SimpleAttributes}. + */ + private static boolean areAttributesSimple(ExampleSet exampleSet) { + Attributes attributes = exampleSet.getAttributes(); + return attributes.getClass() == SimpleAttributes.class; + } + + /** + * Converts the simpleOrMappingBased example set to an {@link IOTable}, reusing columns of the underlying belt + * {@link Table} whenever possible. 
+ * + * @param simpleOrMappingBased + * the example set to convert that is either simple or stacked mapping based with a {@link + * ConvertOnWriteExampleTable} underlying + * @param simpleParent + * the simple parent of the simpleOrMappingBased + * @param context + * the context to use + * @return the converted table + */ + private static IOTable convertWrapped(ExampleSet simpleOrMappingBased, SimpleExampleSet simpleParent, + ConcurrencyContext context) { + ConvertOnWriteExampleTable exampleTable = (ConvertOnWriteExampleTable) simpleOrMappingBased.getExampleTable(); + ColumnarExampleTable newColumns = exampleTable.getNewColumns(); + Table table = exampleTable.getTable(); + boolean simpleAttributes = areAttributesSimple(simpleOrMappingBased); + if (table == null || !simpleAttributes) { + // cannot reuse wrapped table, use normal conversion + Table newTable = doFittingConversion(simpleOrMappingBased, areAttributesThreadsafe(simpleOrMappingBased, simpleAttributes), context); + return createIOTable(simpleOrMappingBased, newTable); + } + + Table newTable = convertWithReuse(simpleOrMappingBased, simpleParent, newColumns, table, context); + int[] mapping = null; + ExampleSet testSet = simpleOrMappingBased; + // go through all parents and collapse the mappings until reaching the simple example set + while (testSet instanceof MappingBasedExampleSet && ((MappingBasedExampleSet) testSet).isParentSimpleOrMapped()) { + if (mapping == null) { + mapping = ((MappingBasedExampleSet) testSet).getMappingCopy(); + } else { + mapping = collapseMappings(mapping, ((MappingBasedExampleSet) testSet).getMappingCopy()); + } + testSet = ((MappingBasedExampleSet) testSet).getParentClone(); + } + if (mapping != null) { + newTable = newTable.map(mapping, true); + } + return createIOTable(simpleOrMappingBased, newTable); + } + + /** + * Converts to a new belt table reusing columns from the given table if possible. 
+ * + * @param attributeExampleSet + * the example set that determines the attributes + * @param simpleExampleSet + * the underlying simple example set, can be the same as the one above + * @param additionalColumns + * the additionally added columns, can be {@code null} + * @param table + * the belt table for the first columns + * @param context + * the context to use for copying in parallel + * @return the converted table + */ + private static Table convertWithReuse(ExampleSet attributeExampleSet, SimpleExampleSet simpleExampleSet, + ColumnarExampleTable additionalColumns, Table table, ConcurrencyContext context) { + int width = attributeExampleSet.getAttributes().allSize(); + String[] labels = new String[width]; + Column[] columns = new Column[width]; + Map> metaData = new HashMap<>(); + Attribute prediction = attributeExampleSet.getAttributes().getPredictedLabel(); + + List> columnCallables = new ArrayList<>(); + int newIndex = 0; + for (Iterator attributeRoleIterator = attributeExampleSet.getAttributes().allAttributeRoles(); attributeRoleIterator.hasNext(); ) { + AttributeRole next = attributeRoleIterator.next(); + Attribute attribute = next.getAttribute(); + int index = attribute.getTableIndex(); + + if (attributeNotSafe(attribute)) { + // must copy column sequentially + createDataAndStoreType(simpleExampleSet, simpleExampleSet.size(), columns, newIndex, attribute, metaData); + } else { + if (index < table.width()) { + // column is part of the old table, reuse it + reuseColumn(table, columns, attribute, newIndex, metaData, index); + } else { + // column is part of the newColumns table, create callable to copy it + Attribute shiftedAttribute = (Attribute) attribute.clone(); + shiftedAttribute.setTableIndex(shiftedAttribute.getTableIndex() - table.width()); + columnCallables.add(createTableCallablesAndStoreType(table.height(), metaData, additionalColumns, + shiftedAttribute, columns, newIndex)); + } + } + + labels[newIndex] = attribute.getName(); + if 
(next.isSpecial()) { + storeRole(next, attribute, metaData, prediction); + } + newIndex++; + } + + // if there are column callables to copy from the newColumns table, execute them in parallel + if (!columnCallables.isEmpty()) { + try { + context.call(columnCallables); + } catch (ExecutionException e) { + handleExecutionException(e); + } + } + + restoreBeltMetaDataFromExampleSetUserData(attributeExampleSet, metaData, new HashSet<>(Arrays.asList(labels))); + return new Table(columns, labels, metaData); + } + + /** + * Takes the column at position tableIndex from the table and stores it at position newIndex in the columns array, copying it into a real column only if the attribute changed from integer to real. + * Stores the type if necessary in the meta data. + */ + private static void reuseColumn(Table table, Column[] columns, Attribute attribute, int newIndex, + Map> metaData, int tableIndex) { + Column column = table.column(tableIndex); + if (integerChangedToReal(attribute, column)) { + columns[newIndex] = Buffers.realBuffer(column).toColumn(); + } else { + columns[newIndex] = column; + } + storeType(metaData, attribute); + } + + /** + * Creates a callable that creates a column and writes it into the index-position of the column array. Also stores + * the ontology, if necessary.
+ */ + private static Callable createTableCallablesAndStoreType(int size, Map> meta, + ExampleTable table, Attribute attribute, Column[] columns, + int index) { + switch (attribute.getValueType()) { + case Ontology.NUMERICAL: + storeOntology(meta, attribute); + return () -> { + columns[index] = getRealColumn(size, table, attribute); + return null; + }; + case Ontology.REAL: + return () -> { + columns[index] = getRealColumn(size, table, attribute); + return null; + }; + case Ontology.INTEGER: + return () -> { + columns[index] = getIntegerColumn(size, table, attribute); + return null; + }; + case Ontology.BINOMINAL: + storeOntology(meta, attribute); + return () -> { + columns[index] = getBinominalColumn(table, size, attribute); + return null; + }; + case Ontology.POLYNOMINAL: + return () -> { + columns[index] = getNominalColumn(table, size, attribute); + return null; + }; + case Ontology.NOMINAL: + case Ontology.STRING: + case Ontology.FILE_PATH: + storeOntology(meta, attribute); + return () -> { + columns[index] = getNominalColumn(table, size, attribute); + return null; + }; + case Ontology.DATE: + storeOntology(meta, attribute); + return () -> { + columns[index] = getSecondDateColumn(size, table, attribute); + return null; + }; + case Ontology.DATE_TIME: + return () -> { + columns[index] = getNanosecondDateColumn(size, table, attribute); + return null; + }; + case Ontology.TIME: + storeOntology(meta, attribute); + return () -> { + columns[index] = getNanosecondDateColumn(size, table, attribute); + return null; + }; + default: + throw new UnsupportedOperationException(MESSAGE_UNKNOWN_TYPE); + } + } + + + /** + * Stores the ontologies for which it is necessary, same as in {@link #createTableCallablesAndStoreType(int, Map, + * ExampleTable, Attribute, Column[], int)}. 
+ */ + private static void storeType(Map> meta, Attribute attribute) { + switch (attribute.getValueType()) { + case Ontology.TIME: + case Ontology.DATE: + case Ontology.BINOMINAL: + case Ontology.NUMERICAL: + case Ontology.NOMINAL: + case Ontology.STRING: + case Ontology.FILE_PATH: + storeOntology(meta, attribute); + break; + default: + //do nothing + } + } + + /** + * Creates a new mapping from applying first mappingA and then mappingB. + */ + private static int[] collapseMappings(int[] mappingA, int[] mappingB) { + int[] newMapping = new int[mappingA.length]; + for (int i = 0; i < newMapping.length; i++) { + newMapping[i] = mappingB[mappingA[i]]; + } + return newMapping; + } + + /** + * Numeric to real only changes the ontology, not the data. If the column was integer and now is real, we need to + * copy because in belt the type cannot change. + */ + private static boolean integerChangedToReal(Attribute attribute, Column column) { + return column.type().id() == Column.TypeId.INTEGER_53_BIT && attribute.getValueType() != Ontology.INTEGER && attribute.isNumerical(); + } + + /** + * Calls {@link ExampleSet#getExampleTable()} and returns {@code null} in case of an exception. + */ + static ExampleTable getExampleTable(ExampleSet exampleSet) { + try { + return exampleSet.getExampleTable(); + } catch (UnsupportedOperationException e) { + //if exampleSet is a HeaderExampleSet we need to ignore the exception from getExampleTable() + return null; + } + } + + + /** + * Conversion where the exampleSet cannot be accessed in parallel. 
+ */ + private static Table sequentialConvert(ExampleSet exampleSet, ConcurrencyContext context) { + int size = exampleSet.size(); + Set labels = new HashSet<>(); + TableBuilder builder = Builders.newTableBuilder(size); + Attribute prediction = exampleSet.getAttributes().getPredictedLabel(); + for (Iterator allRoles = exampleSet.getAttributes().allAttributeRoles(); allRoles.hasNext(); ) { + AttributeRole role = allRoles.next(); + Attribute attribute = role.getAttribute(); + labels.add(attribute.getName()); + copyDataAndType(builder, exampleSet, size, attribute); + if (role.isSpecial()) { + String specialName = role.getSpecialName(); + ColumnRole beltRole = BeltConverter.convert(specialName); + builder.addMetaData(attribute.getName(), beltRole); + if (beltRole == ColumnRole.METADATA) { + builder.addMetaData(attribute.getName(), new LegacyRole(specialName)); + } else if (beltRole == ColumnRole.SCORE) { + String predictionName = prediction == null ? null : prediction.getName(); + if (specialName.startsWith(CONFIDENCE_PREFIX)) { + builder.addMetaData(attribute.getName(), + new ColumnReference(predictionName, + specialName.substring(CONFIDENCE_PREFIX_LENGTH))); + } else { + builder.addMetaData(attribute.getName(), new ColumnReference(predictionName)); + if (!Attributes.CONFIDENCE_NAME.equals(specialName)) { + builder.addMetaData(attribute.getName(), new LegacyRole(specialName)); + } + } + } + } + } + restoreBeltMetaDataFromExampleSetUserData(exampleSet, builder, labels); + return builder.build(ContextAdapter.adapt(context)); + } + + /** + * Copies the data from the example set to the builder and adds a legacy type if the type is not determined by the + * data. 
+ */ + private static void copyDataAndType(TableBuilder builder, ExampleSet exampleSet, int size, Attribute attribute) { + String name = attribute.getName(); + switch (attribute.getValueType()) { + case Ontology.NUMERICAL: + builder.add(name, getRealColumn(exampleSet, size, attribute)); + builder.addMetaData(name, LegacyType.NUMERICAL); + break; + case Ontology.REAL: + builder.add(name, getRealColumn(exampleSet, size, attribute)); + break; + case Ontology.INTEGER: + builder.add(name, getIntegerColumn(exampleSet, size, attribute)); + break; + case Ontology.BINOMINAL: + CategoricalColumn binominalColumn = getBinominalColumn(exampleSet, size, attribute); + builder.add(name, binominalColumn); + builder.addMetaData(name, LegacyType.BINOMINAL); + break; + case Ontology.NOMINAL: + builder.add(name, getNominalColumn(exampleSet, size, attribute)); + builder.addMetaData(name, LegacyType.NOMINAL); + break; + case Ontology.POLYNOMINAL: + builder.add(name, getNominalColumn(exampleSet, size, attribute)); + break; + case Ontology.STRING: + builder.add(name, getNominalColumn(exampleSet, size, attribute)); + builder.addMetaData(name, LegacyType.STRING); + break; + case Ontology.FILE_PATH: + builder.add(name, getNominalColumn(exampleSet, size, attribute)); + builder.addMetaData(name, LegacyType.FILE_PATH); + break; + case Ontology.DATE: + builder.add(name, getDateColumn(exampleSet, size, attribute)); + builder.addMetaData(name, LegacyType.DATE); + break; + case Ontology.DATE_TIME: + builder.add(name, getDateTimeColumn(exampleSet, size, attribute)); + break; + case Ontology.TIME: + builder.add(name, getDateTimeColumn(exampleSet, size, attribute)); + builder.addMetaData(name, LegacyType.TIME); + break; + default: + throw new UnsupportedOperationException(MESSAGE_UNKNOWN_TYPE); + } + } + + + private static Column getDateTimeColumn(ExampleSet exampleSet, int size, Attribute attribute) { + DateTimeBuffer buffer = Buffers.dateTimeBuffer(size, true, false); + int i = 0; + for (Example 
example : exampleSet) { + double value = example.getValue(attribute); + if (Double.isNaN(value)) { + buffer.set(i++, null); + } else { + long longValue = (long) value; + buffer.set(i++, Math.floorDiv(longValue, MILLISECONDS_PER_SECOND), + (int) (Math.floorMod(longValue, MILLISECONDS_PER_SECOND) * NANOS_PER_MILLI_SECOND)); + } + } + return buffer.toColumn(); + } + + private static Column getDateColumn(ExampleSet exampleSet, int size, Attribute attribute) { + DateTimeBuffer buffer = Buffers.dateTimeBuffer(size, false, false); + int i = 0; + for (Example example : exampleSet) { + double value = example.getValue(attribute); + if (Double.isNaN(value)) { + buffer.set(i++, null); + } else { + buffer.set(i++, ((long) value) / MILLISECONDS_PER_SECOND); /* NOTE(review): '/' truncates toward zero while getDateTimeColumn uses Math.floorDiv, so negative millisecond values that are not whole seconds end up one second apart between the two methods - confirm truncation is intended */ + } + } + return buffer.toColumn(); + } + + private static Column getIntegerColumn(ExampleSet exampleSet, int size, Attribute attribute) { + NumericBuffer intBuffer = Buffers.integer53BitBuffer(size, false); + int j = 0; + for (Example example : exampleSet) { + intBuffer.set(j++, example.getValue(attribute)); + } + return intBuffer.toColumn(); + } + + private static Column getRealColumn(ExampleSet exampleSet, int size, Attribute attribute) { + NumericBuffer buffer = Buffers.realBuffer(size, false); + int i = 0; + for (Example example : exampleSet) { + buffer.set(i++, example.getValue(attribute)); + } + return buffer.toColumn(); + } + + /** + * Copies a binominal column from the example set by copying the mapping and the category data with a fallback in + * case the mapping is broken (contains null). Creates a boolean column if possible.
+ */ + private static CategoricalColumn getBinominalColumn(ExampleSet exampleSet, int size, Attribute attribute) { + NominalMapping legacyMapping = attribute.getMapping(); + if (legacyMapping.getPositiveString() != null && (legacyMapping.getNegativeString() == null + || legacyMapping.getPositiveString().equals(legacyMapping.getNegativeString()))) { + // Incompatible with Belt's 2Bit columns + return getBufferColumn(exampleSet, size, attribute); + } + List mapping = new ArrayList<>(3); + mapping.add(null); + String negativeString = legacyMapping.getNegativeString(); + if (negativeString != null) { + mapping.add(negativeString); + } + String positiveString = legacyMapping.getPositiveString(); + if (positiveString != null) { + mapping.add(positiveString); + } + byte[] data = new byte[size % 4 == 0 ? size / 4 : size / 4 + 1]; + + int i = 0; + for (Example example : exampleSet) { + double value = example.getValue(attribute); + if (!Double.isNaN(value)) { + IntegerFormats.writeUInt2(data, i, (int) (value + 1)); + } + i++; + } + + PackedIntegers packed = new PackedIntegers(data, Format.UNSIGNED_INT2, size); + //convert to a boolean column + int positiveIndex = legacyMapping.getPositiveIndex() + 1; + if (positiveIndex >= mapping.size()) { + //there is no positive value, only a negative one + positiveIndex = BooleanDictionary.NO_ENTRY; + } + return ColumnAccessor.get().newCategoricalColumn(ColumnType.NOMINAL, packed, mapping, positiveIndex); + } + + /** + * Copies a nominal column from the example set by copying the mapping and the category data with a fallback in + * case + * the mapping is broken (contains null or contains a value twice). 
+ */ + private static CategoricalColumn getNominalColumn(ExampleSet exampleSet, int size, Attribute attribute) { + NominalMapping legacyMapping = attribute.getMapping(); + List mapping = new ArrayList<>(legacyMapping.size() + 1); + mapping.add(null); + Set controlSet = new HashSet<>(); + controlSet.add(null); + for (String value : legacyMapping.getValues()) { + if (controlSet.add(value)) { + mapping.add(value); + } else { + return getBufferColumn(exampleSet, size, attribute); + } + } + int[] data = new int[size]; + int i = 0; + for (Example example : exampleSet) { + double value = example.getValue(attribute); + if (Double.isNaN(value)) { + data[i++] = 0; + } else { + data[i++] = (int) value + 1; + } + } + return ColumnAccessor.get().newCategoricalColumn(ColumnType.NOMINAL, data, mapping); + } + + /** + * Copies a nominal column from the example set using a nominal buffer. + */ + private static CategoricalColumn getBufferColumn(ExampleSet exampleSet, int size, Attribute attribute) { + NominalBuffer nominalBuffer = BufferAccessor.get().newInt32Buffer(ColumnType.NOMINAL, size); + int j = 0; + NominalMapping mapping = attribute.getMapping(); + for (Example example : exampleSet) { + double value = example.getValue(attribute); + if (Double.isNaN(value)) { + nominalBuffer.set(j++, null); + } else { + nominalBuffer.set(j++, mapping.mapIndex((int) value)); + } + } + return nominalBuffer.toColumn(); + } + + /** + * Conversion where the exampleSet can be accessed in parallel. 
+ */ + private static Table parallelConvert(ExampleSet exampleSet, ConcurrencyContext context) { + int size = exampleSet.size(); + int width = exampleSet.getAttributes().allSize(); + String[] labels = new String[width]; + Column[] columns = new Column[width]; + List> futureColumns = new ArrayList<>(); + Map> meta = new HashMap<>(); + Attribute predictionAttribute = exampleSet.getAttributes().getPredictedLabel(); + int index = 0; + for (Iterator allRoles = exampleSet.getAttributes().allAttributeRoles(); allRoles.hasNext(); ) { + AttributeRole role = allRoles.next(); + + Attribute attribute = role.getAttribute(); + labels[index] = attribute.getName(); + + futureColumns.add(createDataRunnableAndStoreType(exampleSet, size, attribute, meta, columns, index)); + + if (role.isSpecial()) { + storeRole(role, attribute, meta, predictionAttribute); + } + index++; + } + restoreBeltMetaDataFromExampleSetUserData(exampleSet, meta, new HashSet<>(Arrays.asList(labels))); + return buildTable(futureColumns, labels, columns, meta, context); + } + + /** + * Creates a callable that converts the column specified by exampleSet and attribute and stores it into the index entry of columns. + * Stores the ontology for the value types that need it.
+ */ + private static Callable createDataRunnableAndStoreType(ExampleSet exampleSet, int size, Attribute attribute, + Map> meta, Column[] columns, int index) { + switch (attribute.getValueType()) { + case Ontology.NUMERICAL: + storeOntology(meta, attribute); + return () -> { + columns[index] = getRealColumn(exampleSet, size, attribute); + return null; + }; + case Ontology.REAL: + return () -> { + columns[index] = getRealColumn(exampleSet, size, attribute); + return null; + }; + case Ontology.INTEGER: + return () -> { + columns[index] = getIntegerColumn(exampleSet, size, attribute); + return null; + }; + case Ontology.BINOMINAL: + storeOntology(meta, attribute); + return () -> { + columns[index] = getBinominalColumn(exampleSet, size, attribute); + return null; + }; + case Ontology.POLYNOMINAL: + return () -> { + columns[index] = getNominalColumn(exampleSet, size, attribute); + return null; + }; + case Ontology.NOMINAL: + case Ontology.STRING: + case Ontology.FILE_PATH: + storeOntology(meta, attribute); + return () -> { + columns[index] = getNominalColumn(exampleSet, size, attribute); + return null; + }; + case Ontology.DATE: + storeOntology(meta, attribute); + return () -> { + columns[index] = getDateColumn(exampleSet, size, attribute); + return null; + }; + case Ontology.DATE_TIME: + return () -> { + columns[index] = getDateTimeColumn(exampleSet, size, attribute); + return null; + }; + case Ontology.TIME: + storeOntology(meta, attribute); + return () -> { + columns[index] = getDateTimeColumn(exampleSet, size, attribute); + return null; + }; + default: + throw new UnsupportedOperationException(MESSAGE_UNKNOWN_TYPE); + } + } + + /** + * Same as {@link #createDataRunnableAndStoreType(ExampleSet, int, Attribute, Map, Column[], int)} but calculates + * the runnables directly. 
+ */ + private static void createDataAndStoreType(ExampleSet exampleSet, int size, + Column[] columns, int index, Attribute attribute, + Map> meta) { + switch (attribute.getValueType()) { + case Ontology.NUMERICAL: + storeOntology(meta, attribute); + columns[index] = getRealColumn(exampleSet, size, attribute); + break; + case Ontology.REAL: + columns[index] = getRealColumn(exampleSet, size, attribute); + break; + case Ontology.INTEGER: + columns[index] = getIntegerColumn(exampleSet, size, attribute); + break; + case Ontology.BINOMINAL: + columns[index] = getBinominalColumn(exampleSet, size, attribute); + storeOntology(meta, attribute); + break; + case Ontology.POLYNOMINAL: + columns[index] = getNominalColumn(exampleSet, size, attribute); + break; + case Ontology.NOMINAL: + case Ontology.STRING: + case Ontology.FILE_PATH: + storeOntology(meta, attribute); + columns[index] = getNominalColumn(exampleSet, size, attribute); + break; + case Ontology.DATE: + storeOntology(meta, attribute); + columns[index] = getDateColumn(exampleSet, size, attribute); + break; + case Ontology.DATE_TIME: + columns[index] = getDateTimeColumn(exampleSet, size, attribute); + break; + case Ontology.TIME: + storeOntology(meta, attribute); + columns[index] = getDateTimeColumn(exampleSet, size, attribute); + break; + default: + throw new UnsupportedOperationException(MESSAGE_UNKNOWN_TYPE); + } + } + + /** + * Stores the ontology of the attribute in the meta map under the attribute name. + */ + private static void storeOntology(Map> meta, Attribute attribute) { + List list = new ArrayList<>(3); + list.add(LegacyType.forOntology(attribute.getValueType())); + meta.put(attribute.getName(), list); + } + + /** + * Conversion where the data is read directly from the example table and in parallel. 
+ */ + private static Table exampleTableConvert(ExampleSet exampleSet, ConcurrencyContext context) { + int size = exampleSet.size(); + int width = exampleSet.getAttributes().allSize(); + String[] labels = new String[width]; + Column[] columns = new Column[width]; + List> futureColumns = new ArrayList<>(); + Map> meta = new HashMap<>(); + ExampleTable table = getExampleTable(exampleSet); + Attribute prediction = exampleSet.getAttributes().getPredictedLabel(); + int index = 0; + for (Iterator allRoles = exampleSet.getAttributes().allAttributeRoles(); allRoles.hasNext(); ) { + + AttributeRole role = allRoles.next(); + Attribute attribute = role.getAttribute(); + labels[index] = attribute.getName(); + + futureColumns.add(createTableCallablesAndStoreType(size, meta, table, attribute, columns, index)); + if (role.isSpecial()) { + storeRole(role, attribute, meta, prediction); + } + index++; + } + restoreBeltMetaDataFromExampleSetUserData(exampleSet, meta, new HashSet<>(Arrays.asList(labels))); + return buildTable(futureColumns, labels, columns, meta, context); + } + + /** + * Stores the associated belt role and, if not all the info can be captured by the belt role, stores the original + * role name. + */ + private static void storeRole(AttributeRole role, Attribute attribute, Map> meta, + Attribute prediction) { + String specialName = role.getSpecialName(); + ColumnRole beltRole = BeltConverter.convert(specialName); + List columnMeta = + meta.computeIfAbsent(attribute.getName(), s -> new ArrayList<>(2)); + columnMeta.add(beltRole); + if (beltRole == ColumnRole.METADATA) { + columnMeta.add(new LegacyRole(specialName)); + } else if (beltRole == ColumnRole.SCORE) { + String predictionName = prediction == null ? 
null : prediction.getName(); + if (specialName.startsWith(CONFIDENCE_PREFIX)) { + columnMeta.add(new ColumnReference(predictionName, + specialName.substring(CONFIDENCE_PREFIX_LENGTH))); + } else { + columnMeta.add(new ColumnReference(predictionName)); + if (!Attributes.CONFIDENCE_NAME.equals(specialName)) { + columnMeta.add(new LegacyRole(specialName)); + } + } + } + } + + private static Column getNanosecondDateColumn(int size, ExampleTable table, Attribute attribute) { + DateTimeBuffer buffer = Buffers.dateTimeBuffer(size, true, false); + for (int i = 0; i < table.size(); i++) { + double value = table.getDataRow(i).get(attribute); + if (Double.isNaN(value)) { + buffer.set(i, null); + } else { + long longValue = (long) value; + buffer.set(i, Math.floorDiv(longValue, MILLISECONDS_PER_SECOND), + (int) (Math.floorMod(longValue, MILLISECONDS_PER_SECOND) * NANOS_PER_MILLI_SECOND)); + } + } + return buffer.toColumn(); + } + + private static Column getSecondDateColumn(int size, ExampleTable table, Attribute attribute) { + DateTimeBuffer buffer = Buffers.dateTimeBuffer(size, false, false); + for (int i = 0; i < table.size(); i++) { + double value = table.getDataRow(i).get(attribute); + if (Double.isNaN(value)) { + buffer.set(i, null); + } else { + buffer.set(i, ((long) value) / MILLISECONDS_PER_SECOND); + } + } + return buffer.toColumn(); + } + + private static Column getRealColumn(int size, ExampleTable table, Attribute attribute) { + NumericBuffer buffer = Buffers.realBuffer(size, false); + for (int i = 0; i < table.size(); i++) { + buffer.set(i, table.getDataRow(i).get(attribute)); + } + return buffer.toColumn(); + } + + private static Column getIntegerColumn(int size, ExampleTable table, Attribute attribute) { + NumericBuffer intBuffer = Buffers.integer53BitBuffer(size, false); + for (int i = 0; i < table.size(); i++) { + intBuffer.set(i, table.getDataRow(i).get(attribute)); + } + return intBuffer.toColumn(); + } + + /** + * Copies a binominal column from the example 
table by copying the mapping and the category data with a fallback in + * case the mapping is broken (contains null). + */ + private static Column getBinominalColumn(ExampleTable table, int size, Attribute attribute) { + NominalMapping legacyMapping = attribute.getMapping(); + if (legacyMapping.getPositiveString() != null && (legacyMapping.getNegativeString() == null + || legacyMapping.getPositiveString().equals(legacyMapping.getNegativeString()))) { + // Incompatible with Belt's 2Bit columns + return getBufferColumn(table, size, attribute); + } + List mapping = new ArrayList<>(3); + mapping.add(null); + String negativeString = legacyMapping.getNegativeString(); + if (negativeString != null) { + mapping.add(negativeString); + } + String positiveString = legacyMapping.getPositiveString(); + if (positiveString != null) { + mapping.add(positiveString); + } + byte[] data = new byte[size % 4 == 0 ? size / 4 : size / 4 + 1]; + + for (int i = 0; i < size; i++) { + double value = table.getDataRow(i).get(attribute); + if (!Double.isNaN(value)) { + IntegerFormats.writeUInt2(data, i, (int) value + 1); + } + } + + PackedIntegers packed = new PackedIntegers(data, Format.UNSIGNED_INT2, size); + // create boolean column + int positiveIndex = legacyMapping.getPositiveIndex() + 1; + if (positiveIndex >= mapping.size()) { + //there is no positive value, only a negative one + positiveIndex = BooleanDictionary.NO_ENTRY; + } + return ColumnAccessor.get().newCategoricalColumn(ColumnType.NOMINAL, packed, mapping, positiveIndex); + } + + /** + * Copies a nominal column from the example table by copying the mapping and the category data with a fallback in + * case the mapping is broken (contains null or contains a value twice). 
+ */ + private static Column getNominalColumn(ExampleTable table, int size, Attribute attribute) { + NominalMapping legacyMapping = attribute.getMapping(); + List mapping = new ArrayList<>(legacyMapping.size() + 1); + mapping.add(null); + Set controlSet = new HashSet<>(); + controlSet.add(null); + for (String value : legacyMapping.getValues()) { + if (controlSet.add(value)) { + mapping.add(value); + } else { + return getBufferColumn(table, size, attribute); + } + } + int[] data = new int[size]; + for (int i = 0; i < size; i++) { + double value = table.getDataRow(i).get(attribute); + if (Double.isNaN(value)) { + data[i] = 0; + } else { + data[i] = (int) value + 1; + } + } + return ColumnAccessor.get().newCategoricalColumn(ColumnType.NOMINAL, data, mapping); + } + + /** + * Copies a nominal column from the example table using a nominal buffer. + */ + private static Column getBufferColumn(ExampleTable table, int size, Attribute attribute) { + NominalBuffer nominalBuffer = BufferAccessor.get().newInt32Buffer(ColumnType.NOMINAL, size); + NominalMapping mapping = attribute.getMapping(); + for (int i = 0; i < size; i++) { + double value = table.getDataRow(i).get(attribute); + if (Double.isNaN(value)) { + nominalBuffer.set(i, null); + } else { + nominalBuffer.set(i, mapping.mapIndex((int) value)); + } + } + return nominalBuffer.toColumn(); + } + + /** + * Builds the table by running the future columns in the given context and creating a table from the results and + * the given labels. + */ + private static Table buildTable(List> columnConstructors, String[] labels, Column[] columns, + Map> srcMeta, ConcurrencyContext context) { + try { + context.call(columnConstructors); + return new Table(columns, labels, srcMeta); + } catch (ExecutionException e) { + return handleExecutionException(e); + } + } + + /** + * Handling of an {@link ExecutionException} to reuse at several places. 
+ */ + static Table handleExecutionException(ExecutionException e) { + Throwable cause = e.getCause(); + if (cause instanceof RuntimeException) { + throw (RuntimeException) cause; + } else if (cause instanceof Error) { + throw (Error) cause; + } else { + throw new RuntimeException(cause.getMessage(), cause); + } + } + + /** + * Belt meta data (except for roles) cannot be stored in an ExampleSet. Therefore, we store the belt meta data in + * the ExampleSets's user data. This is the method that restores this saved belt meta data from the user data. Is + * adds the restored belt meta data to the given {@link TableBuilder}. + *

+ * It is important that this method is called after the rest of the metadata has already been built because this + * method uses and updates the existing metadata. + * + * @param set + * the example set potentially holding some belt meta data in its user data + * @param builder + * the table builder that the belt meta data will be added to + */ + private static void restoreBeltMetaDataFromExampleSetUserData(ExampleSet set, TableBuilder builder, + Set usedLabels) { + try { + @SuppressWarnings("unchecked") + Map> beltMetaData = + (Map>) set.getUserData(BeltConverter.IOOBJECT_USER_DATA_COLUMN_META_DATA_KEY); + if (beltMetaData != null) { + beltMetaData.forEach((label, columnMetaDataList) -> { + if (usedLabels.contains(label)) { + for (ColumnMetaData md : columnMetaDataList) { + // column roles, legacy types and legacy roles are already determined by the example set. + // for the rest we want to use the belt meta data that has been stored before + if (!(md instanceof ColumnRole || md instanceof LegacyType || md instanceof LegacyRole)) { + if (md instanceof ColumnReference) { + // This is important for the special case that a column reference has been auto- + // generated because of a confidence role in the example set. We override the auto- + // generated column reference with the actual one. + builder.removeMetaData(label, ColumnReference.class); + } + builder.addMetaData(label, md); + } + } + } + }); + } + } catch (ClassCastException e) { + // well then there is nothing we can do + } + } + + /** + * Belt meta data (except for roles) cannot be stored in an ExampleSet. Therefore, we store the belt meta data in + * the ExampleSets's user data. This is the method that restores this saved belt meta data from the user data. Is + * adds the restored belt meta data to the given map. + *

+ * It is important that this method is called after the rest of the metadata has already been built because this + * method used and updates the existing metadata. + * + * @param set + * the example set potentially holding some belt meta data in its user data + * @param incompleteMetaData + * the belt meta data will be added to this map + */ + private static void restoreBeltMetaDataFromExampleSetUserData(ExampleSet set, Map> incompleteMetaData, Set usedLabels) { + try { + @SuppressWarnings("unchecked") + Map> beltMetaData = + (Map>) set.getUserData(BeltConverter.IOOBJECT_USER_DATA_COLUMN_META_DATA_KEY); + if (beltMetaData != null) { + beltMetaData.forEach((label, columnMetaDataList) -> { + if (usedLabels.contains(label)) { + incompleteMetaData.computeIfAbsent(label, k -> new ArrayList<>()); + List metaDataForLabel = incompleteMetaData.get(label); + for (ColumnMetaData md : columnMetaDataList) { + // column roles, legacy types and legacy roles are already determined by the example set. + // for the rest we want to use the belt meta data that has been stored before + if (!(md instanceof ColumnRole || md instanceof LegacyType || md instanceof LegacyRole)) { + if (md instanceof ColumnReference) { + // This is important for the special case that a column reference has been auto- + // generated because of a confidence role in the example set. We override the auto- + // generated column reference with the actual one. 
+ metaDataForLabel.removeIf(x -> x instanceof ColumnReference); + } + metaDataForLabel.add(md); + } + } + } + }); + } + } catch (ClassCastException e) { + // well then there is nothing we can do + } + } + +} diff --git a/src/test/java/com/rapidminer/belt/table/BeltConverterTest.java b/src/test/java/com/rapidminer/belt/table/BeltConverterTest.java index 9c848df..805e2ce 100644 --- a/src/test/java/com/rapidminer/belt/table/BeltConverterTest.java +++ b/src/test/java/com/rapidminer/belt/table/BeltConverterTest.java @@ -1,1617 +1,1889 @@ -/** - * Copyright (C) 2001-2019 by RapidMiner and the contributors - * - * Complete list of developers available at our web site: - * - * http://rapidminer.com - * - * This program is free software: you can redistribute it and/or modify it under the terms of the - * GNU Affero General Public License as published by the Free Software Foundation, either version 3 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without - * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License along with this program. - * If not, see http://www.gnu.org/licenses/. 
- */ -package com.rapidminer.belt.table; - -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertTrue; - -import java.time.Instant; -import java.time.LocalTime; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.List; -import java.util.Random; -import java.util.concurrent.Callable; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.ForkJoinPool; -import java.util.concurrent.ForkJoinTask; -import java.util.concurrent.Future; -import java.util.stream.StreamSupport; - -import org.junit.BeforeClass; -import org.junit.Test; -import org.junit.experimental.runners.Enclosed; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameter; -import org.junit.runners.Parameterized.Parameters; - -import com.rapidminer.RapidMiner; -import com.rapidminer.adaption.belt.IOTable; -import com.rapidminer.belt.buffer.Buffers; -import com.rapidminer.belt.buffer.CategoricalBuffer; -import com.rapidminer.belt.column.Column; -import com.rapidminer.belt.column.ColumnType; -import com.rapidminer.belt.column.ColumnTypes; -import com.rapidminer.belt.column.Columns; -import com.rapidminer.belt.column.Dictionary; -import com.rapidminer.belt.reader.CategoricalReader; -import com.rapidminer.belt.reader.NumericReader; -import com.rapidminer.belt.reader.Readers; -import com.rapidminer.belt.util.Belt; -import com.rapidminer.belt.util.ColumnReference; -import com.rapidminer.belt.util.ColumnRole; -import com.rapidminer.core.concurrency.ConcurrencyContext; -import com.rapidminer.core.concurrency.ExecutionStoppedException; -import com.rapidminer.example.Attribute; -import com.rapidminer.example.AttributeRole; -import com.rapidminer.example.AttributeTransformation; -import 
com.rapidminer.example.Attributes; -import com.rapidminer.example.Example; -import com.rapidminer.example.ExampleSet; -import com.rapidminer.example.ExampleSetFactory; -import com.rapidminer.example.set.Condition; -import com.rapidminer.example.set.ConditionedExampleSet; -import com.rapidminer.example.set.HeaderExampleSet; -import com.rapidminer.example.set.MappedExampleSet; -import com.rapidminer.example.set.Partition; -import com.rapidminer.example.set.RemappedExampleSet; -import com.rapidminer.example.set.SimplePartitionBuilder; -import com.rapidminer.example.set.SortedExampleSet; -import com.rapidminer.example.set.SplittedExampleSet; -import com.rapidminer.example.table.AttributeFactory; -import com.rapidminer.example.table.NominalMapping; -import com.rapidminer.example.utils.ExampleSetBuilder; -import com.rapidminer.example.utils.ExampleSets; -import com.rapidminer.operator.Annotations; -import com.rapidminer.operator.tools.ExpressionEvaluationException; -import com.rapidminer.test.asserter.AsserterFactoryRapidMiner; -import com.rapidminer.test_utils.RapidAssert; -import com.rapidminer.tools.Ontology; -import com.rapidminer.tools.ParameterService; - - -/** - * Tests the {@link com.rapidminer.belt.table.BeltConverter}. 
- * - * @author Gisa Meier - */ -@RunWith(Enclosed.class) -public class BeltConverterTest { - - private static final ConcurrencyContext CONTEXT = new ConcurrencyContext() { - - private ForkJoinPool pool = new ForkJoinPool(Runtime.getRuntime().availableProcessors()); - - @Override - public List> submit(List> callables) throws IllegalArgumentException { - List> futures = new ArrayList<>(); - for (Callable callable : callables) { - futures.add(pool.submit(callable)); - } - return futures; - } - - @Override - public List call(List> callables) - throws ExecutionException, ExecutionStoppedException, IllegalArgumentException { - List> futures = submit(callables); - List results = new ArrayList<>(); - for (Future future : futures) { - try { - results.add(future.get()); - } catch (InterruptedException e) { - throw new RuntimeException("must not happen"); - } - } - return results; - } - - @Override - public void run(List runnables) - throws ExecutionException, ExecutionStoppedException, IllegalArgumentException { - } - - @Override - public List invokeAll(List> tasks) - throws ExecutionException, ExecutionStoppedException, IllegalArgumentException { - return null; - } - - @Override - public T invoke(ForkJoinTask task) - throws ExecutionException, ExecutionStoppedException, IllegalArgumentException { - return null; - } - - @Override - public int getParallelism() { - return pool.getParallelism(); - } - - @Override - public List collectResults(List> futures) - throws ExecutionException, ExecutionStoppedException, IllegalArgumentException { - return null; - } - - @Override - public void checkStatus() throws ExecutionStoppedException { - } - - }; - - private static double[] readColumnToArray(Table table, int column) { - double[] data = new double[table.height()]; - NumericReader reader = Readers.numericReader(table.column(column)); - for (int j = 0; j < table.height(); j++) { - data[j] = reader.read(); - } - return data; - } - - private static String[] 
readColumnToStringArray(Table table, int column) { - String[] data = new String[table.height()]; - Column col = table.column(column); - List categoricalMapping = ColumnAccessor.get().getDictionaryList(col.getDictionary(String.class)); - CategoricalReader reader = Readers.categoricalReader(col); - for (int j = 0; j < table.height(); j++) { - data[j] = categoricalMapping.get(reader.read()); - } - return data; - } - - static double[][] readTableToArray(Table table) { - double[][] result = new double[table.width()][]; - Arrays.setAll(result, i -> readColumnToArray(table, i)); - return result; - } - - private static String[][] readTableToStringArray(Table table) { - String[][] result = new String[table.width()][]; - Arrays.setAll(result, i -> readColumnToStringArray(table, i)); - return result; - } - - private static double[][] readExampleSetToArray(ExampleSet set) { - double[][] result = new double[set.getAttributes().size()][]; - int i = 0; - for (Attribute att : set.getAttributes()) { - result[i] = new double[set.size()]; - int j = 0; - for (Example example : set) { - result[i][j] = example.getValue(att); - j++; - } - i++; - } - return result; - } - - private static String[][] readExampleSetToStringArray(ExampleSet set) { - String[][] result = new String[set.getAttributes().size()][]; - int i = 0; - for (Attribute att : set.getAttributes()) { - result[i] = new String[set.size()]; - int j = 0; - for (Example example : set) { - double value = example.getValue(att); - if (Double.isNaN(value)) { - result[i][j] = null; - } else { - result[i][j] = att.getMapping().mapIndex((int) value); - } - j++; - } - i++; - } - return result; - } - - public static class InputValidation { - - @Test(expected = IllegalArgumentException.class) - public void testSetToTableNullSet() { - com.rapidminer.belt.table.BeltConverter.convert((ExampleSet) null, CONTEXT); - } - - @Test(expected = IllegalArgumentException.class) - public void testSetToTableNullContext() { - 
com.rapidminer.belt.table.BeltConverter.convert(ExampleSetFactory.createExampleSet(new double[][]{new double[]{0}}), null); - } - - @Test(expected = IllegalArgumentException.class) - public void testTableToSetNullTable() { - com.rapidminer.belt.table.BeltConverter.convert((IOTable) null, CONTEXT); - } - - @Test(expected = IllegalArgumentException.class) - public void testTableToSetSequentiallyNullTable() { - com.rapidminer.belt.table.BeltConverter.convertSequentially((IOTable) null); - } - - @Test(expected = IllegalArgumentException.class) - public void testTableToSetNullContext() { - com.rapidminer.belt.table.BeltConverter.convert(new IOTable(Builders.newTableBuilder(1).build(Belt.defaultContext())), null); - } - } - - @RunWith(Parameterized.class) - public static class SetToTable { - - public SetToTable(boolean legacyMode) { - ParameterService.setParameterValue(RapidMiner.PROPERTY_RAPIDMINER_SYSTEM_LEGACY_DATA_MGMT, - String.valueOf(legacyMode)); - } - - @Parameters(name = "legacyMode={0}") - public static Collection params() { - return Arrays.asList(true, false); - } - - @Test - public void testSimple() { - Attribute attribute1 = attributeInt(); - Attribute attribute2 = attributeReal(); - ExampleSet set = ExampleSets.from(attribute1, attribute2).withBlankSize(200) - .withColumnFiller(attribute1, i -> i + 1).withColumnFiller(attribute2, i -> i + 1.7).build(); - Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); - - double[][] result = readTableToArray(table); - double[][] expected = readExampleSetToArray(set); - assertArrayEquals(expected, result); - } - - @Test - public void testNominal() { - Attribute attribute1 = attributeDogCatMouse(); - Attribute attribute2 = attributeYesNo(); - ExampleSet set = ExampleSets.from(attribute1, attribute2).withBlankSize(200) - .withColumnFiller(attribute1, i -> i % 3).withColumnFiller(attribute2, i -> i % 2).build(); - set.getExample(10).setValue(attribute1, Double.NaN); - Table table = 
com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); - - String[][] result = readTableToStringArray(table); - String[][] expected = readExampleSetToStringArray(set); - assertArrayEquals(expected, result); - } - - @Test - public void testBinominal() { - Attribute attribute1 = AttributeFactory.createAttribute("oneNegative", Ontology.BINOMINAL); - attribute1.getMapping().mapString("one"); - assertEquals("one", attribute1.getMapping().getNegativeString()); - assertNull(attribute1.getMapping().getPositiveString()); - - Attribute attribute2 = AttributeFactory.createAttribute("empty", Ontology.BINOMINAL); - assertNull(attribute2.getMapping().getPositiveString()); - assertNull(attribute2.getMapping().getNegativeString()); - - Attribute attribute3 = AttributeFactory.createAttribute("binominal", Ontology.BINOMINAL); - attribute3.getMapping().mapString("negative"); - attribute3.getMapping().mapString("positive"); - assertEquals("negative", attribute3.getMapping().getNegativeString()); - assertEquals("positive", attribute3.getMapping().getPositiveString()); - - ExampleSet set = ExampleSets.from(attribute1, attribute2, attribute3).withBlankSize(200) - .withColumnFiller(attribute1, i -> i % 2 == 0 ? Double.NaN : 0).withColumnFiller(attribute2, - i -> Double.NaN).withColumnFiller(attribute3, i -> i % 2 == 0 ? 
Double.NaN : 1).build(); - Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); - - String[][] result = readTableToStringArray(table); - String[][] expected = readExampleSetToStringArray(set); - assertArrayEquals(expected, result); - - Dictionary oneNegative = table.column("oneNegative").getDictionary(String.class); - assertTrue(oneNegative.isBoolean()); - assertFalse(oneNegative.hasPositive()); - assertEquals(attribute1.getMapping().getNegativeString(), oneNegative.get(oneNegative.getNegativeIndex())); - assertEquals(1, oneNegative.size()); - - Dictionary empty = table.column("empty").getDictionary(String.class); - assertTrue(empty.isBoolean()); - assertFalse(empty.hasPositive()); - assertFalse(empty.hasNegative()); - assertEquals(0, empty.size()); - - Dictionary binominal = table.column("binominal").getDictionary(String.class); - assertTrue(binominal.isBoolean()); - assertEquals(2, binominal.size()); - assertEquals(attribute3.getMapping().getNegativeString(), binominal.get(binominal.getNegativeIndex())); - assertEquals(attribute3.getMapping().getPositiveString(), binominal.get(binominal.getPositiveIndex())); - } - - @Test - public void testNominalUnusedValue() { - Attribute attribute1 = attributeDogCatMouse(); - Attribute attribute2 = attributeYesNo(); - ExampleSet set = ExampleSets.from(attribute1, attribute2).withBlankSize(200) - .withColumnFiller(attribute1, i -> i % 2).withColumnFiller(attribute2, i -> 1).build(); - set.getExample(10).setValue(attribute1, Double.NaN); - Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); - - String[][] result = readTableToStringArray(table); - String[][] expected = readExampleSetToStringArray(set); - assertArrayEquals(expected, result); - } - - @Test - public void testNominalDoubleValue() { - Attribute attribute1 = attributeDogCatMouse(); - Attribute attribute2 = attributeYesNo(); - ExampleSet set = ExampleSets.from(attribute1, 
attribute2).withBlankSize(200) - .withColumnFiller(attribute1, i -> i % 3).withColumnFiller(attribute2, i -> i % 2).build(); - set.getExample(10).setValue(attribute1, Double.NaN); - attribute1.getMapping().setMapping("cat", 0); - Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); - - String[][] result = readTableToStringArray(table); - String[][] expected = readExampleSetToStringArray(set); - assertArrayEquals(expected, result); - } - - @Test - public void testNominalNullValue() { - Attribute attribute1 = attributeDogCatMouse(); - Attribute attribute2 = attributeYesNo(); - ExampleSet set = ExampleSets.from(attribute1, attribute2).withBlankSize(200) - .withColumnFiller(attribute1, i -> i % 3).withColumnFiller(attribute2, i -> i % 2).build(); - set.getExample(10).setValue(attribute1, Double.NaN); - attribute1.getMapping().setMapping(null, 2); - Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); - - String[][] result = readTableToStringArray(table); - String[][] expected = readExampleSetToStringArray(set); - assertArrayEquals(expected, result); - } - - @Test - public void testManyColumns() { - List attributes = new ArrayList<>(); - for (int i = 0; i < 60; i++) { - attributes.add(attributeReal(i)); - } - ExampleSetBuilder builder = ExampleSets.from(attributes).withBlankSize(20); - for (int i = 0; i < 60; i++) { - builder.withColumnFiller(attributes.get(i), j -> j + 1.7); - } - ExampleSet set = builder.build(); - Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); - - double[][] result = readTableToArray(table); - double[][] expected = readExampleSetToArray(set); - assertArrayEquals(expected, result); - } - - @Test - public void testUnsafeAttribute() { - Attribute attribute1 = attributeInt(); - Attribute attribute2 = attributeReal(); - ExampleSet set = ExampleSets.from(attribute1, attribute2).withBlankSize(200) - .withColumnFiller(attribute1, i -> i + 
1).withColumnFiller(attribute2, i -> i + 1.7).build(); - set.getAttributes().allAttributes().next().addTransformation(new AttributeTransformation() { - @Override - public double transform(Attribute attribute, double value) { - return value; - } - - @Override - public double inverseTransform(Attribute attribute, double value) { - return value; - } - - @Override - public boolean isReversable() { - return false; - } - - @Override - public Object clone() { - return null; - } - }); - Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); - - double[][] result = readTableToArray(table); - double[][] expected = readExampleSetToArray(set); - assertArrayEquals(expected, result); - } - - - @Test - public void testTypes() { - List attributes = new ArrayList<>(); - for (int i = 1; i < Ontology.VALUE_TYPE_NAMES.length; i++) { - attributes.add(AttributeFactory.createAttribute(i)); - } - ExampleSet set = ExampleSets.from(attributes) - .build(); - Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); - - Column.TypeId[] result = - table.labels().stream().map(label -> table.column(label).type().id()).toArray(Column - .TypeId[]::new); - Column.TypeId[] expected = - new Column.TypeId[]{Column.TypeId.NOMINAL, Column.TypeId.REAL, Column.TypeId.INTEGER, - Column.TypeId.REAL, Column.TypeId.NOMINAL, Column.TypeId.NOMINAL, Column.TypeId.NOMINAL, - Column.TypeId.NOMINAL, Column.TypeId.DATE_TIME, Column.TypeId.DATE_TIME, - Column.TypeId.DATE_TIME}; - assertArrayEquals(expected, result); - - com.rapidminer.belt.table.LegacyType[] legacyResult = table.labels().stream() - .map(label -> table.getFirstMetaData(label, com.rapidminer.belt.table.LegacyType.class)) - .toArray(com.rapidminer.belt.table.LegacyType[]::new); - com.rapidminer.belt.table.LegacyType[] legacyExpected = - new com.rapidminer.belt.table.LegacyType[]{com.rapidminer.belt.table.LegacyType.NOMINAL, - com.rapidminer.belt.table.LegacyType.NUMERICAL, null, null, - 
com.rapidminer.belt.table.LegacyType.STRING, - com.rapidminer.belt.table.LegacyType.BINOMINAL, null, - com.rapidminer.belt.table.LegacyType.FILE_PATH, null, - com.rapidminer.belt.table.LegacyType.DATE, com.rapidminer.belt.table.LegacyType.TIME}; - assertArrayEquals(legacyExpected, legacyResult); - } - - @Test - public void testTypesView() { - List attributes = new ArrayList<>(); - for (int i = 1; i < Ontology.VALUE_TYPE_NAMES.length; i++) { - attributes.add(AttributeFactory.createAttribute(i)); - } - ExampleSet set = new SortedExampleSet(ExampleSets.from(attributes) - .build(), attributes.get(0), SortedExampleSet.INCREASING); - Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); - - Column.TypeId[] result = - table.labels().stream().map(label -> table.column(label).type().id()).toArray(Column - .TypeId[]::new); - Column.TypeId[] expected = - new Column.TypeId[]{Column.TypeId.NOMINAL, Column.TypeId.REAL, Column.TypeId.INTEGER, - Column.TypeId.REAL, Column.TypeId.NOMINAL, Column.TypeId.NOMINAL, Column.TypeId.NOMINAL, - Column.TypeId.NOMINAL, Column.TypeId.DATE_TIME, Column.TypeId.DATE_TIME, - Column.TypeId.DATE_TIME}; - assertArrayEquals(expected, result); - - com.rapidminer.belt.table.LegacyType[] legacyResult = table.labels().stream() - .map(label -> table.getFirstMetaData(label, com.rapidminer.belt.table.LegacyType.class)) - .toArray(com.rapidminer.belt.table.LegacyType[]::new); - com.rapidminer.belt.table.LegacyType[] legacyExpected = - new com.rapidminer.belt.table.LegacyType[]{com.rapidminer.belt.table.LegacyType.NOMINAL, - com.rapidminer.belt.table.LegacyType.NUMERICAL, null, null, - com.rapidminer.belt.table.LegacyType.STRING, - com.rapidminer.belt.table.LegacyType.BINOMINAL, null, - com.rapidminer.belt.table.LegacyType.FILE_PATH, null, - com.rapidminer.belt.table.LegacyType.DATE, com.rapidminer.belt.table.LegacyType.TIME}; - assertArrayEquals(legacyExpected, legacyResult); - } - - - @Test - public void testRoles() { - 
String[] roles = new String[]{Attributes.ID_NAME, Attributes.CONFIDENCE_NAME + "_" + "Yes", - Attributes.LABEL_NAME, Attributes.PREDICTION_NAME, - Attributes.CLUSTER_NAME, Attributes.WEIGHT_NAME, Attributes.BATCH_NAME, Attributes.OUTLIER_NAME, - Attributes.CONFIDENCE_NAME, - Attributes.CLASSIFICATION_COST, "ignore-me"}; - List attributes = new ArrayList<>(); - for (int i = 0; i < roles.length + 1; i++) { - attributes.add(AttributeFactory.createAttribute(Ontology.NUMERICAL)); - } - ExampleSetBuilder builder = ExampleSets.from(attributes); - for (int i = 1; i < roles.length + 1; i++) { - builder.withRole(attributes.get(i), roles[i - 1]); - } - ExampleSet set = builder.build(); - Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); - - ColumnRole[] result = table.labels().stream() - .map(label -> table.getFirstMetaData(label, ColumnRole.class)) - .toArray(ColumnRole[]::new); - ColumnRole[] expected = - new ColumnRole[]{null, ColumnRole.ID, ColumnRole.SCORE, ColumnRole.LABEL, ColumnRole.PREDICTION, - ColumnRole.CLUSTER, - ColumnRole.WEIGHT, ColumnRole.BATCH, ColumnRole.OUTLIER, ColumnRole - .SCORE, - ColumnRole.METADATA, ColumnRole.METADATA}; - assertArrayEquals(expected, result); - - com.rapidminer.belt.table.LegacyRole[] legacyResult = table.labels().stream() - .map(label -> table.getFirstMetaData(label, com.rapidminer.belt.table.LegacyRole.class)) - .toArray(com.rapidminer.belt.table.LegacyRole[]::new); - com.rapidminer.belt.table.LegacyRole[] legacyExpected = - new com.rapidminer.belt.table.LegacyRole[]{null, null, null, null, null, null, null, null, null, - null, - new com.rapidminer.belt.table.LegacyRole(Attributes.CLASSIFICATION_COST), - new com.rapidminer.belt.table.LegacyRole("ignore-me")}; - assertArrayEquals(legacyExpected, legacyResult); - - ColumnReference[] references = table.labels().stream() - .map(label -> table.getFirstMetaData(label, ColumnReference.class)) - .toArray(ColumnReference[]::new); - ColumnReference[] 
referencesExpected = - new ColumnReference[]{null, null, - new ColumnReference(set.getAttributes().getPredictedLabel().getName(), "Yes"), null, null, - null, null, null, null, new ColumnReference(set.getAttributes().getPredictedLabel().getName()), - null, null}; - assertArrayEquals(referencesExpected, references); - } - - @Test - public void testRolesView() { - String[] roles = new String[]{Attributes.ID_NAME, Attributes.CONFIDENCE_NAME + "_" + "Yes", - Attributes.LABEL_NAME, Attributes.PREDICTION_NAME, - Attributes.CLUSTER_NAME, Attributes.WEIGHT_NAME, Attributes.BATCH_NAME, Attributes.OUTLIER_NAME, - Attributes.CONFIDENCE_NAME, - Attributes.CLASSIFICATION_COST, "ignore-me"}; - List attributes = new ArrayList<>(); - for (int i = 0; i < roles.length + 1; i++) { - attributes.add(AttributeFactory.createAttribute(Ontology.NUMERICAL)); - } - ExampleSetBuilder builder = ExampleSets.from(attributes); - for (int i = 1; i < roles.length + 1; i++) { - builder.withRole(attributes.get(i), roles[i - 1]); - } - ExampleSet set = new SortedExampleSet(builder.build(), attributes.get(1), SortedExampleSet.DECREASING); - Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); - - ColumnRole[] result = table.labels().stream() - .map(label -> table.getFirstMetaData(label, ColumnRole.class)) - .toArray(ColumnRole[]::new); - ColumnRole[] expected = - new ColumnRole[]{null, ColumnRole.ID, ColumnRole.SCORE, ColumnRole.LABEL, ColumnRole.PREDICTION, - ColumnRole.CLUSTER, - ColumnRole.WEIGHT, ColumnRole.BATCH, ColumnRole.OUTLIER, ColumnRole.SCORE, - ColumnRole.METADATA, ColumnRole.METADATA}; - assertArrayEquals(expected, result); - - com.rapidminer.belt.table.LegacyRole[] legacyResult = table.labels().stream() - .map(label -> table.getFirstMetaData(label, com.rapidminer.belt.table.LegacyRole.class)) - .toArray(com.rapidminer.belt.table.LegacyRole[]::new); - com.rapidminer.belt.table.LegacyRole[] legacyExpected = - new 
com.rapidminer.belt.table.LegacyRole[]{null, null, null, null, null, null, null, null, null, - null, - new com.rapidminer.belt.table.LegacyRole(Attributes.CLASSIFICATION_COST), - new com.rapidminer.belt.table.LegacyRole("ignore-me")}; - assertArrayEquals(legacyExpected, legacyResult); - - ColumnReference[] references = table.labels().stream() - .map(label -> table.getFirstMetaData(label, ColumnReference.class)) - .toArray(ColumnReference[]::new); - ColumnReference[] referencesExpected = - new ColumnReference[]{null, null, - new ColumnReference(set.getAttributes().getPredictedLabel().getName(), "Yes"), null, null, - null, null, null, null, new ColumnReference(set.getAttributes().getPredictedLabel().getName()), - null, null}; - assertArrayEquals(referencesExpected, references); - } - - @Test - public void testAnnotations() { - Attribute attribute1 = attributeInt(); - Attribute attribute2 = attributeReal(); - ExampleSet set = ExampleSets.from(attribute1, attribute2).withBlankSize(10) - .withColumnFiller(attribute1, i -> i + 1).withColumnFiller(attribute2, i -> i + 1.7).build(); - set.getAnnotations().setAnnotation(Annotations.KEY_DC_AUTHOR, "gmeier"); - - IOTable table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT); - - assertEquals(set.getAnnotations(), table.getAnnotations()); - } - } - - @RunWith(Parameterized.class) - public static class TableToSet { - - public TableToSet(boolean legacyMode) { - ParameterService.setParameterValue(RapidMiner.PROPERTY_RAPIDMINER_SYSTEM_LEGACY_DATA_MGMT, - String.valueOf(legacyMode)); - } - - @Parameters(name = "legacyMode={0}") - public static Collection params() { - return Arrays.asList(true, false); - } - - @Test - public void testSimple() { - Table table = Builders.newTableBuilder(112).addReal("real", i -> 3 * i / 5.0).addInt("int", i -> 5 * i) - .build(Belt.defaultContext()); - - ExampleSet set = com.rapidminer.belt.table.BeltConverter.convert(new IOTable(table), CONTEXT); - - double[][] expected = 
readTableToArray(table); - double[][] result = readExampleSetToArray(set); - assertArrayEquals(expected, result); - } - - @Test - public void testNominal() { - CategoricalBuffer buffer = BufferAccessor.get().newUInt8Buffer(112); - for (int i = 0; i < buffer.size(); i++) { - buffer.set(i, "value" + (i % 5)); - } - CategoricalBuffer buffer2 = BufferAccessor.get().newUInt8Buffer(112); - for (int i = 0; i < buffer2.size(); i++) { - buffer2.set(i, "val" + (i % 7)); - } - buffer2.set(42, null); - Table table = Builders.newTableBuilder(112).add("first", buffer.toColumn(ColumnTypes.NOMINAL)) - .add("second", buffer2.toColumn(ColumnTypes.NOMINAL)) - .build(Belt.defaultContext()); - - ExampleSet set = com.rapidminer.belt.table.BeltConverter.convert(new IOTable(table), CONTEXT); - - String[][] expected = readTableToStringArray(table); - String[][] result = readExampleSetToStringArray(set); - assertArrayEquals(expected, result); - } - - @Test - public void testNominalGaps() { - CategoricalBuffer buffer = Buffers.categoricalBuffer(11); - for (int i = 0; i < buffer.size(); i++) { - buffer.set(i, "value" + i); - } - buffer.set(7, null); - buffer.set(5, null); - CategoricalBuffer buffer2 = Buffers.categoricalBuffer(11); - for (int i = 0; i < buffer2.size(); i++) { - buffer2.set(i, "val" + i); - } - buffer2.set(3, null); - buffer2.set(5, null); - Column column = Columns.removeUnusedDictionaryValues(buffer.toColumn(ColumnTypes.NOMINAL), - Columns.CleanupOption.REMOVE, Belt.defaultContext()); - Column column2 = Columns.removeUnusedDictionaryValues(buffer2.toColumn(ColumnTypes.NOMINAL), - Columns.CleanupOption.REMOVE, Belt.defaultContext()); - Table table = Builders.newTableBuilder(11).add("first", column) - .add("second", column2) - .build(Belt.defaultContext()); - - ExampleSet set = com.rapidminer.belt.table.BeltConverter.convert(new IOTable(table), CONTEXT); - - String[][] expected = readTableToStringArray(table); - String[][] result = readExampleSetToStringArray(set); - 
assertArrayEquals(expected, result); - } - - @Test - public void testBinominal() { - CategoricalBuffer buffer = Buffers.categoricalBuffer(112, 2); - for (int i = 0; i < buffer.size(); i++) { - buffer.set(i, "value" + (i % 2)); - } - buffer.set(100, null); - CategoricalBuffer buffer2 = Buffers.categoricalBuffer(112, 2); - for (int i = 0; i < buffer2.size(); i++) { - buffer2.set(i, "val" + (i % 2)); - } - buffer2.set(42, null); - CategoricalBuffer buffer3 = Buffers.categoricalBuffer(112, 2); - for (int i = 0; i < buffer.size(); i += 2) { - buffer3.set(i, "one"); - } - CategoricalBuffer buffer4 = Buffers.categoricalBuffer(112, 2); - - Table table = Builders.newTableBuilder(112).add("first", buffer.toBooleanColumn(ColumnTypes.NOMINAL, "value0")) - .add("second", buffer2.toBooleanColumn(ColumnTypes.NOMINAL, "val1")) - .add("onePositive", buffer3.toBooleanColumn(ColumnTypes.NOMINAL, "one")) - .add("oneNegative", buffer3.toBooleanColumn(ColumnTypes.NOMINAL, null)) - .add("empty", buffer4.toBooleanColumn(ColumnTypes.NOMINAL, null)) - .build(Belt.defaultContext()); - - ExampleSet set = com.rapidminer.belt.table.BeltConverter.convert(new IOTable(table), CONTEXT); - - String[][] expected = readTableToStringArray(table); - String[][] result = readExampleSetToStringArray(set); - assertArrayEquals(expected, result); - - NominalMapping first = set.getAttributes().get("first").getMapping(); - assertEquals("value1", first.getNegativeString()); - assertEquals("value0", first.getPositiveString()); - - NominalMapping second = set.getAttributes().get("second").getMapping(); - assertEquals("val0", second.getNegativeString()); - assertEquals("val1", second.getPositiveString()); - - NominalMapping oneNegative = set.getAttributes().get("oneNegative").getMapping(); - assertEquals("one", oneNegative.getNegativeString()); - assertNull(oneNegative.getPositiveString()); - - NominalMapping empty = set.getAttributes().get("empty").getMapping(); - assertNull(empty.getPositiveString()); - 
assertNull(empty.getNegativeString()); - - int[] valueTypes = - Arrays.stream(set.getAttributes().createRegularAttributeArray()).mapToInt(Attribute::getValueType).toArray(); - assertArrayEquals(new int[]{Ontology.BINOMINAL, Ontology.BINOMINAL, Ontology.POLYNOMINAL, - Ontology.BINOMINAL, Ontology.BINOMINAL}, valueTypes); - } - - @Test - public void testBinominalGaps() { - CategoricalBuffer buffer = BufferAccessor.get().newUInt2Buffer(112); - buffer.set(0, "bla"); - for (int i = 0; i < buffer.size(); i++) { - buffer.set(i, "blup"); - } - buffer.set(100, null); - CategoricalBuffer buffer2 = BufferAccessor.get().newUInt2Buffer(112); - buffer2.set(0, "bla"); - for (int i = 0; i < buffer.size(); i++) { - buffer2.set(i, "blup"); - } - buffer2.set(100, null); - - Column bla = Columns.removeUnusedDictionaryValues(buffer.toBooleanColumn(ColumnTypes.NOMINAL, "bla"), - Columns.CleanupOption.REMOVE, Belt.defaultContext()); - Column blup = Columns.removeUnusedDictionaryValues(buffer2.toBooleanColumn(ColumnTypes.NOMINAL, "blup"), - Columns.CleanupOption.REMOVE, Belt.defaultContext()); - Table table = Builders.newTableBuilder(112).add("first", bla) - .add("second", blup) - .build(Belt.defaultContext()); - - ExampleSet set = com.rapidminer.belt.table.BeltConverter.convert(new IOTable(table), CONTEXT); - - String[][] expected = readTableToStringArray(table); - String[][] result = readExampleSetToStringArray(set); - assertArrayEquals(expected, result); - - int[] valueTypes = - Arrays.stream(set.getAttributes().createRegularAttributeArray()).mapToInt(Attribute::getValueType).toArray(); - assertArrayEquals(new int[]{Ontology.BINOMINAL, Ontology.POLYNOMINAL}, valueTypes); - - NominalMapping first = set.getAttributes().get("first").getMapping(); - assertEquals("blup", first.getNegativeString()); - assertNull(first.getPositiveString()); - - } - - @Test - public void testNominalUnusedValue() { - CategoricalBuffer buffer = Buffers.categoricalBuffer(112); - for (int i = 0; i < buffer.size(); 
i++) { - buffer.set(i, "valu" + (i % 5)); - } - for (int i = 0; i < buffer.size(); i++) { - buffer.set(i, "value" + (i % 5)); - } - - CategoricalBuffer buffer2 = Buffers.categoricalBuffer(112); - for (int i = 0; i < buffer2.size(); i++) { - buffer2.set(i, "val" + (i % 7)); - } - buffer2.set(42, null); - Table table = Builders.newTableBuilder(112).add("first", buffer.toColumn(ColumnTypes.NOMINAL)) - .add("second", buffer2.toColumn(ColumnTypes.NOMINAL)) - .build(Belt.defaultContext()); - - ExampleSet set = com.rapidminer.belt.table.BeltConverter.convert(new IOTable(table), CONTEXT); - - String[][] expected = readTableToStringArray(table); - String[][] result = readExampleSetToStringArray(set); - assertArrayEquals(expected, result); - } - - @Test - public void testManyColumns() { - TableBuilder builder = Builders.newTableBuilder(11); - for (int i = 0; i < 30; i++) { - builder.addReal("real" + i, j -> 3 * j / 5.0).addInt("int" + i, j -> 5 * j); - } - Table table = builder.build(Belt.defaultContext()); - - ExampleSet set = com.rapidminer.belt.table.BeltConverter.convert(new IOTable(table), CONTEXT); - - double[][] expected = readTableToArray(table); - double[][] result = readExampleSetToArray(set); - assertArrayEquals(expected, result); - } - - @Test - public void testRoles() { - TableBuilder builder = Builders.newTableBuilder(10); - builder.addInt("att-1", i -> i); - - ColumnRole[] columnRoles = new ColumnRole[]{ColumnRole.ID, ColumnRole.LABEL, ColumnRole.PREDICTION, - ColumnRole.SCORE, ColumnRole.WEIGHT, ColumnRole.OUTLIER, ColumnRole.CLUSTER, ColumnRole.BATCH, - ColumnRole.METADATA}; - for (int i = 0; i < columnRoles.length; i++) { - builder.addReal("att" + i, j -> j); - builder.addMetaData("att" + i, columnRoles[i]); - } - - builder.addInt("batt1", i -> i); - builder.addMetaData("batt1", ColumnRole.METADATA); - builder.addMetaData("batt1", new com.rapidminer.belt.table.LegacyRole("ignore-me")); - - builder.addInt("batt2", i -> i); - builder.addMetaData("batt2", 
ColumnRole.SCORE); - builder.addMetaData("batt2", new com.rapidminer.belt.table.LegacyRole("confidence_Yes")); - - Table table = builder.build(Belt.defaultContext()); - - ExampleSet set = com.rapidminer.belt.table.BeltConverter.convert(new IOTable(table), CONTEXT); - - Iterable iterable = () -> set.getAttributes().allAttributeRoles(); - String[] result = StreamSupport.stream(iterable.spliterator(), false).map(AttributeRole::getSpecialName) - .toArray(String[]::new); - String[] expected = - new String[]{null, Attributes.ID_NAME, Attributes.LABEL_NAME, Attributes.PREDICTION_NAME, - Attributes.CONFIDENCE_NAME, Attributes.WEIGHT_NAME, Attributes.OUTLIER_NAME, - Attributes.CLUSTER_NAME, Attributes.BATCH_NAME, "meta_data", "ignore-me", - "confidence_Yes"}; - assertArrayEquals(expected, result); - } - - @Test - public void testTypes() { - TableBuilder builder = Builders.newTableBuilder(10); - builder.addReal("att1", i -> i); - - builder.addReal("att2", i -> i); - builder.addMetaData("att2", com.rapidminer.belt.table.LegacyType.NUMERICAL); - - builder.addInt("att3", i -> i); - - builder.addInt("att4", i -> i); - builder.addMetaData("att4", com.rapidminer.belt.table.LegacyType.NUMERICAL); - - builder.addDateTime("att5", i -> Instant.EPOCH); - - builder.addDateTime("att6", i -> Instant.EPOCH); - builder.addMetaData("att6", com.rapidminer.belt.table.LegacyType.DATE); - - builder.addDateTime("att6.5", i -> Instant.EPOCH); - builder.addMetaData("att6.5", com.rapidminer.belt.table.LegacyType.TIME); - - builder.addTime("att7", i -> LocalTime.NOON); - - builder.addTime("att7.5", i -> LocalTime.NOON); - builder.addMetaData("att7.5", com.rapidminer.belt.table.LegacyType.NUMERICAL); - - builder.addNominal("att8", i -> i % 2 == 0 ? "A" : "B"); - - builder.addNominal("att9", i -> i % 2 == 0 ? "A" : "B", 2); - - builder.addNominal("att10", i -> i % 2 == 0 ? 
"A" : "B"); - builder.addMetaData("att10", com.rapidminer.belt.table.LegacyType.BINOMINAL); - - builder.addNominal("att11", i -> i % 2 == 0 ? "A" : "B", 2); - builder.addMetaData("att11", com.rapidminer.belt.table.LegacyType.STRING); - - builder.addNominal("att12", i -> i % 2 == 0 ? "A" : "B"); - builder.addMetaData("att12", com.rapidminer.belt.table.LegacyType.FILE_PATH); - - builder.addNominal("att13", i -> i % 2 == 0 ? "A" : "B", 2); - builder.addMetaData("att13", com.rapidminer.belt.table.LegacyType.NOMINAL); - - builder.addBoolean("att14", i -> i % 2 == 0 ? "A" : "B", "A", ColumnTypes.NOMINAL); - - Table table = builder.build(Belt.defaultContext()); - - ExampleSet set = com.rapidminer.belt.table.BeltConverter.convert(new IOTable(table), CONTEXT); - - int[] result = - StreamSupport.stream(set.getAttributes().spliterator(), false).mapToInt(Attribute::getValueType) - .toArray(); - int[] expected = new int[]{Ontology.REAL, Ontology.NUMERICAL, Ontology.INTEGER, Ontology.NUMERICAL, - Ontology.DATE_TIME, Ontology.DATE, Ontology.TIME, Ontology.INTEGER, Ontology.NUMERICAL, Ontology.POLYNOMINAL, Ontology.POLYNOMINAL, - Ontology.BINOMINAL, Ontology.STRING, Ontology.FILE_PATH, Ontology.NOMINAL, Ontology.BINOMINAL}; - - assertArrayEquals(expected, result); - } - - @Test - public void testInvalidLegacyTypes() { - TableBuilder builder = Builders.newTableBuilder(10); - builder.addReal("att1", i -> i); - builder.addMetaData("att1", com.rapidminer.belt.table.LegacyType.DATE_TIME); - - builder.addReal("att2", i -> i); - builder.addMetaData("att2", com.rapidminer.belt.table.LegacyType.INTEGER); - - builder.addInt("att3", i -> i); - builder.addMetaData("att3", com.rapidminer.belt.table.LegacyType.REAL); - - builder.addInt("att4", i -> i); - builder.addMetaData("att4", com.rapidminer.belt.table.LegacyType.NOMINAL); - - builder.addNominal("att5", i -> i % 2 == 0 ? "A" : i % 3 == 0 ? 
"B" : "C", 2); - builder.addMetaData("att5", com.rapidminer.belt.table.LegacyType.BINOMINAL); - - builder.addTime("att6", i -> LocalTime.NOON); - builder.addMetaData("att6", com.rapidminer.belt.table.LegacyType.TIME); - - builder.addTime("att7", i -> LocalTime.NOON); - builder.addMetaData("att7", com.rapidminer.belt.table.LegacyType.DATE); - - builder.addTime("att8", i -> LocalTime.NOON); - builder.addMetaData("att8", com.rapidminer.belt.table.LegacyType.DATE_TIME); - - builder.addDateTime("att9", i -> Instant.EPOCH); - builder.addMetaData("att9", com.rapidminer.belt.table.LegacyType.NOMINAL); - - Table table = builder.build(Belt.defaultContext()); - - ExampleSet set = com.rapidminer.belt.table.BeltConverter.convert(new IOTable(table), CONTEXT); - - int[] result = - StreamSupport.stream(set.getAttributes().spliterator(), false).mapToInt(Attribute::getValueType) - .toArray(); - int[] expected = new int[]{Ontology.REAL, Ontology.REAL, Ontology.INTEGER, Ontology.INTEGER, - Ontology.POLYNOMINAL, Ontology.INTEGER, Ontology.INTEGER, Ontology.INTEGER, Ontology.DATE_TIME}; - assertArrayEquals(expected, result); - } - - @Test - public void testAnnotations() { - Table table = Builders.newTableBuilder(11).addReal("real", i -> 3 * i / 5.0).addInt("int", i -> 5 * i) - .build(Belt.defaultContext()); - - IOTable tableObject = new IOTable(table); - tableObject.getAnnotations().setAnnotation(Annotations.KEY_DC_AUTHOR, "gmeier"); - - ExampleSet set = com.rapidminer.belt.table.BeltConverter.convert(tableObject, CONTEXT); - - assertEquals(tableObject.getAnnotations(), set.getAnnotations()); - } - - - @Test(expected = BeltConverter.ConversionException.class) - public void testCustomColumns() { - ColumnType customType = ColumnTypes.categoricalType("com.rapidminer.custom.integer", - Integer.class, null); - Table table = Builders.newTableBuilder(11).addReal("real", i -> 3 * i / 5.0).addInt("int", i -> 5 * i) - .addCategorical("custom", i -> i, customType) - .build(Belt.defaultContext()); 
- - IOTable tableObject = new IOTable(table); - try { - BeltConverter.convert(tableObject, CONTEXT); - } catch (BeltConverter.ConversionException e) { - assertEquals("custom", e.getColumnName()); - assertEquals(customType, e.getType()); - throw e; - } - } - } - - @RunWith(Parameterized.class) - public static class InputDetection { - - @Parameter - public String inputType; - - @Parameter(1) - public ExampleSet input; - - @Parameters(name = "{0}") - public static Iterable inputClasses() throws ExpressionEvaluationException { - Attribute attribute1 = attributeInt(); - Attribute attribute2 = attributeReal(); - - // Simple example set with no logic in the view - ExampleSet simpleSet = ExampleSets.from(attribute1, attribute2) - .withBlankSize(200) - .withColumnFiller(attribute1, i -> i + 1) - .withColumnFiller(attribute2, i -> i + 1.7) - .build(); - - // Complex example sets that are considered thread-safe - ExampleSet conditionedSet = new ConditionedExampleSet(simpleSet, new Condition() { - @Override - public boolean conditionOk(Example example) throws ExpressionEvaluationException { - return example.getValue(attribute1) < 100 && example.getValue(attribute2) < 100; - } - - @Override - public Condition duplicate() { - return null; - } - }); - - ExampleSet sortedSet = new SortedExampleSet(simpleSet, attribute1, SortedExampleSet.DECREASING); - - ExampleSet mappedSet = new MappedExampleSet(simpleSet, new int[]{ - 133, 156, 16, 0, 20, 199, 29, 192, - 185, 33, 175, 58, 15, 100, 2, 68, - 9, 122, 87, 84, 64, 56, 83, 177, - 39, 90, 112, 66, 90, 17, 95, 25} - ); - - // Complex example set that is not considered thread-safe - Partition partition = new Partition(new double[]{0.25, 0.5, 0.25}, 200, new SimplePartitionBuilder()); - SplittedExampleSet splittedSet = new SplittedExampleSet(simpleSet, partition); - - // Simple example set with a attribute transformation that is considered unsafe - ExampleSet transformationSet = (ExampleSet) simpleSet.clone(); - Attribute clonedAttribute 
= transformationSet.getAttributes().get(attribute1.getName()); - clonedAttribute.addTransformation(new AttributeTransformation() { - @Override - public double transform(Attribute attribute, double value) { - return value * 42; - } - - @Override - public double inverseTransform(Attribute attribute, double value) { - throw new UnsupportedOperationException(); - } - - @Override - public boolean isReversable() { - return false; - } - - @Override - public Object clone() { - return this; - } - }); - - return Arrays.asList(new Object[][] { - {simpleSet.getClass().getSimpleName(), simpleSet}, - {conditionedSet.getClass().getSimpleName(), conditionedSet}, - {sortedSet.getClass().getSimpleName(), sortedSet}, - {mappedSet.getClass().getSimpleName(), mappedSet}, - {splittedSet.getClass().getSimpleName(), splittedSet}, - {AttributeTransformation.class.getSimpleName(), transformationSet} - }); - } - - @Test - public void testInputs() { - Table table = com.rapidminer.belt.table.BeltConverter.convert(input, CONTEXT).getTable(); - double[][] result = readTableToArray(table); - double[][] expected = readExampleSetToArray(input); - assertArrayEquals(expected, result); - } - - } - - @RunWith(Parameterized.class) - public static class BackAndForth { - - @BeforeClass - public static void setup() { - RapidAssert.ASSERTER_REGISTRY.registerAllAsserters(new AsserterFactoryRapidMiner()); - } - - public BackAndForth(boolean legacyMode) { - ParameterService.setParameterValue(RapidMiner.PROPERTY_RAPIDMINER_SYSTEM_LEGACY_DATA_MGMT, - String.valueOf(legacyMode)); - } - - @Parameters(name = "legacyMode={0}") - public static Collection params() { - return Arrays.asList(true, false); - } - - @Test - public void testAllTypes() { - List attributes = new ArrayList<>(); - for (int i = 1; i < Ontology.VALUE_TYPE_NAMES.length; i++) { - attributes.add(AttributeFactory.createAttribute(i)); - } - ExampleSet set = ExampleSets.from(attributes) - .build(); - - Table table = 
com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); - ExampleSet backSet = com.rapidminer.belt.table.BeltConverter.convert(new IOTable(table), CONTEXT); - RapidAssert.assertEquals(set, backSet); - } - - @Test - public void testAllTypesView() { - List attributes = new ArrayList<>(); - for (int i = 1; i < Ontology.VALUE_TYPE_NAMES.length; i++) { - attributes.add(AttributeFactory.createAttribute(i)); - } - ExampleSet set = new SortedExampleSet(ExampleSets.from(attributes) - .build(), attributes.get(1), SortedExampleSet.DECREASING);; - - Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); - ExampleSet backSet = com.rapidminer.belt.table.BeltConverter.convert(new IOTable(table), CONTEXT); - RapidAssert.assertEquals(set, backSet); - } - - @Test - public void testRoles() { - Attribute integer = attributeInt(); - Attribute animals = attributeDogCatMouse(); - Attribute real = attributeReal(); - Attribute answer = attributeYesNo(); - List attributes = Arrays.asList(integer, animals, real, answer); - - ExampleSet set = ExampleSets.from(attributes).withBlankSize(10) - .withRole(integer, Attributes.CONFIDENCE_NAME+"_"+"Yes") - .withRole(answer, Attributes.LABEL_NAME) - .withRole(animals, "someStupidRole").build(); - - Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); - ExampleSet backSet = com.rapidminer.belt.table.BeltConverter.convert(new IOTable(table), CONTEXT); - RapidAssert.assertEquals(set, backSet); - } - - @Test - public void testNumericTypes() { - Attribute numeric = AttributeFactory.createAttribute("numeric", Ontology.NUMERICAL); - Attribute real = AttributeFactory.createAttribute("real", Ontology.REAL); - Attribute integer = AttributeFactory.createAttribute("integer", Ontology.INTEGER); - Attribute dateTime = AttributeFactory.createAttribute("date_time", Ontology.DATE_TIME); - Attribute date = AttributeFactory.createAttribute("date", Ontology.DATE); - Attribute time = 
AttributeFactory.createAttribute("time", Ontology.TIME); - List attributes =Arrays.asList(numeric, real, integer, dateTime, date, time); - ExampleSet set = ExampleSets.from(attributes).withBlankSize(150) - .withColumnFiller(numeric, i -> Math.random() > 0.7 ? Double.NaN : Math.random()) - .withColumnFiller(real, i -> Math.random() > 0.7 ? Double.NaN : 42 + Math.random()) - .withColumnFiller(integer, i -> Math.random() > 0.7 ? Double.NaN : Math.round(Math.random() * 100)) - .withColumnFiller(dateTime, i -> Math.random() > 0.7 ? Double.NaN : (i % 3 == 0 ? -1 : 1) - * 1515410698d + Math.floor(Math.random() * 1000)) - .withColumnFiller(date, i -> Math.random() > 0.7 ? Double.NaN : (i % 3 == 0 ? -1 : 1) * - 230169600000d + Math.floor(Math.random() * 100) * 1000d * 60 * 60 * 24) - .withColumnFiller(time, i -> Math.random() > 0.7 ? Double.NaN : - (i % 3 == 0 ? -1 : 1) * Math.floor(Math.random() * 60 * 60 * 24 * 1000)) - .build(); - - Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); - ExampleSet backSet = com.rapidminer.belt.table.BeltConverter.convert(new IOTable(table), CONTEXT); - RapidAssert.assertEquals(set, backSet); - } - - @Test - public void testNominalTypes() { - Attribute nominal = AttributeFactory.createAttribute("nominal", Ontology.NOMINAL); - Attribute string = AttributeFactory.createAttribute("string", Ontology.STRING); - Attribute polynominal = AttributeFactory.createAttribute("polynominal", Ontology.POLYNOMINAL); - Attribute binominal = AttributeFactory.createAttribute("binominal", Ontology.BINOMINAL); - Attribute path = AttributeFactory.createAttribute("path", Ontology.FILE_PATH); - for (int i = 0; i < 5; i++) { - nominal.getMapping().mapString("nominalValue" + i); - } - for (int i = 0; i < 4; i++) { - string.getMapping().mapString("veryVeryLongStringValue" + i); - } - for (int i = 0; i < 6; i++) { - polynominal.getMapping().mapString("polyValue" + i); - } - for (int i = 0; i < 2; i++) { - 
binominal.getMapping().mapString("binominalValue" + i); - } - for (int i = 0; i < 3; i++) { - path.getMapping().mapString("//folder/sufolder/subsubfolder/file" + i); - } - - List attributes = Arrays.asList(nominal, string, polynominal, binominal, path); - Random random = new Random(); - ExampleSet set = ExampleSets.from(attributes).withBlankSize(50) - .withColumnFiller(nominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(5)) - .withColumnFiller(string, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(4)) - .withColumnFiller(polynominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(6)) - .withColumnFiller(binominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(2)) - .withColumnFiller(path, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(3)) - .build(); - - Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); - ExampleSet backSet = com.rapidminer.belt.table.BeltConverter.convert(new IOTable(table), CONTEXT); - RapidAssert.assertEquals(set, backSet); - } - - @Test - public void testIncompleteBinominalTypes() { - Attribute binominalOne = AttributeFactory.createAttribute("binominalOne", Ontology.BINOMINAL); - Attribute binominalZero = AttributeFactory.createAttribute("binominalZero", Ontology.BINOMINAL); - binominalOne.getMapping().mapString("binominalValue" + 1); - - List attributes = Arrays.asList(binominalOne, binominalZero); - Random random = new Random(); - ExampleSet set = ExampleSets.from(attributes).withBlankSize(50) - .withColumnFiller(binominalOne, i -> random.nextDouble() > 0.7 ? 
Double.NaN : 0) - .withColumnFiller(binominalZero, i -> Double.NaN) - .build(); - - Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); - ExampleSet backSet = com.rapidminer.belt.table.BeltConverter.convert(new IOTable(table), CONTEXT); - RapidAssert.assertEquals(set, backSet); - } - - - } - - @RunWith(Parameterized.class) - public static class TableToHeaderSet { - - @BeforeClass - public static void setup() { - RapidAssert.ASSERTER_REGISTRY.registerAllAsserters(new AsserterFactoryRapidMiner()); - } - - public TableToHeaderSet(boolean legacyMode) { - ParameterService.setParameterValue(RapidMiner.PROPERTY_RAPIDMINER_SYSTEM_LEGACY_DATA_MGMT, - String.valueOf(legacyMode)); - } - - @Parameters(name = "legacyMode={0}") - public static Collection params() { - return Arrays.asList(true, false); - } - - - @Test - public void testNominalTypes() { - Attribute nominal = AttributeFactory.createAttribute("nominal", Ontology.NOMINAL); - Attribute string = AttributeFactory.createAttribute("string", Ontology.STRING); - Attribute polynominal = AttributeFactory.createAttribute("polynominal", Ontology.POLYNOMINAL); - Attribute binominal = AttributeFactory.createAttribute("binominal", Ontology.BINOMINAL); - Attribute path = AttributeFactory.createAttribute("path", Ontology.FILE_PATH); - for (int i = 0; i < 5; i++) { - nominal.getMapping().mapString("nominalValue" + i); - } - for (int i = 0; i < 4; i++) { - string.getMapping().mapString("veryVeryLongStringValue" + i); - } - for (int i = 0; i < 6; i++) { - polynominal.getMapping().mapString("polyValue" + i); - } - for (int i = 0; i < 2; i++) { - binominal.getMapping().mapString("binominalValue" + i); - } - for (int i = 0; i < 3; i++) { - path.getMapping().mapString("//folder/sufolder/subsubfolder/file" + i); - } - - List attributes = Arrays.asList(nominal, string, polynominal, binominal, path); - Random random = new Random(); - ExampleSet set = ExampleSets.from(attributes).withBlankSize(50) - 
.withColumnFiller(nominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(5)) - .withColumnFiller(string, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(4)) - .withColumnFiller(polynominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(6)) - .withColumnFiller(binominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(2)) - .withColumnFiller(path, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(3)) - .build(); - - Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); - - HeaderExampleSet headerSet = com.rapidminer.belt.table.BeltConverter.convertHeader(table); - - int[] oldValueTypes = Arrays.stream(set.getAttributes().createRegularAttributeArray()) - .mapToInt(Attribute::getValueType).toArray(); - int[] headerValueTypes = Arrays.stream(headerSet.getAttributes().createRegularAttributeArray()) - .mapToInt(Attribute::getValueType).toArray(); - assertArrayEquals(oldValueTypes, headerValueTypes); - - ExampleSet remapped = RemappedExampleSet.create(set, headerSet, false, true); - - String[][] expected = readTableToStringArray(table); - String[][] result = readExampleSetToStringArray(remapped); - assertArrayEquals(expected, result); - - double[][] expectedMapping = readTableToArray(table); - double[][] resultMapping = readExampleSetToArray(remapped); - assertArrayEquals(expectedMapping, resultMapping); - } - - @Test - public void testNumericTypes() { - Attribute numeric = AttributeFactory.createAttribute("numeric", Ontology.NUMERICAL); - Attribute real = AttributeFactory.createAttribute("real", Ontology.REAL); - Attribute integer = AttributeFactory.createAttribute("integer", Ontology.INTEGER); - Attribute dateTime = AttributeFactory.createAttribute("date_time", Ontology.DATE_TIME); - Attribute date = AttributeFactory.createAttribute("date", Ontology.DATE); - Attribute time = AttributeFactory.createAttribute("time", Ontology.TIME); - List attributes = 
Arrays.asList(numeric, real, integer, dateTime, date, time); - ExampleSet set = ExampleSets.from(attributes).withBlankSize(50).withRole(integer, Attributes.LABEL_NAME).build(); - - Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); - - HeaderExampleSet headerExampleSet = com.rapidminer.belt.table.BeltConverter.convertHeader(table); - - int[] oldValueTypes = Arrays.stream(set.getAttributes().createRegularAttributeArray()) - .mapToInt(Attribute::getValueType).toArray(); - int[] headerValueTypes = Arrays.stream(headerExampleSet.getAttributes().createRegularAttributeArray()) - .mapToInt(Attribute::getValueType).toArray(); - assertArrayEquals(oldValueTypes, headerValueTypes); - } - - @Test - public void testRemappingSame() { - CategoricalBuffer buffer = BufferAccessor.get().newUInt16Buffer(112); - for (int i = 0; i < buffer.size(); i++) { - buffer.set(i, "value" + (i % 5)); - } - CategoricalBuffer buffer2 = BufferAccessor.get().newUInt16Buffer(112); - for (int i = 0; i < buffer2.size(); i++) { - buffer2.set(i, "val" + (i % 7)); - } - buffer2.set(42, null); - Table table = Builders.newTableBuilder(112).add("first", buffer.toColumn(ColumnTypes.NOMINAL)) - .add("second", buffer2.toColumn(ColumnTypes.NOMINAL)) - .build(Belt.defaultContext()); - - ExampleSet set = com.rapidminer.belt.table.BeltConverter.convert(new IOTable(table), CONTEXT); - - HeaderExampleSet remappingSet = com.rapidminer.belt.table.BeltConverter.convertHeader(table); - - ExampleSet remapped = RemappedExampleSet.create(set, remappingSet, false, true); - - String[][] expected = readTableToStringArray(table); - String[][] result = readExampleSetToStringArray(remapped); - assertArrayEquals(expected, result); - - double[][] expectedMapping = readTableToArray(table); - double[][] resultMapping = readExampleSetToArray(remapped); - assertArrayEquals(expectedMapping, resultMapping); - } - - @Test - public void testRemappingUnusedValue() { - CategoricalBuffer buffer = 
BufferAccessor.get().newUInt16Buffer(112); - for (int i = 0; i < buffer.size(); i++) { - buffer.set(i, "valu" + (i % 5)); - } - for (int i = 0; i < buffer.size(); i++) { - buffer.set(i, "value" + (i % 5)); - } - - CategoricalBuffer buffer2 = BufferAccessor.get().newInt32Buffer(112); - for (int i = 0; i < buffer2.size(); i++) { - buffer2.set(i, "val" + (i % 7)); - } - buffer2.set(42, null); - Table table = Builders.newTableBuilder(112).add("first", buffer.toColumn(ColumnTypes.NOMINAL)) - .add("second", buffer2.toColumn(ColumnTypes.NOMINAL)) - .build(Belt.defaultContext()); - - buffer = BufferAccessor.get().newUInt16Buffer(112); - for (int i = 0; i < buffer.size(); i++) { - buffer.set(i, "value" + (i % 5)); - } - - Table table2 = Builders.newTableBuilder(112).add("first", buffer.toColumn(ColumnTypes.NOMINAL)) - .add("second", buffer2.toColumn(ColumnTypes.NOMINAL)) - .build(Belt.defaultContext()); - - ExampleSet set = com.rapidminer.belt.table.BeltConverter.convert(new IOTable(table), CONTEXT); - - HeaderExampleSet remappingSet = com.rapidminer.belt.table.BeltConverter.convertHeader(table2); - - ExampleSet remapped = RemappedExampleSet.create(set, remappingSet, false, true); - - String[][] expected = readTableToStringArray(table); - String[][] result = readExampleSetToStringArray(remapped); - assertArrayEquals(expected, result); - - double[][] expectedMapping = readTableToArray(table2); - double[][] resultMapping = readExampleSetToArray(remapped); - assertArrayEquals(expectedMapping, resultMapping); - } - - - @Test(expected = BeltConverter.ConversionException.class) - public void testCustomColumns() { - ColumnType customType = ColumnTypes.categoricalType("com.rapidminer.custom.integer", - Integer.class, null); - Table table = Builders.newTableBuilder(11).addReal("real", i -> 3 * i / 5.0).addInt("int", i -> 5 * i) - .addCategorical("custom", i -> i, customType) - .build(Belt.defaultContext()); - try { - BeltConverter.convertHeader(table); - } catch 
(BeltConverter.ConversionException e) { - assertEquals("custom", e.getColumnName()); - assertEquals(customType, e.getType()); - throw e; - } - } - } - - @RunWith(Parameterized.class) - public static class TableToSetSequentially { - - public TableToSetSequentially(boolean legacyMode) { - ParameterService.setParameterValue(RapidMiner.PROPERTY_RAPIDMINER_SYSTEM_LEGACY_DATA_MGMT, - String.valueOf(legacyMode)); - } - - @Parameters(name = "legacyMode={0}") - public static Collection params() { - return Arrays.asList(true, false); - } - - @Test - public void testSimple() { - Table table = Builders.newTableBuilder(112).addReal("real", i -> 3 * i / 5.0).addInt("int", i -> 5 * i) - .build(Belt.defaultContext()); - - ExampleSet set = com.rapidminer.belt.table.BeltConverter.convertSequentially(new IOTable(table)); - - double[][] expected = readTableToArray(table); - double[][] result = readExampleSetToArray(set); - assertArrayEquals(expected, result); - } - - - @Test - public void testManyColumns() { - TableBuilder builder = Builders.newTableBuilder(11); - for (int i = 0; i < 30; i++) { - builder.addReal("real" + i, j -> 3 * j / 5.0).addInt("int" + i, j -> 5 * j); - } - Table table = builder.build(Belt.defaultContext()); - - ExampleSet set = com.rapidminer.belt.table.BeltConverter.convertSequentially(new IOTable(table)); - - double[][] expected = readTableToArray(table); - double[][] result = readExampleSetToArray(set); - assertArrayEquals(expected, result); - } - - @Test - public void testRoles() { - TableBuilder builder = Builders.newTableBuilder(10); - builder.addInt("att-1", i -> i); - - ColumnRole[] columnRoles = new ColumnRole[]{ColumnRole.ID, ColumnRole.LABEL, ColumnRole.PREDICTION, - ColumnRole.SCORE, ColumnRole.WEIGHT, ColumnRole.OUTLIER, ColumnRole.CLUSTER, ColumnRole.BATCH, - ColumnRole.METADATA}; - for (int i = 0; i < columnRoles.length; i++) { - builder.addReal("att" + i, j -> j); - builder.addMetaData("att" + i, columnRoles[i]); - } - - builder.addInt("batt1", i 
-> i); - builder.addMetaData("batt1", ColumnRole.METADATA); - builder.addMetaData("batt1", new com.rapidminer.belt.table.LegacyRole("ignore-me")); - - builder.addInt("batt2", i -> i); - builder.addMetaData("batt2", ColumnRole.SCORE); - builder.addMetaData("batt2", new com.rapidminer.belt.table.LegacyRole("confidence_Yes")); - - Table table = builder.build(Belt.defaultContext()); - - ExampleSet set = com.rapidminer.belt.table.BeltConverter.convertSequentially(new IOTable(table)); - - Iterable iterable = () -> set.getAttributes().allAttributeRoles(); - String[] result = StreamSupport.stream(iterable.spliterator(), false).map(AttributeRole::getSpecialName) - .toArray(String[]::new); - String[] expected = - new String[]{null, Attributes.ID_NAME, Attributes.LABEL_NAME, Attributes.PREDICTION_NAME, - Attributes.CONFIDENCE_NAME, Attributes.WEIGHT_NAME, Attributes.OUTLIER_NAME, - Attributes.CLUSTER_NAME, Attributes.BATCH_NAME, "meta_data", "ignore-me", - "confidence_Yes"}; - assertArrayEquals(expected, result); - } - - @Test - public void testTypes() { - TableBuilder builder = Builders.newTableBuilder(10); - builder.addReal("att1", i -> i); - - builder.addReal("att2", i -> i); - builder.addMetaData("att2", com.rapidminer.belt.table.LegacyType.NUMERICAL); - - builder.addInt("att3", i -> i); - - builder.addInt("att4", i -> i); - builder.addMetaData("att4", com.rapidminer.belt.table.LegacyType.NUMERICAL); - - builder.addDateTime("att5", i -> Instant.EPOCH); - - builder.addDateTime("att6", i -> Instant.EPOCH); - builder.addMetaData("att6", com.rapidminer.belt.table.LegacyType.DATE); - - builder.addDateTime("att6.5", i -> Instant.EPOCH); - builder.addMetaData("att6.5", com.rapidminer.belt.table.LegacyType.TIME); - - builder.addTime("att7", i -> LocalTime.NOON); - - builder.addTime("att7.5", i -> LocalTime.NOON); - builder.addMetaData("att7.5", com.rapidminer.belt.table.LegacyType.NUMERICAL); - - builder.addNominal("att8", i -> i % 2 == 0 ? 
"A" : "B"); - - builder.addNominal("att9", i -> i % 2 == 0 ? "A" : "B", 2); - - builder.addNominal("att10", i -> i % 2 == 0 ? "A" : "B"); - builder.addMetaData("att10", com.rapidminer.belt.table.LegacyType.BINOMINAL); - - builder.addNominal("att11", i -> i % 2 == 0 ? "A" : "B", 2); - builder.addMetaData("att11", com.rapidminer.belt.table.LegacyType.STRING); - - builder.addNominal("att12", i -> i % 2 == 0 ? "A" : "B"); - builder.addMetaData("att12", com.rapidminer.belt.table.LegacyType.FILE_PATH); - - builder.addNominal("att13", i -> i % 2 == 0 ? "A" : "B", 2); - builder.addMetaData("att13", com.rapidminer.belt.table.LegacyType.NOMINAL); - - builder.addBoolean("att14", i -> i % 2 == 0 ? "A" : "B", "A", ColumnTypes.NOMINAL); - - Table table = builder.build(Belt.defaultContext()); - - ExampleSet set = com.rapidminer.belt.table.BeltConverter.convertSequentially(new IOTable(table)); - - int[] result = - StreamSupport.stream(set.getAttributes().spliterator(), false).mapToInt(Attribute::getValueType) - .toArray(); - int[] expected = new int[]{Ontology.REAL, Ontology.NUMERICAL, Ontology.INTEGER, Ontology.NUMERICAL, - Ontology.DATE_TIME, Ontology.DATE, Ontology.TIME, Ontology.INTEGER, Ontology.NUMERICAL, Ontology.POLYNOMINAL, Ontology.POLYNOMINAL, - Ontology.BINOMINAL, Ontology.STRING, Ontology.FILE_PATH, Ontology.NOMINAL, Ontology.BINOMINAL}; - - assertArrayEquals(expected, result); - } - - @Test - public void testAnnotations() { - Table table = Builders.newTableBuilder(11).addReal("real", i -> 3 * i / 5.0).addInt("int", i -> 5 * i) - .build(Belt.defaultContext()); - - IOTable tableObject = new IOTable(table); - tableObject.getAnnotations().setAnnotation(Annotations.KEY_DC_AUTHOR, "gmeier"); - - ExampleSet set = BeltConverter.convertSequentially(tableObject); - - assertEquals(tableObject.getAnnotations(), set.getAnnotations()); - } - - @Test(expected = BeltConverter.ConversionException.class) - public void testCustomColumns() { - ColumnType customType = 
ColumnTypes.categoricalType("com.rapidminer.custom.integer", - Integer.class, null); - Table table = Builders.newTableBuilder(11).addReal("real", i -> 3 * i / 5.0).addInt("int", i -> 5 * i) - .addCategorical("custom", i -> i, customType) - .build(Belt.defaultContext()); - try { - BeltConverter.convertSequentially(new IOTable(table)); - } catch (BeltConverter.ConversionException e) { - assertEquals("custom", e.getColumnName()); - assertEquals(customType, e.getType()); - throw e; - } - } - } - - public static Attribute attributeDogCatMouse() { - Attribute a = AttributeFactory.createAttribute("animal", Ontology.NOMINAL); - a.getMapping().mapString("dog"); - a.getMapping().mapString("cat"); - a.getMapping().mapString("mouse"); - return a; - } - - public static Attribute attributeYesNo() { - Attribute a = AttributeFactory.createAttribute("decision", Ontology.NOMINAL); - a.getMapping().mapString("no"); - a.getMapping().mapString("yes"); - return a; - } - - public static Attribute attributeInt() { - return AttributeFactory.createAttribute("integer", Ontology.INTEGER); - } - - public static Attribute attributeReal() { - return AttributeFactory.createAttribute("real", Ontology.REAL); - } - - public static Attribute attributeReal(int index) { - return AttributeFactory.createAttribute("real" + index, Ontology.REAL); - } -} +/** + * Copyright (C) 2001-2020 by RapidMiner and the contributors + * + * Complete list of developers available at our web site: + * + * http://rapidminer.com + * + * This program is free software: you can redistribute it and/or modify it under the terms of the + * GNU Affero General Public License as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without + * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License along with this program. + * If not, see http://www.gnu.org/licenses/. + */ +package com.rapidminer.belt.table; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +import java.time.Instant; +import java.time.LocalTime; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Locale; +import java.util.Objects; +import java.util.Random; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ForkJoinPool; +import java.util.concurrent.ForkJoinTask; +import java.util.concurrent.Future; +import java.util.stream.StreamSupport; + +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.experimental.runners.Enclosed; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameter; +import org.junit.runners.Parameterized.Parameters; + +import com.rapidminer.RapidMiner; +import com.rapidminer.adaption.belt.IOTable; +import com.rapidminer.belt.buffer.Buffers; +import com.rapidminer.belt.buffer.NominalBuffer; +import com.rapidminer.belt.column.Column; +import com.rapidminer.belt.column.ColumnType; +import com.rapidminer.belt.column.Columns; +import com.rapidminer.belt.column.Dictionary; +import com.rapidminer.belt.column.type.StringSet; +import com.rapidminer.belt.reader.CategoricalReader; +import com.rapidminer.belt.reader.NumericReader; +import com.rapidminer.belt.reader.Readers; +import com.rapidminer.belt.util.Belt; +import com.rapidminer.belt.util.ColumnAnnotation; +import com.rapidminer.belt.util.ColumnMetaData; 
+import com.rapidminer.belt.util.ColumnReference; +import com.rapidminer.belt.util.ColumnRole; +import com.rapidminer.core.concurrency.ConcurrencyContext; +import com.rapidminer.core.concurrency.ExecutionStoppedException; +import com.rapidminer.example.Attribute; +import com.rapidminer.example.AttributeRole; +import com.rapidminer.example.AttributeTransformation; +import com.rapidminer.example.Attributes; +import com.rapidminer.example.Example; +import com.rapidminer.example.ExampleSet; +import com.rapidminer.example.ExampleSetFactory; +import com.rapidminer.example.set.Condition; +import com.rapidminer.example.set.ConditionedExampleSet; +import com.rapidminer.example.set.HeaderExampleSet; +import com.rapidminer.example.set.MappedExampleSet; +import com.rapidminer.example.set.Partition; +import com.rapidminer.example.set.RemappedExampleSet; +import com.rapidminer.example.set.SimplePartitionBuilder; +import com.rapidminer.example.set.SortedExampleSet; +import com.rapidminer.example.set.SplittedExampleSet; +import com.rapidminer.example.table.AttributeFactory; +import com.rapidminer.example.table.NominalMapping; +import com.rapidminer.example.utils.ExampleSetBuilder; +import com.rapidminer.example.utils.ExampleSets; +import com.rapidminer.operator.Annotations; +import com.rapidminer.operator.tools.ExpressionEvaluationException; +import com.rapidminer.test.asserter.AsserterFactoryRapidMiner; +import com.rapidminer.test_utils.RapidAssert; +import com.rapidminer.tools.Ontology; +import com.rapidminer.tools.ParameterService; + + +/** + * Tests the {@link com.rapidminer.belt.table.BeltConverter}. + * + * @author Gisa Meier + */ +@RunWith(Enclosed.class) +public class BeltConverterTest { + + /** + * Meta data used for testing. 
+ */ + private static class TestMetaData implements ColumnMetaData { + + private final String someParameter; + + public TestMetaData(String someParameter) { + this.someParameter = someParameter; + } + + @Override + public boolean equals(Object other) { + if (this == other) { + return true; + } + if (other == null || getClass() != other.getClass()) { + return false; + } + TestMetaData that = (TestMetaData) other; + return Objects.equals(someParameter, that.someParameter); + } + + @Override + public String type() { + return "com.rapidminer.belt.meta.column.testmetadata"; + } + + @Override + public Uniqueness uniqueness() { + return Uniqueness.NONE; + } + } + + private static final ConcurrencyContext CONTEXT = new ConcurrencyContext() { + + private ForkJoinPool pool = new ForkJoinPool(Runtime.getRuntime().availableProcessors()); + + @Override + public List> submit(List> callables) throws IllegalArgumentException { + List> futures = new ArrayList<>(); + for (Callable callable : callables) { + futures.add(pool.submit(callable)); + } + return futures; + } + + @Override + public List call(List> callables) + throws ExecutionException, ExecutionStoppedException, IllegalArgumentException { + List> futures = submit(callables); + List results = new ArrayList<>(); + for (Future future : futures) { + try { + results.add(future.get()); + } catch (InterruptedException e) { + throw new RuntimeException("must not happen"); + } + } + return results; + } + + @Override + public void run(List runnables) + throws ExecutionException, ExecutionStoppedException, IllegalArgumentException { + } + + @Override + public List invokeAll(List> tasks) + throws ExecutionException, ExecutionStoppedException, IllegalArgumentException { + return null; + } + + @Override + public T invoke(ForkJoinTask task) + throws ExecutionException, ExecutionStoppedException, IllegalArgumentException { + return null; + } + + @Override + public int getParallelism() { + return pool.getParallelism(); + } + + @Override + 
public List collectResults(List> futures) + throws ExecutionException, ExecutionStoppedException, IllegalArgumentException { + return null; + } + + @Override + public void checkStatus() throws ExecutionStoppedException { + } + + }; + + private static double[] readColumnToArray(Table table, int column) { + double[] data = new double[table.height()]; + NumericReader reader = Readers.numericReader(table.column(column)); + for (int j = 0; j < table.height(); j++) { + data[j] = reader.read(); + } + return data; + } + + private static String[] readColumnToStringArray(Table table, int column) { + String[] data = new String[table.height()]; + Column col = table.column(column); + List categoricalMapping = ColumnAccessor.get().getDictionaryList(col.getDictionary()); + CategoricalReader reader = Readers.categoricalReader(col); + for (int j = 0; j < table.height(); j++) { + data[j] = categoricalMapping.get(reader.read()); + } + return data; + } + + static double[][] readTableToArray(Table table) { + double[][] result = new double[table.width()][]; + Arrays.setAll(result, i -> readColumnToArray(table, i)); + return result; + } + + private static String[][] readTableToStringArray(Table table) { + String[][] result = new String[table.width()][]; + Arrays.setAll(result, i -> readColumnToStringArray(table, i)); + return result; + } + + static double[][] readExampleSetToArray(ExampleSet set) { + double[][] result = new double[set.getAttributes().size()][]; + int i = 0; + for (Attribute att : set.getAttributes()) { + result[i] = new double[set.size()]; + int j = 0; + for (Example example : set) { + result[i][j] = example.getValue(att); + j++; + } + i++; + } + return result; + } + + private static String[][] readExampleSetToStringArray(ExampleSet set) { + String[][] result = new String[set.getAttributes().size()][]; + int i = 0; + for (Attribute att : set.getAttributes()) { + result[i] = new String[set.size()]; + int j = 0; + for (Example example : set) { + double value = 
example.getValue(att); + if (Double.isNaN(value)) { + result[i][j] = null; + } else { + result[i][j] = att.getMapping().mapIndex((int) value); + } + j++; + } + i++; + } + return result; + } + + public static class InputValidation { + + @Test(expected = IllegalArgumentException.class) + public void testSetToTableNullSet() { + com.rapidminer.belt.table.BeltConverter.convert((ExampleSet) null, CONTEXT); + } + + @Test(expected = IllegalArgumentException.class) + public void testSetToTableNullContext() { + com.rapidminer.belt.table.BeltConverter.convert(ExampleSetFactory.createExampleSet(new double[][]{new double[]{0}}), null); + } + + @Test(expected = IllegalArgumentException.class) + public void testTableToSetNullTable() { + com.rapidminer.belt.table.BeltConverter.convert((IOTable) null, CONTEXT); + } + + @Test(expected = IllegalArgumentException.class) + public void testTableToSetSequentiallyNullTable() { + com.rapidminer.belt.table.BeltConverter.convertSequentially((IOTable) null); + } + + @Test(expected = IllegalArgumentException.class) + public void testTableToSetNullContext() { + com.rapidminer.belt.table.BeltConverter.convert(new IOTable(Builders.newTableBuilder(1).build(Belt.defaultContext())), null); + } + } + + @RunWith(Parameterized.class) + public static class SetToTable { + + public SetToTable(boolean legacyMode) { + ParameterService.setParameterValue(RapidMiner.PROPERTY_RAPIDMINER_SYSTEM_LEGACY_DATA_MGMT, + String.valueOf(legacyMode)); + } + + @Parameters(name = "legacyMode={0}") + public static Collection params() { + return Arrays.asList(true, false); + } + + @Test + public void testSimple() { + Attribute attribute1 = attributeInt(); + Attribute attribute2 = attributeReal(); + ExampleSet set = ExampleSets.from(attribute1, attribute2).withBlankSize(200) + .withColumnFiller(attribute1, i -> i + 1).withColumnFiller(attribute2, i -> i + 1.7).build(); + Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); + + double[][] 
result = readTableToArray(table); + double[][] expected = readExampleSetToArray(set); + assertArrayEquals(expected, result); + } + + @Test + public void testNominal() { + Attribute attribute1 = attributeDogCatMouse(); + Attribute attribute2 = attributeYesNo(); + ExampleSet set = ExampleSets.from(attribute1, attribute2).withBlankSize(200) + .withColumnFiller(attribute1, i -> i % 3).withColumnFiller(attribute2, i -> i % 2).build(); + set.getExample(10).setValue(attribute1, Double.NaN); + Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); + + String[][] result = readTableToStringArray(table); + String[][] expected = readExampleSetToStringArray(set); + assertArrayEquals(expected, result); + } + + @Test + public void testBinominal() { + Attribute attribute1 = AttributeFactory.createAttribute("oneNegative", Ontology.BINOMINAL); + attribute1.getMapping().mapString("one"); + assertEquals("one", attribute1.getMapping().getNegativeString()); + assertNull(attribute1.getMapping().getPositiveString()); + + Attribute attribute2 = AttributeFactory.createAttribute("empty", Ontology.BINOMINAL); + assertNull(attribute2.getMapping().getPositiveString()); + assertNull(attribute2.getMapping().getNegativeString()); + + Attribute attribute3 = AttributeFactory.createAttribute("binominal", Ontology.BINOMINAL); + attribute3.getMapping().mapString("negative"); + attribute3.getMapping().mapString("positive"); + assertEquals("negative", attribute3.getMapping().getNegativeString()); + assertEquals("positive", attribute3.getMapping().getPositiveString()); + + ExampleSet set = ExampleSets.from(attribute1, attribute2, attribute3).withBlankSize(200) + .withColumnFiller(attribute1, i -> i % 2 == 0 ? Double.NaN : 0).withColumnFiller(attribute2, + i -> Double.NaN).withColumnFiller(attribute3, i -> i % 2 == 0 ? 
Double.NaN : 1).build(); + Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); + + String[][] result = readTableToStringArray(table); + String[][] expected = readExampleSetToStringArray(set); + assertArrayEquals(expected, result); + + Dictionary oneNegative = table.column("oneNegative").getDictionary(); + assertTrue(oneNegative.isBoolean()); + assertFalse(oneNegative.hasPositive()); + assertEquals(attribute1.getMapping().getNegativeString(), oneNegative.get(oneNegative.getNegativeIndex())); + assertEquals(1, oneNegative.size()); + + Dictionary empty = table.column("empty").getDictionary(); + assertTrue(empty.isBoolean()); + assertFalse(empty.hasPositive()); + assertFalse(empty.hasNegative()); + assertEquals(0, empty.size()); + + Dictionary binominal = table.column("binominal").getDictionary(); + assertTrue(binominal.isBoolean()); + assertEquals(2, binominal.size()); + assertEquals(attribute3.getMapping().getNegativeString(), binominal.get(binominal.getNegativeIndex())); + assertEquals(attribute3.getMapping().getPositiveString(), binominal.get(binominal.getPositiveIndex())); + } + + @Test + public void testNominalUnusedValue() { + Attribute attribute1 = attributeDogCatMouse(); + Attribute attribute2 = attributeYesNo(); + ExampleSet set = ExampleSets.from(attribute1, attribute2).withBlankSize(200) + .withColumnFiller(attribute1, i -> i % 2).withColumnFiller(attribute2, i -> 1).build(); + set.getExample(10).setValue(attribute1, Double.NaN); + Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); + + String[][] result = readTableToStringArray(table); + String[][] expected = readExampleSetToStringArray(set); + assertArrayEquals(expected, result); + } + + @Test + public void testNominalDoubleValue() { + Attribute attribute1 = attributeDogCatMouse(); + Attribute attribute2 = attributeYesNo(); + ExampleSet set = ExampleSets.from(attribute1, attribute2).withBlankSize(200) + .withColumnFiller(attribute1, i 
-> i % 3).withColumnFiller(attribute2, i -> i % 2).build(); + set.getExample(10).setValue(attribute1, Double.NaN); + attribute1.getMapping().setMapping("cat", 0); + Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); + + String[][] result = readTableToStringArray(table); + String[][] expected = readExampleSetToStringArray(set); + assertArrayEquals(expected, result); + } + + @Test + public void testNominalNullValue() { + Attribute attribute1 = attributeDogCatMouse(); + Attribute attribute2 = attributeYesNo(); + ExampleSet set = ExampleSets.from(attribute1, attribute2).withBlankSize(200) + .withColumnFiller(attribute1, i -> i % 3).withColumnFiller(attribute2, i -> i % 2).build(); + set.getExample(10).setValue(attribute1, Double.NaN); + attribute1.getMapping().setMapping(null, 2); + Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); + + String[][] result = readTableToStringArray(table); + String[][] expected = readExampleSetToStringArray(set); + assertArrayEquals(expected, result); + } + + @Test + public void testManyColumns() { + List attributes = new ArrayList<>(); + for (int i = 0; i < 60; i++) { + attributes.add(attributeReal(i)); + } + ExampleSetBuilder builder = ExampleSets.from(attributes).withBlankSize(20); + for (int i = 0; i < 60; i++) { + builder.withColumnFiller(attributes.get(i), j -> j + 1.7); + } + ExampleSet set = builder.build(); + Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); + + double[][] result = readTableToArray(table); + double[][] expected = readExampleSetToArray(set); + assertArrayEquals(expected, result); + } + + @Test + public void testUnsafeAttribute() { + Attribute attribute1 = attributeInt(); + Attribute attribute2 = attributeReal(); + ExampleSet set = ExampleSets.from(attribute1, attribute2).withBlankSize(200) + .withColumnFiller(attribute1, i -> i + 1).withColumnFiller(attribute2, i -> i + 1.7).build(); + 
set.getAttributes().allAttributes().next().addTransformation(new AttributeTransformation() { + @Override + public double transform(Attribute attribute, double value) { + return value; + } + + @Override + public double inverseTransform(Attribute attribute, double value) { + return value; + } + + @Override + public boolean isReversable() { + return false; + } + + @Override + public Object clone() { + return null; + } + }); + Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); + + double[][] result = readTableToArray(table); + double[][] expected = readExampleSetToArray(set); + assertArrayEquals(expected, result); + } + + + @Test + public void testTypes() { + List attributes = new ArrayList<>(); + for (int i = 1; i < Ontology.VALUE_TYPE_NAMES.length; i++) { + attributes.add(AttributeFactory.createAttribute(i)); + } + ExampleSet set = ExampleSets.from(attributes) + .build(); + Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); + + Column.TypeId[] result = + table.labels().stream().map(label -> table.column(label).type().id()).toArray(Column + .TypeId[]::new); + Column.TypeId[] expected = + new Column.TypeId[]{Column.TypeId.NOMINAL, Column.TypeId.REAL, Column.TypeId.INTEGER_53_BIT, + Column.TypeId.REAL, Column.TypeId.NOMINAL, Column.TypeId.NOMINAL, Column.TypeId.NOMINAL, + Column.TypeId.NOMINAL, Column.TypeId.DATE_TIME, Column.TypeId.DATE_TIME, + Column.TypeId.DATE_TIME}; + assertArrayEquals(expected, result); + + com.rapidminer.belt.table.LegacyType[] legacyResult = table.labels().stream() + .map(label -> table.getFirstMetaData(label, com.rapidminer.belt.table.LegacyType.class)) + .toArray(com.rapidminer.belt.table.LegacyType[]::new); + com.rapidminer.belt.table.LegacyType[] legacyExpected = + new com.rapidminer.belt.table.LegacyType[]{com.rapidminer.belt.table.LegacyType.NOMINAL, + com.rapidminer.belt.table.LegacyType.NUMERICAL, null, null, + com.rapidminer.belt.table.LegacyType.STRING, + 
com.rapidminer.belt.table.LegacyType.BINOMINAL, null, + com.rapidminer.belt.table.LegacyType.FILE_PATH, null, + com.rapidminer.belt.table.LegacyType.DATE, com.rapidminer.belt.table.LegacyType.TIME}; + assertArrayEquals(legacyExpected, legacyResult); + } + + @Test + public void testTypesView() { + List attributes = new ArrayList<>(); + for (int i = 1; i < Ontology.VALUE_TYPE_NAMES.length; i++) { + attributes.add(AttributeFactory.createAttribute(i)); + } + ExampleSet set = new SortedExampleSet(ExampleSets.from(attributes) + .build(), attributes.get(0), SortedExampleSet.INCREASING); + Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); + + Column.TypeId[] result = + table.labels().stream().map(label -> table.column(label).type().id()).toArray(Column + .TypeId[]::new); + Column.TypeId[] expected = + new Column.TypeId[]{Column.TypeId.NOMINAL, Column.TypeId.REAL, Column.TypeId.INTEGER_53_BIT, + Column.TypeId.REAL, Column.TypeId.NOMINAL, Column.TypeId.NOMINAL, Column.TypeId.NOMINAL, + Column.TypeId.NOMINAL, Column.TypeId.DATE_TIME, Column.TypeId.DATE_TIME, + Column.TypeId.DATE_TIME}; + assertArrayEquals(expected, result); + + com.rapidminer.belt.table.LegacyType[] legacyResult = table.labels().stream() + .map(label -> table.getFirstMetaData(label, com.rapidminer.belt.table.LegacyType.class)) + .toArray(com.rapidminer.belt.table.LegacyType[]::new); + com.rapidminer.belt.table.LegacyType[] legacyExpected = + new com.rapidminer.belt.table.LegacyType[]{com.rapidminer.belt.table.LegacyType.NOMINAL, + com.rapidminer.belt.table.LegacyType.NUMERICAL, null, null, + com.rapidminer.belt.table.LegacyType.STRING, + com.rapidminer.belt.table.LegacyType.BINOMINAL, null, + com.rapidminer.belt.table.LegacyType.FILE_PATH, null, + com.rapidminer.belt.table.LegacyType.DATE, com.rapidminer.belt.table.LegacyType.TIME}; + assertArrayEquals(legacyExpected, legacyResult); + } + + + @Test + public void testRoles() { + String[] roles = new 
String[]{Attributes.ID_NAME, Attributes.CONFIDENCE_NAME + "_" + "Yes", + Attributes.LABEL_NAME, Attributes.PREDICTION_NAME, + Attributes.CLUSTER_NAME, Attributes.WEIGHT_NAME, Attributes.BATCH_NAME, Attributes.OUTLIER_NAME, + Attributes.CONFIDENCE_NAME, + Attributes.CLASSIFICATION_COST, "ignore-me", "confidence(yes)", "cluster_1_probability"}; + List attributes = new ArrayList<>(); + for (int i = 0; i < roles.length + 1; i++) { + attributes.add(AttributeFactory.createAttribute(Ontology.NUMERICAL)); + } + ExampleSetBuilder builder = ExampleSets.from(attributes); + for (int i = 1; i < roles.length + 1; i++) { + builder.withRole(attributes.get(i), roles[i - 1]); + } + ExampleSet set = builder.build(); + Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); + + ColumnRole[] result = table.labels().stream() + .map(label -> table.getFirstMetaData(label, ColumnRole.class)) + .toArray(ColumnRole[]::new); + ColumnRole[] expected = + new ColumnRole[]{null, ColumnRole.ID, ColumnRole.SCORE, ColumnRole.LABEL, ColumnRole.PREDICTION, + ColumnRole.CLUSTER, + ColumnRole.WEIGHT, ColumnRole.BATCH, ColumnRole.OUTLIER, ColumnRole + .SCORE, ColumnRole.METADATA, ColumnRole.METADATA, ColumnRole.SCORE, ColumnRole.METADATA}; + assertArrayEquals(expected, result); + + com.rapidminer.belt.table.LegacyRole[] legacyResult = table.labels().stream() + .map(label -> table.getFirstMetaData(label, com.rapidminer.belt.table.LegacyRole.class)) + .toArray(com.rapidminer.belt.table.LegacyRole[]::new); + com.rapidminer.belt.table.LegacyRole[] legacyExpected = + new com.rapidminer.belt.table.LegacyRole[]{null, null, null, null, null, null, null, null, null, + null, + new LegacyRole(Attributes.CLASSIFICATION_COST), + new LegacyRole("ignore-me"), new LegacyRole("confidence(yes)"), new LegacyRole("cluster_1_probability")}; + assertArrayEquals(legacyExpected, legacyResult); + + ColumnReference[] references = table.labels().stream() + .map(label -> table.getFirstMetaData(label, 
ColumnReference.class)) + .toArray(ColumnReference[]::new); + ColumnReference[] referencesExpected = + new ColumnReference[]{null, null, + new ColumnReference(set.getAttributes().getPredictedLabel().getName(), "Yes"), null, null, + null, null, null, null, new ColumnReference(set.getAttributes().getPredictedLabel().getName()), + null, null, new ColumnReference(set.getAttributes().getPredictedLabel().getName()), null}; + assertArrayEquals(referencesExpected, references); + } + + /** Gives three columns to every {@link ColumnRole} and checks that the roles survive converting example set and table into each other, once and twice. On the example set side the repeated roles use the base role name followed by the suffixes "_2" and "_3". */ @Test + public void testDuplicateRoles() { + ColumnRole[] roles = ColumnRole.values(); + // build expected example set + List attributes = new ArrayList<>(); + for (ColumnRole role : roles) { + String name1 = role.name() + "-1", name2 = role.name() + "-2", name3 = role.name() + "-3"; + attributes.add(AttributeFactory.createAttribute(name1, Ontology.REAL)); + attributes.add(AttributeFactory.createAttribute(name2, Ontology.REAL)); + attributes.add(AttributeFactory.createAttribute(name3, Ontology.REAL)); + } + ExampleSetBuilder exampleSetBuilder = ExampleSets.from(attributes); + int attributeIndex = 0; + for (ColumnRole role : roles) { + String baseName; + if (role == ColumnRole.SCORE) { + baseName = Attributes.CONFIDENCE_NAME; + } else if (role == ColumnRole.METADATA) { + baseName = "meta_data"; + } else { + baseName = role.name().toLowerCase(Locale.ROOT); + } + exampleSetBuilder.withRole(attributes.get(attributeIndex++), baseName); + exampleSetBuilder.withRole(attributes.get(attributeIndex++), baseName + "_2"); + exampleSetBuilder.withRole(attributes.get(attributeIndex++), baseName + "_3"); + } + ExampleSet expectedExampleSet = exampleSetBuilder.build(); + + // build expected table + TableBuilder tableBuilder = new TableBuilder(0); + for (ColumnRole role : roles) { + String name1 = role.name() + "-1", name2 = role.name() + "-2", name3 = role.name() + "-3"; + tableBuilder.addReal(name1, i -> Math.random()); + tableBuilder.addMetaData(name1, role); + tableBuilder.addReal(name2, i -> 
Math.random()); + tableBuilder.addMetaData(name2, role); + tableBuilder.addReal(name3, i -> Math.random()); + tableBuilder.addMetaData(name3, role); + } + Table expectedTable = tableBuilder.build(Belt.defaultContext()); + + // convert example set / table + ExampleSet convertedExampleSet = BeltConverter.convert(new IOTable(expectedTable), CONTEXT); + Table convertedTable = BeltConverter.convert(expectedExampleSet, CONTEXT).getTable(); + + // check if results are equal + for (ColumnRole role : roles) { + String name1 = role.name() + "-1", name2 = role.name() + "-2", name3 = role.name() + "-3"; + assertEquals(expectedExampleSet.getAttributes().getRole(name1).getSpecialName(), + convertedExampleSet.getAttributes().getRole(name1).getSpecialName()); + assertEquals(expectedExampleSet.getAttributes().getRole(name2).getSpecialName(), + convertedExampleSet.getAttributes().getRole(name2).getSpecialName()); + assertEquals(expectedExampleSet.getAttributes().getRole(name3).getSpecialName(), + convertedExampleSet.getAttributes().getRole(name3).getSpecialName()); + } + assertArrayEquals(expectedTable.labels().stream() + .map(label -> expectedTable.getFirstMetaData(label, ColumnRole.class)) + .toArray(ColumnRole[]::new), convertedTable.labels().stream() + .map(label -> convertedTable.getFirstMetaData(label, ColumnRole.class)) + .toArray(ColumnRole[]::new)); + + // and convert them again to make sure + Table doubleConvertedTable = BeltConverter.convert(convertedExampleSet, CONTEXT).getTable(); + ExampleSet doubleConvertedExampleSet = BeltConverter.convert(new IOTable(convertedTable), CONTEXT); + + for (ColumnRole role : roles) { + String name1 = role.name() + "-1", name2 = role.name() + "-2", name3 = role.name() + "-3"; + assertEquals(expectedExampleSet.getAttributes().getRole(name1).getSpecialName(), + doubleConvertedExampleSet.getAttributes().getRole(name1).getSpecialName()); + assertEquals(expectedExampleSet.getAttributes().getRole(name2).getSpecialName(), + 
doubleConvertedExampleSet.getAttributes().getRole(name2).getSpecialName()); + assertEquals(expectedExampleSet.getAttributes().getRole(name3).getSpecialName(), + doubleConvertedExampleSet.getAttributes().getRole(name3).getSpecialName()); + } + assertArrayEquals(expectedTable.labels().stream() + .map(label -> expectedTable.getFirstMetaData(label, ColumnRole.class)) + .toArray(ColumnRole[]::new), doubleConvertedTable.labels().stream() + .map(label -> doubleConvertedTable.getFirstMetaData(label, ColumnRole.class)) + .toArray(ColumnRole[]::new)); + } + + /** + * Some of the belt meta data has no equivalent in the example set representation. Therefore, we store the belt + * meta data in the ExampleSet's user data. This test checks if this is done correctly. + * <p> + * The table holds two prediction columns with two score columns each. After converting it to an example set the + * special names of the roles are checked; after converting back to a table the number of meta data entries per + * column, the column roles, the column references, the column annotation and the custom test meta data are checked. + */ + @Test + public void testPreserveBeltMetaData() { + // BUILD ORIGINAL TABLE + TableBuilder tableBuilder = new TableBuilder(0); + + // add a prediction and two corresponding score columns + tableBuilder.addNominal("PredictionColumnOne", i -> i % 2 == 0 ? "YES" : "NO"); + tableBuilder.addReal("ScoreColumnYES", i -> Math.random()); + tableBuilder.addReal("ScoreColumnNO", i -> Math.random()); + tableBuilder.addMetaData("ScoreColumnYES", ColumnRole.SCORE); + tableBuilder.addMetaData("ScoreColumnNO", ColumnRole.SCORE); + tableBuilder.addMetaData("PredictionColumnOne", ColumnRole.PREDICTION); + tableBuilder.addMetaData("PredictionColumnOne", new ColumnAnnotation("This is my wonderful prediction!")); + tableBuilder.addMetaData("ScoreColumnYES", new ColumnReference("PredictionColumnOne", "YES")); + tableBuilder.addMetaData("ScoreColumnNO", new ColumnReference("PredictionColumnOne", "NO")); + + // add another prediction and two corresponding score columns + tableBuilder.addNominal("PredictionColumnTwo", i -> i % 2 == 0 ? 
"play" : "work"); + tableBuilder.addReal("ScoreColumnPlay", i -> Math.random()); + tableBuilder.addReal("ScoreColumnWork", i -> Math.random()); + tableBuilder.addMetaData("ScoreColumnPlay", ColumnRole.SCORE); + tableBuilder.addMetaData("ScoreColumnWork", ColumnRole.SCORE); + tableBuilder.addMetaData("PredictionColumnTwo", ColumnRole.PREDICTION); + tableBuilder.addMetaData("PredictionColumnTwo", new TestMetaData("A first parameter")); + tableBuilder.addMetaData("PredictionColumnTwo", new TestMetaData("A second parameter")); + tableBuilder.addMetaData("PredictionColumnTwo", new TestMetaData("A third parameter")); + tableBuilder.addMetaData("ScoreColumnWork", new ColumnReference("PredictionColumnTwo", "work")); + tableBuilder.addMetaData("ScoreColumnPlay", new ColumnReference("PredictionColumnTwo", "play")); + + Table originalTable = tableBuilder.build(Belt.defaultContext()); + + // CHECK CONVERTED EXAMPLE SET + ExampleSet convertedExampleSet = BeltConverter.convert(new IOTable(originalTable), CONTEXT); + assertEquals("confidence_YES", convertedExampleSet.getAttributes().findRoleByName("ScoreColumnYES").getSpecialName()); + assertEquals("confidence_NO", convertedExampleSet.getAttributes().findRoleByName("ScoreColumnNO").getSpecialName()); + assertEquals("confidence_work", convertedExampleSet.getAttributes().findRoleByName("ScoreColumnWork").getSpecialName()); + assertEquals("confidence_play", convertedExampleSet.getAttributes().findRoleByName("ScoreColumnPlay").getSpecialName()); + String predictionOneRoleName = convertedExampleSet.getAttributes().findRoleByName("PredictionColumnOne").getSpecialName(); + String predictionTwoRoleName = convertedExampleSet.getAttributes().findRoleByName("PredictionColumnTwo").getSpecialName(); + String predictedLabel = convertedExampleSet.getAttributes().getPredictedLabel().getName(); + assertTrue(predictionOneRoleName.equals("prediction") && predictionTwoRoleName.equals("prediction_2") + || predictionTwoRoleName.equals("prediction") && 
predictionOneRoleName.equals("prediction_2")); + assertTrue(predictedLabel.equals("PredictionColumnOne") || predictedLabel.equals("PredictionColumnTwo")); + assertEquals(6, convertedExampleSet.getAttributes().specialSize()); + + // CHECK DOUBLE-CONVERTED TABLE + Table convertedTable = BeltConverter.convert(convertedExampleSet, CONTEXT).getTable(); + assertEquals(2, convertedTable.getMetaData().get("PredictionColumnOne").size()); + assertEquals(4, convertedTable.getMetaData().get("PredictionColumnTwo").size()); + assertEquals(2, convertedTable.getMetaData().get("ScoreColumnYES").size()); + assertEquals(2, convertedTable.getMetaData().get("ScoreColumnNO").size()); + assertEquals(2, convertedTable.getMetaData().get("ScoreColumnPlay").size()); + assertEquals(2, convertedTable.getMetaData().get("ScoreColumnWork").size()); + assertEquals(6, convertedTable.width()); + + assertEquals(ColumnRole.PREDICTION, convertedTable.getFirstMetaData("PredictionColumnOne", ColumnRole.class)); + assertEquals(ColumnRole.PREDICTION, convertedTable.getFirstMetaData("PredictionColumnTwo", ColumnRole.class)); + assertEquals(ColumnRole.SCORE, convertedTable.getFirstMetaData("ScoreColumnYES", ColumnRole.class)); + assertEquals(ColumnRole.SCORE, convertedTable.getFirstMetaData("ScoreColumnNO", ColumnRole.class)); + assertEquals(ColumnRole.SCORE, convertedTable.getFirstMetaData("ScoreColumnPlay", ColumnRole.class)); + assertEquals(ColumnRole.SCORE, convertedTable.getFirstMetaData("ScoreColumnWork", ColumnRole.class)); + + assertEquals(new ColumnReference("PredictionColumnOne", "YES"), + convertedTable.getFirstMetaData("ScoreColumnYES", ColumnReference.class)); + assertEquals(new ColumnReference("PredictionColumnOne", "NO"), + convertedTable.getFirstMetaData("ScoreColumnNO", ColumnReference.class)); + assertEquals(new ColumnReference("PredictionColumnTwo", "play"), + convertedTable.getFirstMetaData("ScoreColumnPlay", ColumnReference.class)); + assertEquals(new 
ColumnReference("PredictionColumnTwo", "work"), + convertedTable.getFirstMetaData("ScoreColumnWork", ColumnReference.class)); + + assertEquals(new ColumnAnnotation("This is my wonderful prediction!"), + convertedTable.getFirstMetaData("PredictionColumnOne", ColumnAnnotation.class)); + + List testMetaData = convertedTable.getMetaData("PredictionColumnTwo", TestMetaData.class); + assertEquals(3, testMetaData.size()); + assertTrue(testMetaData.contains(new TestMetaData("A first parameter"))); + assertTrue(testMetaData.contains(new TestMetaData("A second parameter"))); + assertTrue(testMetaData.contains(new TestMetaData("A third parameter"))); + } + + /** + * A belt table is converted to example set, an attribute is removed / renamed and then the example set is + * converted back. This test checks that no exception is thrown. + * <p> + * In addition it checks that the removed columns and the old name of the renamed column are not part of the + * converted table, that the renamed column only carries one meta data entry, and that the remaining score columns + * keep their role and their column reference (including references that name the removed prediction column). + */ + @Test + public void testRemoveAndRenameAttribute() { + // BUILD ORIGINAL TABLE + TableBuilder tableBuilder = new TableBuilder(0); + + // add a prediction and two corresponding score columns + tableBuilder.addNominal("PredictionColumnOne", i -> i % 2 == 0 ? "YES" : "NO"); + tableBuilder.addReal("ScoreColumnYES", i -> Math.random()); + tableBuilder.addReal("ScoreColumnNO", i -> Math.random()); + tableBuilder.addMetaData("ScoreColumnYES", ColumnRole.SCORE); + tableBuilder.addMetaData("ScoreColumnNO", ColumnRole.SCORE); + tableBuilder.addMetaData("PredictionColumnOne", ColumnRole.PREDICTION); + tableBuilder.addMetaData("PredictionColumnOne", new ColumnAnnotation("This is my wonderful prediction!")); + tableBuilder.addMetaData("ScoreColumnYES", new ColumnReference("PredictionColumnOne", "YES")); + tableBuilder.addMetaData("ScoreColumnNO", new ColumnReference("PredictionColumnOne", "NO")); + + // add another prediction and two corresponding score columns + tableBuilder.addNominal("PredictionColumnTwo", i -> i % 2 == 0 ? 
"play" : "work"); + tableBuilder.addReal("ScoreColumnPlay", i -> Math.random()); + tableBuilder.addReal("ScoreColumnWork", i -> Math.random()); + tableBuilder.addMetaData("ScoreColumnPlay", ColumnRole.SCORE); + tableBuilder.addMetaData("ScoreColumnWork", ColumnRole.SCORE); + tableBuilder.addMetaData("PredictionColumnTwo", ColumnRole.PREDICTION); + tableBuilder.addMetaData("PredictionColumnTwo", new TestMetaData("A first parameter")); + tableBuilder.addMetaData("ScoreColumnWork", new ColumnReference("PredictionColumnTwo", "work")); + tableBuilder.addMetaData("ScoreColumnPlay", new ColumnReference("PredictionColumnTwo", "play")); + + Table originalTable = tableBuilder.build(Belt.defaultContext()); + + // MODIFY CONVERTED EXAMPLE SET + ExampleSet convertedExampleSet = BeltConverter.convert(new IOTable(originalTable), CONTEXT); + Attributes attributes = convertedExampleSet.getAttributes(); + attributes.remove(attributes.get("PredictionColumnOne")); + attributes.remove(attributes.get("ScoreColumnWork")); + attributes.get("PredictionColumnTwo").setName("NewName"); + assertEquals(4, attributes.specialSize()); + assertTrue(attributes.contains(attributes.get("NewName"))); + + // CHECK DOUBLE-CONVERTED TABLE + Table convertedTable = BeltConverter.convert(convertedExampleSet, CONTEXT).getTable(); + assertFalse(convertedTable.contains("PredictionColumnOne")); + assertFalse(convertedTable.contains("ScoreColumnWork")); + assertFalse(convertedTable.contains("PredictionColumnTwo")); + assertEquals(1, convertedTable.getMetaData().get("NewName").size()); + assertEquals(2, convertedTable.getMetaData().get("ScoreColumnYES").size()); + assertEquals(2, convertedTable.getMetaData().get("ScoreColumnNO").size()); + assertEquals(2, convertedTable.getMetaData().get("ScoreColumnPlay").size()); + assertEquals(4, convertedTable.width()); + + assertEquals(ColumnRole.PREDICTION, convertedTable.getFirstMetaData("NewName", ColumnRole.class)); + assertEquals(ColumnRole.SCORE, 
convertedTable.getFirstMetaData("ScoreColumnYES", ColumnRole.class)); + assertEquals(ColumnRole.SCORE, convertedTable.getFirstMetaData("ScoreColumnNO", ColumnRole.class)); + assertEquals(ColumnRole.SCORE, convertedTable.getFirstMetaData("ScoreColumnPlay", ColumnRole.class)); + + assertEquals(new ColumnReference("PredictionColumnOne", "YES"), + convertedTable.getFirstMetaData("ScoreColumnYES", ColumnReference.class)); + assertEquals(new ColumnReference("PredictionColumnOne", "NO"), + convertedTable.getFirstMetaData("ScoreColumnNO", ColumnReference.class)); + assertEquals(new ColumnReference("PredictionColumnTwo", "play"), + convertedTable.getFirstMetaData("ScoreColumnPlay", ColumnReference.class)); + } + + /** Converts a {@link SortedExampleSet} whose attributes (all but the first) carry standard and custom roles, and checks the ColumnRole, LegacyRole and ColumnReference meta data of every resulting column. NOTE(review): the name suggests this is meant to exercise the conversion of example set views - confirm. */ @Test + public void testRolesView() { + String[] roles = new String[]{Attributes.ID_NAME, Attributes.CONFIDENCE_NAME + "_" + "Yes", + Attributes.LABEL_NAME, Attributes.PREDICTION_NAME, + Attributes.CLUSTER_NAME, Attributes.WEIGHT_NAME, Attributes.BATCH_NAME, Attributes.OUTLIER_NAME, + Attributes.CONFIDENCE_NAME, + Attributes.CLASSIFICATION_COST, "ignore-me"}; + List attributes = new ArrayList<>(); + for (int i = 0; i < roles.length + 1; i++) { + attributes.add(AttributeFactory.createAttribute(Ontology.NUMERICAL)); + } + ExampleSetBuilder builder = ExampleSets.from(attributes); + for (int i = 1; i < roles.length + 1; i++) { + builder.withRole(attributes.get(i), roles[i - 1]); + } + ExampleSet set = new SortedExampleSet(builder.build(), attributes.get(1), SortedExampleSet.DECREASING); + Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); + + ColumnRole[] result = table.labels().stream() + .map(label -> table.getFirstMetaData(label, ColumnRole.class)) + .toArray(ColumnRole[]::new); + ColumnRole[] expected = + new ColumnRole[]{null, ColumnRole.ID, ColumnRole.SCORE, ColumnRole.LABEL, ColumnRole.PREDICTION, + ColumnRole.CLUSTER, + ColumnRole.WEIGHT, ColumnRole.BATCH, ColumnRole.OUTLIER, ColumnRole.SCORE, + ColumnRole.METADATA, 
ColumnRole.METADATA}; + assertArrayEquals(expected, result); + + com.rapidminer.belt.table.LegacyRole[] legacyResult = table.labels().stream() + .map(label -> table.getFirstMetaData(label, com.rapidminer.belt.table.LegacyRole.class)) + .toArray(com.rapidminer.belt.table.LegacyRole[]::new); + com.rapidminer.belt.table.LegacyRole[] legacyExpected = + new com.rapidminer.belt.table.LegacyRole[]{null, null, null, null, null, null, null, null, null, + null, + new com.rapidminer.belt.table.LegacyRole(Attributes.CLASSIFICATION_COST), + new com.rapidminer.belt.table.LegacyRole("ignore-me")}; + assertArrayEquals(legacyExpected, legacyResult); + + ColumnReference[] references = table.labels().stream() + .map(label -> table.getFirstMetaData(label, ColumnReference.class)) + .toArray(ColumnReference[]::new); + ColumnReference[] referencesExpected = + new ColumnReference[]{null, null, + new ColumnReference(set.getAttributes().getPredictedLabel().getName(), "Yes"), null, null, + null, null, null, null, new ColumnReference(set.getAttributes().getPredictedLabel().getName()), + null, null}; + assertArrayEquals(referencesExpected, references); + } + + /** Checks that the annotations of the converted {@link IOTable} are equal to the annotations set on the example set. */ @Test + public void testAnnotations() { + Attribute attribute1 = attributeInt(); + Attribute attribute2 = attributeReal(); + ExampleSet set = ExampleSets.from(attribute1, attribute2).withBlankSize(10) + .withColumnFiller(attribute1, i -> i + 1).withColumnFiller(attribute2, i -> i + 1.7).build(); + set.getAnnotations().setAnnotation(Annotations.KEY_DC_AUTHOR, "gmeier"); + + IOTable table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT); + + assertEquals(set.getAnnotations(), table.getAnnotations()); + } + } + + /** Tests for converting an {@link IOTable} into an {@link ExampleSet}. The tests are parameterized and run once with the legacy data management parameter set to "true" and once set to "false". */ @RunWith(Parameterized.class) + public static class TableToSet { + + /** Stores the given mode as value of {@link RapidMiner#PROPERTY_RAPIDMINER_SYSTEM_LEGACY_DATA_MGMT} before the test runs. */ public TableToSet(boolean legacyMode) { + ParameterService.setParameterValue(RapidMiner.PROPERTY_RAPIDMINER_SYSTEM_LEGACY_DATA_MGMT, + String.valueOf(legacyMode)); + } + + /** Supplies the constructor argument: legacy mode on ({@code true}) and off ({@code false}). */ @Parameters(name = "legacyMode={0}") + public static Collection params() { + 
return Arrays.asList(true, false); + } + + /** Converts a table with one real and one 53-bit integer column and compares the values read from the table with the values read from the example set. */ @Test + public void testSimple() { + Table table = Builders.newTableBuilder(112).addReal("real", i -> 3 * i / 5.0).addInt53Bit("int", i -> 5 * i) + .build(Belt.defaultContext()); + + ExampleSet set = com.rapidminer.belt.table.BeltConverter.convert(new IOTable(table), CONTEXT); + + double[][] expected = readTableToArray(table); + double[][] result = readExampleSetToArray(set); + assertArrayEquals(expected, result); + } + + /** Converts two nominal columns created from UInt8 buffers, the second one containing a null entry, and compares the string values of table and example set. */ @Test + public void testNominal() { + NominalBuffer buffer = BufferAccessor.get().newUInt8Buffer(ColumnType.NOMINAL, 112); + for (int i = 0; i < buffer.size(); i++) { + buffer.set(i, "value" + (i % 5)); + } + NominalBuffer buffer2 = BufferAccessor.get().newUInt8Buffer(ColumnType.NOMINAL, 112); + for (int i = 0; i < buffer2.size(); i++) { + buffer2.set(i, "val" + (i % 7)); + } + buffer2.set(42, null); + Table table = Builders.newTableBuilder(112).add("first", buffer.toColumn()) + .add("second", buffer2.toColumn()) + .build(Belt.defaultContext()); + + ExampleSet set = com.rapidminer.belt.table.BeltConverter.convert(new IOTable(table), CONTEXT); + + String[][] expected = readTableToStringArray(table); + String[][] result = readExampleSetToStringArray(set); + assertArrayEquals(expected, result); + } + + /** Converts nominal columns in which some values were overwritten by null and which were then passed through {@code Columns.removeUnusedDictionaryValues} with {@code CleanupOption.REMOVE}; compares the string values of table and example set. */ @Test + public void testNominalGaps() { + NominalBuffer buffer = Buffers.nominalBuffer(11); + for (int i = 0; i < buffer.size(); i++) { + buffer.set(i, "value" + i); + } + buffer.set(7, null); + buffer.set(5, null); + NominalBuffer buffer2 = Buffers.nominalBuffer(11); + for (int i = 0; i < buffer2.size(); i++) { + buffer2.set(i, "val" + i); + } + buffer2.set(3, null); + buffer2.set(5, null); + Column column = Columns.removeUnusedDictionaryValues(buffer.toColumn(), + Columns.CleanupOption.REMOVE, Belt.defaultContext()); + Column column2 = Columns.removeUnusedDictionaryValues(buffer2.toColumn(), + Columns.CleanupOption.REMOVE, Belt.defaultContext()); + Table table = Builders.newTableBuilder(11).add("first", 
column) + .add("second", column2) + .build(Belt.defaultContext()); + + ExampleSet set = com.rapidminer.belt.table.BeltConverter.convert(new IOTable(table), CONTEXT); + + String[][] expected = readTableToStringArray(table); + String[][] result = readExampleSetToStringArray(set); + assertArrayEquals(expected, result); + } + + /** Converts boolean columns with two values, with a single value used as positive or as negative value, and without any value. Checks the string values, the positive and negative strings of the resulting nominal mappings, and the value types of the attributes. */ @Test + public void testBinominal() { + NominalBuffer buffer = Buffers.nominalBuffer(112, 2); + for (int i = 0; i < buffer.size(); i++) { + buffer.set(i, "value" + (i % 2)); + } + buffer.set(100, null); + NominalBuffer buffer2 = Buffers.nominalBuffer(112, 2); + for (int i = 0; i < buffer2.size(); i++) { + buffer2.set(i, "val" + (i % 2)); + } + buffer2.set(42, null); + NominalBuffer buffer3 = Buffers.nominalBuffer(112, 2); + for (int i = 0; i < buffer.size(); i += 2) { + buffer3.set(i, "one"); + } + NominalBuffer buffer4 = Buffers.nominalBuffer(112, 2); + + Table table = Builders.newTableBuilder(112).add("first", buffer.toBooleanColumn( "value0")) + .add("second", buffer2.toBooleanColumn( "val1")) + .add("onePositive", buffer3.toBooleanColumn( "one")) + .add("oneNegative", buffer3.toBooleanColumn( null)) + .add("empty", buffer4.toBooleanColumn( null)) + .build(Belt.defaultContext()); + + ExampleSet set = com.rapidminer.belt.table.BeltConverter.convert(new IOTable(table), CONTEXT); + + String[][] expected = readTableToStringArray(table); + String[][] result = readExampleSetToStringArray(set); + assertArrayEquals(expected, result); + + NominalMapping first = set.getAttributes().get("first").getMapping(); + assertEquals("value1", first.getNegativeString()); + assertEquals("value0", first.getPositiveString()); + + NominalMapping second = set.getAttributes().get("second").getMapping(); + assertEquals("val0", second.getNegativeString()); + assertEquals("val1", second.getPositiveString()); + + NominalMapping oneNegative = set.getAttributes().get("oneNegative").getMapping(); + assertEquals("one", oneNegative.getNegativeString()); + 
assertNull(oneNegative.getPositiveString()); + + NominalMapping empty = set.getAttributes().get("empty").getMapping(); + assertNull(empty.getPositiveString()); + assertNull(empty.getNegativeString()); + + int[] valueTypes = + Arrays.stream(set.getAttributes().createRegularAttributeArray()).mapToInt(Attribute::getValueType).toArray(); + assertArrayEquals(new int[]{Ontology.BINOMINAL, Ontology.BINOMINAL, Ontology.POLYNOMINAL, + Ontology.BINOMINAL, Ontology.BINOMINAL}, valueTypes); + } + + /** Converts boolean columns in which the first written value "bla" is overwritten afterwards, so that only "blup" and one null entry remain, after passing them through {@code Columns.removeUnusedDictionaryValues}. The first column uses "bla" as positive value, the second "blup". Checks the string values, the value types and the mapping of the first attribute. */ @Test + public void testBinominalGaps() { + NominalBuffer buffer = BufferAccessor.get().newUInt2Buffer(ColumnType.NOMINAL, 112); + buffer.set(0, "bla"); + for (int i = 0; i < buffer.size(); i++) { + buffer.set(i, "blup"); + } + buffer.set(100, null); + NominalBuffer buffer2 = BufferAccessor.get().newUInt2Buffer(ColumnType.NOMINAL, 112); + buffer2.set(0, "bla"); + for (int i = 0; i < buffer.size(); i++) { + buffer2.set(i, "blup"); + } + buffer2.set(100, null); + + Column bla = Columns.removeUnusedDictionaryValues(buffer.toBooleanColumn( "bla"), + Columns.CleanupOption.REMOVE, Belt.defaultContext()); + Column blup = Columns.removeUnusedDictionaryValues(buffer2.toBooleanColumn("blup"), + Columns.CleanupOption.REMOVE, Belt.defaultContext()); + Table table = Builders.newTableBuilder(112).add("first", bla) + .add("second", blup) + .build(Belt.defaultContext()); + + ExampleSet set = com.rapidminer.belt.table.BeltConverter.convert(new IOTable(table), CONTEXT); + + String[][] expected = readTableToStringArray(table); + String[][] result = readExampleSetToStringArray(set); + assertArrayEquals(expected, result); + + int[] valueTypes = + Arrays.stream(set.getAttributes().createRegularAttributeArray()).mapToInt(Attribute::getValueType).toArray(); + assertArrayEquals(new int[]{Ontology.BINOMINAL, Ontology.POLYNOMINAL}, valueTypes); + + NominalMapping first = set.getAttributes().get("first").getMapping(); + assertEquals("blup", first.getNegativeString()); + assertNull(first.getPositiveString()); + 
+ } + + /** Converts a nominal column whose buffer is filled twice, the second pass overwriting every "valu..." entry with a "value..." entry, together with a second nominal column containing a null entry; compares the string values of table and example set. */ @Test + public void testNominalUnusedValue() { + NominalBuffer buffer = Buffers.nominalBuffer(112); + for (int i = 0; i < buffer.size(); i++) { + buffer.set(i, "valu" + (i % 5)); + } + for (int i = 0; i < buffer.size(); i++) { + buffer.set(i, "value" + (i % 5)); + } + + NominalBuffer buffer2 = Buffers.nominalBuffer(112); + for (int i = 0; i < buffer2.size(); i++) { + buffer2.set(i, "val" + (i % 7)); + } + buffer2.set(42, null); + Table table = Builders.newTableBuilder(112).add("first", buffer.toColumn()) + .add("second", buffer2.toColumn()) + .build(Belt.defaultContext()); + + ExampleSet set = com.rapidminer.belt.table.BeltConverter.convert(new IOTable(table), CONTEXT); + + String[][] expected = readTableToStringArray(table); + String[][] result = readExampleSetToStringArray(set); + assertArrayEquals(expected, result); + } + + /** Converts a table with 30 real and 30 integer columns and compares the values read from the table with the values read from the example set. */ @Test + public void testManyColumns() { + TableBuilder builder = Builders.newTableBuilder(11); + for (int i = 0; i < 30; i++) { + builder.addReal("real" + i, j -> 3 * j / 5.0).addInt53Bit("int" + i, j -> 5 * j); + } + Table table = builder.build(Belt.defaultContext()); + + ExampleSet set = com.rapidminer.belt.table.BeltConverter.convert(new IOTable(table), CONTEXT); + + double[][] expected = readTableToArray(table); + double[][] result = readExampleSetToArray(set); + assertArrayEquals(expected, result); + } + + /** Checks the special names of the attribute roles created from the column roles: one column without role, one column per listed {@link ColumnRole}, and two columns that carry a LegacyRole in addition to their column role. */ @Test + public void testRoles() { + TableBuilder builder = Builders.newTableBuilder(10); + builder.addInt53Bit("att-1", i -> i); + + ColumnRole[] columnRoles = new ColumnRole[]{ColumnRole.ID, ColumnRole.LABEL, ColumnRole.PREDICTION, + ColumnRole.SCORE, ColumnRole.WEIGHT, ColumnRole.OUTLIER, ColumnRole.CLUSTER, ColumnRole.BATCH, + ColumnRole.METADATA}; + for (int i = 0; i < columnRoles.length; i++) { + builder.addReal("att" + i, j -> j); + builder.addMetaData("att" + i, columnRoles[i]); + } + + builder.addInt53Bit("batt1", i -> i); + builder.addMetaData("batt1", ColumnRole.METADATA); + builder.addMetaData("batt1", new 
com.rapidminer.belt.table.LegacyRole("ignore-me")); + + builder.addInt53Bit("batt2", i -> i); + builder.addMetaData("batt2", ColumnRole.SCORE); + builder.addMetaData("batt2", new com.rapidminer.belt.table.LegacyRole("confidence_Yes")); + + Table table = builder.build(Belt.defaultContext()); + + ExampleSet set = com.rapidminer.belt.table.BeltConverter.convert(new IOTable(table), CONTEXT); + + Iterable iterable = () -> set.getAttributes().allAttributeRoles(); + String[] result = StreamSupport.stream(iterable.spliterator(), false).map(AttributeRole::getSpecialName) + .toArray(String[]::new); + String[] expected = + new String[]{null, Attributes.ID_NAME, Attributes.LABEL_NAME, Attributes.PREDICTION_NAME, + Attributes.CONFIDENCE_NAME, Attributes.WEIGHT_NAME, Attributes.OUTLIER_NAME, + Attributes.CLUSTER_NAME, Attributes.BATCH_NAME, "meta_data", "ignore-me", + "confidence_Yes"}; + assertArrayEquals(expected, result); + } + + /** Checks the value type of the attribute created for real, integer, date-time, time, nominal and boolean columns, both without and with {@code LegacyType} meta data attached to the column. */ @Test + public void testTypes() { + TableBuilder builder = Builders.newTableBuilder(10); + builder.addReal("att1", i -> i); + + builder.addReal("att2", i -> i); + builder.addMetaData("att2", com.rapidminer.belt.table.LegacyType.NUMERICAL); + + builder.addInt53Bit("att3", i -> i); + + builder.addInt53Bit("att4", i -> i); + builder.addMetaData("att4", com.rapidminer.belt.table.LegacyType.NUMERICAL); + + builder.addDateTime("att5", i -> Instant.EPOCH); + + builder.addDateTime("att6", i -> Instant.EPOCH); + builder.addMetaData("att6", com.rapidminer.belt.table.LegacyType.DATE); + + builder.addDateTime("att6.5", i -> Instant.EPOCH); + builder.addMetaData("att6.5", com.rapidminer.belt.table.LegacyType.TIME); + + builder.addTime("att7", i -> LocalTime.NOON); + + builder.addTime("att7.5", i -> LocalTime.NOON); + builder.addMetaData("att7.5", com.rapidminer.belt.table.LegacyType.NUMERICAL); + + builder.addNominal("att8", i -> i % 2 == 0 ? "A" : "B"); + + builder.addNominal("att9", i -> i % 2 == 0 ? 
"A" : "B", 2); + + builder.addNominal("att10", i -> i % 2 == 0 ? "A" : "B"); + builder.addMetaData("att10", com.rapidminer.belt.table.LegacyType.BINOMINAL); + + builder.addNominal("att11", i -> i % 2 == 0 ? "A" : "B", 2); + builder.addMetaData("att11", com.rapidminer.belt.table.LegacyType.STRING); + + builder.addNominal("att12", i -> i % 2 == 0 ? "A" : "B"); + builder.addMetaData("att12", com.rapidminer.belt.table.LegacyType.FILE_PATH); + + builder.addNominal("att13", i -> i % 2 == 0 ? "A" : "B", 2); + builder.addMetaData("att13", com.rapidminer.belt.table.LegacyType.NOMINAL); + + builder.addBoolean("att14", i -> i % 2 == 0 ? "A" : "B", "A"); + + Table table = builder.build(Belt.defaultContext()); + + ExampleSet set = com.rapidminer.belt.table.BeltConverter.convert(new IOTable(table), CONTEXT); + + int[] result = + StreamSupport.stream(set.getAttributes().spliterator(), false).mapToInt(Attribute::getValueType) + .toArray(); + int[] expected = new int[]{Ontology.REAL, Ontology.NUMERICAL, Ontology.INTEGER, Ontology.INTEGER, + Ontology.DATE_TIME, Ontology.DATE, Ontology.TIME, Ontology.INTEGER, Ontology.INTEGER, Ontology.POLYNOMINAL, Ontology.POLYNOMINAL, + Ontology.BINOMINAL, Ontology.STRING, Ontology.FILE_PATH, Ontology.NOMINAL, Ontology.BINOMINAL}; + + assertArrayEquals(expected, result); + } + + /** Attaches {@code LegacyType} meta data to columns of a different kind (for example DATE_TIME on a real column, or a legacy type on a time column) and checks the resulting value types. Per the expected array none of these legacy types is applied: the attributes are REAL, INTEGER, POLYNOMINAL or DATE_TIME. */ @Test + public void testInvalidLegacyTypes() { + TableBuilder builder = Builders.newTableBuilder(10); + builder.addReal("att1", i -> i); + builder.addMetaData("att1", com.rapidminer.belt.table.LegacyType.DATE_TIME); + + builder.addReal("att2", i -> i); + builder.addMetaData("att2", com.rapidminer.belt.table.LegacyType.INTEGER); + + builder.addInt53Bit("att3", i -> i); + builder.addMetaData("att3", com.rapidminer.belt.table.LegacyType.REAL); + + builder.addInt53Bit("att4", i -> i); + builder.addMetaData("att4", com.rapidminer.belt.table.LegacyType.NOMINAL); + + builder.addNominal("att5", i -> i % 2 == 0 ? "A" : i % 3 == 0 ? 
"B" : "C", 2); + builder.addMetaData("att5", com.rapidminer.belt.table.LegacyType.BINOMINAL); + + builder.addTime("att6", i -> LocalTime.NOON); + builder.addMetaData("att6", com.rapidminer.belt.table.LegacyType.TIME); + + builder.addTime("att7", i -> LocalTime.NOON); + builder.addMetaData("att7", com.rapidminer.belt.table.LegacyType.DATE); + + builder.addTime("att8", i -> LocalTime.NOON); + builder.addMetaData("att8", com.rapidminer.belt.table.LegacyType.DATE_TIME); + + builder.addDateTime("att9", i -> Instant.EPOCH); + builder.addMetaData("att9", com.rapidminer.belt.table.LegacyType.NOMINAL); + + Table table = builder.build(Belt.defaultContext()); + + ExampleSet set = com.rapidminer.belt.table.BeltConverter.convert(new IOTable(table), CONTEXT); + + int[] result = + StreamSupport.stream(set.getAttributes().spliterator(), false).mapToInt(Attribute::getValueType) + .toArray(); + int[] expected = new int[]{Ontology.REAL, Ontology.REAL, Ontology.INTEGER, Ontology.INTEGER, + Ontology.POLYNOMINAL, Ontology.INTEGER, Ontology.INTEGER, Ontology.INTEGER, Ontology.DATE_TIME}; + assertArrayEquals(expected, result); + } + + /** Checks that the annotations of the converted example set are equal to the annotations set on the {@link IOTable}. */ @Test + public void testAnnotations() { + Table table = Builders.newTableBuilder(11).addReal("real", i -> 3 * i / 5.0).addInt53Bit("int", i -> 5 * i) + .build(Belt.defaultContext()); + + IOTable tableObject = new IOTable(table); + tableObject.getAnnotations().setAnnotation(Annotations.KEY_DC_AUTHOR, "gmeier"); + + ExampleSet set = com.rapidminer.belt.table.BeltConverter.convert(tableObject, CONTEXT); + + assertEquals(tableObject.getAnnotations(), set.getAnnotations()); + } + + + /** Expects a {@code BeltConverter.ConversionException} when the table contains a text-set column. The exception is caught to check the reported column name and column type and is then rethrown so that the {@code expected} clause of the annotation is satisfied. */ @Test(expected = BeltConverter.ConversionException.class) + public void testAdvancedColumns() { + Table table = Builders.newTableBuilder(11).addReal("real", i -> 3 * i / 5.0).addInt53Bit("int", i -> 5 * i) + .addTextset("textset", i -> new StringSet(Collections.singleton("val"+i))) + .build(Belt.defaultContext()); + + IOTable tableObject = new IOTable(table); + try { + 
BeltConverter.convert(tableObject, CONTEXT); + } catch (BeltConverter.ConversionException e) { + assertEquals("textset", e.getColumnName()); + assertEquals(ColumnType.TEXTSET, e.getType()); + throw e; + } + } + } + + /** Parameterized tests that convert different kinds of example sets (plain, conditioned, sorted, mapped, splitted, and one with an attribute transformation) to a table and compare the values. */ @RunWith(Parameterized.class) + public static class InputDetection { + + @Parameter + public String inputType; + + @Parameter(1) + public ExampleSet input; + + /** Builds the test inputs. Each entry consists of a display name (a simple class name) and the example set to convert. */ @Parameters(name = "{0}") + public static Iterable inputClasses() throws ExpressionEvaluationException { + Attribute attribute1 = attributeInt(); + Attribute attribute2 = attributeReal(); + + // Simple example set with no logic in the view + ExampleSet simpleSet = ExampleSets.from(attribute1, attribute2) + .withBlankSize(200) + .withColumnFiller(attribute1, i -> i + 1) + .withColumnFiller(attribute2, i -> i + 1.7) + .build(); + + // Complex example sets that are considered thread-safe + ExampleSet conditionedSet = new ConditionedExampleSet(simpleSet, new Condition() { + @Override + public boolean conditionOk(Example example) throws ExpressionEvaluationException { + return example.getValue(attribute1) < 100 && example.getValue(attribute2) < 100; + } + + @Override + public Condition duplicate() { + return null; + } + }); + + ExampleSet sortedSet = new SortedExampleSet(simpleSet, attribute1, SortedExampleSet.DECREASING); + + ExampleSet mappedSet = new MappedExampleSet(simpleSet, new int[]{ + 133, 156, 16, 0, 20, 199, 29, 192, + 185, 33, 175, 58, 15, 100, 2, 68, + 9, 122, 87, 84, 64, 56, 83, 177, + 39, 90, 112, 66, 90, 17, 95, 25} + ); + + // Complex example set that is not considered thread-safe + Partition partition = new Partition(new double[]{0.25, 0.5, 0.25}, 200, new SimplePartitionBuilder()); + SplittedExampleSet splittedSet = new SplittedExampleSet(simpleSet, partition); + + // Simple example set with a attribute transformation that is considered unsafe + ExampleSet transformationSet = (ExampleSet) simpleSet.clone(); + Attribute clonedAttribute = 
transformationSet.getAttributes().get(attribute1.getName()); + clonedAttribute.addTransformation(new AttributeTransformation() { + @Override + public double transform(Attribute attribute, double value) { + return value * 42; + } + + @Override + public double inverseTransform(Attribute attribute, double value) { + throw new UnsupportedOperationException(); + } + + @Override + public boolean isReversable() { + return false; + } + + @Override + public Object clone() { + return this; + } + }); + + return Arrays.asList(new Object[][]{ + {simpleSet.getClass().getSimpleName(), simpleSet}, + {conditionedSet.getClass().getSimpleName(), conditionedSet}, + {sortedSet.getClass().getSimpleName(), sortedSet}, + {mappedSet.getClass().getSimpleName(), mappedSet}, + {splittedSet.getClass().getSimpleName(), splittedSet}, + {AttributeTransformation.class.getSimpleName(), transformationSet} + }); + } + + /** Converts the parameterized input to a table and compares the values read from the table with the values read from the input example set. */ @Test + public void testInputs() { + Table table = com.rapidminer.belt.table.BeltConverter.convert(input, CONTEXT).getTable(); + double[][] result = readTableToArray(table); + double[][] expected = readExampleSetToArray(input); + assertArrayEquals(expected, result); + } + + } + + /** Tests that convert an example set to a table and back again and compare the result with the original example set. The tests are parameterized and run with the legacy data management parameter set to "true" and to "false". */ @RunWith(Parameterized.class) + public static class BackAndForth { + + /** Registers the RapidMiner asserters that {@code RapidAssert.assertEquals} is used with in the tests of this class. */ @BeforeClass + public static void setup() { + RapidAssert.ASSERTER_REGISTRY.registerAllAsserters(new AsserterFactoryRapidMiner()); + } + + /** Stores the given mode as value of {@link RapidMiner#PROPERTY_RAPIDMINER_SYSTEM_LEGACY_DATA_MGMT} before the test runs. */ public BackAndForth(boolean legacyMode) { + ParameterService.setParameterValue(RapidMiner.PROPERTY_RAPIDMINER_SYSTEM_LEGACY_DATA_MGMT, + String.valueOf(legacyMode)); + } + + /** Supplies the constructor argument: legacy mode on ({@code true}) and off ({@code false}). */ @Parameters(name = "legacyMode={0}") + public static Collection params() { + return Arrays.asList(true, false); + } + + /** Round trip for an example set with one attribute per value type index from 1 up to {@code Ontology.VALUE_TYPE_NAMES.length - 1}. */ @Test + public void testAllTypes() { + List attributes = new ArrayList<>(); + for (int i = 1; i < Ontology.VALUE_TYPE_NAMES.length; i++) { + attributes.add(AttributeFactory.createAttribute(i)); + } + ExampleSet set = ExampleSets.from(attributes) + .build(); + + Table table = 
com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); + ExampleSet backSet = com.rapidminer.belt.table.BeltConverter.convert(new IOTable(table), CONTEXT); + RapidAssert.assertEquals(set, backSet); + } + + @Test + public void testAllTypesView() { + List attributes = new ArrayList<>(); + for (int i = 1; i < Ontology.VALUE_TYPE_NAMES.length; i++) { + attributes.add(AttributeFactory.createAttribute(i)); + } + ExampleSet set = new SortedExampleSet(ExampleSets.from(attributes) + .build(), attributes.get(1), SortedExampleSet.DECREASING); + + Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); + ExampleSet backSet = com.rapidminer.belt.table.BeltConverter.convert(new IOTable(table), CONTEXT); + RapidAssert.assertEquals(set, backSet); + } + + @Test + public void testRoles() { + Attribute integer = attributeInt(); + Attribute animals = attributeDogCatMouse(); + Attribute real = attributeReal(); + Attribute answer = attributeYesNo(); + Attribute confidence = attributeReal(); + confidence.setName("confidence"); + Attribute cluster = attributeReal(); + cluster.setName("cluster"); + List attributes = Arrays.asList(integer, animals, real, answer, confidence, cluster); + + ExampleSet set = ExampleSets.from(attributes).withBlankSize(10) + .withRole(integer, Attributes.CONFIDENCE_NAME + "_" + "Yes") + .withRole(answer, Attributes.LABEL_NAME) + .withRole(confidence, "confidence(yes)") + .withRole(cluster, "cluster_1_probability") + .withRole(animals, "someStupidRole").build(); + + Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); + ExampleSet backSet = com.rapidminer.belt.table.BeltConverter.convert(new IOTable(table), CONTEXT); + RapidAssert.assertEquals(set, backSet); + } + + @Test + public void testNumericTypes() { + Attribute numeric = AttributeFactory.createAttribute("numeric", Ontology.NUMERICAL); + Attribute real = AttributeFactory.createAttribute("real", Ontology.REAL); + Attribute 
integer = AttributeFactory.createAttribute("integer", Ontology.INTEGER); + Attribute dateTime = AttributeFactory.createAttribute("date_time", Ontology.DATE_TIME); + Attribute date = AttributeFactory.createAttribute("date", Ontology.DATE); + Attribute time = AttributeFactory.createAttribute("time", Ontology.TIME); + List attributes = Arrays.asList(numeric, real, integer, dateTime, date, time); + ExampleSet set = ExampleSets.from(attributes).withBlankSize(150) + .withColumnFiller(numeric, i -> Math.random() > 0.7 ? Double.NaN : Math.random()) + .withColumnFiller(real, i -> Math.random() > 0.7 ? Double.NaN : 42 + Math.random()) + .withColumnFiller(integer, i -> Math.random() > 0.7 ? Double.NaN : Math.round(Math.random() * 100)) + .withColumnFiller(dateTime, i -> Math.random() > 0.7 ? Double.NaN : (i % 3 == 0 ? -1 : 1) + * 1515410698d + Math.floor(Math.random() * 1000)) + .withColumnFiller(date, i -> Math.random() > 0.7 ? Double.NaN : (i % 3 == 0 ? -1 : 1) * + 230169600000d + Math.floor(Math.random() * 100) * 1000d * 60 * 60 * 24) + .withColumnFiller(time, i -> Math.random() > 0.7 ? Double.NaN : + (i % 3 == 0 ? 
-1 : 1) * Math.floor(Math.random() * 60 * 60 * 24 * 1000)) + .build(); + + Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); + ExampleSet backSet = com.rapidminer.belt.table.BeltConverter.convert(new IOTable(table), CONTEXT); + RapidAssert.assertEquals(set, backSet); + } + + @Test + public void testNominalTypes() { + Attribute nominal = AttributeFactory.createAttribute("nominal", Ontology.NOMINAL); + Attribute string = AttributeFactory.createAttribute("string", Ontology.STRING); + Attribute polynominal = AttributeFactory.createAttribute("polynominal", Ontology.POLYNOMINAL); + Attribute binominal = AttributeFactory.createAttribute("binominal", Ontology.BINOMINAL); + Attribute path = AttributeFactory.createAttribute("path", Ontology.FILE_PATH); + for (int i = 0; i < 5; i++) { + nominal.getMapping().mapString("nominalValue" + i); + } + for (int i = 0; i < 4; i++) { + string.getMapping().mapString("veryVeryLongStringValue" + i); + } + for (int i = 0; i < 6; i++) { + polynominal.getMapping().mapString("polyValue" + i); + } + for (int i = 0; i < 2; i++) { + binominal.getMapping().mapString("binominalValue" + i); + } + for (int i = 0; i < 3; i++) { + path.getMapping().mapString("//folder/sufolder/subsubfolder/file" + i); + } + + List attributes = Arrays.asList(nominal, string, polynominal, binominal, path); + Random random = new Random(); + ExampleSet set = ExampleSets.from(attributes).withBlankSize(50) + .withColumnFiller(nominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(5)) + .withColumnFiller(string, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(4)) + .withColumnFiller(polynominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(6)) + .withColumnFiller(binominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(2)) + .withColumnFiller(path, i -> random.nextDouble() > 0.7 ? 
Double.NaN : random.nextInt(3)) + .build(); + + Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); + ExampleSet backSet = com.rapidminer.belt.table.BeltConverter.convert(new IOTable(table), CONTEXT); + RapidAssert.assertEquals(set, backSet); + } + + @Test + public void testIncompleteBinominalTypes() { + Attribute binominalOne = AttributeFactory.createAttribute("binominalOne", Ontology.BINOMINAL); + Attribute binominalZero = AttributeFactory.createAttribute("binominalZero", Ontology.BINOMINAL); + binominalOne.getMapping().mapString("binominalValue" + 1); + + List attributes = Arrays.asList(binominalOne, binominalZero); + Random random = new Random(); + ExampleSet set = ExampleSets.from(attributes).withBlankSize(50) + .withColumnFiller(binominalOne, i -> random.nextDouble() > 0.7 ? Double.NaN : 0) + .withColumnFiller(binominalZero, i -> Double.NaN) + .build(); + + Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); + ExampleSet backSet = com.rapidminer.belt.table.BeltConverter.convert(new IOTable(table), CONTEXT); + RapidAssert.assertEquals(set, backSet); + } + + + } + + @RunWith(Parameterized.class) + public static class TableToHeaderSet { + + @BeforeClass + public static void setup() { + RapidAssert.ASSERTER_REGISTRY.registerAllAsserters(new AsserterFactoryRapidMiner()); + } + + public TableToHeaderSet(boolean legacyMode) { + ParameterService.setParameterValue(RapidMiner.PROPERTY_RAPIDMINER_SYSTEM_LEGACY_DATA_MGMT, + String.valueOf(legacyMode)); + } + + @Parameters(name = "legacyMode={0}") + public static Collection params() { + return Arrays.asList(true, false); + } + + + @Test + public void testNominalTypes() { + Attribute nominal = AttributeFactory.createAttribute("nominal", Ontology.NOMINAL); + Attribute string = AttributeFactory.createAttribute("string", Ontology.STRING); + Attribute polynominal = AttributeFactory.createAttribute("polynominal", Ontology.POLYNOMINAL); + Attribute binominal 
= AttributeFactory.createAttribute("binominal", Ontology.BINOMINAL); + Attribute path = AttributeFactory.createAttribute("path", Ontology.FILE_PATH); + for (int i = 0; i < 5; i++) { + nominal.getMapping().mapString("nominalValue" + i); + } + for (int i = 0; i < 4; i++) { + string.getMapping().mapString("veryVeryLongStringValue" + i); + } + for (int i = 0; i < 6; i++) { + polynominal.getMapping().mapString("polyValue" + i); + } + for (int i = 0; i < 2; i++) { + binominal.getMapping().mapString("binominalValue" + i); + } + for (int i = 0; i < 3; i++) { + path.getMapping().mapString("//folder/sufolder/subsubfolder/file" + i); + } + + List attributes = Arrays.asList(nominal, string, polynominal, binominal, path); + Random random = new Random(); + ExampleSet set = ExampleSets.from(attributes).withBlankSize(50) + .withColumnFiller(nominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(5)) + .withColumnFiller(string, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(4)) + .withColumnFiller(polynominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(6)) + .withColumnFiller(binominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(2)) + .withColumnFiller(path, i -> random.nextDouble() > 0.7 ? 
Double.NaN : random.nextInt(3)) + .build(); + + Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); + + HeaderExampleSet headerSet = com.rapidminer.belt.table.BeltConverter.convertHeader(table); + + int[] oldValueTypes = Arrays.stream(set.getAttributes().createRegularAttributeArray()) + .mapToInt(Attribute::getValueType).toArray(); + int[] headerValueTypes = Arrays.stream(headerSet.getAttributes().createRegularAttributeArray()) + .mapToInt(Attribute::getValueType).toArray(); + assertArrayEquals(oldValueTypes, headerValueTypes); + + ExampleSet remapped = RemappedExampleSet.create(set, headerSet, false, true); + + String[][] expected = readTableToStringArray(table); + String[][] result = readExampleSetToStringArray(remapped); + assertArrayEquals(expected, result); + + double[][] expectedMapping = readTableToArray(table); + double[][] resultMapping = readExampleSetToArray(remapped); + assertArrayEquals(expectedMapping, resultMapping); + } + + @Test + public void testNumericTypes() { + Attribute numeric = AttributeFactory.createAttribute("numeric", Ontology.NUMERICAL); + Attribute real = AttributeFactory.createAttribute("real", Ontology.REAL); + Attribute integer = AttributeFactory.createAttribute("integer", Ontology.INTEGER); + Attribute dateTime = AttributeFactory.createAttribute("date_time", Ontology.DATE_TIME); + Attribute date = AttributeFactory.createAttribute("date", Ontology.DATE); + Attribute time = AttributeFactory.createAttribute("time", Ontology.TIME); + List attributes = Arrays.asList(numeric, real, integer, dateTime, date, time); + ExampleSet set = ExampleSets.from(attributes).withBlankSize(50).withRole(integer, Attributes.LABEL_NAME).build(); + + Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); + + HeaderExampleSet headerExampleSet = com.rapidminer.belt.table.BeltConverter.convertHeader(table); + + int[] oldValueTypes = 
Arrays.stream(set.getAttributes().createRegularAttributeArray()) + .mapToInt(Attribute::getValueType).toArray(); + int[] headerValueTypes = Arrays.stream(headerExampleSet.getAttributes().createRegularAttributeArray()) + .mapToInt(Attribute::getValueType).toArray(); + assertArrayEquals(oldValueTypes, headerValueTypes); + } + + @Test + public void testRemappingSame() { + NominalBuffer buffer = BufferAccessor.get().newUInt16Buffer(ColumnType.NOMINAL, 112); + for (int i = 0; i < buffer.size(); i++) { + buffer.set(i, "value" + (i % 5)); + } + NominalBuffer buffer2 = BufferAccessor.get().newUInt16Buffer(ColumnType.NOMINAL, 112); + for (int i = 0; i < buffer2.size(); i++) { + buffer2.set(i, "val" + (i % 7)); + } + buffer2.set(42, null); + Table table = Builders.newTableBuilder(112).add("first", buffer.toColumn()) + .add("second", buffer2.toColumn()) + .build(Belt.defaultContext()); + + ExampleSet set = com.rapidminer.belt.table.BeltConverter.convert(new IOTable(table), CONTEXT); + + HeaderExampleSet remappingSet = com.rapidminer.belt.table.BeltConverter.convertHeader(table); + + ExampleSet remapped = RemappedExampleSet.create(set, remappingSet, false, true); + + String[][] expected = readTableToStringArray(table); + String[][] result = readExampleSetToStringArray(remapped); + assertArrayEquals(expected, result); + + double[][] expectedMapping = readTableToArray(table); + double[][] resultMapping = readExampleSetToArray(remapped); + assertArrayEquals(expectedMapping, resultMapping); + } + + @Test + public void testRemappingUnusedValue() { + NominalBuffer buffer = BufferAccessor.get().newUInt16Buffer(ColumnType.NOMINAL, 112); + for (int i = 0; i < buffer.size(); i++) { + buffer.set(i, "valu" + (i % 5)); + } + for (int i = 0; i < buffer.size(); i++) { + buffer.set(i, "value" + (i % 5)); + } + + NominalBuffer buffer2 = BufferAccessor.get().newInt32Buffer(ColumnType.NOMINAL, 112); + for (int i = 0; i < buffer2.size(); i++) { + buffer2.set(i, "val" + (i % 7)); + } + 
buffer2.set(42, null); + Table table = Builders.newTableBuilder(112).add("first", buffer.toColumn()) + .add("second", buffer2.toColumn()) + .build(Belt.defaultContext()); + + buffer = BufferAccessor.get().newUInt16Buffer(ColumnType.NOMINAL, 112); + for (int i = 0; i < buffer.size(); i++) { + buffer.set(i, "value" + (i % 5)); + } + + Table table2 = Builders.newTableBuilder(112).add("first", buffer.toColumn()) + .add("second", buffer2.toColumn()) + .build(Belt.defaultContext()); + + ExampleSet set = com.rapidminer.belt.table.BeltConverter.convert(new IOTable(table), CONTEXT); + + HeaderExampleSet remappingSet = com.rapidminer.belt.table.BeltConverter.convertHeader(table2); + + ExampleSet remapped = RemappedExampleSet.create(set, remappingSet, false, true); + + String[][] expected = readTableToStringArray(table); + String[][] result = readExampleSetToStringArray(remapped); + assertArrayEquals(expected, result); + + double[][] expectedMapping = readTableToArray(table2); + double[][] resultMapping = readExampleSetToArray(remapped); + assertArrayEquals(expectedMapping, resultMapping); + } + + + @Test(expected = BeltConverter.ConversionException.class) + public void testAdvancedColumns() { + Table table = Builders.newTableBuilder(11).addReal("real", i -> 3 * i / 5.0).addInt53Bit("int", i -> 5 * i) + .addTextset("textset", i -> new StringSet(Collections.singleton("val"+i))) + .build(Belt.defaultContext()); + + try { + BeltConverter.convertHeader(table); + } catch (BeltConverter.ConversionException e) { + assertEquals("textset", e.getColumnName()); + assertEquals(ColumnType.TEXTSET, e.getType()); + throw e; + } + } + } + + @RunWith(Parameterized.class) + public static class TableToSetSequentially { + + public TableToSetSequentially(boolean legacyMode) { + ParameterService.setParameterValue(RapidMiner.PROPERTY_RAPIDMINER_SYSTEM_LEGACY_DATA_MGMT, + String.valueOf(legacyMode)); + } + + @Parameters(name = "legacyMode={0}") + public static Collection params() { + return 
Arrays.asList(true, false); + } + + @Test + public void testSimple() { + Table table = Builders.newTableBuilder(112).addReal("real", i -> 3 * i / 5.0).addInt53Bit("int", i -> 5 * i) + .build(Belt.defaultContext()); + + ExampleSet set = com.rapidminer.belt.table.BeltConverter.convertSequentially(new IOTable(table)); + + double[][] expected = readTableToArray(table); + double[][] result = readExampleSetToArray(set); + assertArrayEquals(expected, result); + } + + + @Test + public void testManyColumns() { + TableBuilder builder = Builders.newTableBuilder(11); + for (int i = 0; i < 30; i++) { + builder.addReal("real" + i, j -> 3 * j / 5.0).addInt53Bit("int" + i, j -> 5 * j); + } + Table table = builder.build(Belt.defaultContext()); + + ExampleSet set = com.rapidminer.belt.table.BeltConverter.convertSequentially(new IOTable(table)); + + double[][] expected = readTableToArray(table); + double[][] result = readExampleSetToArray(set); + assertArrayEquals(expected, result); + } + + @Test + public void testRoles() { + TableBuilder builder = Builders.newTableBuilder(10); + builder.addInt53Bit("att-1", i -> i); + + ColumnRole[] columnRoles = new ColumnRole[]{ColumnRole.ID, ColumnRole.LABEL, ColumnRole.PREDICTION, + ColumnRole.SCORE, ColumnRole.WEIGHT, ColumnRole.OUTLIER, ColumnRole.CLUSTER, ColumnRole.BATCH, + ColumnRole.METADATA}; + for (int i = 0; i < columnRoles.length; i++) { + builder.addReal("att" + i, j -> j); + builder.addMetaData("att" + i, columnRoles[i]); + } + + builder.addInt53Bit("batt1", i -> i); + builder.addMetaData("batt1", ColumnRole.METADATA); + builder.addMetaData("batt1", new com.rapidminer.belt.table.LegacyRole("ignore-me")); + + builder.addInt53Bit("batt2", i -> i); + builder.addMetaData("batt2", ColumnRole.SCORE); + builder.addMetaData("batt2", new com.rapidminer.belt.table.LegacyRole("confidence_Yes")); + + Table table = builder.build(Belt.defaultContext()); + + ExampleSet set = com.rapidminer.belt.table.BeltConverter.convertSequentially(new 
IOTable(table)); + + Iterable iterable = () -> set.getAttributes().allAttributeRoles(); + String[] result = StreamSupport.stream(iterable.spliterator(), false).map(AttributeRole::getSpecialName) + .toArray(String[]::new); + String[] expected = + new String[]{null, Attributes.ID_NAME, Attributes.LABEL_NAME, Attributes.PREDICTION_NAME, + Attributes.CONFIDENCE_NAME, Attributes.WEIGHT_NAME, Attributes.OUTLIER_NAME, + Attributes.CLUSTER_NAME, Attributes.BATCH_NAME, "meta_data", "ignore-me", + "confidence_Yes"}; + assertArrayEquals(expected, result); + } + + @Test + public void testTypes() { + TableBuilder builder = Builders.newTableBuilder(10); + builder.addReal("att1", i -> i); + + builder.addReal("att2", i -> i); + builder.addMetaData("att2", com.rapidminer.belt.table.LegacyType.NUMERICAL); + + builder.addInt53Bit("att3", i -> i); + + builder.addInt53Bit("att4", i -> i); + builder.addMetaData("att4", com.rapidminer.belt.table.LegacyType.NUMERICAL); + + builder.addDateTime("att5", i -> Instant.EPOCH); + + builder.addDateTime("att6", i -> Instant.EPOCH); + builder.addMetaData("att6", com.rapidminer.belt.table.LegacyType.DATE); + + builder.addDateTime("att6.5", i -> Instant.EPOCH); + builder.addMetaData("att6.5", com.rapidminer.belt.table.LegacyType.TIME); + + builder.addTime("att7", i -> LocalTime.NOON); + + builder.addTime("att7.5", i -> LocalTime.NOON); + builder.addMetaData("att7.5", com.rapidminer.belt.table.LegacyType.NUMERICAL); + + builder.addNominal("att8", i -> i % 2 == 0 ? "A" : "B"); + + builder.addNominal("att9", i -> i % 2 == 0 ? "A" : "B", 2); + + builder.addNominal("att10", i -> i % 2 == 0 ? "A" : "B"); + builder.addMetaData("att10", com.rapidminer.belt.table.LegacyType.BINOMINAL); + + builder.addNominal("att11", i -> i % 2 == 0 ? "A" : "B", 2); + builder.addMetaData("att11", com.rapidminer.belt.table.LegacyType.STRING); + + builder.addNominal("att12", i -> i % 2 == 0 ? 
"A" : "B"); + builder.addMetaData("att12", com.rapidminer.belt.table.LegacyType.FILE_PATH); + + builder.addNominal("att13", i -> i % 2 == 0 ? "A" : "B", 2); + builder.addMetaData("att13", com.rapidminer.belt.table.LegacyType.NOMINAL); + + builder.addBoolean("att14", i -> i % 2 == 0 ? "A" : "B", "A"); + + Table table = builder.build(Belt.defaultContext()); + + ExampleSet set = com.rapidminer.belt.table.BeltConverter.convertSequentially(new IOTable(table)); + + int[] result = + StreamSupport.stream(set.getAttributes().spliterator(), false).mapToInt(Attribute::getValueType) + .toArray(); + int[] expected = new int[]{Ontology.REAL, Ontology.NUMERICAL, Ontology.INTEGER, Ontology.INTEGER, + Ontology.DATE_TIME, Ontology.DATE, Ontology.TIME, Ontology.INTEGER, Ontology.INTEGER, Ontology.POLYNOMINAL, Ontology.POLYNOMINAL, + Ontology.BINOMINAL, Ontology.STRING, Ontology.FILE_PATH, Ontology.NOMINAL, Ontology.BINOMINAL}; + + assertArrayEquals(expected, result); + } + + @Test + public void testAnnotations() { + Table table = Builders.newTableBuilder(11).addReal("real", i -> 3 * i / 5.0).addInt53Bit("int", i -> 5 * i) + .build(Belt.defaultContext()); + + IOTable tableObject = new IOTable(table); + tableObject.getAnnotations().setAnnotation(Annotations.KEY_DC_AUTHOR, "gmeier"); + + ExampleSet set = BeltConverter.convertSequentially(tableObject); + + assertEquals(tableObject.getAnnotations(), set.getAnnotations()); + } + + @Test(expected = BeltConverter.ConversionException.class) + public void testAdvancedColumns() { + Table table = Builders.newTableBuilder(11).addReal("real", i -> 3 * i / 5.0).addInt53Bit("int", i -> 5 * i) + .addTextset("textset", i -> new StringSet(Collections.singleton("val"+i))) + .build(Belt.defaultContext()); + + try { + BeltConverter.convertSequentially(new IOTable(table)); + } catch (BeltConverter.ConversionException e) { + assertEquals("textset", e.getColumnName()); + assertEquals(ColumnType.TEXTSET, e.getType()); + throw e; + } + } + } + + public static 
Attribute attributeDogCatMouse() { + Attribute a = AttributeFactory.createAttribute("animal", Ontology.NOMINAL); + a.getMapping().mapString("dog"); + a.getMapping().mapString("cat"); + a.getMapping().mapString("mouse"); + return a; + } + + public static Attribute attributeYesNo() { + Attribute a = AttributeFactory.createAttribute("decision", Ontology.NOMINAL); + a.getMapping().mapString("no"); + a.getMapping().mapString("yes"); + return a; + } + + public static Attribute attributeInt() { + return AttributeFactory.createAttribute("integer", Ontology.INTEGER); + } + + public static Attribute attributeReal() { + return AttributeFactory.createAttribute("real", Ontology.REAL); + } + + public static Attribute attributeReal(int index) { + return AttributeFactory.createAttribute("real" + index, Ontology.REAL); + } +} diff --git a/src/test/java/com/rapidminer/belt/table/ConvertOnWriteExampleTableTest.java b/src/test/java/com/rapidminer/belt/table/ConvertOnWriteExampleTableTest.java new file mode 100644 index 0000000..100691e --- /dev/null +++ b/src/test/java/com/rapidminer/belt/table/ConvertOnWriteExampleTableTest.java @@ -0,0 +1,1411 @@ +/** + * Copyright (C) 2001-2020 by RapidMiner and the contributors + * + * Complete list of developers available at our web site: + * + * http://rapidminer.com + * + * This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General + * Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more + * details. + * + * You should have received a copy of the GNU Affero General Public License along with this program. If not, see + * http://www.gnu.org/licenses/. 
+ */ +package com.rapidminer.belt.table; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.fail; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Random; +import java.util.concurrent.Callable; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.ForkJoinPool; +import java.util.concurrent.ForkJoinTask; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; + +import org.junit.BeforeClass; +import org.junit.FixMethodOrder; +import org.junit.Test; +import org.junit.experimental.runners.Enclosed; +import org.junit.runner.RunWith; +import org.junit.runners.MethodSorters; +import org.junit.runners.Parameterized; + +import com.rapidminer.adaption.belt.IOTable; +import com.rapidminer.core.concurrency.ConcurrencyContext; +import com.rapidminer.core.concurrency.ExecutionStoppedException; +import com.rapidminer.example.Attribute; +import com.rapidminer.example.AttributeRole; +import com.rapidminer.example.Attributes; +import com.rapidminer.example.Example; +import com.rapidminer.example.ExampleSet; +import com.rapidminer.example.table.AttributeFactory; +import com.rapidminer.example.table.ExampleTable; +import com.rapidminer.example.utils.ExampleSets; +import com.rapidminer.operator.OperatorException; +import com.rapidminer.operator.preprocessing.MaterializeDataInMemory; +import 
com.rapidminer.test.asserter.AsserterFactoryRapidMiner; +import com.rapidminer.test_utils.RapidAssert; +import com.rapidminer.tools.Ontology; +import com.rapidminer.tools.att.AttributeSet; + + +/** + * Tests the {@link ConvertOnWriteExampleTable}. + * + * @author Gisa Meier + */ +@RunWith(Enclosed.class) +public class ConvertOnWriteExampleTableTest { + + private static final ConcurrencyContext CONTEXT = new ConcurrencyContext() { + + private ForkJoinPool pool = new ForkJoinPool(Runtime.getRuntime().availableProcessors()); + + @Override + public List> submit(List> callables) throws IllegalArgumentException { + List> futures = new ArrayList<>(); + for (Callable callable : callables) { + futures.add(pool.submit(callable)); + } + return futures; + } + + @Override + public List call(List> callables) + throws ExecutionException, ExecutionStoppedException, IllegalArgumentException { + List> futures = submit(callables); + List results = new ArrayList<>(); + for (Future future : futures) { + try { + results.add(future.get()); + } catch (InterruptedException e) { + throw new RuntimeException("must not happen"); + } + } + return results; + } + + @Override + public void run(List runnables) + throws ExecutionException, ExecutionStoppedException, IllegalArgumentException { + } + + @Override + public List invokeAll(List> tasks) + throws ExecutionException, ExecutionStoppedException, IllegalArgumentException { + return null; + } + + @Override + public T invoke(ForkJoinTask task) + throws ExecutionException, ExecutionStoppedException, IllegalArgumentException { + return null; + } + + @Override + public int getParallelism() { + return pool.getParallelism(); + } + + @Override + public List collectResults(List> futures) + throws ExecutionException, ExecutionStoppedException, IllegalArgumentException { + return null; + } + + @Override + public void checkStatus() throws ExecutionStoppedException { + } + + }; + + @RunWith(Parameterized.class) + 
@FixMethodOrder(MethodSorters.NAME_ASCENDING) //want the get methods first and add methods last + public static class TableMethods { + + @BeforeClass + public static void setup() { + RapidAssert.ASSERTER_REGISTRY.registerAllAsserters(new AsserterFactoryRapidMiner()); + } + + @Parameterized.Parameter + public ExampleTable comparisonTable; + + @Parameterized.Parameter(value = 1) + public ConvertOnWriteExampleTable testTable; + + @Parameterized.Parameters + public static Collection params() { + List params = new ArrayList<>(); + ExampleSet set = getNumericExampleSet(); + ExampleSet view = TableViewCreator.INSTANCE.convertOnWriteView(BeltConverter.convert(set, CONTEXT), true); + // only wrapped table + params.add(new Object[]{set.getExampleTable(), (ConvertOnWriteExampleTable)view.getExampleTable()}); + + Attribute numeric = AttributeFactory.createAttribute("numeric", Ontology.NUMERICAL); + set = getNumericExampleSet(); + + IOTable table = BeltConverter.convert(set, CONTEXT); + view = TableViewCreator.INSTANCE.convertOnWriteView(table, true); + + Attribute numericClone = (Attribute) numeric.clone(); + view.getExampleTable().addAttribute(numericClone); + view.getAttributes().addRegular(numericClone); + int i = 0; + for (Example example : view) { + example.setValue(numericClone, 0.42 * (i++)); + } + + set.getExampleTable().addAttribute(numeric); + set.getAttributes().addRegular(numeric); + i = 0; + for (Example example : set) { + example.setValue(numeric, 0.42 * (i++)); + } + // with added columns + params.add(new Object[]{set.getExampleTable(), (ConvertOnWriteExampleTable)view.getExampleTable()}); + + set = getNumericExampleSet(); + Attribute real = set.getAttributes().get("real"); + + table = BeltConverter.convert(set, CONTEXT); + view = TableViewCreator.INSTANCE.convertOnWriteView(table, true); + Attribute realView = view.getAttributes().get(real.getName()); + view.getAttributes().remove(realView); + view.getExampleTable().removeAttribute(realView); + + 
set.getAttributes().remove(real); + set.getExampleTable().removeAttribute(real); + + // with converted + params.add(new Object[]{set.getExampleTable(), (ConvertOnWriteExampleTable)view.getExampleTable()}); + return params; + } + + @Test + public void a1testGetAttributes() { + RapidAssert.assertArrayEquals(comparisonTable.getAttributes(), testTable.getAttributes()); + Object[] objects = new Object[4]; + Object[] expectedObjects = new Object[4]; + Arrays.setAll(objects, i-> testTable.getAttribute(i)); + Arrays.setAll(expectedObjects, i-> comparisonTable.getAttribute(i)); + RapidAssert.assertArrayEquals(expectedObjects, objects); + } + + @Test + public void a2testFindAttributes() { + String[] labels = new String[]{"real", "integer", "numeric", "dateTime", "date", "time", "buntekuh", null}; + Object[] objects = new Object[labels.length]; + Object[] expectedObjects = new Object[labels.length]; + Arrays.setAll(objects, i-> { + try { + return testTable.findAttribute(labels[i]); + } catch (OperatorException e) { + return null; + } + }); + Arrays.setAll(expectedObjects, i-> { + try { + return comparisonTable.findAttribute(labels[i]); + } catch (OperatorException e) { + return null; + } + }); + RapidAssert.assertArrayEquals(expectedObjects, objects); + } + + @Test + public void testGetAttributeNumbers() { + assertEquals(comparisonTable.getAttributeCount(), testTable.getAttributeCount()); + assertEquals(comparisonTable.getNumberOfAttributes(), testTable.getNumberOfAttributes()); + } + + @Test + public void testCreateExampleSet() { + RapidAssert.assertEquals(comparisonTable.createExampleSet(comparisonTable.getAttribute(0)), + testTable.createExampleSet(testTable.getAttribute(0))); + } + + @Test + public void testCreateExampleSet1() { + AttributeRole label = new AttributeRole(comparisonTable.getAttribute(2)); + label.setSpecial(Attributes.LABEL_NAME); + List comparisonList = Arrays.asList(new AttributeRole(comparisonTable.getAttribute(3)), label, + new 
AttributeRole(comparisonTable.getAttribute(1))); + AttributeRole testLabel = new AttributeRole(testTable.getAttribute(2)); + testLabel.setSpecial(Attributes.LABEL_NAME); + List testList = Arrays.asList(new AttributeRole(testTable.getAttribute(3)), testLabel, + new AttributeRole(testTable.getAttribute(1))); + + RapidAssert.assertEquals(comparisonTable.createExampleSet(comparisonList.iterator()), + testTable.createExampleSet(testList.iterator())); + } + + @Test + public void testCreateExampleSet2() { + RapidAssert.assertEquals(comparisonTable.createExampleSet(comparisonTable.getAttribute(0), comparisonTable.getAttribute(1), comparisonTable.getAttribute(2)), + testTable.createExampleSet(testTable.getAttribute(0), testTable.getAttribute(1), testTable.getAttribute(2))); + } + + @Test + public void testCreateExampleSet3() { + Map comparisonMap = new HashMap<>(); + comparisonMap.put(Attributes.LABEL_NAME, comparisonTable.getAttribute(2)); + comparisonMap.put("blablup", comparisonTable.getAttribute(3)); + AttributeSet comparisonSet = new AttributeSet(Arrays.asList(comparisonTable.getAttribute(0), comparisonTable.getAttribute(1)),comparisonMap); + + Map testMap = new HashMap<>(); + testMap.put(Attributes.LABEL_NAME, testTable.getAttribute(2)); + testMap.put("blablup", testTable.getAttribute(3)); + AttributeSet testSet = new AttributeSet(Arrays.asList(testTable.getAttribute(0), testTable.getAttribute(1)),testMap); + + RapidAssert.assertEquals(comparisonTable.createExampleSet(comparisonSet), + testTable.createExampleSet(testSet)); + } + + @Test + public void testToString() { + assertEquals(comparisonTable.toString(), testTable.toString()); + assertEquals(comparisonTable.toDataString(), testTable.toDataString()); + } + + @Test + public void testRemoveAdddedAttribute() { + Attribute comparisonAttribute = AttributeFactory.createAttribute("test", Ontology.NUMERICAL); + comparisonTable.addAttribute(comparisonAttribute); + Attribute testAttribute = (Attribute) 
comparisonAttribute.clone(); + testTable.addAttribute(testAttribute); + comparisonTable.removeAttribute(comparisonAttribute.getTableIndex()); + testTable.removeAttribute(testAttribute.getTableIndex()); + RapidAssert.assertArrayEquals(comparisonTable.getAttributes(), testTable.getAttributes()); + } + + @Test + public void z1testAddAttribute() { + Attribute comparisonAttribute = AttributeFactory.createAttribute("test", Ontology.NUMERICAL); + comparisonTable.addAttribute(comparisonAttribute); + testTable.addAttribute((Attribute) comparisonAttribute.clone()); + RapidAssert.assertArrayEquals(comparisonTable.getAttributes(), testTable.getAttributes()); + } + + @Test + public void z2testAddAttributes() { + Attribute comparisonAttribute = AttributeFactory.createAttribute("test", Ontology.NUMERICAL); + Attribute comparisonAttribute2 = AttributeFactory.createAttribute("test2", Ontology.NUMERICAL); + comparisonTable.addAttributes(Arrays.asList(comparisonAttribute, comparisonAttribute2)); + testTable.addAttributes(Arrays.asList((Attribute) comparisonAttribute.clone(), (Attribute) comparisonAttribute2.clone())); + RapidAssert.assertArrayEquals(comparisonTable.getAttributes(), testTable.getAttributes()); + } + + } + + public static class ExampleSetMethods { + + @BeforeClass + public static void setup() { + RapidAssert.ASSERTER_REGISTRY.registerAllAsserters(new AsserterFactoryRapidMiner()); + } + + @Test + public void testSetNumeric() { + Attribute numeric = AttributeFactory.createAttribute("numeric", Ontology.NUMERICAL); + Attribute real = AttributeFactory.createAttribute("real", Ontology.REAL); + Attribute integer = AttributeFactory.createAttribute("integer", Ontology.INTEGER); + Attribute dateTime = AttributeFactory.createAttribute("date_time", Ontology.DATE_TIME); + Attribute date = AttributeFactory.createAttribute("date", Ontology.DATE); + Attribute time = AttributeFactory.createAttribute("time", Ontology.TIME); + List attributes = Arrays.asList(numeric, real, dateTime, 
date, time, integer); + ExampleSet set = ExampleSets.from(attributes).withBlankSize(150) + .withColumnFiller(numeric, i -> Math.random() > 0.7 ? Double.NaN : Math.random()) + .withColumnFiller(real, i -> Math.random() > 0.7 ? Double.NaN : 42 + Math.random()) + .withColumnFiller(integer, i -> Math.random() > 0.7 ? Double.NaN : Math.round(Math.random() * 100)) + .withColumnFiller(dateTime, + i -> Math.random() > 0.7 ? Double.NaN : 1515410698d + Math.floor(Math.random() * 1000)) + .withColumnFiller(date, i -> Math.random() > 0.7 ? Double.NaN : + 230169600000d + Math.floor(Math.random() * 100) * 1000d * 60 * 60 * 24) + .withColumnFiller(time, + i -> Math.random() > 0.7 ? Double.NaN : Math.floor(Math.random() * 60 * 60 * 24 * 1000)) + .withRole(numeric, Attributes.LABEL_NAME) + .build(); + + IOTable table = BeltConverter.convert(set, CONTEXT); + ExampleSet view = TableViewCreator.INSTANCE.convertOnWriteView(table, true); + + Attribute realInView = view.getAttributes().get(real.getName()); + for (Example example : view) { + example.setValue(realInView, 0.42); + } + + for (Example example : set) { + example.setValue(real, 0.42); + } + + RapidAssert.assertEquals(set, view); + } + + @Test + public void testSetNominal() { + Attribute nominal = AttributeFactory.createAttribute("nominal", Ontology.NOMINAL); + Attribute string = AttributeFactory.createAttribute("string", Ontology.STRING); + Attribute polynominal = AttributeFactory.createAttribute("polynominal", Ontology.POLYNOMINAL); + Attribute binominal = AttributeFactory.createAttribute("binominal", Ontology.BINOMINAL); + Attribute path = AttributeFactory.createAttribute("path", Ontology.FILE_PATH); + for (int i = 0; i < 5; i++) { + nominal.getMapping().mapString("nominalValue" + i); + } + for (int i = 0; i < 4; i++) { + string.getMapping().mapString("veryVeryLongStringValue" + i); + } + for (int i = 0; i < 6; i++) { + polynominal.getMapping().mapString("polyValue" + i); + } + for (int i = 0; i < 2; i++) { + 
binominal.getMapping().mapString("binominalValue" + i); + } + for (int i = 0; i < 3; i++) { + path.getMapping().mapString("//folder/sufolder/subsubfolder/file" + i); + } + + List attributes = Arrays.asList(nominal, string, polynominal, binominal, path); + Random random = new Random(); + ExampleSet set = ExampleSets.from(attributes).withBlankSize(50) + .withColumnFiller(nominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(5)) + .withColumnFiller(string, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(4)) + .withColumnFiller(polynominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(6)) + .withColumnFiller(binominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(2)) + .withColumnFiller(path, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(3)) + .build(); + + IOTable table = BeltConverter.convert(set, CONTEXT); + ExampleSet view = TableViewCreator.INSTANCE.convertOnWriteView(table, true); + Attribute polynominalInView = view.getAttributes().get(polynominal.getName()); + view.getExample(42).setValue(polynominalInView, "hello"); + + set.getExample(42).setValue(polynominal, "hello"); + + RapidAssert.assertEquals(set, view); + } + + @Test + public void testAddAndSet() { + Attribute numeric = AttributeFactory.createAttribute("numeric", Ontology.NUMERICAL); + Attribute real = AttributeFactory.createAttribute("real", Ontology.REAL); + Attribute integer = AttributeFactory.createAttribute("integer", Ontology.INTEGER); + List attributes = Arrays.asList(real, integer); + ExampleSet set = ExampleSets.from(attributes).withBlankSize(150) + .withColumnFiller(real, i -> Math.random() > 0.7 ? Double.NaN : 42 + Math.random()) + .withColumnFiller(integer, i -> Math.random() > 0.7 ? 
Double.NaN : Math.round(Math.random() * 100)) + .build(); + + IOTable table = BeltConverter.convert(set, CONTEXT); + ExampleSet view = TableViewCreator.INSTANCE.convertOnWriteView(table, true); + + Attribute numericClone = (Attribute) numeric.clone(); + view.getExampleTable().addAttribute(numericClone); + view.getAttributes().addRegular(numericClone); + int i = 0; + for (Example example : view) { + example.setValue(numericClone, 0.42 * (i++)); + } + view.getExample(42).setValue(numericClone, Double.NaN); + + set.getExampleTable().addAttribute(numeric); + set.getAttributes().addRegular(numeric); + i = 0; + for (Example example : set) { + example.setValue(numeric, 0.42 * (i++)); + } + set.getExample(42).setValue(numeric, Double.NaN); + + RapidAssert.assertEquals(set, view); + } + + @Test + public void testAddAndSetAndCleanup() { + Attribute numeric = AttributeFactory.createAttribute("numeric", Ontology.NUMERICAL); + Attribute real = AttributeFactory.createAttribute("real", Ontology.REAL); + Attribute integer = AttributeFactory.createAttribute("integer", Ontology.INTEGER); + List attributes = Arrays.asList(real, integer); + ExampleSet set = ExampleSets.from(attributes).withBlankSize(150) + .withColumnFiller(real, i -> Math.random() > 0.7 ? Double.NaN : 42 + Math.random()) + .withColumnFiller(integer, i -> Math.random() > 0.7 ? 
Double.NaN : Math.round(Math.random() * 100)) + .build(); + + IOTable table = BeltConverter.convert(set, CONTEXT); + ExampleSet view = TableViewCreator.INSTANCE.convertOnWriteView(table, true); + + Attribute numericClone = (Attribute) numeric.clone(); + view.getExampleTable().addAttribute(numericClone); + view.getAttributes().addRegular(numericClone); + int i = 0; + for (Example example : view) { + example.setValue(numericClone, 0.42 * (i++)); + } + view.getAttributes().remove(view.getAttributes().get(integer.getName())); + view.cleanup(); + + set.getExampleTable().addAttribute(numeric); + set.getAttributes().addRegular(numeric); + i = 0; + for (Example example : set) { + example.setValue(numeric, 0.42 * (i++)); + } + set.getAttributes().remove(integer); + set.cleanup(); + + RapidAssert.assertEquals(set, view); + } + + @Test + public void testAddAndSetAndConvertAndCleanup() { + Attribute numeric = AttributeFactory.createAttribute("numeric", Ontology.NUMERICAL); + Attribute real = AttributeFactory.createAttribute("real", Ontology.REAL); + Attribute integer = AttributeFactory.createAttribute("integer", Ontology.INTEGER); + List attributes = Arrays.asList(real, integer); + ExampleSet set = ExampleSets.from(attributes).withBlankSize(150) + .withColumnFiller(real, i -> Math.random() > 0.7 ? Double.NaN : 42 + Math.random()) + .withColumnFiller(integer, i -> Math.random() > 0.7 ? 
Double.NaN : Math.round(Math.random() * 100)) + .build(); + + IOTable table = BeltConverter.convert(set, CONTEXT); + ExampleSet view = TableViewCreator.INSTANCE.convertOnWriteView(table, true); + + Attribute numericClone = (Attribute) numeric.clone(); + view.getExampleTable().addAttribute(numericClone); + view.getAttributes().addRegular(numericClone); + int i = 0; + for (Example example : view) { + example.setValue(numericClone, 0.42 * (i++)); + } + view.getExample(42).setValue(view.getAttributes().get(real.getName()), 42); + + view.getAttributes().remove(view.getAttributes().get(integer.getName())); + view.cleanup(); + + set.getExampleTable().addAttribute(numeric); + set.getAttributes().addRegular(numeric); + i = 0; + for (Example example : set) { + example.setValue(numeric, 0.42 * (i++)); + } + set.getExample(42).setValue(real, 42); + + set.getAttributes().remove(integer); + set.cleanup(); + + RapidAssert.assertEquals(set, view); + } + + @Test + public void testAddAndSetAndMaterialize() { + Random random = new Random(); + Attribute numeric = AttributeFactory.createAttribute("numeric", Ontology.NUMERICAL); + Attribute real = AttributeFactory.createAttribute("real", Ontology.REAL); + Attribute integer = AttributeFactory.createAttribute("integer", Ontology.INTEGER); + Attribute polynominal = AttributeFactory.createAttribute("polynominal", Ontology.POLYNOMINAL); + Attribute binominal = AttributeFactory.createAttribute("binominal", Ontology.BINOMINAL); + Attribute dateTime = AttributeFactory.createAttribute("date_time", Ontology.DATE_TIME); + Attribute date = AttributeFactory.createAttribute("date", Ontology.DATE); + Attribute time = AttributeFactory.createAttribute("time", Ontology.TIME); + for (int i = 0; i < 6; i++) { + polynominal.getMapping().mapString("polyValue" + i); + } + for (int i = 0; i < 2; i++) { + binominal.getMapping().mapString("binominalValue" + i); + } + List attributes = Arrays.asList(real, date, integer, time, polynominal, dateTime, binominal); + 
ExampleSet set = ExampleSets.from(attributes).withBlankSize(150) + .withColumnFiller(real, i -> Math.random() > 0.7 ? Double.NaN : 42 + Math.random()) + .withColumnFiller(integer, i -> Math.random() > 0.7 ? Double.NaN : Math.round(Math.random() * 100)) + .withColumnFiller(polynominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(6)) + .withColumnFiller(binominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(2)) + .withColumnFiller(dateTime, + i -> Math.random() > 0.7 ? Double.NaN : 1515410698d + Math.floor(Math.random() * 1000)) + .withColumnFiller(date, i -> Math.random() > 0.7 ? Double.NaN : + 230169600000d + Math.floor(Math.random() * 100) * 1000d * 60 * 60 * 24) + .withColumnFiller(time, + i -> Math.random() > 0.7 ? Double.NaN : Math.floor(Math.random() * 60 * 60 * 24 * 1000)) + .build(); + + IOTable table = BeltConverter.convert(set, CONTEXT); + ExampleSet view = TableViewCreator.INSTANCE.convertOnWriteView(table, true); + + Attribute numericClone = (Attribute) numeric.clone(); + view.getExampleTable().addAttribute(numericClone); + view.getAttributes().addRegular(numericClone); + int i = 0; + for (Example example : view) { + example.setValue(numericClone, 0.42 * (i++)); + } + + set.getExampleTable().addAttribute(numeric); + set.getAttributes().addRegular(numeric); + i = 0; + for (Example example : set) { + example.setValue(numeric, 0.42 * (i++)); + } + + RapidAssert.assertEquals(set, MaterializeDataInMemory.materializeExampleSet(view)); + } + + @Test + public void testAddAndSetAndRemove() { + Attribute numeric = AttributeFactory.createAttribute("numeric", Ontology.NUMERICAL); + Attribute real = AttributeFactory.createAttribute("real", Ontology.REAL); + Attribute integer = AttributeFactory.createAttribute("integer", Ontology.INTEGER); + List attributes = Arrays.asList(real, integer); + ExampleSet set = ExampleSets.from(attributes).withBlankSize(150) + .withColumnFiller(real, i -> Math.random() > 0.7 ? 
Double.NaN : 42 + Math.random()) + .withColumnFiller(integer, i -> Math.random() > 0.7 ? Double.NaN : Math.round(Math.random() * 100)) + .build(); + + IOTable table = BeltConverter.convert(set, CONTEXT); + ExampleSet view = TableViewCreator.INSTANCE.convertOnWriteView(table, true); + + Attribute numericClone = (Attribute) numeric.clone(); + view.getExampleTable().addAttribute(numericClone); + view.getAttributes().addRegular(numericClone); + int i = 0; + for (Example example : view) { + example.setValue(numericClone, 0.42 * (i++)); + } + view.getAttributes().remove(numericClone); + view.getExampleTable().removeAttribute(numericClone); + + set.getExampleTable().addAttribute(numeric); + set.getAttributes().addRegular(numeric); + i = 0; + for (Example example : set) { + example.setValue(numeric, 0.42 * (i++)); + } + set.getAttributes().remove(numeric); + set.getExampleTable().removeAttribute(numeric); + + RapidAssert.assertEquals(set, view); + } + + @Test + public void testRemoveFromExisting() { + Random random = new Random(); + Attribute real = AttributeFactory.createAttribute("real", Ontology.REAL); + Attribute integer = AttributeFactory.createAttribute("integer", Ontology.INTEGER); + Attribute polynominal = AttributeFactory.createAttribute("polynominal", Ontology.POLYNOMINAL); + Attribute binominal = AttributeFactory.createAttribute("binominal", Ontology.BINOMINAL); + Attribute dateTime = AttributeFactory.createAttribute("date_time", Ontology.DATE_TIME); + Attribute date = AttributeFactory.createAttribute("date", Ontology.DATE); + Attribute time = AttributeFactory.createAttribute("time", Ontology.TIME); + for (int i = 0; i < 6; i++) { + polynominal.getMapping().mapString("polyValue" + i); + } + for (int i = 0; i < 2; i++) { + binominal.getMapping().mapString("binominalValue" + i); + } + List attributes = Arrays.asList(real, date, integer, time, polynominal, dateTime, binominal); + ExampleSet set = ExampleSets.from(attributes).withBlankSize(150) + 
.withColumnFiller(real, i -> Math.random() > 0.7 ? Double.NaN : 42 + Math.random()) + .withColumnFiller(integer, i -> Math.random() > 0.7 ? Double.NaN : Math.round(Math.random() * 100)) + .withColumnFiller(polynominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(6)) + .withColumnFiller(binominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(2)) + .withColumnFiller(dateTime, + i -> Math.random() > 0.7 ? Double.NaN : 1515410698d + Math.floor(Math.random() * 1000)) + .withColumnFiller(date, i -> Math.random() > 0.7 ? Double.NaN : + 230169600000d + Math.floor(Math.random() * 100) * 1000d * 60 * 60 * 24) + .withColumnFiller(time, + i -> Math.random() > 0.7 ? Double.NaN : Math.floor(Math.random() * 60 * 60 * 24 * 1000)) + .build(); + + IOTable table = BeltConverter.convert(set, CONTEXT); + ExampleSet view = TableViewCreator.INSTANCE.convertOnWriteView(table, true); + + Attributes viewAttributes = view.getAttributes(); + Attribute integerInView = viewAttributes.get(integer.getName()); + viewAttributes.remove(integerInView); + Attribute binominalInView = viewAttributes.get(binominal.getName()); + viewAttributes.remove(binominalInView); + Attribute dateInView = viewAttributes.get(date.getName()); + viewAttributes.remove(dateInView); + view.getExampleTable().removeAttribute(integerInView); + view.getExampleTable().removeAttribute(binominalInView); + view.getExampleTable().removeAttribute(dateInView); + + Attributes setAttributes = set.getAttributes(); + setAttributes.remove(integer); + setAttributes.remove(binominal); + setAttributes.remove(date); + set.getExampleTable().removeAttribute(integer); + set.getExampleTable().removeAttribute(binominal); + set.getExampleTable().removeAttribute(date); + + RapidAssert.assertEquals(set, view); + } + + @Test + public void testRemoveFromExistingWithTableIndex() { + Random random = new Random(); + Attribute real = AttributeFactory.createAttribute("real", Ontology.REAL); + Attribute integer = 
AttributeFactory.createAttribute("integer", Ontology.INTEGER); + Attribute polynominal = AttributeFactory.createAttribute("polynominal", Ontology.POLYNOMINAL); + Attribute binominal = AttributeFactory.createAttribute("binominal", Ontology.BINOMINAL); + Attribute dateTime = AttributeFactory.createAttribute("date_time", Ontology.DATE_TIME); + Attribute date = AttributeFactory.createAttribute("date", Ontology.DATE); + Attribute time = AttributeFactory.createAttribute("time", Ontology.TIME); + for (int i = 0; i < 6; i++) { + polynominal.getMapping().mapString("polyValue" + i); + } + for (int i = 0; i < 2; i++) { + binominal.getMapping().mapString("binominalValue" + i); + } + List attributes = Arrays.asList(real, date, integer, time, polynominal, dateTime, binominal); + ExampleSet set = ExampleSets.from(attributes).withBlankSize(150) + .withColumnFiller(real, i -> Math.random() > 0.7 ? Double.NaN : 42 + Math.random()) + .withColumnFiller(integer, i -> Math.random() > 0.7 ? Double.NaN : Math.round(Math.random() * 100)) + .withColumnFiller(polynominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(6)) + .withColumnFiller(binominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(2)) + .withColumnFiller(dateTime, + i -> Math.random() > 0.7 ? Double.NaN : 1515410698d + Math.floor(Math.random() * 1000)) + .withColumnFiller(date, i -> Math.random() > 0.7 ? Double.NaN : + 230169600000d + Math.floor(Math.random() * 100) * 1000d * 60 * 60 * 24) + .withColumnFiller(time, + i -> Math.random() > 0.7 ? 
Double.NaN : Math.floor(Math.random() * 60 * 60 * 24 * 1000)) + .build(); + + IOTable table = BeltConverter.convert(set, CONTEXT); + ExampleSet view = TableViewCreator.INSTANCE.convertOnWriteView(table, true); + + Attributes viewAttributes = view.getAttributes(); + Attribute integerInView = viewAttributes.get(integer.getName()); + viewAttributes.remove(integerInView); + Attribute binominalInView = viewAttributes.get(binominal.getName()); + viewAttributes.remove(binominalInView); + Attribute dateInView = viewAttributes.get(date.getName()); + viewAttributes.remove(dateInView); + view.getExampleTable().removeAttribute(integerInView.getTableIndex()); + view.getExampleTable().removeAttribute(binominalInView.getTableIndex()); + view.getExampleTable().removeAttribute(dateInView.getTableIndex()); + + Attributes setAttributes = set.getAttributes(); + setAttributes.remove(integer); + setAttributes.remove(binominal); + setAttributes.remove(date); + set.getExampleTable().removeAttribute(integer.getTableIndex()); + set.getExampleTable().removeAttribute(binominal.getTableIndex()); + set.getExampleTable().removeAttribute(date.getTableIndex()); + + RapidAssert.assertEquals(set, view); + } + + + @Test + public void testDataRowToString() { + Attribute numeric = AttributeFactory.createAttribute("numeric", Ontology.NUMERICAL); + Attribute real = AttributeFactory.createAttribute("real", Ontology.REAL); + Attribute integer = AttributeFactory.createAttribute("integer", Ontology.INTEGER); + Attribute nominal = AttributeFactory.createAttribute("nominal", Ontology.NOMINAL); + Attribute polynominal = AttributeFactory.createAttribute("polynominal", Ontology.POLYNOMINAL); + Attribute binominal = AttributeFactory.createAttribute("binominal", Ontology.BINOMINAL); + for (int i = 0; i < 5; i++) { + nominal.getMapping().mapString("nominalValue" + i); + } + for (int i = 0; i < 6; i++) { + polynominal.getMapping().mapString("polyValue" + i); + } + for (int i = 0; i < 2; i++) { + 
binominal.getMapping().mapString("binominalValue" + i); + } + List attributes = Arrays.asList(nominal, numeric, real, integer, polynominal, binominal); + Random random = new Random(); + ExampleSet set = ExampleSets.from(attributes).withBlankSize(15) + .withColumnFiller(numeric, i -> Math.random() > 0.7 ? Double.NaN : Math.random()) + .withColumnFiller(real, i -> Math.random() > 0.7 ? Double.NaN : 42 + Math.random()) + .withColumnFiller(integer, i -> Math.random() > 0.7 ? Double.NaN : Math.round(Math.random() * 100)) + .withColumnFiller(nominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(5)) + .withColumnFiller(polynominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(6)) + .withColumnFiller(binominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(2)) + .build(); + + IOTable table = BeltConverter.convert(set, CONTEXT); + + ExampleSet view = TableViewCreator.INSTANCE.convertOnWriteView(table, true); + Iterator iterator = view.iterator(); + for (int i = 0; i < view.size(); i++) { + RapidAssert.assertEquals("test", set.getExample(i).getDataRow().toString(), iterator.next().getDataRow().toString()); + } + RapidAssert + .assertEquals(view.getExample(0).getDataRow().toString(), set.getExample(0).getDataRow().toString()); + } + + @Test + public void testSerializationWithAdditionalColumn() throws IOException, ClassNotFoundException { + Attribute numeric = AttributeFactory.createAttribute("numeric", Ontology.NUMERICAL); + Attribute real = AttributeFactory.createAttribute("real", Ontology.REAL); + Attribute integer = AttributeFactory.createAttribute("integer", Ontology.INTEGER); + Attribute nominal = AttributeFactory.createAttribute("nominal", Ontology.NOMINAL); + Attribute polynominal = AttributeFactory.createAttribute("polynominal", Ontology.POLYNOMINAL); + Attribute binominal = AttributeFactory.createAttribute("binominal", Ontology.BINOMINAL); + for (int i = 0; i < 5; i++) { + nominal.getMapping().mapString("nominalValue" + 
i); + } + for (int i = 0; i < 6; i++) { + polynominal.getMapping().mapString("polyValue" + i); + } + for (int i = 0; i < 2; i++) { + binominal.getMapping().mapString("binominalValue" + i); + } + List attributes = Arrays.asList(nominal, real, integer, polynominal, binominal); + Random random = new Random(); + ExampleSet set = ExampleSets.from(attributes).withBlankSize(150) + .withColumnFiller(real, i -> Math.random() > 0.7 ? Double.NaN : 42 + Math.random()) + .withColumnFiller(integer, i -> Math.random() > 0.7 ? Double.NaN : Math.round(Math.random() * 100)) + .withColumnFiller(nominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(5)) + .withColumnFiller(polynominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(6)) + .withColumnFiller(binominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(2)) + .build(); + + IOTable table = BeltConverter.convert(set, CONTEXT); + ExampleSet view = TableViewCreator.INSTANCE.convertOnWriteView(table, true); + + Attribute numericClone = (Attribute) numeric.clone(); + view.getExampleTable().addAttribute(numericClone); + view.getAttributes().addRegular(numericClone); + int i = 0; + for (Example example : view) { + example.setValue(numericClone, 0.42 * (i++)); + } + + set.getExampleTable().addAttribute(numeric); + set.getAttributes().addRegular(numeric); + i = 0; + for (Example example : set) { + example.setValue(numeric, 0.42 * (i++)); + } + + + byte[] serialized = serialize(view); + Object deserialized = deserialize(serialized); + + RapidAssert.assertEquals(set, (ExampleSet) deserialized); + } + + @Test + public void testSerializationAfterSetExisting() throws IOException, ClassNotFoundException { + Attribute numeric = AttributeFactory.createAttribute("numeric", Ontology.NUMERICAL); + Attribute real = AttributeFactory.createAttribute("real", Ontology.REAL); + Attribute integer = AttributeFactory.createAttribute("integer", Ontology.INTEGER); + Attribute nominal = 
AttributeFactory.createAttribute("nominal", Ontology.NOMINAL); + Attribute polynominal = AttributeFactory.createAttribute("polynominal", Ontology.POLYNOMINAL); + Attribute binominal = AttributeFactory.createAttribute("binominal", Ontology.BINOMINAL); + for (int i = 0; i < 5; i++) { + nominal.getMapping().mapString("nominalValue" + i); + } + for (int i = 0; i < 6; i++) { + polynominal.getMapping().mapString("polyValue" + i); + } + for (int i = 0; i < 2; i++) { + binominal.getMapping().mapString("binominalValue" + i); + } + List attributes = Arrays.asList(nominal, numeric, real, integer, polynominal, binominal); + Random random = new Random(); + ExampleSet set = ExampleSets.from(attributes).withBlankSize(150) + .withColumnFiller(numeric, i -> Math.random() > 0.7 ? Double.NaN : Math.random()) + .withColumnFiller(real, i -> Math.random() > 0.7 ? Double.NaN : 42 + Math.random()) + .withColumnFiller(integer, i -> Math.random() > 0.7 ? Double.NaN : Math.round(Math.random() * 100)) + .withColumnFiller(nominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(5)) + .withColumnFiller(polynominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(6)) + .withColumnFiller(binominal, i -> random.nextDouble() > 0.7 ? 
Double.NaN : random.nextInt(2)) + .withRole(numeric, Attributes.LABEL_NAME) + .build(); + + IOTable table = BeltConverter.convert(set, CONTEXT); + ExampleSet view = TableViewCreator.INSTANCE.convertOnWriteView(table, true); + view.getExample(42).setValue(view.getAttributes().get(real.getName()), 42); + byte[] serialized = serialize(view); + Object deserialized = deserialize(serialized); + + set.getExample(42).setValue(real, 42); + RapidAssert.assertEquals(set, (ExampleSet) deserialized); + } + + } + + public static class Concurrency { + + @Test + public void testConvertAndAdd() throws InterruptedException { + ExecutorService executorService = Executors.newFixedThreadPool(4); + List result = new ArrayList<>(); + for (int i = 0; i < 1000; i++) { + CountDownLatch start = new CountDownLatch(4); + AtomicBoolean failed = new AtomicBoolean(false); + result.add(failed); + + ExampleSet set = getNumericExampleSet(); + IOTable ioTable = BeltConverter.convert(set, CONTEXT); + ExampleSet view = TableViewCreator.INSTANCE.convertOnWriteView(ioTable, false); + + executorService.submit(() -> { + ExampleSet exampleSet = (ExampleSet) view.clone(); + Attribute att = exampleSet.getAttributes().get("integer"); + start.countDown(); + try { + start.await(); + } catch (InterruptedException e) { + e.printStackTrace(); + } + try { + exampleSet.getExample(3).setValue(att, 42); + assertEquals(42, exampleSet.getExample(3).getValue(att), 0); + } catch (Throwable e) { + e.printStackTrace(); + failed.set(true); + } + }); + + executorService.submit(() -> { + ExampleSet exampleSet = (ExampleSet) view.clone(); + Attribute att = AttributeFactory.createAttribute("test", Ontology.NUMERICAL); + exampleSet.getAttributes().addRegular(att); + start.countDown(); + try { + start.await(); + } catch (InterruptedException e) { + e.printStackTrace(); + } + try { + exampleSet.getExampleTable().addAttribute(att); + exampleSet.getExample(5).setValue(att, 47); + assertEquals(47, 
exampleSet.getExample(5).getValue(att), 0); + } catch (Throwable e) { + e.printStackTrace(); + failed.set(true); + } + }); + + executorService.submit(() -> { + ExampleSet exampleSet = (ExampleSet) view.clone(); + Attribute att = AttributeFactory.createAttribute("test", Ontology.NUMERICAL); + exampleSet.getAttributes().addRegular(att); + Iterator iterator = exampleSet.iterator(); + iterator.next(); + iterator.next(); + iterator.next(); + start.countDown(); + try { + start.await(); + } catch (InterruptedException e) { + e.printStackTrace(); + } + try { + exampleSet.getExampleTable().addAttribute(att); + exampleSet.getExample(3).setValue(att, 41); + assertEquals(41, iterator.next().getValue(att), 0); + } catch (Throwable e) { + e.printStackTrace(); + failed.set(true); + } + }); + + executorService.submit(() -> { + ExampleSet exampleSet = (ExampleSet) view.clone(); + Attribute att = AttributeFactory.createAttribute("test", Ontology.NUMERICAL); + exampleSet.getAttributes().addRegular(att); + Iterator iterator = exampleSet.iterator(); + iterator.next(); + iterator.next(); + iterator.next(); + start.countDown(); + try { + start.await(); + } catch (InterruptedException e) { + e.printStackTrace(); + } + try { + exampleSet.getExampleTable().addAttribute(att); + iterator.next().setValue(att, 411); + assertEquals(411, exampleSet.getExample(3).getValue(att), 0); + } catch (Throwable e) { + e.printStackTrace(); + failed.set(true); + } + }); + + } + executorService.shutdown(); + executorService.awaitTermination(5, TimeUnit.SECONDS); + for (AtomicBoolean atomicBoolean : result) { + assertFalse(atomicBoolean.get()); + } + } + + @Test + public void testConvertAndAddMultiple() throws InterruptedException { + ExecutorService executorService = Executors.newFixedThreadPool(3); + List result = new ArrayList<>(); + for (int i = 0; i < 1000; i++) { + CountDownLatch start = new CountDownLatch(2); + AtomicBoolean failed = new AtomicBoolean(false); + result.add(failed); + + ExampleSet set = 
getNumericExampleSet(); + IOTable ioTable = BeltConverter.convert(set, CONTEXT); + ExampleSet view = TableViewCreator.INSTANCE.convertOnWriteView(ioTable, false); + + executorService.submit(() -> { + ExampleSet exampleSet = (ExampleSet) view.clone(); + Attribute att = exampleSet.getAttributes().get("integer"); + start.countDown(); + try { + start.await(); + } catch (InterruptedException e) { + e.printStackTrace(); + } + try { + exampleSet.getExample(3).setValue(att, 42); + assertEquals(42, exampleSet.getExample(3).getValue(att), 0); + } catch (Throwable e) { + e.printStackTrace(); + failed.set(true); + } + }); + + executorService.submit(() -> { + ExampleSet exampleSet = (ExampleSet) view.clone(); + Attribute att1 = AttributeFactory.createAttribute("test", Ontology.NUMERICAL); + Attribute att2 = AttributeFactory.createAttribute("test2", Ontology.NUMERICAL); + Attribute att3 = AttributeFactory.createAttribute("test3", Ontology.NUMERICAL); + exampleSet.getAttributes().addRegular(att1); + exampleSet.getAttributes().addRegular(att2); + exampleSet.getAttributes().addRegular(att3); + List list = Arrays.asList(att1, att2, att3); + start.countDown(); + try { + start.await(); + } catch (InterruptedException e) { + e.printStackTrace(); + } + try { + exampleSet.getExampleTable().addAttributes(list); + exampleSet.getExample(5).setValue(att2, 47); + assertEquals(47, exampleSet.getExample(5).getValue(att2), 0); + } catch (Throwable e) { + e.printStackTrace(); + failed.set(true); + } + }); + + executorService.submit(() -> { + ExampleSet exampleSet = (ExampleSet) view.clone(); + Attribute att1 = AttributeFactory.createAttribute("test", Ontology.NUMERICAL); + Attribute att2 = AttributeFactory.createAttribute("test2", Ontology.NUMERICAL); + Attribute att3 = AttributeFactory.createAttribute("test3", Ontology.NUMERICAL); + exampleSet.getAttributes().addRegular(att1); + exampleSet.getAttributes().addRegular(att3); + List list = Arrays.asList(att1, att2, att3); + start.countDown(); + try 
{ + start.await(); + } catch (InterruptedException e) { + e.printStackTrace(); + } + try { + exampleSet.getExampleTable().addAttributes(list); + exampleSet.getExampleTable().removeAttribute(att2); + exampleSet.getExample(5).setValue(att3, 47); + assertEquals(47, exampleSet.getExample(5).getValue(att3), 0); + } catch (Throwable e) { + e.printStackTrace(); + failed.set(true); + } + }); + } + executorService.shutdown(); + executorService.awaitTermination(5, TimeUnit.SECONDS); + for (AtomicBoolean atomicBoolean : result) { + assertFalse(atomicBoolean.get()); + } + } + + + @Test + public void testConvertAndRemove() throws InterruptedException { + ExecutorService executorService = Executors.newFixedThreadPool(3); + List result = new ArrayList<>(); + for (int i = 0; i < 1000; i++) { + CountDownLatch start = new CountDownLatch(3); + AtomicBoolean failed = new AtomicBoolean(false); + result.add(failed); + + ExampleSet set = getNumericExampleSet(); + IOTable ioTable = BeltConverter.convert(set, CONTEXT); + ExampleSet view = TableViewCreator.INSTANCE.convertOnWriteView(ioTable, false); + + executorService.submit(() -> { + ExampleSet exampleSet = (ExampleSet) view.clone(); + Attribute att = exampleSet.getAttributes().get("integer"); + start.countDown(); + try { + start.await(); + } catch (InterruptedException e) { + e.printStackTrace(); + } + try { + exampleSet.getExample(3).setValue(att, 42); + assertEquals(42, exampleSet.getExample(3).getValue(att), 0); + } catch (Throwable e) { + e.printStackTrace(); + failed.set(true); + } + }); + + executorService.submit(() -> { + ExampleSet exampleSet = (ExampleSet) view.clone(); + Attribute att = AttributeFactory.createAttribute("test42", Ontology.NUMERICAL); + ExampleTable exampleTable = exampleSet.getExampleTable(); + exampleTable.addAttribute(att); + try { + assertNotNull(exampleTable.findAttribute(att.getName())); + } catch (OperatorException e) { + e.printStackTrace(); + } + start.countDown(); + try { + start.await(); + } catch 
(InterruptedException e) { + e.printStackTrace(); + } + try { + exampleTable.removeAttribute(att); + try { + exampleTable.findAttribute("test42"); + fail(); + } catch (OperatorException e) { + // good case + } + } catch (Throwable e) { + e.printStackTrace(); + failed.set(true); + } + }); + + executorService.submit(() -> { + ExampleSet exampleSet = (ExampleSet) view.clone(); + Attribute att = AttributeFactory.createAttribute("test007", Ontology.NUMERICAL); + ExampleTable exampleTable = exampleSet.getExampleTable(); + exampleTable.addAttribute(att); + try { + assertNotNull(exampleTable.findAttribute(att.getName())); + } catch (OperatorException e) { + e.printStackTrace(); + } + int index = att.getTableIndex(); + start.countDown(); + try { + start.await(); + } catch (InterruptedException e) { + e.printStackTrace(); + } + try { + exampleTable.removeAttribute(index); + try { + exampleTable.findAttribute("test007"); + fail(); + } catch (OperatorException e) { + // good case + } + } catch (Throwable e) { + e.printStackTrace(); + failed.set(true); + } + }); + + } + executorService.shutdown(); + executorService.awaitTermination(5, TimeUnit.SECONDS); + for (AtomicBoolean atomicBoolean : result) { + assertFalse(atomicBoolean.get()); + } + } + + @Test + public void testEverything() throws InterruptedException { + ExecutorService executorService = Executors.newFixedThreadPool(9); + List result = new ArrayList<>(); + for (int i = 0; i < 1000; i++) { + CountDownLatch start = new CountDownLatch(8); //intentionally one lower so that one might start later + AtomicBoolean failed = new AtomicBoolean(false); + result.add(failed); + + ExampleSet set = getNumericExampleSet(); + IOTable ioTable = BeltConverter.convert(set, CONTEXT); + ExampleSet view = TableViewCreator.INSTANCE.convertOnWriteView(ioTable, false); + + executorService.submit(() -> { + ExampleSet exampleSet = (ExampleSet) view.clone(); + Attribute att = exampleSet.getAttributes().get("integer"); + start.countDown(); + try { 
+ start.await(); + } catch (InterruptedException e) { + e.printStackTrace(); + } + try { + exampleSet.getExample(3).setValue(att, 42); + assertEquals(42, exampleSet.getExample(3).getValue(att), 0); + } catch (Throwable e) { + e.printStackTrace(); + failed.set(true); + } + }); + + executorService.submit(() -> { + ExampleSet exampleSet = (ExampleSet) view.clone(); + Attribute att = AttributeFactory.createAttribute("test42", Ontology.NUMERICAL); + ExampleTable exampleTable = exampleSet.getExampleTable(); + start.countDown(); + try { + start.await(); + } catch (InterruptedException e) { + e.printStackTrace(); + } + try { + exampleTable.addAttribute(att); + exampleTable.removeAttribute(att); + try { + exampleTable.findAttribute("test42"); + fail(); + } catch (OperatorException e) { + // good case + } + } catch (Throwable e) { + e.printStackTrace(); + failed.set(true); + } + }); + + executorService.submit(() -> { + ExampleSet exampleSet = (ExampleSet) view.clone(); + Attribute att = AttributeFactory.createAttribute("test007", Ontology.NUMERICAL); + ExampleTable exampleTable = exampleSet.getExampleTable(); + start.countDown(); + try { + start.await(); + } catch (InterruptedException e) { + e.printStackTrace(); + } + try { + exampleTable.addAttribute(att); + int index = att.getTableIndex(); + exampleTable.removeAttribute(index); + try { + exampleTable.findAttribute("test007"); + fail(); + } catch (OperatorException e) { + // good case + } + } catch (Throwable e) { + e.printStackTrace(); + failed.set(true); + } + }); + + executorService.submit(() -> { + ExampleSet exampleSet = (ExampleSet) view.clone(); + Attribute att = exampleSet.getAttributes().get("integer"); + start.countDown(); + try { + start.await(); + } catch (InterruptedException e) { + e.printStackTrace(); + } + try { + exampleSet.getExample(3).setValue(att, 42); + assertEquals(42, exampleSet.getExample(3).getValue(att), 0); + } catch (Throwable e) { + e.printStackTrace(); + failed.set(true); + } + }); + + 
executorService.submit(() -> { + ExampleSet exampleSet = (ExampleSet) view.clone(); + Attribute att1 = AttributeFactory.createAttribute("test", Ontology.NUMERICAL); + Attribute att2 = AttributeFactory.createAttribute("test2", Ontology.NUMERICAL); + Attribute att3 = AttributeFactory.createAttribute("test3", Ontology.NUMERICAL); + exampleSet.getAttributes().addRegular(att1); + exampleSet.getAttributes().addRegular(att2); + exampleSet.getAttributes().addRegular(att3); + List list = Arrays.asList(att1, att2, att3); + start.countDown(); + try { + start.await(); + } catch (InterruptedException e) { + e.printStackTrace(); + } + try { + exampleSet.getExampleTable().addAttributes(list); + exampleSet.getExample(5).setValue(att2, 47); + assertEquals(47, exampleSet.getExample(5).getValue(att2), 0); + } catch (Throwable e) { + e.printStackTrace(); + failed.set(true); + } + }); + + executorService.submit(() -> { + ExampleSet exampleSet = (ExampleSet) view.clone(); + Attribute att1 = AttributeFactory.createAttribute("test", Ontology.NUMERICAL); + Attribute att2 = AttributeFactory.createAttribute("test2", Ontology.NUMERICAL); + Attribute att3 = AttributeFactory.createAttribute("test3", Ontology.NUMERICAL); + exampleSet.getAttributes().addRegular(att1); + exampleSet.getAttributes().addRegular(att3); + List list = Arrays.asList(att1, att2, att3); + start.countDown(); + try { + start.await(); + } catch (InterruptedException e) { + e.printStackTrace(); + } + try { + exampleSet.getExampleTable().addAttributes(list); + exampleSet.getExampleTable().removeAttribute(att2); + exampleSet.getExample(5).setValue(att3, 47); + assertEquals(47, exampleSet.getExample(5).getValue(att3), 0); + } catch (Throwable e) { + e.printStackTrace(); + failed.set(true); + } + }); + + executorService.submit(() -> { + ExampleSet exampleSet = (ExampleSet) view.clone(); + Attribute att = AttributeFactory.createAttribute("test", Ontology.NUMERICAL); + exampleSet.getAttributes().addRegular(att); + start.countDown(); 
+ try { + start.await(); + } catch (InterruptedException e) { + e.printStackTrace(); + } + try { + exampleSet.getExampleTable().addAttribute(att); + exampleSet.getExample(5).setValue(att, 47); + assertEquals(47, exampleSet.getExample(5).getValue(att), 0); + } catch (Throwable e) { + e.printStackTrace(); + failed.set(true); + } + }); + + executorService.submit(() -> { + ExampleSet exampleSet = (ExampleSet) view.clone(); + Attribute att = AttributeFactory.createAttribute("test", Ontology.NUMERICAL); + exampleSet.getAttributes().addRegular(att); + Iterator iterator = exampleSet.iterator(); + iterator.next(); + iterator.next(); + iterator.next(); + start.countDown(); + try { + start.await(); + } catch (InterruptedException e) { + e.printStackTrace(); + } + try { + exampleSet.getExampleTable().addAttribute(att); + exampleSet.getExample(3).setValue(att, 41); + assertEquals(41, iterator.next().getValue(att), 0); + } catch (Throwable e) { + e.printStackTrace(); + failed.set(true); + } + }); + + executorService.submit(() -> { + ExampleSet exampleSet = (ExampleSet) view.clone(); + Attribute att = AttributeFactory.createAttribute("test", Ontology.NUMERICAL); + exampleSet.getAttributes().addRegular(att); + Iterator iterator = exampleSet.iterator(); + iterator.next(); + iterator.next(); + iterator.next(); + start.countDown(); + try { + start.await(); + } catch (InterruptedException e) { + e.printStackTrace(); + } + try { + exampleSet.getExampleTable().addAttribute(att); + iterator.next().setValue(att, 411); + assertEquals(411, exampleSet.getExample(3).getValue(att), 0); + } catch (Throwable e) { + e.printStackTrace(); + failed.set(true); + } + }); + + } + executorService.shutdown(); + executorService.awaitTermination(5, TimeUnit.SECONDS); + for (AtomicBoolean atomicBoolean : result) { + assertFalse(atomicBoolean.get()); + } + } + } + + private static ExampleSet getNumericExampleSet() { + Attribute real = AttributeFactory.createAttribute("real", Ontology.REAL); + Attribute 
integer = AttributeFactory.createAttribute("integer", Ontology.INTEGER); + Attribute dateTime = AttributeFactory.createAttribute("date_time", Ontology.DATE_TIME); + Attribute date = AttributeFactory.createAttribute("date", Ontology.DATE); + Attribute time = AttributeFactory.createAttribute("time", Ontology.TIME); + List attributes = Arrays.asList(real, dateTime, date, time, integer); + return ExampleSets.from(attributes).withBlankSize(150) + .withColumnFiller(real, i -> Math.random() > 0.7 ? Double.NaN : 42 + Math.random()) + .withColumnFiller(integer, i -> Math.random() > 0.7 ? Double.NaN : Math.round(Math.random() * 100)) + .withColumnFiller(dateTime, + i -> Math.random() > 0.7 ? Double.NaN : 1515410698d + Math.floor(Math.random() * 1000)) + .withColumnFiller(date, i -> Math.random() > 0.7 ? Double.NaN : + 230169600000d + Math.floor(Math.random() * 100) * 1000d * 60 * 60 * 24) + .withColumnFiller(time, + i -> Math.random() > 0.7 ? Double.NaN : Math.floor(Math.random() * 60 * 60 * 24 * 1000)) + .build(); + } + + private static byte[] serialize(Object obj) throws IOException { + ByteArrayOutputStream b = new ByteArrayOutputStream(); + ObjectOutputStream o = new ObjectOutputStream(b); + o.writeObject(obj); + return b.toByteArray(); + } + + private static Object deserialize(byte[] bytes) throws IOException, ClassNotFoundException { + ByteArrayInputStream b = new ByteArrayInputStream(bytes); + ObjectInputStream o = new ObjectInputStream(b); + return o.readObject(); + } +} diff --git a/src/test/java/com/rapidminer/belt/table/NominalMappingAdapterTest.java b/src/test/java/com/rapidminer/belt/table/NominalMappingAdapterTest.java index 137ae4f..ecfe75b 100644 --- a/src/test/java/com/rapidminer/belt/table/NominalMappingAdapterTest.java +++ b/src/test/java/com/rapidminer/belt/table/NominalMappingAdapterTest.java @@ -1,5 +1,5 @@ /** - * Copyright (C) 2001-2019 by RapidMiner and the contributors + * Copyright (C) 2001-2020 by RapidMiner and the contributors * * Complete list 
of developers available at our web site: * diff --git a/src/test/java/com/rapidminer/belt/table/ShiftedNominalMappingAdapterTest.java b/src/test/java/com/rapidminer/belt/table/ShiftedNominalMappingAdapterTest.java index 2b5e495..2d34998 100644 --- a/src/test/java/com/rapidminer/belt/table/ShiftedNominalMappingAdapterTest.java +++ b/src/test/java/com/rapidminer/belt/table/ShiftedNominalMappingAdapterTest.java @@ -1,5 +1,5 @@ /** - * Copyright (C) 2001-2019 by RapidMiner and the contributors + * Copyright (C) 2001-2020 by RapidMiner and the contributors * * Complete list of developers available at our web site: * diff --git a/src/test/java/com/rapidminer/belt/table/TableViewCreatorTest.java b/src/test/java/com/rapidminer/belt/table/TableViewCreatorTest.java index bd32cc0..aa5d660 100644 --- a/src/test/java/com/rapidminer/belt/table/TableViewCreatorTest.java +++ b/src/test/java/com/rapidminer/belt/table/TableViewCreatorTest.java @@ -1,5 +1,5 @@ /** - * Copyright (C) 2001-2019 by RapidMiner and the contributors + * Copyright (C) 2001-2020 by RapidMiner and the contributors * * Complete list of developers available at our web site: * @@ -20,6 +20,7 @@ import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; @@ -28,6 +29,7 @@ import java.io.ObjectOutputStream; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.List; import java.util.Random; import java.util.concurrent.Callable; @@ -38,12 +40,16 @@ import org.junit.BeforeClass; import org.junit.Test; +import org.junit.experimental.runners.Enclosed; +import org.junit.runner.RunWith; -import com.rapidminer.belt.buffer.CategoricalBuffer; +import com.rapidminer.adaption.belt.IOTable; +import com.rapidminer.belt.buffer.Buffers; +import com.rapidminer.belt.buffer.NominalBuffer; import com.rapidminer.belt.column.Column; 
import com.rapidminer.belt.column.ColumnType; -import com.rapidminer.belt.column.ColumnTypes; import com.rapidminer.belt.column.Columns; +import com.rapidminer.belt.column.type.StringSet; import com.rapidminer.belt.util.Belt; import com.rapidminer.core.concurrency.ConcurrencyContext; import com.rapidminer.core.concurrency.ExecutionStoppedException; @@ -52,6 +58,7 @@ import com.rapidminer.example.ExampleSet; import com.rapidminer.example.Statistics; import com.rapidminer.example.table.AttributeFactory; +import com.rapidminer.example.table.NominalMapping; import com.rapidminer.example.utils.ExampleSets; import com.rapidminer.test.asserter.AsserterFactoryRapidMiner; import com.rapidminer.test_utils.RapidAssert; @@ -63,6 +70,7 @@ * * @author Gisa Meier */ +@RunWith(Enclosed.class) public class TableViewCreatorTest { private static final ConcurrencyContext CONTEXT = new ConcurrencyContext() { @@ -127,385 +135,1024 @@ public void checkStatus() throws ExecutionStoppedException { }; - @BeforeClass - public static void setup() { - RapidAssert.ASSERTER_REGISTRY.registerAllAsserters(new AsserterFactoryRapidMiner()); - } + public static class ExampleSetView { + @BeforeClass + public static void setup() { + RapidAssert.ASSERTER_REGISTRY.registerAllAsserters(new AsserterFactoryRapidMiner()); + } + + @Test + public void testNominalTypes() { + Attribute nominal = AttributeFactory.createAttribute("nominal", Ontology.NOMINAL); + Attribute string = AttributeFactory.createAttribute("string", Ontology.STRING); + Attribute polynominal = AttributeFactory.createAttribute("polynominal", Ontology.POLYNOMINAL); + Attribute binominal = AttributeFactory.createAttribute("binominal", Ontology.BINOMINAL); + Attribute path = AttributeFactory.createAttribute("path", Ontology.FILE_PATH); + for (int i = 0; i < 5; i++) { + nominal.getMapping().mapString("nominalValue" + i); + } + for (int i = 0; i < 4; i++) { + string.getMapping().mapString("veryVeryLongStringValue" + i); + } + for (int i = 0; i < 6; 
i++) { + polynominal.getMapping().mapString("polyValue" + i); + } + for (int i = 0; i < 2; i++) { + binominal.getMapping().mapString("binominalValue" + i); + } + for (int i = 0; i < 3; i++) { + path.getMapping().mapString("//folder/sufolder/subsubfolder/file" + i); + } + + List attributes = Arrays.asList(nominal, string, polynominal, binominal, path); + Random random = new Random(); + ExampleSet set = ExampleSets.from(attributes).withBlankSize(50) + .withColumnFiller(nominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(5)) + .withColumnFiller(string, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(4)) + .withColumnFiller(polynominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(6)) + .withColumnFiller(binominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(2)) + .withColumnFiller(path, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(3)) + .build(); + + Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); + ExampleSet view = com.rapidminer.belt.table.TableViewCreator.INSTANCE.createView(table); - @Test - public void testNominalTypes() { - Attribute nominal = AttributeFactory.createAttribute("nominal", Ontology.NOMINAL); - Attribute string = AttributeFactory.createAttribute("string", Ontology.STRING); - Attribute polynominal = AttributeFactory.createAttribute("polynominal", Ontology.POLYNOMINAL); - Attribute binominal = AttributeFactory.createAttribute("binominal", Ontology.BINOMINAL); - Attribute path = AttributeFactory.createAttribute("path", Ontology.FILE_PATH); - for (int i = 0; i < 5; i++) { - nominal.getMapping().mapString("nominalValue" + i); + RapidAssert.assertEquals(set, view); } - for (int i = 0; i < 4; i++) { - string.getMapping().mapString("veryVeryLongStringValue" + i); + + @Test + public void testNumericTypes() { + Attribute numeric = AttributeFactory.createAttribute("numeric", Ontology.NUMERICAL); + Attribute real = 
AttributeFactory.createAttribute("real", Ontology.REAL); + Attribute integer = AttributeFactory.createAttribute("integer", Ontology.INTEGER); + List attributes = Arrays.asList(numeric, real, integer); + ExampleSet set = ExampleSets.from(attributes).withBlankSize(150) + .withColumnFiller(numeric, i -> Math.random() > 0.7 ? Double.NaN : Math.random()) + .withColumnFiller(real, i -> Math.random() > 0.7 ? Double.NaN : 42 + Math.random()) + .withColumnFiller(integer, i -> Math.random() > 0.7 ? Double.NaN : Math.round(Math.random() * 100)) + .withRole(numeric, Attributes.LABEL_NAME) + .build(); + + Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); + + RapidAssert.assertEquals(set, com.rapidminer.belt.table.TableViewCreator.INSTANCE.createView(table)); } - for (int i = 0; i < 6; i++) { - polynominal.getMapping().mapString("polyValue" + i); + + @Test + public void testNumericAndDateTypes() { + Attribute numeric = AttributeFactory.createAttribute("numeric", Ontology.NUMERICAL); + Attribute real = AttributeFactory.createAttribute("real", Ontology.REAL); + Attribute integer = AttributeFactory.createAttribute("integer", Ontology.INTEGER); + Attribute dateTime = AttributeFactory.createAttribute("date_time", Ontology.DATE_TIME); + Attribute date = AttributeFactory.createAttribute("date", Ontology.DATE); + Attribute time = AttributeFactory.createAttribute("time", Ontology.TIME); + List attributes = Arrays.asList(numeric, real, dateTime, date, time, integer); + ExampleSet set = ExampleSets.from(attributes).withBlankSize(150) + .withColumnFiller(numeric, i -> Math.random() > 0.7 ? Double.NaN : Math.random()) + .withColumnFiller(real, i -> Math.random() > 0.7 ? Double.NaN : 42 + Math.random()) + .withColumnFiller(integer, i -> Math.random() > 0.7 ? Double.NaN : Math.round(Math.random() * 100)) + .withColumnFiller(dateTime, + i -> Math.random() > 0.7 ? 
Double.NaN : 1515410698d + Math.floor(Math.random() * 1000)) + .withColumnFiller(date, i -> Math.random() > 0.7 ? Double.NaN : + 230169600000d + Math.floor(Math.random() * 100) * 1000d * 60 * 60 * 24) + .withColumnFiller(time, + i -> Math.random() > 0.7 ? Double.NaN : Math.floor(Math.random() * 60 * 60 * 24 * 1000)) + .withRole(numeric, Attributes.LABEL_NAME) + .build(); + + Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); + + RapidAssert.assertEquals(set, com.rapidminer.belt.table.TableViewCreator.INSTANCE.createView(table)); } - for (int i = 0; i < 2; i++) { - binominal.getMapping().mapString("binominalValue" + i); + + @Test + public void testExamplesEqual() { + Attribute numeric = AttributeFactory.createAttribute("numeric", Ontology.NUMERICAL); + Attribute real = AttributeFactory.createAttribute("real", Ontology.REAL); + Attribute integer = AttributeFactory.createAttribute("integer", Ontology.INTEGER); + Attribute nominal = AttributeFactory.createAttribute("nominal", Ontology.NOMINAL); + Attribute polynominal = AttributeFactory.createAttribute("polynominal", Ontology.POLYNOMINAL); + Attribute binominal = AttributeFactory.createAttribute("binominal", Ontology.BINOMINAL); + for (int i = 0; i < 5; i++) { + nominal.getMapping().mapString("nominalValue" + i); + } + for (int i = 0; i < 6; i++) { + polynominal.getMapping().mapString("polyValue" + i); + } + for (int i = 0; i < 2; i++) { + binominal.getMapping().mapString("binominalValue" + i); + } + List attributes = Arrays.asList(nominal, numeric, real, integer, polynominal, binominal); + Random random = new Random(); + ExampleSet set = ExampleSets.from(attributes).withBlankSize(150) + .withColumnFiller(numeric, i -> Math.random() > 0.7 ? Double.NaN : Math.random()) + .withColumnFiller(real, i -> Math.random() > 0.7 ? Double.NaN : 42 + Math.random()) + .withColumnFiller(integer, i -> Math.random() > 0.7 ? 
Double.NaN : Math.round(Math.random() * 100)) + .withColumnFiller(nominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(5)) + .withColumnFiller(polynominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(6)) + .withColumnFiller(binominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(2)) + .withRole(numeric, Attributes.LABEL_NAME) + .build(); + + Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); + + ExampleSet view1 = new DoubleTableWrapper(table); + ExampleSet view2 = new DatetimeTableWrapper(table); + for (int i = 0; i < table.height(); i++) { + RapidAssert.assertEquals("test", view1.getExample(i), view2.getExample(i)); + RapidAssert.assertEquals("test2", set.getExample(i), view2.getExample(i)); + } + RapidAssert + .assertEquals(view1.getExample(0).getDataRow().getType(), view2.getExample(0).getDataRow().getType()); + RapidAssert + .assertEquals(view1.getExample(1).getDataRow().toString(), view2.getExample(1).getDataRow().toString + ()); } - for (int i = 0; i < 3; i++) { - path.getMapping().mapString("//folder/sufolder/subsubfolder/file" + i); + + @Test(expected = UnsupportedOperationException.class) + public void testExampleWriteUnsupported() { + Attribute numeric = AttributeFactory.createAttribute("numeric", Ontology.NUMERICAL); + ExampleSet set = ExampleSets.from(numeric).withBlankSize(15) + .withColumnFiller(numeric, i -> Math.random() > 0.7 ? Double.NaN : Math.random()).build(); + Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); + + ExampleSet view = com.rapidminer.belt.table.TableViewCreator.INSTANCE.createView(table); + view.getExample(4).setValue(numeric, 5); } - List attributes = Arrays.asList(nominal, string, polynominal, binominal, path); - Random random = new Random(); - ExampleSet set = ExampleSets.from(attributes).withBlankSize(50) - .withColumnFiller(nominal, i -> random.nextDouble() > 0.7 ? 
Double.NaN : random.nextInt(5)) - .withColumnFiller(string, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(4)) - .withColumnFiller(polynominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(6)) - .withColumnFiller(binominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(2)) - .withColumnFiller(path, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(3)) - .build(); + @Test(expected = UnsupportedOperationException.class) + public void testExampleWriteUnsupportedDate() { + Attribute dateTime = AttributeFactory.createAttribute("date_time", Ontology.DATE_TIME); + ExampleSet set = ExampleSets.from(dateTime).withBlankSize(15) + .withColumnFiller(dateTime, + i -> Math.random() > 0.7 ? Double.NaN : 1515410698d + Math.floor(Math.random() * 1000)) + .build(); + Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); + + ExampleSet view = com.rapidminer.belt.table.TableViewCreator.INSTANCE.createView(table); + view.getExample(4).setValue(dateTime, 5); + } - Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); - ExampleSet view = com.rapidminer.belt.table.TableViewCreator.INSTANCE.createView(table); + @Test + public void testStatistics() { + Attribute numeric = AttributeFactory.createAttribute("numeric", Ontology.NUMERICAL); + Attribute real = AttributeFactory.createAttribute("real", Ontology.REAL); + Attribute integer = AttributeFactory.createAttribute("integer", Ontology.INTEGER); + Attribute dateTime = AttributeFactory.createAttribute("date_time", Ontology.DATE_TIME); + Attribute date = AttributeFactory.createAttribute("date", Ontology.DATE); + Attribute time = AttributeFactory.createAttribute("time", Ontology.TIME); + List attributes = Arrays.asList(numeric, real, dateTime, date, time, integer); + ExampleSet set = ExampleSets.from(attributes).withBlankSize(150) + .withColumnFiller(numeric, i -> Math.random() > 0.7 ? 
Double.NaN : Math.random()) + .withColumnFiller(real, i -> Math.random() > 0.7 ? Double.NaN : 42 + Math.random()) + .withColumnFiller(integer, i -> Math.random() > 0.7 ? Double.NaN : Math.round(Math.random() * 100)) + .withColumnFiller(dateTime, + i -> Math.random() > 0.7 ? Double.NaN : 1515410698d + Math.floor(Math.random() * 1000)) + .withColumnFiller(date, i -> Math.random() > 0.7 ? Double.NaN : + 230169600000d + Math.floor(Math.random() * 100) * 1000d * 60 * 60 * 24) + .withColumnFiller(time, + i -> Math.random() > 0.7 ? Double.NaN : Math.floor(Math.random() * 60 * 60 * 24 * 1000)) + .withRole(numeric, Attributes.LABEL_NAME) + .build(); + + Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); + set.recalculateAllAttributeStatistics(); + + ExampleSet view = com.rapidminer.belt.table.TableViewCreator.INSTANCE.createView(table); + view.recalculateAllAttributeStatistics(); + + List setStatistics = new ArrayList<>(); + for (Attribute att : set.getAttributes()) { + if (att.isNumerical()) { + setStatistics.add(set.getStatistics(att, Statistics.AVERAGE)); + } + setStatistics.add(set.getStatistics(att, Statistics.UNKNOWN)); + setStatistics.add(set.getStatistics(att, Statistics.MINIMUM)); + setStatistics.add(set.getStatistics(att, Statistics.MAXIMUM)); + } - RapidAssert.assertEquals(set, view); - } + List viewStatistics = new ArrayList<>(); + for (Attribute att : view.getAttributes()) { + if (att.isNumerical()) { + viewStatistics.add(view.getStatistics(att, Statistics.AVERAGE)); + } + viewStatistics.add(view.getStatistics(att, Statistics.UNKNOWN)); + viewStatistics.add(view.getStatistics(att, Statistics.MINIMUM)); + viewStatistics.add(view.getStatistics(att, Statistics.MAXIMUM)); + } - @Test - public void testNumericTypes() { - Attribute numeric = AttributeFactory.createAttribute("numeric", Ontology.NUMERICAL); - Attribute real = AttributeFactory.createAttribute("real", Ontology.REAL); - Attribute integer = 
AttributeFactory.createAttribute("integer", Ontology.INTEGER); - List attributes = Arrays.asList(numeric, real, integer); - ExampleSet set = ExampleSets.from(attributes).withBlankSize(150) - .withColumnFiller(numeric, i -> Math.random() > 0.7 ? Double.NaN : Math.random()) - .withColumnFiller(real, i -> Math.random() > 0.7 ? Double.NaN : 42 + Math.random()) - .withColumnFiller(integer, i -> Math.random() > 0.7 ? Double.NaN : Math.round(Math.random() * 100)) - .withRole(numeric, Attributes.LABEL_NAME) - .build(); - - Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); - - RapidAssert.assertEquals(set, com.rapidminer.belt.table.TableViewCreator.INSTANCE.createView(table)); - } + assertArrayEquals(setStatistics.toArray(), viewStatistics.toArray()); + } - @Test - public void testNumericAndDateTypes() { - Attribute numeric = AttributeFactory.createAttribute("numeric", Ontology.NUMERICAL); - Attribute real = AttributeFactory.createAttribute("real", Ontology.REAL); - Attribute integer = AttributeFactory.createAttribute("integer", Ontology.INTEGER); - Attribute dateTime = AttributeFactory.createAttribute("date_time", Ontology.DATE_TIME); - Attribute date = AttributeFactory.createAttribute("date", Ontology.DATE); - Attribute time = AttributeFactory.createAttribute("time", Ontology.TIME); - List attributes = Arrays.asList(numeric, real, dateTime, date, time, integer); - ExampleSet set = ExampleSets.from(attributes).withBlankSize(150) - .withColumnFiller(numeric, i -> Math.random() > 0.7 ? Double.NaN : Math.random()) - .withColumnFiller(real, i -> Math.random() > 0.7 ? Double.NaN : 42 + Math.random()) - .withColumnFiller(integer, i -> Math.random() > 0.7 ? Double.NaN : Math.round(Math.random() * 100)) - .withColumnFiller(dateTime, - i -> Math.random() > 0.7 ? Double.NaN : 1515410698d + Math.floor(Math.random() * 1000)) - .withColumnFiller(date, i -> Math.random() > 0.7 ? 
Double.NaN : - 230169600000d + Math.floor(Math.random() * 100) * 1000d * 60 * 60 * 24) - .withColumnFiller(time, - i -> Math.random() > 0.7 ? Double.NaN : Math.floor(Math.random() * 60 * 60 * 24 * 1000)) - .withRole(numeric, Attributes.LABEL_NAME) - .build(); - - Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); - - RapidAssert.assertEquals(set, com.rapidminer.belt.table.TableViewCreator.INSTANCE.createView(table)); - } + @Test + public void testStatisticsWithWeight() { + Attribute numeric = AttributeFactory.createAttribute("numeric", Ontology.NUMERICAL); + Attribute real = AttributeFactory.createAttribute("real", Ontology.REAL); + Attribute integer = AttributeFactory.createAttribute("integer", Ontology.INTEGER); + Attribute nominal = AttributeFactory.createAttribute("nominal", Ontology.NOMINAL); + Attribute polynominal = AttributeFactory.createAttribute("polynominal", Ontology.POLYNOMINAL); + Attribute binominal = AttributeFactory.createAttribute("binominal", Ontology.BINOMINAL); + for (int i = 0; i < 5; i++) { + nominal.getMapping().mapString("nominalValue" + i); + } + for (int i = 0; i < 6; i++) { + polynominal.getMapping().mapString("polyValue" + i); + } + for (int i = 0; i < 2; i++) { + binominal.getMapping().mapString("binominalValue" + i); + } + List attributes = Arrays.asList(nominal, numeric, real, integer, polynominal, binominal); + Random random = new Random(); + ExampleSet set = ExampleSets.from(attributes).withBlankSize(150) + .withColumnFiller(numeric, i -> Math.random() > 0.7 ? Double.NaN : Math.random()) + .withColumnFiller(real, i -> Math.random() > 0.7 ? Double.NaN : 42 + Math.random()) + .withColumnFiller(integer, i -> Math.random() > 0.7 ? Double.NaN : Math.round(Math.random() * 100)) + .withColumnFiller(nominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(5)) + .withColumnFiller(polynominal, i -> random.nextDouble() > 0.7 ? 
Double.NaN : random.nextInt(6)) + .withColumnFiller(binominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(2)) + .withRole(numeric, Attributes.WEIGHT_NAME) + .build(); + + Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); + set.recalculateAllAttributeStatistics(); + + ExampleSet view = com.rapidminer.belt.table.TableViewCreator.INSTANCE.createView(table); + view.recalculateAllAttributeStatistics(); + + List setStatistics = new ArrayList<>(); + for (Attribute att : set.getAttributes()) { + if (att.isNumerical()) { + setStatistics.add(set.getStatistics(att, Statistics.AVERAGE)); + } + setStatistics.add(set.getStatistics(att, Statistics.UNKNOWN)); + setStatistics.add(set.getStatistics(att, Statistics.MINIMUM)); + setStatistics.add(set.getStatistics(att, Statistics.MAXIMUM)); + } - @Test - public void testExamplesEqual() { - Attribute numeric = AttributeFactory.createAttribute("numeric", Ontology.NUMERICAL); - Attribute real = AttributeFactory.createAttribute("real", Ontology.REAL); - Attribute integer = AttributeFactory.createAttribute("integer", Ontology.INTEGER); - Attribute nominal = AttributeFactory.createAttribute("nominal", Ontology.NOMINAL); - Attribute polynominal = AttributeFactory.createAttribute("polynominal", Ontology.POLYNOMINAL); - Attribute binominal = AttributeFactory.createAttribute("binominal", Ontology.BINOMINAL); - for (int i = 0; i < 5; i++) { - nominal.getMapping().mapString("nominalValue" + i); - } - for (int i = 0; i < 6; i++) { - polynominal.getMapping().mapString("polyValue" + i); - } - for (int i = 0; i < 2; i++) { - binominal.getMapping().mapString("binominalValue" + i); - } - List attributes = Arrays.asList(nominal, numeric, real, integer, polynominal, binominal); - Random random = new Random(); - ExampleSet set = ExampleSets.from(attributes).withBlankSize(150) - .withColumnFiller(numeric, i -> Math.random() > 0.7 ? 
Double.NaN : Math.random()) - .withColumnFiller(real, i -> Math.random() > 0.7 ? Double.NaN : 42 + Math.random()) - .withColumnFiller(integer, i -> Math.random() > 0.7 ? Double.NaN : Math.round(Math.random() * 100)) - .withColumnFiller(nominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(5)) - .withColumnFiller(polynominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(6)) - .withColumnFiller(binominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(2)) - .withRole(numeric, Attributes.LABEL_NAME) - .build(); - - Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); - - ExampleSet view1 = new DoubleTableWrapper(table); - ExampleSet view2 = new DatetimeTableWrapper(table); - for (int i = 0; i < table.height(); i++) { - RapidAssert.assertEquals("test", view1.getExample(i), view2.getExample(i)); - RapidAssert.assertEquals("test2", set.getExample(i), view2.getExample(i)); - } - RapidAssert - .assertEquals(view1.getExample(0).getDataRow().getType(), view2.getExample(0).getDataRow().getType()); - RapidAssert - .assertEquals(view1.getExample(1).getDataRow().toString(), view2.getExample(1).getDataRow().toString - ()); - } + List viewStatistics = new ArrayList<>(); + for (Attribute att : view.getAttributes()) { + if (att.isNumerical()) { + viewStatistics.add(view.getStatistics(att, Statistics.AVERAGE)); + } + viewStatistics.add(view.getStatistics(att, Statistics.UNKNOWN)); + viewStatistics.add(view.getStatistics(att, Statistics.MINIMUM)); + viewStatistics.add(view.getStatistics(att, Statistics.MAXIMUM)); + } - @Test(expected = UnsupportedOperationException.class) - public void testExampleWriteUnsupported() { - Attribute numeric = AttributeFactory.createAttribute("numeric", Ontology.NUMERICAL); - ExampleSet set = ExampleSets.from(numeric).withBlankSize(15) - .withColumnFiller(numeric, i -> Math.random() > 0.7 ? 
Double.NaN : Math.random()).build(); - Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); + assertArrayEquals(setStatistics.toArray(), viewStatistics.toArray()); + } - ExampleSet view = com.rapidminer.belt.table.TableViewCreator.INSTANCE.createView(table); - view.getExample(4).setValue(numeric, 5); - } + @Test + public void testSerialization() throws IOException, ClassNotFoundException { + Attribute numeric = AttributeFactory.createAttribute("numeric", Ontology.NUMERICAL); + Attribute real = AttributeFactory.createAttribute("real", Ontology.REAL); + Attribute integer = AttributeFactory.createAttribute("integer", Ontology.INTEGER); + Attribute nominal = AttributeFactory.createAttribute("nominal", Ontology.NOMINAL); + Attribute polynominal = AttributeFactory.createAttribute("polynominal", Ontology.POLYNOMINAL); + Attribute binominal = AttributeFactory.createAttribute("binominal", Ontology.BINOMINAL); + for (int i = 0; i < 5; i++) { + nominal.getMapping().mapString("nominalValue" + i); + } + for (int i = 0; i < 6; i++) { + polynominal.getMapping().mapString("polyValue" + i); + } + for (int i = 0; i < 2; i++) { + binominal.getMapping().mapString("binominalValue" + i); + } + List attributes = Arrays.asList(nominal, numeric, real, integer, polynominal, binominal); + Random random = new Random(); + ExampleSet set = ExampleSets.from(attributes).withBlankSize(150) + .withColumnFiller(numeric, i -> Math.random() > 0.7 ? Double.NaN : Math.random()) + .withColumnFiller(real, i -> Math.random() > 0.7 ? Double.NaN : 42 + Math.random()) + .withColumnFiller(integer, i -> Math.random() > 0.7 ? Double.NaN : Math.round(Math.random() * 100)) + .withColumnFiller(nominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(5)) + .withColumnFiller(polynominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(6)) + .withColumnFiller(binominal, i -> random.nextDouble() > 0.7 ? 
Double.NaN : random.nextInt(2)) + .withRole(numeric, Attributes.LABEL_NAME) + .build(); + + Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); + byte[] serialized = serialize(com.rapidminer.belt.table.TableViewCreator.INSTANCE.createView(table)); + Object deserialized = deserialize(serialized); + + RapidAssert.assertEquals(set, (ExampleSet) deserialized); + } - @Test(expected = UnsupportedOperationException.class) - public void testExampleWriteUnsupportedDate() { - Attribute dateTime = AttributeFactory.createAttribute("date_time", Ontology.DATE_TIME); - ExampleSet set = ExampleSets.from(dateTime).withBlankSize(15) - .withColumnFiller(dateTime, - i -> Math.random() > 0.7 ? Double.NaN : 1515410698d + Math.floor(Math.random() * 1000)) - .build(); - Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); - - ExampleSet view = com.rapidminer.belt.table.TableViewCreator.INSTANCE.createView(table); - view.getExample(4).setValue(dateTime, 5); - } - @Test - public void testStatistics() { - Attribute numeric = AttributeFactory.createAttribute("numeric", Ontology.NUMERICAL); - Attribute real = AttributeFactory.createAttribute("real", Ontology.REAL); - Attribute integer = AttributeFactory.createAttribute("integer", Ontology.INTEGER); - Attribute dateTime = AttributeFactory.createAttribute("date_time", Ontology.DATE_TIME); - Attribute date = AttributeFactory.createAttribute("date", Ontology.DATE); - Attribute time = AttributeFactory.createAttribute("time", Ontology.TIME); - List attributes = Arrays.asList(numeric, real, dateTime, date, time, integer); - ExampleSet set = ExampleSets.from(attributes).withBlankSize(150) - .withColumnFiller(numeric, i -> Math.random() > 0.7 ? Double.NaN : Math.random()) - .withColumnFiller(real, i -> Math.random() > 0.7 ? Double.NaN : 42 + Math.random()) - .withColumnFiller(integer, i -> Math.random() > 0.7 ? 
Double.NaN : Math.round(Math.random() * 100)) - .withColumnFiller(dateTime, - i -> Math.random() > 0.7 ? Double.NaN : 1515410698d + Math.floor(Math.random() * 1000)) - .withColumnFiller(date, i -> Math.random() > 0.7 ? Double.NaN : - 230169600000d + Math.floor(Math.random() * 100) * 1000d * 60 * 60 * 24) - .withColumnFiller(time, - i -> Math.random() > 0.7 ? Double.NaN : Math.floor(Math.random() * 60 * 60 * 24 * 1000)) - .withRole(numeric, Attributes.LABEL_NAME) - .build(); - - Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); - set.recalculateAllAttributeStatistics(); - - ExampleSet view = com.rapidminer.belt.table.TableViewCreator.INSTANCE.createView(table); - view.recalculateAllAttributeStatistics(); - - List setStatistics = new ArrayList<>(); - for(Attribute att: set.getAttributes()){ - if(att.isNumerical()){ - setStatistics.add(set.getStatistics(att, Statistics.AVERAGE)); - } - setStatistics.add(set.getStatistics(att, Statistics.UNKNOWN)); - setStatistics.add(set.getStatistics(att, Statistics.MINIMUM)); - setStatistics.add(set.getStatistics(att, Statistics.MAXIMUM)); - } - - List viewStatistics = new ArrayList<>(); - for(Attribute att: view.getAttributes()){ - if(att.isNumerical()){ - viewStatistics.add(view.getStatistics(att, Statistics.AVERAGE)); - } - viewStatistics.add(view.getStatistics(att, Statistics.UNKNOWN)); - viewStatistics.add(view.getStatistics(att, Statistics.MINIMUM)); - viewStatistics.add(view.getStatistics(att, Statistics.MAXIMUM)); - } - - assertArrayEquals(setStatistics.toArray(), viewStatistics.toArray()); - } + @Test + public void testSerializationDate() throws IOException, ClassNotFoundException { + Attribute numeric = AttributeFactory.createAttribute("numeric", Ontology.NUMERICAL); + Attribute real = AttributeFactory.createAttribute("real", Ontology.REAL); + Attribute integer = AttributeFactory.createAttribute("integer", Ontology.INTEGER); + Attribute dateTime = 
AttributeFactory.createAttribute("date_time", Ontology.DATE_TIME); + Attribute date = AttributeFactory.createAttribute("date", Ontology.DATE); + Attribute time = AttributeFactory.createAttribute("time", Ontology.TIME); + List attributes = Arrays.asList(numeric, real, dateTime, date, time, integer); + ExampleSet set = ExampleSets.from(attributes).withBlankSize(150) + .withColumnFiller(numeric, i -> Math.random() > 0.7 ? Double.NaN : Math.random()) + .withColumnFiller(real, i -> Math.random() > 0.7 ? Double.NaN : 42 + Math.random()) + .withColumnFiller(integer, i -> Math.random() > 0.7 ? Double.NaN : Math.round(Math.random() * 100)) + .withColumnFiller(dateTime, + i -> Math.random() > 0.7 ? Double.NaN : 1515410698d + Math.floor(Math.random() * 1000)) + .withColumnFiller(date, i -> Math.random() > 0.7 ? Double.NaN : + 230169600000d + Math.floor(Math.random() * 100) * 1000d * 60 * 60 * 24) + .withColumnFiller(time, + i -> Math.random() > 0.7 ? Double.NaN : Math.floor(Math.random() * 60 * 60 * 24 * 1000)) + .withRole(numeric, Attributes.LABEL_NAME) + .build(); + + Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); + byte[] serialized = serialize(com.rapidminer.belt.table.TableViewCreator.INSTANCE.createView(table)); + Object deserialized = deserialize(serialized); + + RapidAssert.assertEquals(set, (ExampleSet) deserialized); + } - @Test - public void testStatisticsWithWeight() { - Attribute numeric = AttributeFactory.createAttribute("numeric", Ontology.NUMERICAL); - Attribute real = AttributeFactory.createAttribute("real", Ontology.REAL); - Attribute integer = AttributeFactory.createAttribute("integer", Ontology.INTEGER); - Attribute nominal = AttributeFactory.createAttribute("nominal", Ontology.NOMINAL); - Attribute polynominal = AttributeFactory.createAttribute("polynominal", Ontology.POLYNOMINAL); - Attribute binominal = AttributeFactory.createAttribute("binominal", Ontology.BINOMINAL); - for (int i = 0; i < 5; i++) { - 
nominal.getMapping().mapString("nominalValue" + i); - } - for (int i = 0; i < 6; i++) { - polynominal.getMapping().mapString("polyValue" + i); - } - for (int i = 0; i < 2; i++) { - binominal.getMapping().mapString("binominalValue" + i); - } - List attributes = Arrays.asList(nominal, numeric, real, integer, polynominal, binominal); - Random random = new Random(); - ExampleSet set = ExampleSets.from(attributes).withBlankSize(150) - .withColumnFiller(numeric, i -> Math.random() > 0.7 ? Double.NaN : Math.random()) - .withColumnFiller(real, i -> Math.random() > 0.7 ? Double.NaN : 42 + Math.random()) - .withColumnFiller(integer, i -> Math.random() > 0.7 ? Double.NaN : Math.round(Math.random() * 100)) - .withColumnFiller(nominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(5)) - .withColumnFiller(polynominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(6)) - .withColumnFiller(binominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(2)) - .withRole(numeric, Attributes.WEIGHT_NAME) - .build(); - - Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); - set.recalculateAllAttributeStatistics(); - - ExampleSet view = com.rapidminer.belt.table.TableViewCreator.INSTANCE.createView(table); - view.recalculateAllAttributeStatistics(); - - List setStatistics = new ArrayList<>(); - for(Attribute att: set.getAttributes()){ - if(att.isNumerical()){ - setStatistics.add(set.getStatistics(att, Statistics.AVERAGE)); - } - setStatistics.add(set.getStatistics(att, Statistics.UNKNOWN)); - setStatistics.add(set.getStatistics(att, Statistics.MINIMUM)); - setStatistics.add(set.getStatistics(att, Statistics.MAXIMUM)); - } - - List viewStatistics = new ArrayList<>(); - for(Attribute att: view.getAttributes()){ - if(att.isNumerical()){ - viewStatistics.add(view.getStatistics(att, Statistics.AVERAGE)); - } - viewStatistics.add(view.getStatistics(att, Statistics.UNKNOWN)); - viewStatistics.add(view.getStatistics(att, 
Statistics.MINIMUM)); - viewStatistics.add(view.getStatistics(att, Statistics.MAXIMUM)); - } - - assertArrayEquals(setStatistics.toArray(), viewStatistics.toArray()); - } + @Test + public void testSerializationGaps() throws IOException, ClassNotFoundException { + NominalBuffer buffer = BufferAccessor.get().newUInt8Buffer(ColumnType.NOMINAL, 21); + for (int i = 0; i < buffer.size(); i++) { + buffer.set(i, "value" + i); + } + buffer.set(7, null); + buffer.set(5, null); + NominalBuffer buffer2 = BufferAccessor.get().newUInt8Buffer(ColumnType.NOMINAL, 21); + for (int i = 0; i < buffer2.size(); i++) { + buffer2.set(i, "val" + i); + } + buffer2.set(3, null); + buffer2.set(5, null); + Column column = Columns.removeUnusedDictionaryValues(buffer.toColumn(), + Columns.CleanupOption.REMOVE, Belt.defaultContext()); + Column column2 = Columns.removeUnusedDictionaryValues(buffer2.toColumn(), + Columns.CleanupOption.REMOVE, Belt.defaultContext()); + + NominalBuffer buffer3 = BufferAccessor.get().newUInt2Buffer(ColumnType.NOMINAL, 21); + buffer3.set(0, "bla"); + for (int i = 0; i < buffer3.size(); i++) { + buffer3.set(i, "blup"); + } + buffer3.set(10, null); + NominalBuffer buffer4 = BufferAccessor.get().newUInt2Buffer(ColumnType.NOMINAL, 21); + buffer4.set(0, "bla"); + for (int i = 0; i < buffer.size(); i++) { + buffer4.set(i, "blup"); + } + buffer4.set(10, null); - @Test - public void testSerialization() throws IOException, ClassNotFoundException { - Attribute numeric = AttributeFactory.createAttribute("numeric", Ontology.NUMERICAL); - Attribute real = AttributeFactory.createAttribute("real", Ontology.REAL); - Attribute integer = AttributeFactory.createAttribute("integer", Ontology.INTEGER); - Attribute nominal = AttributeFactory.createAttribute("nominal", Ontology.NOMINAL); - Attribute polynominal = AttributeFactory.createAttribute("polynominal", Ontology.POLYNOMINAL); - Attribute binominal = AttributeFactory.createAttribute("binominal", Ontology.BINOMINAL); - for (int i = 0; i 
< 5; i++) { - nominal.getMapping().mapString("nominalValue" + i); - } - for (int i = 0; i < 6; i++) { - polynominal.getMapping().mapString("polyValue" + i); - } - for (int i = 0; i < 2; i++) { - binominal.getMapping().mapString("binominalValue" + i); - } - List attributes = Arrays.asList(nominal, numeric, real, integer, polynominal, binominal); - Random random = new Random(); - ExampleSet set = ExampleSets.from(attributes).withBlankSize(150) - .withColumnFiller(numeric, i -> Math.random() > 0.7 ? Double.NaN : Math.random()) - .withColumnFiller(real, i -> Math.random() > 0.7 ? Double.NaN : 42 + Math.random()) - .withColumnFiller(integer, i -> Math.random() > 0.7 ? Double.NaN : Math.round(Math.random() * 100)) - .withColumnFiller(nominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(5)) - .withColumnFiller(polynominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(6)) - .withColumnFiller(binominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(2)) - .withRole(numeric, Attributes.LABEL_NAME) - .build(); - - Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); - byte[] serialized = serialize(com.rapidminer.belt.table.TableViewCreator.INSTANCE.createView(table)); - Object deserialized = deserialize(serialized); - - RapidAssert.assertEquals(set, (ExampleSet) deserialized); - } + Column bla = Columns.removeUnusedDictionaryValues(buffer3.toBooleanColumn("bla"), + Columns.CleanupOption.REMOVE, Belt.defaultContext()); + Column blup = Columns.removeUnusedDictionaryValues(buffer4.toBooleanColumn("blup"), + Columns.CleanupOption.REMOVE, Belt.defaultContext()); + + + Table table = Builders.newTableBuilder(21).add("first", column) + .add("second", column2).add("bla", bla).add("blup", blup) + .build(Belt.defaultContext()); + + ExampleSet set = com.rapidminer.belt.table.TableViewCreator.INSTANCE.createView(table); + byte[] serialized = serialize(set); + Object deserialized = deserialize(serialized); + 
+ RapidAssert.assertEquals(set, (ExampleSet) deserialized); + } + + @Test + public void testWrongNegativeIndex() throws IOException, ClassNotFoundException { + //(value0, value1) with value0 positive + NominalBuffer buffer = Buffers.nominalBuffer(4, 3); + for (int i = 0; i < buffer.size(); i++) { + buffer.set(i, "value" + (i % 2)); + } + Column column1 = buffer.toBooleanColumn("value0"); + assertEquals(1, column1.getDictionary().getPositiveIndex()); + + //(value) with value positive + NominalBuffer buffer2 = Buffers.nominalBuffer(4, 3); + for (int i = 0; i < buffer.size(); i++) { + buffer2.set(i, "value"); + } + Column column2 = buffer2.toBooleanColumn("value"); + assertEquals(1, column2.getDictionary().getPositiveIndex()); + assertFalse(column2.getDictionary().hasNegative()); + //() with no positive + Column column3 = Buffers.nominalBuffer(4, 1).toBooleanColumn(null); - @Test - public void testSerializationDate() throws IOException, ClassNotFoundException { - Attribute numeric = AttributeFactory.createAttribute("numeric", Ontology.NUMERICAL); - Attribute real = AttributeFactory.createAttribute("real", Ontology.REAL); - Attribute integer = AttributeFactory.createAttribute("integer", Ontology.INTEGER); - Attribute dateTime = AttributeFactory.createAttribute("date_time", Ontology.DATE_TIME); - Attribute date = AttributeFactory.createAttribute("date", Ontology.DATE); - Attribute time = AttributeFactory.createAttribute("time", Ontology.TIME); - List attributes = Arrays.asList(numeric, real, dateTime, date, time, integer); - ExampleSet set = ExampleSets.from(attributes).withBlankSize(150) - .withColumnFiller(numeric, i -> Math.random() > 0.7 ? Double.NaN : Math.random()) - .withColumnFiller(real, i -> Math.random() > 0.7 ? Double.NaN : 42 + Math.random()) - .withColumnFiller(integer, i -> Math.random() > 0.7 ? Double.NaN : Math.round(Math.random() * 100)) - .withColumnFiller(dateTime, - i -> Math.random() > 0.7 ? 
Double.NaN : 1515410698d + Math.floor(Math.random() * 1000)) - .withColumnFiller(date, i -> Math.random() > 0.7 ? Double.NaN : - 230169600000d + Math.floor(Math.random() * 100) * 1000d * 60 * 60 * 24) - .withColumnFiller(time, - i -> Math.random() > 0.7 ? Double.NaN : Math.floor(Math.random() * 60 * 60 * 24 * 1000)) - .withRole(numeric, Attributes.LABEL_NAME) - .build(); - - Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); - byte[] serialized = serialize(com.rapidminer.belt.table.TableViewCreator.INSTANCE.createView(table)); - Object deserialized = deserialize(serialized); - - RapidAssert.assertEquals(set, (ExampleSet) deserialized); + //(value0, value1) with value1 positive and value0 unused + NominalBuffer buffer4 = Buffers.nominalBuffer(4, 3); + for (int i = 0; i < buffer4.size(); i++) { + buffer4.set(i, "value" + (i % 2)); + } + buffer4.set(0, null); + buffer4.set(2, null); + Column column4 = buffer4.toBooleanColumn("value1"); + assertEquals(1, column4.getDictionary().getNegativeIndex()); + Column column4new = Columns.removeUnusedDictionaryValues(column4, Columns.CleanupOption.REMOVE, Belt.defaultContext()); + Column column5 = Columns.removeUnusedDictionaryValues(column4, Columns.CleanupOption.COMPACT, Belt.defaultContext()); + + Table table = new Table(new Column[]{column1, column2, column3, column4new, column5}, new String[]{"a", "b", "c", "d", "e"}); + Table adjustedTable = TableViewCreator.INSTANCE.adjustDictionaries(table); + + assertEquals(1, adjustedTable.column(0).getDictionary().getNegativeIndex()); + assertEquals(1, adjustedTable.column(1).getDictionary().getNegativeIndex()); + assertEquals(1, adjustedTable.column(3).getDictionary().getNegativeIndex()); + assertEquals(1, adjustedTable.column(4).getDictionary().getNegativeIndex()); + assertEquals(0, adjustedTable.column(2).getDictionary().size()); + assertEquals(table.toString(), adjustedTable.toString()); + } + + @Test + public void testClone() { + Attribute 
numeric = AttributeFactory.createAttribute("numeric", Ontology.NUMERICAL); + ExampleSet set = ExampleSets.from(numeric).withBlankSize(15) + .withColumnFiller(numeric, i -> Math.random() > 0.7 ? Double.NaN : Math.random()).build(); + Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); + + ExampleSet view = com.rapidminer.belt.table.TableViewCreator.INSTANCE.createView(table); + ExampleSet clone = (ExampleSet) view.clone(); + RapidAssert.assertEquals(view, clone); + } + + @Test + public void testCloneDate() { + Attribute dateTime = AttributeFactory.createAttribute("date_time", Ontology.DATE_TIME); + ExampleSet set = ExampleSets.from(dateTime).withBlankSize(15) + .withColumnFiller(dateTime, + i -> Math.random() > 0.7 ? Double.NaN : 1515410698d + Math.floor(Math.random() * 1000)) + .build(); + Table table = BeltConverter.convert(set, CONTEXT).getTable(); + + ExampleSet view = TableViewCreator.INSTANCE.createView(table); + ExampleSet clone = (ExampleSet) view.clone(); + RapidAssert.assertEquals(view, clone); + } + + @Test(expected = BeltConverter.ConversionException.class) + public void testAdvancedColumns() { + Table table = Builders.newTableBuilder(11).addReal("real", i -> 3 * i / 5.0).addInt53Bit("int", i -> 5 * i) + .addTextset("textset", i -> new StringSet(Collections.singleton("val" + i))) + .build(Belt.defaultContext()); + TableViewCreator.INSTANCE.createView(table); + } + + @Test + public void testReplaceAdvancedColumns() { + Table table = Builders.newTableBuilder(11).addReal("real", i -> 3 * i / 5.0) + .addTextset("textset", i -> new StringSet(Collections.singleton("val" + i))).addInt53Bit("int", i -> 5 * i) + .build(Belt.defaultContext()); + Table replaced = TableViewCreator.INSTANCE.replacedAdvancedWithError(table); + double[] first = new double[11]; + table.column("real").fill(first, 0); + double[] third = new double[11]; + table.column("int").fill(third, 0); + double[] constant = new double[11]; + Arrays.fill(constant, 
1); + assertArrayEquals(new double[][]{first, constant, third}, + BeltConverterTest.readTableToArray(replaced)); + Object[] message = new Object[1]; + replaced.column("textset").fill(message, 0); + assertEquals("Error: Cannot display advanced column of Column type Text-Set", message[0]); + } } - @Test - public void testSerializationGaps() throws IOException, ClassNotFoundException { - CategoricalBuffer buffer = BufferAccessor.get().newUInt8Buffer(21); - for (int i = 0; i < buffer.size(); i++) { - buffer.set(i, "value" + i); - } - buffer.set(7, null); - buffer.set(5, null); - CategoricalBuffer buffer2 = BufferAccessor.get().newUInt8Buffer(21); - for (int i = 0; i < buffer2.size(); i++) { - buffer2.set(i, "val" + i); - } - buffer2.set(3, null); - buffer2.set(5, null); - Column column = Columns.removeUnusedDictionaryValues(buffer.toColumn(ColumnTypes.NOMINAL), - Columns.CleanupOption.REMOVE, Belt.defaultContext()); - Column column2 = Columns.removeUnusedDictionaryValues(buffer2.toColumn(ColumnTypes.NOMINAL), - Columns.CleanupOption.REMOVE, Belt.defaultContext()); - - CategoricalBuffer buffer3 = BufferAccessor.get().newUInt2Buffer(21); - buffer3.set(0, "bla"); - for (int i = 0; i < buffer3.size(); i++) { - buffer3.set(i, "blup"); - } - buffer3.set(10, null); - CategoricalBuffer buffer4 = BufferAccessor.get().newUInt2Buffer(21); - buffer4.set(0, "bla"); - for (int i = 0; i < buffer.size(); i++) { - buffer4.set(i, "blup"); - } - buffer4.set(10, null); - - Column bla = Columns.removeUnusedDictionaryValues(buffer3.toBooleanColumn(ColumnTypes.NOMINAL, "bla"), - Columns.CleanupOption.REMOVE, Belt.defaultContext()); - Column blup = Columns.removeUnusedDictionaryValues(buffer4.toBooleanColumn(ColumnTypes.NOMINAL, "blup"), - Columns.CleanupOption.REMOVE, Belt.defaultContext()); - - - Table table = Builders.newTableBuilder(21).add("first", column) - .add("second", column2).add("bla", bla).add("blup", blup) - .build(Belt.defaultContext()); - - ExampleSet set = 
com.rapidminer.belt.table.TableViewCreator.INSTANCE.createView(table); - byte[] serialized = serialize(set); - Object deserialized = deserialize(serialized); - - RapidAssert.assertEquals(set, (ExampleSet) deserialized); + public static class ExampleTableView { + + @BeforeClass + public static void setup() { + RapidAssert.ASSERTER_REGISTRY.registerAllAsserters(new AsserterFactoryRapidMiner()); + } + + @Test + public void testNominalTypes() { + Attribute nominal = AttributeFactory.createAttribute("nominal", Ontology.NOMINAL); + Attribute string = AttributeFactory.createAttribute("string", Ontology.STRING); + Attribute polynominal = AttributeFactory.createAttribute("polynominal", Ontology.POLYNOMINAL); + Attribute binominal = AttributeFactory.createAttribute("binominal", Ontology.BINOMINAL); + Attribute path = AttributeFactory.createAttribute("path", Ontology.FILE_PATH); + for (int i = 0; i < 5; i++) { + nominal.getMapping().mapString("nominalValue" + i); + } + for (int i = 0; i < 4; i++) { + string.getMapping().mapString("veryVeryLongStringValue" + i); + } + for (int i = 0; i < 6; i++) { + polynominal.getMapping().mapString("polyValue" + i); + } + for (int i = 0; i < 2; i++) { + binominal.getMapping().mapString("binominalValue" + i); + } + for (int i = 0; i < 3; i++) { + path.getMapping().mapString("//folder/sufolder/subsubfolder/file" + i); + } + + List attributes = Arrays.asList(nominal, string, polynominal, binominal, path); + Random random = new Random(); + ExampleSet set = ExampleSets.from(attributes).withBlankSize(50) + .withColumnFiller(nominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(5)) + .withColumnFiller(string, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(4)) + .withColumnFiller(polynominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(6)) + .withColumnFiller(binominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(2)) + .withColumnFiller(path, i -> random.nextDouble() > 0.7 ? 
Double.NaN : random.nextInt(3)) + .build(); + + IOTable table = BeltConverter.convert(set, CONTEXT); + ExampleSet view = com.rapidminer.belt.table.TableViewCreator.INSTANCE.convertOnWriteView(table, true); + + RapidAssert.assertEquals(set, view); + } + + @Test + public void testNumericTypes() { + Attribute numeric = AttributeFactory.createAttribute("numeric", Ontology.NUMERICAL); + Attribute real = AttributeFactory.createAttribute("real", Ontology.REAL); + Attribute integer = AttributeFactory.createAttribute("integer", Ontology.INTEGER); + List attributes = Arrays.asList(numeric, real, integer); + ExampleSet set = ExampleSets.from(attributes).withBlankSize(150) + .withColumnFiller(numeric, i -> Math.random() > 0.7 ? Double.NaN : Math.random()) + .withColumnFiller(real, i -> Math.random() > 0.7 ? Double.NaN : 42 + Math.random()) + .withColumnFiller(integer, i -> Math.random() > 0.7 ? Double.NaN : Math.round(Math.random() * 100)) + .withRole(numeric, Attributes.LABEL_NAME) + .build(); + + IOTable table = BeltConverter.convert(set, CONTEXT); + ExampleSet view = com.rapidminer.belt.table.TableViewCreator.INSTANCE.convertOnWriteView(table, true); + + RapidAssert.assertEquals(set, view); + } + + @Test + public void testNumericAndDateTypes() { + Attribute numeric = AttributeFactory.createAttribute("numeric", Ontology.NUMERICAL); + Attribute real = AttributeFactory.createAttribute("real", Ontology.REAL); + Attribute integer = AttributeFactory.createAttribute("integer", Ontology.INTEGER); + Attribute dateTime = AttributeFactory.createAttribute("date_time", Ontology.DATE_TIME); + Attribute date = AttributeFactory.createAttribute("date", Ontology.DATE); + Attribute time = AttributeFactory.createAttribute("time", Ontology.TIME); + List attributes = Arrays.asList(numeric, real, dateTime, date, time, integer); + ExampleSet set = ExampleSets.from(attributes).withBlankSize(150) + .withColumnFiller(numeric, i -> Math.random() > 0.7 ? 
Double.NaN : Math.random()) + .withColumnFiller(real, i -> Math.random() > 0.7 ? Double.NaN : 42 + Math.random()) + .withColumnFiller(integer, i -> Math.random() > 0.7 ? Double.NaN : Math.round(Math.random() * 100)) + .withColumnFiller(dateTime, + i -> Math.random() > 0.7 ? Double.NaN : 1515410698d + Math.floor(Math.random() * 1000)) + .withColumnFiller(date, i -> Math.random() > 0.7 ? Double.NaN : + 230169600000d + Math.floor(Math.random() * 100) * 1000d * 60 * 60 * 24) + .withColumnFiller(time, + i -> Math.random() > 0.7 ? Double.NaN : Math.floor(Math.random() * 60 * 60 * 24 * 1000)) + .withRole(numeric, Attributes.LABEL_NAME) + .build(); + + IOTable table = BeltConverter.convert(set, CONTEXT); + ExampleSet view = com.rapidminer.belt.table.TableViewCreator.INSTANCE.convertOnWriteView(table, true); + + RapidAssert.assertEquals(set, view); + } + + @Test + public void testExamplesEqual() { + Attribute numeric = AttributeFactory.createAttribute("numeric", Ontology.NUMERICAL); + Attribute real = AttributeFactory.createAttribute("real", Ontology.REAL); + Attribute integer = AttributeFactory.createAttribute("integer", Ontology.INTEGER); + Attribute nominal = AttributeFactory.createAttribute("nominal", Ontology.NOMINAL); + Attribute polynominal = AttributeFactory.createAttribute("polynominal", Ontology.POLYNOMINAL); + Attribute binominal = AttributeFactory.createAttribute("binominal", Ontology.BINOMINAL); + for (int i = 0; i < 5; i++) { + nominal.getMapping().mapString("nominalValue" + i); + } + for (int i = 0; i < 6; i++) { + polynominal.getMapping().mapString("polyValue" + i); + } + for (int i = 0; i < 2; i++) { + binominal.getMapping().mapString("binominalValue" + i); + } + List attributes = Arrays.asList(nominal, numeric, real, integer, polynominal, binominal); + Random random = new Random(); + ExampleSet set = ExampleSets.from(attributes).withBlankSize(150) + .withColumnFiller(numeric, i -> Math.random() > 0.7 ? 
Double.NaN : Math.random()) + .withColumnFiller(real, i -> Math.random() > 0.7 ? Double.NaN : 42 + Math.random()) + .withColumnFiller(integer, i -> Math.random() > 0.7 ? Double.NaN : Math.round(Math.random() * 100)) + .withColumnFiller(nominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(5)) + .withColumnFiller(polynominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(6)) + .withColumnFiller(binominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(2)) + .withRole(numeric, Attributes.LABEL_NAME) + .build(); + + IOTable table = BeltConverter.convert(set, CONTEXT); + + ExampleSet view = TableViewCreator.INSTANCE.convertOnWriteView(table, true); + for (int i = 0; i < view.size(); i++) { + RapidAssert.assertEquals("test", set.getExample(i), view.getExample(i)); + } + RapidAssert + .assertEquals(view.getExample(0).getDataRow().getType(), set.getExample(0).getDataRow().getType()); + RapidAssert + .assertEquals(view.getExample(1).toString(), set.getExample(1).toString()); + RapidAssert + .assertEquals(view.iterator().next().getDataRow().getType(), set.iterator().next().getDataRow().getType()); + } + + + @Test + public void testStatistics() { + Attribute numeric = AttributeFactory.createAttribute("numeric", Ontology.NUMERICAL); + Attribute real = AttributeFactory.createAttribute("real", Ontology.REAL); + Attribute integer = AttributeFactory.createAttribute("integer", Ontology.INTEGER); + Attribute dateTime = AttributeFactory.createAttribute("date_time", Ontology.DATE_TIME); + Attribute date = AttributeFactory.createAttribute("date", Ontology.DATE); + Attribute time = AttributeFactory.createAttribute("time", Ontology.TIME); + List attributes = Arrays.asList(numeric, real, dateTime, date, time, integer); + ExampleSet set = ExampleSets.from(attributes).withBlankSize(150) + .withColumnFiller(numeric, i -> Math.random() > 0.7 ? Double.NaN : Math.random()) + .withColumnFiller(real, i -> Math.random() > 0.7 ? 
Double.NaN : 42 + Math.random()) + .withColumnFiller(integer, i -> Math.random() > 0.7 ? Double.NaN : Math.round(Math.random() * 100)) + .withColumnFiller(dateTime, + i -> Math.random() > 0.7 ? Double.NaN : 1515410698d + Math.floor(Math.random() * 1000)) + .withColumnFiller(date, i -> Math.random() > 0.7 ? Double.NaN : + 230169600000d + Math.floor(Math.random() * 100) * 1000d * 60 * 60 * 24) + .withColumnFiller(time, + i -> Math.random() > 0.7 ? Double.NaN : Math.floor(Math.random() * 60 * 60 * 24 * 1000)) + .withRole(numeric, Attributes.LABEL_NAME) + .build(); + + set.recalculateAllAttributeStatistics(); + + IOTable table = BeltConverter.convert(set, CONTEXT); + ExampleSet view = com.rapidminer.belt.table.TableViewCreator.INSTANCE.convertOnWriteView(table, true); + view.recalculateAllAttributeStatistics(); + + List setStatistics = new ArrayList<>(); + for (Attribute att : set.getAttributes()) { + if (att.isNumerical()) { + setStatistics.add(set.getStatistics(att, Statistics.AVERAGE)); + } + setStatistics.add(set.getStatistics(att, Statistics.UNKNOWN)); + setStatistics.add(set.getStatistics(att, Statistics.MINIMUM)); + setStatistics.add(set.getStatistics(att, Statistics.MAXIMUM)); + } + + List viewStatistics = new ArrayList<>(); + for (Attribute att : view.getAttributes()) { + if (att.isNumerical()) { + viewStatistics.add(view.getStatistics(att, Statistics.AVERAGE)); + } + viewStatistics.add(view.getStatistics(att, Statistics.UNKNOWN)); + viewStatistics.add(view.getStatistics(att, Statistics.MINIMUM)); + viewStatistics.add(view.getStatistics(att, Statistics.MAXIMUM)); + } + + assertArrayEquals(setStatistics.toArray(), viewStatistics.toArray()); + } + + @Test + public void testStatisticsWithWeight() { + Attribute numeric = AttributeFactory.createAttribute("numeric", Ontology.NUMERICAL); + Attribute real = AttributeFactory.createAttribute("real", Ontology.REAL); + Attribute integer = AttributeFactory.createAttribute("integer", Ontology.INTEGER); + Attribute nominal = 
AttributeFactory.createAttribute("nominal", Ontology.NOMINAL); + Attribute polynominal = AttributeFactory.createAttribute("polynominal", Ontology.POLYNOMINAL); + Attribute binominal = AttributeFactory.createAttribute("binominal", Ontology.BINOMINAL); + for (int i = 0; i < 5; i++) { + nominal.getMapping().mapString("nominalValue" + i); + } + for (int i = 0; i < 6; i++) { + polynominal.getMapping().mapString("polyValue" + i); + } + for (int i = 0; i < 2; i++) { + binominal.getMapping().mapString("binominalValue" + i); + } + List attributes = Arrays.asList(nominal, numeric, real, integer, polynominal, binominal); + Random random = new Random(); + ExampleSet set = ExampleSets.from(attributes).withBlankSize(150) + .withColumnFiller(numeric, i -> Math.random() > 0.7 ? Double.NaN : Math.random()) + .withColumnFiller(real, i -> Math.random() > 0.7 ? Double.NaN : 42 + Math.random()) + .withColumnFiller(integer, i -> Math.random() > 0.7 ? Double.NaN : Math.round(Math.random() * 100)) + .withColumnFiller(nominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(5)) + .withColumnFiller(polynominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(6)) + .withColumnFiller(binominal, i -> random.nextDouble() > 0.7 ? 
Double.NaN : random.nextInt(2)) + .withRole(numeric, Attributes.WEIGHT_NAME) + .build(); + + set.recalculateAllAttributeStatistics(); + + IOTable table = BeltConverter.convert(set, CONTEXT); + ExampleSet view = com.rapidminer.belt.table.TableViewCreator.INSTANCE.convertOnWriteView(table, true); + view.recalculateAllAttributeStatistics(); + + List setStatistics = new ArrayList<>(); + for (Attribute att : set.getAttributes()) { + if (att.isNumerical()) { + setStatistics.add(set.getStatistics(att, Statistics.AVERAGE)); + } + setStatistics.add(set.getStatistics(att, Statistics.UNKNOWN)); + setStatistics.add(set.getStatistics(att, Statistics.MINIMUM)); + setStatistics.add(set.getStatistics(att, Statistics.MAXIMUM)); + } + + List viewStatistics = new ArrayList<>(); + for (Attribute att : view.getAttributes()) { + if (att.isNumerical()) { + viewStatistics.add(view.getStatistics(att, Statistics.AVERAGE)); + } + viewStatistics.add(view.getStatistics(att, Statistics.UNKNOWN)); + viewStatistics.add(view.getStatistics(att, Statistics.MINIMUM)); + viewStatistics.add(view.getStatistics(att, Statistics.MAXIMUM)); + } + + assertArrayEquals(setStatistics.toArray(), viewStatistics.toArray()); + } + + @Test + public void testSerialization() throws IOException, ClassNotFoundException { + Attribute numeric = AttributeFactory.createAttribute("numeric", Ontology.NUMERICAL); + Attribute real = AttributeFactory.createAttribute("real", Ontology.REAL); + Attribute integer = AttributeFactory.createAttribute("integer", Ontology.INTEGER); + Attribute nominal = AttributeFactory.createAttribute("nominal", Ontology.NOMINAL); + Attribute polynominal = AttributeFactory.createAttribute("polynominal", Ontology.POLYNOMINAL); + Attribute binominal = AttributeFactory.createAttribute("binominal", Ontology.BINOMINAL); + for (int i = 0; i < 5; i++) { + nominal.getMapping().mapString("nominalValue" + i); + } + for (int i = 0; i < 6; i++) { + polynominal.getMapping().mapString("polyValue" + i); + } + for (int 
i = 0; i < 2; i++) { + binominal.getMapping().mapString("binominalValue" + i); + } + List attributes = Arrays.asList(nominal, numeric, real, integer, polynominal, binominal); + Random random = new Random(); + ExampleSet set = ExampleSets.from(attributes).withBlankSize(150) + .withColumnFiller(numeric, i -> Math.random() > 0.7 ? Double.NaN : Math.random()) + .withColumnFiller(real, i -> Math.random() > 0.7 ? Double.NaN : 42 + Math.random()) + .withColumnFiller(integer, i -> Math.random() > 0.7 ? Double.NaN : Math.round(Math.random() * 100)) + .withColumnFiller(nominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(5)) + .withColumnFiller(polynominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(6)) + .withColumnFiller(binominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(2)) + .withRole(numeric, Attributes.LABEL_NAME) + .build(); + + IOTable table = BeltConverter.convert(set, CONTEXT); + byte[] serialized = serialize(TableViewCreator.INSTANCE.convertOnWriteView(table, true)); + Object deserialized = deserialize(serialized); + + RapidAssert.assertEquals(set, (ExampleSet) deserialized); + } + + + @Test + public void testSerializationDate() throws IOException, ClassNotFoundException { + Attribute numeric = AttributeFactory.createAttribute("numeric", Ontology.NUMERICAL); + Attribute real = AttributeFactory.createAttribute("real", Ontology.REAL); + Attribute integer = AttributeFactory.createAttribute("integer", Ontology.INTEGER); + Attribute dateTime = AttributeFactory.createAttribute("date_time", Ontology.DATE_TIME); + Attribute date = AttributeFactory.createAttribute("date", Ontology.DATE); + Attribute time = AttributeFactory.createAttribute("time", Ontology.TIME); + List attributes = Arrays.asList(numeric, real, dateTime, date, time, integer); + ExampleSet set = ExampleSets.from(attributes).withBlankSize(150) + .withColumnFiller(numeric, i -> Math.random() > 0.7 ? 
Double.NaN : Math.random()) + .withColumnFiller(real, i -> Math.random() > 0.7 ? Double.NaN : 42 + Math.random()) + .withColumnFiller(integer, i -> Math.random() > 0.7 ? Double.NaN : Math.round(Math.random() * 100)) + .withColumnFiller(dateTime, + i -> Math.random() > 0.7 ? Double.NaN : 1515410698d + Math.floor(Math.random() * 1000)) + .withColumnFiller(date, i -> Math.random() > 0.7 ? Double.NaN : + 230169600000d + Math.floor(Math.random() * 100) * 1000d * 60 * 60 * 24) + .withColumnFiller(time, + i -> Math.random() > 0.7 ? Double.NaN : Math.floor(Math.random() * 60 * 60 * 24 * 1000)) + .withRole(numeric, Attributes.LABEL_NAME) + .build(); + + IOTable table = BeltConverter.convert(set, CONTEXT); + byte[] serialized = serialize(TableViewCreator.INSTANCE.convertOnWriteView(table, true)); + Object deserialized = deserialize(serialized); + + RapidAssert.assertEquals(set, (ExampleSet) deserialized); + } + + @Test + public void testSerializationGaps() throws IOException, ClassNotFoundException { + NominalBuffer buffer = BufferAccessor.get().newUInt8Buffer(ColumnType.NOMINAL, 21); + for (int i = 0; i < buffer.size(); i++) { + buffer.set(i, "value" + i); + } + buffer.set(7, null); + buffer.set(5, null); + NominalBuffer buffer2 = BufferAccessor.get().newUInt8Buffer(ColumnType.NOMINAL, 21); + for (int i = 0; i < buffer2.size(); i++) { + buffer2.set(i, "val" + i); + } + buffer2.set(3, null); + buffer2.set(5, null); + Column column = Columns.removeUnusedDictionaryValues(buffer.toColumn(), + Columns.CleanupOption.REMOVE, Belt.defaultContext()); + Column column2 = Columns.removeUnusedDictionaryValues(buffer2.toColumn(), + Columns.CleanupOption.REMOVE, Belt.defaultContext()); + + NominalBuffer buffer3 = BufferAccessor.get().newUInt2Buffer(ColumnType.NOMINAL, 21); + buffer3.set(0, "bla"); + for (int i = 0; i < buffer3.size(); i++) { + buffer3.set(i, "blup"); + } + buffer3.set(10, null); + NominalBuffer buffer4 = BufferAccessor.get().newUInt2Buffer(ColumnType.NOMINAL, 21); + 
buffer4.set(0, "bla"); + for (int i = 0; i < buffer.size(); i++) { + buffer4.set(i, "blup"); + } + buffer4.set(10, null); + + Column bla = Columns.removeUnusedDictionaryValues(buffer3.toBooleanColumn("bla"), + Columns.CleanupOption.REMOVE, Belt.defaultContext()); + Column blup = Columns.removeUnusedDictionaryValues(buffer4.toBooleanColumn("blup"), + Columns.CleanupOption.REMOVE, Belt.defaultContext()); + + + Table table = Builders.newTableBuilder(21).add("first", column) + .add("second", column2).add("bla", bla).add("blup", blup) + .build(Belt.defaultContext()); + + ExampleSet set = com.rapidminer.belt.table.TableViewCreator.INSTANCE.convertOnWriteView(new IOTable(table), true); + byte[] serialized = serialize(set); + Object deserialized = deserialize(serialized); + + RapidAssert.assertEquals(set, (ExampleSet) deserialized); + } + + @Test + public void testClone() { + Attribute numeric = AttributeFactory.createAttribute("numeric", Ontology.NUMERICAL); + ExampleSet set = ExampleSets.from(numeric).withBlankSize(15) + .withColumnFiller(numeric, i -> Math.random() > 0.7 ? Double.NaN : Math.random()).build(); + IOTable table = BeltConverter.convert(set, CONTEXT); + + ExampleSet view = com.rapidminer.belt.table.TableViewCreator.INSTANCE.convertOnWriteView(table, true); + ExampleSet clone = (ExampleSet) view.clone(); + RapidAssert.assertEquals(view, clone); + } + + @Test + public void testCloneDate() { + Attribute dateTime = AttributeFactory.createAttribute("date_time", Ontology.DATE_TIME); + ExampleSet set = ExampleSets.from(dateTime).withBlankSize(15) + .withColumnFiller(dateTime, + i -> Math.random() > 0.7 ? 
Double.NaN : 1515410698d + Math.floor(Math.random() * 1000)) + .build(); + IOTable table = BeltConverter.convert(set, CONTEXT); + + ExampleSet view = TableViewCreator.INSTANCE.convertOnWriteView(table, false); + ExampleSet clone = (ExampleSet) view.clone(); + RapidAssert.assertEquals(view, clone); + } + + @Test(expected = BeltConverter.ConversionException.class) + public void testAdvancedColumns() { + Table table = Builders.newTableBuilder(11).addReal("real", i -> 3 * i / 5.0).addInt53Bit("int", i -> 5 * i) + .addTextset("textset", i -> new StringSet(Collections.singleton("val" + i))) + .build(Belt.defaultContext()); + TableViewCreator.INSTANCE.convertOnWriteView(new IOTable(table), true); + } + + @Test + public void testReplaceAdvancedColumns() { + int size = 11; + Table table = Builders.newTableBuilder(size).addReal("real", i -> 3 * i / 5.0) + .addTextset("textset", i -> new StringSet(Collections.singleton("val" + i))).addInt53Bit("int", i -> 5 * i) + .build(Belt.defaultContext()); + ExampleSet replaced = TableViewCreator.INSTANCE.convertOnWriteView(new IOTable(table), false); + double[] first = new double[size]; + table.column("real").fill(first, 0); + double[] third = new double[size]; + table.column("int").fill(third, 0); + double[] constant = new double[size]; + Arrays.fill(constant, 0); + assertArrayEquals(new double[][]{first, constant, third}, + BeltConverterTest.readExampleSetToArray(replaced)); + NominalMapping mapping = replaced.getAttributes().get("textset").getMapping(); + assertEquals(1, mapping.size()); + assertEquals("Cannot display advanced column", mapping.mapIndex(0)); + } + + @Test + public void testColumnCleanupNoDate() { + Attribute numeric = AttributeFactory.createAttribute("numeric", Ontology.NUMERICAL); + Attribute real = AttributeFactory.createAttribute("real", Ontology.REAL); + Attribute integer = AttributeFactory.createAttribute("integer", Ontology.INTEGER); + Attribute nominal = AttributeFactory.createAttribute("nominal", 
Ontology.NOMINAL); + Attribute polynominal = AttributeFactory.createAttribute("polynominal", Ontology.POLYNOMINAL); + Attribute binominal = AttributeFactory.createAttribute("binominal", Ontology.BINOMINAL); + for (int i = 0; i < 5; i++) { + nominal.getMapping().mapString("nominalValue" + i); + } + for (int i = 0; i < 6; i++) { + polynominal.getMapping().mapString("polyValue" + i); + } + for (int i = 0; i < 2; i++) { + binominal.getMapping().mapString("binominalValue" + i); + } + List attributes = Arrays.asList(nominal, numeric, real, integer, polynominal, binominal); + Random random = new Random(); + ExampleSet set = ExampleSets.from(attributes).withBlankSize(150) + .withColumnFiller(numeric, i -> Math.random() > 0.7 ? Double.NaN : Math.random()) + .withColumnFiller(real, i -> Math.random() > 0.7 ? Double.NaN : 42 + Math.random()) + .withColumnFiller(integer, i -> Math.random() > 0.7 ? Double.NaN : Math.round(Math.random() * 100)) + .withColumnFiller(nominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(5)) + .withColumnFiller(polynominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(6)) + .withColumnFiller(binominal, i -> random.nextDouble() > 0.7 ? 
Double.NaN : random.nextInt(2)) + .withRole(numeric, Attributes.WEIGHT_NAME) + .build(); + + IOTable table = BeltConverter.convert(set, CONTEXT); + ExampleSet view = TableViewCreator.INSTANCE.convertOnWriteView(table, true); + + Attributes setAttributes = set.getAttributes(); + setAttributes.remove(real); + setAttributes.remove(polynominal); + set.cleanup(); + + Attributes viewAttributes = view.getAttributes(); + viewAttributes.remove(viewAttributes.findRoleByName(real.getName())); + viewAttributes.remove(viewAttributes.findRoleByName(polynominal.getName())); + view.cleanup(); + + RapidAssert.assertEquals(set, view); + } + + @Test + public void testColumnCleanupWithDate() { + Attribute numeric = AttributeFactory.createAttribute("numeric", Ontology.NUMERICAL); + Attribute real = AttributeFactory.createAttribute("real", Ontology.REAL); + Attribute integer = AttributeFactory.createAttribute("integer", Ontology.INTEGER); + Attribute dateTime = AttributeFactory.createAttribute("date_time", Ontology.DATE_TIME); + Attribute date = AttributeFactory.createAttribute("date", Ontology.DATE); + Attribute time = AttributeFactory.createAttribute("time", Ontology.TIME); + List attributes = Arrays.asList(numeric, real, dateTime, date, time, integer); + ExampleSet set = ExampleSets.from(attributes).withBlankSize(150) + .withColumnFiller(numeric, i -> Math.random() > 0.7 ? Double.NaN : Math.random()) + .withColumnFiller(real, i -> Math.random() > 0.7 ? Double.NaN : 42 + Math.random()) + .withColumnFiller(integer, i -> Math.random() > 0.7 ? Double.NaN : Math.round(Math.random() * 100)) + .withColumnFiller(dateTime, + i -> Math.random() > 0.7 ? Double.NaN : 1515410698d + Math.floor(Math.random() * 1000)) + .withColumnFiller(date, i -> Math.random() > 0.7 ? Double.NaN : + 230169600000d + Math.floor(Math.random() * 100) * 1000d * 60 * 60 * 24) + .withColumnFiller(time, + i -> Math.random() > 0.7 ? 
Double.NaN : Math.floor(Math.random() * 60 * 60 * 24 * 1000)) + .withRole(numeric, Attributes.LABEL_NAME) + .build(); + + IOTable table = BeltConverter.convert(set, CONTEXT); + ExampleSet view = TableViewCreator.INSTANCE.convertOnWriteView(table, true); + + Attributes setAttributes = set.getAttributes(); + setAttributes.remove(real); + setAttributes.remove(date); + set.cleanup(); + + Attributes viewAttributes = view.getAttributes(); + viewAttributes.remove(viewAttributes.findRoleByName(real.getName())); + viewAttributes.remove(viewAttributes.findRoleByName(date.getName())); + view.cleanup(); + + RapidAssert.assertEquals(set, view); + } + + @Test + public void testColumnCleanupWithDateNoDateAfter() { + Attribute numeric = AttributeFactory.createAttribute("numeric", Ontology.NUMERICAL); + Attribute real = AttributeFactory.createAttribute("real", Ontology.REAL); + Attribute integer = AttributeFactory.createAttribute("integer", Ontology.INTEGER); + Attribute dateTime = AttributeFactory.createAttribute("date_time", Ontology.DATE_TIME); + Attribute date = AttributeFactory.createAttribute("date", Ontology.DATE); + Attribute time = AttributeFactory.createAttribute("time", Ontology.TIME); + List attributes = Arrays.asList(numeric, real, dateTime, date, time, integer); + ExampleSet set = ExampleSets.from(attributes).withBlankSize(150) + .withColumnFiller(numeric, i -> Math.random() > 0.7 ? Double.NaN : Math.random()) + .withColumnFiller(real, i -> Math.random() > 0.7 ? Double.NaN : 42 + Math.random()) + .withColumnFiller(integer, i -> Math.random() > 0.7 ? Double.NaN : Math.round(Math.random() * 100)) + .withColumnFiller(dateTime, + i -> Math.random() > 0.7 ? Double.NaN : 1515410698d + Math.floor(Math.random() * 1000)) + .withColumnFiller(date, i -> Math.random() > 0.7 ? Double.NaN : + 230169600000d + Math.floor(Math.random() * 100) * 1000d * 60 * 60 * 24) + .withColumnFiller(time, + i -> Math.random() > 0.7 ? 
Double.NaN : Math.floor(Math.random() * 60 * 60 * 24 * 1000)) + .withRole(numeric, Attributes.LABEL_NAME) + .build(); + + IOTable table = BeltConverter.convert(set, CONTEXT); + ExampleSet view = TableViewCreator.INSTANCE.convertOnWriteView(table, true); + + Attributes setAttributes = set.getAttributes(); + setAttributes.remove(dateTime); + setAttributes.remove(date); + setAttributes.remove(time); + set.cleanup(); + + Attributes viewAttributes = view.getAttributes(); + viewAttributes.remove(viewAttributes.findRoleByName(dateTime.getName())); + viewAttributes.remove(viewAttributes.findRoleByName(date.getName())); + viewAttributes.remove(viewAttributes.findRoleByName(time.getName())); + view.cleanup(); + + RapidAssert.assertEquals(set, view); + } } @@ -521,65 +1168,4 @@ private static Object deserialize(byte[] bytes) throws IOException, ClassNotFoun ObjectInputStream o = new ObjectInputStream(b); return o.readObject(); } - - @Test - public void testClone() { - Attribute numeric = AttributeFactory.createAttribute("numeric", Ontology.NUMERICAL); - ExampleSet set = ExampleSets.from(numeric).withBlankSize(15) - .withColumnFiller(numeric, i -> Math.random() > 0.7 ? Double.NaN : Math.random()).build(); - Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); - - ExampleSet view = com.rapidminer.belt.table.TableViewCreator.INSTANCE.createView(table); - ExampleSet clone = (ExampleSet) view.clone(); - RapidAssert.assertEquals(view, clone); - } - - @Test - public void testCloneDate() { - Attribute dateTime = AttributeFactory.createAttribute("date_time", Ontology.DATE_TIME); - ExampleSet set = ExampleSets.from(dateTime).withBlankSize(15) - .withColumnFiller(dateTime, - i -> Math.random() > 0.7 ? 
Double.NaN : 1515410698d + Math.floor(Math.random() * 1000)) - .build(); - Table table = BeltConverter.convert(set, CONTEXT).getTable(); - - ExampleSet view = TableViewCreator.INSTANCE.createView(table); - ExampleSet clone = (ExampleSet) view.clone(); - RapidAssert.assertEquals(view, clone); - } - - @Test(expected = BeltConverter.ConversionException.class) - public void testCustomColumns() { - ColumnType customType = ColumnTypes.categoricalType("com.rapidminer.custom.integer", Integer.class, - null); - Table table = Builders.newTableBuilder(11).addReal("real", i -> 3 * i / 5.0).addInt("int", i -> 5 * i) - .addCategorical("custom", i -> i, customType) - .build(Belt.defaultContext()); - TableViewCreator.INSTANCE.createView(table); - } - - @Test - public void testReplaceCustomColumns() { - ColumnType customType = ColumnTypes.objectType("com.rapidminer.custom.double", Double.class, null); - ColumnType customType2 = ColumnTypes.categoricalType("com.rapidminer.custom.integer", Integer.class, - null); - Table table = Builders.newTableBuilder(11).addReal("real", i -> 3 * i / 5.0) - .addObject("custom", i -> (double) i, customType).addInt("int", i -> 5 * i) - .addCategorical("custom2", i -> i, customType2) - .build(Belt.defaultContext()); - Table replaced = TableViewCreator.INSTANCE.replacedCustomsWithError(table); - double[] first = new double[11]; - table.column("real").fill(first, 0); - double[] third = new double[11]; - table.column("int").fill(third, 0); - double[] constant = new double[11]; - Arrays.fill(constant, 1); - assertArrayEquals(new double[][]{first, constant, third, constant}, - BeltConverterTest.readTableToArray(replaced)); - Object[] message = new Object[1]; - replaced.column("custom").fill(message, 0); - assertEquals("Error: Cannot display custom column of type com.rapidminer.custom.double", message[0]); - replaced.column("custom2").fill(message, 0); - assertEquals("Error: Cannot display custom column of type com.rapidminer.custom.integer", message[0]); - 
} } diff --git a/src/test/java/com/rapidminer/belt/table/ViewToTableConverterTest.java b/src/test/java/com/rapidminer/belt/table/ViewToTableConverterTest.java new file mode 100644 index 0000000..ad8bf60 --- /dev/null +++ b/src/test/java/com/rapidminer/belt/table/ViewToTableConverterTest.java @@ -0,0 +1,592 @@ +/** + * Copyright (C) 2001-2020 by RapidMiner and the contributors + * + * Complete list of developers available at our web site: + * + * http://rapidminer.com + * + * This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General + * Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more + * details. + * + * You should have received a copy of the GNU Affero General Public License along with this program. If not, see + * http://www.gnu.org/licenses/. 
+ */ +package com.rapidminer.belt.table; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.time.Instant; +import java.time.LocalTime; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ForkJoinPool; +import java.util.concurrent.ForkJoinTask; +import java.util.concurrent.Future; + +import org.apache.commons.math3.util.MathArrays; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +import com.rapidminer.adaption.belt.ContextAdapter; +import com.rapidminer.adaption.belt.IOTable; +import com.rapidminer.belt.buffer.Buffers; +import com.rapidminer.belt.column.DateTimeColumn; +import com.rapidminer.belt.column.type.StringSet; +import com.rapidminer.belt.execution.Context; +import com.rapidminer.belt.util.ColumnAnnotation; +import com.rapidminer.belt.util.ColumnRole; +import com.rapidminer.belt.util.Order; +import com.rapidminer.core.concurrency.ConcurrencyContext; +import com.rapidminer.core.concurrency.ExecutionStoppedException; +import com.rapidminer.example.Attribute; +import com.rapidminer.example.AttributeRole; +import com.rapidminer.example.Attributes; +import com.rapidminer.example.Example; +import com.rapidminer.example.ExampleSet; +import com.rapidminer.example.set.MappedExampleSet; +import com.rapidminer.example.set.Partition; +import com.rapidminer.example.set.SortedExampleSet; +import com.rapidminer.example.set.SplittedExampleSet; +import com.rapidminer.example.table.AbstractAttribute; +import com.rapidminer.example.table.AttributeFactory; +import com.rapidminer.example.table.NominalMapping; +import com.rapidminer.example.table.ViewAttribute; +import com.rapidminer.operator.ViewModel; 
+import com.rapidminer.operator.preprocessing.filter.NominalToNumeric; +import com.rapidminer.operator.preprocessing.filter.NominalToNumericModel; +import com.rapidminer.test.asserter.AsserterFactoryRapidMiner; +import com.rapidminer.test_utils.RapidAssert; +import com.rapidminer.tools.Ontology; +import com.rapidminer.tools.ParameterService; + + +/** + * Tests the {@link BeltConverter#convert(ExampleSet, ConcurrencyContext)} method when it is applied after + * {@link TableViewCreator#convertOnWriteView(IOTable, boolean)}. + * + * @author Gisa Meier + */ +public class ViewToTableConverterTest { + + private static final ConcurrencyContext CONTEXT = new ConcurrencyContext() { + + private ForkJoinPool pool = new ForkJoinPool(Runtime.getRuntime().availableProcessors()); + + @Override + public List> submit(List> callables) throws IllegalArgumentException { + List> futures = new ArrayList<>(); + for (Callable callable : callables) { + futures.add(pool.submit(callable)); + } + return futures; + } + + @Override + public List call(List> callables) + throws ExecutionException, ExecutionStoppedException, IllegalArgumentException { + List> futures = submit(callables); + List results = new ArrayList<>(); + for (Future future : futures) { + try { + results.add(future.get()); + } catch (InterruptedException e) { + throw new RuntimeException("must not happen"); + } + } + return results; + } + + @Override + public void run(List runnables) + throws ExecutionException, ExecutionStoppedException, IllegalArgumentException { + } + + @Override + public List invokeAll(List> tasks) + throws ExecutionException, ExecutionStoppedException, IllegalArgumentException { + return null; + } + + @Override + public T invoke(ForkJoinTask task) + throws ExecutionException, ExecutionStoppedException, IllegalArgumentException { + return null; + } + + @Override + public int getParallelism() { + return pool.getParallelism(); + } + + @Override + public List collectResults(List> futures) + throws 
ExecutionException, ExecutionStoppedException, IllegalArgumentException { + return null; + } + + @Override + public void checkStatus() throws ExecutionStoppedException { + } + + }; + + + @BeforeClass + public static void setup() { + RapidAssert.ASSERTER_REGISTRY.registerAllAsserters(new AsserterFactoryRapidMiner()); + ParameterService.init(); + } + + @Test + public void testSimple() { + Table table = getTable(); + + IOTable ioTable = new IOTable(table); + ExampleSet view = TableViewCreator.INSTANCE.convertOnWriteView(ioTable, false); + + IOTable converted = BeltConverter.convert(view, CONTEXT); + + RapidAssert.assertEquals(ioTable, converted); + } + + @Test + public void testRemove() { + Table table = getTable(); + + IOTable ioTable = new IOTable(table); + ExampleSet view = TableViewCreator.INSTANCE.convertOnWriteView(ioTable, false); + view.getAttributes().remove(view.getAttributes().get("integer")); + + IOTable converted = BeltConverter.convert(view, CONTEXT); + + Table expected = Builders.newTableBuilder(table).remove("integer").build(ContextAdapter.adapt(CONTEXT)); + RapidAssert.assertEquals(new IOTable(expected), converted); + } + + @Test + public void testRemoveFromTable() { + Table table = getTable(); + + IOTable ioTable = new IOTable(table); + ExampleSet view = TableViewCreator.INSTANCE.convertOnWriteView(ioTable, false); + Attribute textset = view.getAttributes().get("textSet"); + view.getAttributes().remove(textset); + view.getExampleTable().removeAttribute(textset); + view.getAttributes().remove(view.getAttributes().get("time")); + + IOTable converted = BeltConverter.convert(view, CONTEXT); + Context ctx = ContextAdapter.adapt(CONTEXT); + Table expected = Builders.newTableBuilder(table).remove("textSet").remove("time") + .replace("date-time", table.transform("date-time").applyObjectToDateTime(Instant.class, + i -> i == null ? 
null : Instant.ofEpochMilli(i.toEpochMilli()), ctx).toColumn()).build(ctx); + RapidAssert.assertEquals(new IOTable(expected), converted); + } + + @Test + public void testAddToFromTable() { + Table table = getTable(); + + IOTable ioTable = new IOTable(table); + ExampleSet view = TableViewCreator.INSTANCE.convertOnWriteView(ioTable, false); + Attribute attribute = AttributeFactory.createAttribute("new", Ontology.NUMERICAL); + view.getAttributes().addRegular(attribute); + view.getExampleTable().addAttribute(attribute); + int index = 0; + for (Example example : view) { + example.setValue(attribute, 0.231 * index++); + } + + IOTable converted = BeltConverter.convert(view, CONTEXT); + Table expected = Builders.newTableBuilder(table).addReal("new", i -> 0.231 * i).build(ContextAdapter.adapt(CONTEXT)); + RapidAssert.assertEquals(new IOTable(expected), converted); + } + + @Test + public void testReorder() { + Table table = getTable(); + IOTable ioTable = new IOTable(table); + ExampleSet view = TableViewCreator.INSTANCE.convertOnWriteView(ioTable, false); + Attribute attribute = AttributeFactory.createAttribute("new", Ontology.NUMERICAL); + view.getAttributes().addRegular(attribute); + view.getExampleTable().addAttribute(attribute); + int index =0; + for (Example example : view) { + example.setValue(attribute, 0.231*index++); + } + + ExampleSet clone = (ExampleSet) view.clone(); + Attributes viewAttributes = view.getAttributes(); + List names = new ArrayList<>(); + for (Iterator attributeIterator = viewAttributes.allAttributes(); attributeIterator.hasNext(); ) { + Attribute next = attributeIterator.next(); + names.add(next.getName()); + } + + Collections.shuffle(names); + + Attributes cloneAttributes = clone.getAttributes(); + cloneAttributes.clearRegular(); + cloneAttributes.clearSpecial(); + for (String name : names) { + cloneAttributes.add(viewAttributes.getRole(name)); + } + + IOTable converted = BeltConverter.convert(clone, CONTEXT); + Table expected = 
Builders.newTableBuilder(table).addReal("new", i -> 0.231 * i).build(ContextAdapter.adapt(CONTEXT)); + expected = expected.columns(names); + RapidAssert.assertEquals(new IOTable(expected), converted); + } + + @Test + public void testRename() { + Table table = getTable(); + + IOTable ioTable = new IOTable(table); + ExampleSet view = TableViewCreator.INSTANCE.convertOnWriteView(ioTable, false); + Attribute integer = view.getAttributes().get("integer"); + //this rename does not work + //view.getAttributes().rename(integer, "integer2"); + //this is what the Rename operator does + integer.setName("integer2"); + IOTable converted = BeltConverter.convert(view, CONTEXT); + + Table expected = Builders.newTableBuilder(table).rename("integer", "integer2").build(ContextAdapter.adapt(CONTEXT)); + RapidAssert.assertEquals(new IOTable(expected), converted); + } + + @Test + public void testChangeRoles() { + Table table = getTable(); + + IOTable ioTable = new IOTable(table); + ExampleSet view = TableViewCreator.INSTANCE.convertOnWriteView(ioTable, false); + Attribute integer = view.getAttributes().get("integer"); + Attribute textset = view.getAttributes().get("textSet"); + Attribute bool = view.getAttributes().get("boolean"); + // this is what Set Role does + view.getAttributes().remove(integer); + view.getAttributes().setSpecialAttribute(integer, Attributes.ID_NAME); + view.getAttributes().remove(textset); + view.getAttributes().setSpecialAttribute(textset, "i am special"); + view.getAttributes().remove(bool); + view.getAttributes().addRegular(bool); + IOTable converted = BeltConverter.convert(view, CONTEXT); + + Table expected = Builders.newTableBuilder(table).remove("integer") + .add("integer", table.column("integer")) + .addMetaData("integer", ColumnRole.ID) + .remove("textSet").add("textSet", table.column("textSet")) + .addMetaData("textSet", ColumnRole.METADATA) + .addMetaData("textSet", new LegacyRole("i am special")) + .remove("boolean").add("boolean", 
table.column("boolean")) + .build(ContextAdapter.adapt(CONTEXT)); + RapidAssert.assertEquals(new IOTable(expected), converted); + Assert.assertEquals(expected.getMetaData("textSet"), converted.getTable().getMetaData("textSet")); + } + + @Test + public void testOneMapped() { + Table table = getTable(); + + IOTable ioTable = new IOTable(table); + ExampleSet view = TableViewCreator.INSTANCE.convertOnWriteView(ioTable, false); + + int[] mapping = new int[table.height()]; + Arrays.setAll(mapping, i -> i); + MathArrays.shuffle(mapping); + + ExampleSet mapped = new MappedExampleSet(view, mapping); + + IOTable converted = BeltConverter.convert(mapped, CONTEXT); + + Table expected = table.rows(mapping, ContextAdapter.adapt(CONTEXT)); + RapidAssert.assertEquals(new IOTable(expected), converted); + } + + @Test + public void testTwoMapped() { + Table table = getTable(); + + IOTable ioTable = new IOTable(table); + ExampleSet view = TableViewCreator.INSTANCE.convertOnWriteView(ioTable, false); + + SortedExampleSet sorted = new SortedExampleSet(view, view.getAttributes().get("real"), SortedExampleSet.INCREASING); + + int[] mapping = new int[table.height() / 2]; + Arrays.setAll(mapping, i -> i); + MathArrays.shuffle(mapping); + + ExampleSet mapped = new MappedExampleSet(sorted, mapping); + + IOTable converted = BeltConverter.convert(mapped, CONTEXT); + + Table expected = table.sort("real", Order.ASCENDING, ContextAdapter.adapt(CONTEXT)) + .rows(mapping, ContextAdapter.adapt(CONTEXT)); + RapidAssert.assertEquals(new IOTable(expected), converted); + } + + @Test + public void testThreeMapped() { + Table table = getTable(); + + IOTable ioTable = new IOTable(table); + ExampleSet view = TableViewCreator.INSTANCE.convertOnWriteView(ioTable, false); + + SortedExampleSet sorted = new SortedExampleSet(view, view.getAttributes().get("real"), SortedExampleSet.INCREASING); + + int[] mapping = new int[table.height() / 2]; + Arrays.setAll(mapping, i -> i); + MathArrays.shuffle(mapping); + + 
ExampleSet mapped = new MappedExampleSet(sorted, mapping); + + int[] elements = new int[mapped.size()]; + Arrays.setAll(elements, i -> i % 3); + SplittedExampleSet splitted = new SplittedExampleSet(mapped, new Partition(elements, 3)); + splitted.selectSingleSubset(0); + + IOTable converted = BeltConverter.convert(splitted, CONTEXT); + + Table expected = table.sort("real", Order.ASCENDING, ContextAdapter.adapt(CONTEXT)) + .rows(mapping, ContextAdapter.adapt(CONTEXT)); + int[] rows2 = new int[expected.height() / 3 + (expected.height() % 3 > 0 ? 1 : 0)]; + Arrays.setAll(rows2, i -> 3 * i); + expected = expected.rows(rows2, ContextAdapter.adapt(CONTEXT)); + RapidAssert.assertEquals(new IOTable(expected), converted); + } + + @Test + public void testModelViewColumn() { + Table table = getTable(); + + IOTable ioTable = new IOTable(table); + ExampleSet view = TableViewCreator.INSTANCE.convertOnWriteView(ioTable, false); + ViewModel model = new NominalToNumericModel(view, NominalToNumeric.INTEGERS_CODING); + + Attribute nominal = view.getAttributes().get("nominal"); + view.getAttributes().remove(nominal); + view.getAttributes().addRegular(new ViewAttribute(model, nominal, nominal.getName(), Ontology.INTEGER, null)); + + IOTable converted = BeltConverter.convert(view, CONTEXT); + + Table expected = Builders.newTableBuilder(table).remove("nominal") + .add("nominal", table.transform("nominal") + .applyCategoricalToInteger53Bit(i -> i - 1, ContextAdapter.adapt(CONTEXT)).toColumn()) + .build(ContextAdapter.adapt(CONTEXT)); + + RapidAssert.assertEquals(new IOTable(expected), converted); + } + + @Test + public void testModelViewColumnOnMapped() { + Table table = getTable(); + + IOTable ioTable = new IOTable(table); + ExampleSet view = TableViewCreator.INSTANCE.convertOnWriteView(ioTable, false); + ViewModel model = new NominalToNumericModel(view, NominalToNumeric.INTEGERS_CODING); + + int[] mapping = new int[table.height() / 2]; + Arrays.setAll(mapping, i -> 2 * i); + + ExampleSet 
mapped = new MappedExampleSet(view, mapping); + Attribute nominal = mapped.getAttributes().get("nominal"); + mapped.getAttributes().remove(nominal); + mapped.getAttributes().addRegular(new ViewAttribute(model, nominal, nominal.getName(), Ontology.INTEGER, null)); + + IOTable converted = BeltConverter.convert(mapped, CONTEXT); + + Context adapted = ContextAdapter.adapt(CONTEXT); + Table mappedTable = table.rows(mapping, adapted); + Table expected = Builders.newTableBuilder(mappedTable).remove("nominal") + .add("nominal", mappedTable.transform("nominal") + .applyCategoricalToInteger53Bit(i -> i - 1, adapted).toColumn()) + .build(adapted); + + RapidAssert.assertEquals(new IOTable(expected), converted); + } + + @Test + public void testIntegerToReal() { + Table table = getTable(); + + IOTable ioTable = new IOTable(table); + ExampleSet view = TableViewCreator.INSTANCE.convertOnWriteView(ioTable, false); + + Attribute attribute = view.getAttributes().get("integer"); + + //That is what Numeric2Real does + Attribute newAttribute = AttributeFactory.changeValueType(attribute, Ontology.REAL); + view.getAttributes().replace(attribute, newAttribute); + + IOTable converted = BeltConverter.convert(view, CONTEXT); + + Table expected = Builders.newTableBuilder(table).replace("integer", Buffers.realBuffer(table.column("integer")).toColumn()) + .build(ContextAdapter.adapt(CONTEXT)); + + RapidAssert.assertEquals(new IOTable(expected), converted); + } + + @Test + public void testNotSafeColumn() { + Table table = getTable(); + Context ctx = ContextAdapter.adapt(CONTEXT); + table = Builders.newTableBuilder(table).remove("textSet").remove("time") + .add("numeric", table.column("real")) + .addMetaData("numeric", LegacyType.NUMERICAL) + .add("nominal2", table.column("nominal")) + .addMetaData("nominal2", LegacyType.NOMINAL) + .add("studio date", table.column("date-time")) + .addMetaData("studio date", LegacyType.DATE) + .add("studio time", table.column("date-time")) + .addMetaData("studio 
time", LegacyType.TIME) + .removeMetaData("boolean", ColumnRole.class) + .addMetaData("boolean", LegacyType.BINOMINAL) + .addMetaData("boolean", ColumnRole.LABEL) + .build(ctx); + + IOTable ioTable = new IOTable(table); + ExampleSet view = TableViewCreator.INSTANCE.convertOnWriteView(ioTable, false); + + Attributes viewAttributes = view.getAttributes(); + /* the attributes are removed and re-added in the loop below, so their names are collected up front */ + List<String> names = new ArrayList<>(); + for (Iterator<Attribute> attributeIterator = viewAttributes.allAttributes(); attributeIterator.hasNext(); ) { + Attribute next = attributeIterator.next(); + names.add(next.getName()); + } + + /* swap every attribute for a DummyAttribute wrapper and keep its special role if it has one */ + for (String name : names) { + Attribute attribute = viewAttributes.get(name); + AttributeRole role = viewAttributes.getRole(attribute); + DummyAttribute newAttribute = new DummyAttribute(attribute); + viewAttributes.remove(attribute); + if (role.isSpecial()) { + viewAttributes.setSpecialAttribute(newAttribute, role.getSpecialName()); + } else { + viewAttributes.addRegular(newAttribute); + } + } + + IOTable converted = BeltConverter.convert(view, CONTEXT); + + //losing nanoseconds when reading via studio wrapper + DateTimeColumn studioDateTimeColumn = table.transform("date-time") + .applyObjectToDateTime(Instant.class, i -> i == null ? null : Instant.ofEpochMilli(i.toEpochMilli()), ctx).toColumn(); + DateTimeColumn studioDateColumn = table.transform("date-time") + .applyObjectToDateTime(Instant.class, i -> i == null ? 
null : Instant.ofEpochSecond(i.getEpochSecond()), ctx).toColumn(); + Table expected = Builders.newTableBuilder(table).replace("date-time", studioDateTimeColumn) + .replace("studio date", studioDateColumn) + .replace("studio time", studioDateTimeColumn).build(ctx); + + RapidAssert.assertEquals(new IOTable(expected), converted); + Assert.assertEquals(expected.getMetaData(), converted.getTable().getMetaData()); + } + + /** Converts a convert-on-write view of a table created with new Table(1234), to which no columns are added, and expects a table equal to the original one. */ + @Test + public void testEmpty() { + Table table = new Table(1234); + + IOTable ioTable = new IOTable(table); + ExampleSet view = TableViewCreator.INSTANCE.convertOnWriteView(ioTable, false); + + IOTable converted = BeltConverter.convert(view, CONTEXT); + + RapidAssert.assertEquals(new IOTable(table), converted); + } + + + /** Attribute implementation that takes name and value type from a wrapped attribute and forwards all overridden methods to it. testNotSafeColumn uses it to replace the attributes of a view by instances of this class. */ + private static class DummyAttribute extends AbstractAttribute { + + /** the wrapped attribute all calls are delegated to */ + private final Attribute attribute; + + private DummyAttribute(Attribute attribute) { + super(attribute.getName(), attribute.getValueType()); + this.attribute = attribute; + } + + /* note: returns a clone of the wrapped attribute, not a new DummyAttribute */ + @Override + public Object clone() { + return attribute.clone(); + } + + @Override + public int getTableIndex() { + return attribute.getTableIndex(); + } + + @Override + public NominalMapping getMapping() { + return attribute.getMapping(); + } + + @Override + public void setMapping(NominalMapping nominalMapping) { + attribute.setMapping(nominalMapping); + } + + @Override + public boolean isNominal() { + return attribute.isNominal(); + } + + @Override + public boolean isNumerical() { + return attribute.isNumerical(); + } + + @Override + public boolean isDateTime() { + return attribute.isDateTime(); + } + + @Override + public String getAsString(double value, int digits, boolean quoteNominal) { + return attribute.getAsString(value, digits, quoteNominal); + } + } + + + /** Builds the table used by the tests from Builders.newTableBuilder(111) with a nominal, a boolean, a real, an integer, a time, a date-time and a text set column. The real, integer, time and date-time values come from Math.random() and are missing whenever Math.random() exceeds 0.7, so the content differs between runs. The boolean and real columns get the LABEL role, the time and real columns an annotation. */ + private static Table getTable() { + TableBuilder builder = Builders.newTableBuilder(111); + builder.addNominal("nominal", i -> "value" + (i % 10)); + builder.addBoolean("boolean", i -> "val" + (i % 2), "val1"); + 
builder.addReal("real", i -> Math.random() > 0.7 ? Double.NaN : Math.random()); + builder.addInt53Bit("integer", i -> Math.random() > 0.7 ? Double.NaN : Math.random() * 1000); + builder.addTime("time", + i -> Math.random() > 0.7 ? null : LocalTime.of((int) (Math.random() * 24), (int) (Math.random() * 60), (int) (Math.random() * 60), (int) (Math.random() * 100000))); + builder.addDateTime("date-time", i -> Math.random() > 0.7 ? null : Instant.ofEpochSecond((long) (Math.random() * 1587727537), (long) (Math.random() * 999999999))); + builder.addTextset("textSet", i -> new StringSet(Arrays.asList("val" + i, "value" + i, "val" + (i - 1)))); + builder.addMetaData("boolean", ColumnRole.LABEL); + builder.addMetaData("real", ColumnRole.LABEL); + builder.addMetaData("time", new ColumnAnnotation("blablup")); + builder.addMetaData("real", new ColumnAnnotation("blablup")); + return builder.build(ContextAdapter.adapt(CONTEXT)); + } + + + /** Writes the given object with Java object serialization into a byte array. The ObjectOutputStream is closed, and thereby flushed, before the bytes are taken from the buffer. @param obj the object to serialize @return the serialized form of the object @throws IOException if writing the object fails */ + private static byte[] serialize(Object obj) throws IOException { + ByteArrayOutputStream b = new ByteArrayOutputStream(); + try (ObjectOutputStream o = new ObjectOutputStream(b)) { + o.writeObject(obj); + } + return b.toByteArray(); + } + + /** Reads a single object from bytes produced by Java object serialization and closes the stream afterwards. @param bytes the serialized form of one object @return the object read from the bytes @throws IOException if reading fails @throws ClassNotFoundException if the class of the serialized object cannot be found */ + private static Object deserialize(byte[] bytes) throws IOException, ClassNotFoundException { + try (ObjectInputStream o = new ObjectInputStream(new ByteArrayInputStream(bytes))) { + return o.readObject(); + } + } +}