From b82191fc1a47684d48dd9200b807408fbbfad3c6 Mon Sep 17 00:00:00 2001 From: Andreas Timm Date: Tue, 20 Oct 2020 07:41:19 +0200 Subject: [PATCH] Release RapidMiner Belt Adapter 0.8 --- pom.xml | 6 +- .../belt/table/AbstractTableAccessor.java | 36 ++++- .../rapidminer/belt/table/BeltConverter.java | 32 ++++- .../table/ConvertOnWriteExampleTable.java | 16 ++- .../belt/table/DoubleTableWrapper.java | 7 +- .../belt/table/FromTableConverter.java | 41 +++++- .../belt/table/MixedTableAccessor.java | 23 +++- .../belt/table/NumericTableAccessor.java | 19 ++- .../belt/table/TableViewCreator.java | 22 +-- .../belt/table/BeltConverterTest.java | 62 ++++++++- .../table/ConvertOnWriteExampleTableTest.java | 128 ++++++++++++++++++ .../belt/table/TableViewCreatorTest.java | 99 ++++++++++++-- 12 files changed, 432 insertions(+), 59 deletions(-) diff --git a/pom.xml b/pom.xml index 152dd4c..047945f 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ 4.0.0 com.rapidminer belt-adapter - 0.7 + 0.8 jar belt-adapter @@ -46,12 +46,12 @@ com.rapidminer belt - 1.0.0-BETA5 + 1.0.0-BETA6 com.rapidminer.studio rapidminer-studio-core - 9.7.0-BETA3 + 9.7.2 diff --git a/src/main/java/com/rapidminer/belt/table/AbstractTableAccessor.java b/src/main/java/com/rapidminer/belt/table/AbstractTableAccessor.java index 32b6087..41c3d9e 100644 --- a/src/main/java/com/rapidminer/belt/table/AbstractTableAccessor.java +++ b/src/main/java/com/rapidminer/belt/table/AbstractTableAccessor.java @@ -18,6 +18,7 @@ */ package com.rapidminer.belt.table; +import java.util.ArrayList; import java.util.Arrays; import java.util.Iterator; import java.util.List; @@ -30,6 +31,7 @@ import com.rapidminer.example.Attributes; import com.rapidminer.operator.OperatorException; import com.rapidminer.operator.error.AttributeNotFoundError; +import com.rapidminer.tools.container.Triple; /** @@ -46,12 +48,23 @@ abstract class AbstractTableAccessor { private static final String EMPTY_STRING = ""; protected final Table table; - protected 
final List attributes; + private final List attributes; + private final int unusedAttributes; - - AbstractTableAccessor(Table table, List attributes) { + /** + * Creates a new accessor for a belt table. + * + * @param table + * the table to wrap + * @param attributes + * the attributes matching the table, can contain {@code null} for unused columns + * @param unusedAttributes + * the number of {@code null}s in the attributes + */ + AbstractTableAccessor(Table table, List attributes, int unusedAttributes) { this.attributes = attributes; this.table = table; + this.unusedAttributes = unusedAttributes; } /** @@ -174,12 +187,13 @@ Table getTable() { * * @param attributes * the used attributes - * @return a table with cleaned up columns + * @return a triple of a table with cleaned up columns, attributes adjusted accordingly and the number of unused attributes */ - protected Table columnCleanup(Attributes attributes) { + protected Triple, Integer> columnCleanup(Attributes attributes) { String[] labels = table.labelArray(); Column[] oldColumns = table.getColumns(); Column[] columns = Arrays.copyOf(oldColumns, oldColumns.length); + boolean[] usedIndices = new boolean[table.width()]; for (Iterator allIterator = attributes.allAttributes(); allIterator.hasNext(); ) { Attribute attribute = allIterator.next(); @@ -193,12 +207,22 @@ protected Table columnCleanup(Attributes attributes) { ColumnAccessor.get().newSingleValueCategoricalColumn(ColumnType.NOMINAL, EMPTY_STRING, table.height()); //replace unused columns by those which take minimal memory + int unused = 0; + List newAttributes = new ArrayList<>(this.attributes); for (int i = 0; i < columns.length; i++) { if (!usedIndices[i]) { columns[i] = emptySparseColumn; + newAttributes.set(i, null); + unused++; } } - return new Table(columns, labels, table.getMetaData()); + return new Triple<>(new Table(columns, labels, table.getMetaData()), newAttributes, unused); } + /** + * @return the number of cleaned up attributes that are not 
used anymore + */ + int getUnused(){ + return unusedAttributes; + } } \ No newline at end of file diff --git a/src/main/java/com/rapidminer/belt/table/BeltConverter.java b/src/main/java/com/rapidminer/belt/table/BeltConverter.java index e661f28..3826938 100644 --- a/src/main/java/com/rapidminer/belt/table/BeltConverter.java +++ b/src/main/java/com/rapidminer/belt/table/BeltConverter.java @@ -105,6 +105,20 @@ public ColumnType getType() { */ private static final String META_DATA_NAME = "meta_data"; + /** + * String into which {@link ColumnRole#INTERPRETATION} is converted + */ + static final String INTERPRETATION_NAME = "interpretation"; + + /** + * String into which {@link ColumnRole#ENCODING} is converted + */ + static final String ENCODING_NAME = "encoding"; + + /** + * String into which {@link ColumnRole#SOURCE} is converted + */ + static final String SOURCE_NAME = "source"; // Suppress default constructor for noninstantiability private BeltConverter() { @@ -209,6 +223,15 @@ public static String convertRole(Table table, String label) { case BATCH: convertedRole = Attributes.BATCH_NAME; break; + case SOURCE: + convertedRole = SOURCE_NAME; + break; + case ENCODING: + convertedRole = ENCODING_NAME; + break; + case INTERPRETATION: + convertedRole = INTERPRETATION_NAME; + break; default: convertedRole = null; break; @@ -281,8 +304,7 @@ public static boolean isTableWrapper(ExampleSet exampleSet) { /** * Converts belt roles to studio roles and adds them to the given Attributes. Duplicate roles will be made - * unique by - * adding an index to them. + * unique by adding an index to them. 
*/ static void convertRoles(Table table, Attributes allAttributes) { // this map is used in case there are duplicate roles to get indices for the duplicate roles @@ -357,6 +379,12 @@ static ColumnRole convert(String studioRole) { return ColumnRole.WEIGHT; case Attributes.BATCH_NAME: return ColumnRole.BATCH; + case SOURCE_NAME: + return ColumnRole.SOURCE; + case ENCODING_NAME: + return ColumnRole.ENCODING; + case INTERPRETATION_NAME: + return ColumnRole.INTERPRETATION; default: if (withOutIndex.startsWith(Attributes.CONFIDENCE_NAME)) { return ColumnRole.SCORE; diff --git a/src/main/java/com/rapidminer/belt/table/ConvertOnWriteExampleTable.java b/src/main/java/com/rapidminer/belt/table/ConvertOnWriteExampleTable.java index 4af648e..afa6f1d 100644 --- a/src/main/java/com/rapidminer/belt/table/ConvertOnWriteExampleTable.java +++ b/src/main/java/com/rapidminer/belt/table/ConvertOnWriteExampleTable.java @@ -125,9 +125,9 @@ class ConvertOnWriteExampleTable implements CleanableExampleTable { */ ConvertOnWriteExampleTable(Table table, List attributeList, int numberOfDatetime) { if (numberOfDatetime > 0) { - tableAccessor = new MixedTableAccessor(table, attributeList, numberOfDatetime); + tableAccessor = new MixedTableAccessor(table, attributeList, numberOfDatetime, 0); } else { - tableAccessor = new NumericTableAccessor(table, attributeList); + tableAccessor = new NumericTableAccessor(table, attributeList, 0); } originalWidth = table.width(); height = table.height(); @@ -450,15 +450,16 @@ public int getNumberOfAttributes() { @Override public int getAttributeCount() { // store references so that they do not change in parallel + AbstractTableAccessor tableAccessorRef = this.tableAccessor; ColumnarExampleTable newColumnsRef = this.newColumns; ColumnarExampleTable convertedTableRef = this.convertedTable; if (convertedTableRef != null) { return convertedTableRef.getAttributeCount(); } if (newColumnsRef == null) { - return originalWidth; + return originalWidth - 
tableAccessorRef.getUnused(); } - return originalWidth + newColumnsRef.getAttributeCount(); + return originalWidth - tableAccessorRef.getUnused() + newColumnsRef.getAttributeCount(); } // the following 6 methods are the same as in {@link AbstractExampleTable} @@ -589,6 +590,13 @@ private void convert() { ColumnarExampleTable newColumnsRef = newColumns; if (newColumnsRef != null) { List dummyAttributes = new ArrayList<>(); + // add dummy attributes to prevent adding into the holes of the table accessor attributes, + // addAttribute fills holes first before adding at the end + for (int i = 0; i < tableAccessor.getUnused(); i++) { + Attribute dummy = AttributeFactory.createAttribute("", Ontology.NUMERICAL); + newConvertedTable.addAttribute(dummy); + dummyAttributes.add(dummy); + } for (Attribute attribute : newColumnsRef.getAttributes()) { if (attribute != null) { Attribute clone = (Attribute) attribute.clone(); diff --git a/src/main/java/com/rapidminer/belt/table/DoubleTableWrapper.java b/src/main/java/com/rapidminer/belt/table/DoubleTableWrapper.java index d78aabf..f822a24 100644 --- a/src/main/java/com/rapidminer/belt/table/DoubleTableWrapper.java +++ b/src/main/java/com/rapidminer/belt/table/DoubleTableWrapper.java @@ -19,6 +19,7 @@ package com.rapidminer.belt.table; import java.io.ObjectStreamException; +import java.util.ArrayList; import java.util.Iterator; import java.util.List; @@ -189,6 +190,7 @@ private Object writeReplace() throws ObjectStreamException { */ static HeaderExampleSet getShiftedHeader(Table table) { Attributes attributes = new SimpleAttributes(); + List orderedAttributes = new ArrayList<>(); List labels = table.labels(); int i = 0; for (String label : labels) { @@ -197,6 +199,7 @@ static HeaderExampleSet getShiftedHeader(Table table) { com.rapidminer.belt.table.BeltConverter.getValueType(table, label, i)); attribute.setTableIndex(i); attributes.add(new AttributeRole(attribute)); + orderedAttributes.add(attribute); if (attribute.isNominal()) { 
List mapping = ColumnAccessor.get().getDictionaryList(column.getDictionary()); attribute.setMapping(new ShiftedNominalMappingAdapter(mapping)); @@ -204,7 +207,9 @@ static HeaderExampleSet getShiftedHeader(Table table) { i++; } BeltConverter.convertRoles(table, attributes); - return new HeaderExampleSet(attributes); + HeaderExampleSet exampleSet = new HeaderExampleSet(attributes); + FromTableConverter.adjustAttributes((Attributes) attributes.clone(), orderedAttributes, exampleSet); + return exampleSet; } } \ No newline at end of file diff --git a/src/main/java/com/rapidminer/belt/table/FromTableConverter.java b/src/main/java/com/rapidminer/belt/table/FromTableConverter.java index e14c40a..77a4ed3 100644 --- a/src/main/java/com/rapidminer/belt/table/FromTableConverter.java +++ b/src/main/java/com/rapidminer/belt/table/FromTableConverter.java @@ -139,11 +139,34 @@ static ExampleSet convert(IOTable tableObject, ConcurrencyContext context) { } BeltConverter.convertRoles(table, set.getAttributes()); + //adjust attribute order so that it is kept instead of adding special attributes at the end + adjustAttributes((Attributes)set.getAttributes().clone(), attributes, set); set.getAnnotations().addAll(tableObject.getAnnotations()); set.setSource(tableObject.getSource()); return set; } + + /** + * in order to keep the order of the attributes and not have specials at the end we add them again in the order of + * the attributeList. 
+ */ + static void adjustAttributes(Attributes attributes, List attributeList, ExampleSet set) { + Attributes orderedAttributes = set.getAttributes(); + orderedAttributes.clearRegular(); + orderedAttributes.clearSpecial(); + for (Attribute attribute : attributeList) { + AttributeRole role = attributes.getRole(attribute); + if (!role.isSpecial()) { + orderedAttributes.addRegular(attribute); + } else { + AttributeRole attributeRole = new AttributeRole(attribute); + attributeRole.setSpecial(role.getSpecialName()); + orderedAttributes.add(attributeRole); + } + } + } + /** * Converts a table object into an example set sequentially in case no operator is known. If possible, {@link * #convert(IOTable, ConcurrencyContext)} should be preferred. @@ -189,14 +212,29 @@ static ExampleSet convertSequentially(IOTable tableObject) { */ static ColumnarExampleTable convert(Table table, Attribute[] attributes) { List attributeList = Arrays.asList(attributes); + //replace nulls by dummy attributes + List dummyAttributes = new ArrayList<>(); + for (int i = 0; i < attributeList.size(); i++) { + if (attributeList.get(i) == null) { + Attribute dummy = AttributeFactory.createAttribute("", Ontology.NUMERICAL); + dummyAttributes.add(dummy); + attributeList.set(i, dummy); + } + } + ColumnarExampleTable columnarExampleTable = new ColumnarExampleTable(attributeList); columnarExampleTable.addBlankRows(table.height()); columnarExampleTable.setExpectedSize(table.height()); + + for (Attribute dummyAttribute : dummyAttributes) { + columnarExampleTable.removeAttribute(dummyAttribute); + } ExampleSet exampleSet = columnarExampleTable.createExampleSet(); // replace the same way as it is displayed in the view table = TableViewCreator.INSTANCE.replaceAdvancedWithErrorMessage(table, x -> TableViewCreator.CANNOT_DISPLAY_MESSAGE); convertSequentially(table, exampleSet); columnarExampleTable.complete(); + return columnarExampleTable; } @@ -215,9 +253,8 @@ private static Column removeGapsFromDictionary(Column 
column) { * Copies the data from the table into the set sequentially. */ private static void convertSequentially(Table table, ExampleSet set) { - int i = 0; for (Attribute attribute : set.getAttributes()) { - Column column = table.column(i++); + Column column = table.column(attribute.getTableIndex()); switch (attribute.getValueType()) { case Ontology.STRING: case Ontology.FILE_PATH: diff --git a/src/main/java/com/rapidminer/belt/table/MixedTableAccessor.java b/src/main/java/com/rapidminer/belt/table/MixedTableAccessor.java index 9034732..21d261c 100644 --- a/src/main/java/com/rapidminer/belt/table/MixedTableAccessor.java +++ b/src/main/java/com/rapidminer/belt/table/MixedTableAccessor.java @@ -30,6 +30,7 @@ import com.rapidminer.example.Attribute; import com.rapidminer.example.Attributes; import com.rapidminer.tools.container.Pair; +import com.rapidminer.tools.container.Triple; /** @@ -69,9 +70,20 @@ class MixedTableAccessor extends AbstractTableAccessor { */ private final int[] twist; - - MixedTableAccessor(Table table, List attributes, int numberOfDateTime) { - super(table, attributes); + /** + * Creates a new accessor for a belt table with date-time columns. 
+ * + * @param table + * the table to wrap + * @param attributes + * the attributes matching the table, can contain {@code null} for unused columns + * @param numberOfDateTime + * the number of date-time columns + * @param unusedAttributes + * the number of {@code null}s in the attributes + */ + MixedTableAccessor(Table table, List attributes, int numberOfDateTime, int unusedAttributes) { + super(table, attributes, unusedAttributes); twist = new int[table.width()]; numericReadableColumns = new ArrayList<>(); dateTimeColumns = new ArrayList<>(); @@ -158,7 +170,8 @@ Object getUnbufferedReaders() { @Override public AbstractTableAccessor columnCleanupClone(Attributes attributes) { - Table newTable = columnCleanup(attributes); + Triple, Integer> cleaned = columnCleanup(attributes); + Table newTable = cleaned.getFirst(); // need to count remaining date-time columns to use constructor int dateTimeCount = 0; for (Column column : newTable.getColumns()) { @@ -166,7 +179,7 @@ public AbstractTableAccessor columnCleanupClone(Attributes attributes) { dateTimeCount++; } } - return new MixedTableAccessor(newTable, this.attributes, dateTimeCount); + return new MixedTableAccessor(newTable, cleaned.getSecond(), dateTimeCount, cleaned.getThird()); } /** diff --git a/src/main/java/com/rapidminer/belt/table/NumericTableAccessor.java b/src/main/java/com/rapidminer/belt/table/NumericTableAccessor.java index 84f38d3..2b33055 100644 --- a/src/main/java/com/rapidminer/belt/table/NumericTableAccessor.java +++ b/src/main/java/com/rapidminer/belt/table/NumericTableAccessor.java @@ -26,6 +26,7 @@ import com.rapidminer.belt.reader.SmallReaders; import com.rapidminer.example.Attribute; import com.rapidminer.example.Attributes; +import com.rapidminer.tools.container.Triple; /** @@ -49,9 +50,18 @@ class NumericTableAccessor extends AbstractTableAccessor { */ private final ThreadLocal> readersReference = new ThreadLocal<>(); - - NumericTableAccessor(Table table, List attributes) { - super(table, 
attributes); + /** + * Creates a new accessor for a belt table without date-time columns. + * + * @param table + * the table to wrap + * @param attributes + * the attributes matching the table, can contain {@code null} for unused columns + * @param unusedAttributes + * the number of {@code null}s in the attributes + */ + NumericTableAccessor(Table table, List attributes, int unusedAttributes) { + super(table, attributes, unusedAttributes); } @Override @@ -101,7 +111,8 @@ Object getUnbufferedReaders() { @Override public AbstractTableAccessor columnCleanupClone(Attributes attributes) { - return new NumericTableAccessor(columnCleanup(attributes), this.attributes); + Triple, Integer> cleaned = columnCleanup(attributes); + return new NumericTableAccessor(cleaned.getFirst(), cleaned.getSecond(), cleaned.getThird()); } } \ No newline at end of file diff --git a/src/main/java/com/rapidminer/belt/table/TableViewCreator.java b/src/main/java/com/rapidminer/belt/table/TableViewCreator.java index b2a19e4..dfa89af 100644 --- a/src/main/java/com/rapidminer/belt/table/TableViewCreator.java +++ b/src/main/java/com/rapidminer/belt/table/TableViewCreator.java @@ -147,7 +147,7 @@ public ExampleSet convertOnWriteView(IOTable ioTable, boolean throwOnAdvanced) { } convertRoles(table, attributes); ExampleSet set = new ConvertOnWriteExampleTable(table, attributeList, numberOfDatetime).createExampleSet(); - adjustAttributes(attributes, attributeList, set); + FromTableConverter.adjustAttributes(attributes, attributeList, set); set.getAnnotations().addAll(ioTable.getAnnotations()); set.setSource(ioTable.getSource()); storeBeltMetaDataInExampleSetUserData(table, set); @@ -270,24 +270,6 @@ private void setMapping(Column column, Attribute attribute) { } } - /** - * in order to keep the order of the attributes and not have specials at the end we add them again in the order of - * the attributeList. 
- */ - private void adjustAttributes(Attributes attributes, List attributeList, ExampleSet set) { - Attributes orderedAttributes = set.getAttributes(); - orderedAttributes.clearRegular(); - orderedAttributes.clearSpecial(); - for (Attribute attribute : attributeList) { - AttributeRole role = attributes.getRole(attribute); - if (!role.isSpecial()) { - orderedAttributes.addRegular(attribute); - } else { - AttributeRole attributeRole = new AttributeRole(attribute); - attributeRole.setSpecial(role.getSpecialName()); - orderedAttributes.add(attributeRole); - } - } - } + } \ No newline at end of file diff --git a/src/test/java/com/rapidminer/belt/table/BeltConverterTest.java b/src/test/java/com/rapidminer/belt/table/BeltConverterTest.java index 805e2ce..d765b72 100644 --- a/src/test/java/com/rapidminer/belt/table/BeltConverterTest.java +++ b/src/test/java/com/rapidminer/belt/table/BeltConverterTest.java @@ -30,6 +30,7 @@ import java.util.Arrays; import java.util.Collection; import java.util.Collections; +import java.util.Iterator; import java.util.List; import java.util.Locale; import java.util.Objects; @@ -548,6 +549,7 @@ public void testTypesView() { public void testRoles() { String[] roles = new String[]{Attributes.ID_NAME, Attributes.CONFIDENCE_NAME + "_" + "Yes", Attributes.LABEL_NAME, Attributes.PREDICTION_NAME, + BeltConverter.INTERPRETATION_NAME, BeltConverter.ENCODING_NAME, BeltConverter.SOURCE_NAME, Attributes.CLUSTER_NAME, Attributes.WEIGHT_NAME, Attributes.BATCH_NAME, Attributes.OUTLIER_NAME, Attributes.CONFIDENCE_NAME, Attributes.CLASSIFICATION_COST, "ignore-me", "confidence(yes)", "cluster_1_probability"}; @@ -567,6 +569,7 @@ public void testRoles() { .toArray(ColumnRole[]::new); ColumnRole[] expected = new ColumnRole[]{null, ColumnRole.ID, ColumnRole.SCORE, ColumnRole.LABEL, ColumnRole.PREDICTION, + ColumnRole.INTERPRETATION, ColumnRole.ENCODING, ColumnRole.SOURCE, ColumnRole.CLUSTER, ColumnRole.WEIGHT, ColumnRole.BATCH, ColumnRole.OUTLIER, ColumnRole 
.SCORE, ColumnRole.METADATA, ColumnRole.METADATA, ColumnRole.SCORE, ColumnRole.METADATA}; @@ -577,7 +580,7 @@ public void testRoles() { .toArray(com.rapidminer.belt.table.LegacyRole[]::new); com.rapidminer.belt.table.LegacyRole[] legacyExpected = new com.rapidminer.belt.table.LegacyRole[]{null, null, null, null, null, null, null, null, null, - null, + null, null, null, null, new LegacyRole(Attributes.CLASSIFICATION_COST), new LegacyRole("ignore-me"), new LegacyRole("confidence(yes)"), new LegacyRole("cluster_1_probability")}; assertArrayEquals(legacyExpected, legacyResult); @@ -587,7 +590,8 @@ public void testRoles() { .toArray(ColumnReference[]::new); ColumnReference[] referencesExpected = new ColumnReference[]{null, null, - new ColumnReference(set.getAttributes().getPredictedLabel().getName(), "Yes"), null, null, + new ColumnReference(set.getAttributes().getPredictedLabel().getName(), "Yes"), null, + null, null, null, null, null, null, null, null, new ColumnReference(set.getAttributes().getPredictedLabel().getName()), null, null, new ColumnReference(set.getAttributes().getPredictedLabel().getName()), null}; assertArrayEquals(referencesExpected, references); @@ -829,6 +833,7 @@ public void testRemoveAndRenameAttribute() { public void testRolesView() { String[] roles = new String[]{Attributes.ID_NAME, Attributes.CONFIDENCE_NAME + "_" + "Yes", Attributes.LABEL_NAME, Attributes.PREDICTION_NAME, + BeltConverter.INTERPRETATION_NAME, BeltConverter.ENCODING_NAME, BeltConverter.SOURCE_NAME, Attributes.CLUSTER_NAME, Attributes.WEIGHT_NAME, Attributes.BATCH_NAME, Attributes.OUTLIER_NAME, Attributes.CONFIDENCE_NAME, Attributes.CLASSIFICATION_COST, "ignore-me"}; @@ -848,6 +853,7 @@ public void testRolesView() { .toArray(ColumnRole[]::new); ColumnRole[] expected = new ColumnRole[]{null, ColumnRole.ID, ColumnRole.SCORE, ColumnRole.LABEL, ColumnRole.PREDICTION, + ColumnRole.INTERPRETATION, ColumnRole.ENCODING, ColumnRole.SOURCE, ColumnRole.CLUSTER, ColumnRole.WEIGHT, 
ColumnRole.BATCH, ColumnRole.OUTLIER, ColumnRole.SCORE, ColumnRole.METADATA, ColumnRole.METADATA}; @@ -858,7 +864,7 @@ public void testRolesView() { .toArray(com.rapidminer.belt.table.LegacyRole[]::new); com.rapidminer.belt.table.LegacyRole[] legacyExpected = new com.rapidminer.belt.table.LegacyRole[]{null, null, null, null, null, null, null, null, null, - null, + null, null, null, null, new com.rapidminer.belt.table.LegacyRole(Attributes.CLASSIFICATION_COST), new com.rapidminer.belt.table.LegacyRole("ignore-me")}; assertArrayEquals(legacyExpected, legacyResult); @@ -868,7 +874,8 @@ public void testRolesView() { .toArray(ColumnReference[]::new); ColumnReference[] referencesExpected = new ColumnReference[]{null, null, - new ColumnReference(set.getAttributes().getPredictedLabel().getName(), "Yes"), null, null, + new ColumnReference(set.getAttributes().getPredictedLabel().getName(), "Yes"), null, + null, null, null, null, null, null, null, null, new ColumnReference(set.getAttributes().getPredictedLabel().getName()), null, null}; assertArrayEquals(referencesExpected, references); @@ -1386,6 +1393,7 @@ public void testAllTypes() { Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); ExampleSet backSet = com.rapidminer.belt.table.BeltConverter.convert(new IOTable(table), CONTEXT); RapidAssert.assertEquals(set, backSet); + assertAttributeOrder(set, backSet); } @Test @@ -1400,6 +1408,7 @@ public void testAllTypesView() { Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); ExampleSet backSet = com.rapidminer.belt.table.BeltConverter.convert(new IOTable(table), CONTEXT); RapidAssert.assertEquals(set, backSet); + assertAttributeOrder(set, backSet); } @Test @@ -1424,6 +1433,7 @@ public void testRoles() { Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); ExampleSet backSet = com.rapidminer.belt.table.BeltConverter.convert(new IOTable(table), CONTEXT); 
RapidAssert.assertEquals(set, backSet); + assertAttributeOrder(set, backSet); } @Test @@ -1450,6 +1460,7 @@ public void testNumericTypes() { Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); ExampleSet backSet = com.rapidminer.belt.table.BeltConverter.convert(new IOTable(table), CONTEXT); RapidAssert.assertEquals(set, backSet); + assertAttributeOrder(set, backSet); } @Test @@ -1488,6 +1499,7 @@ public void testNominalTypes() { Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); ExampleSet backSet = com.rapidminer.belt.table.BeltConverter.convert(new IOTable(table), CONTEXT); RapidAssert.assertEquals(set, backSet); + assertAttributeOrder(set, backSet); } @Test @@ -1506,9 +1518,51 @@ public void testIncompleteBinominalTypes() { Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); ExampleSet backSet = com.rapidminer.belt.table.BeltConverter.convert(new IOTable(table), CONTEXT); RapidAssert.assertEquals(set, backSet); + assertAttributeOrder(set, backSet); } + @Test + public void testAttributeOrder() { + List attributes = new ArrayList<>(); + for (int i = 1; i < Ontology.VALUE_TYPE_NAMES.length; i++) { + attributes.add(AttributeFactory.createAttribute(i)); + } + ExampleSet set = ExampleSets.from(attributes) + .build(); + //reoder attributes and include specials + set.getAttributes().setSpecialAttribute(attributes.get(2), Attributes.LABEL_NAME); + set.getAttributes().setSpecialAttribute(attributes.get(1), Attributes.CLUSTER_NAME); + set.getAttributes().remove(attributes.get(0)); + set.getAttributes().addRegular(attributes.get(0)); + set.getAttributes().remove(attributes.get(4)); + set.getAttributes().addRegular(attributes.get(4)); + set.getAttributes().remove(attributes.get(6)); + set.getAttributes().addRegular(attributes.get(6)); + set.getAttributes().remove(attributes.get(5)); + set.getAttributes().addRegular(attributes.get(5)); + + Table table = 
com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); + ExampleSet backSet = com.rapidminer.belt.table.BeltConverter.convert(new IOTable(table), CONTEXT); + RapidAssert.assertEquals(set, backSet); + + assertAttributeOrder(set, backSet); + } + + } + /** + * Asserts that the order of the attributes is the same. + */ + static void assertAttributeOrder(ExampleSet expected, ExampleSet actual) { + RapidAssert.assertEquals(getOrderedAttributeNames(expected), getOrderedAttributeNames(actual)); + } + + private static List getOrderedAttributeNames(ExampleSet exampleSet) { + List names = new ArrayList<>(); + for (Iterator it = exampleSet.getAttributes().allAttributes(); it.hasNext(); ) { + names.add(it.next().getName()); + } + return names; } @RunWith(Parameterized.class) diff --git a/src/test/java/com/rapidminer/belt/table/ConvertOnWriteExampleTableTest.java b/src/test/java/com/rapidminer/belt/table/ConvertOnWriteExampleTableTest.java index 100691e..57a7519 100644 --- a/src/test/java/com/rapidminer/belt/table/ConvertOnWriteExampleTableTest.java +++ b/src/test/java/com/rapidminer/belt/table/ConvertOnWriteExampleTableTest.java @@ -850,6 +850,134 @@ public void testSerializationAfterSetExisting() throws IOException, ClassNotFoun RapidAssert.assertEquals(set, (ExampleSet) deserialized); } + @Test + public void testCleanupAndConvert() { + Attribute numeric = AttributeFactory.createAttribute("numeric", Ontology.NUMERICAL); + Attribute real = AttributeFactory.createAttribute("real", Ontology.REAL); + Attribute integer = AttributeFactory.createAttribute("integer", Ontology.INTEGER); + Attribute dateTime = AttributeFactory.createAttribute("date_time", Ontology.DATE_TIME); + Attribute date = AttributeFactory.createAttribute("date", Ontology.DATE); + Attribute time = AttributeFactory.createAttribute("time", Ontology.TIME); + List attributes = Arrays.asList(numeric, real, dateTime, date, time, integer); + ExampleSet set = 
ExampleSets.from(attributes).withBlankSize(150) + .withColumnFiller(numeric, i -> Math.random() > 0.7 ? Double.NaN : Math.random()) + .withColumnFiller(real, i -> Math.random() > 0.7 ? Double.NaN : 42 + Math.random()) + .withColumnFiller(integer, i -> Math.random() > 0.7 ? Double.NaN : Math.round(Math.random() * 100)) + .withColumnFiller(dateTime, + i -> Math.random() > 0.7 ? Double.NaN : 1515410698d + Math.floor(Math.random() * 1000)) + .withColumnFiller(date, i -> Math.random() > 0.7 ? Double.NaN : + 230169600000d + Math.floor(Math.random() * 100) * 1000d * 60 * 60 * 24) + .withColumnFiller(time, + i -> Math.random() > 0.7 ? Double.NaN : Math.floor(Math.random() * 60 * 60 * 24 * 1000)) + .withRole(numeric, Attributes.LABEL_NAME) + .build(); + + IOTable table = BeltConverter.convert(set, CONTEXT); + ExampleSet view = TableViewCreator.INSTANCE.convertOnWriteView(table, true); + set.getAttributes().remove(set.getAttributes().get("real")); + set.getAttributes().remove(set.getAttributes().get("date")); + view.getAttributes().remove(view.getAttributes().get("real")); + view.getAttributes().remove(view.getAttributes().get("date")); + set.cleanup(); + view.cleanup(); + set.getExampleTable().removeAttribute(set.getAttributes().get("integer")); + view.getExampleTable().removeAttribute(view.getAttributes().get("integer")); + + RapidAssert.assertEquals(set, view); + assertEquals(set.getExampleTable().getAttributeCount(), view.getExampleTable().getAttributeCount()); + } + + @Test + public void testCleanupAndConvertWithAdditional() { + Attribute numeric = AttributeFactory.createAttribute("numeric", Ontology.NUMERICAL); + Attribute real = AttributeFactory.createAttribute("real", Ontology.REAL); + Attribute integer = AttributeFactory.createAttribute("integer", Ontology.INTEGER); + Attribute dateTime = AttributeFactory.createAttribute("date_time", Ontology.DATE_TIME); + Attribute date = AttributeFactory.createAttribute("date", Ontology.DATE); + Attribute time = 
AttributeFactory.createAttribute("time", Ontology.TIME); + List attributes = Arrays.asList(numeric, real, dateTime, date, time, integer); + ExampleSet set = ExampleSets.from(attributes).withBlankSize(150) + .withColumnFiller(numeric, i -> Math.random() > 0.7 ? Double.NaN : Math.random()) + .withColumnFiller(real, i -> Math.random() > 0.7 ? Double.NaN : 42 + Math.random()) + .withColumnFiller(integer, i -> Math.random() > 0.7 ? Double.NaN : Math.round(Math.random() * 100)) + .withColumnFiller(dateTime, + i -> Math.random() > 0.7 ? Double.NaN : 1515410698d + Math.floor(Math.random() * 1000)) + .withColumnFiller(date, i -> Math.random() > 0.7 ? Double.NaN : + 230169600000d + Math.floor(Math.random() * 100) * 1000d * 60 * 60 * 24) + .withColumnFiller(time, + i -> Math.random() > 0.7 ? Double.NaN : Math.floor(Math.random() * 60 * 60 * 24 * 1000)) + .withRole(numeric, Attributes.LABEL_NAME) + .build(); + + IOTable table = BeltConverter.convert(set, CONTEXT); + ExampleSet view = TableViewCreator.INSTANCE.convertOnWriteView(table, true); + + set.getAttributes().remove(set.getAttributes().get("real")); + Attribute numeric2 = AttributeFactory.createAttribute("numeric2", Ontology.NUMERICAL); + set.getExampleTable().addAttribute(numeric2); + set.getAttributes().addRegular(numeric2); + Attribute integer2 = AttributeFactory.createAttribute("integer2", Ontology.INTEGER); + set.getExampleTable().addAttribute(integer2); + set.getAttributes().addRegular(integer2); + set.getAttributes().remove(numeric2); + + + view.getAttributes().remove(view.getAttributes().get("real")); + numeric2 = AttributeFactory.createAttribute("numeric2", Ontology.NUMERICAL); + view.getExampleTable().addAttribute(numeric2); + view.getAttributes().addRegular(numeric2); + integer2 = AttributeFactory.createAttribute("integer2", Ontology.INTEGER); + view.getExampleTable().addAttribute(integer2); + view.getAttributes().addRegular(integer2); + view.getAttributes().remove(numeric2); + + set.cleanup(); + 
view.cleanup(); + + set.getExampleTable().removeAttribute(set.getAttributes().get("integer")); + view.getExampleTable().removeAttribute(view.getAttributes().get("integer")); + + RapidAssert.assertEquals(set, view); + assertEquals(set.getExampleTable().getAttributeCount(), view.getExampleTable().getAttributeCount()); + } + + @Test + public void testCleanupAndConvertNominal() throws OperatorException { + Attribute nominal = AttributeFactory.createAttribute("nominal", Ontology.NOMINAL); + Attribute polynominal = AttributeFactory.createAttribute("polynominal", Ontology.POLYNOMINAL); + Attribute binominal = AttributeFactory.createAttribute("binominal", Ontology.BINOMINAL); + for (int i = 0; i < 5; i++) { + nominal.getMapping().mapString("nominalValue" + i); + } + for (int i = 0; i < 6; i++) { + polynominal.getMapping().mapString("polyValue" + i); + } + for (int i = 0; i < 2; i++) { + binominal.getMapping().mapString("binominalValue" + i); + } + List attributes = Arrays.asList(nominal, polynominal, binominal); + Random random = new Random(); + ExampleSet set = ExampleSets.from(attributes).withBlankSize(150) + .withColumnFiller(nominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(5)) + .withColumnFiller(polynominal, i -> random.nextDouble() > 0.7 ? Double.NaN : random.nextInt(6)) + .withColumnFiller(binominal, i -> random.nextDouble() > 0.7 ? 
Double.NaN : random.nextInt(2)) + .build(); + + IOTable table = BeltConverter.convert(set, CONTEXT); + ExampleSet view = TableViewCreator.INSTANCE.convertOnWriteView(table, true); + Attribute viewPolynominal = view.getExampleTable().findAttribute("polynominal"); + set.getAttributes().remove(set.getAttributes().get("polynominal")); + view.getAttributes().remove(view.getAttributes().get("polynominal")); + set.cleanup(); + view.cleanup(); + set.getExampleTable().removeAttribute(set.getAttributes().get("binominal")); + view.getExampleTable().removeAttribute(view.getAttributes().get("binominal")); // look up the attribute on the view itself, mirroring the "integer" removal in the numeric test + + RapidAssert.assertEquals(set, view); + assertEquals(-1, viewPolynominal.getMapping().getIndex("")); + assertEquals(set.getExampleTable().getAttributeCount(), view.getExampleTable().getAttributeCount()); + } + } public static class Concurrency { diff --git a/src/test/java/com/rapidminer/belt/table/TableViewCreatorTest.java b/src/test/java/com/rapidminer/belt/table/TableViewCreatorTest.java index aa5d660..d04b565 100644 --- a/src/test/java/com/rapidminer/belt/table/TableViewCreatorTest.java +++ b/src/test/java/com/rapidminer/belt/table/TableViewCreatorTest.java @@ -18,6 +18,7 @@ */ package com.rapidminer.belt.table; +import static com.rapidminer.belt.table.BeltConverterTest.assertAttributeOrder; import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; @@ -195,7 +196,9 @@ public void testNumericTypes() { Table table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); - RapidAssert.assertEquals(set, com.rapidminer.belt.table.TableViewCreator.INSTANCE.createView(table)); + ExampleSet view = TableViewCreator.INSTANCE.createView(table); + RapidAssert.assertEquals(set, view); + assertAttributeOrder(set, view); } @Test @@ -222,7 +225,9 @@ public void testNumericAndDateTypes() { Table 
table = com.rapidminer.belt.table.BeltConverter.convert(set, CONTEXT).getTable(); - 
RapidAssert.assertEquals(set, com.rapidminer.belt.table.TableViewCreator.INSTANCE.createView(table)); + ExampleSet view = TableViewCreator.INSTANCE.createView(table); + RapidAssert.assertEquals(set, view); + assertAttributeOrder(set, view); } @Test @@ -435,7 +440,9 @@ public void testSerialization() throws IOException, ClassNotFoundException { byte[] serialized = serialize(com.rapidminer.belt.table.TableViewCreator.INSTANCE.createView(table)); Object deserialized = deserialize(serialized); - RapidAssert.assertEquals(set, (ExampleSet) deserialized); + ExampleSet deserializedES = (ExampleSet) deserialized; + RapidAssert.assertEquals(set, deserializedES); + assertAttributeOrder(set, deserializedES); } @@ -465,7 +472,9 @@ public void testSerializationDate() throws IOException, ClassNotFoundException { byte[] serialized = serialize(com.rapidminer.belt.table.TableViewCreator.INSTANCE.createView(table)); Object deserialized = deserialize(serialized); - RapidAssert.assertEquals(set, (ExampleSet) deserialized); + ExampleSet deserializedES = (ExampleSet) deserialized; + RapidAssert.assertEquals(set, deserializedES); + assertAttributeOrder(set, deserializedES); } @Test @@ -514,7 +523,9 @@ public void testSerializationGaps() throws IOException, ClassNotFoundException { byte[] serialized = serialize(set); Object deserialized = deserialize(serialized); - RapidAssert.assertEquals(set, (ExampleSet) deserialized); + ExampleSet deserializedES = (ExampleSet) deserialized; + RapidAssert.assertEquals(set, deserializedES); + assertAttributeOrder(set, deserializedES); } @Test @@ -572,6 +583,7 @@ public void testClone() { ExampleSet view = com.rapidminer.belt.table.TableViewCreator.INSTANCE.createView(table); ExampleSet clone = (ExampleSet) view.clone(); RapidAssert.assertEquals(view, clone); + assertAttributeOrder(view, clone); } @Test @@ -586,6 +598,7 @@ public void testCloneDate() { ExampleSet view = TableViewCreator.INSTANCE.createView(table); ExampleSet clone = (ExampleSet) 
view.clone(); RapidAssert.assertEquals(view, clone); + assertAttributeOrder(view, clone); } @Test(expected = BeltConverter.ConversionException.class) @@ -614,6 +627,34 @@ public void testReplaceAdvancedColumns() { replaced.column("textset").fill(message, 0); assertEquals("Error: Cannot display advanced column of Column type Text-Set", message[0]); } + + @Test + public void testAttributeOrder() { + List attributes = new ArrayList<>(); + for (int i = 1; i < Ontology.VALUE_TYPE_NAMES.length; i++) { + attributes.add(AttributeFactory.createAttribute(i)); + } + ExampleSet set = ExampleSets.from(attributes) + .build(); + //reorder attributes and include specials + set.getAttributes().setSpecialAttribute(attributes.get(2), Attributes.LABEL_NAME); + set.getAttributes().setSpecialAttribute(attributes.get(1), Attributes.CLUSTER_NAME); + set.getAttributes().remove(attributes.get(0)); + set.getAttributes().addRegular(attributes.get(0)); + set.getAttributes().remove(attributes.get(4)); + set.getAttributes().addRegular(attributes.get(4)); + set.getAttributes().remove(attributes.get(6)); + set.getAttributes().addRegular(attributes.get(6)); + set.getAttributes().remove(attributes.get(5)); + set.getAttributes().addRegular(attributes.get(5)); + + Table table = BeltConverter.convert(set, CONTEXT).getTable(); + + ExampleSet backSet = TableViewCreator.INSTANCE.createView(table); + RapidAssert.assertEquals(set, backSet); + + assertAttributeOrder(set, backSet); + } } public static class ExampleTableView { @@ -660,6 +701,7 @@ public void testNominalTypes() { ExampleSet view = com.rapidminer.belt.table.TableViewCreator.INSTANCE.convertOnWriteView(table, true); RapidAssert.assertEquals(set, view); + assertAttributeOrder(set, view); } @Test @@ -679,6 +721,7 @@ public void testNumericTypes() { ExampleSet view = com.rapidminer.belt.table.TableViewCreator.INSTANCE.convertOnWriteView(table, true); RapidAssert.assertEquals(set, view); + assertAttributeOrder(set, view); } @Test @@ -707,6 +750,7 @@ 
public void testNumericAndDateTypes() { ExampleSet view = com.rapidminer.belt.table.TableViewCreator.INSTANCE.convertOnWriteView(table, true); RapidAssert.assertEquals(set, view); + assertAttributeOrder(set, view); } @Test @@ -895,7 +939,9 @@ public void testSerialization() throws IOException, ClassNotFoundException { byte[] serialized = serialize(TableViewCreator.INSTANCE.convertOnWriteView(table, true)); Object deserialized = deserialize(serialized); - RapidAssert.assertEquals(set, (ExampleSet) deserialized); + ExampleSet deserializedES = (ExampleSet) deserialized; + RapidAssert.assertEquals(set, deserializedES); + assertAttributeOrder(set, deserializedES); } @@ -925,7 +971,9 @@ public void testSerializationDate() throws IOException, ClassNotFoundException { byte[] serialized = serialize(TableViewCreator.INSTANCE.convertOnWriteView(table, true)); Object deserialized = deserialize(serialized); - RapidAssert.assertEquals(set, (ExampleSet) deserialized); + ExampleSet deserializedES = (ExampleSet) deserialized; + RapidAssert.assertEquals(set, deserializedES); + assertAttributeOrder(set, deserializedES); } @Test @@ -974,7 +1022,9 @@ public void testSerializationGaps() throws IOException, ClassNotFoundException { byte[] serialized = serialize(set); Object deserialized = deserialize(serialized); - RapidAssert.assertEquals(set, (ExampleSet) deserialized); + ExampleSet deserializedES = (ExampleSet) deserialized; + RapidAssert.assertEquals(set, deserializedES); + assertAttributeOrder(set, deserializedES); } @Test @@ -987,6 +1037,7 @@ public void testClone() { ExampleSet view = com.rapidminer.belt.table.TableViewCreator.INSTANCE.convertOnWriteView(table, true); ExampleSet clone = (ExampleSet) view.clone(); RapidAssert.assertEquals(view, clone); + assertAttributeOrder(view, clone); } @Test @@ -1001,6 +1052,7 @@ public void testCloneDate() { ExampleSet view = TableViewCreator.INSTANCE.convertOnWriteView(table, false); ExampleSet clone = (ExampleSet) view.clone(); 
RapidAssert.assertEquals(view, clone); + assertAttributeOrder(view, clone); } @Test(expected = BeltConverter.ConversionException.class) @@ -1074,6 +1126,7 @@ public void testColumnCleanupNoDate() { view.cleanup(); RapidAssert.assertEquals(set, view); + assertAttributeOrder(set, view); } @Test @@ -1112,6 +1165,7 @@ public void testColumnCleanupWithDate() { view.cleanup(); RapidAssert.assertEquals(set, view); + assertAttributeOrder(set, view); } @Test @@ -1152,6 +1206,35 @@ public void testColumnCleanupWithDateNoDateAfter() { view.cleanup(); RapidAssert.assertEquals(set, view); + assertAttributeOrder(set, view); + } + + @Test + public void testAttributeOrder() { + List attributes = new ArrayList<>(); + for (int i = 1; i < Ontology.VALUE_TYPE_NAMES.length; i++) { + attributes.add(AttributeFactory.createAttribute(i)); + } + ExampleSet set = ExampleSets.from(attributes) + .build(); + //reorder attributes and include specials + set.getAttributes().setSpecialAttribute(attributes.get(2), Attributes.LABEL_NAME); + set.getAttributes().setSpecialAttribute(attributes.get(1), Attributes.CLUSTER_NAME); + set.getAttributes().remove(attributes.get(0)); + set.getAttributes().addRegular(attributes.get(0)); + set.getAttributes().remove(attributes.get(4)); + set.getAttributes().addRegular(attributes.get(4)); + set.getAttributes().remove(attributes.get(6)); + set.getAttributes().addRegular(attributes.get(6)); + set.getAttributes().remove(attributes.get(5)); + set.getAttributes().addRegular(attributes.get(5)); + + IOTable table = BeltConverter.convert(set, CONTEXT); + + ExampleSet backSet = TableViewCreator.INSTANCE.convertOnWriteView(table, true); + RapidAssert.assertEquals(set, backSet); + + assertAttributeOrder(set, backSet); } }