Skip to content

Commit

Permalink
Release RapidMiner Belt Adapter 0.8
Browse files Browse the repository at this point in the history
  • Loading branch information
Andreas Timm committed Oct 20, 2020
1 parent a700823 commit b82191f
Show file tree
Hide file tree
Showing 12 changed files with 432 additions and 59 deletions.
6 changes: 3 additions & 3 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>com.rapidminer</groupId>
<artifactId>belt-adapter</artifactId>
<version>0.7</version>
<version>0.8</version>
<packaging>jar</packaging>
<name>belt-adapter</name>
<properties>
Expand Down Expand Up @@ -46,12 +46,12 @@
<dependency>
<groupId>com.rapidminer</groupId>
<artifactId>belt</artifactId>
<version>1.0.0-BETA5</version>
<version>1.0.0-BETA6</version>
</dependency>
<dependency>
<groupId>com.rapidminer.studio</groupId>
<artifactId>rapidminer-studio-core</artifactId>
<version>9.7.0-BETA3</version>
<version>9.7.2</version>
</dependency>
</dependencies>
<profiles>
Expand Down
36 changes: 30 additions & 6 deletions src/main/java/com/rapidminer/belt/table/AbstractTableAccessor.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
*/
package com.rapidminer.belt.table;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
Expand All @@ -30,6 +31,7 @@
import com.rapidminer.example.Attributes;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.error.AttributeNotFoundError;
import com.rapidminer.tools.container.Triple;


/**
Expand All @@ -46,12 +48,23 @@ abstract class AbstractTableAccessor {
private static final String EMPTY_STRING = "";

protected final Table table;
protected final List<Attribute> attributes;
private final List<Attribute> attributes;
private final int unusedAttributes;


AbstractTableAccessor(Table table, List<Attribute> attributes) {
/**
 * Wraps a belt table together with the attributes that describe its columns.
 *
 * @param table
 *            the belt table this accessor reads from
 * @param attributes
 *            the attributes belonging to the table; an entry can be {@code null} if the column is unused
 * @param unusedAttributes
 *            how many entries of {@code attributes} are {@code null}
 */
AbstractTableAccessor(Table table, List<Attribute> attributes, int unusedAttributes) {
	this.table = table;
	this.attributes = attributes;
	this.unusedAttributes = unusedAttributes;
}

/**
Expand Down Expand Up @@ -174,12 +187,13 @@ Table getTable() {
*
* @param attributes
* the used attributes
* @return a table with cleaned up columns
* @return a triple of a table with cleaned up columns, attributes adjusted accordingly and the number of unused attributes
*/
protected Table columnCleanup(Attributes attributes) {
protected Triple<Table, List<Attribute>, Integer> columnCleanup(Attributes attributes) {
String[] labels = table.labelArray();
Column[] oldColumns = table.getColumns();
Column[] columns = Arrays.copyOf(oldColumns, oldColumns.length);

boolean[] usedIndices = new boolean[table.width()];
for (Iterator<Attribute> allIterator = attributes.allAttributes(); allIterator.hasNext(); ) {
Attribute attribute = allIterator.next();
Expand All @@ -193,12 +207,22 @@ protected Table columnCleanup(Attributes attributes) {
ColumnAccessor.get().newSingleValueCategoricalColumn(ColumnType.NOMINAL, EMPTY_STRING,
table.height());
//replace unused columns by those which take minimal memory
int unused = 0;
List<Attribute> newAttributes = new ArrayList<>(this.attributes);
for (int i = 0; i < columns.length; i++) {
if (!usedIndices[i]) {
columns[i] = emptySparseColumn;
newAttributes.set(i, null);
unused++;
}
}
return new Table(columns, labels, table.getMetaData());
return new Triple<>(new Table(columns, labels, table.getMetaData()), newAttributes, unused);
}

/**
 * Returns the unused-attribute count this accessor was constructed with.
 *
 * @return the number of attributes that were cleaned up and are not used anymore
 */
int getUnused() {
	return this.unusedAttributes;
}
}
32 changes: 30 additions & 2 deletions src/main/java/com/rapidminer/belt/table/BeltConverter.java
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,20 @@ public ColumnType<?> getType() {
*/
private static final String META_DATA_NAME = "meta_data";

/**
* String into which {@link ColumnRole#INTERPRETATION} is converted
*/
static final String INTERPRETATION_NAME = "interpretation";

/**
* String into which {@link ColumnRole#ENCODING} is converted
*/
static final String ENCODING_NAME = "encoding";

/**
* String into which {@link ColumnRole#SOURCE} is converted
*/
static final String SOURCE_NAME = "source";

// Suppress default constructor for noninstantiability
private BeltConverter() {
Expand Down Expand Up @@ -209,6 +223,15 @@ public static String convertRole(Table table, String label) {
case BATCH:
convertedRole = Attributes.BATCH_NAME;
break;
case SOURCE:
convertedRole = SOURCE_NAME;
break;
case ENCODING:
convertedRole = ENCODING_NAME;
break;
case INTERPRETATION:
convertedRole = INTERPRETATION_NAME;
break;
default:
convertedRole = null;
break;
Expand Down Expand Up @@ -281,8 +304,7 @@ public static boolean isTableWrapper(ExampleSet exampleSet) {

/**
* Converts belt roles to studio roles and adds them to the given Attributes. Duplicate roles will be made
* unique by
* adding an index to them.
* unique by adding an index to them.
*/
static void convertRoles(Table table, Attributes allAttributes) {
// this map is used in case there are duplicate roles to get indices for the duplicate roles
Expand Down Expand Up @@ -357,6 +379,12 @@ static ColumnRole convert(String studioRole) {
return ColumnRole.WEIGHT;
case Attributes.BATCH_NAME:
return ColumnRole.BATCH;
case SOURCE_NAME:
return ColumnRole.SOURCE;
case ENCODING_NAME:
return ColumnRole.ENCODING;
case INTERPRETATION_NAME:
return ColumnRole.INTERPRETATION;
default:
if (withOutIndex.startsWith(Attributes.CONFIDENCE_NAME)) {
return ColumnRole.SCORE;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -125,9 +125,9 @@ class ConvertOnWriteExampleTable implements CleanableExampleTable {
*/
ConvertOnWriteExampleTable(Table table, List<Attribute> attributeList, int numberOfDatetime) {
if (numberOfDatetime > 0) {
tableAccessor = new MixedTableAccessor(table, attributeList, numberOfDatetime);
tableAccessor = new MixedTableAccessor(table, attributeList, numberOfDatetime, 0);
} else {
tableAccessor = new NumericTableAccessor(table, attributeList);
tableAccessor = new NumericTableAccessor(table, attributeList, 0);
}
originalWidth = table.width();
height = table.height();
Expand Down Expand Up @@ -450,15 +450,16 @@ public int getNumberOfAttributes() {
@Override
public int getAttributeCount() {
// store references so that they do not change in parallel
AbstractTableAccessor tableAccessorRef = this.tableAccessor;
ColumnarExampleTable newColumnsRef = this.newColumns;
ColumnarExampleTable convertedTableRef = this.convertedTable;
if (convertedTableRef != null) {
return convertedTableRef.getAttributeCount();
}
if (newColumnsRef == null) {
return originalWidth;
return originalWidth - tableAccessorRef.getUnused();
}
return originalWidth + newColumnsRef.getAttributeCount();
return originalWidth - tableAccessorRef.getUnused() + newColumnsRef.getAttributeCount();
}

// the following 6 methods are the same as in {@link AbstractExampleTable}
Expand Down Expand Up @@ -589,6 +590,13 @@ private void convert() {
ColumnarExampleTable newColumnsRef = newColumns;
if (newColumnsRef != null) {
List<Attribute> dummyAttributes = new ArrayList<>();
// add dummy attributes to prevent adding into the holes of the table accessor attributes,
// addAttribute fills holes first before adding at the end
for (int i = 0; i < tableAccessor.getUnused(); i++) {
Attribute dummy = AttributeFactory.createAttribute("", Ontology.NUMERICAL);
newConvertedTable.addAttribute(dummy);
dummyAttributes.add(dummy);
}
for (Attribute attribute : newColumnsRef.getAttributes()) {
if (attribute != null) {
Attribute clone = (Attribute) attribute.clone();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
package com.rapidminer.belt.table;

import java.io.ObjectStreamException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

Expand Down Expand Up @@ -189,6 +190,7 @@ private Object writeReplace() throws ObjectStreamException {
*/
static HeaderExampleSet getShiftedHeader(Table table) {
Attributes attributes = new SimpleAttributes();
List<Attribute> orderedAttributes = new ArrayList<>();
List<String> labels = table.labels();
int i = 0;
for (String label : labels) {
Expand All @@ -197,14 +199,17 @@ static HeaderExampleSet getShiftedHeader(Table table) {
com.rapidminer.belt.table.BeltConverter.getValueType(table, label, i));
attribute.setTableIndex(i);
attributes.add(new AttributeRole(attribute));
orderedAttributes.add(attribute);
if (attribute.isNominal()) {
List<String> mapping = ColumnAccessor.get().getDictionaryList(column.getDictionary());
attribute.setMapping(new ShiftedNominalMappingAdapter(mapping));
}
i++;
}
BeltConverter.convertRoles(table, attributes);
return new HeaderExampleSet(attributes);
HeaderExampleSet exampleSet = new HeaderExampleSet(attributes);
FromTableConverter.adjustAttributes((Attributes) attributes.clone(), orderedAttributes, exampleSet);
return exampleSet;
}

}
41 changes: 39 additions & 2 deletions src/main/java/com/rapidminer/belt/table/FromTableConverter.java
Original file line number Diff line number Diff line change
Expand Up @@ -139,11 +139,34 @@ static ExampleSet convert(IOTable tableObject, ConcurrencyContext context) {
}

BeltConverter.convertRoles(table, set.getAttributes());
//adjust attribute order so that it is kept instead of adding special attributes at the end
adjustAttributes((Attributes)set.getAttributes().clone(), attributes, set);
set.getAnnotations().addAll(tableObject.getAnnotations());
set.setSource(tableObject.getSource());
return set;
}


/**
 * Rebuilds the attributes of the given example set in the order of the attribute list, so that the order is
 * kept and special attributes do not end up behind the regular ones.
 *
 * @param attributes
 *            the attributes used to look up the role of every attribute; the callers in this class pass a clone
 *            of the set's attributes because the set's own attributes are cleared here
 * @param attributeList
 *            the attributes in the order they should have in the example set
 * @param set
 *            the example set whose regular and special attributes are cleared and re-added
 */
static void adjustAttributes(Attributes attributes, List<Attribute> attributeList, ExampleSet set) {
	Attributes target = set.getAttributes();
	target.clearRegular();
	target.clearSpecial();
	for (Attribute current : attributeList) {
		AttributeRole sourceRole = attributes.getRole(current);
		if (sourceRole.isSpecial()) {
			// re-create the role so that the special name is carried over to the rebuilt attributes
			AttributeRole specialRole = new AttributeRole(current);
			specialRole.setSpecial(sourceRole.getSpecialName());
			target.add(specialRole);
			continue;
		}
		target.addRegular(current);
	}
}

/**
* Converts a table object into an example set sequentially in case no operator is known. If possible, {@link
* #convert(IOTable, ConcurrencyContext)} should be preferred.
Expand Down Expand Up @@ -189,14 +212,29 @@ static ExampleSet convertSequentially(IOTable tableObject) {
*/
static ColumnarExampleTable convert(Table table, Attribute[] attributes) {
	// Copy into a fresh list: Arrays.asList alone is a view backed by the array, so replacing the nulls below
	// would otherwise write the dummy attributes into the caller's array.
	List<Attribute> attributeList = new ArrayList<>(Arrays.asList(attributes));
	// replace nulls by dummy attributes so that every table column has a placeholder at its position
	List<Attribute> dummyAttributes = new ArrayList<>();
	for (int i = 0; i < attributeList.size(); i++) {
		if (attributeList.get(i) == null) {
			Attribute dummy = AttributeFactory.createAttribute("", Ontology.NUMERICAL);
			dummyAttributes.add(dummy);
			attributeList.set(i, dummy);
		}
	}

	ColumnarExampleTable columnarExampleTable = new ColumnarExampleTable(attributeList);
	columnarExampleTable.addBlankRows(table.height());
	columnarExampleTable.setExpectedSize(table.height());

	// the placeholders are only needed while constructing the table, remove them again before filling in data
	for (Attribute dummyAttribute : dummyAttributes) {
		columnarExampleTable.removeAttribute(dummyAttribute);
	}
	ExampleSet exampleSet = columnarExampleTable.createExampleSet();
	// replace the same way as it is displayed in the view
	Table displayTable = TableViewCreator.INSTANCE.replaceAdvancedWithErrorMessage(table,
			x -> TableViewCreator.CANNOT_DISPLAY_MESSAGE);
	convertSequentially(displayTable, exampleSet);
	columnarExampleTable.complete();

	return columnarExampleTable;
}

Expand All @@ -215,9 +253,8 @@ private static Column removeGapsFromDictionary(Column column) {
* Copies the data from the table into the set sequentially.
*/
private static void convertSequentially(Table table, ExampleSet set) {
int i = 0;
for (Attribute attribute : set.getAttributes()) {
Column column = table.column(i++);
Column column = table.column(attribute.getTableIndex());
switch (attribute.getValueType()) {
case Ontology.STRING:
case Ontology.FILE_PATH:
Expand Down
23 changes: 18 additions & 5 deletions src/main/java/com/rapidminer/belt/table/MixedTableAccessor.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import com.rapidminer.example.Attribute;
import com.rapidminer.example.Attributes;
import com.rapidminer.tools.container.Pair;
import com.rapidminer.tools.container.Triple;


/**
Expand Down Expand Up @@ -69,9 +70,20 @@ class MixedTableAccessor extends AbstractTableAccessor {
*/
private final int[] twist;


MixedTableAccessor(Table table, List<Attribute> attributes, int numberOfDateTime) {
super(table, attributes);
/**
* Creates a new accessor for a belt table with date-time columns.
*
* @param table
* the table to wrap
* @param attributes
* the attributes matching the table, can contain {@code null} for unused columns
* @param numberOfDateTime
* the number of date-time columns
* @param unusedAttributes
* the number of {@code null}s in the attributes
*/
MixedTableAccessor(Table table, List<Attribute> attributes, int numberOfDateTime, int unusedAttributes) {
super(table, attributes, unusedAttributes);
twist = new int[table.width()];
numericReadableColumns = new ArrayList<>();
dateTimeColumns = new ArrayList<>();
Expand Down Expand Up @@ -158,15 +170,16 @@ Object getUnbufferedReaders() {

@Override
public AbstractTableAccessor columnCleanupClone(Attributes attributes) {
Table newTable = columnCleanup(attributes);
Triple<Table, List<Attribute>, Integer> cleaned = columnCleanup(attributes);
Table newTable = cleaned.getFirst();
// need to count remaining date-time columns to use constructor
int dateTimeCount = 0;
for (Column column : newTable.getColumns()) {
if (column.type().id() == Column.TypeId.DATE_TIME) {
dateTimeCount++;
}
}
return new MixedTableAccessor(newTable, this.attributes, dateTimeCount);
return new MixedTableAccessor(newTable, cleaned.getSecond(), dateTimeCount, cleaned.getThird());
}

/**
Expand Down
Loading

0 comments on commit b82191f

Please sign in to comment.