-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Gisa Meier
committed
Jun 3, 2020
1 parent
39a3d96
commit a700823
Showing
22 changed files
with
8,116 additions
and
3,199 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
204 changes: 204 additions & 0 deletions
204
src/main/java/com/rapidminer/belt/table/AbstractTableAccessor.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,204 @@ | ||
/**
 * Copyright (C) 2001-2020 by RapidMiner and the contributors
 *
 * Complete list of developers available at our web site:
 *
 * http://rapidminer.com
 *
 * This program is free software: you can redistribute it and/or modify it under the terms of the
 * GNU Affero General Public License as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
 * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License along with this program.
 * If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.belt.table; | ||
|
||
import java.util.Arrays; | ||
import java.util.Iterator; | ||
import java.util.List; | ||
|
||
import com.rapidminer.belt.column.CategoricalColumn; | ||
import com.rapidminer.belt.column.Column; | ||
import com.rapidminer.belt.column.ColumnType; | ||
import com.rapidminer.belt.reader.NumericReader; | ||
import com.rapidminer.example.Attribute; | ||
import com.rapidminer.example.Attributes; | ||
import com.rapidminer.operator.OperatorException; | ||
import com.rapidminer.operator.error.AttributeNotFoundError; | ||
|
||
|
||
/** | ||
* Common code for {@link MixedTableAccessor} and {@link NumericTableAccessor} that allows to access the belt {@link | ||
* Table} together with a list of attributes as needed by the {@link ConvertOnWriteExampleTable}. Consists mainly of the | ||
* attribute handling copied from {@link com.rapidminer.example.table.AbstractExampleTable}. Additionally, there are | ||
* methods to create a reader object and access a data point given a reader object. | ||
* | ||
* @author Gisa Meier | ||
* @since 0.7 | ||
*/ | ||
abstract class AbstractTableAccessor { | ||
|
||
private static final String EMPTY_STRING = ""; | ||
|
||
protected final Table table; | ||
protected final List<Attribute> attributes; | ||
|
||
|
||
AbstractTableAccessor(Table table, List<Attribute> attributes) { | ||
this.attributes = attributes; | ||
this.table = table; | ||
} | ||
|
||
/** | ||
* Get readers for the case that the whole table is going to be read. | ||
* | ||
* @return readers to use in an iterator over all rows | ||
*/ | ||
abstract Object getReaders(); | ||
|
||
/** | ||
* Get readers for that one row is read without an iterator over all rows. | ||
* | ||
* @return readers to use for reading a single row | ||
*/ | ||
abstract Object getUnbufferedReaders(); | ||
|
||
/** | ||
* Reads the value at (rowIndex, columnIndex) using the reader object. The reader object is a parameter here so | ||
* that | ||
* it can be cached. | ||
* | ||
* @param rowIndex | ||
* the index of the row to read | ||
* @param columnIndex | ||
* the index of the column to read | ||
* @param readerObject | ||
* the reader object to use for reading | ||
* @return the value at the specified position | ||
*/ | ||
abstract double get(int rowIndex, int columnIndex, Object readerObject); | ||
|
||
|
||
// The following 3 methods are copied from {@link AbstractExampleTable} | ||
|
||
/** | ||
* @return the attributes as a new array | ||
*/ | ||
Attribute[] getAttributes() { | ||
Attribute[] attribute = new Attribute[attributes.size()]; | ||
attributes.toArray(attribute); | ||
return attribute; | ||
} | ||
|
||
/** | ||
* Returns the attribute of the column number {@code i}. | ||
* | ||
* @param i | ||
* the column index | ||
* @return the attribute with the given index | ||
*/ | ||
Attribute getAttribute(int i) { | ||
return attributes.get(i); | ||
} | ||
|
||
/** | ||
* Returns the attribute with the given name. | ||
*/ | ||
Attribute findAttribute(String name) throws OperatorException { | ||
if (name == null) { | ||
return null; | ||
} | ||
for (Attribute att : attributes) { | ||
if (att != null && att.getName().equals(name)) { | ||
return att; | ||
} | ||
} | ||
throw new AttributeNotFoundError(null, null, name); | ||
} | ||
|
||
/** | ||
* Gets the numeric value given a row and column index and a numeric reader. Shared code used in {@link | ||
* NumericTableAccessor} and {@link MixedTableAccessor}. | ||
* | ||
* @param rowIndex | ||
* the row index | ||
* @param columnIndex | ||
* the column index | ||
* @param reader | ||
* the reader to use | ||
* @return the value at the specified position | ||
*/ | ||
protected double getNumericValue(int rowIndex, int columnIndex, NumericReader reader) { | ||
// always return {@code 0} for advanced columns | ||
if (reader == null) { | ||
return 0; | ||
} | ||
// set the position only if not already at the right position | ||
if (reader.position() != rowIndex - 1) { | ||
reader.setPosition(rowIndex - 1); | ||
} | ||
// need to subtract {@code 1} in case of nominal attributes because of the shifted mapping in belt | ||
Attribute attribute = getAttribute(columnIndex); | ||
if (attribute.isNominal()) { | ||
return reader.read() - 1; | ||
} else { | ||
return reader.read(); | ||
} | ||
} | ||
|
||
/** | ||
* @return the underlying {@link Table} | ||
*/ | ||
Table getTable() { | ||
return table; | ||
} | ||
|
||
/** | ||
* Creates a copy of the {@link AbstractTableAccessor} where the unused columns have been replaced by dummy columns | ||
* with minimal memory consumption. | ||
* | ||
* @param attributes | ||
* the used attributes | ||
* @return an accessor with cleaned up columns | ||
*/ | ||
abstract AbstractTableAccessor columnCleanupClone(Attributes attributes); | ||
|
||
/** | ||
* Creates a copy of the underlying {@link Table} where the unused columns have been replaced by dummy columns with | ||
* minimal memory consumption. | ||
* | ||
* @param attributes | ||
* the used attributes | ||
* @return a table with cleaned up columns | ||
*/ | ||
protected Table columnCleanup(Attributes attributes) { | ||
String[] labels = table.labelArray(); | ||
Column[] oldColumns = table.getColumns(); | ||
Column[] columns = Arrays.copyOf(oldColumns, oldColumns.length); | ||
boolean[] usedIndices = new boolean[table.width()]; | ||
for (Iterator<Attribute> allIterator = attributes.allAttributes(); allIterator.hasNext(); ) { | ||
Attribute attribute = allIterator.next(); | ||
int tableIndex = attribute.getTableIndex(); | ||
if (tableIndex < usedIndices.length) { | ||
usedIndices[tableIndex] = true; | ||
} | ||
} | ||
//column taking minimal memory | ||
CategoricalColumn emptySparseColumn = | ||
ColumnAccessor.get().newSingleValueCategoricalColumn(ColumnType.NOMINAL, EMPTY_STRING, | ||
table.height()); | ||
//replace unused columns by those which take minimal memory | ||
for (int i = 0; i < columns.length; i++) { | ||
if (!usedIndices[i]) { | ||
columns[i] = emptySparseColumn; | ||
} | ||
} | ||
return new Table(columns, labels, table.getMetaData()); | ||
} | ||
|
||
} |
Oops, something went wrong.