Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Auto Sort labels bug and other label handling issues #5630

Open
wants to merge 1 commit into
base: development
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
113 changes: 80 additions & 33 deletions CommonData/column.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,28 +17,43 @@ void Column::setAutoSortByValuesByDefault(bool autoSort)
_autoSortByValuesByDefault = autoSort;
}

Column::Column(DataSet * data, int id)
Column::Column(DataSet * data, int id, columnType colType, computedColumnType computedType, bool autoSort)
: DataSetBaseNode(dataSetBaseNodeType::column, data->dataNode()),
_data( data),
_id( id),
_emptyValues( new EmptyValues(data->emptyValues())),
_doubleDummy( new Label(this)),
_autoSortByValue( _autoSortByValuesByDefault)
_type( colType),
_codeType( computedType),
_autoSortByValue( autoSort)
{}

Column::~Column()

Column* Column::addColumn(DataSet * data, int index, const std::string& name, columnType colType, computedColumnType computedType, bool alterDataSetTable)
{
labelsTempReset();
delete _emptyValues;
delete _doubleDummy;
int id = data->db().columnInsert(data->id(), index, colType, computedType, Column::autoSortByValuesByDefault());
Column* col = new Column(data, id, colType, computedType, Column::autoSortByValuesByDefault());

if (!name.empty())
col->setName(name);

return col;
}

void Column::dbCreate(int index)
Column* Column::loadColumn(DataSet * data, int index)
{
JASPTIMER_SCOPE(Column::dbCreate);
Column* col = new Column(data, index, columnType::unknown, computedColumnType::notComputed, Column::autoSortByValuesByDefault());
col->dbLoadIndex(index, false);

assert(_id == -1);
db().columnInsert(_id, index);
return col;
}


/// Destructor: calls labelsTempReset() and then frees the two heap-allocated helper objects owned by this column.
Column::~Column()
{
// Called first, before the members below are deleted (presumably clears the temporary label state - confirm in labelsTempReset()).
labelsTempReset();
// _emptyValues and _doubleDummy are created with `new` in the constructor's initializer list, so they are deleted here.
delete _emptyValues;
delete _doubleDummy;
}

void Column::dbLoad(int id, bool getValues)
Expand Down Expand Up @@ -401,18 +416,17 @@ columnTypeChangeResult Column::changeType(columnType colType)
if(codeType() == computedColumnType::analysis)
return columnTypeChangeResult::generatedFromAnalysis;

setDefaultValues(colType);
if (colType != columnType::unknown)
setType(colType);
setDefaultValues();
invalidate();
return columnTypeChangeResult::changed;
}
}

void Column::setDefaultValues(enum columnType columnType)
void Column::setDefaultValues()
{
JASPTIMER_SCOPE(Column::setDefaultValues);

if(columnType != columnType::unknown)
setType(columnType);

for(size_t i=0; i<_ints.size(); i++)
{
Expand Down Expand Up @@ -1638,7 +1652,7 @@ void Column::labelsOrderByValue(bool doDbUpdateEtc)
bool replaceAllDoubles = false;
static double dummy;

for(Label * label : labels())
for(Label * label : labels())
if(!label->isEmptyValue() && !(label->originalValue().isDouble() || ColumnUtils::getDoubleValue(label->originalValueAsString(), dummy)))
{
replaceAllDoubles = true;
Expand All @@ -1648,24 +1662,43 @@ void Column::labelsOrderByValue(bool doDbUpdateEtc)
if(replaceAllDoubles)
replaceDoublesTillLabelsRowWithLabels(labelsTempCount());

doublevec asc = valuesNumericOrdered();
size_t curMax = asc.size()+1;
std::map<double, int> orderMap;

for(size_t i=0; i<asc.size(); i++)
orderMap[asc[i]] = i;

//and now to write them back into the data
for(Label * label : _labels)
doublevec asc = valuesNumericOrdered();

if (asc.empty())
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So with this change it orders either the numeric labels or, if there are no numeric ones, the non-numeric labels?

Why not order both?

{
double aValue = EmptyValues::missingValueDouble;

if(label->originalValue().isDouble())
aValue = label->originalValue().asDouble();
else
ColumnUtils::getDoubleValue(label->originalValueAsString(), aValue);

label->setOrder(!std::isnan(aValue) ? orderMap[aValue] : curMax++);
stringvec orderedstrings = valuesAlphabeticOrdered();
size_t curMax = orderedstrings.size()+1;
std::map<std::string, int> orderMap;

for(size_t i=0; i<orderedstrings.size(); i++)
orderMap[orderedstrings[i]] = i;

for(Label * label : _labels)
{
std::string aValue = label->originalValueAsString();
label->setOrder(!isEmptyValue(aValue) ? orderMap[aValue] : curMax++);
}
}
else
{
size_t curMax = asc.size()+1;
std::map<double, int> orderMap;

for(size_t i=0; i<asc.size(); i++)
orderMap[asc[i]] = i;

//and now to write them back into the data
for(Label * label : _labels)
{
double aValue = EmptyValues::missingValueDouble;

if(label->originalValue().isDouble())
aValue = label->originalValue().asDouble();
else
ColumnUtils::getDoubleValue(label->originalValueAsString(), aValue);

label->setOrder(!std::isnan(aValue) ? orderMap[aValue] : curMax++);
}
}

_sortLabelsByOrder();
Expand Down Expand Up @@ -1694,6 +1727,20 @@ doublevec Column::valuesNumericOrdered()
return doublevec(values.begin(), values.end());
}

stringvec Column::valuesAlphabeticOrdered()
{
stringset values;

for(const Label * label : _labels)
{
std::string aValue = label->originalValueAsString();
if (!isEmptyValue(aValue))
values.insert(aValue);
}

return stringvec(values.begin(), values.end());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you implicitly sorting it by pushing it into a stringvec, or what?

}

void Column::valuesReverse()
{
JASPTIMER_SCOPE(Column::valuesReverse);
Expand Down
14 changes: 9 additions & 5 deletions CommonData/column.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,17 @@ class Analysis;
/// It also handles storing the information of computed columns (those used to be split off)
class Column : public DataSetBaseNode
{
public:
Column(DataSet * data, int id = -1);
private:
Column(DataSet * data, int id, columnType colType, computedColumnType computedType, bool autoSort);

public:
static Column * addColumn(DataSet* data, int index = -1, const std::string & name = "", columnType colType = columnType::scale, computedColumnType computedType = computedColumnType::notComputed, bool alterDataSetTable = true);
static Column * loadColumn(DataSet* data, int index);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function obviously should be on DataSet...

~Column();

DatabaseInterface & db();
const DatabaseInterface & db() const;

void dbCreate( int index);
void dbLoad( int id=-1, bool getValues = true); ///< Loads *and* reloads from DB!
void dbLoadIndex(int index, bool getValues = true);
void dbUpdateComputedColumnStuff();
Expand All @@ -71,7 +74,7 @@ class Column : public DataSetBaseNode
void setInvalidated( bool invalidated );
void setForceType( bool force );
void setCompColStuff( bool invalidated, bool forceSourceColType, computedColumnType codeType, const std::string & rCode, const std::string & error, const Json::Value & constructorJson);
void setDefaultValues( enum columnType columnType = columnType::unknown);
void setDefaultValues();

bool setAsNominalOrOrdinal( const intvec & values, bool is_ordinal = false);
bool setAsNominalOrOrdinal( const intvec & values, intstrmap uniqueValues, bool is_ordinal = false);
Expand Down Expand Up @@ -238,7 +241,8 @@ class Column : public DataSetBaseNode
columnTypeChangeResult _changeColumnToScale();
void _convertVectorIntToDouble(intvec & intValues, doublevec & doubleValues);
void _resetLabelValueMap();
doublevec valuesNumericOrdered();
doublevec valuesNumericOrdered();
stringvec valuesAlphabeticOrdered();

private:
DataSet * const _data;
Expand Down
15 changes: 9 additions & 6 deletions CommonData/databaseinterface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -392,11 +392,11 @@ void DatabaseInterface::filterWrite(int filterIndex, const std::vector<bool> & v
transactionWriteEnd();
}

int DatabaseInterface::columnInsert(int dataSetId, int index, const std::string & name, columnType colType, bool alterTable)
int DatabaseInterface::columnInsert(int dataSetId, int index, columnType colType, computedColumnType computedType, bool autoSort, bool alterTable)
{
JASPTIMER_SCOPE(DatabaseInterface::columnInsert);
transactionWriteBegin();

if(index == -1) index = columnLastFreeIndex(dataSetId);
else columnIndexIncrements(dataSetId, index);

Expand All @@ -405,14 +405,17 @@ int DatabaseInterface::columnInsert(int dataSetId, int index, const std::string
#endif

//Create column entry
int columnId = runStatementsId("INSERT INTO Columns (dataSet, name, columnType, colIdx, analysisId) VALUES (?, ?, ?, ?, -1) RETURNING id;", [&](sqlite3_stmt * stmt)
int columnId = runStatementsId("INSERT INTO Columns (dataSet, columnType, codeType, autoSortByValue, colIdx, analysisId) VALUES (?, ?, ?, ?, ?, -1) RETURNING id;", [&](sqlite3_stmt * stmt)
{
sqlite3_bind_int(stmt, 1, dataSetId);
sqlite3_bind_text(stmt, 2, name.c_str(), name.length(), SQLITE_TRANSIENT);

std::string colT = columnTypeToString(colType);
sqlite3_bind_text(stmt, 3, colT.c_str(), colT.length(), SQLITE_TRANSIENT);
sqlite3_bind_int(stmt, 4, index);
std::string codeT = computedColumnTypeToString(computedType);

sqlite3_bind_text(stmt, 2, colT.c_str(), colT.length(), SQLITE_TRANSIENT);
sqlite3_bind_text(stmt, 3, codeT.c_str(), codeT.length(), SQLITE_TRANSIENT);
sqlite3_bind_int(stmt, 4, autoSort);
sqlite3_bind_int(stmt, 5, index);
});

#ifdef SIR_LOG_A_LOT
Expand Down
2 changes: 1 addition & 1 deletion CommonData/databaseinterface.h
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ class DatabaseInterface

//Columns & Data/Values
//Index stuff:
int columnInsert( int dataSetId, int index = -1, const std::string & name = "", columnType colType = columnType::unknown, bool alterTable=true); ///< Insert a row into Columns and create the corresponding columns in DataSet_? Also makes sure the indices are correct
int columnInsert( int dataSetId, int index, columnType colType, computedColumnType computedType, bool autoSort, bool alterTable=true); ///< Insert a row into Columns and create the corresponding columns in DataSet_? Also makes sure the indices are correct
int columnLastFreeIndex( int dataSetId);
void columnIndexIncrements( int dataSetId, int index); ///< If index already is in use that column and all after are incremented by 1
void columnIndexDecrements( int dataSetId, int index); ///< Indices bigger than index are decremented, assumption is that the previous one using it has been removed already
Expand Down
17 changes: 9 additions & 8 deletions CommonData/dataset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -172,25 +172,26 @@ void DataSet::removeColumn(const std::string & name)
}
}

void DataSet::insertColumn(size_t index, bool alterDataSetTable)
Column* DataSet::insertColumn(size_t index, bool alterDataSetTable, const std::string & name, columnType colType, computedColumnType computedType)
{

assert(_dataSetID > 0);

Column * newColumn = new Column(this, db().columnInsert(_dataSetID, index, "", columnType::unknown, alterDataSetTable));
Column * newColumn = Column::addColumn(this, index, name, colType, computedType, alterDataSetTable);

_columns.insert(_columns.begin()+index, newColumn);

newColumn->setRowCount(_rowCount);
newColumn->setDefaultValues();

incRevision();

return newColumn;
}

Column * DataSet::newColumn(const std::string &name)
{
assert(_dataSetID > 0);
Column * col = new Column(this, db().columnInsert(_dataSetID, -1, name));
col->setName(name);
Column * col = Column::addColumn(this, -1, name);

_columns.push_back(col);

Expand Down Expand Up @@ -281,9 +282,9 @@ void DataSet::dbLoad(int index, std::function<void(float)> progressCallback, boo
for(size_t i=0; i<colCount; i++)
{
if(_columns.size() == i)
_columns.push_back(new Column(this));

_columns[i]->dbLoadIndex(i, false);
_columns.push_back(Column::loadColumn(this, i));
else
_columns[i]->dbLoadIndex(i, false);

progressCallback(0.2 + (i * colProgressMult * 0.3)); //should end at 0.5
}
Expand Down
2 changes: 1 addition & 1 deletion CommonData/dataset.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ class DataSet : public DataSetBaseNode
void removeColumn( const std::string & name );
void removeColumn( size_t index );
void removeColumnById( size_t id );
void insertColumn( size_t index, bool alterDataSetTable = true);
Column * insertColumn(size_t index, bool alterDataSetTable = true, const std::string & name = "", columnType colType = columnType::unknown, computedColumnType computedType = computedColumnType::notComputed);
Column * newColumn( const std::string & name);
int getColumnIndex( const std::string & name ) const;
int columnIndex( const Column * col ) const;
Expand Down
Loading
Loading