Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] [standalone] Dimension reduction standalone module, and a few dimension reduction changes #998

Draft
wants to merge 7 commits into
base: dev
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions core/base/common/CommandLineParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,10 @@ namespace ttk {
arguments_[j].doubleValueList_->size() + 1);
s >> arguments_[j].doubleValueList_->back();
arguments_[j].isSet_ = true;
} else if(arguments_[j].boolValue_) {
std::stringstream s(argv[i + 1]);
s >> *(arguments_[j].boolValue_);
arguments_[j].isSet_ = true;
}
}
} else {
Expand Down Expand Up @@ -257,6 +261,12 @@ namespace ttk {
s += " ";
}
}
} else if(arguments_[i].boolValue_) {
if(!arguments_[i].isSet_) {
s += "(not set)";
} else {
s += std::to_string(*(arguments_[i].boolValue_));
}
}

printMsg(s, debug::Priority::INFO, debug::LineMode::NEW, o);
Expand Down Expand Up @@ -310,6 +320,23 @@ namespace ttk {
return 0;
}

/// Registers a boolean command-line argument.
///
/// A new entry is appended to arguments_ and filled in with the given key,
/// description and optional flag. The entry keeps the pointer in boolValue_
/// and is flagged as not being an option (isAnOption_ = false).
///
/// \param key Key identifying the argument.
/// \param value Pointer stored for this argument; must not be null.
/// \param description Description stored alongside the argument.
/// \param optional Stored as the entry's isOptional_ flag.
/// \return 0 on success, -1 when \p value is null (nothing is registered).
inline int setArgument(const std::string &key,
                       bool *value,
                       const std::string &description = "",
                       const bool &optional = false) {

  // A null destination cannot be registered.
  if(value == nullptr) {
    return -1;
  }

  // Append a default-constructed entry, then populate it in place.
  arguments_.emplace_back();
  auto &entry = arguments_.back();
  entry.key_ = key;
  entry.description_ = description;
  entry.isOptional_ = optional;
  entry.boolValue_ = value;
  entry.isAnOption_ = false;

  return 0;
}
int setArgument(const std::string &key,
double *value,
const std::string &description = "",
Expand Down
16 changes: 13 additions & 3 deletions core/base/dimensionReduction/DimensionReduction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,25 @@
using namespace std;
using namespace ttk;

// Shuts down the embedded Python interpreter when Scikit-Learn support is
// compiled in, but only if wasPythonInitialized_ is false, i.e. the
// interpreter was not already running when this object was constructed.
DimensionReduction::~DimensionReduction() {
#ifdef TTK_ENABLE_SCIKIT_LEARN
  // Python was already initialized before this object existed: leave the
  // interpreter alone.
  if(this->wasPythonInitialized_) {
    return;
  }
  // NOTE(review): this finalizes the interpreter even if other
  // DimensionReduction instances created later are still alive -- confirm
  // this is acceptable for callers holding several instances.
  Py_Finalize();
#endif
}



DimensionReduction::DimensionReduction() {
this->setDebugMsgPrefix("DimensionReduction");

#ifdef TTK_ENABLE_SCIKIT_LEARN
auto finalize_callback = []() { Py_Finalize(); };

if(!Py_IsInitialized()) {
this->wasPythonInitialized_ = false;
Py_Initialize();
atexit(finalize_callback);
}

const char *version = Py_GetVersion();
Expand Down
12 changes: 9 additions & 3 deletions core/base/dimensionReduction/DimensionReduction.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ namespace ttk {
public:
DimensionReduction();

~DimensionReduction();

/** Scikit-Learn Dimension Reduction algorithms */
enum class METHOD {
/** Spectral Embedding */
Expand Down Expand Up @@ -209,6 +211,7 @@ namespace ttk {
this->se_Affinity = "precomputed";
this->mds_Dissimilarity = "precomputed";
this->tsne_Metric = "precomputed";
this->tsne_Init = "random";
this->iso_Metric = "precomputed";
} else {
this->se_Affinity = "nearest_neighbors";
Expand Down Expand Up @@ -258,7 +261,7 @@ namespace ttk {
int tsne_MaxIterationProgress{300};
float tsne_GradientThreshold{1e-7};
std::string tsne_Metric{"euclidean"};
std::string tsne_Init{"random"};
std::string tsne_Init{"pca"};
int tsne_Verbose{0};
std::string tsne_Method{"barnes_hut"};
float tsne_Angle{0.5};
Expand All @@ -279,8 +282,8 @@ namespace ttk {
std::string pca_MaxIteration{"auto"};

// TopoMap
size_t topomap_AngularSampleNb;
bool topomap_CheckMST;
size_t topomap_AngularSampleNb{2};
bool topomap_CheckMST{false};
TopoMap::STRATEGY topomap_Strategy{TopoMap::STRATEGY::KRUSKAL};

// testing
Expand All @@ -294,5 +297,8 @@ namespace ttk {
int IsDeterministic{true};
char majorVersion_{'0'};
bool IsInputADistanceMatrix{false};

private:
bool wasPythonInitialized_{true};
};
} // namespace ttk
2 changes: 1 addition & 1 deletion core/base/topoMap/TopoMap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ bool computeConvexHull_aux(const std::vector<double> &coords,
}

// Qhull gives us the coordinates of the points in the convex hull. Here we
// retrive the indices of this points in the list we provided. We will also
// retrieve the indices of these points in the list we provided. We will also
// compute the barycenter of the points in the convex hull.
for(const auto &u : qhull.vertexList()) {
const orgQhull::QhullPoint &qhullPt = u.point();
Expand Down
25 changes: 21 additions & 4 deletions paraview/xmls/DimensionReduction.xml
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,7 @@ IEEE Transactions on Visualization and Computer Graphics 27(2): 561-571, 2020.
panel_visibility="advanced">
<DoubleRangeDomain name="range" min="0.0" max="1.0" />
<Documentation>
Kernel coefficient for rbf kernel. Only used if the affinity is set to rbf.
</Documentation>
</DoubleVectorProperty>

Expand Down Expand Up @@ -348,6 +349,7 @@ IEEE Transactions on Visualization and Computer Graphics 27(2): 561-571, 2020.
panel_visibility="advanced">
<DoubleRangeDomain name="range" min="0.0" max="1.0" />
<Documentation>
Not used if the solver is set to dense.
</Documentation>
</DoubleVectorProperty>

Expand Down Expand Up @@ -435,6 +437,7 @@ IEEE Transactions on Visualization and Computer Graphics 27(2): 561-571, 2020.
panel_visibility="advanced">
<IntRangeDomain name="range" min="0" max="100" />
<Documentation>
Number of times the SMACOF algorithm will be run with different initializations. The final results will be the best output of the runs, determined by the run with the smallest final stress.
</Documentation>
</IntVectorProperty>

Expand All @@ -446,6 +449,7 @@ IEEE Transactions on Visualization and Computer Graphics 27(2): 561-571, 2020.
panel_visibility="advanced">
<IntRangeDomain name="range" min="0" max="100" />
<Documentation>
Maximum number of iterations of the SMACOF algorithm for a single run.
</Documentation>
</IntVectorProperty>

Expand All @@ -468,6 +472,7 @@ IEEE Transactions on Visualization and Computer Graphics 27(2): 561-571, 2020.
panel_visibility="advanced">
<DoubleRangeDomain name="range" min="0.0" max="1.0" />
<Documentation>
Relative tolerance with respect to stress at which to declare convergence.
</Documentation>
</DoubleVectorProperty>

Expand All @@ -477,8 +482,9 @@ IEEE Transactions on Visualization and Computer Graphics 27(2): 561-571, 2020.
number_of_elements="1"
default_values="30"
panel_visibility="advanced">
<DoubleRangeDomain name="range" min="0.0" max="1.0" />
<DoubleRangeDomain name="range" min="5.0" max="50.0" />
<Documentation>
The perplexity is related to the number of nearest neighbors that is used in other manifold learning algorithms. Larger datasets usually require a larger perplexity. Consider selecting a value between 5 and 50.
</Documentation>
</DoubleVectorProperty>

Expand All @@ -490,6 +496,7 @@ IEEE Transactions on Visualization and Computer Graphics 27(2): 561-571, 2020.
panel_visibility="advanced">
<DoubleRangeDomain name="range" min="0.0" max="1.0" />
<Documentation>
Controls how tight natural clusters in the original space are in the embedded space and how much space will be between them. For larger values, the space between natural clusters will be larger in the embedded space.
</Documentation>
</DoubleVectorProperty>

Expand All @@ -501,6 +508,7 @@ IEEE Transactions on Visualization and Computer Graphics 27(2): 561-571, 2020.
panel_visibility="advanced">
<DoubleRangeDomain name="range" min="0.0" max="1.0" />
<Documentation>
The learning rate for t-SNE is usually in the range [10.0, 1000.0]. If the learning rate is too high, the data may look like a ‘ball’ with any point approximately equidistant from its nearest neighbours. If the learning rate is too low, most points may look compressed in a dense cloud with few outliers. If the cost function gets stuck in a bad local minimum increasing the learning rate may help.
</Documentation>
</DoubleVectorProperty>

Expand All @@ -512,6 +520,7 @@ IEEE Transactions on Visualization and Computer Graphics 27(2): 561-571, 2020.
panel_visibility="advanced">
<IntRangeDomain name="range" min="0" max="100" />
<Documentation>
Maximum number of iterations for the optimization. Should be at least 250.
</Documentation>
</IntVectorProperty>

Expand All @@ -523,7 +532,7 @@ IEEE Transactions on Visualization and Computer Graphics 27(2): 561-571, 2020.
panel_visibility="advanced">
<IntRangeDomain name="range" min="0" max="100" />
<Documentation>
Documentation.
Maximum number of iterations without progress before we abort the optimization, used after 250 initial iterations with early exaggeration. This value is rounded to the next multiple of 50.
</Documentation>
</IntVectorProperty>

Expand All @@ -535,6 +544,7 @@ IEEE Transactions on Visualization and Computer Graphics 27(2): 561-571, 2020.
panel_visibility="advanced">
<DoubleRangeDomain name="range" min="0.0" max="1.0" />
<Documentation>
If the gradient norm is below this threshold, the optimization will be stopped.
</Documentation>
</DoubleVectorProperty>

Expand All @@ -544,21 +554,23 @@ IEEE Transactions on Visualization and Computer Graphics 27(2): 561-571, 2020.
number_of_elements="1"
default_values="euclidean"
panel_visibility="advanced">
<Documentation>
<Documentation>
The metric to use when calculating distance between instances in a feature array.
</Documentation>
</StringVectorProperty>

<StringVectorProperty name="tsne_Init"
label="Init"
command="Settsne_Init"
number_of_elements="1"
default_values="random"
default_values="pca"
panel_visibility="advanced">
<StringListDomain name="enum">
<String value="random"/>
<String value="pca"/>
</StringListDomain>
<Documentation>
Initialization of embedding. PCA initialization cannot be used with precomputed distances and is usually more globally stable than random initialization.
</Documentation>
</StringVectorProperty>

Expand All @@ -584,6 +596,7 @@ IEEE Transactions on Visualization and Computer Graphics 27(2): 561-571, 2020.
<String value="exact"/>
</StringListDomain>
<Documentation>
By default the gradient calculation algorithm uses the Barnes-Hut approximation, running in O(NlogN) time. The exact method will run in O(N^2) time. The exact algorithm should be used when nearest-neighbor errors need to be better than 3%.
</Documentation>
</StringVectorProperty>

Expand All @@ -595,6 +608,7 @@ IEEE Transactions on Visualization and Computer Graphics 27(2): 561-571, 2020.
panel_visibility="advanced">
<DoubleRangeDomain name="range" min="0.0" max="1.0" />
<Documentation>
Only used for Barnes-Hut. This is the trade-off between speed and accuracy for Barnes-Hut t-SNE. An angle less than 0.2 has quickly increasing computation time and an angle greater than 0.8 has quickly increasing error.
</Documentation>
</DoubleVectorProperty>

Expand All @@ -621,6 +635,7 @@ IEEE Transactions on Visualization and Computer Graphics 27(2): 561-571, 2020.
panel_visibility="advanced">
<DoubleRangeDomain name="range" min="0.0" max="1.0" />
<Documentation>
Only used if the solver is set to arpack.
</Documentation>
</DoubleVectorProperty>

Expand All @@ -632,6 +647,7 @@ IEEE Transactions on Visualization and Computer Graphics 27(2): 561-571, 2020.
panel_visibility="advanced">
<IntRangeDomain name="range" min="0" max="100" />
<Documentation>
Only used if the solver is set to arpack.
</Documentation>
</IntVectorProperty>

Expand All @@ -647,6 +663,7 @@ IEEE Transactions on Visualization and Computer Graphics 27(2): 561-571, 2020.
<String value="D"/>
</StringListDomain>
<Documentation>
Choose between the Floyd-Warshall and Dijkstra algorithms.
</Documentation>
</StringVectorProperty>

Expand Down
25 changes: 25 additions & 0 deletions standalone/DimensionReduction/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Builds and installs the ttkDimensionReductionCmd executable from main.cpp.
cmake_minimum_required(VERSION 3.21)

project(ttkDimensionReductionCmd)

# Nothing to build when the ttkDimensionReduction target does not exist.
if(NOT TARGET ttkDimensionReduction)
  return()
endif()

add_executable(${PROJECT_NAME} main.cpp)

target_link_libraries(${PROJECT_NAME} PRIVATE
  ttkDimensionReduction
  ttkDataSetToTable
  VTK::IOXML
  VTK::IOInfovis
)

# The installed binary uses the value of CMAKE_INSTALL_RPATH as its RPATH.
set_target_properties(${PROJECT_NAME} PROPERTIES
  INSTALL_RPATH "${CMAKE_INSTALL_RPATH}"
)

install(TARGETS ${PROJECT_NAME}
  RUNTIME DESTINATION ${TTK_INSTALL_BINARY_DIR}
)
Loading
Loading