diff --git a/Makefile b/Makefile index 1bb9a9e..c5cf741 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,21 @@ -build: +build: ripser + + +all: ripser ripser_coeff ripser_dipha ripser_dipha_coeff + + +ripser: ripser.cpp c++ -std=c++11 ripser.cpp -o ripser -Ofast -D NDEBUG -D FILE_FORMAT_LOWER_TRIANGULAR_CSV -coefficients: +ripser_coeff: ripser.cpp c++ -std=c++11 ripser.cpp -o ripser_coeff -Ofast -D NDEBUG -D FILE_FORMAT_LOWER_TRIANGULAR_CSV -D USE_COEFFICIENTS + +ripser_dipha: ripser.cpp + c++ -std=c++11 ripser.cpp -o ripser_dipha -Ofast -D NDEBUG -D FILE_FORMAT_DIPHA + +ripser_dipha_coeff: ripser.cpp + c++ -std=c++11 ripser.cpp -o ripser_dipha_coeff -Ofast -D NDEBUG -D FILE_FORMAT_DIPHA -D USE_COEFFICIENTS + + +clean: + rm ripser ripser_coeff ripser_dipha ripser_dipha_coeff diff --git a/README.md b/README.md index a3fa958..cfe2f2f 100644 --- a/README.md +++ b/README.md @@ -1,101 +1,101 @@ -# Ripser - -Copyright © 2015–2016 [Ulrich Bauer]. - - -### Description - -Ripser is a lean C++ code for the computation of Vietoris–Rips persistence barcodes. It can do just this one thing, but does it extremely well. - -The main features of Ripser: - - - time- and memory-efficient - - support for coefficients in prime finite fields - - less than 1000 lines of code in a single C++ file - - no external dependencies (optional support for Google's [sparsehash]) - -Currently, Ripser outperforms other codes ([Dionysus], [DIPHA], [GUDHI], [Perseus], [PHAT]) by a factor of at least 40 in computation time and a factor of at least 15 in memory efficiency. (Note that [PHAT] does not contain code for generating Vietoris–Rips filtrations). - -Input formats currently supported by Ripser: - - - comma-separated lower triangular distance matrix (preferred) - - comma-separated lower triangular distance matrix (MATLAB output from the function `pdist`) - - [DIPHA] distance matrix data - -Ripser's efficiency is based on a few important concepts and principles: - - - Compute persistent *co*homology - - Don't compute information that is never needed - (for the experts: employ the *clearing* optimization, aka *persistence with a twist*) - - Don't store information that can be readily recomputed - - Take obvious shortcuts (*apparent persistence pairs*) - - -### Version -[Latest release][latest-release]: 1.0 (July 2016) - - -### Building - -Ripser requires a C++11 compiler. Here is how to obtain, build, and run Ripser: - -```sh -git clone https://github.com/Ripser/ripser.git -cd ripser -make -./ripser examples/sphere_3_192.lower_distance_matrix -``` - - -### Options - -Ripser supports several compile-time options. They are switched on by defining the C preprocessor macros listed below, either using `#define` in the code or by passing an argument to the compiler. The following options are supported: - - - `ASSEMBLE_REDUCTION_MATRIX`: store the reduction matrix; may speed up computation but will also increase memory usage - - `USE_COEFFICIENTS`: enable support for coeffitients in a prime field - - `INDICATE_PROGRESS`: indicate the current progress in the console - - `PRINT_PERSISTENCE_PAIRS`: output the computed persistence pairs (enabled by default) - - `USE_GOOGLE_HASHMAP`: enable support for Google's [sparsehash] data structure; may further reducue memory footprint - -Furthermore, one of the following options needs to be chosen to specify the format for the input files: - - - `FILE_FORMAT_LOWER_TRIANGULAR_CSV`: lower triangular distance matrix; a comma separated list of the distance matrix entries below the diagonal, sorted lexicographically by row index and column index - - `FILE_FORMAT_UPPER_TRIANGULAR_CSV`: upper triangular distance matrix; similar to the previous, but for the entries above the diagonal; suitable for output from the MATLAB function `pdist`, saved in a CSV file - - `FILE_FORMAT_DIPHA`: DIPHA distance matrix as described on the [DIPHA] website - -For example, to build with support for coefficients: - -```sh -$ c++ -std=c++11 ripser.cpp -o ripser -Ofast -D NDEBUG -D FILE_FORMAT_LOWER_TRIANGULAR_CSV -D USE_COEFFICIENTS +# Ripser + +Copyright © 2015–2016 [Ulrich Bauer]. + + +### Description + +Ripser is a lean C++ code for the computation of Vietoris–Rips persistence barcodes. It can do just this one thing, but does it extremely well. + +The main features of Ripser: + + - time- and memory-efficient + - less than 1000 lines of code in a single C++ file + - support for coefficients in prime finite fields + - no external dependencies (optional support for Google's [sparsehash]) + +Currently, Ripser outperforms other codes ([Dionysus], [DIPHA], [GUDHI], [Perseus], [PHAT]) by a factor of more than 40 in computation time and a factor of more than 15 in memory efficiency. (Note that [PHAT] does not contain code for generating Vietoris–Rips filtrations). + +Input formats currently supported by Ripser: + + - comma-separated values lower triangular distance matrix (preferred) + - comma-separated values upper triangular distance matrix (MATLAB output from the function `pdist`) + - [DIPHA] distance matrix data + +Ripser's efficiency is based on a few important concepts and principles: + + - Compute persistent *co*homology + - Don't compute information that is never needed + (for the experts: employ the *clearing* optimization, aka *persistence with a twist*) + - Don't store information that can be readily recomputed + - Take obvious shortcuts (*apparent persistence pairs*) + + +### Version +[Latest release][latest-release]: 1.0 (July 2016) + + +### Building + +Ripser requires a C++11 compiler. Here is how to obtain, build, and run Ripser: + +```sh +git clone https://github.com/Ripser/ripser.git +cd ripser +make +./ripser examples/sphere_3_192.lower_distance_matrix +``` + + +### Options + +Ripser supports several compile-time options. They are switched on by defining the C preprocessor macros listed below, either using `#define` in the code or by passing an argument to the compiler. The following options are supported: + + - `ASSEMBLE_REDUCTION_MATRIX`: store the reduction matrix; may speed up computation but will also increase memory usage + - `USE_COEFFICIENTS`: enable support for coefficients in a prime field + - `INDICATE_PROGRESS`: indicate the current progress in the console + - `PRINT_PERSISTENCE_PAIRS`: output the computed persistence pairs (enabled by default) + - `USE_GOOGLE_HASHMAP`: enable support for Google's [sparsehash] data structure; may further reducue memory footprint + +Furthermore, one of the following options needs to be chosen to specify the format for the input files: + + - `FILE_FORMAT_LOWER_TRIANGULAR_CSV`: lower triangular distance matrix; a comma (or whitespace, or other non-numerical character) separated list of the distance matrix entries below the diagonal, sorted lexicographically by row index, then column index + - `FILE_FORMAT_UPPER_TRIANGULAR_CSV`: upper triangular distance matrix; similar to the previous, but for the entries above the diagonal; suitable for output from the MATLAB function `pdist`, saved in a CSV file + - `FILE_FORMAT_DIPHA`: DIPHA distance matrix as described on the [DIPHA] website + +For example, to build with support for coefficients: + +```sh +$ c++ -std=c++11 ripser.cpp -o ripser -Ofast -D NDEBUG -D FILE_FORMAT_LOWER_TRIANGULAR_CSV -D USE_COEFFICIENTS ``` The following options are supported at the command line: - - `--top_dim k`: compute persistent homology up to dimension *k* + - `--dim k`: compute persistent homology up to dimension *k* - `--threshold t`: compute Rips complexes up to diameter *t* - `--modulus p`: compute homology with coefficients in the prime field Z/*p*Z (only available when build with the option `USE_COEFFICIENTS`) - -### Planned features - -The following features are currently planned for future versions: - - - support for point clouds - - computation of representative cycles for persistent homology (currenly only *co*cycles are computed) - - support for sparse distance matrices - - -### License - -[LGPL] 3.0 - - -[Ulrich Bauer]: -[latest-release]: -[Dionysus]: -[DIPHA]: -[PHAT]: -[Perseus]: -[GUDHI]: -[sparsehash]: + +### Planned features + +The following features are currently planned for future versions: + + - support for point clouds + - computation of representative cycles for persistent homology (currenly only *co*cycles are computed) + - support for sparse distance matrices + + +### License + +[LGPL] 3.0 + + +[Ulrich Bauer]: +[latest-release]: +[Dionysus]: +[DIPHA]: +[PHAT]: +[Perseus]: +[GUDHI]: +[sparsehash]: [LGPL]: \ No newline at end of file diff --git a/examples/projective_plane.lower_distance_matrix b/examples/projective_plane.lower_distance_matrix index 29134ec..a7ad7e6 100644 --- a/examples/projective_plane.lower_distance_matrix +++ b/examples/projective_plane.lower_distance_matrix @@ -1,13 +1,13 @@ -1, -1,2, -1,2,2, -1,2,2,2, -1,1,2,1,2, -1,2,1,2,1,2, -1,1,2,2,1,2,2, -1,2,1,1,2,2,2,2, -2,1,1,2,2,1,1,1,1, -2,2,2,2,2,1,1,2,2,1, -2,2,2,2,2,2,2,1,1,1,2, -2,2,2,1,1,1,1,1,1,2,1,1, \ No newline at end of file +1 +1,2 +1,2,2 +1,2,2,2 +1,1,2,1,2 +1,2,1,2,1,2 +1,1,2,2,1,2,2 +1,2,1,1,2,2,2,2 +2,1,1,2,2,1,1,1,1 +2,2,2,2,2,1,1,2,2,1 +2,2,2,2,2,2,2,1,1,1,2 +2,2,2,1,1,1,1,1,1,2,1,1 \ No newline at end of file diff --git a/ripser.cpp b/ripser.cpp index 3661b9c..68d0811 100644 --- a/ripser.cpp +++ b/ripser.cpp @@ -10,6 +10,7 @@ //#define USE_GOOGLE_HASHMAP +#include #include #include #include @@ -362,11 +363,10 @@ template diameter_entry_t pop_pivot(Heap& column, coefficient_t column.pop(); if (coefficient == 0) { - if (column.empty()) { + if (column.empty()) return diameter_entry_t(-1); - } else { + else pivot = column.top(); - } } } while (!column.empty() && get_index(column.top()) == get_index(pivot)); if (get_index(pivot) != -1) { set_coefficient(pivot, coefficient); } @@ -401,6 +401,11 @@ void assemble_columns_to_reduce(std::vector& columns_to_reduce columns_to_reduce.clear(); +#ifdef INDICATE_PROGRESS + std::cout << "\033[K" + << "assembling " << num_simplices << " columns" << std::flush << "\r"; +#endif + for (index_t index = 0; index < num_simplices; ++index) { if (pivot_column_index.find(index) == pivot_column_index.end()) { value_t diameter = comp.diameter(index); @@ -408,8 +413,16 @@ void assemble_columns_to_reduce(std::vector& columns_to_reduce } } +#ifdef INDICATE_PROGRESS + std::cout << "\033[K" + << "sorting " << num_simplices << " columns" << std::flush << "\r"; +#endif + std::sort(columns_to_reduce.begin(), columns_to_reduce.end(), greater_diameter_or_smaller_index()); +#ifdef INDICATE_PROGRESS + std::cout << "\033[K"; +#endif } template class compressed_sparse_matrix { @@ -496,9 +509,10 @@ void compute_pairs(std::vector& columns_to_reduce, value_t diameter = get_diameter(column_to_reduce); #ifdef INDICATE_PROGRESS - std::cout << "\033[K" - << "reducing column " << i + 1 << "/" << columns_to_reduce.size() << " (diameter " - << diameter << ")" << std::flush << "\r"; + if ((i + 1) % 1000 == 0) + std::cout << "\033[K" + << "reducing column " << i + 1 << "/" << columns_to_reduce.size() + << " (diameter " << diameter << ")" << std::flush << "\r"; #endif index_t j = i; @@ -666,6 +680,12 @@ bool is_prime(const coefficient_t n) { return true; } +template T read(std::ifstream& s) { + T result; + s.read(reinterpret_cast(&result), sizeof(T)); + return result; // on little endian: boost::endian::little_to_native(result); +} + void print_usage_and_exit(int exit_code) { std::cerr << "Usage: " << "ripser " @@ -674,7 +694,7 @@ void print_usage_and_exit(int exit_code) { std::cerr << "Options:" << std::endl; std::cerr << std::endl; std::cerr << " --help print this screen" << std::endl; - std::cerr << " --top_dim compute persistent homology up to dimension " << std::endl; + std::cerr << " --dim compute persistent homology up to dimension " << std::endl; std::cerr << " --threshold compute Rips complexes up to diameter " << std::endl; #ifdef USE_COEFFICIENTS std::cerr << " --modulus

compute homology with coefficients in the prime field Z/

Z" @@ -703,7 +723,7 @@ int main(int argc, char** argv) { const std::string arg(argv[i]); if (arg == "--help") { print_usage_and_exit(0); - } else if (arg == "--top_dim") { + } else if (arg == "--dim") { std::string parameter = std::string(argv[++i]); size_t next_pos; dim_max = std::stol(parameter, &next_pos); @@ -735,33 +755,24 @@ int main(int argc, char** argv) { std::vector diameters; #ifdef FILE_FORMAT_DIPHA - int64_t magic_number; - input_stream.read(reinterpret_cast(&magic_number), sizeof(int64_t)); - if (magic_number != 8067171840) { + + if (read(input_stream) != 8067171840) { std::cerr << filename << " is not a Dipha file (magic number: 8067171840)" << std::endl; exit(-1); } - int64_t file_type; - input_stream.read(reinterpret_cast(&file_type), sizeof(int64_t)); - if (file_type != 7) { + if (read(input_stream) != 7) { std::cerr << filename << " is not a Dipha distance matrix (file type: 7)" << std::endl; exit(-1); } - int64_t n; - input_stream.read(reinterpret_cast(&n), sizeof(int64_t)); + index_t n = read(input_stream); distance_matrix dist_full; dist_full.distances = std::vector>(n, std::vector(n)); - for (int i = 0; i < n; ++i) { - for (int j = 0; j < n; ++j) { - double val; - input_stream.read(reinterpret_cast(&val), sizeof(double)); - dist_full.distances[i][j] = val; - } - } + for (int i = 0; i < n; ++i) + for (int j = 0; j < n; ++j) dist_full.distances[i][j] = read(input_stream); std::cout << "distance matrix with " << n << " points" << std::endl; compressed_lower_distance_matrix_adapter dist(diameters, dist_full); @@ -770,9 +781,8 @@ int main(int argc, char** argv) { #ifdef FILE_FORMAT_UPPER_TRIANGULAR_CSV std::vector distances; - std::string value_string; - while (std::getline(input_stream, value_string, ',')) - distances.push_back(std::stod(value_string)); + value_t value; + while ((input_stream >> value).ignore()) distances.push_back(value); compressed_upper_distance_matrix_adapter dist_upper(distances); @@ -785,9 +795,8 @@ int main(int argc, char** argv) { #ifdef FILE_FORMAT_LOWER_TRIANGULAR_CSV std::vector& distances = diameters; - std::string value_string; - while (std::getline(input_stream, value_string, ',')) - distances.push_back(std::stod(value_string)); + value_t value; + while ((input_stream >> value).ignore()) distances.push_back(value); compressed_lower_distance_matrix_adapter dist(distances); @@ -799,7 +808,7 @@ int main(int argc, char** argv) { auto result = std::minmax_element(dist.distances.begin(), dist.distances.end()); std::cout << "value range: [" << *result.first << "," << *result.second << "]" << std::endl; - assert(dim_max <= n - 2); + dim_max = std::min(dim_max, n - 2); binomial_coeff_table binomial_coeff(n, dim_max + 2); std::vector multiplicative_inverse(multiplicative_inverse_vector(modulus));