diff --git a/.gitignore b/.gitignore index b0de132..e8efe02 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ build/*.o combigen combigen.exe combigen.obj +*.txt \ No newline at end of file diff --git a/README.md b/README.md index b0979ec..0ba9bde 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ Basic commands are listed below: Usage: combigen [options] -h Displays this help message - -a Generates every possible combination + -a Generates every possible combination, restricted to memory mode. (Note: this should be used with caution when storing to disk) -n Generate combination at nth index @@ -33,6 +33,11 @@ Usage: combigen [options] -k Display the keys on the first line of output (for .csv) + -p Use performance mode to generate combinations faster at the + expense of higher RAM usage. + (Note: this is only recommended for computers with large amounts + of RAM when generating a large number of random combinations) + -v Display version number ``` @@ -102,6 +107,7 @@ Or you can feed in an input from `stdin`: $ cat example_data/combinations.json | combigen -n 100 # Find the combination at index 100 ``` +Alternatively, if you want to manually type in your string, the program will await user input until EOF. For Windows, this is `CTRL+Z`. For Linux/UNIX, this is `CTRL+D`. ### Output @@ -207,6 +213,51 @@ $ combigen -i example_data/combinations.json -r 5 -t json # Generate 5 random c $ ``` +## Using Performance Mode + +When generating a large number of combinations, there comes a desire to speed up the process. For this case, use the `-p` flag to set combigen to switch to Performance Mode. This will generate all of the combinations at once before outputting them to `stdout`. **Note: this is only recommended for systems with a large amount of RAM when generating incredibly large sets of data**. + +This begins to make a difference when the generated sets of data start to become quite large, as opposed to the default Memory Mode. 
See the results of some tests below for more information. + +For now, when generating every possible combination this will be performed in Memory Mode to save RAM space. + +### Performance Tests + +To visualize the performance differences between Memory Mode and Performance Mode, a small test was performed to illustrate where Performance Mode begins to offer a significant advantage. + +#### Testing Parameters + +Each test timed how long it takes to generate *n* random combinations and write them to disk, repeated 5 times. Then, for each value of *n*, the average of these 5 iterations was recorded and graphed. + +The following tests were performed on a Lenovo ThinkPad T460 with the following specs: + +* Windows 7 Enterprise +* 256GB SSD w/full disk encryption +* 8GB RAM +* Intel Core i5 - 6300U @ 2.40GHz + +The environment was tested with the following: + +* Compiled with Visual Studio Developer Tools 2017 with the compile flags listed above +* Git Bash as a shell to utilize the UNIX `time` function +* Each iteration was generated using the command `time ./combigen.exe -i example_data/combinations.json -r "$n" > output.txt`, where `$n` is the amount of random combinations + +The source code for these shell scripts can be found in the [performance_tests](performance_tests/) folder. + +#### Testing Results + +The results from the test were graphed: + +![Testing Results](performance_tests/performance-mode-vs-memory-mode-test-results.png) + +#### Conclusion + +Based on the results above, Performance Mode will only start to offer real benefits when the amount of combinations is quite large. However, this should only be used when the computer can truly handle storing all of these combinations in RAM. 
Ultimately, it boils down to two factors: + +* If you can spare time and don't want to bog down your machine (or the amount of generated combinations is small), stick with the default Memory Mode. +* If you have a well-spec'd machine and can sacrifice the RAM when generating a large amount of combinations, choose Performance Mode. + +Regardless, a large amount of combinations requires a large amount of disk space, so take this into account when generating data. ## Third-Party Libraries @@ -223,4 +274,4 @@ Combigen uses the following open-source libraries: Pull-requests are always welcome ## License -Licensed under GPLv3, see [LICENSE](https://github.com/iamtheburd/blob/master/LICENSE) +Licensed under GPLv3, see [LICENSE](https://github.com/iamtheburd/combigen/blob/master/LICENSE) diff --git a/performance_tests/performance-mode-vs-memory-mode-test-results.png b/performance_tests/performance-mode-vs-memory-mode-test-results.png new file mode 100644 index 0000000..bc261b4 Binary files /dev/null and b/performance_tests/performance-mode-vs-memory-mode-test-results.png differ diff --git a/performance_tests/test_memory_mode b/performance_tests/test_memory_mode new file mode 100644 index 0000000..e56bd3f --- /dev/null +++ b/performance_tests/test_memory_mode @@ -0,0 +1,13 @@ +#!/bin/bash + +for amount in 100000 500000 1000000 1500000 2000000 2500000 3000000 +do + for i in {1..5} + do + echo "$amount - $i"; + time ./combigen.exe -i example_data/combinations.json -r "$amount" -k > output.txt + echo + done +done +rm output.txt + diff --git a/performance_tests/test_performance_mode b/performance_tests/test_performance_mode new file mode 100644 index 0000000..d75ca52 --- /dev/null +++ b/performance_tests/test_performance_mode @@ -0,0 +1,13 @@ +#!/bin/bash + +for amount in 100000 500000 1000000 1500000 2000000 2500000 3000000 +do + for i in {1..5} + do + echo "$amount - $i"; + time ./combigen.exe -i example_data/combinations.json -r "$amount" -k -p > output.txt + echo + done 
+done +rm output.txt + diff --git a/src/combigen.cpp b/src/combigen.cpp index cbfde69..3f554fd 100644 --- a/src/combigen.cpp +++ b/src/combigen.cpp @@ -2,10 +2,12 @@ int main(int argc, char* argv[]) { - int c; + int c; + bool args_provided = false; generation_args args; - while ( (c = getopt(argc, argv, "han:i:t:r:d:kv")) != -1) + + while ( (c = getopt(argc, argv, "han:i:t:r:d:kvp")) != -1) { switch (c) { @@ -13,6 +15,7 @@ int main(int argc, char* argv[]) display_help(); exit(0); case 'a': + args_provided = true; args.generate_all_combinations = true; break; case 'n': @@ -20,12 +23,14 @@ int main(int argc, char* argv[]) { istringstream iss (optarg); iss >> args.entry_at; + args_provided = true; } break; case 'i': if (optarg) { args.input = optarg; + args_provided = true; } break; case 't': @@ -48,6 +53,7 @@ int main(int argc, char* argv[]) { istringstream iss (optarg); iss >> args.sample_size; + args_provided = true; } break; case 'd': @@ -57,16 +63,24 @@ int main(int argc, char* argv[]) } break; case 'k': - args.display_keys = true; + args_provided = true; break; case 'v': cout << "combigen - v" << COMBIGEN_MAJOR_VERSION << '.' << COMBIGEN_MINOR_VERSION << '.' << COMBIGEN_REVISION_VERSION << '\n'; exit(0); + case 'p': + args.perf_mode = true; + break; default: display_help(); exit(-1); } } + if (!args_provided) + { + display_help(); + exit(0); + } if (args.input.empty()) { istreambuf_iterator begin(cin), end; @@ -78,6 +92,7 @@ int main(int argc, char* argv[]) { args.pc = parse_file(args.input); } + try { parse_args(args); @@ -125,7 +140,7 @@ static const void display_help(void) { cout << "Usage: combigen [options]" << "\n" << " -h Displays this help message" << "\n\n" - << " -a Generates every possible combination" << "\n" + << " -a Generates every possible combination, restricted to memory mode." 
<< "\n" << " (Note: this should be used with caution when storing to disk)" << "\n\n" << " -n Generate combination at nth index" << "\n\n" << " -i Take the given .json file as input. Otherwise, input will come" << "\n" @@ -136,6 +151,10 @@ static const void display_help(void) << " the possible set of combinations" << "\n\n" << " -d Set the delimiter when displaying combinations (default is ',')" << "\n\n" << " -k Display the keys on the first line of output (for .csv)" << "\n\n" + << " -p Use performance mode to generate combinations faster at the" << "\n" + << " expense of higher RAM usage." << "\n" + << " (Note: this is only recommended for computers with large amounts" << "\n" + << " of RAM when generating a large number of random combinations)" << "\n\n" << " -v Display version number" << "\n"; } @@ -196,6 +215,35 @@ static const void generate_random_samples(const vector &range, const gener } } +static const void generate_random_samples_performance_mode( const generation_args &args) +{ + const vector> results = lazy_cartesian_product::generate_samples(args.pc.combinations, args.sample_size); + + if (!args.display_json) + { + if (args.display_keys) + { + display_csv_keys(args.pc.keys, args.delim); + } + } + else + { + cout << "[\n"; + } + for( const vector &row: results) + { + output_result(row, args, true); + if (args.display_json && &row != &results.back()) + { + cout << ","; + } + } + if (args.display_json) + { + cout << "]\n"; + } +} + static const void output_result(const vector &result, const generation_args &args, const bool &for_optimization) { if (!args.display_json) @@ -261,8 +309,15 @@ static const void parse_args(const generation_args &args) cerr << "ERROR: Sample size cannot be greater than maximum possible combinations\n"; exit(-1); } - vector range = lazy_cartesian_product::generate_random_indices(n, max_size); - generate_random_samples(range, args); + if (args.perf_mode) + { + generate_random_samples_performance_mode(args); + } + else + { + 
vector range = lazy_cartesian_product::generate_random_indices(n, max_size); + generate_random_samples(range, args); + } exit(0); } else @@ -326,4 +381,5 @@ static const possible_combinations parse_stdin(const string &input) exit(-1); } return pc; -} \ No newline at end of file +} + diff --git a/src/combigen.h b/src/combigen.h index 4602ccc..9e882c5 100644 --- a/src/combigen.h +++ b/src/combigen.h @@ -7,7 +7,7 @@ #define COMBIGEN_MAJOR_VERSION 1 #define COMBIGEN_MINOR_VERSION 2 -#define COMBIGEN_REVISION_VERSION 0 +#define COMBIGEN_REVISION_VERSION 2 #include #include @@ -58,12 +58,14 @@ struct generation_args bool generate_all_combinations = false; bool display_keys = false; bool display_json = false; + bool perf_mode = false; }; static const void display_csv_keys(const vector &keys, const string &delim); static const void display_help(void); static const void generate_all(const long &max_size, const generation_args &args); static const void generate_random_samples(const vector &range, const generation_args &args); +static const void generate_random_samples_performance_mode(const generation_args &args); static const void output_result(const vector &result, const generation_args &args, const bool &for_optimization); static const void parse_args(const generation_args &args); static const possible_combinations parse_file(const string &input);