diff --git a/.gitignore b/.gitignore index e8efe02..c3c2e3a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,5 @@ -build/*.o +build/*/*.o +*.obj combigen combigen.exe -combigen.obj -*.txt \ No newline at end of file +*.txt diff --git a/Makefile b/Makefile index a36e0a2..d49ffd0 100644 --- a/Makefile +++ b/Makefile @@ -1,19 +1,36 @@ CXX = g++ -CXXFLAGS = -Wall -O2 -std=c++11 +CXXFLAGS = -Wall -O2 -std=c++14 +LIBFLAGS = +BOOSTFLAGS = -DUSE_BOOST PREFIX = /usr/local -COMBIGEN_DIR = ./src +COMBIGENDIR = ./src +COMBIGENFILE = combigen.cpp +BUILDDIR = release -all: combigen +all: main -combigen: combigen.o - $(CXX) $(CXXFLAGS) build/combigen.o -o combigen +main: cli_functions.o combigen.o main.o + $(CXX) $(CXXFLAGS) build/$(BUILDDIR)/main.o build/$(BUILDDIR)/combigen.o build/$(BUILDDIR)/cli_functions.o -o combigen $(LIBFLAGS) -combigen.o: $(COMBIGEN_DIR)/combigen.cpp $(COMBIGEN_DIR)/combigen.h - $(CXX) $(CXXFLAGS) $(COMBIGEN_DIR)/combigen.cpp -c -o build/combigen.o +main.o: $(COMBIGENDIR)/main.cpp + $(CXX) $(CXXFLAGS) $(COMBIGENDIR)/main.cpp -c -o build/$(BUILDDIR)/main.o + +combigen.o: $(COMBIGENDIR)/combigen.cpp $(COMBIGENDIR)/combigen.h + $(CXX) $(CXXFLAGS) $(COMBIGENDIR)/$(COMBIGENFILE) -c -o build/$(BUILDDIR)/combigen.o + +cli_functions.o: $(COMBIGENDIR)/cli_functions.cpp $(COMBIGENDIR)/combigen.h + $(CXX) $(CXXFLAGS) $(COMBIGENDIR)/cli_functions.cpp -c -o build/$(BUILDDIR)/cli_functions.o + +.PHONY: perf +perf: CXXFLAGS += $(BOOSTFLAGS) +perf: LIBFLAGS += -lboost_random +perf: COMBIGENFILE = boost_functions.cpp +perf: BUILDDIR = perf +perf: main .PHONY: clean clean: - @rm -f build/*.o combigen + @rm -f build/*/*.o combigen .PHONY: install diff --git a/README.md b/README.md index 4b0df3e..a2d42f6 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ An efficient CLI tool to generate possible combinations written in C++ ## Introduction -Combigen aims to assist with data generation and exploration. 
Given a `.json` input where each key contains an array of string values, combigen can either generate every possible combination or a random subset of the possible combinations. It aims to be memory-efficient while maintaining high-performance. This can be especially useful when large amounts of data are needed for statistical analysis or mock data in an application. +Combigen aims to assist with data generation and exploration. Given a `.json` input where each key contains an array of string values (or simply an array of string arrays), combigen can either generate every possible combination or a random, evenly-distributed subset of the possible combinations. It aims to be memory-efficient while maintaining high-performance. This can be especially useful when large amounts of data are needed for statistical analysis or mock data in an application. It supports output as `.csv` and `.json`. @@ -41,8 +41,38 @@ Usage: combigen [options] -v Display version number ``` -## Installation +## Prerequisites +### Linux/UNIX/Cygwin +**Required:** +* make +* g++ (capable of compiling to the C++14 standard or higher) +**Optional:** +* [Boost](https://www.boost.org), in case you are working with large sets of data + +If you need to install Boost, I recommend utilizing your distro's package manager: + +#### Debian/Ubuntu +`$ sudo apt install libboost-all-dev` + +#### Fedora +`$ sudo dnf install boost` + +#### Arch/Manjaro/Antergos +`$ sudo pacman -Sy boost` + + +### Windows +**Required:** +* Visual Studio 2015 or higher + +**Optional:** +* [Boost](https://www.boost.org), in case you are working with large sets of data. +I recommend downloading the precompiled libraries and placing them somewhere easy to remember on your machine. 
+ + + ## Building From Source and Installing +Note: for Windows, if you do not want to/don't have the ability to compile from the source files you can go to the [Release](https://github.com/iamtheburd/combigen/releases) page and directly download the `combigen.exe` binary from there. This also has the added benefit of being compiled with the Boost libraries already. ### Linux/UNIX @@ -58,6 +88,12 @@ $ git clone --recurse-submodules -j8 https://github.com/iamtheburd/combigen.git $ make ``` +If you need support for larger sets of data (and have Boost installed), instead build with `make perf`: + +``` +$ make perf +``` + 3. Install: ``` @@ -66,10 +102,9 @@ $ sudo make install ### Windows +1. Download Visual Studio 2015+ and install. -1. Download Visual Studio and install first - -2. Clone the repository to some directory +2. Clone the repository to some directory using the above command 3. Open up the Developer Command Prompt (can usually be found by searching in the Start menu) @@ -78,16 +113,21 @@ $ sudo make install 5. Build the file: ``` -> cl src\combigen.cpp /EHsc /O2 +> cl /EHsc /O2 src\cli_functions.cpp src\combigen.cpp src\main.cpp /Fe".\combigen.exe" +``` + +Alternatively, if you need support for large sets of data (and have Boost installed somewhere on your machine), run this command instead. Ensure you fill in the proper path to your Boost directory (this example assumes Boost 1.68.0 installed): + +``` +> cl /EHsc /DUSE_BOOST /O2 /I C:\path\to\boost_1_68_0 src\cli_functions.cpp src\boost_functions.cpp src\main.cpp /Fe".\combigen.exe" /link /LIBPATH:C:\path\to\boost_1_68_0\lib64-msvc-14.1 ``` 6. Place the resulting `combigen.exe` wherever you desire -Alternatively, you can also check out the [Releases](https://github.com/iamtheburd/combigen/releases) tab and directly download the `combigen.exe` from there. 
## Usage -Using the example `.json` data provided, here are some examples showcasing some features: +Using the example `combinations.json` data provided, here are some examples showcasing some features: ### Input @@ -116,6 +156,16 @@ $ combigen -i example_data/combinations.json -r 50000 > output.txt # Generate 5 # and store them in output.txt ``` +### Large Sets of Data + +To demonstrate how `combigen` can even work with large sets of data (when compiled with the Boost library) we can use the example `large_bits.json` file. Unlike the above example data, this file only contains an array of string arrays. In this set of data, the maximum size is equivalent to 3 ^ 256. We can still find the last entry (max size - 1): + +``` +$ combigen -i example_data/large_bits.json -n 139008452377144732764939786789661303114218850808529137991604824430036072629766435941001769154109609521811665540548899435520 +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2 +$ +``` + ### Types @@ -140,8 +190,8 @@ $ You can also change the delimiter with the `-d` flag: ``` -$ combigen -i example_data/combinations.json -r 3 -k -d "|" # Generate 3 random combinations, display the keys, - # and set the delimiter to || +$ combigen -i example_data/combinations.json -r 3 -k -d "||" # Generate 3 random combinations, display the keys, + # and set the delimiter to || Age||First Name||Last Name||Number of Children||Number of Pets||Primary Desktop OS||Primary Mobile Phone OS||Residence||State/Territory 20||Samantha||Harris||3||4||Windows||Other||RV||GA 
25||Matthew||Thomas||2||0||Windows||Other||Town Home||IL @@ -267,6 +317,8 @@ Combigen uses the following open-source libraries: * [skandhurkat/Getopt-for-Visual-Studio](https://github.com/skandhurkat/Getopt-for-Visual-Studio) - Port of the MinGW version of `getopt.h` so that the CLI works on Windows +* [Boost](https://www.boost.org) - For operating with incredibly large sets of data that push the limits of an `unsigned long long`. + ## Contributing Pull-requests are always welcome diff --git a/build/perf/.gitkeep b/build/perf/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/build/release/.gitkeep b/build/release/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/doc/combigen.1 b/doc/combigen.1 index f338ee1..809fb8c 100644 --- a/doc/combigen.1 +++ b/doc/combigen.1 @@ -1,6 +1,6 @@ .\" Manpage for combigen .\" Send an email to tylerburdsall@gmail.com for questions or concerns regarding this man page -.TH man 1 "05 Jun 2018" "1.2.2" "combigen man page" +.TH man 1 "28 Nov 2018" "1.3.0" "combigen man page" .SH NAME combigen \- efficiently generate combinations .SH SYNOPSIS @@ -19,6 +19,7 @@ Usage: combigen [options] -i Take the given .json file as input. Otherwise, input will come from stdin. Example: "{ "foo": [ "a", "b", "c" ], "bar": [ "1", "2" ] }" + Or: "[ ["1", "2"], ["3", "4", "a", "b"] ]" -t Output type (csv or json). 
Defaults to csv diff --git a/example_data/large_bits.json b/example_data/large_bits.json new file mode 100644 index 0000000..9edeaa5 --- /dev/null +++ b/example_data/large_bits.json @@ -0,0 +1 @@ +[["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], 
["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", 
"2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"]] diff --git a/src/boost_functions.cpp b/src/boost_functions.cpp new file mode 100644 index 0000000..3c140fe --- /dev/null +++ b/src/boost_functions.cpp @@ -0,0 +1,158 @@ +/* boost_functions.cpp + * + * Copyright (C) 2018 Tyler Burdsall + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#ifndef BOOST_FUNCTIONS +#define BOOST_FUNCTIONS + +#include "combigen.h" + +// Forward declare functions from cli_functions.h +const void output_result(const vector &result, const generation_args &args, const bool &for_optimization); +const void display_help(void); +const void display_csv_keys(const vector &keys, const string &delim); + +const void generate_random_samples_performance_mode(const generation_args &args) +{ + const vector> results = lazy_cartesian_product::boost_generate_samples(args.pc.combinations, args.sample_size); + if (!args.display_json) + { + if (args.display_keys) + { + display_csv_keys(args.pc.keys, args.delim); + } + } + else + { + cout << "[\n"; + } + for( const vector &row: results) + { + output_result(row, args, true); + if (args.display_json && &row != &results.back()) + { + cout << ","; + } + } + if (args.display_json) + { + cout << "]\n"; + } +} + +const void parse_args(const generation_args &args) +{ + const uint1024_t max_size = lazy_cartesian_product::boost_compute_max_size(args.pc.combinations); + if (args.generate_all_combinations) + { + generate_all(max_size, args); + exit(0); + } + else + { + const uint1024_t sample_size(args.sample_size); + if (sample_size == 0 && args.entry_at_provided && !args.generate_all_combinations) + { + const uint1024_t entry_at(args.entry_at); + vector result = lazy_cartesian_product::boost_entry_at(args.pc.combinations, args.entry_at); + output_result(result, args, false); + exit(0); + } + else if (sample_size >= 0) + { + const uint1024_t n(args.sample_size); + if (n > max_size) + { + cerr << "ERROR: Sample size cannot be greater than maximum possible combinations\n"; + exit(-1); + } + if (args.perf_mode) + { + generate_random_samples_performance_mode(args); + } + else + { + vector range = lazy_cartesian_product::boost_generate_random_indices(args.sample_size, max_size); + generate_random_samples(range, args); + } + exit(0); + } + else + { + display_help(); + exit(-1); + } + } +} + +const 
void generate_random_samples(const vector &range, const generation_args &args) +{ + if (!args.display_json) + { + if (args.display_keys) + { + display_csv_keys(args.pc.keys, args.delim); + } + } + else + { + cout << "[\n"; + } + for (const uint1024_t &i: range) + { + vector result = lazy_cartesian_product::boost_entry_at(args.pc.combinations, i.convert_to()); + output_result(result, args, true); + if (args.display_json && &i != &range.back()) + { + cout << ","; + } + } + if (args.display_json) + { + cout << "]\n"; + } +} + +const void generate_all(const uint1024_t &max_size, const generation_args &args) +{ + if (!args.display_json) + { + if (args.display_keys) + { + display_csv_keys(args.pc.keys, args.delim); + } + } + else + { + cout << "[\n"; + } + const uint1024_t last = max_size - 1; + for (uint1024_t i = 0; i < max_size; ++i) + { + vector result = lazy_cartesian_product::boost_entry_at(args.pc.combinations, i.convert_to()); + output_result(result, args, true); + if (args.display_json && i != last) + { + cout << ","; + } + } + if (args.display_json) + { + cout << "]\n"; + } +} +#endif diff --git a/src/cli_functions.cpp b/src/cli_functions.cpp new file mode 100644 index 0000000..75e3e76 --- /dev/null +++ b/src/cli_functions.cpp @@ -0,0 +1,179 @@ +/* cli_functions.cpp + * + * Copyright (C) 2018 Tyler Burdsall + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#ifndef CLI_FUNCTIONS_CPP +#define CLI_FUNCTIONS_CPP + +#include "cli_functions.h" + +const void display_help(void) +{ + cout << "Usage: combigen [options]" << "\n" + << " -h Displays this help message" << "\n\n" + << " -a Generates every possible combination, restricted to memory mode." << "\n" + << " (Note: this should be used with caution when storing to disk)" << "\n\n" + << " -n Generate combination at nth index" << "\n\n" + << " -i Take the given .json file as input. Otherwise, input will come" << "\n" + << " from stdin." << "\n" + << " Example: \"{ \"foo\": [ \"a\", \"b\", \"c\" ], \"bar\": [ \"1\", \"2\" ] }\"" << "\n" + << " Or: \"[ [\"1\", \"2\"], [\"3\", \"4\", \"a\", \"b\"] ]\"" << "\n\n" + << " -t Output type (csv or json). Defaults to csv" << "\n\n" + << " -r Generate a random sample of size r from" << "\n" + << " the possible set of combinations" << "\n\n" + << " -d Set the delimiter when displaying combinations (default is ',')" << "\n\n" + << " -k Display the keys on the first line of output (for .csv)" << "\n\n" + << " -p Use performance mode to generate combinations faster at the" << "\n" + << " expense of higher RAM usage." 
<< "\n" + << " (Note: this is only recommended for computers with large amounts" << "\n" + << " of RAM when generating a large number of random combinations)" << "\n\n" + << " -v Display version number" << "\n"; +} + + +const void display_csv_keys(const vector &keys, const string &delim) +{ + for (auto& s: keys) + { + if (&s == &keys.back()) + { + cout << s; + } + else + { + cout << s << delim; + } + } + cout << '\n'; +} + +const void output_result(const vector &result, const generation_args &args, const bool &for_optimization) +{ + if (!args.display_json) + { + if (args.display_keys && !for_optimization) + { + display_csv_keys(args.pc.keys, args.delim); + } + for (auto &s: result) + { + if (&s == &result.back()) + { + cout << s; + } + else + { + cout << s << args.delim; + } + } + cout << "\n"; + } + else + { + const unsigned long long key_size = args.pc.keys.size(); + if (!for_optimization) + { + cout << "[\n"; + } + json entry; + for (unsigned long long j = 0; j < key_size; ++j) + { + entry[args.pc.keys[j]] = result[j]; + } + cout << entry.dump(4); + if (!for_optimization) + { + cout << "]\n"; + } + } +} + +const possible_combinations parse_file(const string &input) +{ + possible_combinations pc; + try + { + ifstream i(input); + json json_file; + i >> json_file; + + if (json_file.type() == json::value_t::array) + { + for (json::iterator arr = json_file.begin(); arr != json_file.end(); ++arr) + { + pc.combinations.push_back(arr.value()); + } + } + else + { + for (auto obj = json_file.begin(); obj != json_file.end(); ++obj) + { + pc.keys.push_back(obj.key()); + pc.combinations.push_back(obj.value()); + } + } + } + catch (const nlohmann::detail::parse_error&) + { + cerr << "ERROR: Couldn't parse the given file, please ensure the file is in valid .json format and is accessible." 
<< '\n'; + exit(-1); + } + catch (const nlohmann::detail::type_error&) + { + cerr << "ERROR: All values in input must be an array containing strings" << '\n'; + exit(-1); + } + return pc; +} + +const possible_combinations parse_stdin(const string &input) +{ + possible_combinations pc; + try + { + auto parsed = json::parse(input); + if (parsed.type() == json::value_t::array) + { + for (json::iterator arr = parsed.begin(); arr != parsed.end(); ++arr) + { + pc.combinations.push_back(arr.value()); + } + } + else + { + for (auto obj = parsed.begin(); obj != parsed.end(); ++obj) + { + pc.keys.push_back(obj.key()); + pc.combinations.push_back(obj.value()); + } + } + + } + catch (const nlohmann::detail::type_error&) + { + cerr << "ERROR: All values in input must be an array containing strings" << '\n'; + exit(-1); + } + catch (const nlohmann::detail::parse_error&) + { + cerr << "ERROR: Unable to parse the given input, please ensure a valid .json input has been provided" << '\n'; + exit(-1); + } + return pc; +} + +#endif diff --git a/src/cli_functions.h b/src/cli_functions.h new file mode 100644 index 0000000..e01569a --- /dev/null +++ b/src/cli_functions.h @@ -0,0 +1,29 @@ +/* cli_functions.h + * + * Copyright (C) 2018 Tyler Burdsall + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#ifndef CLI_FUNCTIONS_H +#define CLI_FUNCTIONS_H + +#include "combigen.h" + +const void display_csv_keys(const vector &keys, const string &delim); +const void display_help(void); +const void output_result(const vector &result, const generation_args &args, const bool &for_optimization); +const possible_combinations parse_file(const string &input); +const possible_combinations parse_stdin(const string &input); +#endif diff --git a/src/combigen.cpp b/src/combigen.cpp index 2223ee4..11cb672 100644 --- a/src/combigen.cpp +++ b/src/combigen.cpp @@ -1,231 +1,31 @@ -#include "combigen.h" - -int main(int argc, char* argv[]) -{ - int c; - bool args_provided = false; - generation_args args; - - - while ( (c = getopt(argc, argv, "han:i:t:r:d:kvp")) != -1) - { - switch (c) - { - case 'h': - display_help(); - exit(0); - case 'a': - args_provided = true; - args.generate_all_combinations = true; - break; - case 'n': - if (optarg) - { - istringstream iss (optarg); - string s = optarg; - if (s.at(0) == '-') - { - display_help(); - exit(-1); - } - iss >> args.entry_at; - args.entry_at_provided = true; - args_provided = true; - } - break; - case 'i': - if (optarg) - { - args.input = optarg; - args_provided = true; - } - break; - case 't': - if (optarg) - { - string s = optarg; - if (s == "json") - { - args.display_json = true; - } - else if (s != "csv") - { - display_help(); - exit(-1); - } - } - break; - case 'r': - if (optarg) - { - istringstream iss (optarg); - iss >> args.sample_size; - args_provided = true; - } - break; - case 'd': - if (optarg) - { - args.delim = optarg; - } - break; - case 'k': - args_provided = true; - break; - case 'v': - cout << "combigen - v" << COMBIGEN_MAJOR_VERSION << '.' << COMBIGEN_MINOR_VERSION << '.' 
<< COMBIGEN_REVISION_VERSION << '\n'; - exit(0); - case 'p': - args.perf_mode = true; - break; - default: - display_help(); - exit(-1); - } - } - if (!args_provided) - { - display_help(); - exit(0); - } - if (args.input.empty()) - { - istreambuf_iterator begin(cin), end; - string input_stream(begin, end); - args.input = input_stream; - args.pc = parse_stdin(args.input); - } - else - { - args.pc = parse_file(args.input); - } - - try - { - parse_args(args); - } - catch (const lazycp::errors::index_error&) - { - cerr << "ERROR: the given index cannot be out of range\n"; - } - catch (const lazycp::errors::empty_list_error&) - { - cerr << "ERROR: an empty list cannot be a value for a key\n"; - } - catch (const lazycp::errors::empty_answers_error&) - { - cerr << "ERROR: an empty list cannot be a value for a key\n"; - } - catch (const lazycp::errors::invalid_sample_size_error&) - { - cerr << "ERROR: the given sample size cannot be out of range\n"; - } - catch (...) - { - cerr << "ERROR: an unknown error occurred\n"; - } - return 0; -} +/* combigen.cpp + * + * Copyright (C) 2018 Tyler Burdsall + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ -static const void display_csv_keys(const vector &keys, const string &delim) -{ - for (auto& s: keys) - { - if (&s == &keys.back()) - { - cout << s; - } - else - { - cout << s << delim; - } - } - cout << '\n'; -} +#ifndef COMBIGEN_CPP +#define COMBIGEN_CPP -static const void display_help(void) -{ - cout << "Usage: combigen [options]" << "\n" - << " -h Displays this help message" << "\n\n" - << " -a Generates every possible combination, restricted to memory mode." << "\n" - << " (Note: this should be used with caution when storing to disk)" << "\n\n" - << " -n Generate combination at nth index" << "\n\n" - << " -i Take the given .json file as input. Otherwise, input will come" << "\n" - << " from stdin." << "\n" - << " Example: \"{ \"foo\": [ \"a\", \"b\", \"c\" ], \"bar\": [ \"1\", \"2\" ] }\"" << "\n\n" - << " -t Output type (csv or json). Defaults to csv" << "\n\n" - << " -r Generate a random sample of size r from" << "\n" - << " the possible set of combinations" << "\n\n" - << " -d Set the delimiter when displaying combinations (default is ',')" << "\n\n" - << " -k Display the keys on the first line of output (for .csv)" << "\n\n" - << " -p Use performance mode to generate combinations faster at the" << "\n" - << " expense of higher RAM usage." 
<< "\n" - << " (Note: this is only recommended for computers with large amounts" << "\n" - << " of RAM when generating a large number of random combinations)" << "\n\n" - << " -v Display version number" << "\n"; -} - -static const void generate_all(const unsigned long long &max_size, const generation_args &args) -{ - if (!args.display_json) - { - if (args.display_keys) - { - display_csv_keys(args.pc.keys, args.delim); - } - } - else - { - cout << "[\n"; - } - const unsigned long long last = max_size - 1; - for (unsigned long long i = 0; i < max_size; ++i) - { - vector result = lazy_cartesian_product::entry_at(args.pc.combinations, i); - output_result(result, args, true); - if (args.display_json && i != last) - { - cout << ","; - } - } - if (args.display_json) - { - cout << "]\n"; - } -} - -static const void generate_random_samples(const vector &range, const generation_args &args) -{ - if (!args.display_json) - { - if (args.display_keys) - { - display_csv_keys(args.pc.keys, args.delim); - } - } - else - { - cout << "[\n"; - } - for (const unsigned long long &i: range) - { - vector result = lazy_cartesian_product::entry_at(args.pc.combinations, i); - output_result(result, args, true); - if (args.display_json && &i != &range.back()) - { - cout << ","; - } - } - if (args.display_json) - { - cout << "]\n"; - } -} +#include "combigen.h" +#include "cli_functions.h" -static const void generate_random_samples_performance_mode(const generation_args &args) +const void generate_random_samples_performance_mode(const generation_args &args) { - const vector> results = lazy_cartesian_product::generate_samples(args.pc.combinations, args.sample_size); - + unsigned long long sample_size = stoull(args.sample_size, 0, 10); + const vector> results = lazy_cartesian_product::generate_samples(args.pc.combinations, sample_size); if (!args.display_json) { if (args.display_keys) @@ -251,50 +51,10 @@ static const void generate_random_samples_performance_mode(const generation_args } } -static 
const void output_result(const vector &result, const generation_args &args, const bool &for_optimization) -{ - if (!args.display_json) - { - if (args.display_keys && !for_optimization) - { - display_csv_keys(args.pc.keys, args.delim); - } - for (auto &s: result) - { - if (&s == &result.back()) - { - cout << s; - } - else - { - cout << s << args.delim; - } - } - cout << "\n"; - } - else - { - const unsigned long long key_size = args.pc.keys.size(); - if (!for_optimization) - { - cout << "[\n"; - } - json entry; - for (unsigned long long j = 0; j < key_size; ++j) - { - entry[args.pc.keys[j]] = result[j]; - } - cout << entry.dump(4); - if (!for_optimization) - { - cout << "]\n"; - } - } -} -static const void parse_args(const generation_args &args) +const void parse_args(const generation_args &args) { - unsigned long long max_size = lazy_cartesian_product::compute_max_size(args.pc.combinations); + const unsigned long long max_size = lazy_cartesian_product::compute_max_size(args.pc.combinations); if (args.generate_all_combinations) { generate_all(max_size, args); @@ -302,15 +62,17 @@ static const void parse_args(const generation_args &args) } else { - if (args.sample_size == 0 && args.entry_at_provided && !args.generate_all_combinations) + const unsigned long long sample_size = stoull(args.sample_size, 0, 10); + if (sample_size == 0 && args.entry_at_provided && !args.generate_all_combinations) { - vector result = lazy_cartesian_product::entry_at(args.pc.combinations, args.entry_at); + const unsigned long long entry_at = stoull(args.entry_at, 0, 10); + vector result = lazy_cartesian_product::entry_at(args.pc.combinations, entry_at); output_result(result, args, false); exit(0); } - else if (args.sample_size >= 0) + else if (sample_size >= 0) { - const unsigned long long n = args.sample_size; + const unsigned long long n = stoull(args.sample_size, 0, 10); if (n > max_size) { cerr << "ERROR: Sample size cannot be greater than maximum possible combinations\n"; @@ -335,58 
+97,61 @@ static const void parse_args(const generation_args &args) } } -static const possible_combinations parse_file(const string &input) + +const void generate_all(const unsigned long long &max_size, const generation_args &args) { - possible_combinations pc; - try + if (!args.display_json) { - ifstream i(input); - json json_file; - i >> json_file; - - for (auto obj = json_file.begin(); obj != json_file.end(); ++obj) + if (args.display_keys) { - pc.keys.push_back(obj.key()); - vector vals = json_file[obj.key()]; - pc.combinations.push_back(vals); + display_csv_keys(args.pc.keys, args.delim); } } - catch (const nlohmann::detail::parse_error&) + else { - cerr << "ERROR: Couldn't parse the given file, please ensure the file is in valid .json format and is accessible." << '\n'; - exit(-1); + cout << "[\n"; } - catch (const nlohmann::detail::type_error&) + const unsigned long long last = max_size - 1; + for (unsigned long long i = 0; i < max_size; ++i) { - cerr << "ERROR: All values in input must be an array containing strings" << '\n'; - exit(-1); + vector result = lazy_cartesian_product::entry_at(args.pc.combinations, i); + output_result(result, args, true); + if (args.display_json && i != last) + { + cout << ","; + } + } + if (args.display_json) + { + cout << "]\n"; } - return pc; } -static const possible_combinations parse_stdin(const string &input) +const void generate_random_samples(const vector &range, const generation_args &args) { - possible_combinations pc; - try + if (!args.display_json) { - auto parsed = json::parse(input); - for (auto obj = parsed.begin(); obj != parsed.end(); ++obj) + if (args.display_keys) { - pc.keys.push_back(obj.key()); - vector vals = parsed[obj.key()]; - pc.combinations.push_back(vals); + display_csv_keys(args.pc.keys, args.delim); } } - catch (const nlohmann::detail::type_error&) + else { - cerr << "ERROR: All values in input must be an array containing strings" << '\n'; - exit(-1); + cout << "[\n"; } - catch (const 
nlohmann::detail::parse_error&) + for (const unsigned long long &i: range) { - cerr << "ERROR: Unable to parse the given input, please ensure a valid .json input has been provided" << '\n'; - exit(-1); + vector result = lazy_cartesian_product::entry_at(args.pc.combinations, i); + output_result(result, args, true); + if (args.display_json && &i != &range.back()) + { + cout << ","; + } + } + if (args.display_json) + { + cout << "]\n"; } - return pc; } - +#endif diff --git a/src/combigen.h b/src/combigen.h index f1cf6d6..f3e936d 100644 --- a/src/combigen.h +++ b/src/combigen.h @@ -1,38 +1,49 @@ /* combigen.h - * - * (c) Tyler Burdsall - 2018 + * + * Copyright (C) 2018 Tyler Burdsall + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
*/ -#ifndef COMBIGEN -#define COMBIGEN -#define COMBIGEN_MAJOR_VERSION 1 -#define COMBIGEN_MINOR_VERSION 2 -#define COMBIGEN_REVISION_VERSION 2 +#ifndef COMBIGEN_H +#define COMBIGEN_H #include #include #include -#include #include #include #include - -#if defined(WIN32) || defined(_WIN32) || defined(__WIN32) && !defined(__CYGWIN__) -#include "lib/win-getopt/getopt.h" -#else -#include -#endif - #include #include #include "lib/nlohmann/json/single_include/nlohmann/json.hpp" #include "lib/iamtheburd/lazy-cartesian-product/lazy-cartesian-product.hpp" +#ifdef USE_BOOST +#include +#include +using boost::container::vector; +using namespace boost::multiprecision; +#else +#include +using std::vector; +using std::stoull; +#endif using std::cout; using std::cin; using std::cerr; -using std::vector; using std::string; using std::ifstream; using std::istringstream; @@ -53,8 +64,8 @@ struct generation_args possible_combinations pc; string input; string delim = ","; - unsigned long long entry_at = 0; - unsigned long long sample_size = 0; + string entry_at = "0"; + string sample_size = "0"; bool generate_all_combinations = false; bool display_keys = false; bool display_json = false; @@ -62,14 +73,14 @@ struct generation_args bool entry_at_provided = false; }; -static const void display_csv_keys(const vector &keys, const string &delim); -static const void display_help(void); -static const void generate_all(const unsigned long long &max_size, const generation_args &args); -static const void generate_random_samples(const vector &range, const generation_args &args); -static const void generate_random_samples_performance_mode(const generation_args &args); -static const void output_result(const vector &result, const generation_args &args, const bool &for_optimization); -static const void parse_args(const generation_args &args); -static const possible_combinations parse_file(const string &input); -static const possible_combinations parse_stdin(const string &input); +#ifdef USE_BOOST 
+const void generate_all(const uint1024_t &max_size, const generation_args &args); +const void generate_random_samples(const vector &range, const generation_args &args); +#else +const void generate_all(const unsigned long long &max_size, const generation_args &args); +const void generate_random_samples(const vector &range, const generation_args &args); +#endif +const void generate_random_samples_performance_mode(const generation_args &args); +const void parse_args(const generation_args &args); #endif diff --git a/src/lib/iamtheburd/lazy-cartesian-product b/src/lib/iamtheburd/lazy-cartesian-product index 7dfae2d..7bf2344 160000 --- a/src/lib/iamtheburd/lazy-cartesian-product +++ b/src/lib/iamtheburd/lazy-cartesian-product @@ -1 +1 @@ -Subproject commit 7dfae2dd583fcc98412bcebe2a373ebf7dd8d2d2 +Subproject commit 7bf2344484822b038f18852a068ec2d2e496c39c diff --git a/src/main.cpp b/src/main.cpp new file mode 100644 index 0000000..0cb7e58 --- /dev/null +++ b/src/main.cpp @@ -0,0 +1,159 @@ +/* main.cpp + * + * Copyright (C) 2018 Tyler Burdsall + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#ifndef MAIN +#define MAIN + +#define COMBIGEN_MAJOR_VERSION 1 +#define COMBIGEN_MINOR_VERSION 3 +#define COMBIGEN_REVISION_VERSION 0 + +#if defined(WIN32) || defined(_WIN32) || defined(__WIN32) && !defined(__CYGWIN__) +#include "lib/win-getopt/getopt.h" +#else +#include +#endif + +#include "combigen.h" +#include "cli_functions.h" + +int main(int argc, char* argv[]) +{ + int c; + bool args_provided = false; + generation_args args; + while ( (c = getopt(argc, argv, "han:i:t:r:d:kvp")) != -1) + { + switch (c) + { + case 'h': + display_help(); + exit(0); + case 'a': + args_provided = true; + args.generate_all_combinations = true; + break; + case 'n': + if (optarg) + { + istringstream iss (optarg); + string s = optarg; + if (s.at(0) == '-') + { + display_help(); + exit(-1); + } + iss >> args.entry_at; + args.entry_at_provided = true; + args_provided = true; + } + break; + case 'i': + if (optarg) + { + args.input = optarg; + args_provided = true; + } + break; + case 't': + if (optarg) + { + string s = optarg; + if (s == "json") + { + args.display_json = true; + } + else if (s != "csv") + { + display_help(); + exit(-1); + } + } + break; + case 'r': + if (optarg) + { + istringstream iss (optarg); + iss >> args.sample_size; + args_provided = true; + } + break; + case 'd': + if (optarg) + { + args.delim = optarg; + } + break; + case 'k': + args_provided = true; + break; + case 'v': + cout << "combigen - v" << COMBIGEN_MAJOR_VERSION << '.' << COMBIGEN_MINOR_VERSION << '.' 
<< COMBIGEN_REVISION_VERSION << '\n'; + exit(0); + case 'p': + args.perf_mode = true; + break; + default: + display_help(); + exit(-1); + } + } + if (!args_provided) + { + display_help(); + exit(0); + } + if (args.input.empty()) + { + istreambuf_iterator begin(cin), end; + string input_stream(begin, end); + args.input = input_stream; + args.pc = parse_stdin(args.input); + } + else + { + args.pc = parse_file(args.input); + } + + try + { + parse_args(args); + } + catch (const lazycp::errors::index_error&) + { + cerr << "ERROR: the given index cannot be out of range\n"; + } + catch (const lazycp::errors::empty_list_error&) + { + cerr << "ERROR: an empty list cannot be a value for a key\n"; + } + catch (const lazycp::errors::empty_answers_error&) + { + cerr << "ERROR: an empty list cannot be a value for a key\n"; + } + catch (const lazycp::errors::invalid_sample_size_error&) + { + cerr << "ERROR: the given sample size cannot be out of range\n"; + } + catch (...) + { + cerr << "ERROR: an unknown error occurred\n"; + } + return 0; +} +#endif