Skip to content

Commit

Permalink
Merge pull request #9 from iamtheburd/include-boost
Browse files Browse the repository at this point in the history
v1.3.0 - Add support for larger sets of data with Boost
  • Loading branch information
Tyler Burdsall authored Nov 29, 2018
2 parents 38842d0 + 3bdbd4f commit 734fc4a
Show file tree
Hide file tree
Showing 14 changed files with 726 additions and 354 deletions.
6 changes: 3 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
build/*.o
build/*/*.o
*.obj
combigen
combigen.exe
combigen.obj
*.txt
*.txt
33 changes: 25 additions & 8 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,19 +1,36 @@
CXX = g++
CXXFLAGS = -Wall -O2 -std=c++11
CXXFLAGS = -Wall -O2 -std=c++14
LIBFLAGS =
BOOSTFLAGS = -DUSE_BOOST
PREFIX = /usr/local
COMBIGEN_DIR = ./src
COMBIGENDIR = ./src
COMBIGENFILE = combigen.cpp
BUILDDIR = release

all: combigen
all: main

combigen: combigen.o
$(CXX) $(CXXFLAGS) build/combigen.o -o combigen
# Link the three object files produced by the rules below into the `combigen`
# binary. Every object path must use $(BUILDDIR): a misspelled variable name
# expands to the empty string and points the linker at a non-existent file.
main: cli_functions.o combigen.o main.o
$(CXX) $(CXXFLAGS) build/$(BUILDDIR)/main.o build/$(BUILDDIR)/combigen.o build/$(BUILDDIR)/cli_functions.o -o combigen $(LIBFLAGS)

combigen.o: $(COMBIGEN_DIR)/combigen.cpp $(COMBIGEN_DIR)/combigen.h
$(CXX) $(CXXFLAGS) $(COMBIGEN_DIR)/combigen.cpp -c -o build/combigen.o
main.o: $(COMBIGENDIR)/main.cpp
$(CXX) $(CXXFLAGS) $(COMBIGENDIR)/main.cpp -c -o build/$(BUILDDIR)/main.o

combigen.o: $(COMBIGENDIR)/combigen.cpp $(COMBIGENDIR)/combigen.h
$(CXX) $(CXXFLAGS) $(COMBIGENDIR)/$(COMBIGENFILE) -c -o build/$(BUILDDIR)/combigen.o

cli_functions.o: $(COMBIGENDIR)/cli_functions.cpp $(COMBIGENDIR)/combigen.h
$(CXX) $(CXXFLAGS) $(COMBIGENDIR)/cli_functions.cpp -c -o build/$(BUILDDIR)/cli_functions.o

.PHONY: perf
perf: CXXFLAGS += $(BOOSTFLAGS)
perf: LIBFLAGS += -lboost_random
perf: COMBIGENFILE = boost_functions.cpp
perf: BUILDDIR = perf
perf: main

.PHONY: clean
clean:
@rm -f build/*.o combigen
@rm -f build/*/*.o combigen


.PHONY: install
Expand Down
72 changes: 62 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
An efficient CLI tool to generate possible combinations written in C++

## Introduction
Combigen aims to assist with data generation and exploration. Given a `.json` input where each key contains an array of string values, combigen can either generate every possible combination or a random subset of the possible combinations. It aims to be memory-efficient while maintaining high-performance. This can be especially useful when large amounts of data are needed for statistical analysis or mock data in an application.
Combigen aims to assist with data generation and exploration. Given a `.json` input where each key contains an array of string values (or simply an array of string arrays), combigen can either generate every possible combination or a random, evenly-distributed subset of the possible combinations. It aims to be memory-efficient while maintaining high-performance. This can be especially useful when large amounts of data are needed for statistical analysis or mock data in an application.

It supports output as `.csv` and `.json`.

Expand Down Expand Up @@ -41,8 +41,38 @@ Usage: combigen [options]
-v Display version number
```

## Installation
## Prerequisites
### Linux/UNIX/Cygwin
**Required:**
* make
* g++ (capable of compiling to the C++14 standard or higher)

**Optional:**
* [Boost](https://www.boost.org), in case you are working with large sets of data

If you need to install Boost, I recommend utilizing your distro's package manager:

#### Debian/Ubuntu
`$ sudo apt install libboost-all-dev`

#### Fedora
`$ sudo dnf install boost`

#### Arch/Manjaro/Antergos
`$ sudo pacman -Sy boost`


### Windows
**Required:**
* Visual Studio 2015 or higher

**Optional:**
* [Boost](https://www.boost.org), in case you are working with large sets of data.
I recommend downloading the precompiled libraries and placing them somewhere easy to remember on your machine.


## Building From Source and Installing
Note: for Windows, if you do not want to (or are unable to) compile from the source files, you can go to the [Release](https://github.com/iamtheburd/combigen/releases) page and directly download the `combigen.exe` binary from there. This also has the added benefit of being compiled with the Boost libraries already.

### Linux/UNIX

Expand All @@ -58,6 +88,12 @@ $ git clone --recurse-submodules -j8 https://github.com/iamtheburd/combigen.git
$ make
```

If you need support for larger sets of data (and have Boost installed), instead build with `make perf`:

```
$ make perf
```

3. Install:

```
Expand All @@ -66,10 +102,9 @@ $ sudo make install

### Windows

1. Download Visual Studio 2015+ and install.

1. Download Visual Studio and install first

2. Clone the repository to some directory
2. Clone the repository to some directory using the above command

3. Open up the Developer Command Prompt (can usually be found by searching in the Start menu)

Expand All @@ -78,16 +113,21 @@ $ sudo make install
5. Build the file:

```
> cl src\combigen.cpp /EHsc /O2
> cl /EHsc /O2 src\cli_functions.cpp src\combigen.cpp src\main.cpp /Fe".\combigen.exe"
```

Alternatively, if you need support for large sets of data (and have Boost installed somewhere on your machine), run this command instead. Ensure you fill in the proper path to your Boost directory (this example assumes Boost 1.68.0 is installed):

```
> cl /EHsc /DUSE_BOOST /O2 /I C:\path\to\boost_1_68_0 src\cli_functions.cpp src\boost_functions.cpp src\main.cpp /Fe".\combigen.exe" /link /LIBPATH:C:\path\to\boost_1_68_0\lib64-msvc-14.1
```

6. Place the resulting `combigen.exe` wherever you desire

Alternatively, you can also check out the [Releases](https://github.com/iamtheburd/combigen/releases) tab and directly download the `combigen.exe` from there.

## Usage

Using the example `.json` data provided, here are some examples showcasing some features:
Using the example `combinations.json` data provided, here are some examples showcasing some features:

### Input

Expand Down Expand Up @@ -116,6 +156,16 @@ $ combigen -i example_data/combinations.json -r 50000 > output.txt # Generate 5
# and store them in output.txt
```

### Large Sets of Data

To demonstrate how `combigen` can even work with large sets of data (when compiled with the Boost library) we can use the example `large_bits.json` file. Unlike the above example data, this file only contains an array of string arrays. In this set of data, the maximum size is equivalent to 3 ^ 256. We can still find the last entry (max size - 1):

```
$ combigen -i example_data/large_bits.json -n 139008452377144732764939786789661303114218850808529137991604824430036072629766435941001769154109609521811665540548899435520
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2
$
```


### Types

Expand All @@ -140,8 +190,8 @@ $
You can also change the delimiter with the `-d` flag:

```
$ combigen -i example_data/combinations.json -r 3 -k -d "|" # Generate 3 random combinations, display the keys,
# and set the delimiter to ||
$ combigen -i example_data/combinations.json -r 3 -k -d "||" # Generate 3 random combinations, display the keys,
# and set the delimiter to ||
Age||First Name||Last Name||Number of Children||Number of Pets||Primary Desktop OS||Primary Mobile Phone OS||Residence||State/Territory
20||Samantha||Harris||3||4||Windows||Other||RV||GA
25||Matthew||Thomas||2||0||Windows||Other||Town Home||IL
Expand Down Expand Up @@ -267,6 +317,8 @@ Combigen uses the following open-source libraries:

* [skandhurkat/Getopt-for-Visual-Studio](https://github.com/skandhurkat/Getopt-for-Visual-Studio) - Port of the MinGW version of `getopt.h` so that the CLI works on Windows

* [Boost](https://www.boost.org) - For operating with incredibly large sets of data that push the limits of an `unsigned long long`.


## Contributing
Pull-requests are always welcome
Expand Down
Empty file added build/perf/.gitkeep
Empty file.
Empty file added build/release/.gitkeep
Empty file.
3 changes: 2 additions & 1 deletion doc/combigen.1
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
.\" Manpage for combigen
.\" Send an email to tylerburdsall@gmail.com for questions or concerns regarding this man page
.TH man 1 "05 Jun 2018" "1.2.2" "combigen man page"
.TH man 1 "28 Nov 2018" "1.3.0" "combigen man page"
.SH NAME
combigen \- efficiently generate combinations
.SH SYNOPSIS
Expand All @@ -19,6 +19,7 @@ Usage: combigen [options]
-i <input> Take the given .json file as input. Otherwise, input will come
from stdin.
Example: "{ "foo": [ "a", "b", "c" ], "bar": [ "1", "2" ] }"
Or: "[ ["1", "2"], ["3", "4", "a", "b"] ]"

-t <type> Output type (csv or json). Defaults to csv

Expand Down
1 change: 1 addition & 0 deletions example_data/large_bits.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", 
"1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], 
["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"], ["0", "1", "2"]]
158 changes: 158 additions & 0 deletions src/boost_functions.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
/* boost_functions.cpp
*
* Copyright (C) 2018 Tyler Burdsall
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

#ifndef BOOST_FUNCTIONS
#define BOOST_FUNCTIONS

#include "combigen.h"

// Forward declare functions from cli_functions.h
const void output_result(const vector<string> &result, const generation_args &args, const bool &for_optimization);
const void display_help(void);
const void display_csv_keys(const vector<string> &keys, const string &delim);

const void generate_random_samples_performance_mode(const generation_args &args)
{
const vector<vector<string>> results = lazy_cartesian_product::boost_generate_samples(args.pc.combinations, args.sample_size);
if (!args.display_json)
{
if (args.display_keys)
{
display_csv_keys(args.pc.keys, args.delim);
}
}
else
{
cout << "[\n";
}
for( const vector<string> &row: results)
{
output_result(row, args, true);
if (args.display_json && &row != &results.back())
{
cout << ",";
}
}
if (args.display_json)
{
cout << "]\n";
}
}

const void parse_args(const generation_args &args)
{
const uint1024_t max_size = lazy_cartesian_product::boost_compute_max_size(args.pc.combinations);
if (args.generate_all_combinations)
{
generate_all(max_size, args);
exit(0);
}
else
{
const uint1024_t sample_size(args.sample_size);
if (sample_size == 0 && args.entry_at_provided && !args.generate_all_combinations)
{
const uint1024_t entry_at(args.entry_at);
vector<string> result = lazy_cartesian_product::boost_entry_at(args.pc.combinations, args.entry_at);
output_result(result, args, false);
exit(0);
}
else if (sample_size >= 0)
{
const uint1024_t n(args.sample_size);
if (n > max_size)
{
cerr << "ERROR: Sample size cannot be greater than maximum possible combinations\n";
exit(-1);
}
if (args.perf_mode)
{
generate_random_samples_performance_mode(args);
}
else
{
vector<uint1024_t> range = lazy_cartesian_product::boost_generate_random_indices(args.sample_size, max_size);
generate_random_samples(range, args);
}
exit(0);
}
else
{
display_help();
exit(-1);
}
}
}

const void generate_random_samples(const vector<uint1024_t> &range, const generation_args &args)
{
if (!args.display_json)
{
if (args.display_keys)
{
display_csv_keys(args.pc.keys, args.delim);
}
}
else
{
cout << "[\n";
}
for (const uint1024_t &i: range)
{
vector<string> result = lazy_cartesian_product::boost_entry_at(args.pc.combinations, i.convert_to<string>());
output_result(result, args, true);
if (args.display_json && &i != &range.back())
{
cout << ",";
}
}
if (args.display_json)
{
cout << "]\n";
}
}

const void generate_all(const uint1024_t &max_size, const generation_args &args)
{
if (!args.display_json)
{
if (args.display_keys)
{
display_csv_keys(args.pc.keys, args.delim);
}
}
else
{
cout << "[\n";
}
const uint1024_t last = max_size - 1;
for (uint1024_t i = 0; i < max_size; ++i)
{
vector<string> result = lazy_cartesian_product::boost_entry_at(args.pc.combinations, i.convert_to<string>());
output_result(result, args, true);
if (args.display_json && i != last)
{
cout << ",";
}
}
if (args.display_json)
{
cout << "]\n";
}
}
#endif
Loading

0 comments on commit 734fc4a

Please sign in to comment.