-
Notifications
You must be signed in to change notification settings - Fork 0
/
kb_bitset.hpp
176 lines (158 loc) · 4.24 KB
/
kb_bitset.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
#ifndef KMER_BOOLEAN_BITSET_H_
#define KMER_BOOLEAN_BITSET_H_
#include <climits>
#include <cstddef>
#include <cstring>
#include <exception>
#include <iostream>
#include <map>
#include <stdexcept>
#include <vector>
namespace kmer_boolean
{
// Packed presence/absence table with one bit per k-mer over a four-letter
// alphabet (4^k bits in total).
//
// Bit `idx` is stored in byte `idx / CHAR_BIT` at bit position
// `idx % CHAR_BIT` (for k == 1 only the low four bits of the single byte are
// used).  get_all() decodes a bit index as a k-digit base-4 number and
// translates each digit through `rmap`, most significant digit first.
//
// Expected call sequence: reserve_for_k(k), then set_all(initial_value),
// then any mix of set()/get()/get_all().
class KB_Bitset
{
private:
    std::vector<unsigned char> _bitset;  // packed bits, one byte per element
    int _k = 0;                          // k-mer length chosen by reserve_for_k()
    int _nbytes = 0;                     // number of bytes needed for 4^k bits
    int _max_bits = CHAR_BIT;            // bits used in each byte (4 when k == 1)

    // Returns the index of the byte that holds bit `idx` (idx >= 0).
    static int
    _byte_offset_from_index(const int& idx)
    {
        return idx / CHAR_BIT;
    }

    // Validates `idx` against the populated storage and returns the byte
    // offset of the bit.
    // Throws std::out_of_range for a negative index or for an index whose
    // byte has not been created yet (e.g. set_all() was never called).
    std::size_t
    _checked_offset(const int& idx) const
    {
        if (idx < 0) {
            throw std::out_of_range("Error: bit index must be non-negative");
        }
        const std::size_t offset = static_cast<std::size_t>(_byte_offset_from_index(idx));
        if (offset >= _bitset.size()) {
            throw std::out_of_range("Error: bit index is outside the populated bitset");
        }
        return offset;
    }

public:
    // Selects which k-mers get_all() reports.
    typedef enum
    {
        MerFilterUndefined = 0,  // report nothing
        MerFilterPresent,        // only k-mers whose bit is set
        MerFilterAbsent,         // only k-mers whose bit is clear
        MerFilterAll,            // every k-mer, with its state
    } MerFilterType;

    KB_Bitset() = default;

    // Direct access to the underlying byte storage.
    std::vector<unsigned char>& bitset() { return _bitset; }
    const std::vector<unsigned char>& bitset() const { return _bitset; }

    // Number of bytes required for the k passed to reserve_for_k().
    size_t nbytes() const { return static_cast<size_t>(_nbytes); }

    // Builds the base -> digit table (A=0, C=1, T=2, G=3).
    std::map<unsigned char, int> create_fmap(void) {
        std::map<unsigned char, int> m;
        m['A'] = 0;
        m['C'] = 1;
        m['T'] = 2;
        m['G'] = 3;
        return m;
    }
    std::map<unsigned char, int> fmap = create_fmap();

    // Builds the digit -> base table, the inverse of create_fmap().
    std::map<int, unsigned char> create_rmap(void) {
        std::map<int, unsigned char> m;
        m[0] = 'A';
        m[1] = 'C';
        m[2] = 'T';
        m[3] = 'G';
        return m;
    }
    std::map<int, unsigned char> rmap = create_rmap();

    // Records k and reserves capacity for the 4^k bits it requires.
    //
    // 4^k bits occupy 4^k / 8 = 2^(2k-3) bytes; k == 1 needs only four bits
    // and is rounded up to a single byte.  This only reserves capacity: call
    // set_all() afterwards to actually create the bytes.
    //
    // An invalid k (k < 1, or so large that the byte count does not fit in
    // an int) or an allocation failure prints the error message to std::cout
    // and calls std::terminate().
    void
    reserve_for_k(const int& k)
    {
        // Largest k for which 2^(2k-3) is still representable in an int.
        const int int_bits = static_cast<int>(sizeof(int) * CHAR_BIT);
        const int max_k = (int_bits + 1) / 2;
        try {
            if (k < 1) {
                throw std::domain_error("Error: k must be positive, non-zero integer");
            }
            if (k > max_k) {
                throw std::domain_error("Error: k is too large for the byte count to fit in an int");
            }
            _k = k;
            // The shift is only evaluated for k >= 2, where 2k-3 is
            // non-negative; shifting by a negative count is undefined.
            _nbytes = (k > 1) ? (1 << (2 * k - 3)) : 1;
            _max_bits = (k > 1) ? CHAR_BIT : static_cast<int>(fmap.size());
            _bitset.reserve(static_cast<std::size_t>(_nbytes));
        } catch (const std::exception& e) {
            std::cout << e.what() << std::endl;
            std::terminate();
        }
    }

    // Resizes the storage to nbytes() bytes and sets every bit to `b`.
    // Safe to call repeatedly: the storage is replaced, not appended to.
    inline void
    set_all(const bool b)
    {
        const unsigned char fill = b ? static_cast<unsigned char>(UCHAR_MAX)
                                     : static_cast<unsigned char>(0);
        _bitset.assign(static_cast<std::size_t>(_nbytes), fill);
    }

    // Sets bit `idx` to `v`.
    // Throws std::out_of_range if `idx` does not address a populated byte.
    void
    set(const int& idx, const bool& v)
    {
        const std::size_t offset = _checked_offset(idx);
        const int r = idx % _max_bits;
        const unsigned char mask = static_cast<unsigned char>(1u << r);
        unsigned char& byte = _bitset[offset];
        byte = v ? static_cast<unsigned char>(byte | mask)
                 : static_cast<unsigned char>(byte & ~mask);
    }

    // Returns the value of bit `idx`.
    // Throws std::out_of_range if `idx` does not address a populated byte.
    bool
    get(const int& idx) const
    {
        const std::size_t offset = _checked_offset(idx);
        const int r = idx % _max_bits;
        return ((_bitset[offset] >> r) & 1U) != 0;
    }

    // Writes one line per selected k-mer to std::cout, in increasing bit
    // index order: the k-mer spelled with `rmap` followed by " found" or
    // " not found".
    // Throws std::out_of_range if the storage has not been populated with
    // set_all() (or if `rmap` lacks a digit).
    void
    get_all(const MerFilterType ft) const
    {
        // Base-4 odometer holding the digits of the current bit index,
        // least significant digit first.
        std::vector<std::size_t> digits(static_cast<std::size_t>(_k), 0);
        const std::size_t radix = rmap.size();

        for (int byte = 0; byte < _nbytes; ++byte) {
            const unsigned char bits = _bitset.at(static_cast<std::size_t>(byte));
#ifdef DEBUG_FLAG
            std::cout << "byte [" << byte << "] bytes [" << byte_to_binary(bits) << "]" << '\n';
#endif
            for (int bidx = 0; bidx < _max_bits; ++bidx) {
                const bool bit = ((bits >> bidx) & 1U) != 0;
                const bool selected = (ft == MerFilterAll)
                                   || ((ft == MerFilterPresent) && bit)
                                   || ((ft == MerFilterAbsent) && !bit);
                if (selected) {
                    // Most significant digit is printed first.
                    for (auto d = digits.rbegin(); d != digits.rend(); ++d) {
                        std::cout << rmap.at(static_cast<int>(*d));
                    }
                    std::cout << (bit ? " found" : " not found") << '\n';
                }
                // Advance the odometer, carrying into higher digits.  The
                // top digit is never reset; it only overflows after the
                // final bit has been visited.
                ++digits[0];
                for (int d = 0; d + 1 < _k && digits[d] == radix; ++d) {
                    digits[d] = 0;
                    ++digits[d + 1];
                }
            }
        }
        std::cout.flush();
    }

    // Formats the low CHAR_BIT bits of `byte` as a string of '0'/'1'
    // characters, most significant bit first.
    // The result points to a static buffer that is overwritten by the next
    // call, so copy it if it must outlive that call.
    const char *
    byte_to_binary(const int& byte) const
    {
        static char binary[CHAR_BIT + 1];
        // Every position is written by index so repeated calls never grow
        // past the end of the buffer.
        for (int pos = 0; pos < CHAR_BIT; ++pos) {
            const int mask = 1 << (CHAR_BIT - 1 - pos);
            binary[pos] = ((byte & mask) != 0) ? '1' : '0';
        }
        binary[CHAR_BIT] = '\0';
        return binary;
    }
};
}
#endif // KMER_BOOLEAN_BITSET_H_