-
Notifications
You must be signed in to change notification settings - Fork 0
/
etdc_mag_new.cpp
129 lines (96 loc) · 4.29 KB
/
etdc_mag_new.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
// Build-time configuration for this tool.
// Q_GRAM_UTIL_V names the namespace that is opened by the
// `using namespace Q_GRAM_UTIL_V` directive further down; it defaults to
// q_gram_utilities_l_bits unless the build predefines it.
#ifndef Q_GRAM_UTIL_V
#define Q_GRAM_UTIL_V q_gram_utilities_l_bits
#endif
// Defaults to 2 unless predefined. It is not referenced in the visible part of
// this file; presumably it is consumed by the headers included below (TODO confirm).
#ifndef Q_GRAM_UTIL_NEW_L
#define Q_GRAM_UTIL_NEW_L 2
#endif
#include "etdc_mag_new.h"
#include "etdc_tool.h"
#include <set>
// These two macros are defined before etdc_wrappers_new.h is included, so they
// must stay above that #include. Presumably the header uses them to bind
// search_wrapper() / build_indexed_patterns_wrapper() to the MAG
// implementations named here (TODO confirm against etdc_wrappers_new.h).
#define SEARCH_FUNCTION_NAME etdc_mag
#define BUILD_INDEXED_PATTERNS_FUNCTION etdc_mag_build_indexed_patterns
using namespace Q_GRAM_UTIL_V; // defines which q-gram creation should be used
#include "etdc_wrappers_new.h"
using namespace ETDC;
/* MAIN */
int main(int argc, char *argv[]) {
if ( argc < 4 ) return EXIT_FAILURE;
t_timemes t_pre = {0}, t_run = {0};
t_init(t_pos_find); t_init(t_pre); t_init(t_run);
t_start(t_pre);
l2 = new unsigned long[50000000];
/* Reading data from HD */
const char *patterns_filename = argv[1], *enc_text_filename = argv[3];
const unsigned int U = atoi(argv[4]);
const int k = atoi(argv[5]); /* q */
const int q_size = atoi(argv[6]);
long sigma = atoi(argv[7]);
int denominator = atoi(argv[8]);
char *orig_filename = argv[9];
int ex_offset = argc>10 ? atoi(argv[10]) : 0;
if(denominator!=0 && ex_offset!=0 || denominator==0 && ex_offset==0) { printf("Error: There should be set only one: denominator or offset"); exit(EXIT_FAILURE); }
char processed_filename[255] = {0};
sprintf(processed_filename, "%s.processed", orig_filename);
long processed_file_size = file_size(processed_filename);
text = NULL; FILE * file = NULL;
int mod = 0, div = 0;
t_stop(t_pre);
// check dictionary size
char dict_filename[250] = {0};
char *dict = NULL;
sprintf(dict_filename, "%s.processed.dict", orig_filename);
long dict_file_size = read_file_content<char>(&dict, dict_filename);
if(dict==NULL) {printf("Error: dict==NULL (filename=%s)", dict_filename); return EXIT_FAILURE;}
/* read text from disk */
text_size = read_file_content<byte>(&text, enc_text_filename);
if (text == NULL) { printf("Error: text==NULL\n"); return EXIT_FAILURE;}
if(denominator!=0) offset = (double)text_size / (double)denominator; else offset = ex_offset;
char index_filename[250] = {0};
sprintf(index_filename, "%s.%ld.idx", enc_text_filename, offset);
index_number = ETDC_read_index_file(index_filename, &etdc_idxs);
ETDC_create_map_etdc_to_str(dict, &map_etdc_to_str);
/* read patterns file */
pattern_text_size = read_file_content<byte>(&patterns_text, patterns_filename);
t_start(t_pre);
uint64_t as = get_super_sigma(q_size);
if( get_super_sigma(q_size) > 200000000 || get_super_sigma(q_size) == 0) { printf("Error: Q is too big for choosen sigma size { q=%d, super_sigma=%zu }\n", q_size, get_super_sigma(q_size)); exit(EXIT_FAILURE); }
indexed_patterns = (plist*)calloc(get_super_sigma(q_size), sizeof(plist));
for(int i = 0; i < get_super_sigma(q_size); ++i) {
indexed_patterns[i].list = (pelem*)calloc(1, sizeof(pelem));
}
pattern_size = 999;
build_indexed_patterns_wrapper(q_size);
mod = pattern_size % q_size;
div = pattern_size / q_size;
patt_size = div - ( mod < (q_size - 1) ? 1 : 0 );
int max_m = ( MAX_WORD_SIZE / k + 1 - U ) * k - 2;
if(max_m < (int)patt_size) {
patt_size = max_m;
}
if(patt_size <= k) { printf("Error: (m/2 and max_m)>=k, m=%d, max_m=%d, k=%d\n", patt_size, max_m, k); return EXIT_FAILURE; }
for(unsigned int l = 0; l < log2cache; ++l) log2v[l] = ilog2((ldouble)l);
t_stop(t_pre);
t_start(t_run);
search_wrapper(U, k, q_size);
t_stop(t_run);
long int matches = indexes.size();
#ifdef PRINT_RESULTS
std::set<unsigned long> idx_ord(indexes.begin(), indexes.end());
std::set<unsigned long>::iterator l = idx_ord.begin();
while( l != idx_ord.end()) {
printf("%d\t%d\n", *l, l2[*l]);
fflush(stdout);
++l;
}
print_result(t_get_seconds(t_data_acc), t_get_seconds(t_pre), t_get_seconds(t_run), v_count, verif, matches, sigma, get_super_sigma(q_size), text_size, processed_file_size, dict_file_size, index_number * sizeof(uint32_t), denominator, offset, pattern_size, map_etdc_to_str.size());
#else
print_result(t_get_seconds(t_pos_find), t_get_seconds(t_pre), t_get_seconds(t_run), v_count, verif, matches, sigma, get_super_sigma(q_size), text_size, processed_file_size, dict_file_size, index_number * sizeof(uint32_t), denominator, offset, pattern_size, map_etdc_to_str.size());
#endif
for(int i = 0; i < get_super_sigma(q_size); ++i) {
free(indexed_patterns[i].list);
}
free(indexed_patterns);
free(patterns_text);
free(text);
return EXIT_SUCCESS;
}