forked from sears/bLSM
-
Notifications
You must be signed in to change notification settings - Fork 0
/
dataTuple.h
223 lines (195 loc) · 6.55 KB
/
dataTuple.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
/*
* datatuple.h
*
* Copyright 2010-2012 Yahoo! Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Author: sears
*/
#ifndef _DATATUPLE_H_
#define _DATATUPLE_H_
#include <string>
#include <stdlib.h>
#include <stasis/common.h>
#include <cstring>
#include <assert.h>
typedef uint32_t len_t ;
static const len_t DELETE = ((len_t)0) - 1;
typedef struct dataTuple
{
public:
typedef unsigned char* key_t ;
typedef unsigned char* data_t ;
private:
len_t datalen_;
byte* data_; // aliases key(). data_ - 1 should be the \0 terminating key().
dataTuple* sanity_check() {
assert(rawkeylen() < 3000);
return this;
}
public:
inline len_t rawkeylen() const {
return data_ - rawkey();
}
inline len_t strippedkeylen() const {
//const size_t ts_sz = sizeof(uint64_t)+1;
size_t al = rawkeylen();
return al;
#if 0
// hack for gc
if(al <= ts_sz || rawkey()[al-ts_sz]!=0) {
return al;
} else {
return al - ts_sz;
}
#endif
}
inline len_t datalen() const {
return (datalen_ == DELETE) ? 0 : datalen_;
}
//returns the length of the byte array representation
len_t byte_length() const {
return sizeof(len_t) + sizeof(len_t) + rawkeylen() + datalen();
}
static len_t length_from_header(len_t keylen, len_t datalen) {
return keylen + ((datalen == DELETE) ? 0 : datalen);
}
inline key_t rawkey() const {
return (key_t)(this+1);
}
inline data_t data() const {
return data_;
}
inline key_t strippedkey() const {
return (key_t)(this+1);
}
//this is used by the stl set
bool operator() (const dataTuple* lhs, const dataTuple* rhs) const {
return compare(lhs->strippedkey(), lhs->strippedkeylen(), rhs->strippedkey(), rhs->strippedkeylen()) < 0; //strcmp((char*)lhs.key(),(char*)rhs.key()) < 0;
}
/**
* This function handles ASCII and UTF-8 correctly, as well as 64-bit
* and shorter integers encoded with the most significant byte first.
*
* It also handles a special tuple encoding, where multiple utf-8 strings
* are concatenated with \254, and \255 is used as a high key. \254 and
* \255 never occur in valid UTF-8 strings. (However, this encoding
* tie-breaks by placing substrings *later* in the sort order, which
* is non-standard.)
*
* If a key is greater than 64-bits long, then this function checks to see
* if the 9th to last byte is zero (null bytes are disallowed by both ASCII
* and UTF-8. If so, it discards everything after the null. This allows
* us to pack a 64-bit timestamp into the end of the key.
*
*
* return -1 if k1 < k2
* 0 if k1 == k2
* 1 of k1 > k2
*/
static int compare(const byte* k1,size_t k1l, const byte* k2, size_t k2l) {
#if 0
// hack for gc
const size_t ts_sz = sizeof(int64_t)+1;
if(k1l > ts_sz && ! k1[k1l-ts_sz]) {
k1l -= ts_sz;
}
if(k2l > ts_sz && ! k2[k2l-ts_sz]) {
k2l -= ts_sz;
}
#endif
size_t min_l = k1l < k2l ? k1l : k2l;
int ret = memcmp(k1,k2, min_l);
if(ret) return ret;
if(k1l < k2l) return -1;
if(k1l == k2l) return 0;
return 1;
}
int64_t timestamp() {
const size_t ts_sz = sizeof(uint64_t)+1;
size_t al = rawkeylen();
if(al <= ts_sz || rawkey()[al-ts_sz]!=0) { return (uint64_t)-1; }
return (int64_t)*(uint64_t*)(rawkey()+1+al-ts_sz);
}
static int compare_obj(const dataTuple * a, const dataTuple* b) {
return compare(a->strippedkey(), a->strippedkeylen(), b->strippedkey(), b->strippedkeylen());
}
inline void setDelete() {
datalen_ = DELETE;
}
inline bool isDelete() const {
return datalen_ == DELETE;
}
static std::string key_to_str(const byte* k) {
//for strings
return std::string((char*)k);
//for int
/*
std::ostringstream ostr;
ostr << *((int32_t*)k);
return ostr.str();
*/
}
//copy the tuple. does a deep copy of the contents.
dataTuple* create_copy() const {
return create(rawkey(), rawkeylen(), data(), datalen_);
}
static dataTuple* create(const void* key, len_t keylen) {
return create(key, keylen, 0, DELETE);
}
static dataTuple* create(const void* key, len_t keylen, const void* data, len_t datalen) {
dataTuple *ret = (dataTuple*)malloc(sizeof(dataTuple) + length_from_header(keylen,datalen));
memcpy(ret->rawkey(), key, keylen);
ret->data_ = ret->rawkey() + keylen; // need to set this even if delete, since it encodes the key length.
if(datalen != DELETE) {
memcpy(ret->data_, data, datalen);
}
ret->datalen_ = datalen;
return ret->sanity_check();
}
//format: key length _ data length _ key _ data
byte * to_bytes() const {
byte *ret = (byte*)malloc(byte_length());
((len_t*)ret)[0] = rawkeylen();
((len_t*)ret)[1] = datalen_;
memcpy(((len_t*)ret)+2, rawkey(), length_from_header(rawkeylen(), datalen_));
return ret;
}
const byte* get_bytes(len_t *keylen, len_t *datalen) const {
*keylen = this->rawkeylen();
*datalen = datalen_;
return rawkey();
}
//format of buf: key _ data. The caller needs to 'peel' off key length and data length for this call.
static dataTuple* from_bytes(len_t keylen, len_t datalen, byte* buf) {
dataTuple *dt = (dataTuple*) malloc(sizeof(dataTuple) + length_from_header(keylen,datalen));
dt->datalen_ = datalen;
memcpy(dt->rawkey(),buf, length_from_header(keylen,datalen));
dt->data_ = dt->rawkey() + keylen;
return dt->sanity_check();
}
static dataTuple* from_bytes(byte* buf) {
len_t keylen = ((len_t*)buf)[0];
len_t buflen = length_from_header(keylen, ((len_t*)buf)[1]);
dataTuple *dt = (dataTuple*) malloc(sizeof(dataTuple) + buflen);
dt->datalen_ = ((len_t*)buf)[1];
memcpy(dt->rawkey(),((len_t*)buf)+2,buflen);
dt->data_ = dt->rawkey() + keylen;
return dt->sanity_check();
}
static inline void freetuple(dataTuple* dt) {
free(dt);
}
} dataTuple;
#endif