Skip to content

Commit

Permalink
do NOT sort vocab lazily, because in a canister query you re-do it (#7)
Browse files Browse the repository at this point in the history
  • Loading branch information
icppWorld authored Sep 3, 2023
1 parent 5c6e9e6 commit c81c0f0
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 10 deletions.
19 changes: 18 additions & 1 deletion icpp_llama2/src/initialize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,10 @@ void build_tokenizer(Tokenizer *t, int vocab_size) {
t->vocab_size = vocab_size;
// malloc space to hold the scores and the strings
t->vocab = (char **)malloc(vocab_size * sizeof(char *));
if (!t->vocab) IC_API::trap("Failed to allocate memory for vocab.");
t->vocab_scores = (float *)malloc(vocab_size * sizeof(float));
t->sorted_vocab = NULL; // initialized lazily
if (!t->vocab_scores)
IC_API::trap("Failed to allocate memory for vocab_scores.");
for (int i = 0; i < 256; i++) {
t->byte_pieces[i * 2] = (unsigned char)i;
t->byte_pieces[i * 2 + 1] = '\0';
Expand Down Expand Up @@ -64,6 +66,9 @@ void build_tokenizer(Tokenizer *t, int vocab_size) {
data_ptr += sizeof(int);

t->vocab[i] = (char *)malloc(len + 1);
if (!t->vocab[i])
IC_API::trap("Failed to allocate memory for vocab[" + std::to_string(i) +
"].");
// if (fread(t->vocab[i], len, 1, file) != 1) {
// fprintf(stderr, "failed read\n");
// exit(EXIT_FAILURE);
Expand All @@ -74,6 +79,18 @@ void build_tokenizer(Tokenizer *t, int vocab_size) {
t->vocab[i][len] = '\0'; // add the string terminating token
}
// fclose(file);

// Do this now, and not lazily
// malloc and sort the vocabulary
t->sorted_vocab = (TokenIndex *)malloc(t->vocab_size * sizeof(TokenIndex));
if (!t->sorted_vocab)
IC_API::trap("Failed to allocate memory for sorted_vocab.");

for (int i = 0; i < t->vocab_size; i++) {
t->sorted_vocab[i].str = t->vocab[i];
t->sorted_vocab[i].id = i;
}
qsort(t->sorted_vocab, t->vocab_size, sizeof(TokenIndex), compare_tokens);
}

// This is an exact copy of code in these methods of run.c
Expand Down
20 changes: 11 additions & 9 deletions icpp_llama2/src/run.c
Original file line number Diff line number Diff line change
Expand Up @@ -492,15 +492,17 @@ void encode(Tokenizer* t, const char *text, int8_t bos, int8_t eos, int *tokens,
// fprintf(stderr, "cannot encode NULL text\n"); exit(EXIT_FAILURE);
}

if (t->sorted_vocab == NULL) {
// lazily malloc and sort the vocabulary
t->sorted_vocab = malloc(t->vocab_size * sizeof(TokenIndex));
for (int i = 0; i < t->vocab_size; i++) {
t->sorted_vocab[i].str = t->vocab[i];
t->sorted_vocab[i].id = i;
}
qsort(t->sorted_vocab, t->vocab_size, sizeof(TokenIndex), compare_tokens);
}
// ICPP: we must not do this lazily, because memory allocated during a canister query is not persisted
// instead, we malloc and sort the vocabulary once, up front, in initialize.cpp
// if (t->sorted_vocab == NULL) {
// // lazily malloc and sort the vocabulary
// t->sorted_vocab = malloc(t->vocab_size * sizeof(TokenIndex));
// for (int i = 0; i < t->vocab_size; i++) {
// t->sorted_vocab[i].str = t->vocab[i];
// t->sorted_vocab[i].id = i;
// }
// qsort(t->sorted_vocab, t->vocab_size, sizeof(TokenIndex), compare_tokens);
// }

// create a temporary buffer that will store merge candidates of always two consecutive tokens
// *2 for concat, +1 for null terminator +2 for UTF8 (in case max_token_length is 1)
Expand Down
1 change: 1 addition & 0 deletions icpp_llama2/src/run.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ extern Sampler sampler;
// At inference
extern unsigned long long rng_seed;

int compare_tokens(const void *a, const void *b);
bool malloc_run_state(RunState *s, Config *p);
void memory_map_weights(TransformerWeights *w, Config *p, float *ptr,
int shared_weights);
Expand Down

0 comments on commit c81c0f0

Please sign in to comment.