Skip to content

Commit

Permalink
do NOT sort vocab lazily, because in a canister query you re-do it (#7)
Browse files Browse the repository at this point in the history
  • Loading branch information
icppWorld authored Sep 3, 2023
1 parent 5c6e9e6 commit c81c0f0
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 10 deletions.
19 changes: 18 additions & 1 deletion icpp_llama2/src/initialize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,10 @@ void build_tokenizer(Tokenizer *t, int vocab_size) {
t->vocab_size = vocab_size;
// malloc space to hold the scores and the strings
t->vocab = (char **)malloc(vocab_size * sizeof(char *));
if (!t->vocab) IC_API::trap("Failed to allocate memory for vocab.");
t->vocab_scores = (float *)malloc(vocab_size * sizeof(float));
t->sorted_vocab = NULL; // initialized lazily
if (!t->vocab_scores)
IC_API::trap("Failed to allocate memory for vocab_scores.");
for (int i = 0; i < 256; i++) {
t->byte_pieces[i * 2] = (unsigned char)i;
t->byte_pieces[i * 2 + 1] = '\0';
Expand Down Expand Up @@ -64,6 +66,9 @@ void build_tokenizer(Tokenizer *t, int vocab_size) {
data_ptr += sizeof(int);

t->vocab[i] = (char *)malloc(len + 1);
if (!t->vocab[i])
IC_API::trap("Failed to allocate memory for vocab[" + std::to_string(i) +
"].");
// if (fread(t->vocab[i], len, 1, file) != 1) {
// fprintf(stderr, "failed read\n");
// exit(EXIT_FAILURE);
Expand All @@ -74,6 +79,18 @@ void build_tokenizer(Tokenizer *t, int vocab_size) {
t->vocab[i][len] = '\0'; // add the string terminating token
}
// fclose(file);

// Do this now, and not lazily
// malloc and sort the vocabulary
t->sorted_vocab = (TokenIndex *)malloc(t->vocab_size * sizeof(TokenIndex));
if (!t->sorted_vocab)
IC_API::trap("Failed to allocate memory for sorted_vocab.");

for (int i = 0; i < t->vocab_size; i++) {
t->sorted_vocab[i].str = t->vocab[i];
t->sorted_vocab[i].id = i;
}
qsort(t->sorted_vocab, t->vocab_size, sizeof(TokenIndex), compare_tokens);
}

// This is an exact copy of code in these methods of run.c
Expand Down
20 changes: 11 additions & 9 deletions icpp_llama2/src/run.c
Original file line number Diff line number Diff line change
Expand Up @@ -492,15 +492,17 @@ void encode(Tokenizer* t, const char *text, int8_t bos, int8_t eos, int *tokens,
// fprintf(stderr, "cannot encode NULL text\n"); exit(EXIT_FAILURE);
}

if (t->sorted_vocab == NULL) {
// lazily malloc and sort the vocabulary
t->sorted_vocab = malloc(t->vocab_size * sizeof(TokenIndex));
for (int i = 0; i < t->vocab_size; i++) {
t->sorted_vocab[i].str = t->vocab[i];
t->sorted_vocab[i].id = i;
}
qsort(t->sorted_vocab, t->vocab_size, sizeof(TokenIndex), compare_tokens);
}
// ICPP: we must not do this lazily, because memory allocated during a canister query is not persisted
// instead, we malloc and sort the vocabulary once, up front, in initialize.cpp
// if (t->sorted_vocab == NULL) {
// // lazily malloc and sort the vocabulary
// t->sorted_vocab = malloc(t->vocab_size * sizeof(TokenIndex));
// for (int i = 0; i < t->vocab_size; i++) {
// t->sorted_vocab[i].str = t->vocab[i];
// t->sorted_vocab[i].id = i;
// }
// qsort(t->sorted_vocab, t->vocab_size, sizeof(TokenIndex), compare_tokens);
// }

// create a temporary buffer that will store merge candidates of always two consecutive tokens
// *2 for concat, +1 for null terminator +2 for UTF8 (in case max_token_length is 1)
Expand Down
1 change: 1 addition & 0 deletions icpp_llama2/src/run.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ extern Sampler sampler;
// At inference
extern unsigned long long rng_seed;

int compare_tokens(const void *a, const void *b);
bool malloc_run_state(RunState *s, Config *p);
void memory_map_weights(TransformerWeights *w, Config *p, float *ptr,
int shared_weights);
Expand Down

0 comments on commit c81c0f0

Please sign in to comment.