From 5c6e9e66d47285ed005e0df7a3f03f22f837df5c Mon Sep 17 00:00:00 2001 From: icppWorld <124377669+icppWorld@users.noreply.github.com> Date: Sun, 3 Sep 2023 15:32:33 +0200 Subject: [PATCH] Run stories260K as the native test in cicd (#6) --- .github/workflows/cicd.yml | 1 + Makefile | 7 +++++ icpp_llama2/native/main.cpp | 56 +++++++++++++++++++++++++++++-------- 3 files changed, 53 insertions(+), 11 deletions(-) diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml index 3223076..16d4409 100644 --- a/.github/workflows/cicd.yml +++ b/.github/workflows/cicd.yml @@ -69,6 +69,7 @@ jobs: shell: bash -l {0} run: | make icpp_llama2_get_stories15M + make icpp_llama2_get_stories260K - name: all-tests shell: bash -l {0} diff --git a/Makefile b/Makefile index 09dd4f8..cb6a926 100644 --- a/Makefile +++ b/Makefile @@ -49,6 +49,13 @@ CLANG_TIDY = $(ICPP_COMPILER_ROOT)/bin/clang-tidy .PHONY: all-tests all-tests: all-static test-all-llms +.PHONY: icpp_llama2_get_stories260K +icpp_llama2_get_stories260K: + cd icpp_llama2 && \ + mkdir -p stories260K && \ + wget -P stories260K https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/stories260K.bin && \ + wget -P stories260K https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/tok512.bin + .PHONY: icpp_llama2_get_stories15M icpp_llama2_get_stories15M: cd icpp_llama2 && \ diff --git a/icpp_llama2/native/main.cpp b/icpp_llama2/native/main.cpp index 4f9bdd2..e31426f 100644 --- a/icpp_llama2/native/main.cpp +++ b/icpp_llama2/native/main.cpp @@ -62,10 +62,23 @@ int main() { bool silent_on_trap = true; + // The model & tokenizer to use + int model_to_use = 2; + + // Use this during final QA + std::string model_path = "models/stories15M.bin"; + std::string tokenizer_path = "tokenizers/tokenizer.bin"; + if (model_to_use == 2) { + // Use this really small model during development + model_path = "stories260K/stories260K.bin"; + tokenizer_path = "stories260K/tok512.bin"; + } + std::cout << "model_path = " 
<< model_path << "\n"; + std::cout << "tokenizer_path = " << tokenizer_path << "\n"; + // ----------------------------------------------------------------------------- // Read the models/stories15M.bin file into a bytes vector - std::string model_path = "models/stories15M.bin"; std::streamsize file_size; // file size bytes std::vector model_bytes; // bytes to upload { @@ -102,7 +115,6 @@ int main() { // ----------------------------------------------------------------------------- // Read the tokenizers/tokenizer.bin file into a bytes vector - std::string tokenizer_path = "tokenizers/tokenizer.bin"; // std::streamsize file_size; // file size bytes std::vector tokenizer_bytes; // bytes to upload { @@ -276,28 +288,50 @@ int main() { }, )' */ - mockIC.run_test( - "get_model_config", get_model_config, "4449444c0000", - "4449444c016c07c8fab0027cb087c0d9017cd58488bc027cb3fdc984037cf3e0d4d6057cf5cfd3fc057c82c3e4f60f7c0100a0020680fa01800606800206", - silent_on_trap, my_principal); + std::string expected_response = + "4449444c016c07c8fab0027cb087c0d9017cd58488bc027cb3fdc984037cf3e0d4d6057cf5cfd3fc057c82c3e4f60f7c0100a0020680fa01800606800206"; + if (model_to_use == 2) { + /* + '()' -> + '( + record { + dim = 64 : int; + hidden_dim = 172 : int; + n_layers = 5 : int; + n_heads = 8 : int; + n_kv_heads = 4 : int; + vocab_size = 512 : int; + seq_len = 512 : int; + }, + )' + */ + expected_response = + "4449444c016c07c8fab0027cb087c0d9017cd58488bc027cb3fdc984037cf3e0d4d6057cf5cfd3fc057c82c3e4f60f7c0100c000048004ac0105800408"; + } + mockIC.run_test("get_model_config", get_model_config, "4449444c0000", + expected_response, silent_on_trap, my_principal); // With temperature=0.0: greedy argmax sampling -> the story will be the same every time // '(record {prompt = "" : text; steps = 100 : nat64; temperature = 0.0 : float32; topp = 1.0 : float32; rng_seed = 0 : nat64;})' // -> '(variant { ok = "Once upon a time, there was a little girl named Lily. 
She loved to play outside in the sunshine. One day, she saw a big, red ball in the sky. It was the sun! She thought it was so pretty.\nLily wanted to play with the ball, but it was too high up in the sky. She tried to jump and reach it, but she couldn\'t. Then, she had an idea. She would use a stick to knock the\n" : text })' + expected_response = + "4449444c016b019cc20171010000ee024f6e63652075706f6e20612074696d652c207468657265207761732061206c6974746c65206769726c206e616d6564204c696c792e20536865206c6f76656420746f20706c6179206f75747369646520696e207468652073756e7368696e652e204f6e65206461792c20736865207361772061206269672c207265642062616c6c20696e2074686520736b792e20497420776173207468652073756e21205368652074686f756768742069742077617320736f207072657474792e0a4c696c792077616e74656420746f20706c61792077697468207468652062616c6c2c206275742069742077617320746f6f206869676820757020696e2074686520736b792e2053686520747269656420746f206a756d7020616e642072656163682069742c206275742073686520636f756c646e27742e205468656e2c207368652068616420616e20696465612e2053686520776f756c6420757365206120737469636b20746f206b6e6f636b207468650a"; + if (model_to_use == 2) { + // -> '(variant { ok = ""Once upon a time, there was a little girl named Lily. She loved to play outside in the park. One day, she saw a big, red ball. 
She wanted to play with it, but it was too high.\nLily\'s mom said, \"Lily, let\'s go to the park.\" Lily was sad and didn\'t know w\n"" : text })' + expected_response = + "4449444c016b019cc20171010000fe014f6e63652075706f6e20612074696d652c207468657265207761732061206c6974746c65206769726c206e616d6564204c696c792e20536865206c6f76656420746f20706c6179206f75747369646520696e20746865207061726b2e204f6e65206461792c20736865207361772061206269672c207265642062616c6c2e205368652077616e74656420746f20706c617920776974682069742c206275742069742077617320746f6f20686967682e0a4c696c792773206d6f6d20736169642c20224c696c792c206c6574277320676f20746f20746865207061726b2e22204c696c79207761732073616420616e64206469646e2774206b6e6f7720770a"; + } mockIC.run_test( "inference 1", inference, "4449444c016c05b4e8c2e40373bbb885e80473a7f7b9a00878c5c8cea60878a4a3e1aa0b710100000000000000803f6400000000000000000000000000000000", - "4449444c016b019cc20171010000ee024f6e63652075706f6e20612074696d652c207468657265207761732061206c6974746c65206769726c206e616d6564204c696c792e20536865206c6f76656420746f20706c6179206f75747369646520696e207468652073756e7368696e652e204f6e65206461792c20736865207361772061206269672c207265642062616c6c20696e2074686520736b792e20497420776173207468652073756e21205368652074686f756768742069742077617320736f207072657474792e0a4c696c792077616e74656420746f20706c61792077697468207468652062616c6c2c206275742069742077617320746f6f206869676820757020696e2074686520736b792e2053686520747269656420746f206a756d7020616e642072656163682069742c206275742073686520636f756c646e27742e205468656e2c207368652068616420616e20696465612e2053686520776f756c6420757365206120737469636b20746f206b6e6f636b207468650a", - silent_on_trap, my_principal); + expected_response, silent_on_trap, my_principal); // With temperature=0.0 & topp=0.9, still greedy argmax sampling -> the story will be the same every time // '(record {prompt = "" : text; steps = 100 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})' - // -> 
'(variant { ok = "Once upon a time, there was a little girl named Lily. She loved to play outside in the sunshine. One day, she saw a big, red ball in the sky. It was the sun! She thought it was so pretty.\nLily wanted to play with the ball, but it was too high up in the sky. She tried to jump and reach it, but she couldn\'t. Then, she had an idea. She would use a stick to knock the\n" : text })' mockIC.run_test( "inference 2", inference, "4449444c016c05b4e8c2e40373bbb885e80473a7f7b9a00878c5c8cea60878a4a3e1aa0b710100000000006666663f6400000000000000000000000000000000", - "4449444c016b019cc20171010000ee024f6e63652075706f6e20612074696d652c207468657265207761732061206c6974746c65206769726c206e616d6564204c696c792e20536865206c6f76656420746f20706c6179206f75747369646520696e207468652073756e7368696e652e204f6e65206461792c20736865207361772061206269672c207265642062616c6c20696e2074686520736b792e20497420776173207468652073756e21205368652074686f756768742069742077617320736f207072657474792e0a4c696c792077616e74656420746f20706c61792077697468207468652062616c6c2c206275742069742077617320746f6f206869676820757020696e2074686520736b792e2053686520747269656420746f206a756d7020616e642072656163682069742c206275742073686520636f756c646e27742e205468656e2c207368652068616420616e20696465612e2053686520776f756c6420757365206120737469636b20746f206b6e6f636b207468650a", - silent_on_trap, my_principal); + expected_response, silent_on_trap, my_principal); // With temperature>0.0 & topp=1.0: regular sampling // '(record {prompt = "" : text; steps = 100 : nat64; temperature = 0.9 : float32; topp = 1.0 : float32; rng_seed = 0 : nat64;})'