diff --git a/.gitignore b/.gitignore
index 42879dd..dfd6b42 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,7 +8,8 @@ paths.txt
 secret
 
 # llama2 models
-models
+stories15M.bin
+stories42Mtok4096.bin
 #stories260K
 
 # llama2 metrics
diff --git a/icpp_llama2/README.md b/icpp_llama2/README.md
index 368ad58..b30289e 100644
--- a/icpp_llama2/README.md
+++ b/icpp_llama2/README.md
@@ -1,9 +1,24 @@
 # [karpathy/llama2.c](https://github.com/karpathy/llama2.c) for the Internet Computer
 
-# Instructions
+# Try it out
+
+The 15M parameter model is the backend of [ICGPT](https://icgpt.icpp.world/).
+
+# Getting Started
 
 - Install the C++ development environment for the Internet Computer ([docs](https://docs.icpp.world/installation.html)):
-  - Install the required python packages *(icpp-pro & ic-py)*:
+
+  - Create a python environment. (We like Miniconda, but use whatever you like!)
+    ```bash
+    conda create --name myllama2 python=3.11
+    conda activate myllama2
+    ```
+  - Clone this repo and enter the icpp_llama2 folder
+    ```bash
+    git clone https://github.com/icppWorld/icpp_llm.git
+    cd icpp_llm/icpp_llama2
+    ```
+  - Install the required python packages _(icpp-pro & ic-py)_:
     ```bash
     pip install -r requirements.txt
     ```
@@ -12,78 +27,165 @@
     icpp install-wasi-sdk
     ```
 - Install dfx:
+
   ```bash
   sh -ci "$(curl -fsSL https://internetcomputer.org/install.sh)"
+
+  # Configure your shell
+  source "$HOME/.local/share/dfx/env"
   ```
-  *(Note: On Windows, just install dfx in wsl, and icpp-pro in PowerShell will know where to find it. )*
-
-- Get a model checkpoint, as explained in [karpathy/llama2.c](https://github.com/karpathy/llama2.c):
+  _(Note: On Windows, just install dfx in WSL; icpp-pro in PowerShell will know where to find it.)_
 
-  This command downloads the 15M parameter model that was trained on the TinyStories dataset (~60MB download) and stores it in a `models` folder:
+- Deploy the 15M parameter pre-trained model to canister `llama2_15M`:
 
-  ```bash
-  # on Linux/Mac
-  mkdir -p models
-  wget -P models https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.bin
-  ```
+  - Start the local network:
+    ```bash
+    dfx start --clean
+    ```
+  - Compile & link to WebAssembly (wasm), as defined in `icpp.toml`:
+    ```bash
+    icpp build-wasm
+    ```
+  - Deploy the wasm to a canister on the local network:
+    ```bash
+    dfx deploy llama2_15M
+    ```
+  - Check the health endpoint of the `llama2_15M` canister:
+    ```bash
+    $ dfx canister call llama2_15M health
+    (variant { Ok = record { status_code = 200 : nat16 } })
+    ```
+  - Set the canister mode to 'chat-principal':
+    ```bash
+    $ dfx canister call llama2_15M set_canister_mode chat-principal
+    (variant { Ok = record { status_code = 200 : nat16 } })
+    ```
+  - Upload the 15M parameter model & tokenizer:
+    _(We have included a fine-tuned model based on a 4096-token tokenizer)_
+    ```bash
+    python -m scripts.upload --network local --canister llama2_15M --model models/stories15Mtok4096.bin --tokenizer tokenizers/tok4096.bin
+    ```
+  - Check the readiness endpoint, which indicates the canister can be used for inference:
+    ```bash
+    $ dfx canister call llama2_15M ready
+    (variant { Ok = record { status_code = 200 : nat16 } })
+    ```
+
+- Test it with dfx:
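+
+  The `inference` endpoint takes a Candid record with five fields: `prompt` (text appended to the current chat), `steps` (tokens to generate in this call), `temperature` (0.0 = greedy & deterministic, higher = more random; per the comments in run.c, don't set it above 1.0), `topp` (top-p nucleus sampling cutoff, 1.0 = off), and `rng_seed`. A minimal call sequence looks like this (a sketch; full examples with real output follow below):
+
+    ```bash
+    # Start a fresh chat, then greedily generate 60 tokens
+    dfx canister call llama2_15M new_chat '()'
+    dfx canister call llama2_15M inference '(record {prompt = "" : text; steps = 60 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+    ```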
-  ```powershell
-  # in Windows PowerShell (Miniconda recommended)
-  if (-not (Test-Path -Path .\models)) {
-    New-Item -Path .\models -ItemType Directory
-  }
-  Invoke-WebRequest -Uri https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.bin -OutFile .\models\stories15M.bin
-  ```
+  - Generate a new story, 60 tokens at a time, starting with an empty prompt:
+
+    _(Your story will be slightly different, because temperature > 0.0)_
+
+    ```bash
+    $ dfx canister call llama2_15M new_chat '()'
+    (variant { Ok = record { status_code = 200 : nat16 } })
+
+    $ dfx canister call llama2_15M inference '(record {prompt = "" : text; steps = 60 : nat64; temperature = 0.1 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+    (
+      variant {
+        Ok = record {
+          num_tokens = 60 : nat64;
+          inference = "Once upon a time, there was a little girl named Lily. She loved to play outside in the park. One day, she saw a big tree with a swing hanging from it. She ran to the swing and started to swing back and forth. It was so much fun!\nSuddenly,";
+        }
+      },
+    )
+
+    $ dfx canister call llama2_15M inference '(record {prompt = "" : text; steps = 60 : nat64; temperature = 0.1 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+    (
+      variant {
+        Ok = record {
+          num_tokens = 60 : nat64;
+          inference = " Lily saw a boy who was crying. She asked him what was wrong. The boy said he lost his toy car. Lily felt sad for him and wanted to help. She asked the boy if he wanted to play with her. The boy smiled and said yes.\nLily and the boy played together";
+        }
+      },
+    )
+
+    # etc.
+    # If you keep going, at some point the LLM will end the story
+    ```
+
+  - Now generate a new story, starting with your own, non-empty prompt:
+
+    ```bash
+    $ dfx canister call llama2_15M new_chat '()'
+    (variant { Ok = record { status_code = 200 : nat16 } })
+
+    $ dfx canister call llama2_15M inference '(record {prompt = "Timmy climbed in a tree" : text; steps = 60 : nat64; temperature = 0.1 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+    (
+      variant {
+        Ok = record {
+          num_tokens = 5 : nat64;
+          inference = "Timmy climbed in a tree";
+        }
+      },
+    )
+
+    $ dfx canister call llama2_15M inference '(record {prompt = "" : text; steps = 60 : nat64; temperature = 0.1 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+    (
+      variant {
+        Ok = record {
+          num_tokens = 60 : nat64;
+          inference = ". He was so excited to see what was on the roof. He looked up and saw a big bird. It was so big and it was so high up. Timmy wanted to get closer to the bird, so he started to climb.\nHe climbed and climbed until he reached the roof.";
+        }
+      },
+    )
+
+    # etc.
+    # If you keep going, at some point the LLM will end the story
+    ```
+
+# Next steps
+
+You will also notice that using dfx to generate stories is not very user-friendly. We created a little React frontend, available as an open-source project at https://github.com/icppWorld/icgpt and deployed to the IC as [ICGPT](https://icgpt.icpp.world/).
+
+# llama2_260K
+
+For quick tests, we have included a very small model with only 260K parameters, fine-tuned with a 512-token tokenizer.
+
+- model: stories260K/stories260K.bin
+- tokenizer: stories260K/tok512.bin
+
+The CI/CD (a GitHub Actions workflow) and the demo_pytest.sh script are based on this model.
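+
+For a quick local smoke test with this model, the flow mirrors the 15M steps above (a sketch; the full sequence is scripted in demo.sh):
+
+```bash
+dfx deploy llama2_260K
+dfx canister call llama2_260K set_canister_mode chat-principal
+python -m scripts.upload --canister llama2_260K --model stories260K/stories260K.bin --tokenizer stories260K/tok512.bin
+dfx canister call llama2_260K ready
+```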
+
+# demo_pytest.sh
 
 - The demo_pytest.sh script starts the local network, deploys llama2_260K, uploads the model & tokenizer, and runs the QA with pytest:
-  - `./demo_pytest.sh` , on Linux / Mac
-- The *demo* script starts the local network, deploys llama2, uploads the model & tokenizer, and generates two stories:
-  - `./demo.sh` , on Linux / Mac
+  - `./demo_pytest.sh` , on Linux / Mac
+
+# demo shell scripts
+
+- The _demo_ script starts the local network, deploys llama2, uploads the model & tokenizer, and generates two stories:
+  - `./demo.sh` , on Linux / Mac
   - `.\demo.ps1` , in Windows PowerShell (Miniconda recommended)
 
-This screenshot shows the generation of the second story:
+# More models
+
+- You can get other model checkpoints, as explained in [karpathy/llama2.c](https://github.com/karpathy/llama2.c):
+
+  This command downloads the 15M parameter model that was trained on the TinyStories dataset (~60MB download) and stores it in a `models` folder:
+
+  ```bash
+  # on Linux/Mac
+  mkdir -p models
+  wget -P models https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.bin
+  ```
+
+  ```powershell
+  # in Windows PowerShell (Miniconda recommended)
+  if (-not (Test-Path -Path .\models)) {
+    New-Item -Path .\models -ItemType Directory
+  }
+  Invoke-WebRequest -Uri https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.bin -OutFile .\models\stories15M.bin
+  ```
+
+# Deploying to the IC mainnet
 
-![icpp_llama2_without_limits](../assets/icpp_llama2_without_limits.png)
+- Deploying to the IC mainnet works as usual, but you will likely run into a time-out error during upload of the model. You have to patch ic-py as described here:
 
-- To deploy all the LLM canisters and upload their models + tokenizers:
   ```bash
-  # ---------------------------------------------------------------
-  # Build WASM & deploy
-  icpp build-wasm
-  dfx deploy
-
-  # Set the canister mode to either 'chat-principal' or 'nft-ordinal'
-  # for normal LLMs
-  dfx canister call llama2_260K set_canister_mode chat-principal
-  dfx canister call llama2 set_canister_mode chat-principal
-  dfx canister call llama2_42M set_canister_mode chat-principal
-  dfx canister call llama2_110M set_canister_mode chat-principal
-
-  # for NFT LLMs controlled by bitcoin ordinals
-  dfx canister call <...> set_canister_mode nft-ordinal
-
-  # ---------------------------------------------------------------
-  # Call `nft_init`
-
-  # For the ICGPT backend canisters, minting is not supported:
-  python -m scripts.nft_init --network local --canister llama2_260K --nft-supply-cap 0 --nft-symbol "" --nft-name "" --nft-description ""
-  python -m scripts.nft_init --network local --canister llama2 --nft-supply-cap 0 --nft-symbol "" --nft-name "" --nft-description ""
-  python -m scripts.nft_init --network local --canister llama2_42M --nft-supply-cap 0 --nft-symbol "" --nft-name "" --nft-description ""
-  python -m scripts.nft_init --network local --canister llama2_110M --nft-supply-cap 0 --nft-symbol "" --nft-name "" --nft-description ""
-
-  # For an NFT canister: Initialize the NFT Collection
-  python -m scripts.nft_init --network local --canister <...> --nft-supply-cap <...> --nft-symbol "..." --nft-name "..." --nft-description "..."
-
-  # ---------------------------------------------------------------
-  # Upload the models & tokenizers to the canisters
-  # Notes:
-  #  (-) The storiesXX.bin files are not included in the github repo
-  #  (-) See `Get a model checkpoint` above
-  #  (-) The default tokenizer.bin is included in the github repo
-  #  (-) See `stories260k` below how to build the tok512.bin for the stories260K model
-  #  (-) Use `--network ic` when deploying to mainnet
   #
   # IMPORTANT: ic-py will throw a timeout => patch it here:
   # /home/arjaan/miniconda3/envs/icpp-pro-w-llama2/lib/python3.11/site-packages/httpx/_config.py
@@ -97,74 +199,10 @@
   #  ENDPATCH
   self._sock.settimeout(timeout)
   return self._sock.recv(max_bytes)
-
-  # The ICGPT backend canisters
-  python -m scripts.upload --network local --canister llama2_260K --model stories260K/stories260K.bin --tokenizer stories260K/tok512.bin
-
-  python -m scripts.upload --network local --canister llama2 --model models/stories15Mtok4096.bin --tokenizer tokenizers/tok4096.bin
-  python -m scripts.upload --network local --canister llama2_42M --model models/stories42Mtok4096.bin --tokenizer tokenizers/tok4096.bin
-
-  python -m scripts.upload --network local --canister llama2_110M --model models/stories110M.bin --tokenizer tokenizers/tokenizer.bin
-
-  #
-  # The NFT canister
-  python -m scripts.upload --network local --canister <...> --model <...> --tokenizer <...>
-
-  # ---------------------------------------------------------------
-  # For an NFT canister: mint the NFTs
-  => TODO
-
-  # ---------------------------------------------------------------
-  # Run tests
-  pytest
-
-  ```
-# stories260K
-
-The default model is `stories15M.bin`, with `tokenizer.bin`, which contains the default llama2 tokenizer using 32000 tokens.
-
-For testing, it is nice to be able to work with a smaller model & tokenizer:
-- Download the model & tokenizer from [huggingface stories260K](https://huggingface.co/karpathy/tinyllamas/tree/main/stories260K) and store them in:
-  - stories260K/stories260K.bin
-  - stories260K/tok512.bin
-  - stories260K/tok512.model
-- Deploy the canister:
-  ```bash
-  icpp build-wasm
-  dfx deploy
-  ```
-- Upload the model & tokenizer:
-  ```bash
-  python -m scripts.upload --canister llama2_260K --model stories260K/stories260K.bin --tokenizer stories260K/tok512.bin
-  ```
-- Inference is now possible with many more tokens before hitting the instruction limit, but off course, the stories are not as good:
-  ```bash
-  # Create a new chat
-  $ dfx canister call llama2_260K new_chat '()'
-
-  # Start the chat by providing the starting prompt
-  $ dfx canister call llama2_260K inference '(record {prompt = "Lilly went swimming yesterday " : text; steps = 100 : nat64; temperature = 0.9 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-  (
-    variant {
-      Ok = "Lilly went swimming yesterday order. She had a great eyes that was closed. One day, she asked her mom why the cloud was close to the pond. \n\"Mommy, I will take clothes away,\" Lila said. \"Th\n"
-    },
-  )
-
-  # Continue the current chat by calling again, with an empty prompt
-  $ dfx canister call llama2_260K inference '(record {prompt = "" : text; steps = 100 : nat64; temperature = 0.9 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-  (
-    variant {
-      Ok = "eone replace it.\nThe fox agreed to go as fast as they set on the other birds. They searched, and it didn\'t give up. They started to scared the bird. The forest was so careful and jumped up."
-    },
-  )
-
-  # Retrieve your full story, by calling with curl, passing the principal by which the LLM knows you in the body
-  $ dfx canister call llama2_260K whoami
-  '("")'
-
-  $ curl -X GET -d '{"principal":""}' http://localhost:$(dfx info webserver-port)?canisterId=$(dfx canister id llama2_260K)
+  # Now, this command should work
+  python -m scripts.upload --network local --canister llama2_15M --model models/stories15Mtok4096.bin --tokenizer tokenizers/tok4096.bin
   ```
@@ -188,10 +226,10 @@ make run
 
 # Fine tuning
 
-  When making your own checkpoint via fine-tuning, make sure to train with the correct version of [karpathy/llama2.c](https://github.com/karpathy/llama2.c):
+When making your own checkpoint via fine-tuning, make sure to train with the correct version of [karpathy/llama2.c](https://github.com/karpathy/llama2.c):
 
-  | release | commit sha |
-  | --------| ----------------------------------------- |
-  | 0.1.0 | b28c1e26c5ab5660267633e1bdc910a43b7255bf |
-  | 0.2.0 | 57bf0e9ee4bbd61c98c4ad204b72f2b8881ac8cd |
-  | 0.3.0 | b9fb86169f56bd787bb644c62a80bbab56f8dccc |
\ No newline at end of file
+| release | commit sha                               |
+| ------- | ---------------------------------------- |
+| 0.1.0   | b28c1e26c5ab5660267633e1bdc910a43b7255bf |
+| 0.2.0   | 57bf0e9ee4bbd61c98c4ad204b72f2b8881ac8cd |
+| 0.3.0   | b9fb86169f56bd787bb644c62a80bbab56f8dccc |
diff --git a/icpp_llama2/canister_ids.json b/icpp_llama2/canister_ids.json
index 1d85e23..4696707 100644
--- a/icpp_llama2/canister_ids.json
+++ b/icpp_llama2/canister_ids.json
@@ -1,5 +1,5 @@
 {
-  "llama2": {
+  "llama2_15M": {
     "ic": "4c4bn-daaaa-aaaag-abvcq-cai"
   },
   "llama2_110M": {
diff --git a/icpp_llama2/demo.ps1 b/icpp_llama2/demo.ps1
index 795c645..9419173 100644
--- a/icpp_llama2/demo.ps1
+++ b/icpp_llama2/demo.ps1
@@ -51,55 +51,55 @@ wsl --% dfx deploy
 Write-Host " "
 Write-Host "--------------------------------------------------"
 Write-Host "Checking health endpoint"
-wsl --% dfx canister call llama2 health
+wsl --% dfx canister call llama2_15M health
 
 #######################################################################
 Write-Host " "
 Write-Host "--------------------------------------------------"
 Write-Host "Upload the model & tokenizer for 260K and 15M"
 python -m scripts.upload --canister llama2_260K --model stories260K/stories260K.bin --tokenizer stories260K/tok512.bin
-python -m scripts.upload --canister llama2 --model models/stories15M.bin --tokenizer tokenizers/tokenizer.bin
+python -m scripts.upload --canister llama2_15M --model models/stories15Mtok4096.bin --tokenizer tokenizers/tok4096.bin
 
 #######################################################################
 Write-Host " "
 Write-Host "--------------------------------------------------"
 Write-Host "Checking readiness endpoint"
-wsl --% dfx canister call llama2 ready
+wsl --% dfx canister call llama2_15M ready
 
 #######################################################################
 Write-Host " "
 Write-Host "--------------------------------------------------"
 Write-Host "Generate a new story, 10 tokens at a time, starting with an empty prompt."
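+# Note: the calls below use temperature = 0.0 (greedy sampling), so, unlike
+# the temperature > 0.0 examples in the README, the demo output is deterministic.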
-wsl --% dfx canister call llama2 new_chat '()'
-wsl --% dfx canister call llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-wsl --% dfx canister call llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-wsl --% dfx canister call llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-wsl --% dfx canister call llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-wsl --% dfx canister call llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-wsl --% dfx canister call llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-wsl --% dfx canister call llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-wsl --% dfx canister call llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+wsl --% dfx canister call llama2_15M new_chat '()'
+wsl --% dfx canister call llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+wsl --% dfx canister call llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+wsl --% dfx canister call llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+wsl --% dfx canister call llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+wsl --% dfx canister call llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+wsl --% dfx canister call llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+wsl --% dfx canister call llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+wsl --% dfx canister call llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
 
 Write-Host " "
 Write-Host "--------------------------------------------------"
 Write-Host "Generate a new story, 10 tokens at a time, using a starting prompt"
-wsl --% dfx canister call llama2 new_chat '()'
+wsl --% dfx canister call llama2_15M new_chat '()'
 
 # You can build the prompt in multiple calls
-wsl --% dfx canister call llama2 inference '(record {prompt = "Lilly went to" : text; steps = 0 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-wsl --% dfx canister call llama2 inference '(record {prompt = "the beach this morning." : text; steps = 0 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-wsl --% dfx canister call llama2 inference '(record {prompt = "She saw a little boat" : text; steps = 0 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-wsl --% dfx canister call llama2 inference '(record {prompt = "with her friend Billy" : text; steps = 0 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+wsl --% dfx canister call llama2_15M inference '(record {prompt = "Lilly went to" : text; steps = 0 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+wsl --% dfx canister call llama2_15M inference '(record {prompt = "the beach this morning." : text; steps = 0 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+wsl --% dfx canister call llama2_15M inference '(record {prompt = "She saw a little boat" : text; steps = 0 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+wsl --% dfx canister call llama2_15M inference '(record {prompt = "with her friend Billy" : text; steps = 0 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
 
 # Followed by building out the story
-wsl --% dfx canister call llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-wsl --% dfx canister call llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-wsl --% dfx canister call llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-wsl --% dfx canister call llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-wsl --% dfx canister call llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-wsl --% dfx canister call llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-wsl --% dfx canister call llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-wsl --% dfx canister call llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-wsl --% dfx canister call llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-wsl --% dfx canister call llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+wsl --% dfx canister call llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+wsl --% dfx canister call llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+wsl --% dfx canister call llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+wsl --% dfx canister call llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+wsl --% dfx canister call llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+wsl --% dfx canister call llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+wsl --% dfx canister call llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+wsl --% dfx canister call llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+wsl --% dfx canister call llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+wsl --% dfx canister call llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
 
 # #######################################################################
 # Write-Host " "
diff --git a/icpp_llama2/demo.sh b/icpp_llama2/demo.sh
index 058347e..76a1b5a 100755
--- a/icpp_llama2/demo.sh
+++ b/icpp_llama2/demo.sh
@@ -25,77 +25,77 @@ echo " "
 echo "--------------------------------------------------"
 echo "Deploying the wasm to a canister on the local network"
 dfx deploy llama2_260K
-dfx deploy llama2
+dfx deploy llama2_15M
 
 #######################################################################
 echo " "
 echo "--------------------------------------------------"
 echo "Setting canister_mode to chat-principal"
 dfx canister call llama2_260K set_canister_mode chat-principal
-dfx canister call llama2 set_canister_mode chat-principal
+dfx canister call llama2_15M set_canister_mode chat-principal
 
 #######################################################################
 echo " "
 echo "--------------------------------------------------"
 echo "Checking health endpoint"
 dfx canister call llama2_260K health
-dfx canister call llama2 health
+dfx canister call llama2_15M health
 
 #######################################################################
 echo " "
 echo "--------------------------------------------------"
 echo "Initializing the canister configurations"
 python -m scripts.nft_init --network local --canister llama2_260K --nft-supply-cap 0 --nft-symbol "" --nft-name "" --nft-description ""
-python -m scripts.nft_init --network local --canister llama2 --nft-supply-cap 0 --nft-symbol "" --nft-name "" --nft-description ""
+python -m scripts.nft_init --network local --canister llama2_15M --nft-supply-cap 0 --nft-symbol "" --nft-name "" --nft-description ""
 
 #######################################################################
 echo " "
 echo "--------------------------------------------------"
 echo "Upload the model & tokenizer for 260K and 15M"
 python -m scripts.upload --canister llama2_260K --model stories260K/stories260K.bin --tokenizer stories260K/tok512.bin
-python -m scripts.upload --canister llama2 --model models/stories15M.bin --tokenizer tokenizers/tokenizer.bin
+python -m scripts.upload --canister llama2_15M --model models/stories15Mtok4096.bin --tokenizer tokenizers/tok4096.bin
 
 #######################################################################
 echo " "
 echo "--------------------------------------------------"
 echo "Checking readiness endpoint"
 dfx canister call llama2_260K ready
-dfx canister call llama2 ready
+dfx canister call llama2_15M ready
 
 #######################################################################
 echo " "
 echo "--------------------------------------------------"
 echo "Generate a new story, 10 tokens at a time, starting with an empty prompt."
-dfx canister call llama2 new_chat '()'
-dfx canister call llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call llama2_15M new_chat '()'
+dfx canister call llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
 
 echo " "
 echo "--------------------------------------------------"
 echo "Generate a new story, 10 tokens at a time, using a starting prompt"
-dfx canister call llama2 new_chat '()'
+dfx canister call llama2_15M new_chat '()'
 
 # You can build the prompt in multiple calls
-dfx canister call llama2 inference '(record {prompt = "Lilly went to" : text; steps = 0 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call llama2 inference '(record {prompt = "the beach this morning." : text; steps = 0 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call llama2 inference '(record {prompt = "She saw a little boat" : text; steps = 0 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call llama2 inference '(record {prompt = "with her friend Billy" : text; steps = 0 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call llama2_15M inference '(record {prompt = "Lilly went to" : text; steps = 0 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call llama2_15M inference '(record {prompt = "the beach this morning." : text; steps = 0 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call llama2_15M inference '(record {prompt = "She saw a little boat" : text; steps = 0 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call llama2_15M inference '(record {prompt = "with her friend Billy" : text; steps = 0 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
 
 # Followed by building out the story
-dfx canister call llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
 
 # #######################################################################
 # echo " "
diff --git a/icpp_llama2/demo1-ic.sh b/icpp_llama2/demo1-ic.sh
index 857a760..2f45862 100755
--- a/icpp_llama2/demo1-ic.sh
+++ b/icpp_llama2/demo1-ic.sh
@@ -9,20 +9,20 @@
 # $ dfx identity new demo1
 echo " "
 echo "--------------------------------------------------"
-echo "Generate a new story using llama2 (15M model), 10 tokens at a time, starting with an empty prompt."
-dfx canister call --identity demo1 --network ic llama2 new_chat '()'
-dfx canister call --identity demo1 --network ic llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call --identity demo1 --network ic llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call --identity demo1 --network ic llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call --identity demo1 --network ic llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call --identity demo1 --network ic llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call --identity demo1 --network ic llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call --identity demo1 --network ic llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call --identity demo1 --network ic llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call --identity demo1 --network ic llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call --identity demo1 --network ic llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call --identity demo1 --network ic llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call --identity demo1 --network ic llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+echo "Generate a new story using llama2_15M, 10 tokens at a time, starting with an empty prompt."
+dfx canister call --identity demo1 --network ic llama2_15M new_chat '()'
+dfx canister call --identity demo1 --network ic llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call --identity demo1 --network ic llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call --identity demo1 --network ic llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call --identity demo1 --network ic llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call --identity demo1 --network ic llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call --identity demo1 --network ic llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call --identity demo1 --network ic llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call --identity demo1 --network ic llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call --identity demo1 --network ic llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call --identity demo1 --network ic llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call --identity demo1 --network ic llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call --identity demo1 --network ic llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
 
 echo " "
 echo "--------------------------------------------------"
diff --git a/icpp_llama2/demo2-ic.sh b/icpp_llama2/demo2-ic.sh
index 424fcf5..d87f593 100755
--- a/icpp_llama2/demo2-ic.sh
+++ b/icpp_llama2/demo2-ic.sh
@@ -10,20 +10,20 @@
 echo " "
 echo "--------------------------------------------------"
 echo "Generate a new story, 10 tokens at a time, using a starting prompt"
-dfx canister call --identity demo2 --network ic llama2 new_chat '()'
+dfx canister call --identity demo2 --network ic llama2_15M new_chat '()'
 
 # You can build the prompt in multiple calls
-dfx canister call --identity demo2 --network ic llama2 inference '(record {prompt = "Lilly went to" : text; steps = 0 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call --identity demo2 --network ic llama2 inference '(record {prompt = "the beach this morning." : text; steps = 0 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call --identity demo2 --network ic llama2 inference '(record {prompt = "She saw a little boat" : text; steps = 0 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call --identity demo2 --network ic llama2 inference '(record {prompt = "with her friend Billy" : text; steps = 0 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call --identity demo2 --network ic llama2_15M inference '(record {prompt = "Lilly went to" : text; steps = 0 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call --identity demo2 --network ic llama2_15M inference '(record {prompt = "the beach this morning." : text; steps = 0 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call --identity demo2 --network ic llama2_15M inference '(record {prompt = "She saw a little boat" : text; steps = 0 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call --identity demo2 --network ic llama2_15M inference '(record {prompt = "with her friend Billy" : text; steps = 0 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
 
 # Followed by building out the story
-dfx canister call --identity demo2 --network ic llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call --identity demo2 --network ic llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call --identity demo2 --network ic llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call --identity demo2 --network ic llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call --identity demo2 --network ic llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call --identity demo2 --network ic llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call --identity demo2 --network ic llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call --identity demo2 --network ic llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call --identity demo2 --network ic llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call --identity demo2 --network ic llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
\ No newline at end of file
+dfx canister call --identity demo2 --network ic llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call --identity demo2 --network ic llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call --identity demo2 --network ic llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call --identity demo2 --network ic llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call --identity demo2 --network ic llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call --identity demo2 --network ic llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call --identity demo2 --network ic llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call --identity demo2 --network ic llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call --identity demo2 --network ic llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call --identity demo2 --network ic llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
\ No newline at end of file
diff --git a/icpp_llama2/demo2.sh b/icpp_llama2/demo2.sh
index ac12211..02da7bd 100755
--- a/icpp_llama2/demo2.sh
+++ b/icpp_llama2/demo2.sh
@@ -10,20 +10,20 @@
 echo " "
 echo "--------------------------------------------------"
 echo "Generate a new story, 10 tokens at a time, using a starting prompt"
-dfx canister call --identity demo2 llama2 new_chat '()'
+dfx canister call --identity demo2 llama2_15M new_chat '()'
 
 # You can build the prompt in multiple calls
-dfx canister call --identity demo2 llama2 inference '(record {prompt = "Lilly went to" : text; steps = 0 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call --identity demo2 llama2 inference '(record {prompt = "the beach this morning." : text; steps = 0 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call --identity demo2 llama2 inference '(record {prompt = "She saw a little boat" : text; steps = 0 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call --identity demo2 llama2 inference '(record {prompt = "with her friend Billy" : text; steps = 0 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call --identity demo2 llama2_15M inference '(record {prompt = "Lilly went to" : text; steps = 0 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call --identity demo2 llama2_15M inference '(record {prompt = "the beach this morning." : text; steps = 0 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call --identity demo2 llama2_15M inference '(record {prompt = "She saw a little boat" : text; steps = 0 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call --identity demo2 llama2_15M inference '(record {prompt = "with her friend Billy" : text; steps = 0 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
 
 # Followed by building out the story
-dfx canister call --identity demo2 llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call --identity demo2 llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call --identity demo2 llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call --identity demo2 llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call --identity demo2 llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call --identity demo2 llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call --identity demo2 llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call --identity demo2 llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call --identity demo2 llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
-dfx canister call --identity demo2 llama2 inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
\ No newline at end of file
+dfx canister call --identity demo2 llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call --identity demo2 llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call --identity demo2 llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call --identity demo2 llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call --identity demo2 llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call --identity demo2 llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call --identity demo2 llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call --identity demo2 llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call --identity demo2 llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
+dfx canister call --identity demo2 llama2_15M inference '(record {prompt = "" : text; steps = 10 : nat64; temperature = 0.0 : float32; topp = 0.9 : float32; rng_seed = 0 : nat64;})'
\ No newline at end of file
diff --git a/icpp_llama2/dfx.json b/icpp_llama2/dfx.json
index 6a6b74d..384e3eb 100644
--- a/icpp_llama2/dfx.json
+++ b/icpp_llama2/dfx.json
@@ -6,7 +6,7 @@
       "candid": "src/llama2.did",
       "wasm": "build/llama2.wasm"
     },
-    "llama2": {
+    "llama2_15M": {
       "type": "custom",
       "candid": "src/llama2.did",
       "wasm": "build/llama2.wasm"
diff --git a/icpp_llama2/models/stories15Mtok4096.bin b/icpp_llama2/models/stories15Mtok4096.bin
new file mode 100644
index 0000000..e09c9f7
Binary files /dev/null and b/icpp_llama2/models/stories15Mtok4096.bin differ
diff --git a/icpp_llama2/native/main.cpp b/icpp_llama2/native/main.cpp
index 79e92a9..06d87da 100644
--- a/icpp_llama2/native/main.cpp
+++ b/icpp_llama2/native/main.cpp
@@ -86,7 +86,7 @@ int main() {
     tokenizer_path = "stories260K/tok512.bin";
   } else if (model_to_use == 2) {
     // Use this during final QA
-    model_path = "models/stories15M.bin";
+    model_path = "models/stories15Mtok4096.bin";
     tokenizer_path = "tokenizers/tokenizer.bin";
   } else if (model_to_use == 3) {
     model_path = "models/stories42M.bin";
diff --git a/icpp_llama2/scripts/icpp_llama2_sizer.py b/icpp_llama2/scripts/icpp_llama2_sizer.py
index 0c7a760..8a46624 100644
--- a/icpp_llama2/scripts/icpp_llama2_sizer.py
+++ b/icpp_llama2/scripts/icpp_llama2_sizer.py
@@ -1,4 +1,5 @@
 """Calculates the required resources to deploy a Llama2 model to an IC canister"""
+
 # pylint: disable=invalid-name
 import sys
 import struct
@@ -232,7 +233,7 @@ def main() -> int:
     """Reads the model.bin files and summarizes the resource requirements."""
     file_paths: dict[str, Path] = {
         "260K": ROOT_PATH / "stories260K/stories260K.bin",
-        "15M": ROOT_PATH / "models/stories15M.bin",
+        "15M": ROOT_PATH / "models/stories15Mtok4096.bin",
        "42M": ROOT_PATH / "models/stories42M.bin",
         "110M": ROOT_PATH / "models/stories110M.bin",
     }
@@ -243,7 +244,7 @@ def main() -> int:
         data[key] = calculate_memory(config)
 
     output_path = ROOT_PATH / "README_icpp_llama2_resource_requirements.md"
-    with open(output_path, "w") as file:
+    with open(output_path, "w", encoding="utf-8") as file:
         file.write("# Canister resource requirements for icpp_llama2.")
         file.write("\n")
         file.write("\nDo not edit this file. It is created with the command: ")
diff --git a/icpp_llama2/scripts/parse_args_upload.py b/icpp_llama2/scripts/parse_args_upload.py
index fc34aa8..543a17d 100644
--- a/icpp_llama2/scripts/parse_args_upload.py
+++ b/icpp_llama2/scripts/parse_args_upload.py
@@ -33,14 +33,14 @@ def parse_args() -> argparse.Namespace:
     parser.add_argument(
         "--model",
         type=str,
-        default="models/stories15M.bin",
-        help="Model file (e.g. models/stories15M.bin)",
+        default="models/stories15Mtok4096.bin",
+        help="Model file (e.g. models/stories15Mtok4096.bin)",
     )
     parser.add_argument(
         "--tokenizer",
         type=str,
-        default="tokenizers/tokenizer.bin",
-        help="Tokenizer file (e.g. tokenizers/tokenizer.bin)",
+        default="tokenizers/tok4096.bin",
+        help="Tokenizer file (e.g. tokenizers/tok4096.bin)",
     )
     parser.add_argument(
         "--chunksize",
diff --git a/icpp_llama2/scripts/upload.py b/icpp_llama2/scripts/upload.py
index 505d723..8f1132f 100644
--- a/icpp_llama2/scripts/upload.py
+++ b/icpp_llama2/scripts/upload.py
@@ -23,7 +23,7 @@
 # ------------------------------------------------------------------------------
 def read_file_bytes(file_path: Path) -> bytes:
-    """Returns the stories15M.bin file as a bytes array"""
+    """Returns the stories15Mtok4096.bin file as a bytes array"""
     file_bytes = b""
     try:
         with open(file_path, "rb") as file:
diff --git a/icpp_llama2/src/initialize.cpp b/icpp_llama2/src/initialize.cpp
index 8b49e59..dbf8367 100644
--- a/icpp_llama2/src/initialize.cpp
+++ b/icpp_llama2/src/initialize.cpp
@@ -21,7 +21,7 @@ Tokenizer tokenizer;
 
 // -----------------------------------------------------------------------
 // This is an exact copy of code in this method run.c,
-// Modified to read the data from the uploaded tokenizer.bin bytes
+// Modified to read the data from the uploaded tok4096.bin bytes
 bool build_tokenizer(Tokenizer *t, int vocab_size, IC_API &ic_api) {
   if (!p_tokenizer_bytes or
       (p_tokenizer_bytes && p_tokenizer_bytes->vec.size() == 0)) {
@@ -147,7 +147,7 @@ bool build_tokenizer(Tokenizer *t, int vocab_size, IC_API &ic_api) {
 // - read_checkpoint
 // - build_transformer
 // - free_transformer
-// Modified to read the data from the uploaded tokenizer.bin bytes
+// Modified to read the data from the uploaded tok4096.bin bytes
 void read_checkpoint(Config *config, TransformerWeights *weights) {
   // FILE *file = fopen(checkpoint, "rb");
   // if (!file) {
diff --git a/icpp_llama2/src/run.c b/icpp_llama2/src/run.c
index d59dbd0..089b52e 100644
--- a/icpp_llama2/src/run.c
+++ b/icpp_llama2/src/run.c
@@ -960,7 +960,7 @@ int sample(Sampler* sampler, float* logits) {
 
 // // default parameters
 // char *checkpoint_path = NULL;  // e.g. out/model.bin
-// char *tokenizer_path = "tokenizer.bin";
+// char *tokenizer_path = "tok4096.bin";
 // float temperature = 1.0f;   // 0.0 = greedy deterministic. 1.0 = original. don't set higher
 // float topp = 0.9f;          // top-p in nucleus sampling. 1.0 = off. 0.9 works well, but slower
 // int steps = 256;            // number of steps to run for
diff --git a/icpp_llama2/src/upload.cpp b/icpp_llama2/src/upload.cpp
index 9c324b0..cd8cfe5 100644
--- a/icpp_llama2/src/upload.cpp
+++ b/icpp_llama2/src/upload.cpp
@@ -1,5 +1,5 @@
 // Endpoints for uploading the trained model & the tokenizer files
-// eg. models/stories15M.bin & tokenizers/tokenizer.bin
+// eg. models/stories15Mtok4096.bin & tokenizers/tok4096.bin
 
 #include "upload.h"
 
@@ -135,7 +135,7 @@ void reset_tokenizer() {
   ic_api.to_wire(CandidTypeVariant{"Ok", status_code_record});
 }
 
-// Endpoint for uploading the stories15M.bin file as bytes
+// Endpoint for uploading the stories15Mtok4096.bin file as bytes
 void upload_model_bytes_chunk() {
   IC_API ic_api(CanisterUpdate{std::string(__func__)}, false);
   if (!is_canister_owner(ic_api)) return;
@@ -156,7 +156,7 @@ void upload_model_bytes_chunk() {
   ic_api.to_wire(CandidTypeVariant{"Ok", status_code_record});
 }
 
-// Endpoint for uploading the tokenizer.bin file as bytes
+// Endpoint for uploading the tok4096.bin file as bytes
 void upload_tokenizer_bytes_chunk() {
   IC_API ic_api(CanisterUpdate{std::string(__func__)}, false);
   if (!is_canister_owner(ic_api)) return;
diff --git a/icpp_llama2/src/upload.h b/icpp_llama2/src/upload.h
index 1772b0d..4702bc8 100644
--- a/icpp_llama2/src/upload.h
+++ b/icpp_llama2/src/upload.h
@@ -4,14 +4,14 @@
 #include <string>
 #include <vector>
 
-// The uploaded bytes of the trained model (eg. models/stories15M.bin)
+// The uploaded bytes of the trained model (eg. models/stories15Mtok4096.bin)
 class ModelBytes {
 public:
   std::vector<uint8_t> vec;
 };
 extern ModelBytes *p_model_bytes;
 
-// The uploaded bytes of the tokenizer (eg. tokenizers/tokenizer.bin)
+// The uploaded bytes of the tokenizer (eg. tokenizers/tok4096.bin)
 class TokenizerBytes {
 public:
   std::vector<uint8_t> vec;
 };
diff --git a/icpp_llama2/test/conftest.py b/icpp_llama2/test/conftest.py
index 83010bd..00b5fbe 100644
--- a/icpp_llama2/test/conftest.py
+++ b/icpp_llama2/test/conftest.py
@@ -1,6 +1,7 @@
 """The pytest fixtures
 https://docs.pytest.org/en/latest/fixture.html
 """
+
 # pylint: disable=missing-function-docstring, unused-import, wildcard-import, unused-wildcard-import
 import pytest
 from icpp.conftest_base import *  # pytest fixtures provided by icpp
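The QA that demo_pytest.sh runs goes through the pytest fixtures wired up in this conftest.py. As a rough, hypothetical sketch of a smoke test in the same spirit (shelling out to dfx instead of using the icpp.conftest_base fixtures, which the repo's real tests rely on):

```python
# test_smoke.py - hypothetical example; not how the repo's tests are wired
import subprocess

def test_llama2_260K_health() -> None:
    # Query the health endpoint of the locally deployed canister via dfx
    result = subprocess.run(
        ["dfx", "canister", "call", "llama2_260K", "health"],
        capture_output=True, text=True, check=True,
    )
    # The canister replies with a Candid variant record containing status_code = 200
    assert "status_code = 200" in result.stdout
```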