From febea1301025856fd7f79bc19a11e94c4496ac32 Mon Sep 17 00:00:00 2001
From: egcash <85733533+egcash@users.noreply.github.com>
Date: Thu, 15 Aug 2024 10:44:55 +0530
Subject: [PATCH 1/3] =?UTF-8?q?Added=20Tokenizer=20for=20Claude=20Models?=
 =?UTF-8?q?=20=E2=9C=85?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tokencost/costs.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/tokencost/costs.py b/tokencost/costs.py
index feb931a..bfe0208 100644
--- a/tokencost/costs.py
+++ b/tokencost/costs.py
@@ -1,8 +1,10 @@
+
 """
 Costs dictionary and utility tool for counting tokens
 """
 import tiktoken
+import anthropic
 from typing import Union, List, Dict
 from .constants import TOKEN_COSTS
 from decimal import Decimal
@@ -39,6 +41,18 @@ def count_message_tokens(messages: List[Dict[str, str]], model: str) -> int:
     """
     model = model.lower()
     model = strip_ft_model_name(model)
+
+    if "claude-" in model:
+        """
+        Note that this is only accurate for older models, e.g. `claude-2.1`.
+        For newer models this can only be used as a _very_ rough estimate,
+        instead you should rely on the `usage` property in the response for exact counts.
+        """
+        prompt = "".join(message["content"] for message in messages)
+        client = anthropic.Client()
+        token_count = client.count_tokens(prompt)
+        return token_count
+
     try:
         encoding = tiktoken.encoding_for_model(model)
     except KeyError:
@@ -104,6 +118,16 @@ def count_string_tokens(prompt: str, model: str) -> int:
         int: The number of tokens in the text string.
     """
     model = model.lower()
+    if "claude-" in model:
+        """
+        Note that this is only accurate for older models, e.g. `claude-2.1`.
+        For newer models this can only be used as a _very_ rough estimate,
+        instead you should rely on the `usage` property in the response for exact counts.
+        """
+        client = anthropic.Client()
+        token_count = client.count_tokens(prompt)
+        return token_count
+
     try:
         encoding = tiktoken.encoding_for_model(model)
     except KeyError:

From d6ea769351bafa15265c94ef919f4d8158c373a5 Mon Sep 17 00:00:00 2001
From: egcash <85733533+egcash@users.noreply.github.com>
Date: Thu, 15 Aug 2024 10:47:54 +0530
Subject: [PATCH 2/3] New Dependency: Anthropic

---
 pyproject.toml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 4085f4b..0c3405b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -25,7 +25,8 @@ classifiers = [
 ]
 dependencies = [
     "tiktoken>=0.7.0",
-    "aiohttp>=3.9.3"
+    "aiohttp>=3.9.3",
+    "anthropic>=0.34.0"
 ]
 
 [project.optional-dependencies]

From e91c68a71b7edfa29204568ab2e0a30318a97edb Mon Sep 17 00:00:00 2001
From: egcash <85733533+egcash@users.noreply.github.com>
Date: Thu, 15 Aug 2024 11:08:38 +0530
Subject: [PATCH 3/3] Added Claude token Counter

---
 tokencost/costs.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tokencost/costs.py b/tokencost/costs.py
index bfe0208..ce29d0d 100644
--- a/tokencost/costs.py
+++ b/tokencost/costs.py
@@ -49,9 +49,7 @@ def count_message_tokens(messages: List[Dict[str, str]], model: str) -> int:
         instead you should rely on the `usage` property in the response for exact counts.
         """
         prompt = "".join(message["content"] for message in messages)
-        client = anthropic.Client()
-        token_count = client.count_tokens(prompt)
-        return token_count
+        return count_string_tokens(prompt,model)
 
     try:
         encoding = tiktoken.encoding_for_model(model)