diff --git a/README.md b/README.md index f0a3c8f..6d1e12c 100755 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ Transform texts in a hundred different [languages](https://github.com/artitw/tex ## Colab Notebooks -* Assistant (free private chatGPT alternative) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1K6Kk80w9vjFZ7PL9dPRgVuOPuaWcY4ae?usp=sharing) +* Assistant (free private ChatGPT LLM alternative) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1K6Kk80w9vjFZ7PL9dPRgVuOPuaWcY4ae?usp=sharing) * Assistant with knowledge base [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1hkNgpSmmUA-mzUibqz25xq-E8KYOLuVx?usp=sharing) * STF-IDF multilingual search [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1RaWj5SqWvyC2SsCTGg8IAVcl9G5hOB50?usp=sharing) * All examples [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1LE_ifTpOGO5QJCKNQYtZe6c_tjbwnulR) @@ -201,19 +201,16 @@ res = asst.transform([instructions]) #OpenAI API format input_prompts = ["Hello, world!"] -print( - asst.completion_tokens(input_prompts), - asst.completion(input_prompts) -) -#[13] -#['Hello there! How can I help you today? If you have any questions or need assistance with something, feel free to ask.'] +asst.completion_tokens(input_prompts) #[13] +asst.completion(input_prompts) #['Hello there! How can I help you today? If you have any questions or need assistance with something, feel free to ask.'] -results = asst.chat_completion([ +chat_history = [ {"role": "user", "content": "Hi"}, {"role": "assistant", "content": "Hello, how are you?"}, {"role": "user", "content": "What should I do today?"} -]) -#{'role': 'assistant', 'content': '1. Make a list of things to be grateful for.\n2. Go outside and take a walk in nature.\n3. Practice mindfulness meditation.\n4. Connect with a loved one or friend.\n5. Do something kind for someone else.\n6. Engage in a creative activity like drawing or writing.\n7. Read an uplifting book or listen to motivational podcasts.'} +] +asst.chat_completion_tokens(chat_history) #31 +asst.chat_completion(chat_history) #{'role': 'assistant', 'content': '1. Make a list of things to be grateful for.\n2. Go outside and take a walk in nature.\n3. Practice mindfulness meditation.\n4. Connect with a loved one or friend.\n5. Do something kind for someone else.\n6. Engage in a creative activity like drawing or writing.\n7. Read an uplifting book or listen to motivational podcasts.'} ``` - To use a dynamic knowledge base, see [![Q&A Assistant Demo](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1hkNgpSmmUA-mzUibqz25xq-E8KYOLuVx?usp=sharing) - To use with LangChain, see [![LangChain integration](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1K6Kk80w9vjFZ7PL9dPRgVuOPuaWcY4ae?usp=sharing) diff --git a/demos/Text2Text<>LangChain.ipynb b/demos/Text2Text_LLM.ipynb similarity index 68% rename from demos/Text2Text<>LangChain.ipynb rename to demos/Text2Text_LLM.ipynb index 4f62135..5f2e416 100644 --- a/demos/Text2Text<>LangChain.ipynb +++ b/demos/Text2Text_LLM.ipynb @@ -18,7 +18,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -40,6 +40,31 @@ "pip install -qq -U text2text" ] }, + { + "cell_type": "code", + "source": [ + "import text2text as t2t\n", + "asst = t2t.Assistant()\n", + "\n", + "#OpenAI API format\n", + "input_prompts = [\"Hello, world!\"]\n", + "asst.completion_tokens(input_prompts) #[13]\n", + "asst.completion(input_prompts) #['Hello there! How can I help you today? If you have any questions or need assistance with something, feel free to ask.']\n", + "\n", + "chat_history = [\n", + " {\"role\": \"user\", \"content\": \"Hi\"},\n", + " {\"role\": \"assistant\", \"content\": \"Hello, how are you?\"},\n", + " {\"role\": \"user\", \"content\": \"What should I do today?\"}\n", + "]\n", + "asst.chat_completion_tokens(chat_history) #31\n", + "asst.chat_completion(chat_history) #{'role': 'assistant', 'content': '1. Make a list of things to be grateful for.\\n2. Go outside and take a walk in nature.\\n3. Practice mindfulness meditation.\\n4. Connect with a loved one or friend.\\n5. Do something kind for someone else.\\n6. Engage in a creative activity like drawing or writing.\\n7. Read an uplifting book or listen to motivational podcasts.'}" + ], + "metadata": { + "id": "UqprqtKi33IE" + }, + "execution_count": null, + "outputs": [] + }, { "cell_type": "code", "source": [ @@ -60,7 +85,7 @@ "id": "a8aELPRAo4HP", "outputId": "5296cfe9-2b9e-4929-aaa0-ce5a7d5b2420" }, - "execution_count": 6, + "execution_count": null, "outputs": [ { "output_type": "stream", diff --git a/setup.py b/setup.py index 460e5ab..af2595f 100755 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setuptools.setup( name="text2text", - version="1.3.3", + version="1.3.4", author="artitw", author_email="artitw@gmail.com", description="Text2Text: Crosslingual NLP/G toolkit", diff --git a/text2text/assistant.py b/text2text/assistant.py index 34a08f2..28988aa 100644 --- a/text2text/assistant.py +++ b/text2text/assistant.py @@ -24,7 +24,7 @@ def __init__(self, **kwargs): quantize_config=None ) - def preprocess(self, input_lines, retriever=None, **kwargs): + def completion_preprocess(self, input_lines, retriever=None, **kwargs): df = pd.DataFrame({"input_line": input_lines}) if retriever: k = kwargs.get('k', 1) @@ -34,13 +34,13 @@ def preprocess(self, input_lines, retriever=None, **kwargs): return df def completion_tokens(self, input_lines): - df = self.preprocess(input_lines) + df = self.completion_preprocess(input_lines) tok = self.__class__.tokenizer input_ids = tok(df["input_line"].tolist(), return_tensors="pt", padding=True).input_ids return [len(x) for x in input_ids] def transform(self, input_lines, retriever=None, **kwargs): - df = self.preprocess(input_lines, retriever, **kwargs) + df = self.completion_preprocess(input_lines, retriever, **kwargs) temperature = kwargs.get('temperature', 0.7) top_p = kwargs.get('top_p', 0.95) top_k = kwargs.get('top_k', 0) @@ -68,12 +68,20 @@ def transform(self, input_lines, retriever=None, **kwargs): completion = transform - def chat_completion(self, input_lines, **kwargs): - chat_history = [] - for line in input_lines: - chat_history.append(f'{line["role"].upper()}: {line["content"]}') + def chat_completion_preprocess(self, messages): + chat_history = [f'{line["role"].upper()}: {line["content"]}' for line in messages] chat_history.append("ASSISTANT: ") input_prompt = "\n".join(chat_history) + return input_prompt + + def chat_completion_tokens(self, messages): + input_prompt = self.chat_completion_preprocess(messages) + tok = self.__class__.tokenizer + input_ids = tok([input_prompt], return_tensors="pt", padding=True).input_ids[0] + return len(input_ids) + + def chat_completion(self, messages, **kwargs): + input_prompt = self.chat_completion_preprocess(messages) temperature = kwargs.get('temperature', 0.7) top_p = kwargs.get('top_p', 0.95)