Commit

ollama
piEsposito committed Jun 13, 2024
1 parent cd8856c commit ffd9d80
Showing 5 changed files with 140 additions and 2 deletions.
8 changes: 7 additions & 1 deletion README.md
@@ -7,6 +7,7 @@ I want to change LLMs with ease, while knowing what is happening under the hood.
Simple and tiny, that's the goal.

Features:

- OpenAI
- Anthropic
- Async
@@ -15,13 +16,16 @@ Features:
- Vision
- PyPI package `tiny-ai-client`
- Gemini (vision, no tools)

- Ollama (text only, no vision, no tools; you can also pass a custom `model_server_url` to `AI`/`AsyncAI`)

Roadmap:

- Gemini tools

## Simple

`tiny-ai-client` is simple and intuitive:

- Do you want to set your model? Just pass the model name.
- Do you want to change your model? Just change the model name.
- Want to send a message? `msg: str = ai("hello")` and say goodbye to parsing complex JSON.
@@ -31,6 +35,7 @@ Roadmap:
- Video? Just pass a list of `PIL.Image.Image`.

## Tiny

- `tiny-ai-client` is very small: its core logic is < 250 lines of code (including comments and docstrings) and ideally won't exceed 500. It is and always will be easy to understand, tweak and use.
- The core logic is in `tiny_ai_client/models.py`
- Vision utils are in `tiny_ai_client/vision.py`
@@ -60,6 +65,7 @@ response = await ai("What is the meaning of life?")
```

For Anthropic:

```python
from tiny_ai_client import AI, AsyncAI

39 changes: 39 additions & 0 deletions examples/ollama_.py
@@ -0,0 +1,39 @@
import asyncio

from tiny_ai_client import AI, AsyncAI


async def async_ai_main():
print("### ASYNC AI ###")
ai = AsyncAI(
model_name="ollama:llama3",
system="You are Spock, from Star Trek.",
max_new_tokens=128,
model_server_url="http://localhost:11434/api/chat",
)
response = await ai("What is the meaning of life?")
print(f"{response=}")
response = await ai("Did it work?")
print(f"{response=}")
print(f"{ai.chat=}")


def main():
print("### SYNC AI ###")
ai = AI(
model_name="ollama:llama3",
system="You are Spock, from Star Trek.",
max_new_tokens=128,
model_server_url="http://localhost:11434/api/chat",
# tools=[get_current_weather],
)
response = ai("What is the meaning of life?")
print(f"{response=}")
response = ai("Did it work?")
print(f"{response=}")
print(f"{ai.chat=}")


if __name__ == "__main__":
main()
asyncio.run(async_ai_main())
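
Running this example assumes a local Ollama server is reachable at the default `http://localhost:11434/api/chat` endpoint and that the `llama3` model has already been pulled (for example with `ollama pull llama3`).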
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "tiny-ai-client"
version = "0.0.7"
version = "0.0.8"
description = "Tiny AI client for LLMs. As simple as it gets."
authors = ["piEsposito <piero.skywalker@gmail.com>"]
license = "Apache 2.0"
11 changes: 11 additions & 0 deletions tiny_ai_client/models.py
@@ -16,7 +16,9 @@ def __init__(
timeout: int = 30,
tools: List[Union[Callable, Dict]] | None = None,
chat: List["Message"] | None = None,
model_server_url: str | None = None,
):
self.model_server_url = model_server_url
# llm sampling parameters
self.temperature: int = temperature
self.max_new_tokens: int | None = max_new_tokens
@@ -31,6 +33,7 @@ def __init__(

self.model_name: str = model_name
self.system: str = system

self.client_wrapper: LLMClientWrapper = self.get_llm_client_wrapper(
model_name=model_name, tools=self.tools
)
@@ -60,6 +63,7 @@ def __call__(
if response_msg.tool_call:
func = self.tools_dict[response_msg.tool_call.name]
tool_input = json_to_function_input(func, response_msg.tool_call.parameters)
print(f"{tool_input=}")
tool_result = func(tool_input)
response_msg.tool_call.result = tool_result
return response_msg.text or (
@@ -69,6 +73,13 @@ def __call__(
def get_llm_client_wrapper(
self, model_name: str, tools: List[Union[Callable, Dict]]
) -> "LLMClientWrapper":
if model_name.startswith("ollama:"):
from tiny_ai_client.ollama_ import OllamaClientWrapper

kwargs = {}
if self.model_server_url:
kwargs["url"] = self.model_server_url
return OllamaClientWrapper(model_name, tools, **kwargs)
if "gpt" in model_name:
from tiny_ai_client.openai_ import OpenAIClientWrapper

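
Taken together, the dispatch above means a `model_name` prefixed with `ollama:` is routed to the new `OllamaClientWrapper`, and `model_server_url`, when set, overrides the wrapper's default endpoint. A minimal sketch of that path (the host URL below is illustrative, not part of the commit):

```python
from tiny_ai_client import AI

# The "ollama:" prefix selects OllamaClientWrapper; model_server_url is optional
ai = AI(
    model_name="ollama:llama3",
    system="You are Spock, from Star Trek.",
    max_new_tokens=128,
    model_server_url="http://my-ollama-host:11434/api/chat",  # illustrative non-default host
)
print(ai("What is the meaning of life?"))
```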
82 changes: 82 additions & 0 deletions tiny_ai_client/ollama_.py
@@ -0,0 +1,82 @@
from typing import Any, Callable, Dict, List, Union

import aiohttp
import requests

from tiny_ai_client.models import LLMClientWrapper, Message


class OllamaClientWrapper(LLMClientWrapper):
def __init__(
self,
model_name: str,
tools: List[Union[Callable, Dict]],
url: str = "http://localhost:11434/api/chat",
):
self.model_name = model_name.split("ollama:")[1]
self.url = url
if tools:
raise ValueError("Ollama does not support tools")

def build_model_input(self, messages: List["Message"]) -> Any:
input_messages = []
for message in messages:
if message.tool_call:
raise ValueError("Ollama does not support tool calls")
else:
if message.text is not None:
content = message.text
if message.images:
raise ValueError("Ollama does not support images")
model_input_message = {
"role": message.role,
"content": content,
}
input_messages.append(model_input_message)
model_input = {
"messages": input_messages,
"model": self.model_name,
"stream": False,
}
return model_input

def call_llm_provider(
self,
model_input: Any,
temperature: int | None,
max_new_tokens: int | None,
timeout: int,
    ) -> "Message":
kwargs = {}
if temperature is not None:
kwargs["temperature"] = temperature
if max_new_tokens is not None:
kwargs["num_ctx"] = max_new_tokens
model_input["options"] = kwargs
response = requests.post(self.url, json=model_input, timeout=timeout)
response = response.json()
chat_response = response["message"]["content"]
if chat_response is not None:
return Message(text=chat_response, role="assistant")

async def async_call_llm_provider(
self,
model_input: Any,
temperature: int | None,
max_new_tokens: int | None,
timeout: int,
    ) -> "Message":
kwargs = {}
if temperature is not None:
kwargs["temperature"] = temperature
if max_new_tokens is not None:
kwargs["num_ctx"] = max_new_tokens
model_input["options"] = kwargs
async with aiohttp.ClientSession() as session:
async with session.post(
self.url, json=model_input, timeout=timeout
) as response:
response_data = await response.json()
chat_response = response_data["message"]["content"]
if chat_response is not None:
return Message(text=chat_response.strip(), role="assistant")
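
For reference, the payload assembled by `build_model_input` and posted by `call_llm_provider` matches Ollama's `/api/chat` request format, and the reply's `message.content` field is what gets wrapped into a `Message`. A minimal standalone sketch of the equivalent raw call (values are illustrative; assumes a local Ollama server with `llama3` pulled):

```python
import requests

payload = {
    "model": "llama3",
    "messages": [
        {"role": "system", "content": "You are Spock, from Star Trek."},
        {"role": "user", "content": "What is the meaning of life?"},
    ],
    "stream": False,
    # same option mapping as the wrapper: temperature, and max_new_tokens forwarded as num_ctx
    "options": {"temperature": 0.0, "num_ctx": 128},
}
response = requests.post("http://localhost:11434/api/chat", json=payload, timeout=30)
print(response.json()["message"]["content"])
```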
