Introduces MockLLM and provides an example #130

Closed · wants to merge 5 commits
Changes from all commits
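In brief, this PR adds a MockLLM class that stands in for a real LLM by delegating to any Python function. A minimal sketch of the default behavior (assumptions: omitting mock_function falls back to the identity function described in the docstring below, and a no-argument Log10Config() picks up credentials from the environment):

from log10.llm import MockLLM, Message, Log10Config

client = MockLLM(log10_config=Log10Config())
response = client.chat([Message(role="user", content="ping")])
# response.content == "ping"  (the identity mock echoes the input)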
39 changes: 39 additions & 0 deletions examples/feedback/echo/echo.py
@@ -0,0 +1,39 @@
import random
from pydantic import BaseModel

def ground_truth(n: int) -> str:
    return str(n)

def noisy_predictor(n: str) -> str:
    # With 50% probability, perturb the input by a random offset in [-10, 10].
    n_int = int(n)
    return str(n_int + random.randint(-10, 10)) if random.randint(1, 10) <= 5 else n

def _echo_eval(ground_truth: str, prediction: str) -> int:
    return int(ground_truth) - int(prediction)

def _rating_emoji(diff: int) -> str:
    if diff == 0:
        return "✅"
    elif diff > 0:
        return "↗️"
    else:
        return "↘️"

def _verbal_rating(diff: int) -> str:
    ratings = [
        (lambda d: d == 0, "perfect"),
        (lambda d: 0 < d < 3, "good"),
        (lambda d: 3 <= d < 6, "okay"),
        (lambda d: 6 <= d < 8, "bad"),
        (lambda d: d >= 8, "terrible"),
    ]
    return next(rating for condition, rating in ratings if condition(abs(diff)))

class EchoFeedback(BaseModel):
    direction: str
    verbal_rating: str

    @staticmethod
    def create(ground_truth: str, prediction: str) -> "EchoFeedback":
        diff = _echo_eval(ground_truth, prediction)
        return EchoFeedback(direction=_rating_emoji(diff), verbal_rating=_verbal_rating(diff))
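A quick worked example of the rating logic (illustration only, not part of the diff):

fb = EchoFeedback.create("42", "39")
# diff = 42 - 39 = 3  ->  direction "↗️", verbal_rating "okay"
assert fb.direction == "↗️" and fb.verbal_rating == "okay"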
39 changes: 39 additions & 0 deletions examples/feedback/echo/noisy_echo_feedback.py
@@ -0,0 +1,39 @@
import random
from dotenv import load_dotenv
from random_word import RandomWords
from echo import ground_truth, noisy_predictor, EchoFeedback
from log10.llm import MockLLM, Message, Log10Config
from log10.feedback.feedback import Feedback
from log10.feedback.feedback_task import FeedbackTask


load_dotenv()

def mk_tag(prefix):
    rw = RandomWords()
    return f"{prefix}-{rw.get_random_word()}-{rw.get_random_word()}"


if __name__ == "__main__":
    # Each run reuses the same task name but gets a fresh session tag.
    task_name = "echo"
    session_tag = mk_tag(task_name)
    input_offset = random.randint(0, 100)
    random_seed = 42
    # Seed the noisy predictor; the seed is logged as a tag for reproducibility.
    random.seed(random_seed)
    config = Log10Config(tags=[session_tag, task_name, f"random_seed:{random_seed}"])
    # Mock the LLM with a plain Python function.
    client = MockLLM(mock_function=noisy_predictor, log10_config=config)
    task = FeedbackTask().create(name=task_name, task_schema=EchoFeedback.model_json_schema())
    task_id = task.json()["id"]
    for i in range(10):
        x = i + input_offset
        y = ground_truth(x)
        response = client.chat([Message(role="user", content=str(x))])
        y_hat = response.content
        l10fb = EchoFeedback.create(y, y_hat)
        response = Feedback().create(task_id=task_id, values=l10fb.model_dump(), completion_tags_selector=config.tags)
        print(f"{response.json()['id']}: {l10fb}")
83 changes: 81 additions & 2 deletions log10/llm.py
@@ -1,14 +1,15 @@
import json
import logging
import os
import time
import traceback
from abc import ABC
from copy import deepcopy
from enum import Enum
from typing import Callable, List, Optional

import requests


Role = Enum("Role", ["system", "assistant", "user"])
Kind = Enum("Kind", ["chat", "text"])

@@ -250,3 +251,81 @@ def chat(self, messages: List[Message], hparams: dict = None) -> ChatCompletion:
    def text(self, prompt: str, hparams: dict = None) -> TextCompletion:
        logging.info("Received text completion request: " + prompt)
        return TextCompletion(text="I'm not a real LLM")

class MockLLM(LLM):
    '''
    MockLLM is an LLM interface that lets you mock the behavior of an LLM with any Python function.
    It is useful for log10 testing and development without having to set up or call a real LLM.

    Example:
        >>> from log10.llm import MockLLM, Message, Log10Config
        >>> # a mock LLM that reverses any input
        >>> config = Log10Config()
        >>> client = MockLLM(mock_function=lambda x: str(x)[::-1], log10_config=config)
        >>> response = client.chat([Message(role="user", content="hello world")])

    For a longer example, see examples/feedback/echo.
    '''

    def __init__(self, hparams: dict = None, log10_config=None, mock_function: Callable = None):
        '''
        hparams: dict = None
        log10_config: Log10Config = None
        mock_function: Callable = None
            If mock_function is not provided, the identity function is used.
        '''
        hparams = dict(hparams) if hparams else {}
        hparams["model"] = hparams.get("model", "MockLLM")
        super().__init__(hparams, log10_config)
Collaborator commented:
How about marking {"model": "MockLLM"} only when hparams["model"] is None?
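One way to read this suggestion (a sketch, not committed in this PR):

        # Sketch of the reviewer's suggestion (hypothetical):
        # stamp the default only when the caller left "model" unset.
        if hparams.get("model") is None:
            hparams["model"] = "MockLLM"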

        # Fall back to the identity function, as documented above.
        self.mock_function = mock_function if mock_function is not None else (lambda x: x)

    def chat(self, messages: List[Message], hparams: dict = None) -> ChatCompletion:
        request = self.chat_request(messages, hparams)

        start_time = time.perf_counter()
        # The mock completion is mock_function applied to the last message's content.
        content = messages[-1].content if len(messages) > 0 else ""
        content = self.mock_function(content)

        self.completion_id = self.log_start(request, Kind.chat)
        completion = {
            "choices": [
                {
                    "message": {
                        "role": "assistant",
                        "content": content,
Collaborator commented:
It seems the completion is just the last message of messages, and request also contains the full messages. Depending on how you want to use the logs in the future, you may want to remove the last message (the completion) from request, or have mock_function take the full messages and do that itself.

Contributor (author) replied:
Please check the chat_expected method I will be committing soon.
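A sketch of the collaborator's first suggestion (hypothetical, not part of this PR): build the logged request without the message that the mock will echo back. The method name below is made up for illustration.

    # Hypothetical variant: exclude the final message from the logged request
    # so the prompt and the mocked completion do not overlap.
    def chat_request_without_last(self, messages: List[Message], hparams: dict = None) -> dict:
        merged_hparams = deepcopy(self.hparams)
        if hparams:
            merged_hparams.update(hparams)
        return {
            "messages": [m.to_dict() for m in messages[:-1]],
            **merged_hparams,
        }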

                    }
                }
            ]
        }
        response = ChatCompletion(
            role=Role.assistant,
            content=content,
            response=completion,
        )

        self.log_end(
            self.completion_id,
            completion,
            time.perf_counter() - start_time,
        )

        return response

    def chat_expected(self, messages: List[Message], expected: Message, hparams: dict = None) -> ChatCompletion:
        '''
        Similar to chat, except it takes a list of messages and a single expected message,
        and returns the expected message. Any previously set `mock_function` is ignored
        for this call and restored afterwards.
        '''
        tmp_mock_func = self.mock_function
        self.mock_function = lambda _: expected.content
        try:
            response = self.chat(messages, hparams)
        finally:
            # Restore the original mock_function even if chat raises.
            self.mock_function = tmp_mock_func
        return response
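A usage sketch for chat_expected (assumes a client constructed as in the docstring above):

    response = client.chat_expected(
        [Message(role="user", content="2 + 2 = ?")],
        expected=Message(role="assistant", content="4"),
    )
    assert response.content == "4"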

    def chat_request(self, messages: List[Message], hparams: dict = None) -> dict:
        merged_hparams = deepcopy(self.hparams)
        if hparams:
            merged_hparams.update(hparams)

        return {
            "messages": [message.to_dict() for message in messages],
            **merged_hparams,
        }

    def text(self, prompt: str, hparams: dict = None) -> TextCompletion:
        return TextCompletion(text="Not implemented in MockLLM")