Introduces MockLLM and provides an example #130
The first new file, imported as `echo` by the example script, defines the echo task and its feedback helpers:

@@ -0,0 +1,39 @@
import random

from pydantic import BaseModel


def ground_truth(n: str) -> str:
    return str(n)


def noisy_predictor(n: str) -> str:
    # Half the time, perturb the input by a random offset in [-10, 10];
    # otherwise echo it back unchanged.
    n_int = int(n)
    return str(n_int + random.randint(-10, 10)) if random.randint(1, 10) <= 5 else n


def _echo_eval(ground_truth: str, prediction: str) -> int:
    return int(ground_truth) - int(prediction)


def _rating_emoji(diff: int) -> str:
    if diff == 0:
        return "✅"
    elif diff > 0:
        return "↗️"
    else:
        return "↘️"


def _verbal_rating(diff: int) -> str:
    # Map the absolute difference to a verbal rating bucket.
    ratings = [
        (lambda d: d == 0, "perfect"),
        (lambda d: 0 < d < 3, "good"),
        (lambda d: 3 <= d < 6, "okay"),
        (lambda d: 6 <= d < 8, "bad"),
        (lambda d: d >= 8, "terrible"),
    ]
    return next(rating for condition, rating in ratings if condition(abs(diff)))


class EchoFeedback(BaseModel):
    direction: str
    verbal_rating: str

    @staticmethod
    def create(ground_truth: str, prediction: str) -> "EchoFeedback":
        diff = _echo_eval(ground_truth, prediction)
        return EchoFeedback(direction=_rating_emoji(diff), verbal_rating=_verbal_rating(diff))
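Put together, these helpers map a numeric difference onto structured feedback. A small sketch of how `EchoFeedback.create` behaves, with values that follow directly from `_rating_emoji` and `_verbal_rating` above:

    from echo import EchoFeedback

    fb = EchoFeedback.create(ground_truth="42", prediction="42")
    # diff == 0, so direction is "✅" and verbal_rating is "perfect"
    print(fb.model_dump())  # {'direction': '✅', 'verbal_rating': 'perfect'}

    fb = EchoFeedback.create(ground_truth="42", prediction="38")
    # diff == 4 > 0, so direction is "↗️" and verbal_rating is "okay"
    print(fb.model_dump())  # {'direction': '↗️', 'verbal_rating': 'okay'}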
The example script ties the echo helpers together with MockLLM and log10's feedback API:

@@ -0,0 +1,39 @@
import random

from dotenv import load_dotenv
from random_word import RandomWords

from echo import ground_truth, noisy_predictor, EchoFeedback
from log10.llm import MockLLM, Message, Log10Config
from log10.feedback.feedback import Feedback
from log10.feedback.feedback_task import FeedbackTask


load_dotenv()


def mk_tag(prefix):
    rw = RandomWords()
    return f"{prefix}-{rw.get_random_word()}-{rw.get_random_word()}"


if __name__ == "__main__":
    # Each run uses the same task name but a different session tag.
    task_name = "echo"
    session_tag = mk_tag(task_name)
    input_offset = random.randint(0, 100)
    # Seed the noisy predictor; the seed is logged as a tag for reproducibility.
    random_seed = 42
    random.seed(random_seed)
    config = Log10Config(tags=[session_tag, task_name, f"random_seed:{random_seed}"])
    # Mock the LLM with a plain Python function.
    client = MockLLM(mock_function=noisy_predictor, log10_config=config)
    task = FeedbackTask().create(name=task_name, task_schema=EchoFeedback.model_json_schema())
    task_id = task.json()["id"]
    for i in range(10):
        x = i + input_offset
        y = ground_truth(str(x))
        response = client.chat([Message(role="user", content=str(x))])
        y_hat = response.content
        l10fb = EchoFeedback.create(y, y_hat)
        response = Feedback().create(
            task_id=task_id,
            values=l10fb.model_dump(),
            completion_tags_selector=config.tags,
        )
        print(f"{response.json()['id']}: {l10fb}")
Finally, `log10/llm.py` adds `Callable` to the typing imports and introduces the `MockLLM` class itself:

@@ -1,14 +1,15 @@
import json
import logging
import os
import time
import traceback
from abc import ABC
from copy import deepcopy
from enum import Enum
from typing import Callable, List, Optional

import requests


Role = Enum("Role", ["system", "assistant", "user"])
Kind = Enum("Kind", ["chat", "text"])
@@ -250,3 +251,81 @@ def chat(self, messages: List[Message], hparams: dict = None) -> ChatCompletion:
    def text(self, prompt: str, hparams: dict = None) -> TextCompletion:
        logging.info("Received text completion request: " + prompt)
        return TextCompletion(text="I'm not a real LLM")


class MockLLM(LLM):
    '''
    MockLLM is an LLM interface that lets you mock the behavior of an LLM with any Python function.
    It is useful for log10 testing and development without having to set up or call a real LLM.
    Example:
        >>> from log10.llm import MockLLM, Message, Log10Config
        >>> config = Log10Config()
        >>> # a mock LLM that reverses any input
        >>> client = MockLLM(mock_function=lambda x: str(x)[::-1], log10_config=config)
        >>> response = client.chat([Message(role="user", content="hello world")])
    For a longer example, see examples/feedback/echo
    '''
    def __init__(self, hparams: dict = None, log10_config=None, mock_function: Callable = None):
        '''
        hparams: dict = None
        log10_config: Log10Config = None
        mock_function: Callable = None
        If mock_function is not provided, it defaults to the identity function.
        '''
        # Copy to avoid mutating a caller-owned dict.
        hparams = dict(hparams or {})
        hparams["model"] = hparams.get("model", "MockLLM")
        super().__init__(hparams, log10_config)
        # Fall back to the identity function, as documented above.
        self.mock_function = mock_function or (lambda x: x)

    def chat(self, messages: List[Message], hparams: dict = None) -> ChatCompletion:
        request = self.chat_request(messages, hparams)

        start_time = time.perf_counter()
        # Apply the mock function to the content of the last message.
        content = messages[-1].content if len(messages) > 0 else ""
        content = self.mock_function(content)

        self.completion_id = self.log_start(request, Kind.chat)
        completion = {
            "choices": [
                {
                    "message": {
                        "role": "assistant",
                        "content": content,
                    }
                }
            ]
        }
        response = ChatCompletion(
            role=Role.assistant,
            content=content,
            response=completion,
        )

        self.log_end(
            self.completion_id,
            completion,
            time.perf_counter() - start_time,
        )

        return response

    def chat_expected(self, messages: List[Message], expected: Message, hparams: dict = None) -> ChatCompletion:
        '''
        Similar to chat, except it also takes a single expected message and
        returns its content as the completion. Any previously set
        `mock_function` is ignored for this call.
        '''
        tmp_mock_func = self.mock_function
        self.mock_function = lambda _: expected.content
        response = self.chat(messages, hparams)
        self.mock_function = tmp_mock_func
        return response

    def chat_request(self, messages: List[Message], hparams: dict = None) -> dict:
        merged_hparams = deepcopy(self.hparams)
        if hparams:
            merged_hparams.update(hparams)

        return {
            "messages": [message.to_dict() for message in messages],
            **merged_hparams,
        }

    def text(self, prompt: str, hparams: dict = None) -> TextCompletion:
        return TextCompletion(text="Not implemented in MockLLM")
Review comment on the `hparams` handling: how about marking `{"model": "MockLLM"}`, maybe when `hparams["model"]` is None?