diff --git a/examples/feedback/echo/echo.py b/examples/feedback/echo/echo.py
new file mode 100644
index 00000000..e77d0b3b
--- /dev/null
+++ b/examples/feedback/echo/echo.py
@@ -0,0 +1,39 @@
+import random
+from pydantic import BaseModel
+
+def ground_truth(n: int) -> str:
+    return str(n)
+
+def noisy_predictor(n: str) -> str:
+    n_int = int(n)
+    return str(n_int + random.randint(-10, 10)) if random.randint(1, 10) <= 5 else n
+
+def _echo_eval(ground_truth: str, prediction: str) -> int:
+    return int(ground_truth) - int(prediction)
+
+def _rating_emoji(diff: int) -> str:
+    if diff == 0:
+        return "✅"
+    elif diff > 0:
+        return "↗️"
+    else:
+        return "↘️"
+
+def _verbal_rating(diff: int) -> str:
+    ratings = [
+        (lambda d: d == 0, "perfect"),
+        (lambda d: 0 < d < 3, "good"),
+        (lambda d: 3 <= d < 6, "okay"),
+        (lambda d: 6 <= d < 8, "bad"),
+        (lambda d: d >= 8, "terrible"),
+    ]
+    return next(rating for condition, rating in ratings if condition(abs(diff)))
+
+class EchoFeedback(BaseModel):
+    direction: str
+    verbal_rating: str
+
+    @staticmethod
+    def create(ground_truth: str, prediction: str) -> "EchoFeedback":
+        diff = _echo_eval(ground_truth, prediction)
+        return EchoFeedback(direction=_rating_emoji(diff), verbal_rating=_verbal_rating(diff))
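To make the rating semantics concrete, here is a minimal standalone sketch exercising EchoFeedback from the file above (the input values are arbitrary). Note that diff = ground_truth - prediction, so a prediction that undershoots yields a positive diff and points up:

    from echo import EchoFeedback

    print(EchoFeedback.create("50", "50"))  # diff = 0  -> direction='✅' verbal_rating='perfect'
    print(EchoFeedback.create("50", "46"))  # diff = 4  -> direction='↗️' verbal_rating='okay'
    print(EchoFeedback.create("50", "59"))  # diff = -9 -> direction='↘️' verbal_rating='terrible'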
diff --git a/examples/feedback/echo/noisy_echo_feedback.py b/examples/feedback/echo/noisy_echo_feedback.py
new file mode 100644
index 00000000..b0cc8afd
--- /dev/null
+++ b/examples/feedback/echo/noisy_echo_feedback.py
@@ -0,0 +1,39 @@
+import random
+from dotenv import load_dotenv
+from random_word import RandomWords
+from echo import ground_truth, noisy_predictor, EchoFeedback
+from log10.llm import MockLLM, Message, Log10Config
+from log10.feedback.feedback import Feedback
+from log10.feedback.feedback_task import FeedbackTask
+
+
+load_dotenv()
+
+def mk_tag(prefix):
+    rw = RandomWords()
+    return f"{prefix}-{rw.get_random_word()}-{rw.get_random_word()}"
+
+
+if __name__ == "__main__":
+    # each run reuses the same task name but gets a fresh session tag
+    task_name = "echo"
+    session_tag = mk_tag(task_name)
+    input_offset = random.randint(0, 100)
+    random_seed = 42
+    # set a random seed for the noisy predictor;
+    # the seed is logged as a tag for reproducibility
+    random.seed(random_seed)
+    config = Log10Config(tags=[session_tag, task_name, f"random_seed:{random_seed}"])
+    # we mock the LLM with a plain Python function
+    client = MockLLM(mock_function=noisy_predictor, log10_config=config)
+    task = FeedbackTask().create(name=task_name,
+                                 task_schema=EchoFeedback.model_json_schema())
+    task_id = task.json()["id"]
+    for i in range(10):
+        x = i + input_offset
+        y = ground_truth(x)
+        response = client.chat([Message(role="user", content=str(x))])
+        y_hat = response.content
+        l10fb = EchoFeedback.create(y, y_hat)
+        response = Feedback().create(task_id=task_id, values=l10fb.model_dump(), completion_tags_selector=config.tags)
+        print(f"{response.json()['id']}: {l10fb}")
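The driver above needs log10 credentials loaded from .env. For a quick offline sanity check of the predictor and feedback wiring, a dry-run sketch that uses only echo.py could look like this (unlike the driver, it seeds before drawing input_offset, so the whole run is reproducible):

    import random

    from echo import EchoFeedback, ground_truth, noisy_predictor

    random.seed(42)
    input_offset = random.randint(0, 100)

    for i in range(10):
        x = i + input_offset
        y = ground_truth(x)
        y_hat = noisy_predictor(str(x))
        print(f"x={x} y_hat={y_hat} {EchoFeedback.create(y, y_hat)}")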
diff --git a/log10/llm.py b/log10/llm.py
index 034dc957..5faa8485 100644
--- a/log10/llm.py
+++ b/log10/llm.py
@@ -1,14 +1,15 @@
 import json
 import logging
 import os
+import time
 import traceback
 from abc import ABC
+from copy import deepcopy
 from enum import Enum
-from typing import List, Optional
+from typing import Callable, List, Optional
 
 import requests
 
-
 Role = Enum("Role", ["system", "assistant", "user"])
 Kind = Enum("Kind", ["chat", "text"])
@@ -250,3 +251,81 @@ def chat(self, messages: List[Message], hparams: dict = None) -> ChatCompletion:
     def text(self, prompt: str, hparams: dict = None) -> TextCompletion:
         logging.info("Received text completion requst: " + prompt)
         return TextCompletion(text="I'm not a real LLM")
+
+class MockLLM(LLM):
+    '''
+    MockLLM is an LLM interface that lets you mock the behavior of an LLM with any Python function.
+    It is useful for log10 testing and development without having to set up or call a real LLM.
+    Example:
+        >>> from log10.llm import MockLLM, Message, Log10Config
+        >>> config = Log10Config()
+        >>> # a mock LLM that reverses any input
+        >>> client = MockLLM(mock_function=lambda x: str(x)[::-1], log10_config=config)
+        >>> response = client.chat([Message(role="user", content="hello world")])
+    For a longer example, see examples/feedback/echo
+    '''
+    def __init__(self, hparams: dict = None, log10_config=None, mock_function: Callable = None):
+        '''
+        hparams: dict = None
+        log10_config: Log10Config = None
+        mock_function: Callable = None
+        If mock_function is not provided, the identity function is used.
+        '''
+        # Copy to avoid mutating the caller's dict (and avoid a mutable default argument).
+        hparams = dict(hparams) if hparams else {}
+        hparams["model"] = hparams.get("model", "MockLLM")
+        super().__init__(hparams, log10_config)
+        self.mock_function = mock_function if mock_function is not None else lambda x: x
+
+    def chat(self, messages: List[Message], hparams: dict = None) -> ChatCompletion:
+        request = self.chat_request(messages, hparams)
+
+        start_time = time.perf_counter()
+        # Run the last message's content through the mock function.
+        content = messages[-1].content if len(messages) > 0 else ""
+        content = self.mock_function(content)
+
+        self.completion_id = self.log_start(request, Kind.chat)
+        completion = {
+            "choices": [
+                {
+                    "message": {
+                        "role": "assistant",
+                        "content": content,
+                    }
+                }
+            ]
+        }
+        response = ChatCompletion(
+            role=Role.assistant,
+            content=content,
+            response=completion,
+        )
+
+        self.log_end(
+            self.completion_id,
+            completion,
+            time.perf_counter() - start_time,
+        )
+
+        return response
+
+    def chat_expected(self, messages: List[Message], expected: Message, hparams: dict = None) -> ChatCompletion:
+        '''
+        Similar to chat, except it also takes a single expected message and returns a
+        completion whose content is the expected message's content. Any previously set
+        `mock_function` is ignored for this call and restored afterwards.
+        '''
+        tmp_mock_func = self.mock_function
+        self.mock_function = lambda _: expected.content
+        try:
+            response = self.chat(messages, hparams)
+        finally:
+            # Restore the original mock function even if chat() raises.
+            self.mock_function = tmp_mock_func
+        return response
+
+    def chat_request(self, messages: List[Message], hparams: dict = None) -> dict:
+        merged_hparams = deepcopy(self.hparams)
+        if hparams:
+            merged_hparams.update(hparams)
+
+        return {
+            "messages": [message.to_dict() for message in messages],
+            **merged_hparams,
+        }
+
+    def text(self, prompt: str, hparams: dict = None) -> TextCompletion:
+        return TextCompletion(text="Not implemented in MockLLM")
\ No newline at end of file
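As a usage note, chat_expected makes deterministic tests straightforward. A sketch of such a test, assuming a working Log10Config since MockLLM still logs every completion (the test name is illustrative):

    from log10.llm import Log10Config, Message, MockLLM

    def test_chat_expected_overrides_mock_function():
        client = MockLLM(mock_function=lambda x: x.upper(), log10_config=Log10Config())
        messages = [Message(role="user", content="hi")]
        # chat() routes through the configured mock function...
        assert client.chat(messages).content == "HI"
        # ...chat_expected pins the reply for one call, then restores the mock.
        expected = Message(role="assistant", content="pinned reply")
        assert client.chat_expected(messages, expected).content == "pinned reply"
        assert client.chat(messages).content == "HI"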