Introduces MockLLM and provides an example #130
The first new file, imported as `echo` by the example script, defines the echo task and its feedback helpers:

@@ -0,0 +1,39 @@
import random

from pydantic import BaseModel


def ground_truth(n: str) -> str:
    return str(n)


def noisy_predictor(n: str) -> str:
    # Half the time, perturb the input by a random offset in [-10, 10];
    # otherwise echo it back unchanged.
    n_int = int(n)
    return str(n_int + random.randint(-10, 10)) if random.randint(1, 10) <= 5 else n


def _echo_eval(ground_truth: str, prediction: str) -> int:
    return int(ground_truth) - int(prediction)


def _rating_emoji(diff: int) -> str:
    if diff == 0:
        return "✅"
    elif diff > 0:
        return "↗️"
    else:
        return "↘️"


def _verbal_rating(diff: int) -> str:
    # Map the absolute difference to a verbal rating bucket.
    ratings = [
        (lambda d: d == 0, "perfect"),
        (lambda d: 0 < d < 3, "good"),
        (lambda d: 3 <= d < 6, "okay"),
        (lambda d: 6 <= d < 8, "bad"),
        (lambda d: d >= 8, "terrible"),
    ]
    return next(rating for condition, rating in ratings if condition(abs(diff)))


class EchoFeedback(BaseModel):
    direction: str
    verbal_rating: str

    @staticmethod
    def create(ground_truth: str, prediction: str) -> "EchoFeedback":
        diff = _echo_eval(ground_truth, prediction)
        return EchoFeedback(direction=_rating_emoji(diff), verbal_rating=_verbal_rating(diff))
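Put together, these helpers map a numeric difference onto structured feedback. A small sketch of how `EchoFeedback.create` behaves, with values that follow directly from `_rating_emoji` and `_verbal_rating` above:

    from echo import EchoFeedback

    fb = EchoFeedback.create(ground_truth="42", prediction="42")
    # diff == 0, so direction is "✅" and verbal_rating is "perfect"
    print(fb.model_dump())  # {'direction': '✅', 'verbal_rating': 'perfect'}

    fb = EchoFeedback.create(ground_truth="42", prediction="38")
    # diff == 4 > 0, so direction is "↗️" and verbal_rating is "okay"
    print(fb.model_dump())  # {'direction': '↗️', 'verbal_rating': 'okay'}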
The example script ties the echo helpers together with MockLLM and log10's feedback API:

@@ -0,0 +1,39 @@
import random

from dotenv import load_dotenv
from random_word import RandomWords

from echo import ground_truth, noisy_predictor, EchoFeedback
from log10.llm import MockLLM, Message, Log10Config
from log10.feedback.feedback import Feedback
from log10.feedback.feedback_task import FeedbackTask


load_dotenv()


def mk_tag(prefix):
    rw = RandomWords()
    return f"{prefix}-{rw.get_random_word()}-{rw.get_random_word()}"


if __name__ == "__main__":
    # Each run uses the same task name but a different session tag.
    task_name = "echo"
    session_tag = mk_tag(task_name)
    input_offset = random.randint(0, 100)
    # Seed the noisy predictor; the seed is logged as a tag for reproducibility.
    random_seed = 42
    random.seed(random_seed)
    config = Log10Config(tags=[session_tag, task_name, f"random_seed:{random_seed}"])
    # Mock the LLM with a plain Python function.
    client = MockLLM(mock_function=noisy_predictor, log10_config=config)
    task = FeedbackTask().create(name=task_name, task_schema=EchoFeedback.model_json_schema())
    task_id = task.json()["id"]
    for i in range(10):
        x = i + input_offset
        y = ground_truth(str(x))
        response = client.chat([Message(role="user", content=str(x))])
        y_hat = response.content
        l10fb = EchoFeedback.create(y, y_hat)
        response = Feedback().create(
            task_id=task_id,
            values=l10fb.model_dump(),
            completion_tags_selector=config.tags,
        )
        print(f"{response.json()['id']}: {l10fb}")
Finally, `log10/llm.py` adds `Callable` to the typing imports and introduces the `MockLLM` class itself:

@@ -1,14 +1,15 @@
import json
import logging
import os
import time
import traceback
from abc import ABC
from copy import deepcopy
from enum import Enum
from typing import Callable, List, Optional

import requests


Role = Enum("Role", ["system", "assistant", "user"])
Kind = Enum("Kind", ["chat", "text"])
@@ -250,3 +251,81 @@ def chat(self, messages: List[Message], hparams: dict = None) -> ChatCompletion:
    def text(self, prompt: str, hparams: dict = None) -> TextCompletion:
        logging.info("Received text completion request: " + prompt)
        return TextCompletion(text="I'm not a real LLM")


class MockLLM(LLM):
    '''
    MockLLM is an LLM interface that lets you mock the behavior of an LLM with any Python function.
    It is useful for log10 testing and development without having to set up or call a real LLM.
    Example:
        >>> from log10.llm import MockLLM, Message, Log10Config
        >>> config = Log10Config()
        >>> # a mock LLM that reverses any input
        >>> client = MockLLM(mock_function=lambda x: str(x)[::-1], log10_config=config)
        >>> response = client.chat([Message(role="user", content="hello world")])
    For a longer example, see examples/feedback/echo
    '''
    def __init__(self, hparams: dict = None, log10_config=None, mock_function: Callable = None):
        '''
        hparams: dict = None
        log10_config: Log10Config = None
        mock_function: Callable = None
        If mock_function is not provided, it defaults to the identity function.
        '''
        # Copy to avoid mutating a caller-owned dict.
        hparams = dict(hparams or {})
        hparams["model"] = hparams.get("model", "MockLLM")
        super().__init__(hparams, log10_config)
        # Fall back to the identity function, as documented above.
        self.mock_function = mock_function or (lambda x: x)

    def chat(self, messages: List[Message], hparams: dict = None) -> ChatCompletion:
        request = self.chat_request(messages, hparams)

        start_time = time.perf_counter()
        # Apply the mock function to the content of the last message.
        content = messages[-1].content if len(messages) > 0 else ""
        content = self.mock_function(content)

        self.completion_id = self.log_start(request, Kind.chat)
        completion = {
            "choices": [
                {
                    "message": {
                        "role": "assistant",
                        "content": content,
                    }
                }
            ]
        }
        response = ChatCompletion(
            role=Role.assistant,
            content=content,
            response=completion,
        )

        self.log_end(
            self.completion_id,
            completion,
            time.perf_counter() - start_time,
        )

        return response

    def chat_expected(self, messages: List[Message], expected: Message, hparams: dict = None) -> ChatCompletion:
        '''
        Similar to chat, except it also takes a single expected message and
        returns its content as the completion. Any previously set
        `mock_function` is ignored for this call.
        '''
        tmp_mock_func = self.mock_function
        self.mock_function = lambda _: expected.content
        response = self.chat(messages, hparams)
        self.mock_function = tmp_mock_func
        return response

    def chat_request(self, messages: List[Message], hparams: dict = None) -> dict:
        merged_hparams = deepcopy(self.hparams)
        if hparams:
            merged_hparams.update(hparams)

        return {
            "messages": [message.to_dict() for message in messages],
            **merged_hparams,
        }

    def text(self, prompt: str, hparams: dict = None) -> TextCompletion:
        return TextCompletion(text="Not implemented in MockLLM")
Review comment on the `hparams` handling: how about marking `{"model": "MockLLM"}`, maybe when `hparams["model"]` is None?