Merge pull request #203 from SylphAI-Inc/main

[v0.2.2] G_eval and comprehensive LLM eval guideline
SylphAI-Inc · Sep 10, 2024 · 82a7878 · 82a7878
2 parents 26274f6 + 7e388f9
commit 82a7878
Show file tree

Hide file tree

Showing 51 changed files with 3,436 additions and 1,632 deletions.
diff --git a/README.md b/README.md
@@ -37,20 +37,26 @@
     <a href="https://pypi.org/project/adalflow/">
         <img alt="PyPI Version" src="https://img.shields.io/pypi/v/adalflow?style=flat-square">
     </a>
-    <a href="https://star-history.com/#SylphAI-Inc/LightRAG">
-        <img alt="GitHub stars" src="https://img.shields.io/github/stars/SylphAI-Inc/LightRAG?style=flat-square">
+    <a href="https://star-history.com/#SylphAI-Inc/AdalFlow">
+        <img alt="GitHub stars" src="https://img.shields.io/github/stars/SylphAI-Inc/AdalFlow?style=flat-square">
     </a>
-    <a href="https://github.com/SylphAI-Inc/LightRAG/issues">
-        <img alt="Open Issues" src="https://img.shields.io/github/issues-raw/SylphAI-Inc/LightRAG?style=flat-square">
+    <a href="https://github.com/SylphAI-Inc/AdalFlow/issues">
+        <img alt="Open Issues" src="https://img.shields.io/github/issues-raw/SylphAI-Inc/AdalFlow?style=flat-square">
     </a>
     <a href="https://opensource.org/license/MIT">
-        <img alt="License" src="https://img.shields.io/github/license/SylphAI-Inc/LightRAG">
+        <img alt="License" src="https://img.shields.io/github/license/SylphAI-Inc/AdalFlow">
     </a>
       <a href="https://discord.gg/ezzszrRZvT">
         <img alt="discord-invite" src="https://dcbadge.vercel.app/api/server/ezzszrRZvT?style=flat">
     </a>
 </p>
 
+<h4>
+<p align="center">
+For AI researchers, product teams, and software engineers who want to learn the AI way.
+</p>
+</h4>
+
 
 
 <!-- <a href="https://colab.research.google.com/drive/1PPxYEBa6eu__LquGoFFJZkhYgWVYE6kh?usp=sharing">

diff --git a/adalflow/CHANGELOG.md b/adalflow/CHANGELOG.md
@@ -1,10 +1,12 @@
-## [0.2.1] - 2024-09-01
+## [0.2.2] - 2024-09-09
 ### Added
 - `get_cache_path`, instead of print out the cache path all the time, we add a ``get_cache_path`` to get the cache path.
 - Make `huggingface datasets` as an optional dependency.
+- Eval: `G_eval` to evaluate LLM applications that have no reference text.
 ### Modified
 - Add `template` to let users pass their own template, but need to have the same arguments as the default template.
-- Added `checkpoint resumt` in the `Trainer.diagnose` to show the newest performance and diagnostics on the checkpoint.
+- Added `checkpoint resume` in the `Trainer.diagnose` to show the newest performance and diagnostics on the checkpoint.
+
 ## [0.2.0] - 2024-08-20
 ### Added
 - Qdrant retriever.

diff --git a/adalflow/adalflow/__init__.py b/adalflow/adalflow/__init__.py
@@ -1,4 +1,4 @@
-__version__ = "0.2.0"
+__version__ = "0.2.2"
 
 from adalflow.core.component import Component, fun_to_component
 from adalflow.core.container import Sequential
@@ -8,7 +8,12 @@
 from adalflow.core.generator import Generator
 
 
-from adalflow.core.types import GeneratorOutput, EmbedderOutput, RetrieverOutput
+from adalflow.core.types import (
+    GeneratorOutput,
+    EmbedderOutput,
+    RetrieverOutput,
+    Document,
+)
 from adalflow.core.model_client import ModelClient
 from adalflow.core.embedder import Embedder
 from adalflow.core.string_parser import (
@@ -91,6 +96,7 @@
     "GeneratorOutput",
     "EmbedderOutput",
     "RetrieverOutput",
+    "Document",
     # Optimizer types
     "Optimizer",
     "DemoOptimizer",

diff --git a/adalflow/adalflow/components/retriever/faiss_retriever.py b/adalflow/adalflow/components/retriever/faiss_retriever.py
@@ -15,8 +15,6 @@
 import logging
 import os
 
-import faiss
-
 
 from adalflow.core.retriever import Retriever
 from adalflow.core.embedder import Embedder
@@ -31,6 +29,7 @@
 from adalflow.utils.lazy_import import safe_import, OptionalPackages
 
 safe_import(OptionalPackages.FAISS.value[0], OptionalPackages.FAISS.value[1])
+import faiss
 
 log = logging.getLogger(__name__)
 

diff --git a/adalflow/adalflow/core/db.py b/adalflow/adalflow/core/db.py
@@ -9,6 +9,7 @@
 
 from adalflow.core.component import Component
 from adalflow.utils.registry import EntityMapping
+from adalflow.utils.global_config import get_adalflow_default_root_path
 
 
 log = logging.getLogger(__name__)
@@ -18,6 +19,8 @@
 U = TypeVar("U")  # U will be the type after transformation
 
 
+# TODO: LocalDB does not need to be a component
+# TODO: DB clarity can be further improved
 @dataclass
 class LocalDB(Generic[T], Component):
     __doc__ = r"""LocalDB with in-memory CRUD operations, data transformation/processing pipelines, and persistence.
@@ -109,6 +112,9 @@ class LocalDB(Generic[T], Component):
     mapper_setups: Dict[str, Callable[[T], Any]] = field(
         default_factory=dict, metadata={"description": "Map function setup by key"}
     )
+    index_path: Optional[str] = field(
+        default="index.faiss", metadata={"description": "Path to the index file"}
+    )
 
     def __post_init__(self):
         super().__init__()
@@ -120,9 +126,27 @@ def length(self):
     def get_transformer_keys(self) -> List[str]:
         return list(self.transformed_items.keys())
 
-    def get_transformed_data(self, key: str) -> List[U]:
-        """Get the transformed items by key."""
-        return self.transformed_items[key]
+    # def get_transformed_data(self, key: str) -> List[U]:
+    #     """Get the transformed items by key."""
+    #     return self.transformed_items[key]
+
+    def get_transformed_data(
+        self, key: str, filter_fn: Callable[[Any], bool] = lambda x: True
+    ) -> List[U]:
+        """
+        Get the transformed items stored under ``key`` that are accepted by ``filter_fn``.
+
+        Args:
+            key (str): The key to identify which transformed items to retrieve.
+            filter_fn (Callable[[Any], bool], optional): Predicate called on each transformed item; only items for which it returns a truthy value are kept. Defaults to lambda x: True (keep everything).
+
+        Returns:
+            List[U]: A new list with the transformed items that passed the filter. Raises ValueError if ``key`` is unknown.
+        """
+        if key not in self.transformed_items:
+            raise ValueError(f"Key {key} not found in transformed items.")
+        # filter() is lazy; list() materializes a new list so the stored items are not exposed directly
+        return list(filter(filter_fn, self.transformed_items[key]))
 
     def _get_transformer_name(self, transformer: Component) -> str:
         name = f"{transformer.__class__.__name__}_"
@@ -143,6 +167,7 @@ def register_transformer(
         self.transformer_setups[key] = transformer
         if map_fn is not None:
             self.mapper_setups[key] = map_fn
+        self.transformed_items[key] = []
         return key
 
     @overload
@@ -209,9 +234,15 @@ def load(self, items: List[Any]):
         """
         self.items = items
 
-    def extend(self, items: List[Any], apply_transformer: bool = True):
+    def extend(
+        self,
+        items: List[Any],
+        apply_transformer: bool = True,
+    ):
         """Extend the db with new items."""
+
         self.items.extend(items)
+
         if apply_transformer:
             for key, transformer in self.transformer_setups.items():
                 # check if there was a map function registered
@@ -223,8 +254,6 @@ def extend(self, items: List[Any], apply_transformer: bool = True):
                     transformed_items = transformer(items)
                 self.transformed_items[key].extend(transformed_items)
 
-        self.items.extend(items)
-
     def delete(self, index: Optional[int] = None, remove_transformed: bool = True):
         """Remove items by index or pop the last item. Optionally remove the transformed data as well.
 
@@ -293,26 +322,38 @@ def reset(self):
         self.mapper_setups = {}
         self.items = []
 
-    def save_state(self, filepath: str):
+    def save_state(self, filepath: Optional[str] = None):
         """Save the current state (attributes) of the DB using pickle.
 
         Note:
             The transformer setups will be lost when pickling. As it might not be picklable.
         """
-        filepath = filepath or "storage/local_item_db.pkl"
+        filepath = filepath or os.path.join(
+            get_adalflow_default_root_path(),  # must be called: os.path.join() rejects a function object
+            (
+                "local_db/local_item_db.pkl"
+                if not self.name
+                else f"local_db/{self.name}.pkl"
+            ),
+        )
+        self.index_path = filepath
         file_dir = os.path.dirname(filepath)
-        if file_dir and file_dir != "":
+        if file_dir:  # dirname is "" for a bare filename; os.makedirs("") would raise FileNotFoundError
             os.makedirs(file_dir, exist_ok=True)
 
         with open(filepath, "wb") as file:
             pickle.dump(self, file)
+        print(f"Saved the state of the DB to {filepath}")
 
     @classmethod
     def load_state(cls, filepath: str = None) -> "LocalDB":
         """Load the state of the DB from a pickle file."""
-        filepath = filepath or "storage/local_item_db.pkl"
-        with open(filepath, "rb") as file:
-            return pickle.load(file)
+        filepath = filepath or os.path.join(
+            get_adalflow_default_root_path(), "local_db/local_item_db.pkl"
+        )
+        if os.path.exists(filepath):  # NOTE: falls through and returns None when the file is missing
+            with open(filepath, "rb") as file:
+                return pickle.load(file)  # NOTE: unpickling runs arbitrary code; only load trusted files
 
     def __getstate__(self):
         """Special handling of the components in pickling."""

diff --git a/adalflow/adalflow/core/generator.py b/adalflow/adalflow/core/generator.py
@@ -6,7 +6,6 @@
 import json
 
 from typing import Any, Dict, Optional, Union, Callable, Tuple, List
-from copy import deepcopy
 import logging
 
 
@@ -110,11 +109,6 @@ def __init__(
             )
 
         template = template or DEFAULT_LIGHTRAG_SYSTEM_PROMPT
-        try:
-            prompt_kwargs = deepcopy(prompt_kwargs)
-        except Exception as e:
-            log.warning(f"Error copying the prompt_kwargs: {e}")
-            prompt_kwargs = prompt_kwargs
 
         # Cache
         model_str = (
@@ -833,7 +827,17 @@ def __call__(self, *args, **kwargs) -> Union[GeneratorOutputType, Any]:
             return self.call(*args, **kwargs)
 
     def _extra_repr(self) -> str:
+        # Start the summary with the model configuration
         s = f"model_kwargs={self.model_kwargs}, "
+
+        # Names of the prompt kwargs that are Parameters flagged with requires_opt
+        trainable_keys = []
+        for name, value in self.prompt_kwargs.items():
+            if isinstance(value, Parameter) and value.requires_opt:
+                trainable_keys.append(name)
+
+        # Append the trainable names to the summary
+        s += f"trainable_prompt_kwargs={trainable_keys}"
         return s
 
     def to_dict(self) -> Dict[str, Any]:

diff --git a/adalflow/adalflow/eval/__init__.py b/adalflow/adalflow/eval/__init__.py
@@ -1,12 +1,20 @@
 from .answer_match_acc import AnswerMatchAcc
 from .retriever_recall import RetrieverRecall
-from .retriever_relevance import RetrieverRelevance
 from .llm_as_judge import LLMasJudge, DEFAULT_LLM_EVALUATOR_PROMPT
+from .g_eval import (
+    GEvalJudgeEvaluator,
+    GEvalLLMJudge,
+    GEvalMetric,
+    DEFAULT_G_EVAL_RPROMPT,
+)
 
 __all__ = [
     "AnswerMatchAcc",
     "RetrieverRecall",
-    "RetrieverRelevance",
     "LLMasJudge",
     "DEFAULT_LLM_EVALUATOR_PROMPT",
+    "GEvalJudgeEvaluator",
+    "GEvalLLMJudge",
+    "GEvalMetric",
+    "DEFAULT_G_EVAL_RPROMPT",
 ]
diff --git a/adalflow/adalflow/eval/answer_match_acc.py b/adalflow/adalflow/eval/answer_match_acc.py
@@ -1,4 +1,4 @@
-"""This is the metric for answer matching. It compares the predicted answer with the ground truth answer."""
+"""This is the metric for QA generation. It compares the predicted answer with the ground truth answer."""
 
 from typing import List, Literal
 from adalflow.eval.base import BaseEvaluator, EvaluationResult
@@ -68,38 +68,6 @@ def compute_single_item(
         else:
             raise NotImplementedError
 
-    # def compute_single_item(self, pred_answer: object, gt_answer: object) -> float:
-    #     r"""
-    #     Compute the match accuracy of the predicted answer for a single query.
-
-    #     Allow any type of input for pred_answer and gt_answer.
-    #     When evaluating, the input will be converted to string.
-
-    #     Args:
-    #         pred_answer (object): Predicted answer.
-    #         gt_answer (object): Ground truth answer.
-
-    #     Returns:
-    #         float: Match accuracy.
-    #     """
-    #     if isinstance(pred_answer, Parameter):
-    #         pred_answer = pred_answer.data
-    #     if isinstance(gt_answer, Parameter):
-    #         gt_answer = gt_answer.data
-    #     try:
-    #         pred_answer = str(pred_answer).split(" ")
-    #         gt_answer = str(gt_answer).split(" ")
-    #     except Exception as e:
-    #         raise ValueError(
-    #             f"Error converting pred_answer and gt_answer to string: {e}"
-    #         )
-    #     if self.type == "exact_match":
-    #         return 1.0 if pred_answer == gt_answer else 0.0
-    #     elif self.type == "fuzzy_match":
-    #         return 1.0 if gt_answer in pred_answer else 0.0
-    #     else:
-    #         raise NotImplementedError
-
     def compute(
         self, pred_answers: List[str], gt_answers: List[str]
     ) -> EvaluationResult:

diff --git a/adalflow/adalflow/eval/base.py b/adalflow/adalflow/eval/base.py
@@ -1,6 +1,6 @@
 """Abstract base class for evaluation metrics."""
 
-from typing import Optional, List
+from typing import Optional, List, Any
 
 from dataclasses import dataclass
 
@@ -22,7 +22,8 @@ def compute_single_item(self, *args, **kwargs) -> float:
         """Compute the score for a single item."""
         raise NotImplementedError("Subclasses must implement this method.")
 
-    def compute(self, *args, **kwargs) -> EvaluationResult:
+    # TODO: support multi-threading or async to speed up evaluation
+    def compute(self, *args, **kwargs) -> Any:
         """Evaluate a list of predictions and ground truth values. and return overall score and per-item scores."""
         raise NotImplementedError("Subclasses must implement this method.")