Skip to content

Commit

Permalink
Merge pull request #203 from SylphAI-Inc/main
Browse files Browse the repository at this point in the history
[v0.2.2] G_eval and comprehensive LLM eval guideline
  • Loading branch information
Sylph-AI authored Sep 10, 2024
2 parents 26274f6 + 7e388f9 commit 82a7878
Show file tree
Hide file tree
Showing 51 changed files with 3,436 additions and 1,632 deletions.
16 changes: 11 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,20 +37,26 @@
<a href="https://pypi.org/project/adalflow/">
<img alt="PyPI Version" src="https://img.shields.io/pypi/v/adalflow?style=flat-square">
</a>
<a href="https://star-history.com/#SylphAI-Inc/LightRAG">
<img alt="GitHub stars" src="https://img.shields.io/github/stars/SylphAI-Inc/LightRAG?style=flat-square">
<a href="https://star-history.com/#SylphAI-Inc/AdalFlow">
<img alt="GitHub stars" src="https://img.shields.io/github/stars/SylphAI-Inc/AdalFlow?style=flat-square">
</a>
<a href="https://github.com/SylphAI-Inc/LightRAG/issues">
<img alt="Open Issues" src="https://img.shields.io/github/issues-raw/SylphAI-Inc/LightRAG?style=flat-square">
<a href="https://github.com/SylphAI-Inc/AdalFlow/issues">
<img alt="Open Issues" src="https://img.shields.io/github/issues-raw/SylphAI-Inc/AdalFlow?style=flat-square">
</a>
<a href="https://opensource.org/license/MIT">
<img alt="License" src="https://img.shields.io/github/license/SylphAI-Inc/LightRAG">
<img alt="License" src="https://img.shields.io/github/license/SylphAI-Inc/AdalFlow">
</a>
<a href="https://discord.gg/ezzszrRZvT">
<img alt="discord-invite" src="https://dcbadge.vercel.app/api/server/ezzszrRZvT?style=flat">
</a>
</p>

<h4>
<p align="center">
For AI researchers, product teams, and software engineers who want to learn the AI way.
</p>
</h4>



<!-- <a href="https://colab.research.google.com/drive/1PPxYEBa6eu__LquGoFFJZkhYgWVYE6kh?usp=sharing">
Expand Down
6 changes: 4 additions & 2 deletions adalflow/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
## [0.2.1] - 2024-09-01
## [0.2.2] - 2024-09-09
### Added
- `get_cache_path`: instead of printing the cache path every time, we add a ``get_cache_path`` method to retrieve it on demand.
- Make `huggingface datasets` an optional dependency.
- Eval: `G_eval` to evaluate llm applications that have no reference text.
### Modified
- Add `template` to let users pass their own template; it must accept the same arguments as the default template.
- Added `checkpoint resumt` in the `Trainer.diagnose` to show the newest performance and diagnostics on the checkpoint.
- Added `checkpoint resume` in the `Trainer.diagnose` to show the newest performance and diagnostics on the checkpoint.

## [0.2.0] - 2024-08-20
### Added
- Qdrant retriever.
Expand Down
10 changes: 8 additions & 2 deletions adalflow/adalflow/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "0.2.0"
__version__ = "0.2.2"

from adalflow.core.component import Component, fun_to_component
from adalflow.core.container import Sequential
Expand All @@ -8,7 +8,12 @@
from adalflow.core.generator import Generator


from adalflow.core.types import GeneratorOutput, EmbedderOutput, RetrieverOutput
from adalflow.core.types import (
GeneratorOutput,
EmbedderOutput,
RetrieverOutput,
Document,
)
from adalflow.core.model_client import ModelClient
from adalflow.core.embedder import Embedder
from adalflow.core.string_parser import (
Expand Down Expand Up @@ -91,6 +96,7 @@
"GeneratorOutput",
"EmbedderOutput",
"RetrieverOutput",
"Document",
# Optimizer types
"Optimizer",
"DemoOptimizer",
Expand Down
3 changes: 1 addition & 2 deletions adalflow/adalflow/components/retriever/faiss_retriever.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
import logging
import os

import faiss


from adalflow.core.retriever import Retriever
from adalflow.core.embedder import Embedder
Expand All @@ -31,6 +29,7 @@
from adalflow.utils.lazy_import import safe_import, OptionalPackages

safe_import(OptionalPackages.FAISS.value[0], OptionalPackages.FAISS.value[1])
import faiss

log = logging.getLogger(__name__)

Expand Down
65 changes: 53 additions & 12 deletions adalflow/adalflow/core/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

from adalflow.core.component import Component
from adalflow.utils.registry import EntityMapping
from adalflow.utils.global_config import get_adalflow_default_root_path


log = logging.getLogger(__name__)
Expand All @@ -18,6 +19,8 @@
U = TypeVar("U") # U will be the type after transformation


# TODO: localDB does not need to be a component
# TODO: DB clarity can be further improved
@dataclass
class LocalDB(Generic[T], Component):
__doc__ = r"""LocalDB with in-memory CRUD operations, data transformation/processing pipelines, and persistence.
Expand Down Expand Up @@ -109,6 +112,9 @@ class LocalDB(Generic[T], Component):
mapper_setups: Dict[str, Callable[[T], Any]] = field(
default_factory=dict, metadata={"description": "Map function setup by key"}
)
index_path: Optional[str] = field(
default="index.faiss", metadata={"description": "Path to the index file"}
)

def __post_init__(self):
super().__init__()
Expand All @@ -120,9 +126,27 @@ def length(self):
def get_transformer_keys(self) -> List[str]:
return list(self.transformed_items.keys())

def get_transformed_data(self, key: str) -> List[U]:
"""Get the transformed items by key."""
return self.transformed_items[key]
# def get_transformed_data(self, key: str) -> List[U]:
# """Get the transformed items by key."""
# return self.transformed_items[key]

def get_transformed_data(
    self, key: str, filter_fn: Callable[[Any], bool] = lambda x: True
) -> List[U]:
    r"""Return the transformed items stored under ``key`` that pass ``filter_fn``.

    Args:
        key (str): Key in ``self.transformed_items`` whose items are requested.
        filter_fn (Callable[[Any], bool], optional): Predicate called with each
            transformed item; only items for which it returns a truthy value are
            kept. Defaults to a predicate that keeps every item.

    Returns:
        List[U]: A new list holding the retained items in their stored order.

    Raises:
        ValueError: If ``key`` is not present in ``self.transformed_items``.
    """
    store = self.transformed_items
    if key in store:
        # The predicate is applied to the transformed items themselves.
        kept = filter(filter_fn, store[key])
        return list(kept)
    raise ValueError(f"Key {key} not found in transformed items.")

def _get_transformer_name(self, transformer: Component) -> str:
name = f"{transformer.__class__.__name__}_"
Expand All @@ -143,6 +167,7 @@ def register_transformer(
self.transformer_setups[key] = transformer
if map_fn is not None:
self.mapper_setups[key] = map_fn
self.transformed_items[key] = []
return key

@overload
Expand Down Expand Up @@ -209,9 +234,15 @@ def load(self, items: List[Any]):
"""
self.items = items

def extend(self, items: List[Any], apply_transformer: bool = True):
def extend(
self,
items: List[Any],
apply_transformer: bool = True,
):
"""Extend the db with new items."""

self.items.extend(items)

if apply_transformer:
for key, transformer in self.transformer_setups.items():
# check if there was a map function registered
Expand All @@ -223,8 +254,6 @@ def extend(self, items: List[Any], apply_transformer: bool = True):
transformed_items = transformer(items)
self.transformed_items[key].extend(transformed_items)

self.items.extend(items)

def delete(self, index: Optional[int] = None, remove_transformed: bool = True):
"""Remove items by index or pop the last item. Optionally remove the transformed data as well.
Expand Down Expand Up @@ -293,26 +322,38 @@ def reset(self):
self.mapper_setups = {}
self.items = []

def save_state(self, filepath: Optional[str] = None):
    """Save the current state (attributes) of the DB using pickle.

    Args:
        filepath (Optional[str]): Destination file. When omitted (or empty), the
            file is written under the AdalFlow default root directory as
            ``local_db/<self.name>.pkl`` if ``self.name`` is set, otherwise as
            ``local_db/local_item_db.pkl``.

    Note:
        The transformer setups will be lost when pickling. As it might not be picklable.
    """
    if not filepath:
        relative_path = (
            f"local_db/{self.name}.pkl"
            if self.name
            else "local_db/local_item_db.pkl"
        )
        # The root-path helper must be *called*: passing the function object
        # itself to os.path.join raises TypeError.
        filepath = os.path.join(get_adalflow_default_root_path(), relative_path)

    # NOTE(review): ``index_path`` is described on the dataclass as the path to
    # the index file, yet it is set to the pickle path here -- confirm intent.
    self.index_path = filepath

    # A bare file name has no directory component; nothing to create then.
    file_dir = os.path.dirname(filepath)
    if file_dir:
        os.makedirs(file_dir, exist_ok=True)

    with open(filepath, "wb") as file:
        pickle.dump(self, file)
    print(f"Saved the state of the DB to {filepath}")

@classmethod
def load_state(cls, filepath: Optional[str] = None) -> Optional["LocalDB"]:
    """Load the state of the DB from a pickle file.

    Args:
        filepath (Optional[str]): Pickle file to read. When omitted (or empty),
            ``local_db/local_item_db.pkl`` under the AdalFlow default root
            directory is used. A DB that was saved under its own name (see
            ``save_state``) must be loaded by passing its path explicitly.

    Returns:
        Optional[LocalDB]: The unpickled object, or ``None`` when the file does
        not exist.
    """
    if not filepath:
        # The root-path helper must be *called*: passing the function object
        # itself to os.path.join raises TypeError.
        filepath = os.path.join(
            get_adalflow_default_root_path(), "local_db/local_item_db.pkl"
        )
    if not os.path.exists(filepath):
        # Missing file is treated as "nothing saved yet" rather than an error.
        return None
    # SECURITY: pickle.load can execute arbitrary code; only load files that
    # this application wrote itself.
    with open(filepath, "rb") as file:
        return pickle.load(file)

def __getstate__(self):
"""Special handling of the components in pickling."""
Expand Down
16 changes: 10 additions & 6 deletions adalflow/adalflow/core/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import json

from typing import Any, Dict, Optional, Union, Callable, Tuple, List
from copy import deepcopy
import logging


Expand Down Expand Up @@ -110,11 +109,6 @@ def __init__(
)

template = template or DEFAULT_LIGHTRAG_SYSTEM_PROMPT
try:
prompt_kwargs = deepcopy(prompt_kwargs)
except Exception as e:
log.warning(f"Error copying the prompt_kwargs: {e}")
prompt_kwargs = prompt_kwargs

# Cache
model_str = (
Expand Down Expand Up @@ -833,7 +827,17 @@ def __call__(self, *args, **kwargs) -> Union[GeneratorOutputType, Any]:
return self.call(*args, **kwargs)

def _extra_repr(self) -> str:
    """Build the extra text shown in this component's repr.

    Returns:
        str: ``model_kwargs=<...>, trainable_prompt_kwargs=[<keys>]`` where the
        keys are those entries of ``self.prompt_kwargs`` whose value is a
        ``Parameter`` with ``requires_opt`` set.
    """
    # Leading part: the model kwargs, followed by the separator.
    head = f"model_kwargs={self.model_kwargs}, "

    # Collect only the names of prompt kwargs that are trainable parameters.
    trainable_keys = []
    for name, value in self.prompt_kwargs.items():
        if isinstance(value, Parameter) and value.requires_opt:
            trainable_keys.append(name)

    return head + f"trainable_prompt_kwargs={trainable_keys}"

def to_dict(self) -> Dict[str, Any]:
Expand Down
12 changes: 10 additions & 2 deletions adalflow/adalflow/eval/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,20 @@
from .answer_match_acc import AnswerMatchAcc
from .retriever_recall import RetrieverRecall
from .retriever_relevance import RetrieverRelevance
from .llm_as_judge import LLMasJudge, DEFAULT_LLM_EVALUATOR_PROMPT
from .g_eval import (
GEvalJudgeEvaluator,
GEvalLLMJudge,
GEvalMetric,
DEFAULT_G_EVAL_RPROMPT,
)

__all__ = [
"AnswerMatchAcc",
"RetrieverRecall",
"RetrieverRelevance",
"LLMasJudge",
"DEFAULT_LLM_EVALUATOR_PROMPT",
"GEvalJudgeEvaluator",
"GEvalLLMJudge",
"GEvalMetric",
"DEFAULT_G_EVAL_RPROMPT",
]
34 changes: 1 addition & 33 deletions adalflow/adalflow/eval/answer_match_acc.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""This is the metric for answer matching. It compares the predicted answer with the ground truth answer."""
"""This is the metric for QA generation. It compares the predicted answer with the ground truth answer."""

from typing import List, Literal
from adalflow.eval.base import BaseEvaluator, EvaluationResult
Expand Down Expand Up @@ -68,38 +68,6 @@ def compute_single_item(
else:
raise NotImplementedError

# def compute_single_item(self, pred_answer: object, gt_answer: object) -> float:
# r"""
# Compute the match accuracy of the predicted answer for a single query.

# Allow any type of input for pred_answer and gt_answer.
# When evaluating, the input will be converted to string.

# Args:
# pred_answer (object): Predicted answer.
# gt_answer (object): Ground truth answer.

# Returns:
# float: Match accuracy.
# """
# if isinstance(pred_answer, Parameter):
# pred_answer = pred_answer.data
# if isinstance(gt_answer, Parameter):
# gt_answer = gt_answer.data
# try:
# pred_answer = str(pred_answer).split(" ")
# gt_answer = str(gt_answer).split(" ")
# except Exception as e:
# raise ValueError(
# f"Error converting pred_answer and gt_answer to string: {e}"
# )
# if self.type == "exact_match":
# return 1.0 if pred_answer == gt_answer else 0.0
# elif self.type == "fuzzy_match":
# return 1.0 if gt_answer in pred_answer else 0.0
# else:
# raise NotImplementedError

def compute(
self, pred_answers: List[str], gt_answers: List[str]
) -> EvaluationResult:
Expand Down
5 changes: 3 additions & 2 deletions adalflow/adalflow/eval/base.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""Abstract base class for evaluation metrics."""

from typing import Optional, List
from typing import Optional, List, Any

from dataclasses import dataclass

Expand All @@ -22,7 +22,8 @@ def compute_single_item(self, *args, **kwargs) -> float:
"""Compute the score for a single item."""
raise NotImplementedError("Subclasses must implement this method.")

def compute(self, *args, **kwargs) -> EvaluationResult:
# TODO: support multi-threading or async to speed up evaluation
def compute(self, *args, **kwargs) -> Any:
    """Evaluate a list of predictions and ground truth values and return the overall score and per-item scores.

    This base implementation only raises; subclasses must override it.

    Raises:
        NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError("Subclasses must implement this method.")

Expand Down
Loading

0 comments on commit 82a7878

Please sign in to comment.