From e4d8b7aa3d0de695a6752fba4626b75033d67f16 Mon Sep 17 00:00:00 2001 From: Jeremy Pinto Date: Mon, 26 Jun 2023 13:36:48 -0400 Subject: [PATCH] fix to_json() method (#109) * update Completion.to_json() method to ignore arbitrary columns --- buster/completers/base.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/buster/completers/base.py b/buster/completers/base.py index 87612a1..14071d7 100644 --- a/buster/completers/base.py +++ b/buster/completers/base.py @@ -2,7 +2,7 @@ import os from abc import ABC, abstractmethod from dataclasses import dataclass, field -from typing import Any, Iterator +from typing import Any, Iterator, Optional import openai import pandas as pd @@ -71,10 +71,24 @@ def completor(self): def completor(self, value: str) -> None: self._completor = value - def to_json(self) -> Any: + def to_json(self, columns_to_ignore: Optional[list[str]] = None) -> Any: + """Converts selected attributes of the object to a JSON format. + + Args: + columns_to_ignore (list[str]): A list of column names to ignore in the resulting matched_documents dataframe. + + Returns: + Any: The object's attributes encoded as JSON. + + Notes: + - The 'matched_documents' attribute of type pd.DataFrame is encoded separately + using a custom encoder. + - The resulting JSON may exclude specified columns based on the 'columns_to_ignore' parameter. + """ + def encode_df(df: pd.DataFrame) -> dict: - if "embedding" in df.columns: - df = df.drop(columns=["embedding"]) + if columns_to_ignore is not None: + df = df.drop(columns=columns_to_ignore) return df.to_json(orient="index") custom_encoder = {