From e4d8b7aa3d0de695a6752fba4626b75033d67f16 Mon Sep 17 00:00:00 2001
From: Jeremy Pinto <jerpint@gmail.com>
Date: Mon, 26 Jun 2023 13:36:48 -0400
Subject: [PATCH] fix to_json() method (#109)

* update Completion.to_json() method to ignore arbitrary columns
---
 buster/completers/base.py | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/buster/completers/base.py b/buster/completers/base.py
index 87612a1..14071d7 100644
--- a/buster/completers/base.py
+++ b/buster/completers/base.py
@@ -2,7 +2,7 @@
 import os
 from abc import ABC, abstractmethod
 from dataclasses import dataclass, field
-from typing import Any, Iterator
+from typing import Any, Iterator, Optional
 
 import openai
 import pandas as pd
@@ -71,10 +71,24 @@ def completor(self):
     def completor(self, value: str) -> None:
         self._completor = value
 
-    def to_json(self) -> Any:
+    def to_json(self, columns_to_ignore: Optional[list[str]] = None) -> Any:
+        """Converts selected attributes of the object to a JSON format.
+
+        Args:
+            columns_to_ignore (list[str]): A list of column names to ignore in the resulting matched_documents dataframe.
+
+        Returns:
+            Any: The object's attributes encoded as JSON.
+
+        Notes:
+            - The 'matched_documents' attribute of type pd.DataFrame is encoded separately
+            using a custom encoder.
+            - The resulting JSON may exclude specified columns based on the 'columns_to_ignore' parameter.
+        """
+
         def encode_df(df: pd.DataFrame) -> dict:
-            if "embedding" in df.columns:
-                df = df.drop(columns=["embedding"])
+            if columns_to_ignore is not None:
+                df = df.drop(columns=columns_to_ignore)
             return df.to_json(orient="index")
 
         custom_encoder = {