Skip to content

Commit

Permalink
update info extraction piece
Browse files Browse the repository at this point in the history
  • Loading branch information
vinicvaz committed May 28, 2024
1 parent 8b16f8f commit 4ce3d81
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 7 deletions.
2 changes: 1 addition & 1 deletion config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@ REPOSITORY_LABEL = "OpenAI Domino Pieces"

# The version of this Pieces release
# Attention: changing this will create a new release
VERSION = "0.7.2"
VERSION = "0.7.3"

3 changes: 2 additions & 1 deletion pieces/InformationExtractionPiece/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from enum import Enum
from typing import List
from domino.models import OutputModifierModel, OutputModifierItemType

from typing import Optional

class LLMModelType(str, Enum):
"""
Expand Down Expand Up @@ -40,6 +40,7 @@ class OutputModel(BaseModel):
"""
# ref: https://docs.pydantic.dev/latest/concepts/models/#extra-fields
model_config = ConfigDict(extra='allow')
output_data: Optional[str] = Field(description="Extracted information as Stringfieid JSON.")


class SecretsModel(BaseModel):
Expand Down
55 changes: 50 additions & 5 deletions pieces/InformationExtractionPiece/piece.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from .models import InputModel, OutputModel, SecretsModel
from openai import OpenAI
import json
from typing import Union


class InformationExtractionPiece(BasePiece):
Expand All @@ -13,6 +14,7 @@ def piece_function(self, input_data: InputModel, secrets_data: SecretsModel):

client = OpenAI(api_key=secrets_data.OPENAI_API_KEY)
prompt = f"""Extract the following information from the text below as JSON.
The output can be a simple json or a list of jsons but never a nested json.
Use the items to be extract as information to identify the right information to be extract:
---
Input text: {input_data.input_text}
Expand All @@ -36,16 +38,23 @@ def piece_function(self, input_data: InputModel, secrets_data: SecretsModel):
raise Exception("No response from OpenAI")

output_json = json.loads(response.choices[0].message.content)

# Display result in the Domino GUI
self.format_display_result(input_data, output_json)
if not all(item.name in output_json for item in input_data.extract_items):
key = list(output_json.keys())[0]
if isinstance(output_json[key], list):
output_json = output_json[key]

# Return extracted information
self.logger.info("Returning extracted information")
return OutputModel(**output_json)
if isinstance(output_json, dict):
self.format_display_result_object(input_data, output_json)
return OutputModel(**output_json, output_data=json.dumps(output_json))

self.format_display_result_table(input_data, output_json)
return OutputModel(output_data=json.dumps(output_json))

def format_display_result(self, input_data: InputModel, result: dict):
def format_display_result_object(self, input_data: InputModel, result: dict):
md_text = """## Extracted Information\n"""

for item in input_data.extract_items:
md_text += f"""### {item.name}:\n{result.get(item.name)}\n"""
file_path = f"{self.results_path}/display_result.md"
Expand All @@ -55,3 +64,39 @@ def format_display_result(self, input_data: InputModel, result: dict):
"file_type": "md",
"file_path": file_path
}

def format_display_result_table(self, input_data: "InputModel", result: Union[dict, list]):
    """Render the extracted information as a Markdown table for display.

    The table has one column per entry of ``input_data.extract_items``
    (using each item's ``name``) and one row per extracted object. The
    Markdown is logged, written to ``<results_path>/display_result.md`` and
    registered on ``self.display_result``.

    Args:
        input_data: Piece input; only ``extract_items`` (objects exposing a
            ``name`` attribute) is read here.
        result: Either a list of dict-like rows or a single dict-like
            object, which is rendered as a one-row table. Values are looked
            up with ``.get(name)``, so missing keys render as ``None``.
    """

    def _cell(value) -> str:
        # A raw "|" or a line break inside a cell would split the row and
        # corrupt the table, so escape pipes and flatten line breaks.
        text = str(value).replace("|", "\\|")
        return text.replace("\r\n", " ").replace("\n", " ").replace("\r", " ")

    names = [item.name for item in input_data.extract_items]

    # A single object is just a one-row table; normalising here removes the
    # need for two copies of the table-building code.
    rows = result if isinstance(result, list) else [result]

    lines = [
        "## Extracted Information\n",
        "| " + " | ".join(_cell(name) for name in names) + " |",
        "|---" * len(names) + "|",
    ]
    lines.extend(
        "| " + " | ".join(_cell(row.get(name)) for name in names) + " |"
        for row in rows
    )
    md_text = "\n".join(lines) + "\n"

    self.logger.info(md_text)
    file_path = f"{self.results_path}/display_result.md"
    # Explicit encoding: extracted text may contain non-ASCII characters and
    # the platform default encoding is not guaranteed to handle them.
    with open(file_path, "w", encoding="utf-8") as f:
        f.write(md_text)
    self.display_result = {
        "file_type": "md",
        "file_path": file_path
    }

0 comments on commit 4ce3d81

Please sign in to comment.