Implementing personalization with Gorilla #56

Open

wants to merge 7 commits into main
Changes from 3 commits
1 change: 1 addition & 0 deletions .gitignore
@@ -7,3 +7,4 @@ dist
.vscode
.idea
.editorconfig
.env
13 changes: 11 additions & 2 deletions go_cli.py
@@ -26,7 +26,7 @@
import sys
from halo import Halo
import go_questionary

from utils import personalize
__version__ = "0.0.11" # current version
SERVER_URL = "https://cli.gorilla-llm.com"
UPDATE_CHECK_FILE = os.path.expanduser("~/.gorilla-cli-last-update-check")
@@ -231,14 +231,23 @@ def get_history_commands(history_file):
    # Generate a unique interaction ID
    interaction_id = str(uuid.uuid4())

    personalized_input = f"""
Some relevant context about my history:
{personalize(user_input, get_history_commands(HISTORY_FILE), False)}

The query of the user is:
{user_input}
"""

    if args.history:
        commands = get_history_commands(HISTORY_FILE)
    else:
        with Halo(text=f"{GORILLA_EMOJI}Loading", spinner="dots"):
            try:
                data_json = {
                    "user_id": user_id,
                    "user_input": user_input,
                    "user_input": personalized_input,
"interaction_id": interaction_id,
"system_info": system_info
}
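For readers skimming the hunk above, a self-contained sketch of the payload it assembles may help. personalize and get_history_commands are names from this PR; build_payload and its sample arguments are hypothetical stand-ins introduced only for illustration, not code from the CLI.

# Hypothetical consolidation of the go_cli.py hunk above; build_payload is not part of the PR.
import uuid

from utils import personalize


def build_payload(user_id, user_input, system_info, history_commands):
    # personalize() is the helper added in utils.py by this PR; False disables PI removal.
    context = personalize(user_input, history_commands, False)
    personalized_input = (
        "Some relevant context about my history:\n"
        f"{context}\n\n"
        "The query of the user is:\n"
        f"{user_input}\n"
    )
    return {
        "user_id": user_id,
        "user_input": personalized_input,
        "interaction_id": str(uuid.uuid4()),
        "system_info": system_info,
    }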
86 changes: 86 additions & 0 deletions utils.py
@@ -0,0 +1,86 @@
import os
from presidio_analyzer import AnalyzerEngine, PatternRecognizer
Member

Don't you also need to include presidio_analyzer in the requirements?

Author

Where would I access the file for this?

from presidio_anonymizer import AnonymizerEngine
from presidio_anonymizer.entities import OperatorConfig
import json
from pprint import pprint
from openai import OpenAI

client = OpenAI(api_key=os.getenv("OPENAI_KEY"))

"""

1. Remove duplicates
2. Make PI removal an optional flag

"""


def get_bash_history():
    history_file = os.path.expanduser("~/.bash_history")
    try:
        with open(history_file, "r") as file:
            history = file.readlines()
    except FileNotFoundError:
        # Return a list so downstream list handling still works.
        return ["No bash history was found."]
    # Skip the 10 most recent entries and drop trailing newlines.
    return [line.strip() for line in history[:-10]]


def anonymize_bash_history(operations):
    analyzer = AnalyzerEngine()
    analyzer_results = analyzer.analyze(text=operations, language="en")
    anonymizer = AnonymizerEngine()
    anonymized_results = anonymizer.anonymize(
        text=operations, analyzer_results=analyzer_results
    )
    return anonymized_results.text


def remove_duplicates(operations: list[str]):
    # Preserve the original ordering while dropping repeats.
    return list(dict.fromkeys(operations))


def stringify_bash_history(operations: list[str]):
    return "\n".join(operations)


def synthesize_bash_history(desired_operation, gorilla_history, history):
    SYSTEM_PROMPT = """
You are an assistant for a developer who wants to find the right API call for a specific task.
The developer has bash history that contains the commands they used to perform tasks.
Synthesize their bash history to provide the API call prediction model with extra context about the task.
For reference, the API call prediction model, called Gorilla, is trained on a large dataset of API calls and their associated tasks.
You may see the developer's previous operations with the API calling tool in their bash history.
Use the previous bash history as well as their query to provide the model with a short paragraph of possible relevant context.
There is a chance that their query has nothing to do with the bash history, so in that case, return 'No relevant context found'.
"""
    USER_PROMPT = f"""
The user's bash history is:
{history}

The user's previous operations with the API calling tool are:
{gorilla_history}

The query of the user is:
{desired_operation}

Use this information to provide the model with a short paragraph of possible relevant context.
"""

    response = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": USER_PROMPT},
        ],
    )
    return response.choices[0].message.content


def personalize(query, gorilla_history, pi_removal=True):
    history = stringify_bash_history(remove_duplicates(get_bash_history()))
    if pi_removal:
        history = anonymize_bash_history(history)
    summary = synthesize_bash_history(query, gorilla_history, history)
    return summary
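To show the Presidio step used by anonymize_bash_history above in isolation, here is a minimal sketch, assuming presidio-analyzer and presidio-anonymizer (with their spaCy model) are installed; the sample command string is made up.

# Minimal standalone sketch of the anonymization step; the sample text is hypothetical.
from presidio_analyzer import AnalyzerEngine
from presidio_anonymizer import AnonymizerEngine

text = "scp backup.tar alice@203.0.113.7:/srv/backups"
results = AnalyzerEngine().analyze(text=text, language="en")
anonymized = AnonymizerEngine().anonymize(text=text, analyzer_results=results)
# Detected entities (here, the IP address) are replaced with placeholders such as <IP_ADDRESS>.
print(anonymized.text)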