diff --git a/.gitignore b/.gitignore index f8f86f0..c0d0505 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ dist .vscode .idea .editorconfig +.env \ No newline at end of file diff --git a/go_cli.py b/go_cli.py index 42296d4..26aa7d4 100644 --- a/go_cli.py +++ b/go_cli.py @@ -19,6 +19,8 @@ import fcntl import platform import requests +from openai import OpenAI +import json import subprocess import argparse import termios @@ -26,18 +28,23 @@ import sys from halo import Halo import go_questionary +from personalization.personalize import GorillaPersonalizer +from personalization.setup import PersonalizationSetup -__version__ = "0.0.11" # current version + +__version__ = "0.0.12" # current version SERVER_URL = "https://cli.gorilla-llm.com" UPDATE_CHECK_FILE = os.path.expanduser("~/.gorilla-cli-last-update-check") USERID_FILE = os.path.expanduser("~/.gorilla-cli-userid") HISTORY_FILE = os.path.expanduser("~/.gorilla_cli_history") +CONFIG_FILE = os.path.expanduser("~/.gorilla-cli-config.json") ISSUE_URL = f"https://github.com/gorilla-llm/gorilla-cli/issues/new" GORILLA_EMOJI = "🦍 " if go_questionary.try_encode_gorilla() else "" HISTORY_LENGTH = 10 WELCOME_TEXT = f"""===***=== {GORILLA_EMOJI}Welcome to Gorilla-CLI! Enhance your Command Line with the power of LLMs! + Simply use `gorilla ` and Gorilla will do the rest. 
For instance: gorilla generate 100 random characters into a file called test.txt gorilla get the image ids of all pods running in all namespaces in kubernetes @@ -54,6 +61,13 @@ def generate_random_uid(): return str(uuid.uuid4()) +def load_config(): + # Load the user's configuration file and perform any necessary checks + if os.path.isfile(CONFIG_FILE): + with open(CONFIG_FILE, "r") as config_file: + config_json = json.load(config_file) + return config_json + def get_git_email(): return subprocess.check_output(["git", "config", "--global", "user.email"]).decode("utf-8").strip() @@ -122,8 +136,32 @@ def check_for_updates(): except Exception as e: print("Unable to write update check file:", e) +def setup_config_file(user_id): + config_json = { "user_id": user_id } + with open(CONFIG_FILE, "w") as config_file: + json.dump(config_json, config_file) def get_user_id(): + if os.path.isfile(CONFIG_FILE): + with open(CONFIG_FILE, "r") as config_file: + try: + config_json = json.load(config_file) + if "user_id" not in config_json: + user_id = get_user_id_deprecated() + setup_config_file(user_id) + return user_id + else: + return config_json["user_id"] + except Exception: + user_id = get_user_id_deprecated() + setup_config_file(user_id) + return user_id + else: + user_id = get_user_id_deprecated() + setup_config_file(user_id) + return user_id + +def get_user_id_deprecated(): # Unique user identifier for authentication and load balancing # Gorilla-CLI is hosted by UC Berkeley Sky lab for FREE as a # research prototype. 
Please don't spam the system or use it @@ -184,6 +222,9 @@ def append_string_to_file_if_missing(file_path, target_string): file.write(target_string) + + + def main(): def execute_command(cmd): cmd = format_command(cmd) @@ -220,6 +261,13 @@ def get_history_commands(history_file): user_id = get_user_id() system_info = get_system_info() + personalization_setup = PersonalizationSetup() + # personalization_setup.request_personalization() + personalization = personalization_setup.personalization + open_ai_key = personalization_setup.open_ai_key + + personalized_history = None + # Parse command-line arguments parser = argparse.ArgumentParser(description="Gorilla CLI Help Doc") @@ -231,13 +279,24 @@ def get_history_commands(history_file): # Generate a unique interaction ID interaction_id = str(uuid.uuid4()) + + + commands = [] if args.history: commands = get_history_commands(HISTORY_FILE) + + if personalization: + personalizer = GorillaPersonalizer(open_ai_key) + personalized_history = personalizer.personalize(user_input, commands) + print (personalized_history) + + else: with Halo(text=f"{GORILLA_EMOJI}Loading", spinner="dots"): try: data_json = { "user_id": user_id, + #"synthesized_history": personalized_history, "user_input": user_input, "interaction_id": interaction_id, "system_info": system_info diff --git a/personalization/personalize.py b/personalization/personalize.py new file mode 100644 index 0000000..6bc5a1c --- /dev/null +++ b/personalization/personalize.py @@ -0,0 +1,106 @@ +import os +from presidio_analyzer import AnalyzerEngine, PatternRecognizer +from presidio_anonymizer import AnonymizerEngine +from presidio_anonymizer.entities import OperatorConfig +import json +from pprint import pprint +from openai import OpenAI +from personalization.prompts import get_system_prompt, get_user_prompt + +class GorillaPersonalizer: + """A class to personalize the user's bash history and query to provide the model with relevant context. 
+ + Attributes: + client: The OpenAI client. + + """ + + def __init__(self, open_ai_key): + """ Initializes the GorillaPersonalizer class.""" + self.client = OpenAI(api_key=open_ai_key) + + def get_bash_history(self): + """ + Retrieves the user's bash history. (Last 10 commands) + """ + history_file = os.path.expanduser("~/.bash_history") + prev_operations = "" + try: + with open(history_file, "r") as file: + history = file.read().splitlines() + except FileNotFoundError: + return ["No bash history was found."] + return history[-10:] + + def anonymize_bash_history(self, operations): + """ + Uses Microsoft's Presidio to anonymize the user's bash history. + + Args: + operations: The user's bash history. + + """ + analyzer = AnalyzerEngine() + analyzer_results = analyzer.analyze(text=operations, language="en") + anonymizer = AnonymizerEngine() + anonymized_results = anonymizer.anonymize( + text=operations, analyzer_results=analyzer_results + ) + return anonymized_results.text + + def remove_duplicates(self, operations: list[str]): + """Removes duplicates from the user's bash history + + Args: + operations: The user's bash history. + + """ + return list(dict.fromkeys(operations)) + + def stringify_bash_history(self, operations: list[str]): + """Stringifies the user's bash history. + + Args: + operations: The user's bash history. + + """ + return "\n".join(operations) + + def synthesize_bash_history(self, desired_operation, gorilla_history, history): + """Uses OpenAI api to synthesize the user's bash, gorilla history. + It synthesizes this history with the current operation in mind, with the goal of providing the model with relevant context. + + Args: + client: The OpenAI client. + desired_operation: The operation the user wants to perform. + gorilla_history: The user's previous operations with the Gorilla + history: The user's bash history. 
+ + """ + system_prompt = get_system_prompt() + user_prompt = get_user_prompt(history, gorilla_history, desired_operation) + response = self.client.chat.completions.create( + model="gpt-4", + messages=[ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_prompt}, + ], + ) + return response.choices[0].message.content + + def personalize(self, query, gorilla_history, pi_removal=True): + """Personalizes the user's bash history and query to provide the model with relevant context. + + Args: + query: The operation the user wants to perform. + gorilla_history: The user's previous operations with the Gorilla + open_ai_key: The OpenAI API key. + pi_removal: Whether to remove personally identifiable information from the user's bash history. + + """ + + history = self.stringify_bash_history(self.remove_duplicates(self.get_bash_history())) + if pi_removal: + history = self.anonymize_bash_history(history) + summary = self.synthesize_bash_history(query, gorilla_history, history) + return summary diff --git a/personalization/prompts.py b/personalization/prompts.py new file mode 100644 index 0000000..1930e8a --- /dev/null +++ b/personalization/prompts.py @@ -0,0 +1,26 @@ + + +def get_system_prompt(): + return f""" +You are an assistant for a developer who wants to find the right API call for a specific task. +The developer has bash history that contains the command they used to perform a task. +Synthesize their bash history to provide the API call prediction model with extra context about the task. +For reference, the API call prediction model, called Gorilla, is trained on a large dataset of API calls and their associated tasks. +You may see the developer's previous operations with the API calling tool in their bash history. +Use the previous bash history as well as their query to provide the model with a short paragraph of possible relevant context. 
+There is a chance that their query has nothing to do with the bash history, so in that case, return 'No relevant context found'. +""" + +def get_user_prompt(history, gorilla_history, desired_operation): + return f""" +The user's bash history is: +{history} + +The user's previous operations with the API calling tool are: +{gorilla_history} + +The query of the user is: +{desired_operation} + +Use this information to provide the model with a short paragraph of possible relevant context. + """ diff --git a/personalization/setup.py b/personalization/setup.py new file mode 100644 index 0000000..b8734ea --- /dev/null +++ b/personalization/setup.py @@ -0,0 +1,142 @@ +import os +import json +from openai import OpenAI +CONFIG_FILE = os.path.expanduser("~/.gorilla-cli-config.json") + + +class PersonalizationSetup: + + """ + Sets up the personalization options for the user. + """ + + def __init__(self): + """ + Initially sets the personalization to Falsy values + After, looks through the config/user files to populate relevant information + + On top of simply updating personalization settings, since we need to handle the old implementation of user IDs, + we need to add the user-ID from the user-ID file. 
+ """ + self.personalization = False + self.open_ai_key = None + + with open(CONFIG_FILE, "r") as config_file: + self.config_json = json.load(config_file) + # NOTE: never print the loaded config; it holds the user's API key. + if "personalization" not in self.config_json: + self.config_json["personalization"] = { + "permission": False, "api_key": None} + self.request_personalization() + else: + self.open_ai_key = self.config_json["personalization"]["api_key"] + self.personalization = self.permission = self.config_json["personalization"]["permission"] + + def populatePersonalizationSettings(self): + """ + Populates the json file with the relevant details/information + """ + self.config_json["personalization"]["permission"] = self.personalization + self.config_json["personalization"]["api_key"] = self.open_ai_key + with open(CONFIG_FILE, "w") as config_file: + json.dump(self.config_json, config_file) + + def checkOpenAIAPIValidity(self): + """ + Checks if the provided OpenAI key is valid or not. + """ + try: + # The key is a secret, so it is not echoed; listing models fails on a bad key. + client = OpenAI(api_key=self.open_ai_key) + client.models.list() + print () + except Exception as e: + return False + return True + + def changeOpenAIKey(self): + """ + Enables the user to add an OpenAI key + """ + self.open_ai_key = str( + input("Enter your new OpenAI API key: ")).strip() + while not self.checkOpenAIAPIValidity(): + response = str(input( + "The API key you entered is invalid. Do you want to try again? [Y/n]: ")).strip().lower() + if response in ["n", "no"]: + self.open_ai_key = None + return False + else: + self.open_ai_key = str( + input("Enter your new OpenAI API key: ")).strip() + return True + + def editPersonalizationSettings(self, permission: bool): + """ + This function enables a user to edit their personalization settings. + + We cover the following cases when the user wants to edit their settings: + + 1. The user wants to personalize, but they already have it enabled. + In this case, we ask them if they want to change their API key or not. + + 2. 
The user doesn't want to personalize, and they already have it disabled. + We let them know that they already have it disabled. + + 3. The user wants to personalize and they have it disabled. In that case, + we ask them to provide an API key. + + 4. The user doesn't want to personalize, and they have it enabled. In that case, + we turn off personalization for them. + """ + + if self.config_json["personalization"]["permission"] and permission: + res = input(f"You are already using the following API key:\n\n{self.open_ai_key}\n\nDo you want to change it?").strip().lower() + if res in ["n", "no"]: + print("You're all set.") + else: + old_key = self.open_ai_key + success = self.changeOpenAIKey() + if success: + print("We successfully updated your API key.") + self.personalization = True + else: + print( + "You didn't provide a valid API key, so we didn't update your settings.") + self.personalization = True + self.open_ai_key = old_key + + # the user already has personalization not enabled so we keep it that way + elif not self.config_json["personalization"]["permission"] and not permission: + print("You already have personalization disabled. 
You're good to go!") + # the case of turning personalization off + elif self.config_json["personalization"]["permission"] and not permission: + print("We turned off personalization for you.") + self.personalization = False + self.open_ai_key = None + # the case of turning personalization on + else: + api_key = self.changeOpenAIKey() + if api_key: + print("We successfully added your API key.") + self.personalization = True + else: + print( + "You didn't provide a valid API key, so we didn't update your settings.") + self.personalization = False + self.open_ai_key = None + + self.populatePersonalizationSettings() + + def request_personalization(self): + """ + Ask the user if they want to personalize their bash history - depending on the Y/n response, set the personalize flag to true/false + """ + response = input( + "Do you want to personalize your bash history? [Y/n]: ").strip().lower() + if response in ["n", "no"]: + print("We won't use your bash history to personalize your queries. You can always turn this feature on in the future!") + self.editPersonalizationSettings(False) + else: + print("We're going to be using your bash history to personalize your queries. This feature will require OpenAI API access, so enter your API key when prompted below. You can always turn this feature off in the future!") + self.editPersonalizationSettings(True) diff --git a/setup.py b/setup.py index fc81ba6..de8b179 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ setup( name="gorilla-cli", - version="0.0.11", + version="0.0.12", url="https://github.com/gorilla-llm/gorilla-cli", author="Shishir Patil, Tianjun Zhang", author_email="sgp@berkeley.edu, tianjunz@berkeley.edu",