Commit
[FEAT] Flow.run() img = None for conditional img inputs, BaseMultiModalModel, and multi-modal swarms of manufacturing agents
kyegomez committed Nov 25, 2023
1 parent f895497 commit a92a6a5
Showing 10 changed files with 189 additions and 28 deletions.
12 changes: 12 additions & 0 deletions example.py
@@ -1,9 +1,21 @@
import os

from dotenv import load_dotenv

# Import the OpenAIChat model and the Flow struct
from swarms.models import OpenAIChat
from swarms.structs import Flow

# Load the environment variables
load_dotenv()

# Get the API key from the environment
api_key = os.environ.get("OPENAI_API_KEY")

# Initialize the language model
llm = OpenAIChat(
temperature=0.5,
openai_api_key=api_key,
)


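Note: the rest of example.py is collapsed in this view. As a rough, illustrative sketch (not the collapsed content itself), the initialized llm is typically handed to a Flow and run with a plain text task:

flow = Flow(
    llm=llm,
    max_loops=1,  # illustrative; the real file may use a different value
)

out = flow.run("Summarize the benefits of multi-modal agents in one paragraph.")
print(out)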
4 changes: 4 additions & 0 deletions multi_modal_auto_agent.py
@@ -1,5 +1,8 @@
from swarms.structs import Flow
from swarms.models.gpt4_vision_api import GPT4VisionAPI
from swarms.prompts.multi_modal_autonomous_instruction_prompt import (
MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1,
)


llm = GPT4VisionAPI()
@@ -10,6 +13,7 @@
## Initialize the workflow
flow = Flow(
llm=llm,
sop=MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1,
max_loops="auto",
)

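According to the commit title, Flow.run() now accepts img = None, so the same flow can be called with or without an image. A minimal sketch of that usage, with a hypothetical task and image path:

task = "What is the color of the object in this image?"
img = "images/swarms.jpeg"  # hypothetical path

flow.run(task, img)  # multi-modal call: the image is passed through to GPT4VisionAPI
flow.run(task)       # text-only call: img falls back to None and is skipped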
7 changes: 7 additions & 0 deletions playground/demos/idea_2_img/main.py
@@ -0,0 +1,7 @@
"""
Idea 2 img: idea-to-image demo
task -> GPT-4 text -> DALL-E 3 image -> GPT-4 Vision analyzes image + text -> DALL-E 3 image -> loop
"""
from swarms.models.gpt4_vision_api import GPT4VisionAPI
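Only the import is shown here; the loop described in the docstring would look roughly like the sketch below. generate_image() is a hypothetical stand-in for a DALL-E 3 call, the initial GPT-4 prompt-refinement step is omitted for brevity, and the iteration count is arbitrary:

def generate_image(prompt: str) -> str:
    """Hypothetical DALL-E 3 wrapper; returns a path to the generated image."""
    return "generated.png"


llm = GPT4VisionAPI()
idea = "a self-assembling factory floor"

for _ in range(3):  # bounded loop instead of an open-ended one
    img_path = generate_image(idea)                                   # text -> image
    critique = llm.run(f"Critique this render of: {idea}", img_path)  # image + text -> analysis
    idea = f"{idea}. Address this critique: {critique}"               # feed the analysis back in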
15 changes: 15 additions & 0 deletions playground/demos/swarm_of_mma_manufacturing/main.py
@@ -0,0 +1,15 @@
"""
Swarm of multi modal autonomous agents for manufacturing!
---------------------------------------------------------
Health Security agent: Agent that monitors the health of working conditions: input image of factory output: health safety index 0.0 - 1.0 being the highest
Quality Control agent: Agent that monitors the quality of the product: input image of product output: quality index 0.0 - 1.0 being the highest
Productivity agent: Agent that monitors the productivity of the factory: input image of factory output: productivity index 0.0 - 1.0 being the highest
Safety agent: Agent that monitors the safety of the factory: input image of factory output: safety index 0.0 - 1.0 being the highest
Security agent: Agent that monitors the security of the factory: input image of factory output: security index 0.0 - 1.0 being the highest
Sustainability agent: Agent that monitors the sustainability of the factory: input image of factory output: sustainability index 0.0 - 1.0 being the highest
Efficiency agent: Agent that monitors the efficiency of the factory: input image of factory output: efficiency index 0.0 - 1.0 being the highest
Flow:
health security agent -> quality control agent -> productivity agent -> safety agent -> security agent -> sustainability agent -> efficiency agent
"""
46 changes: 27 additions & 19 deletions swarms/models/base_multimodal_model.py
@@ -1,3 +1,4 @@
from abc import abstractmethod
import asyncio
import base64
import concurrent.futures
@@ -7,8 +8,8 @@
from typing import List, Optional, Tuple

import requests
from ABC import abstractmethod
from PIL import Image
from termcolor import colored


class BaseMultiModalModel:
@@ -37,7 +38,6 @@ def __init__(
self.retries = retries
self.chat_history = []


@abstractmethod
def __call__(self, text: str, img: str):
"""Run the model"""
@@ -61,17 +61,17 @@ def get_img_from_web(self, img: str):
except requests.RequestException as error:
print(f"Error fetching image from {img} and error: {error}")
return None

def encode_img(self, img: str):
"""Encode the image to base64"""
with open(img, "rb") as image_file:
return base64.b64encode(image_file.read()).decode("utf-8")

def get_img(self, img: str):
"""Get the image from the path"""
image_pil = Image.open(img)
return image_pil

def clear_chat_history(self):
"""Clear the chat history"""
self.chat_history = []
@@ -87,11 +87,11 @@ def run_many(
Args:
tasks (List[str]): List of tasks
imgs (List[str]): List of image paths
Returns:
List[str]: List of responses
"""
# Instantiate the thread pool executor
with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
@@ -101,7 +101,6 @@
for result in results:
print(result)


def run_batch(self, tasks_images: List[Tuple[str, str]]) -> List[str]:
"""Process a batch of tasks and images"""
with concurrent.futures.ThreadPoolExecutor() as executor:
@@ -133,11 +132,11 @@ async def run_batch_async_with_retries(
for task, img in tasks_images
]
return await asyncio.gather(*futures)

def unique_chat_history(self):
"""Get the unique chat history"""
return list(set(self.chat_history))

def run_with_retries(self, task: str, img: str):
"""Run the model with retries"""
for i in range(self.retries):
@@ -146,7 +145,7 @@ def run_with_retries(self, task: str, img: str):
except Exception as error:
print(f"Error with the request {error}")
continue

def run_batch_with_retries(self, tasks_images: List[Tuple[str, str]]):
"""Run the model with retries"""
for i in range(self.retries):
@@ -188,28 +187,37 @@ def get_generation_time(self) -> float:
if self.start_time and self.end_time:
return self.end_time - self.start_time
return 0

def get_chat_history(self):
"""Get the chat history"""
return self.chat_history

def get_unique_chat_history(self):
"""Get the unique chat history"""
return list(set(self.chat_history))

def get_chat_history_length(self):
"""Get the chat history length"""
return len(self.chat_history)

def get_unique_chat_history_length(self):
"""Get the unique chat history length"""
return len(list(set(self.chat_history)))

def get_chat_history_tokens(self):
"""Get the chat history tokens"""
return self._num_tokens()

def print_beautiful(self, content: str, color: str = "cyan"):
"""Print Beautifully with termcolor"""
content = colored(content, color)
print(content)

def stream(self, content: str):
"""Stream the output
Args:
content (str): The content to stream, printed chunk by chunk
"""
for chunk in content:
print(chunk)
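BaseMultiModalModel leaves __call__ abstract and supplies the shared helpers (image encoding, chat history, batch runners). A toy subclass, assuming the base constructor's arguments all have defaults, might look like the sketch below; the echo behavior is purely illustrative:

from swarms.models.base_multimodal_model import BaseMultiModalModel


class EchoMultiModal(BaseMultiModalModel):
    """Toy model that echoes the task plus the size of the encoded image."""

    def __call__(self, text: str, img: str) -> str:
        encoded = self.encode_img(img)      # base64 helper from the base class
        response = f"{text} -> image with {len(encoded)} base64 characters"
        self.chat_history.append(response)  # reuse the shared chat history
        return response


model = EchoMultiModal()
# model.run_many(["Describe the scene"], ["photo.jpg"])  # batch helper from the base class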
108 changes: 106 additions & 2 deletions swarms/models/gpt4_vision_api.py
@@ -1,6 +1,7 @@
import logging
import asyncio
import base64
from typing import Optional
import concurrent.futures
from termcolor import colored
import json
@@ -12,6 +13,13 @@
import requests
from dotenv import load_dotenv


try:
import cv2
except ImportError:
print("OpenCV not installed. Please install OpenCV to use this model.")
raise ImportError

# Load environment variables
load_dotenv()
openai_api_key = os.getenv("OPENAI_API_KEY")
@@ -59,7 +67,8 @@ def __init__(
max_workers: int = 10,
max_tokens: int = 300,
openai_proxy: str = "https://api.openai.com/v1/chat/completions",
beautify: bool = False
beautify: bool = False,
streaming_enabled: Optional[bool] = False,
):
super().__init__()
self.openai_api_key = openai_api_key
@@ -69,6 +78,7 @@ def __init__(
self.max_tokens = max_tokens
self.openai_proxy = openai_proxy
self.beautify = beautify
self.streaming_enabled = streaming_enabled

if self.logging_enabled:
logging.basicConfig(level=logging.DEBUG)
@@ -123,14 +133,101 @@ def run(self, task: str, img: str):
out = response.json()
content = out["choices"][0]["message"]["content"]

if self.streaming_enabled:
# stream_response prints chunks as they arrive and returns nothing,
# so do not overwrite content with its return value
self.stream_response(content)

if self.beautify:
content = colored(content, "cyan")
print(content)
else:
print(content)

except Exception as error:
print(f"Error with the request: {error}")
raise error

def video_prompt(self, frames):
"""
SystemPrompt is a class that generates a prompt for the user to respond to.
The prompt is generated based on the current state of the system.
Parameters
----------
frames : list
A list of base64 frames
Returns
-------
PROMPT : str
The system prompt
Examples
--------
>>> from swarms.models import GPT4VisionAPI
>>> llm = GPT4VisionAPI()
>>> video = "video.mp4"
>>> base64_frames = llm.process_video(video)
>>> prompt = llm.video_prompt(base64_frames)
>>> print(prompt)
"""
PROMPT = f"""
These are frames from a video that I want to upload. Generate a compelling description that I can upload along with the video:
{frames}
"""
return PROMPT

def stream_response(self, content: str):
"""Stream the response of the output
Args:
content (str): The content to stream, printed chunk by chunk
"""
for chunk in content:
print(chunk)

def process_video(self, video: str):
"""
Process a video into a list of base64 frames
Parameters
----------
video : str
The path to the video file
Returns
-------
base64_frames : list
A list of base64 frames
Examples
--------
>>> from swarms.models import GPT4VisionAPI
>>> llm = GPT4VisionAPI()
>>> video = "video.mp4"
>>> base64_frames = llm.process_video(video)
"""
video = cv2.VideoCapture(video)

base64_frames = []
while video.isOpened():
success, frame = video.read()
if not success:
break
_, buffer = cv2.imencode(".jpg", frame)
base64_frames.append(base64.b64encode(buffer).decode("utf-8"))

video.release()
print(len(base64_frames), "frames read.")

# Sanity-check that every frame decodes cleanly, then return the frames
for img in base64_frames:
base64.b64decode(img.encode("utf-8"))
return base64_frames

def __call__(self, task: str, img: str):
"""Run the model."""
try:
@@ -168,10 +265,17 @@ def __call__(self, task: str, img: str):
out = response.json()
content = out["choices"][0]["message"]["content"]

if self.streaming_enabled:
# stream_response prints chunks as they arrive and returns nothing,
# so do not overwrite content with its return value
self.stream_response(content)

if self.beautify:
content = colored(content, "cyan")
print(content)
else:
print(content)

except Exception as error:
print(f"Error with the request: {error}")
raise error
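A short usage sketch tying the new video helpers together (the file path is a placeholder, frame subsampling is just to keep the prompt small, and it assumes the constructor defaults to the OPENAI_API_KEY loaded at module import):

from swarms.models.gpt4_vision_api import GPT4VisionAPI

llm = GPT4VisionAPI()  # assumes OPENAI_API_KEY is available in the environment

frames = llm.process_video("factory_tour.mp4")  # list of base64-encoded JPEG frames
prompt = llm.video_prompt(frames[::30])         # subsample frames before building the prompt
print(prompt[:500])                             # preview the generated upload description prompt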
2 changes: 1 addition & 1 deletion swarms/models/kosmos_two.py
@@ -24,7 +24,7 @@ class Kosmos:
----------
model_name : str
Path to the pretrained model
Examples
--------
>>> kosmos = Kosmos()
4 changes: 3 additions & 1 deletion swarms/models/whisperx_model.py
@@ -99,7 +99,9 @@ def transcribe_youtube_video(self):
print("The key 'segments' is not found in the result.")

def transcribe(self, audio_file):
model = whisperx_model.load_model("large-v2", self.device, self.compute_type)
model = whisperx_model.load_model(
"large-v2", self.device, self.compute_type
)
audio = whisperx_model.load_audio(audio_file)
result = model.transcribe(audio, batch_size=self.batch_size)
