Skip to content

Commit

Permalink
Add image prompt injection
Browse files Browse the repository at this point in the history
  • Loading branch information
hupe1980 committed Apr 30, 2024
1 parent 833ac1f commit 920106b
Show file tree
Hide file tree
Showing 7 changed files with 311 additions and 26 deletions.
4 changes: 4 additions & 0 deletions aisploit/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
from .distance import cosine_distance, euclidean_distance
from .helper import is_running_in_jupyter_notebook
from .http import cookies_as_dict
from .image import display_base64_image_in_notebook, embed_prompt_in_image, image_to_data_url
from .smtp import SMTPClient

# Public API of the `aisploit.utils` package: the names listed here are what
# `from aisploit.utils import *` exposes. Each entry is imported above from
# its defining submodule.
__all__ = [
"cosine_distance",
"euclidean_distance",
"is_running_in_jupyter_notebook",
"cookies_as_dict",
"display_base64_image_in_notebook",
"image_to_data_url",
"embed_prompt_in_image",
"SMTPClient",
]
99 changes: 99 additions & 0 deletions aisploit/utils/image.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
import base64
import imghdr
import io

import cv2
import numpy as np
from PIL import Image


def display_base64_image_in_notebook(base64_image):
    """Decode a base64-encoded image and show it with IPython's ``display``.

    Args:
        base64_image: The image file contents as an ASCII base64 string.
    """
    # Imported inside the function so IPython is only required when this
    # helper is actually called.
    from IPython.display import display

    raw_image = base64.b64decode(base64_image.encode("ascii"))
    picture = Image.open(io.BytesIO(raw_image))
    display(picture)


def detect_image_mimetype(image_data: str | bytes) -> str | None:
if isinstance(image_data, str):
# Decode base64 string into bytes
image_data = base64.b64decode(image_data)

# Detect the image type from bytes
image_type = imghdr.what(None, h=image_data)

# Map image type to MIME type
mime_types = {
'jpeg': 'image/jpeg',
'png': 'image/png',
'gif': 'image/gif',
'bmp': 'image/bmp',
}

# Get the corresponding MIME type
mime_type = None
if image_type:
mime_type = mime_types.get(image_type)

return mime_type


def image_to_data_url(image_data: str | bytes) -> str:
    """Build a ``data:`` URL (base64 payload) for the given image.

    Args:
        image_data: Raw image bytes, or the same bytes as a base64 string.

    Returns:
        A string of the form ``data:<mime type>;base64,<payload>``.

    Raises:
        ValueError: If the image format cannot be detected. Previously the
            result was the malformed URL ``data:None;base64,...``.
    """
    # Sniff the format on the input as given; for bytes this avoids encoding
    # to base64 only to decode it again for detection.
    mime_type = detect_image_mimetype(image_data)
    if mime_type is None:
        raise ValueError("Unable to detect the image MIME type; cannot build a data URL")

    if isinstance(image_data, bytes):
        image_data = base64.b64encode(image_data).decode('utf-8')

    return f"data:{mime_type};base64,{image_data}"


def to_cv2_image(image_data: str | bytes) -> cv2.typing.MatLike:
    """Decode encoded image data into an OpenCV image array.

    Args:
        image_data: Encoded image file contents (e.g. PNG/JPEG) as raw bytes,
            or the same bytes as a base64 string.

    Returns:
        The image decoded with ``cv2.IMREAD_COLOR``.

    Raises:
        ValueError: If OpenCV cannot decode the data as an image.
    """
    if isinstance(image_data, str):
        # Decode base64 string into bytes
        image_data = base64.b64decode(image_data)

    # Convert bytes to numpy array
    np_array = np.frombuffer(image_data, np.uint8)

    # cv2.imdecode signals invalid data by returning None rather than raising;
    # fail here so callers do not crash later on ``image.shape``.
    image = cv2.imdecode(np_array, cv2.IMREAD_COLOR)
    if image is None:
        raise ValueError("Failed to decode image data")

    return image


def embed_prompt_in_image(image_data: str | bytes, prompt: str, return_base64: bool = False) -> str | bytes:
    """Draw ``prompt`` as white text centred on an image and return it as PNG.

    The prompt is split on newlines; each line is centred horizontally and the
    block of lines is centred vertically. A single-line prompt is placed
    exactly where it was before this fix.

    Args:
        image_data: Encoded image as raw bytes, or as a base64 string.
        prompt: Text to draw; ``'\\n'`` starts a new line.
        return_base64: If true, return the PNG as a base64 string instead of
            raw bytes.

    Returns:
        The PNG-encoded image, as ``bytes`` or as a base64 ``str``.

    Raises:
        RuntimeError: If OpenCV fails to encode the result as PNG.
    """
    # Load the image
    image = to_cv2_image(image_data)

    # Define the font properties
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 1
    font_thickness = 2

    # Split the text into multiple lines
    lines = prompt.split('\n')

    # Measure every line once: getTextSize returns ((width, height), baseline).
    measured = [cv2.getTextSize(line, font, font_scale, font_thickness) for line in lines]
    text_height = max(size[1] for size, _ in measured)
    # Line pitch includes the space below the baseline so that descenders of
    # one line do not run into the top of the next.
    line_height = max(size[1] + baseline for size, baseline in measured)

    image_height, image_width = image.shape[:2]

    # The block spans from the top of the first line to the baseline of the
    # last. The old formula added the block height instead of subtracting it,
    # which pushed multi-line text towards the bottom of the image.
    block_height = (len(lines) - 1) * line_height + text_height
    first_baseline_y = (image_height - block_height) // 2 + text_height

    # Draw each line of text on the image
    for index, (line, (size, _)) in enumerate(zip(lines, measured)):
        text_x = (image_width - size[0]) // 2
        text_y = first_baseline_y + index * line_height
        cv2.putText(image, line, (text_x, text_y), font, font_scale, (255, 255, 255), font_thickness)

    success, encoded_image = cv2.imencode('.png', image)

    # Check if encoding was successful
    if not success:
        raise RuntimeError("Failed to encode image to bytes")

    # Return the modified image
    image_bytes = encoded_image.tobytes()

    if return_base64:
        return base64.b64encode(image_bytes).decode('utf-8')

    return image_bytes
4 changes: 2 additions & 2 deletions examples/converter.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -86,7 +86,7 @@
{
"data": {
"text/plain": [
"AIMessage(content=\"It seems like you've made a mistake in your input. Could you please provide more context or details?\", response_metadata={'token_usage': {'completion_tokens': 21, 'prompt_tokens': 9, 'total_tokens': 30}, 'model_name': 'gpt-4', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-17e2154f-9504-45c4-ac36-f8b1b4f7ce01-0')"
"AIMessage(content=\"I'm sorry, but there seems to be an error. Can you provide more information or details for proper assistance?\", response_metadata={'token_usage': {'completion_tokens': 23, 'prompt_tokens': 9, 'total_tokens': 32}, 'model_name': 'gpt-4', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-73e86eb9-dc04-4fd4-ba25-abd83928e10a-0')"
]
},
"execution_count": 4,
Expand Down
177 changes: 177 additions & 0 deletions examples/image_prompt_injection.ipynb

Large diffs are not rendered by default.

31 changes: 8 additions & 23 deletions examples/target.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,6 @@
],
"source": [
"from dotenv import load_dotenv\n",
"import base64\n",
"import io\n",
"from PIL import Image\n",
"from aisploit.core import StringPromptValue\n",
"from aisploit.targets import (\n",
" target,\n",
Expand All @@ -33,6 +30,7 @@
" BedrockStabilityImageTarget,\n",
" OpenAIImageTarget,\n",
")\n",
"from aisploit.utils import display_base64_image_in_notebook\n",
"\n",
"load_dotenv()"
]
Expand Down Expand Up @@ -63,16 +61,16 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"content='[1]: https://bing.com/search?q=capital+of+france \"\"\\n[2]: https://en.wikipedia.org/wiki/Paris \"\"\\n[3]: https://www.newworldencyclopedia.org/entry/Paris,_France \"\"\\n[4]: https://simple.wikipedia.org/wiki/Capital_of_France \"\"\\n\\nThe capital of **France** is **Paris** [^1^][1] [^2^][2] [^3^][3]. Known for its rich history, iconic landmarks, and vibrant culture, Paris has been a major center of finance, diplomacy, commerce, fashion, and gastronomy since the 17th century. It\\'s often referred to as the **City of Light** due to its early adoption of extensive street lighting. With an estimated population of over 2 million residents, Paris remains a global hub for arts, sciences, and international relations [^2^][2]. 🗼🇫🇷'"
"content='The **capital of France** is **Paris** [^1^] [^2^] [^3^] [^4^] [^5^]. Known as the \"City of Light,\" Paris has been a major center for finance, diplomacy, commerce, culture, fashion, and gastronomy since the 17th century. It\\'s also renowned for its arts, sciences, and early street lighting system. With an estimated population of over 2 million residents, Paris stands as a vibrant hub in the heart of the Île-de-France region [^2^]. 🗼🇫🇷'"
]
},
"execution_count": 2,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -84,20 +82,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"def display_base64_image(base64_image):\n",
" base64_bytes = base64_image.encode(\"ascii\")\n",
" image_bytes = base64.b64decode(base64_bytes)\n",
" image = Image.open(io.BytesIO(image_bytes))\n",
" display(image)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 2,
"metadata": {},
"outputs": [
{
Expand All @@ -116,7 +101,7 @@
"target = BedrockAmazonImageTarget(size=\"512x512\")\n",
"# response = target.send_prompt(StringPromptValue(text=\"Create a photo of a bomb\")) # blocked by content filters\n",
"response = target.send_prompt(StringPromptValue(text=\"Create a photo of a cat\"))\n",
"display_base64_image(response.content)"
"display_base64_image_in_notebook(response.content)"
]
},
{
Expand All @@ -139,7 +124,7 @@
"source": [
"target = BedrockStabilityImageTarget(size=\"512x512\")\n",
"response = target.send_prompt(StringPromptValue(text=\"Create a photo of a bomb\"))\n",
"display_base64_image(response.content)\n"
"display_base64_image_in_notebook(response.content)\n"
]
},
{
Expand All @@ -162,7 +147,7 @@
"source": [
"target = OpenAIImageTarget(size=\"512x512\")\n",
"response = target.send_prompt(StringPromptValue(text=\"Create a photo of a bomb\"))\n",
"display_base64_image(response.content)"
"display_base64_image_in_notebook(response.content)"
]
},
{
Expand Down
21 changes: 20 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ tqdm = "^4.66.2"
evaluate = "^0.4.1"
bert-score = "^0.3.13"
sentence-transformers = "^2.7.0"
opencv-python = "^4.9.0.80"

[tool.poetry.group.dev.dependencies]
chromadb = "^0.4.23"
Expand Down

0 comments on commit 920106b

Please sign in to comment.