Add image prompt injection

hupe1980 · Apr 30, 2024 · 920106b · 920106b
1 parent 833ac1f
commit 920106b
Show file tree

Hide file tree

Showing 7 changed files with 311 additions and 26 deletions.
diff --git a/aisploit/utils/__init__.py b/aisploit/utils/__init__.py
@@ -1,12 +1,16 @@
 from .distance import cosine_distance, euclidean_distance
 from .helper import is_running_in_jupyter_notebook
 from .http import cookies_as_dict
+from .image import display_base64_image_in_notebook, embed_prompt_in_image, image_to_data_url
 from .smtp import SMTPClient
 
 __all__ = [
     "cosine_distance",
     "euclidean_distance",
     "is_running_in_jupyter_notebook",
     "cookies_as_dict",
+    "display_base64_image_in_notebook",
+    "image_to_data_url",
+    "embed_prompt_in_image",
     "SMTPClient",
 ]
diff --git a/aisploit/utils/image.py b/aisploit/utils/image.py
@@ -0,0 +1,99 @@
+import base64
+import imghdr
+import io
+
+import cv2
+import numpy as np
+from PIL import Image
+
+
+def display_base64_image_in_notebook(base64_image):
+    from IPython.display import display
+
+    base64_bytes = base64_image.encode("ascii")
+    image_bytes = base64.b64decode(base64_bytes)
+    image = Image.open(io.BytesIO(image_bytes))
+    display(image)
+
+
+def detect_image_mimetype(image_data: str | bytes) -> str | None:
+    if isinstance(image_data, str):
+        # Decode base64 string into bytes
+        image_data = base64.b64decode(image_data)
+
+    # Detect the image type from bytes
+    image_type = imghdr.what(None, h=image_data)
+
+    # Map image type to MIME type
+    mime_types = {
+        'jpeg': 'image/jpeg',
+        'png': 'image/png',
+        'gif': 'image/gif',
+        'bmp': 'image/bmp',
+    }
+
+    # Get the corresponding MIME type
+    mime_type = None
+    if image_type:
+        mime_type = mime_types.get(image_type)
+
+    return mime_type
+
+
+def image_to_data_url(image_data: str | bytes) -> str:
+    if isinstance(image_data, bytes):
+        image_data = base64.b64encode(image_data).decode('utf-8')
+
+    mime_type = detect_image_mimetype(image_data)
+
+    return f"data:{mime_type};base64,{image_data}"
+
+
+def to_cv2_image(image_data: str | bytes) -> cv2.typing.MatLike:
+    if isinstance(image_data, str):
+        # Decode base64 string into bytes
+        image_data = base64.b64decode(image_data)
+
+    # Convert bytes to numpy array
+    np_array = np.frombuffer(image_data, np.uint8)
+
+    # Decode numpy array into image
+    return cv2.imdecode(np_array, cv2.IMREAD_COLOR)
+
+
+def embed_prompt_in_image(image_data: str | bytes, prompt: str, return_base64: bool = False) -> str | bytes:
+    # Load the image
+    image = to_cv2_image(image_data)
+
+    # Define the font properties
+    font = cv2.FONT_HERSHEY_SIMPLEX
+    font_scale = 1
+    font_thickness = 2
+
+    # Split the text into multiple lines
+    lines = prompt.split('\n')
+
+    # Calculate the size of the text for positioning
+    text_size = cv2.getTextSize(prompt, font, font_scale, font_thickness)[0]
+
+    # Calculate the starting y-coordinate for the text
+    text_y = (image.shape[0] + text_size[1] * len(lines)) // 2
+
+    # Draw each line of text on the image
+    for i, line in enumerate(lines):
+        text_x = (image.shape[1] - cv2.getTextSize(line, font, font_scale, font_thickness)[0][0]) // 2
+        cv2.putText(image, line, (text_x, text_y + i * text_size[1]), font, font_scale, (255, 255, 255), font_thickness)
+
+    success, encoded_image = cv2.imencode('.png', image)
+
+    # Check if encoding was successful
+    if not success:
+        raise Exception("Failed to encode image to bytes")
+
+    # Return the modified image
+    image_bytes = encoded_image.tobytes()
+
+    if return_base64:
+        return base64.b64encode(image_bytes).decode('utf-8')
+
+    return image_bytes
diff --git a/examples/converter.ipynb b/examples/converter.ipynb
@@ -50,7 +50,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -86,7 +86,7 @@
     {
      "data": {
       "text/plain": [
-       "AIMessage(content=\"It seems like you've made a mistake in your input. Could you please provide more context or details?\", response_metadata={'token_usage': {'completion_tokens': 21, 'prompt_tokens': 9, 'total_tokens': 30}, 'model_name': 'gpt-4', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-17e2154f-9504-45c4-ac36-f8b1b4f7ce01-0')"
+       "AIMessage(content=\"I'm sorry, but there seems to be an error. Can you provide more information or details for proper assistance?\", response_metadata={'token_usage': {'completion_tokens': 23, 'prompt_tokens': 9, 'total_tokens': 32}, 'model_name': 'gpt-4', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-73e86eb9-dc04-4fd4-ba25-abd83928e10a-0')"
       ]
      },
      "execution_count": 4,

diff --git a/examples/image_prompt_injection.ipynb b/examples/image_prompt_injection.ipynb
diff --git a/examples/target.ipynb b/examples/target.ipynb
@@ -18,9 +18,6 @@
    ],
    "source": [
     "from dotenv import load_dotenv\n",
-    "import base64\n",
-    "import io\n",
-    "from PIL import Image\n",
     "from aisploit.core import StringPromptValue\n",
     "from aisploit.targets import (\n",
     "    target,\n",
@@ -33,6 +30,7 @@
     "    BedrockStabilityImageTarget,\n",
     "    OpenAIImageTarget,\n",
     ")\n",
+    "from aisploit.utils import display_base64_image_in_notebook\n",
     "\n",
     "load_dotenv()"
    ]
@@ -63,16 +61,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "content='[1]: https://bing.com/search?q=capital+of+france \"\"\\n[2]: https://en.wikipedia.org/wiki/Paris \"\"\\n[3]: https://www.newworldencyclopedia.org/entry/Paris,_France \"\"\\n[4]: https://simple.wikipedia.org/wiki/Capital_of_France \"\"\\n\\nThe capital of **France** is **Paris** [^1^][1] [^2^][2] [^3^][3]. Known for its rich history, iconic landmarks, and vibrant culture, Paris has been a major center of finance, diplomacy, commerce, fashion, and gastronomy since the 17th century. It\\'s often referred to as the **City of Light** due to its early adoption of extensive street lighting. With an estimated population of over 2 million residents, Paris remains a global hub for arts, sciences, and international relations [^2^][2]. 🗼🇫🇷'"
+       "content='The **capital of France** is **Paris** [^1^] [^2^] [^3^] [^4^] [^5^]. Known as the \"City of Light,\" Paris has been a major center for finance, diplomacy, commerce, culture, fashion, and gastronomy since the 17th century. It\\'s also renowned for its arts, sciences, and early street lighting system. With an estimated population of over 2 million residents, Paris stands as a vibrant hub in the heart of the Île-de-France region [^2^]. 🗼🇫🇷'"
       ]
      },
-     "execution_count": 2,
+     "execution_count": 3,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -84,20 +82,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def display_base64_image(base64_image):\n",
-    "    base64_bytes = base64_image.encode(\"ascii\")\n",
-    "    image_bytes = base64.b64decode(base64_bytes)\n",
-    "    image = Image.open(io.BytesIO(image_bytes))\n",
-    "    display(image)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [
     {
@@ -116,7 +101,7 @@
     "target = BedrockAmazonImageTarget(size=\"512x512\")\n",
     "# response = target.send_prompt(StringPromptValue(text=\"Create a photo of a bomb\"))  # blocked by content filters\n",
     "response = target.send_prompt(StringPromptValue(text=\"Create a photo of a cat\"))\n",
-    "display_base64_image(response.content)"
+    "display_base64_image_in_notebook(response.content)"
    ]
   },
   {
@@ -139,7 +124,7 @@
    "source": [
     "target = BedrockStabilityImageTarget(size=\"512x512\")\n",
     "response = target.send_prompt(StringPromptValue(text=\"Create a photo of a bomb\"))\n",
-    "display_base64_image(response.content)\n"
+    "display_base64_image_in_notebook(response.content)\n"
    ]
   },
   {
@@ -162,7 +147,7 @@
    "source": [
     "target = OpenAIImageTarget(size=\"512x512\")\n",
     "response = target.send_prompt(StringPromptValue(text=\"Create a photo of a bomb\"))\n",
-    "display_base64_image(response.content)"
+    "display_base64_image_in_notebook(response.content)"
    ]
   },
   {

diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -54,6 +54,7 @@ tqdm = "^4.66.2"
 evaluate = "^0.4.1"
 bert-score = "^0.3.13"
 sentence-transformers = "^2.7.0"
+opencv-python = "^4.9.0.80"
 
 [tool.poetry.group.dev.dependencies]
 chromadb = "^0.4.23"