diff --git a/aisploit/converters/__init__.py b/aisploit/converters/__init__.py
index 9d65a8a..8dd101e 100644
--- a/aisploit/converters/__init__.py
+++ b/aisploit/converters/__init__.py
@@ -13,6 +13,7 @@
from .stemming import StemmingConverter
from .translation import TranslationConverter
from .unicode_confusable import UnicodeConfusableConverter
+from .unicode_tags import UnicodeTagsConverter
__all__ = [
"Base64Converter",
@@ -30,4 +31,5 @@
"StemmingConverter",
"TranslationConverter",
"UnicodeConfusableConverter",
+ "UnicodeTagsConverter",
]
diff --git a/aisploit/converters/unicode_tags.py b/aisploit/converters/unicode_tags.py
new file mode 100644
index 0000000..87664ea
--- /dev/null
+++ b/aisploit/converters/unicode_tags.py
@@ -0,0 +1,23 @@
+from dataclasses import dataclass
+
+from ..core import BaseConverter
+
+
+@dataclass
+class UnicodeTagsConverter(BaseConverter):
+ prefix: str = ""
+ suffix: str = ""
+ add_sequence_markers: bool = False
+
+ def _convert(self, prompt: str) -> str:
+ encoded = ""
+
+ if self.add_sequence_markers:
+ encoded += chr(0xE0001)
+
+ encoded = ''.join(chr(0xE0000 + ord(ch)) for ch in prompt)
+
+ if self.add_sequence_markers:
+ encoded += chr(0xE007F)
+
+ return self.prefix + encoded + self.suffix
diff --git a/aisploit/scanner/plugins/prompt_injection.py b/aisploit/scanner/plugins/prompt_injection.py
index aa41a17..8ad08ec 100644
--- a/aisploit/scanner/plugins/prompt_injection.py
+++ b/aisploit/scanner/plugins/prompt_injection.py
@@ -31,7 +31,6 @@ class PromptInjectionPlugin(SendPromptsPlugin):
def create_prompts(self) -> Sequence[str | Prompt]:
"""Create prompts using templates from the specified dataset."""
-
prompts = []
for data in self.dataset:
prompt_template = PromptTemplate.from_template(data.template)
diff --git a/examples/converter.ipynb b/examples/converter.ipynb
index f713d35..0e57930 100644
--- a/examples/converter.ipynb
+++ b/examples/converter.ipynb
@@ -24,7 +24,7 @@
}
],
"source": [
- "from IPython.display import display, Markdown\n",
+ "from IPython.display import display, Markdown, HTML\n",
"import os\n",
"import textwrap\n",
"from dotenv import load_dotenv\n",
@@ -41,6 +41,7 @@
" StemmingConverter,\n",
" UnicodeConfusableConverter,\n",
" TranslationConverter,\n",
+ " UnicodeTagsConverter,\n",
")\n",
"from aisploit.models import ChatOpenAI\n",
"\n",
@@ -58,6 +59,50 @@
")"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## UnicodeTagsConverter"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ ""
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ "AIMessage(content=\"It seems like you've made a mistake in your input. Could you please provide more context or details?\", response_metadata={'token_usage': {'completion_tokens': 21, 'prompt_tokens': 9, 'total_tokens': 30}, 'model_name': 'gpt-4', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-17e2154f-9504-45c4-ac36-f8b1b4f7ce01-0')"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "converter = UnicodeTagsConverter(prefix=\"START\", suffix=\"END\")\n",
+ "prompt = converter.convert(\"What is the capital of France?\")\n",
+ "\n",
+ "display(HTML(f''))\n",
+ "\n",
+ "chat_model.invoke(prompt)"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},