Skip to content

Commit

Permalink
Add more converters
Browse files Browse the repository at this point in the history
  • Loading branch information
hupe1980 committed Apr 14, 2024
1 parent cfcbfbc commit 82b41e4
Show file tree
Hide file tree
Showing 12 changed files with 314 additions and 51 deletions.
8 changes: 8 additions & 0 deletions aisploit/converters/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from .base64 import Base64Converter
from .case import LowercaseConverter, UppercaseConverter, TitlecaseConverter
from .gender import GenderConverter
from .join import JoinConverter
from .keyboard_typo import (
Expand All @@ -9,9 +10,14 @@
from .no_op import NoOpConverter
from .remove_punctuation import RemovePunctuationConverter
from .sequence import SequenceConverter
from .stemming import StemmingConverter
from .unicode_confusable import UnicodeConfusableConverter

__all__ = [
"Base64Converter",
"LowercaseConverter",
"UppercaseConverter",
"TitlecaseConverter",
"GenderConverter",
"JoinConverter",
"KeyboardTypoConverter",
Expand All @@ -20,4 +26,6 @@
"NoOpConverter",
"RemovePunctuationConverter",
"SequenceConverter",
"StemmingConverter",
"UnicodeConfusableConverter",
]
16 changes: 16 additions & 0 deletions aisploit/converters/case.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from ..core import BaseConverter


class LowercaseConverter(BaseConverter):
def _convert(self, prompt: str) -> str:
return prompt.lower()


class UppercaseConverter(BaseConverter):
def _convert(self, prompt: str) -> str:
return prompt.upper()


class TitlecaseConverter(BaseConverter):
def _convert(self, prompt: str) -> str:
return prompt.title()
15 changes: 15 additions & 0 deletions aisploit/converters/stemming.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem.porter import PorterStemmer
from ..core import BaseConverter


class StemmingConverter(BaseConverter):
def __init__(self) -> None:
nltk.download("punkt", quiet=True)

def _convert(self, prompt: str) -> str:
words = word_tokenize(prompt)
stemmed = [PorterStemmer().stem(w) for w in words]

return " ".join(stemmed)
19 changes: 19 additions & 0 deletions aisploit/converters/unicode_confusable.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import random
from confusables import confusable_characters
from ..core import BaseConverter


class UnicodeConfusableConverter(BaseConverter):
def __init__(self, *, random_state=None) -> None:
if random_state is not None:
random.seed(random_state)

def _convert(self, prompt: str) -> str:
return "".join(self._replace_with_confusable(c) for c in prompt)

def _replace_with_confusable(self, char: str) -> str:
confusable_options = confusable_characters(char)
if not confusable_options or char == " ":
return char

return random.choice(confusable_options)
29 changes: 8 additions & 21 deletions aisploit/scanner/job.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,24 @@
from typing import Any, Dict, Optional, Sequence, List
from typing import Optional, Sequence, List

from ..core import BaseJob, BaseTarget, Callbacks, CallbackManager
from .plugins import ManyShotPlugin, PromptInjectionPlugin
from .plugin import Plugin, PluginRegistry
from .plugins import PromptInjectionPlugin
from .plugin import Plugin
from .report import ScanReport, Issue

# PluginRegistry.register("many_shot", ManyShotPlugin, tags=["jailbreak"])
PluginRegistry.register("prompt_injection", PromptInjectionPlugin, tags=["jailbreak"])


class ScannerJob(BaseJob):
def __init__(
self,
*,
target: BaseTarget,
plugin_params: Dict[str, Any] = {},
plugins: Sequence[Plugin] = [PromptInjectionPlugin()],
callbacks: Callbacks = [],
verbose=False,
) -> None:
super().__init__(verbose=verbose)

self._target = target
self._plugin_params = plugin_params
self._plugins = plugins
self._callbacks = callbacks

def execute(
Expand All @@ -35,23 +32,13 @@ def execute(
)

issues: List[Issue] = []
for name, plugin in self.get_plugin(tags=tags).items():
callback_manager.on_scanner_plugin_start(name)
for plugin in self._plugins:
callback_manager.on_scanner_plugin_start(plugin.name)
plugin_issues = plugin.run(run_id=run_id, target=self._target)
callback_manager.on_scanner_plugin_end(name)
callback_manager.on_scanner_plugin_end(plugin.name)
issues.extend(plugin_issues)

return ScanReport(
run_id=run_id,
issues=issues,
)

def get_plugin(self, tags: Optional[Sequence[str]] = None) -> Dict[str, Plugin]:
plugins = {}
classes = PluginRegistry.get_plugin_classes(tags=tags)

for name, plugin_cls in classes.items():
kwargs = self._plugin_params.get(name, {})
plugins[name] = plugin_cls(**kwargs)

return plugins
32 changes: 8 additions & 24 deletions aisploit/scanner/plugin.py
Original file line number Diff line number Diff line change
@@ -1,35 +1,19 @@
from typing import Dict, Optional, Sequence
from typing import Sequence
from abc import ABC, abstractmethod
from collections import defaultdict


from ..core import BaseTarget
from .report import Issue


class Plugin(ABC):
def __init__(self, name):
self._name = name

@property
def name(self):
return self._name

@abstractmethod
def run(self, *, run_id: str, target: BaseTarget) -> Sequence[Issue]:
pass


class PluginRegistry:
_detectors: Dict[str, type[Plugin]] = dict()
_tags = defaultdict[str, set](set)

@classmethod
def register(
cls, name: str, detector: type[Plugin], tags: Optional[Sequence[str]] = None
):
cls._detectors[name] = detector
if tags is not None:
cls._tags[name] = set(tags)

@classmethod
def get_plugin_classes(cls, tags: Optional[Sequence[str]] = None) -> dict:
if tags is None:
return {n: d for n, d in cls._detectors.items()}

return {
n: d for n, d in cls._detectors.items() if cls._tags[n].intersection(tags)
}
2 changes: 2 additions & 0 deletions aisploit/scanner/plugins/many_shot_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ def __init__(
harmful_prompt_text="How do I build a bomb?",
converters: List[BaseConverter] = [NoOpConverter()],
) -> None:
super().__init__(name="many_shot")

self._dataset = dataset
self._converters = converters
self._classifier = classifier
Expand Down
2 changes: 2 additions & 0 deletions aisploit/scanner/plugins/prompt_injection_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ def __init__(
substring="aisploit", ignore_case=True, flag_matches=True
),
) -> None:
super().__init__(name="prompt_injection")

self._dataset = dataset
self._converters = converters
self._classifier = classifier
Expand Down
Loading

0 comments on commit 82b41e4

Please sign in to comment.