Skip to content

Commit

Permalink
Refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
hupe1980 committed Apr 4, 2024
1 parent 45c6f35 commit 43823f0
Show file tree
Hide file tree
Showing 21 changed files with 133 additions and 100 deletions.
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from ...core import BaseClassifier


class HubPromptIjectionIdentifier(BaseClassifier):
    """Subclass of ``BaseClassifier`` that adds no members of its own.

    NOTE(review): the class name looks like a misspelling of "Injection";
    it is kept unchanged because renaming would break existing importers.
    """
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from ...core import BaseClassifier, Score


class PipelinePromptInjectionIdentifier(BaseClassifier):
def __init__(
self,
Expand Down
10 changes: 7 additions & 3 deletions aisploit/converter/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
from .base64 import Base64Converter
from .keyboard_typo import KeyboardTypoConverter, KEYBOARD_NEIGHBORS_QWERTY, KEYBOARD_NEIGHBORS_QWERTZ
from .keyboard_typo import (
KeyboardTypoConverter,
KEYBOARD_NEIGHBORS_QWERTY,
KEYBOARD_NEIGHBORS_QWERTZ,
)

__all__ = [
"Base64Converter",
"KeyboardTypoConverter",
"KEYBOARD_NEIGHBORS_QWERTY",
"KEYBOARD_NEIGHBORS_QWERTZ"
]
"KEYBOARD_NEIGHBORS_QWERTZ",
]
6 changes: 5 additions & 1 deletion aisploit/converter/base64.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

from ..core import BaseConverter


class Base64Converter(BaseConverter):
    """Converter that Base64-encodes every prompt it is given."""

    def convert(self, prompts: list[str]) -> list[str]:
        """Return the Base64 text of each prompt, in input order.

        Each prompt is encoded to UTF-8 bytes, Base64-encoded, and the
        resulting bytes are decoded back to ``str``.
        """
        encoded_prompts = []
        for text in prompts:
            raw_bytes = text.encode("utf-8")
            b64_bytes = base64.b64encode(raw_bytes)
            encoded_prompts.append(b64_bytes.decode("utf-8"))
        return encoded_prompts
122 changes: 61 additions & 61 deletions aisploit/converter/keyboard_typo.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,70 +3,67 @@
from ..core import BaseConverter

# Maps each lowercase letter to the letter keys that sit next to it on a
# QWERTZ keyboard. Only ASCII letters are covered; umlaut and punctuation
# keys are intentionally left out.
KEYBOARD_NEIGHBORS_QWERTZ = {
    # Top row: q w e r t z u i o p
    "q": ["w", "a", "s"],
    "w": ["q", "e", "s", "d"],
    "e": ["w", "r", "d", "f"],
    "r": ["e", "t", "f", "g"],
    "t": ["r", "z", "g", "h"],
    "z": ["t", "u", "h", "j"],
    "u": ["z", "i", "j", "k"],
    "i": ["u", "o", "k", "l"],
    "o": ["i", "p", "l"],
    # "p" was missing even though "o" and "l" list it as a neighbour, so a
    # "p" in a prompt could never be replaced; mirrors the QWERTY entry.
    "p": ["o", "l"],
    # Home row: a s d f g h j k l
    "a": ["q", "s", "y"],
    "s": ["a", "w", "e", "d", "x", "y"],
    "d": ["s", "e", "r", "f", "c", "x"],
    "f": ["d", "r", "t", "g", "v", "c"],
    "g": ["f", "t", "z", "h", "b", "v"],
    "h": ["g", "z", "u", "j", "n", "b"],
    "j": ["h", "u", "i", "k", "m", "n"],
    "k": ["j", "i", "o", "l", "m"],
    "l": ["k", "o", "p"],
    # Bottom row: y x c v b n m
    "y": ["a", "s", "x"],
    # On QWERTZ the key to the left of "x" is "y" (not "z" as on QWERTY).
    "x": ["y", "s", "d", "c"],
    "c": ["x", "d", "f", "v"],
    "v": ["c", "f", "g", "b"],
    "b": ["v", "g", "h", "n"],
    "n": ["b", "h", "j", "m"],
    "m": ["n", "j", "k"],
}

# Maps each lowercase letter to a list of neighbouring letter keys on a
# QWERTY keyboard. Each pair below is (key, neighbours-as-a-string); the
# string is split into a list of single-character strings.
KEYBOARD_NEIGHBORS_QWERTY = {
    key: list(neighbors)
    for key, neighbors in (
        # Top row
        ("q", "was"),
        ("w", "qasde"),
        ("e", "wsdfr"),
        ("r", "edfgt"),
        ("t", "rfghy"),
        ("y", "tghju"),
        ("u", "yhjki"),
        ("i", "ujklo"),
        ("o", "iklp"),
        ("p", "ol"),
        # Home row
        ("a", "qwsz"),
        ("s", "qweadzx"),
        ("d", "wersfxc"),
        ("f", "ertdgcv"),
        ("g", "rtyfhvb"),
        ("h", "tyugjbn"),
        ("j", "yuihknm"),
        ("k", "uiojlm"),
        ("l", "iopk"),
        # Bottom row
        ("z", "asx"),
        ("x", "zsdc"),
        ("c", "xdfv"),
        ("v", "cfgb"),
        ("b", "vghn"),
        ("n", "bhjm"),
        ("m", "njk"),
    )
}


class KeyboardTypoConverter(BaseConverter):
def __init__(
self,
*,
self,
*,
keyboard_neighbors=KEYBOARD_NEIGHBORS_QWERTY,
typo_probability=0.1,
random_state=None,
Expand All @@ -75,13 +72,16 @@ def __init__(
self._typo_probability = typo_probability
if random_state is not None:
random.seed(random_state)

def convert(self, prompts: list[str]) -> list[str]:
typoPrompts = []
for prompt in prompts:
typoPrompt = ""
for char in prompt:
if random.random() < self._typo_probability and char.lower() in self._keyboard_neighbors:
if (
random.random() < self._typo_probability
and char.lower() in self._keyboard_neighbors
):
# Replace the character with a random neighboring key
neighbor_keys = self._keyboard_neighbors[char.lower()]
typo_char = random.choice(neighbor_keys)
Expand All @@ -92,5 +92,5 @@ def convert(self, prompts: list[str]) -> list[str]:
typoPrompt += char

typoPrompts.append(typoPrompt)
return typoPrompts

return typoPrompts
6 changes: 4 additions & 2 deletions aisploit/core/callbacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,13 @@ def on_redteam_attempt(self, attempt: int, prompt: str):
def on_redteam_attempt_response(self, attempt: int, response: str):
    """No-op hook that receives an attempt number and a response string."""


# Type alias for a list of callback handlers.
Callbacks = List[BaseCallbackHandler]


class CallbackManager:
def __init__(
self,
self,
*,
id: str,
callbacks: List[BaseCallbackHandler] = [],
Expand All @@ -26,4 +28,4 @@ def on_redteam_attempt(self, attempt: int, prompt: str):

def on_redteam_attempt_response(self, attempt: int, response: str):
    """Forward the attempt number and response to each handler in ``self._callbacks``, in order."""
    for handler in self._callbacks:
        handler.on_redteam_attempt_response(attempt, response)
4 changes: 3 additions & 1 deletion aisploit/core/classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,17 @@
from typing import Literal
from dataclasses import dataclass


@dataclass
class Score:
    """Value object describing the outcome of a scoring step.

    ``score_type`` is one of the literal tags "int", "float", "str" or
    "bool" (presumably naming the type of ``score_value``; nothing in this
    class enforces that the two agree).
    """

    # One of the four literal tags listed in the annotation.
    score_type: Literal["int", "float", "str", "bool"]
    # The score itself.
    score_value: int | float | str | bool
    # Optional free-text fields; both default to the empty string.
    score_description: str = ""
    score_explanation: str = ""


class BaseClassifier(ABC):
    """Abstract interface for objects that turn a text into a ``Score``."""

    @abstractmethod
    def score_text(self, text: str) -> Score:
        """Return a ``Score`` for ``text``.

        Subclasses must override this; the base body always raises
        ``NotImplementedError``.
        """
        message = "score_text method not implemented"
        raise NotImplementedError(message)
3 changes: 2 additions & 1 deletion aisploit/core/converter.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from abc import ABC, abstractmethod


class BaseConverter(ABC):
    """Abstract interface for prompt converters."""

    @abstractmethod
    def convert(self, prompts: list[str]) -> list[str]:
        """Take a list of prompt strings and return a list of strings.

        Subclasses must override this; the base body does nothing.
        """
1 change: 1 addition & 0 deletions aisploit/core/job.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from abc import ABC


class BaseJob(ABC):
    """Common base class for jobs; it only records a ``verbose`` flag."""

    def __init__(self, *, verbose=False) -> None:
        """Store the keyword-only ``verbose`` argument on the instance."""
        self.verbose = verbose
5 changes: 4 additions & 1 deletion aisploit/core/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,17 @@
from langchain_core.runnables import Runnable
from langchain_core.embeddings import Embeddings


class BaseLLM(Runnable[LanguageModelInput, str]):
    """Runnable from ``LanguageModelInput`` to ``str``; adds no members of its own."""


class BaseChatModel(Runnable[LanguageModelInput, BaseMessage]):
    """Runnable from ``LanguageModelInput`` to ``BaseMessage`` with a capability query."""

    @abstractmethod
    def supports_functions(self) -> bool:
        """Return whether the model supports functions.

        Presumably this refers to function/tool calling; confirm against
        the concrete subclasses. The base body does nothing.
        """


# Either kind of model wrapper declared in this module.
BaseModel = Union[BaseLLM, BaseChatModel]

# Project-local name for the imported `Embeddings` class.
BaseEmbeddings = Embeddings
BaseEmbeddings = Embeddings
3 changes: 2 additions & 1 deletion aisploit/core/target.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from abc import ABC, abstractmethod


class BaseTarget(ABC):
    """Abstract interface for something a prompt can be sent to."""

    @abstractmethod
    def send_prompt(self, prompt: str) -> str:
        """Send ``prompt`` and return the resulting string.

        Subclasses must override this; the base body does nothing.
        """
2 changes: 1 addition & 1 deletion aisploit/dataset/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@

__all__ = [
"Dataset",
]
]
4 changes: 2 additions & 2 deletions aisploit/dataset/dataset.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
class Dataset:
    """Empty dataset class; its constructor takes no arguments and stores nothing."""

    def __init__(self) -> None:
        """Create an instance without setting any attributes."""
3 changes: 1 addition & 2 deletions aisploit/demo/gandalf.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@
from langchain_core.callbacks.manager import CallbackManagerForLLMRun
from langchain_core.messages import SystemMessage, HumanMessage

from aisploit.core import BaseChatModel
from aisploit.classifier import BaseClassifier, Score
from aisploit.core import BaseChatModel, BaseClassifier, Score

GANDALF_API_ENDPOINT = "https://gandalf.lakera.ai/api"

Expand Down
2 changes: 1 addition & 1 deletion aisploit/embedding/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@
from .openai import OpenAIEmbeddings

__all__ = [
"OllamaEmbeddings",
"OllamaEmbeddings",
"OpenAIEmbeddings",
]
2 changes: 1 addition & 1 deletion aisploit/llm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@
from .chat_openai import ChatOpenAI

__all__ = [
"ChatOllama",
"ChatOllama",
"ChatOpenAI",
]
1 change: 1 addition & 0 deletions aisploit/llm/chat_openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from ..core import BaseChatModel


class ChatOpenAI(LangchainChatOpenAI, BaseChatModel):
"""
Wrapper class for interacting with the OpenAI API for chat-based models.
Expand Down
30 changes: 18 additions & 12 deletions aisploit/redteam/job.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
from typing import List

from .bot import RedTeamingBot
from ..core import BaseJob, BaseTarget, BaseCallbackHandler, CallbackManager
from ..classifier import BaseClassifier
from ..core import (
BaseClassifier,
BaseJob,
BaseTarget,
BaseCallbackHandler,
CallbackManager,
)


class RedTeamJob(BaseJob):
def __init__(
self,
self,
*,
bot: RedTeamingBot,
target: BaseTarget,
Expand All @@ -16,7 +22,7 @@ def __init__(
verbose=False,
) -> None:
super().__init__(verbose=verbose)

self._bot = bot
self._target = target
self._classifier = classifier
Expand All @@ -29,24 +35,24 @@ def __init__(
@property
def conversation_id(self):
    """Conversation identifier as reported by ``self._bot``."""
    bot = self._bot
    return bot.conversation_id

def execute(self, max_attempt=5, clear_history=True):
    """Run up to ``max_attempt`` rounds of bot -> target -> classifier.

    Each round asks the bot for a prompt (seeded with the initial prompt on
    the first round and with the target's previous response afterwards),
    sends it to the target, scores the response, and notifies the callback
    manager before and after the target call.

    Args:
        max_attempt: Maximum number of rounds; attempts are numbered from 1.
        clear_history: When truthy, clear the bot's history before starting.

    Returns:
        The first score whose ``score_value`` is truthy, or ``None`` when
        every round finished without one.
    """
    if clear_history:
        self._bot.clear_history()

    next_input = self._initial_prompt

    for attempt_no in range(1, max_attempt + 1):
        attack_prompt = self._bot.invoke(next_input)
        self._callback_manager.on_redteam_attempt(attempt_no, attack_prompt)

        reply = self._target.send_prompt(attack_prompt)
        verdict = self._classifier.score_text(text=reply)
        self._callback_manager.on_redteam_attempt_response(attempt_no, reply)

        if verdict.score_value:
            return verdict

        # The target's reply becomes the bot's input for the next round.
        next_input = reply

    return None


Loading

0 comments on commit 43823f0

Please sign in to comment.