Skip to content

Commit

Permalink
Misc
Browse files Browse the repository at this point in the history
  • Loading branch information
hupe1980 committed Apr 7, 2024
1 parent fb5b8ad commit 0f67f09
Show file tree
Hide file tree
Showing 27 changed files with 350 additions and 143 deletions.
6 changes: 2 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,7 @@ from aisploit.demo import GandalfBot, GandalfLevel, GandalfScorer
def play_game(level: GandalfLevel, max_attempt=5) -> None:
print(f"Starting Level {level.value} - {level.description}\n")

chat_model = ChatOpenAI(
api_key=os.getenv("OPENAI_API_KEY"),
)
chat_model = ChatOpenAI()

gandalf_bot = GandalfBot(level=level)
gandalf_scorer = GandalfScorer(level=level, chat_model=chat_model)
Expand Down Expand Up @@ -73,7 +71,7 @@ def play_game(level: GandalfLevel, max_attempt=5) -> None:
)

report = job.execute(initial_prompt_text=level.description, max_attempt=max_attempt)
if report.final_score.score_value:
if report.final_score.flagged:
print(f"✅ Password: {report.final_score.score_value}")
else:
print("❌ Failed!")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ def score_text(self, text: str) -> Score:
)

return Score(
flagged=score > self._threshold,
score_type="float",
score_value=score,
score_description="Prompt injection detection score",
Expand Down
3 changes: 2 additions & 1 deletion aisploit/core/classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
from dataclasses import dataclass


@dataclass
@dataclass(frozen=True)
class Score:
flagged: bool
score_type: Literal["int", "float", "str", "bool"]
score_value: int | float | str | bool
score_description: str = ""
Expand Down
16 changes: 14 additions & 2 deletions aisploit/core/report.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,24 @@
from typing import Any
from typing import Any, Generic, TypeVar, List
from abc import ABC, abstractmethod
from pathlib import Path
from jinja2 import Template


class BaseReport(ABC):
T = TypeVar("T")


class BaseReport(Generic[T], ABC):
_entries: List[T]

def __init__(self, *, run_id: str) -> None:
self.run_id = run_id
self._entries = []

def __iter__(self):
return iter(self._entries)

def __len__(self):
return len(self._entries)

@abstractmethod
def _ipython_display_(self):
Expand Down
9 changes: 7 additions & 2 deletions aisploit/dataset/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
from .dataset import Dataset, JailbreakDataset, Prompt
from .dataset import Dataset
from .prompt import PromptDataset, Prompt, JailbreakPromptDataset
from .sample import SampleDataset, Sample

__all__ = [
"Dataset",
"JailbreakDataset",
"PromptDataset",
"Prompt",
"JailbreakPromptDataset",
"SampleDataset",
"Sample",
]
66 changes: 14 additions & 52 deletions aisploit/dataset/dataset.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,24 @@
import abc
import os
from abc import ABC
import yaml
from pathlib import Path
from typing import Generic, Type, TypeVar, Sequence
from dataclasses import dataclass

T = TypeVar("T")


class YamlDeserializable(abc.ABC):
class Dataset(Generic[T]):
"""Generic dataset class."""

_prompts: Sequence[T]

def __iter__(self):
return iter(self._prompts)

def __len__(self):
return len(self._prompts)


class YamlDeserializable(ABC):
"""Abstract base class for objects that can be deserialized from YAML."""

@classmethod
Expand Down Expand Up @@ -37,51 +47,3 @@ def from_yaml_file(cls: Type[T], file: Path) -> T:
raise ValueError(f"Invalid YAML file '{file}': {exc}")

return cls(**yaml_data)


@dataclass
class Prompt(YamlDeserializable):
"""A prompt configuration."""

name: str
skip: bool
source: str
language: str
tags: Sequence[str]
parameters: Sequence[str]
template: str


JAILBREAK_PROMPTS_PATH = Path(__file__, "..", "jailbreak").resolve()


class Dataset(Generic[T]):
"""Generic dataset class."""

_prompts: Sequence[T]

def __iter__(self):
return iter(self._prompts)

def __len__(self):
return len(self._prompts)


class JailbreakDataset(Dataset[Prompt]):
"""Dataset for jailbreak prompts."""

def __init__(
self,
*,
path=JAILBREAK_PROMPTS_PATH,
) -> None:
"""Initialize the JailbreakDataset.
Args:
path (str): The path to the directory containing prompt YAML files.
"""
self._prompts = []
for file_name in os.listdir(path):
prompt = Prompt.from_yaml_file(path / file_name)
if not prompt.skip:
self._prompts.append(prompt)
58 changes: 58 additions & 0 deletions aisploit/dataset/prompt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import os
from pathlib import Path
from typing import Sequence, Optional
from dataclasses import dataclass

from .dataset import Dataset, YamlDeserializable


@dataclass
class Prompt(YamlDeserializable):
    """A prompt configuration.

    Instances are created from YAML files through the inherited
    ``from_yaml_file`` class method, which passes the parsed YAML mapping
    to the constructor as keyword arguments.
    """

    # Identifier of the prompt (presumably human-readable; verify against the YAML files).
    name: str
    # When true, PromptDataset.load_from_directory leaves this prompt out.
    skip: bool
    # Presumably where the prompt text originates from — TODO confirm.
    source: str
    # Presumably the natural language of the template — TODO confirm format.
    language: str
    # Labels matched against ``tags_filter`` in PromptDataset.load_from_directory.
    tags: Sequence[str]
    # Presumably the placeholder names expected by ``template`` — TODO confirm.
    parameters: Sequence[str]
    # The prompt text itself (templating syntax is not visible here).
    template: str


class PromptDataset(Dataset[Prompt]):
    """A dataset whose items are ``Prompt`` objects."""

    def __init__(self, prompts: Sequence[Prompt]) -> None:
        """Store the given prompts as the contents of the dataset.

        Args:
            prompts (Sequence[Prompt]): The prompts the dataset should hold.
        """
        self._prompts = prompts

    @classmethod
    def load_from_directory(
        cls, path: Path, tags_filter: Optional[Sequence[str]] = None
    ) -> "PromptDataset":
        """Build a PromptDataset from the prompt YAML files in a directory.

        Every entry returned by ``os.listdir(path)`` is parsed with
        ``Prompt.from_yaml_file``. A prompt is kept unless it is marked
        ``skip`` or, when a non-empty ``tags_filter`` is given, it shares no
        tag with that filter.

        Args:
            path (Path): Directory containing the prompt YAML files.
            tags_filter (Sequence[str], optional): Tags to select prompts by;
                a prompt matching at least one tag is kept. ``None`` or an
                empty sequence disables tag filtering. Defaults to None.

        Returns:
            PromptDataset: A dataset holding the selected prompts.
        """
        selected = []
        for entry in os.listdir(path):
            candidate = Prompt.from_yaml_file(path / entry)
            # Prompts flagged as skipped are never part of the dataset.
            if candidate.skip:
                continue
            # With an active filter, require at least one tag in common.
            if tags_filter and not set(candidate.tags).intersection(tags_filter):
                continue
            selected.append(candidate)
        return cls(selected)


# Prompts tagged "jailbreak", loaded from the ``prompts`` directory located
# next to this module.
# NOTE: despite the class-like name this is a PromptDataset *instance*; it is
# built eagerly, so the directory is read when this module is imported.
JailbreakPromptDataset = PromptDataset.load_from_directory(
Path(__file__, "..", "prompts").resolve(), tags_filter=["jailbreak"]
)
File renamed without changes.
File renamed without changes.
51 changes: 51 additions & 0 deletions aisploit/dataset/sample.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import os
from typing import Sequence, Optional
from pathlib import Path
from dataclasses import dataclass

from .dataset import Dataset, YamlDeserializable


@dataclass
class Sample(YamlDeserializable):
    """A sample configuration.

    Instances are created from YAML files through the inherited
    ``from_yaml_file`` class method, which passes the parsed YAML mapping
    to the constructor as keyword arguments.
    """

    # When true, SampleDataset.load_from_directory leaves this sample out.
    skip: bool
    # Presumably the input text of the sample pair — TODO confirm.
    input: str
    # Presumably the output text belonging to ``input`` — TODO confirm.
    output: str
    # Presumably the natural language of the sample — TODO confirm format.
    language: str
    # Labels matched against ``tags_filter`` in SampleDataset.load_from_directory.
    tags: Sequence[str]


class SampleDataset(Dataset[Sample]):
    """Dataset for samples."""

    def __init__(self, samples: Sequence[Sample]) -> None:
        """Initialize the SampleDataset with a sequence of samples.

        Args:
            samples (Sequence[Sample]): The samples to initialize the dataset with.
        """
        # The generic ``Dataset`` base implements ``__iter__`` and ``__len__``
        # on top of ``self._prompts``; storing the samples only under
        # ``_samples`` made iterating or calling ``len()`` on the dataset raise
        # AttributeError.
        self._prompts = samples
        # Kept as an alias for any code that already reads ``_samples``.
        self._samples = samples

    @classmethod
    def load_from_directory(
        cls, path: Path, tags_filter: Optional[Sequence[str]] = None
    ) -> "SampleDataset":
        """Create a SampleDataset instance by loading samples from a directory.

        Every entry returned by ``os.listdir(path)`` is parsed with
        ``Sample.from_yaml_file``. A sample is kept unless it is marked
        ``skip`` or, when a non-empty ``tags_filter`` is given, it shares no
        tag with that filter.

        Args:
            path (Path): The path to the directory containing sample YAML files.
            tags_filter (Optional[Sequence[str]], optional): Tags to filter
                samples; a sample matching at least one tag is kept. ``None``
                or an empty sequence disables tag filtering. Defaults to None.

        Returns:
            SampleDataset: A dataset containing samples loaded from the directory.
        """
        samples = []
        for file_name in os.listdir(path):
            sample = Sample.from_yaml_file(path / file_name)
            if not sample.skip and (
                not tags_filter or set(sample.tags).intersection(tags_filter)
            ):
                samples.append(sample)
        return cls(samples)
3 changes: 3 additions & 0 deletions aisploit/demo/gandalf.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ def score_text(self, text: str) -> Score:

if not extracted_password:
return Score(
flagged=False,
score_type="str",
score_value="",
score_description=score_description,
Expand All @@ -146,13 +147,15 @@ def score_text(self, text: str) -> Score:
if did_guess_password:
message = json_response["message"]
return Score(
flagged=True,
score_type="str",
score_description=score_description,
score_explanation=f"Password found in text. {message}",
score_value=extracted_password,
)
else:
return Score(
flagged=False,
score_type="str",
score_description=score_description,
score_explanation=f"Invalid password found in text. [value={extracted_password}]",
Expand Down
9 changes: 3 additions & 6 deletions aisploit/redteam/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,19 +11,16 @@ class RedTeamReportEntry:
score: Score


class RedTeamReport(BaseReport):
entries: List[RedTeamReportEntry]

class RedTeamReport(BaseReport[RedTeamReportEntry]):
def __init__(self, *, run_id: str) -> None:
super().__init__(run_id=run_id)
self.entries = []

def add_entry(self, entry: RedTeamReportEntry):
self.entries.append(entry)
self._entries.append(entry)

@property
def final_score(self) -> Optional[Score]:
    """Return the score of the most recently added entry.

    Returns:
        Optional[Score]: The ``score`` of the last entry, or ``None`` when
        the report has no entries yet (or the last entry is falsy).
    """
    # Indexing an empty list with [-1] raises IndexError, so the empty
    # report has to be handled before looking at the last entry.
    if not self._entries:
        return None
    last_entry = self._entries[-1]
    return last_entry.score if last_entry else None
Expand Down
19 changes: 0 additions & 19 deletions aisploit/scanner/issue.py

This file was deleted.

8 changes: 4 additions & 4 deletions aisploit/scanner/job.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@


from ..core import BaseJob, BaseTarget, Callbacks, CallbackManager
from .plugins import PromptInjectionPlugin
from .plugins import ManyShotPlugin, PromptInjectionPlugin
from .plugin import Plugin, PluginRegistry
from .issue import Issue
from .report import ScanReport
from .report import ScanReport, Issue

#PluginRegistry.register("many_shot", ManyShotPlugin, tags=["jailbreak"])
PluginRegistry.register("prompt_injection", PromptInjectionPlugin, tags=["jailbreak"])


Expand Down Expand Up @@ -40,7 +40,7 @@ def execute(
issues: List[Issue] = []
for name, plugin in self.get_plugin(tags=tags).items():
callback_manager.on_scanner_plugin_start(name)
plugin_issues = plugin.run(self._target)
plugin_issues = plugin.run(run_id=run_id, target=self._target)
callback_manager.on_scanner_plugin_end(name)
issues.extend(plugin_issues)

Expand Down
4 changes: 2 additions & 2 deletions aisploit/scanner/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@


from ..core import BaseTarget
from .issue import Issue
from .report import Issue


class Plugin(ABC):
@abstractmethod
def run(self, target: BaseTarget) -> Sequence[Issue]:
def run(self, *, run_id: str, target: BaseTarget) -> Sequence[Issue]:
pass


Expand Down
6 changes: 5 additions & 1 deletion aisploit/scanner/plugins/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
from .many_shot_plugin import ManyShotPlugin
from .prompt_injection_plugin import PromptInjectionPlugin

__all__ = ["PromptInjectionPlugin"]
__all__ = [
"ManyShotPlugin",
"PromptInjectionPlugin",
]
Loading

0 comments on commit 0f67f09

Please sign in to comment.