Skip to content

Commit

Permalink
Add presidio classifier
Browse files Browse the repository at this point in the history
  • Loading branch information
hupe1980 committed Apr 21, 2024
1 parent 6d258f0 commit 0902622
Show file tree
Hide file tree
Showing 5 changed files with 674 additions and 73 deletions.
5 changes: 5 additions & 0 deletions aisploit/classifiers/presidio/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from .presidio_analyser import PresidioAnalyserClassifier

# Public API of this package: only the classifier imported above is re-exported.
__all__ = [
"PresidioAnalyserClassifier",
]
30 changes: 30 additions & 0 deletions aisploit/classifiers/presidio/presidio_analyser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from dataclasses import dataclass
from typing import List

from presidio_analyzer import AnalyzerEngine, RecognizerResult

from ...core import BaseTextClassifier, Score


@dataclass
class PresidioAnalyserClassifier(BaseTextClassifier[List[RecognizerResult]]):
    """Text classifier that flags input in which Presidio recognizes entities.

    The classifier wraps a Presidio ``AnalyzerEngine`` and reports the raw
    recognizer results as the score value.
    """

    # Language code forwarded to the analyzer on every call.
    language: str = "en"
    # Entity names forwarded to the analyzer; ``None`` is passed through unchanged.
    entities: List[str] | None = None
    # Passed to the engine as its ``default_score_threshold``.
    threshold: float = 0.7

    def __post_init__(self) -> None:
        """Create the analyzer engine once per classifier instance."""
        # Building the engine loads the NLP module (spaCy model by default)
        # and the other PII recognizers, so it is done here rather than per call.
        self._analyzer = AnalyzerEngine(default_score_threshold=self.threshold)

    def score(self, input: str) -> Score[List[RecognizerResult]]:
        """Analyze ``input`` and report whether any entities were recognized.

        Args:
            input: The text to analyze.

        Returns:
            A ``Score`` whose ``value`` holds the analyzer results and whose
            ``flagged`` field is true when at least one result was returned.
        """
        findings = self._analyzer.analyze(
            text=input,
            entities=self.entities,
            language=self.language,
        )
        hit_count = len(findings)

        if hit_count > 0:
            explanation = f"Found {hit_count} entities in input"
        else:
            explanation = "Did not find entities in input"

        return Score[List[RecognizerResult]](
            flagged=hit_count > 0,
            value=findings,
            description="Return True if entities are found in the input",
            explanation=explanation,
        )
31 changes: 30 additions & 1 deletion examples/classifier.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,41 @@
"source": [
"import textwrap\n",
"from dotenv import load_dotenv\n",
"from aisploit.classifiers.presidio import PresidioAnalyserClassifier\n",
"from aisploit.classifiers.huggingface import PipelinePromptInjectionIdentifier\n",
"from aisploit.classifiers.openai import ModerationClassifier\n",
"\n",
"load_dotenv()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Presidio"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Score(flagged=True, value=[type: PERSON, start: 11, end: 19, score: 0.85, type: PHONE_NUMBER, start: 43, end: 55, score: 0.75], description='Return True if entities are found in the input', explanation='Found 2 entities in input')"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"classifier = PresidioAnalyserClassifier()\n",
"classifier.score(\"My name is John Doo and my phone number is 212-555-5555\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down Expand Up @@ -81,7 +110,7 @@
{
"data": {
"text/plain": [
"Score(flagged=True, value=Moderation(categories=Categories(harassment=True, harassment_threatening=True, hate=False, hate_threatening=False, self_harm=False, self_harm_instructions=False, self_harm_intent=False, sexual=False, sexual_minors=False, violence=True, violence_graphic=False, self-harm=False, sexual/minors=False, hate/threatening=False, violence/graphic=False, self-harm/intent=False, self-harm/instructions=False, harassment/threatening=True), category_scores=CategoryScores(harassment=0.45708343386650085, harassment_threatening=0.35233721137046814, hate=0.0006805699085816741, hate_threatening=4.209355392958969e-05, self_harm=4.824657480639871e-06, self_harm_instructions=3.298543660434916e-08, self_harm_intent=2.0117977328482084e-06, sexual=4.9561123887542635e-05, sexual_minors=1.9911001913897053e-07, violence=0.9988710284233093, violence_graphic=1.047616660798667e-05, self-harm=4.824657480639871e-06, sexual/minors=1.9911001913897053e-07, hate/threatening=4.209355392958969e-05, violence/graphic=1.047616660798667e-05, self-harm/intent=2.0117977328482084e-06, self-harm/instructions=3.298543660434916e-08, harassment/threatening=0.35233721137046814), flagged=True), description='Moderation score for the given input', explanation='Details about the moderation score')"
"Score(flagged=True, value=Moderation(categories=Categories(harassment=True, harassment_threatening=True, hate=False, hate_threatening=False, self_harm=False, self_harm_instructions=False, self_harm_intent=False, sexual=False, sexual_minors=False, violence=True, violence_graphic=False, self-harm=False, sexual/minors=False, hate/threatening=False, violence/graphic=False, self-harm/intent=False, self-harm/instructions=False, harassment/threatening=True), category_scores=CategoryScores(harassment=0.4573294222354889, harassment_threatening=0.35159170627593994, hate=0.0006792626227252185, hate_threatening=4.232471837894991e-05, self_harm=4.82136874779826e-06, self_harm_instructions=3.341407150969644e-08, self_harm_intent=2.0083894014533143e-06, sexual=4.86759927298408e-05, sexual_minors=1.9414277119267354e-07, violence=0.9988717436790466, violence_graphic=1.050253467838047e-05, self-harm=4.82136874779826e-06, sexual/minors=1.9414277119267354e-07, hate/threatening=4.232471837894991e-05, violence/graphic=1.050253467838047e-05, self-harm/intent=2.0083894014533143e-06, self-harm/instructions=3.341407150969644e-08, harassment/threatening=0.35159170627593994), flagged=True), description='Moderation score for the given input', explanation='Details about the moderation score')"
]
},
"execution_count": 3,
Expand Down
Loading

0 comments on commit 0902622

Please sign in to comment.