From e4e39823fe211a5ec0f4e22d7e72948f50866897 Mon Sep 17 00:00:00 2001 From: hupe1980 Date: Mon, 22 Apr 2024 22:56:20 +0200 Subject: [PATCH] Add self similarity classifier --- aisploit/classifiers/__init__.py | 2 + aisploit/classifiers/self_similarity.py | 50 ++++++++++ examples/classifier.ipynb | 29 ++++++ poetry.lock | 121 +++++++++++++++++++++++- pyproject.toml | 1 + 5 files changed, 202 insertions(+), 1 deletion(-) create mode 100644 aisploit/classifiers/self_similarity.py diff --git a/aisploit/classifiers/__init__.py b/aisploit/classifiers/__init__.py index 683f51f..e757d88 100644 --- a/aisploit/classifiers/__init__.py +++ b/aisploit/classifiers/__init__.py @@ -1,5 +1,6 @@ from .markdown import MarkdownInjectionClassifier from .package_hallucination import PythonPackageHallucinationClassifier +from .self_similarity import SelfSimilarityClassifier from .text import RegexClassifier, SubstringClassifier, TextTokenClassifier __all__ = [ @@ -7,5 +8,6 @@ "PythonPackageHallucinationClassifier", "RegexClassifier", "SubstringClassifier", + "SelfSimilarityClassifier", "TextTokenClassifier", ] diff --git a/aisploit/classifiers/self_similarity.py b/aisploit/classifiers/self_similarity.py new file mode 100644 index 0000000..0c1638b --- /dev/null +++ b/aisploit/classifiers/self_similarity.py @@ -0,0 +1,50 @@ +from dataclasses import dataclass, field +from typing import List + +from sentence_transformers import SentenceTransformer +from sentence_transformers.util import cos_sim + +from ..core import BaseTextClassifier, Score + + +@dataclass +class SelfSimilarityClassifier(BaseTextClassifier[float]): + """A text classifier based on self-similarity using cosine similarity scores.""" + + model_name_or_path: str = "all-MiniLM-L6-v2" + threshold: float = 0.7 + tags: List[str] = field(default_factory=lambda: ["hallucination"], init=False) + + def __post_init__(self) -> None: + """Initialize the SentenceTransformer model.""" + self._model = SentenceTransformer(self.model_name_or_path) + + def score(self, input: str, references: List[str] | None = None) -> Score[float]: + """Score the input text based on its self-similarity to reference texts. + + Args: + input (str): The input text to be scored. + references (List[str], optional): List of reference texts. Defaults to None. + + Raises: + ValueError: If references is None or if the number of references is not at least 1. + + Returns: + Score[float]: A Score object representing the self-similarity score of the input. + """ + if not references or not len(references) >= 1: + raise ValueError("The number of references must be at least 1.") + + input_embeddings = self._model.encode(input, convert_to_tensor=True) + references_embeddings = self._model.encode(references, convert_to_tensor=True) + + cos_scores = cos_sim(input_embeddings, references_embeddings)[0] + + score = cos_scores.mean() + + return Score[float]( + flagged=(score < self.threshold).item(), + value=score.item(), + description="Returns True if the cosine similarity score is less than the threshold", + explanation=f"The cosine similarity score for the input is {score}", + ) diff --git a/examples/classifier.ipynb b/examples/classifier.ipynb index e7f053c..83338d4 100644 --- a/examples/classifier.ipynb +++ b/examples/classifier.ipynb @@ -26,6 +26,7 @@ "source": [ "import textwrap\n", "from dotenv import load_dotenv\n", + "from aisploit.classifiers import SelfSimilarityClassifier\n", "from aisploit.classifiers.presidio import PresidioAnalyserClassifier\n", "from aisploit.classifiers.huggingface import (\n", " BleuClassifier,\n", @@ -41,6 +42,34 @@ "load_dotenv()" ] }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Score(flagged=True, value=0.20951247215270996, description='Returns True if the cosine similarity score is less than the threshold', explanation='The cosine similarity score for the input is 0.20951247215270996')" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "classifier = SelfSimilarityClassifier()\n", + "classifier.score(\n", + " \"The sky turned green and the trees began to sing in a chorus of laughter.\", \n", + " [\n", + " \"As I looked around, I noticed that the buildings were made of candy, and the streets were paved with shimmering gold.\",\n", + " \"I found myself in a library unlike any other, where the books flew off the shelves and started telling stories of their own.\",\n", + " \"Every person I met had wings, and they soared through the air with grace and elegance, leaving trails of glitter in their wake.\",\n", + " ],\n", + ")" + ] + }, { "cell_type": "markdown", "metadata": {}, diff --git a/poetry.lock b/poetry.lock index ed32137..42cae71 100644 --- a/poetry.lock +++ b/poetry.lock @@ -5940,6 +5940,90 @@ tensorflow = ["safetensors[numpy]", "tensorflow (>=2.11.0)"] testing = ["h5py (>=3.7.0)", "huggingface-hub (>=0.12.1)", "hypothesis (>=6.70.2)", "pytest (>=7.2.0)", "pytest-benchmark (>=4.0.0)", "safetensors[numpy]", "setuptools-rust (>=1.5.2)"] torch = ["safetensors[numpy]", "torch (>=1.10)"] +[[package]] +name = "scikit-learn" +version = "1.4.2" +description = "A set of python modules for machine learning and data mining" +optional = false +python-versions = ">=3.9" +files = [ + {file = "scikit-learn-1.4.2.tar.gz", hash = "sha256:daa1c471d95bad080c6e44b4946c9390a4842adc3082572c20e4f8884e39e959"}, + {file = "scikit_learn-1.4.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8539a41b3d6d1af82eb629f9c57f37428ff1481c1e34dddb3b9d7af8ede67ac5"}, + {file = "scikit_learn-1.4.2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:68b8404841f944a4a1459b07198fa2edd41a82f189b44f3e1d55c104dbc2e40c"}, + {file = "scikit_learn-1.4.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:81bf5d8bbe87643103334032dd82f7419bc8c8d02a763643a6b9a5c7288c5054"}, + {file = "scikit_learn-1.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:36f0ea5d0f693cb247a073d21a4123bdf4172e470e6d163c12b74cbb1536cf38"}, + {file = "scikit_learn-1.4.2-cp310-cp310-win_amd64.whl", hash = "sha256:87440e2e188c87db80ea4023440923dccbd56fbc2d557b18ced00fef79da0727"}, + {file = "scikit_learn-1.4.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:45dee87ac5309bb82e3ea633955030df9bbcb8d2cdb30383c6cd483691c546cc"}, + {file = "scikit_learn-1.4.2-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:1d0b25d9c651fd050555aadd57431b53d4cf664e749069da77f3d52c5ad14b3b"}, + {file = "scikit_learn-1.4.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b0203c368058ab92efc6168a1507d388d41469c873e96ec220ca8e74079bf62e"}, + {file = "scikit_learn-1.4.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:44c62f2b124848a28fd695db5bc4da019287abf390bfce602ddc8aa1ec186aae"}, + {file = "scikit_learn-1.4.2-cp311-cp311-win_amd64.whl", hash = "sha256:5cd7b524115499b18b63f0c96f4224eb885564937a0b3477531b2b63ce331904"}, + {file = "scikit_learn-1.4.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:90378e1747949f90c8f385898fff35d73193dfcaec3dd75d6b542f90c4e89755"}, + {file = "scikit_learn-1.4.2-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:ff4effe5a1d4e8fed260a83a163f7dbf4f6087b54528d8880bab1d1377bd78be"}, + {file = "scikit_learn-1.4.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:671e2f0c3f2c15409dae4f282a3a619601fa824d2c820e5b608d9d775f91780c"}, + {file = "scikit_learn-1.4.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d36d0bc983336bbc1be22f9b686b50c964f593c8a9a913a792442af9bf4f5e68"}, + {file = "scikit_learn-1.4.2-cp312-cp312-win_amd64.whl", hash = "sha256:d762070980c17ba3e9a4a1e043ba0518ce4c55152032f1af0ca6f39b376b5928"}, + {file = "scikit_learn-1.4.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d9993d5e78a8148b1d0fdf5b15ed92452af5581734129998c26f481c46586d68"}, + {file = "scikit_learn-1.4.2-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:426d258fddac674fdf33f3cb2d54d26f49406e2599dbf9a32b4d1696091d4256"}, + {file = "scikit_learn-1.4.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5460a1a5b043ae5ae4596b3126a4ec33ccba1b51e7ca2c5d36dac2169f62ab1d"}, + {file = "scikit_learn-1.4.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:49d64ef6cb8c093d883e5a36c4766548d974898d378e395ba41a806d0e824db8"}, + {file = "scikit_learn-1.4.2-cp39-cp39-win_amd64.whl", hash = "sha256:c97a50b05c194be9146d61fe87dbf8eac62b203d9e87a3ccc6ae9aed2dfaf361"}, +] + +[package.dependencies] +joblib = ">=1.2.0" +numpy = ">=1.19.5" +scipy = ">=1.6.0" +threadpoolctl = ">=2.0.0" + +[package.extras] +benchmark = ["matplotlib (>=3.3.4)", "memory-profiler (>=0.57.0)", "pandas (>=1.1.5)"] +docs = ["Pillow (>=7.1.2)", "matplotlib (>=3.3.4)", "memory-profiler (>=0.57.0)", "numpydoc (>=1.2.0)", "pandas (>=1.1.5)", "plotly (>=5.14.0)", "pooch (>=1.6.0)", "scikit-image (>=0.17.2)", "seaborn (>=0.9.0)", "sphinx (>=6.0.0)", "sphinx-copybutton (>=0.5.2)", "sphinx-gallery (>=0.15.0)", "sphinx-prompt (>=1.3.0)", "sphinxext-opengraph (>=0.4.2)"] +examples = ["matplotlib (>=3.3.4)", "pandas (>=1.1.5)", "plotly (>=5.14.0)", "pooch (>=1.6.0)", "scikit-image (>=0.17.2)", "seaborn (>=0.9.0)"] +tests = ["black (>=23.3.0)", "matplotlib (>=3.3.4)", "mypy (>=1.3)", "numpydoc (>=1.2.0)", "pandas (>=1.1.5)", "polars (>=0.19.12)", "pooch (>=1.6.0)", "pyamg (>=4.0.0)", "pyarrow (>=12.0.0)", "pytest (>=7.1.2)", "pytest-cov (>=2.9.0)", "ruff (>=0.0.272)", "scikit-image (>=0.17.2)"] + +[[package]] +name = "scipy" +version = "1.13.0" +description = "Fundamental algorithms for scientific computing in Python" +optional = false +python-versions = ">=3.9" +files = [ + {file = "scipy-1.13.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ba419578ab343a4e0a77c0ef82f088238a93eef141b2b8017e46149776dfad4d"}, + {file = "scipy-1.13.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:22789b56a999265431c417d462e5b7f2b487e831ca7bef5edeb56efe4c93f86e"}, + {file = "scipy-1.13.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:05f1432ba070e90d42d7fd836462c50bf98bd08bed0aa616c359eed8a04e3922"}, + {file = "scipy-1.13.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8434f6f3fa49f631fae84afee424e2483289dfc30a47755b4b4e6b07b2633a4"}, + {file = "scipy-1.13.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:dcbb9ea49b0167de4167c40eeee6e167caeef11effb0670b554d10b1e693a8b9"}, + {file = "scipy-1.13.0-cp310-cp310-win_amd64.whl", hash = "sha256:1d2f7bb14c178f8b13ebae93f67e42b0a6b0fc50eba1cd8021c9b6e08e8fb1cd"}, + {file = "scipy-1.13.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0fbcf8abaf5aa2dc8d6400566c1a727aed338b5fe880cde64907596a89d576fa"}, + {file = "scipy-1.13.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:5e4a756355522eb60fcd61f8372ac2549073c8788f6114449b37e9e8104f15a5"}, + {file = "scipy-1.13.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b5acd8e1dbd8dbe38d0004b1497019b2dbbc3d70691e65d69615f8a7292865d7"}, + {file = "scipy-1.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ff7dad5d24a8045d836671e082a490848e8639cabb3dbdacb29f943a678683d"}, + {file = "scipy-1.13.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:4dca18c3ffee287ddd3bc8f1dabaf45f5305c5afc9f8ab9cbfab855e70b2df5c"}, + {file = "scipy-1.13.0-cp311-cp311-win_amd64.whl", hash = "sha256:a2f471de4d01200718b2b8927f7d76b5d9bde18047ea0fa8bd15c5ba3f26a1d6"}, + {file = "scipy-1.13.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d0de696f589681c2802f9090fff730c218f7c51ff49bf252b6a97ec4a5d19e8b"}, + {file = "scipy-1.13.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:b2a3ff461ec4756b7e8e42e1c681077349a038f0686132d623fa404c0bee2551"}, + {file = "scipy-1.13.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6bf9fe63e7a4bf01d3645b13ff2aa6dea023d38993f42aaac81a18b1bda7a82a"}, + {file = "scipy-1.13.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1e7626dfd91cdea5714f343ce1176b6c4745155d234f1033584154f60ef1ff42"}, + {file = "scipy-1.13.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:109d391d720fcebf2fbe008621952b08e52907cf4c8c7efc7376822151820820"}, + {file = "scipy-1.13.0-cp312-cp312-win_amd64.whl", hash = "sha256:8930ae3ea371d6b91c203b1032b9600d69c568e537b7988a3073dfe4d4774f21"}, + {file = "scipy-1.13.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5407708195cb38d70fd2d6bb04b1b9dd5c92297d86e9f9daae1576bd9e06f602"}, + {file = "scipy-1.13.0-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:ac38c4c92951ac0f729c4c48c9e13eb3675d9986cc0c83943784d7390d540c78"}, + {file = "scipy-1.13.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:09c74543c4fbeb67af6ce457f6a6a28e5d3739a87f62412e4a16e46f164f0ae5"}, + {file = "scipy-1.13.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:28e286bf9ac422d6beb559bc61312c348ca9b0f0dae0d7c5afde7f722d6ea13d"}, + {file = "scipy-1.13.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:33fde20efc380bd23a78a4d26d59fc8704e9b5fd9b08841693eb46716ba13d86"}, + {file = "scipy-1.13.0-cp39-cp39-win_amd64.whl", hash = "sha256:45c08bec71d3546d606989ba6e7daa6f0992918171e2a6f7fbedfa7361c2de1e"}, + {file = "scipy-1.13.0.tar.gz", hash = "sha256:58569af537ea29d3f78e5abd18398459f195546bb3be23d16677fb26616cc11e"}, +] + +[package.dependencies] +numpy = ">=1.22.4,<2.3" + +[package.extras] +dev = ["cython-lint (>=0.12.2)", "doit (>=0.36.0)", "mypy", "pycodestyle", "pydevtool", "rich-click", "ruff", "types-psutil", "typing_extensions"] +doc = ["jupyterlite-pyodide-kernel", "jupyterlite-sphinx (>=0.12.0)", "jupytext", "matplotlib (>=3.5)", "myst-nb", "numpydoc", "pooch", "pydata-sphinx-theme (>=0.15.2)", "sphinx (>=5.0.0)", "sphinx-design (>=0.4.0)"] +test = ["array-api-strict", "asv", "gmpy2", "hypothesis (>=6.30)", "mpmath", "pooch", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"] + [[package]] name = "send2trash" version = "1.8.3" @@ -5956,6 +6040,30 @@ nativelib = ["pyobjc-framework-Cocoa", "pywin32"] objc = ["pyobjc-framework-Cocoa"] win32 = ["pywin32"] +[[package]] +name = "sentence-transformers" +version = "2.7.0" +description = "Multilingual text embeddings" +optional = false +python-versions = ">=3.8.0" +files = [ + {file = "sentence_transformers-2.7.0-py3-none-any.whl", hash = "sha256:6a7276b05a95931581bbfa4ba49d780b2cf6904fa4a171ec7fd66c343f761c98"}, + {file = "sentence_transformers-2.7.0.tar.gz", hash = "sha256:2f7df99d1c021dded471ed2d079e9d1e4fc8e30ecb06f957be060511b36f24ea"}, +] + +[package.dependencies] +huggingface-hub = ">=0.15.1" +numpy = "*" +Pillow = "*" +scikit-learn = "*" +scipy = "*" +torch = ">=1.11.0" +tqdm = "*" +transformers = ">=4.34.0,<5.0.0" + +[package.extras] +dev = ["pre-commit", "pytest", "ruff (>=0.3.0)"] + [[package]] name = "setuptools" version = "69.5.1" @@ -6626,6 +6734,17 @@ mxnet = ["mxnet (>=1.5.1,<1.6.0)"] tensorflow = ["tensorflow (>=2.0.0,<2.6.0)"] torch = ["torch (>=1.6.0)"] +[[package]] +name = "threadpoolctl" +version = "3.4.0" +description = "threadpoolctl" +optional = false +python-versions = ">=3.8" +files = [ + {file = "threadpoolctl-3.4.0-py3-none-any.whl", hash = "sha256:8f4c689a65b23e5ed825c8436a92b818aac005e0f3715f6a1664d7c7ee29d262"}, + {file = "threadpoolctl-3.4.0.tar.gz", hash = "sha256:f11b491a03661d6dd7ef692dd422ab34185d982466c49c8f98c8f716b5c93196"}, +] + [[package]] name = "tiktoken" version = "0.6.0" @@ -7804,4 +7923,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = "^3.12" -content-hash = "fab9d4999e3f7e1c0030609dbe423db96ae25388b7abc6ef389d41a41cb28ca0" +content-hash = "3b163f21aaf9ff3e954f748f61346d25262c96347d62cfaad763da0e4c21797a" diff --git a/pyproject.toml b/pyproject.toml index b8bf90b..eec7371 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,6 +53,7 @@ pillow = "^10.3.0" tqdm = "^4.66.2" evaluate = "^0.4.1" bert-score = "^0.3.13" +sentence-transformers = "^2.7.0" [tool.poetry.group.dev.dependencies] chromadb = "^0.4.23"