diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml deleted file mode 100644 index 5b803a3..0000000 --- a/.github/workflows/codeql-analysis.yml +++ /dev/null @@ -1,77 +0,0 @@ -# For most projects, this workflow file will not need changing; you simply need -# to commit it to your repository. -# -# You may wish to alter this file to override the set of languages analyzed, -# or to provide custom queries or build logic. -# -# ******** NOTE ******** -# We have attempted to detect the languages in your repository. Please check -# the `language` matrix defined below to confirm you have the correct set of -# supported CodeQL languages. -# -name: "CodeQL" - -on: - push: - branches: [ main ] - paths-ignore: - - '*.md' - - '*.rst' - pull_request: - # The branches below must be a subset of the branches above - branches: [ main ] - paths-ignore: - - '*.md' - - '*.rst' - schedule: - - cron: '30 15 * * 1' - -jobs: - analyze: - name: Analyze - runs-on: ubuntu-latest - permissions: - actions: read - contents: read - security-events: write - - strategy: - fail-fast: false - matrix: - language: [ 'python' ] - # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ] - # Learn more: - # https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed - - steps: - - name: Checkout repository - uses: actions/checkout@v2 - - # Initializes the CodeQL tools for scanning. - - name: Initialize CodeQL - uses: github/codeql-action/init@v1 - with: - languages: ${{ matrix.language }} - # If you wish to specify custom queries, you can do so here or in a config file. - # By default, queries listed here will override any specified in a config file. - # Prefix the list here with "+" to use these queries and those in the config file. - # queries: ./path/to/local/query, your-org/your-repo/queries@main - - # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). - # If this step fails, then you should remove it and run the build manually (see below) - - name: Autobuild - uses: github/codeql-action/autobuild@v1 - - # ℹ️ Command-line programs to run using the OS shell. - # 📚 https://git.io/JvXDl - - # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines - # and modify them (or add more) to build your code if your project - # uses a compiled language - - #- run: | - # make bootstrap - # make release - - - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v1 diff --git a/.github/workflows/greetings.yml b/.github/workflows/greetings.yml deleted file mode 100644 index d2f3d4f..0000000 --- a/.github/workflows/greetings.yml +++ /dev/null @@ -1,16 +0,0 @@ -name: Greetings - -on: [pull_request, issues] - -jobs: - greeting: - runs-on: ubuntu-latest - permissions: - issues: write - pull-requests: write - steps: - - uses: actions/first-interaction@v1 - with: - repo-token: ${{ secrets.GITHUB_TOKEN }} - issue-message: 'Hello thank you for your contribution, do not worry, I will review this pull issues in a few days at the latest.' - pr-message: 'Hello thank you for your contribution, do not worry, I will review this pull request in a few days at the latest.' diff --git a/.github/workflows/label-issues.yml b/.github/workflows/label-issues.yml deleted file mode 100644 index 786f3cb..0000000 --- a/.github/workflows/label-issues.yml +++ /dev/null @@ -1,15 +0,0 @@ -name: Labeling new issue -on: - issues: - types: ['opened'] -jobs: - build: - runs-on: ubuntu-latest - steps: - - uses: Renato66/auto-label@v2 - with: - repo-token: ${{ secrets.GITHUB_TOKEN }} - ignore-comments: true - labels-synonyms: '{"bug":["error","need fix","not working","bug"],"enhancement":["upgrade"],"question":["help", "question"],"documentation":["documentation","document"],"wontfix":["wont fix","wontfix","need fix","needfix"],"good first issue":["good first","first issue","good issue"]}' - labels-not-allowed: '' - default-labels: '["help wanted"]' diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 4c59f3c..2f9cd17 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -4,30 +4,29 @@ name: Publish on: - release: + release: push: tags: - - "V*" + - "V*" jobs: publish: - runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - name: Set up Python - uses: actions/setup-python@v2 - with: - python-version: '3.8' - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install setuptools wheel twine - - name: Build and publish - env: - TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} - TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} - run: | - python setup.py sdist bdist_wheel --verbose - twine upload dist/* --verbose + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v3 + with: + python-version: "3.9" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install setuptools wheel twine + - name: Build and publish + env: + TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} + TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} + run: | + python setup.py sdist bdist_wheel --verbose + twine upload dist/* --verbose diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml deleted file mode 100644 index b86ad25..0000000 --- a/.github/workflows/pylint.yml +++ /dev/null @@ -1,28 +0,0 @@ -name: Pylint - -on: - push: - branches: - - main - - '*.x' - paths-ignore: - - '*.md' - - '*.rst' -jobs: - build: - - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v2 - - name: Set up Python 3.9 - uses: actions/setup-python@v2 - with: - python-version: 3.9 - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install pylint - - name: Analysing the code with pylint - run: | - pylint sansorchi test --exit-zero diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index 30c3dd9..426ca84 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -2,21 +2,20 @@ name: Mark stale issues and pull requests on: schedule: - - cron: "30 1 * * *" + - cron: "30 1 * * *" jobs: stale: - runs-on: ubuntu-latest permissions: issues: write pull-requests: write steps: - - uses: actions/stale@v3 - with: - repo-token: ${{ secrets.GITHUB_TOKEN }} - stale-issue-message: 'Stale issue message' - stale-pr-message: 'Stale pull request message' - stale-issue-label: 'no-issue-activity' - stale-pr-label: 'no-pr-activity' + - uses: actions/stale@v3 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + stale-issue-message: "Stale issue message" + stale-pr-message: "Stale pull request message" + stale-issue-label: "no-issue-activity" + stale-pr-label: "no-pr-activity" diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 9276bee..4c1fe81 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,54 +1,56 @@ name: Tests on: push: - branches: - - main - - "*.x" + branches: [main] paths-ignore: + - "docs/**" - "*.md" - - "*.rst" - - "*.yml" - - "*.yaml" - - pull_request: - branches: - - main - - "*.x" + branches: [main] paths-ignore: + - "docs/**" - "*.md" - - "*.rst" - - "*.yml" - - "*.yaml" jobs: tests: - name: ${{ matrix.name }} - runs-on: ${{ matrix.os }} + name: ${{ matrix.name }} 🤖 + runs-on: ubuntu-latest strategy: fail-fast: false matrix: include: - - { name: "3.9", python: "3.9", os: ubuntu-latest, tox: py39 } - - { name: "3.8", python: "3.8", os: ubuntu-latest, tox: py38 } - - { name: "3.7", python: "3.7", os: ubuntu-latest, tox: py37 } + - { name: "Python 3.10", python: "3.10" } + - { name: "Python 3.9", python: "3.9" } + - { name: "Python 3.8", python: "3.8" } + - { name: "Python 3.7", python: "3.7" } steps: - - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 + - name: Checkout 🛎️ + uses: actions/checkout@v3 + + - name: Set up Python 3 🐍 + uses: actions/setup-python@v3 with: python-version: ${{ matrix.python }} - - name: update pip + + - name: Update PIP ✨ run: | pip install -U wheel pip install -U setuptools python -m pip install -U pip - - name: get pip cache dir - id: pip-cache - run: echo "::set-output name=dir::$(pip cache dir)" - - name: cache pip - uses: actions/cache@v2 + + - name: Install requirements ⚙️ + run: | + pip install pytest flake8 coverage hazm + pip install -e . + + - name: Run Flake8 💫 + run: flake8 sansorchi tests setup.py + + - name: Run Tests and collect coverage 🧪 + run: | + coverage run -m pytest + coverage xml + - name: Upload coverage to Codecov 📈 + uses: codecov/codecov-action@v3 with: - path: ${{ steps.pip-cache.outputs.dir }} - key: pip|${{ runner.os }}|${{ matrix.python }}|${{ hashFiles('setup.py') }}|${{ hashFiles('requirements/*.txt') }} - - run: pip install tox - - run: pip install -e . - - run: tox -e ${{ matrix.tox }} + fail_ci_if_error: true + token: ${{ secrets.CODECOV_TOKEN }} diff --git a/.npmrc b/.npmrc new file mode 100644 index 0000000..7f520b2 --- /dev/null +++ b/.npmrc @@ -0,0 +1 @@ +@OWNER:registry=https://npm.pkg.github.com diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 30ed24f..a609b4f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -19,7 +19,11 @@ repos: rev: 22.1.0 hooks: - id: black - - repo: https://github.com/PyCQA/isort - rev: "5.10.1" + - repo: https://github.com/asottile/reorder_python_imports + rev: v3.1.0 hooks: - - id: isort + - id: reorder-python-imports + name: Reorder Python imports (src, tests) + files: "^(?!examples/)" + args: ["--application-directories", "src"] + additional_dependencies: ["setuptools>60.9"] diff --git a/TODO.md b/TODO.md deleted file mode 100644 index 107682a..0000000 --- a/TODO.md +++ /dev/null @@ -1,3 +0,0 @@ -# TODO - -- [X] The problem of "ی" at the end of words diff --git a/package.json b/package.json new file mode 100644 index 0000000..5ad1e73 --- /dev/null +++ b/package.json @@ -0,0 +1,28 @@ +{ + "name": "sansorchi", + "version": "5.0.0", + "description": " Remove Persian (Farsi) Swear Words ", + "main": "sansorchi/__init__.py", + "directories": { + "test": "test" + }, + "scripts": { + "test": "pytest" + }, + "repository": { + "type": "git", + "url": "git+https://github.com/mmdbalkhi/Sansorchi.git" + }, + "keywords": [ + "python", + "swear", + "persian", + "farsi" + ], + "author": "Komeil Parseh ", + "license": "GPL-3.0-or-later", + "bugs": { + "url": "https://github.com/mmdbalkhi/Sansorchi/issues" + }, + "homepage": "https://github.com/mmdbalkhi/Sansorchi#readme" +} diff --git a/requirements/dev.in b/requirements/dev.in deleted file mode 100644 index c854000..0000000 --- a/requirements/dev.in +++ /dev/null @@ -1,5 +0,0 @@ --r docs.in --r tests.in -pip-tools -pre-commit -tox diff --git a/requirements/dev.txt b/requirements/dev.txt deleted file mode 100644 index 221295e..0000000 --- a/requirements/dev.txt +++ /dev/null @@ -1,145 +0,0 @@ -# -# This file is autogenerated by pip-compile with python 3.9 -# To update, run: -# -# pip-compile requirements/dev.in -# -alabaster==0.7.12 - # via sphinx -asgiref==3.5.2 - # via -r requirements/tests.in -attrs==21.2.0 - # via pytest -babel==2.9.1 - # via sphinx -backports-entry-points-selectable==1.1.0 - # via virtualenv -blinker==1.4 - # via -r requirements/tests.in -certifi==2021.5.30 - # via requests -cfgv==3.3.1 - # via pre-commit -charset-normalizer==2.0.4 - # via requests -click==8.0.1 - # via pip-tools -distlib==0.3.2 - # via virtualenv -docutils==0.17.1 - # via - # sphinx - # sphinx-tabs -filelock==3.0.12 - # via - # tox - # virtualenv -greenlet==1.1.2 - # via -r requirements/tests.in -identify==2.2.13 - # via pre-commit -idna==3.2 - # via requests -imagesize==1.2.0 - # via sphinx -iniconfig==1.1.1 - # via pytest -jinja2==3.0.1 - # via sphinx -markupsafe==2.0.1 - # via jinja2 -nodeenv==1.6.0 - # via pre-commit -packaging==21.0 - # via - # pallets-sphinx-themes - # pytest - # sphinx - # tox -pallets-sphinx-themes==2.0.2 - # via -r requirements/docs.in -pep517==0.11.0 - # via pip-tools -pip-tools==6.6.2 - # via -r requirements/dev.in -platformdirs==2.3.0 - # via virtualenv -pluggy==1.0.0 - # via - # pytest - # tox -pre-commit==2.19.0 - # via -r requirements/dev.in -py==1.10.0 - # via - # pytest - # tox -pygments==2.10.0 - # via - # sphinx - # sphinx-tabs -pyparsing==2.4.7 - # via packaging -pytest==7.1.2 - # via -r requirements/tests.in -python-dotenv==0.20.0 - # via -r requirements/tests.in -pytz==2021.1 - # via babel -pyyaml==5.4.1 - # via pre-commit -requests==2.26.0 - # via sphinx -six==1.16.0 - # via - # tox - # virtualenv -snowballstemmer==2.1.0 - # via sphinx -sphinx==4.5.0 - # via - # -r requirements/docs.in - # pallets-sphinx-themes - # sphinx-issues - # sphinx-tabs - # sphinxcontrib-log-cabinet -sphinx-issues==3.0.1 - # via -r requirements/docs.in -sphinx-tabs==3.3.1 - # via -r requirements/docs.in -sphinxcontrib-applehelp==1.0.2 - # via sphinx -sphinxcontrib-devhelp==1.0.2 - # via sphinx -sphinxcontrib-htmlhelp==2.0.0 - # via sphinx -sphinxcontrib-jsmath==1.0.1 - # via sphinx -sphinxcontrib-log-cabinet==1.0.1 - # via -r requirements/docs.in -sphinxcontrib-qthelp==1.0.3 - # via sphinx -sphinxcontrib-serializinghtml==1.1.5 - # via sphinx -toml==0.10.2 - # via - # pre-commit - # tox -tomli==1.2.1 - # via - # pep517 - # pytest -tox==3.25.0 - # via -r requirements/dev.in -urllib3==1.26.6 - # via requests -virtualenv==20.7.2 - # via - # pre-commit - # tox -wheel==0.37.0 - # via pip-tools - -# The following packages are considered to be unsafe in a requirements file: -# pip -# setuptools diff --git a/requirements/docs.in b/requirements/docs.in deleted file mode 100644 index 3ee050a..0000000 --- a/requirements/docs.in +++ /dev/null @@ -1,5 +0,0 @@ -Pallets-Sphinx-Themes -Sphinx -sphinx-issues -sphinxcontrib-log-cabinet -sphinx-tabs diff --git a/requirements/docs.txt b/requirements/docs.txt deleted file mode 100644 index b0698cd..0000000 --- a/requirements/docs.txt +++ /dev/null @@ -1,71 +0,0 @@ -# -# This file is autogenerated by pip-compile with python 3.9 -# To update, run: -# -# pip-compile requirements/docs.in -# -alabaster==0.7.12 - # via sphinx -babel==2.9.1 - # via sphinx -certifi==2021.5.30 - # via requests -charset-normalizer==2.0.4 - # via requests -docutils==0.17.1 - # via - # sphinx - # sphinx-tabs -idna==3.2 - # via requests -imagesize==1.2.0 - # via sphinx -jinja2==3.0.1 - # via sphinx -markupsafe==2.0.1 - # via jinja2 -packaging==21.0 - # via - # pallets-sphinx-themes - # sphinx -pallets-sphinx-themes==2.0.2 - # via -r requirements/docs.in -pygments==2.10.0 - # via - # sphinx - # sphinx-tabs -pyparsing==2.4.7 - # via packaging -pytz==2021.1 - # via babel -requests==2.26.0 - # via sphinx -snowballstemmer==2.1.0 - # via sphinx -sphinx==4.5.0 - # via - # -r requirements/docs.in - # pallets-sphinx-themes - # sphinx-issues - # sphinx-tabs - # sphinxcontrib-log-cabinet -sphinx-issues==3.0.1 - # via -r requirements/docs.in -sphinx-tabs==3.3.1 - # via -r requirements/docs.in -sphinxcontrib-applehelp==1.0.2 - # via sphinx -sphinxcontrib-devhelp==1.0.2 - # via sphinx -sphinxcontrib-htmlhelp==2.0.0 - # via sphinx -sphinxcontrib-jsmath==1.0.1 - # via sphinx -sphinxcontrib-log-cabinet==1.0.1 - # via -r requirements/docs.in -sphinxcontrib-qthelp==1.0.3 - # via sphinx -sphinxcontrib-serializinghtml==1.1.5 - # via sphinx -urllib3==1.26.6 - # via requests diff --git a/requirements/tests.in b/requirements/tests.in deleted file mode 100644 index 88fe548..0000000 --- a/requirements/tests.in +++ /dev/null @@ -1,5 +0,0 @@ -pytest -asgiref -blinker -greenlet -python-dotenv diff --git a/requirements/tests.txt b/requirements/tests.txt deleted file mode 100644 index f944ad4..0000000 --- a/requirements/tests.txt +++ /dev/null @@ -1,30 +0,0 @@ -# -# This file is autogenerated by pip-compile with python 3.9 -# To update, run: -# -# pip-compile requirements/tests.in -# -asgiref==3.5.2 - # via -r requirements/tests.in -attrs==21.2.0 - # via pytest -blinker==1.4 - # via -r requirements/tests.in -greenlet==1.1.2 - # via -r requirements/tests.in -iniconfig==1.1.1 - # via pytest -packaging==21.0 - # via pytest -pluggy==1.0.0 - # via pytest -py==1.10.0 - # via pytest -pyparsing==2.4.7 - # via packaging -pytest==7.1.2 - # via -r requirements/tests.in -python-dotenv==0.20.0 - # via -r requirements/tests.in -tomli==2.0.1 - # via pytest diff --git a/sansorchi/__init__.py b/sansorchi/__init__.py index d4e610f..5da6f8c 100644 --- a/sansorchi/__init__.py +++ b/sansorchi/__init__.py @@ -1,4 +1,4 @@ """Sansorchi a package for remove swears word""" +from sansorchi.sansorchi import Sansorchi # noqa: E401 -from .__version__ import __version__ as version -from .sansorchi import sansor +__version__ = "5.0.0-dev" diff --git a/sansorchi/__version__.py b/sansorchi/__version__.py deleted file mode 100644 index b777be9..0000000 --- a/sansorchi/__version__.py +++ /dev/null @@ -1 +0,0 @@ -__version__ = "4.0" diff --git a/sansorchi/data.py b/sansorchi/data.py index db0b219..f2c0060 100644 --- a/sansorchi/data.py +++ b/sansorchi/data.py @@ -6,168 +6,171 @@ fa = { "word": [ - "کیر", + "کثافت", "آشغال", - "تناسل", + "ماچ کردنی", + "آلت تناسلی", "آلت", "ابله", - "یزید", + "ابن یزید", "احمق", "اسب", - "اسکل", - "اسگل", - "اسگول", "الاغ", "الاق", "انگل", + "انی", + "انی", "اوسکل", "اوسکل", "اوسگل", "اوصکل", "اوصگل", - "باسن", + "ب ک", "بخورش", "بدبخت", "بمال", - "تخم", - "چپم", + "تخمم", "بپرروش", "بپرسرش", - "کونی", "بکارت", - "بکن", - "آبرو", - "خایه", - "شرف", + "بکن توش", + "بکنش", + "بکنمت", + "بی عفت", + "بی غیرت", + "بی ناموس", + "بی پدر", "بیشعور", - "عفت", - "غیرت", - "ناموس", - "پدر", - "بخورش", - "سگ", + "بیناموس", + "تخم سگ", + "تخم", "ترک", - "توله", + "توله سگ", "جاکش", - "جلق", - "جنده", + "جلق زدن", "جنسی", - "جون", + "جوون", "جکس", "جیندا", "حرومزاده", "حشر", - "حیوان", - "خار", - "کسده", - "کصده", - "کسّه", - "کصته", - "خور", - "مال", + "خارکس ده", + "خارکسده", + "خارکسّه", + "خایه خور", + "خایه مال", "خایه", - "خر", + "خرفت", "خز", - "خفه", - "بشو", - "خنگ", - "خواهر", - "مادر", - "دختر", - "زن", - "خیکاس", + "خر", + "خفه خون", + "خفه شو", + "خی کاس", + "داف ناز", "داف", - "داگاستایل", - "داگ", - "قرتی", - "دراز", + "داگ استایل", + "دخترقرتی", + "درازگوش", "دله", - "سرویس", + "دهن سرویس", "گاییده", - "دهنت", + "دهنت سرویس", "دوجنسه", - "پسر", "دول", - "دکل", "دیوث", + "دیوس خان", "دیوس", "دیوص", "رشتی", "رید", "زارت", "زباله", - "زر", - "کاسده", - "زنا", + "زرنزن", + "زن جنده", + "زن کاسده", + "زنا زاده", + "زنازاده", + "زنتو", + "زنشو", + "زنیکه", "سادیسمی", - "بزن" "ساک", "ساکونی", - "خور", + "سرخور", + "سرکیر", "سسکی", - "سوراخ", + "سوراخ کون", + "سوراخ کون", "سولاخ", - "کون", - "سکسچت", - "سکس", - "سکسیباش", - "سکسی", - "روح", + "سکس چت", + "حیوان", + "سکسی باش", + "سگ تو روحت", + "سگ دهن", + "سگ صفت", + "سگ پدر", + "سگی", "سیکتیر", "شاسگول", "شاش", - "شق", - "شل", + "شق کردن", + "شل مغز", "شنگول", - "شهوت", - "شورت", - "شپرت", - "صیغه", + "شهوتی", + "صیغه ای", "صیک", "عرب", - "عرق", - "عمه", - "عمع", - "عم" "عنتر", + "عرق خور", + "عمتو", + "عمه ننه", + "عن تر", "عن", "عوضی", "غرمساق", "غرمصاق", - "قرمساق", - "قرمصاق", "فاحشه", "فارس", - "فاک", - "فیلمسوپر", + "فاک فیس", + "فیلم سوپر", "قرتی", "قرمساق", "قرمصاق", "قس", - "لاپا", + "لا پا", "لاس", - "لاش", - "لامصب", + "لاش گوشت", + "لاشی", + "لاکونی", "لجن", + "لختی", "لخت", + "لر", "لز", - "قهوه", - "قهبه", - "قهرمان", + "مادر جنده", + "مادرجنده", + "مادرسگ", + "مادرقهوه", + "مادرکونی", "مالوندن", - "ماچ", "مرتیکه", - "مرد", - "مرض", - "مرز", + "مردیکه", + "مرض داری", + "مرضداری", "مشروب", "ملنگ", + "ممه خور", "ممه", "منگل", - "نر", + "میخوریش", + "نرکده", "نعشه", "نکبت", "نگاییدم", "هیز", "ولدزنا", + "پدر سوخته", + "پدر سگ", + "پدر صلواتی", + "پدرسگ", "پریود", "پستان", "پسون", @@ -178,110 +181,68 @@ "پپه", "چاغال", "چاقال", + "چس خور", "چس", "کاسکش", - "کثلیس", - "کث", - "کثافت", + "کث لیس", "کثافط", - "کردن", - "کردنی", "کرم", - "کسخل", - "کسخور", - "کسخیس", - "کسدادن", - "کسلیس", - "کسلیس", - "کسلیسیدن", - "کسننت", - "کسوکیر", - "کسکردن", - "کسکش", - "کس", - "کسخل", - "کسشعر", - "کسکش", - "کسکیر", - "کصخل", - "کصلیس", - "کص", - "کصافت", - "کصافط", - "کصخل", - "کصکش", "کلفت", - "کلهکیری", - "کلهکیری", - "کوثلیس", - "کوسخل", - "کوسخور", - "کوسلیس", "کوس", - "کوصخل", - "کوصلیس", "کوص", - "کونتپل", - "کونده", - "کونسوراخ", - "کونپنیر", - "کونگنده", "کون", - "کوندهخار", - "کوندهخوار", - "کونده", - "کونشو", - "کونن", - "کونی", - "کونی", "کیر", - "کیردراز", - "کیردوس", - "کیرر", - "کیرمکیدن", - "کیرناز", - "کیروکس", - "کیروکس", - "کیری", "گاو", - "گاوی", "گاگول", "گایدن", "گایدی", "گاییدن", - "گردندراز", + "گردن دراز", "گشاد", - "گندهگوز", - "گه", - "گهی", - "گوزباقالي", - "گوز", - "گوزو", "گوزو", "گوسفند", - "گوشدراز", + "گوش دراز", "گوه", "گوه", - "گیزن", + "گی زن", "گیخوار", - "یبنزنا", + "یبن زنا", "مادرتو", "ناموستو", "چنده", "باسنی", - "ساعتیچند", - "خارکصده", "سیکیم", - "ناموس", - "خای", - "لیس", + "سگ ناموس", + "نوب", + "خایمال", + "مادر به خطا", + "کصلیس", "بکنت", + "کصده", "گورومساخ", "پوفیوز", + "پدرتو", "قورومساق", "سیهدیر", "اوبی", - "مادرسگ", - "موز", - ], + "مادر سگ", + "خایه", + "جنده", + "زنا", + "ساک", + "کون", + "کیر", + "سکس", + "اسکل", + "اسکل", + "اسگل", + "کردنی", + "اسگول", + "کث", + "کردن", + "گوز", + "کص", + "باسن", + "کس", + ] } diff --git a/sansorchi/sansorchi.py b/sansorchi/sansorchi.py index de6559c..d865d40 100644 --- a/sansorchi/sansorchi.py +++ b/sansorchi/sansorchi.py @@ -1,43 +1,44 @@ -"""Sansorchi a package for remove swears word""" import re +from typing import Union + +from hazm import Normalizer # type: ignore from .data import fa -__all__ = ["sansor"] +class Sansorchi: + """Remove Persian (Farsi) Swear Words -def sansor(txt: str, bad_db=fa["word"]) -> str: - """*** this func receives a text and processesit - **** *and returns the processed text to the user - *** *replaces swear words with length of that start Note: The - ** word swear at any length Returns the length of that star - >>> sansor('موز خر است') == "** ** است' # Farsi + :func:`remove_swear_words` removes Persian (Farsi) swear words from a given text. + ===================================================================================== + :param text: text to remove swear words + :type text: str + :return: text without swear words """ - txt = txt.replace("‌", "") + def __init__(self, level=None) -> None: + self.level = level + self.bad_words = fa["word"] - split = txt.split(" ") - return_txt = "" + def __repr__(self) -> str: + return f"Sansorchi({self.level})" - for text in split: - swear_bool = False - if text not in bad_db: - for swear in bad_db: - if re.findall(swear, text): # if swear word in text do this - return_txt += re.sub( - swear, str(len(swear) * "*"), text - ) # replace swear word with start + def remove_bad_words(self, text: Union[str, bytes], replace_text: str = "") -> str: + if isinstance(text, bytes): # pragma: no cover + text = text.decode("utf-8") - swear_bool = True - break - else: - return_txt += str(len(text) * "*") - swear_bool = True + for word in self.bad_words: + text = re.sub(word, replace_text, text) - if not swear_bool: - return_txt = return_txt + text + return text - if len(split) > 1: - return_txt += " " + def is_bad_word(self, text: Union[str, bytes]) -> bool: + for word in self.bad_words: + if word in text: + return True + return False - return return_txt + @staticmethod + def normalize(text: Union[str, bytes]) -> str: # pragma: no cover + normalizer = Normalizer() + return normalizer.normalize(text) diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..1e919cb --- /dev/null +++ b/setup.cfg @@ -0,0 +1,24 @@ +[flake8] +# B = bugbear +# E = pycodestyle errors +# F = flake8 pyflakes +# W = pycodestyle warnings +# B9 = bugbear opinions +# ISC = implicit str concat +select = B, E, F, W, B9, ISC +ignore = + # slice notation whitespace, invalid + E203 + # import at top, too many circular import fixes + E402 + # line length, handled by bugbear B950 + E501 + # bare except, handled by bugbear B001 + E722 + # bin op line break, invalid + W503 +# up to 88 allowed by bugbear B950 +max-line-length = 80 +per-file-ignores = + # __init__ exports names + sansorchi/__init__.py: F401 \ No newline at end of file diff --git a/setup.py b/setup.py index 7d387df..b327056 100644 --- a/setup.py +++ b/setup.py @@ -1,17 +1,14 @@ -from sansorchi import version +from sansorchi import __version__ with open("README.md", encoding="utf-8") as fh: readme = fh.read() -dev_requires = ["black", "pytest", "pylint"] -doc_requires = ["sphinx", "myst_parser"] - def setup_package(): metadata = dict( name="Sansorchi", packages=["sansorchi"], - version=version, + version=__version__, long_description=readme, long_description_content_type="text/markdown", description="Remove Persian (Farsi) Swear Words", @@ -19,6 +16,7 @@ def setup_package(): author_email="ahmdparsh129@gmail.com", url="https://github.com/KomeilParseh/Sansorchi", download_url="https://github.com/KomeilParseh/Sansorchi/releases", + install_requires=["hazm>=0.7.0"], keywords=[ "Improper words", "swearing", @@ -34,8 +32,7 @@ def setup_package(): "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", "Operating System :: OS Independent", ], - python_requires=">=3.6", - extras_require={"dev": dev_requires, "docs": doc_requires}, + python_requires=">=3.7", ) try: from setuptools import setup diff --git a/test/test_sentences.py b/test/test_sentences.py deleted file mode 100644 index 5783468..0000000 --- a/test/test_sentences.py +++ /dev/null @@ -1,14 +0,0 @@ -"""Testing Sansorchi package""" - -from sansorchi import sansor - -txts = ["تو موزی موز", "خنگ خدا", "خر به پیش", "حیوانی وحشی"] -outputs = ["تو ***ی *** ", "*** خدا ", "** به پیش ", "*****ی وحشی "] - - -def test_sentences(): - """Test with up Lists""" - i = 0 - for txt in txts: - i += 1 - assert sansor(txt) == outputs[i - 1] diff --git a/test/test_word.py b/test/test_word.py deleted file mode 100644 index 84a6add..0000000 --- a/test/test_word.py +++ /dev/null @@ -1,10 +0,0 @@ -"""Testing Sansorchi package""" -from sansorchi import data, sansor - - -def test_word(): - """Unit test, Clean the code with the help of pylint""" - swears = data.fa["word"] - for swear in swears: - print(swear) - assert sansor(swear) == str(len(swear) * "*") diff --git a/tests/test_sansorchi.py b/tests/test_sansorchi.py new file mode 100644 index 0000000..18925d5 --- /dev/null +++ b/tests/test_sansorchi.py @@ -0,0 +1,11 @@ +from sansorchi import Sansorchi + +sansorchi = Sansorchi(level=None) + + +def test_repr(): + assert str(sansorchi) == "Sansorchi(None)" + + +def test_not_is_bad(): + assert not sansorchi.is_bad_word("سلام") diff --git a/tests/test_text.py b/tests/test_text.py new file mode 100644 index 0000000..ca2cbb3 --- /dev/null +++ b/tests/test_text.py @@ -0,0 +1,14 @@ +"""Testing Sansorchi package""" +import pytest + +from sansorchi import Sansorchi + +sansorchi = Sansorchi(level=None) + +txts = ["خر حیوانی نجیب است", "خر یک گوسفند خورد"] +outputs = [" حیو نجیب است", " یک خورد"] + + +@pytest.mark.parametrize("txt, output", zip(txts, outputs)) +def test_text(txt, output): + assert sansorchi.remove_bad_words(txt) == output diff --git a/tests/test_word.py b/tests/test_word.py new file mode 100644 index 0000000..b5e8a11 --- /dev/null +++ b/tests/test_word.py @@ -0,0 +1,19 @@ +"""Testing Sansorchi package""" +import pytest + +from sansorchi import data +from sansorchi import Sansorchi + +sansorchi = Sansorchi(level=None) + + +@pytest.mark.parametrize("word", data.fa["word"]) +def test_word(word): + """Test word function""" + assert sansorchi.remove_bad_words(word) == "" + + +@pytest.mark.parametrize("word", data.fa["word"]) +def test_is_bad_word(word): + """test is_bad_word function""" + assert sansorchi.is_bad_word(text=word) diff --git a/tox.ini b/tox.ini deleted file mode 100644 index fd33ab1..0000000 --- a/tox.ini +++ /dev/null @@ -1,15 +0,0 @@ -[tox] -envlist = - py{39,38,37,36} - style -skip_missing_interpreters = true - -[testenv] -deps = - -r requirements/tests.txt -commands = pytest -v - -[testenv:style] -deps = pre-commit -skip_install = true -commands = pre-commit run --all-files --show-diff-on-failure