Modernize CI and packaging metadata; require Python >= 3.9; bump to 0.3.14.

What this patch does:
* .github/workflows/ci.yaml: replaces the per-version `tests` job with a `tox`
  job over the matrix py39..py313. The entry equal to PYTHON_LATEST_TAG runs
  the `pandas` tox env and the package-installation smoke test.
* .pre-commit-config.yaml: enables setup-cfg-fmt, reorders the hooks, moves
  reorder-python-imports and pyupgrade to --py39-plus.
* pyproject.toml: deleted.
* pytesseract/, setup.py, tests/: add `from __future__ import annotations`,
  replace typing.List / typing.Optional with `list[...]` / `... | None`,
  rename two tests (double underscore -> single underscore).
* setup.cfg: reordered keys, license = Apache-2.0, python_requires >= 3.9.
* tox.ini: renames env `py-slow` to `pandas`, adds `passenv = *`.

Review notes:
* PYTHON_LATEST_VER is quoted. Unquoted, YAML loads it as a float, which
  would silently drop a trailing zero on a future value such as 3.20.
* The new ci.yaml ends with a newline (the "No newline at end of file" marker
  was dropped), matching the end-of-file-fixer hook configured below.
* Because of those two edits the post-image blob id on the ci.yaml `index`
  line is stale; plain `git apply` does not check it.
* NOTE(review): leading indentation in this patch was reconstructed from a
  whitespace-collapsed copy. Hunk line counts were re-checked, but confirm
  context indentation against the tree or use `git apply --ignore-whitespace`.

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index f64c2b88..0f96a56e 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -6,43 +6,43 @@ on:
     tags: '*'
   pull_request:
 
+env:
+  PYTHON_LATEST_TAG: py313
+  PYTHON_LATEST_VER: '3.13'  # quoted so YAML keeps a string, not a float
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
 jobs:
-  tests:
-    if: "!contains(github.event.head_commit.message, '[skip ci]')"
-    name: ${{ matrix.name }}
-    runs-on: ${{ matrix.os }}
+  tox:
+    name: ${{ matrix.env }}
+    runs-on: ubuntu-latest
     strategy:
-      fail-fast: true
       matrix:
-        include:
-          - {name: '3.12', python: '3.12', os: ubuntu-latest, tox: py312}
-          - {name: '3.11', python: '3.11', os: ubuntu-latest, tox: py311}
-          - {name: '3.10', python: '3.10', os: ubuntu-latest, tox: py310}
-          - {name: '3.9', python: '3.9', os: ubuntu-latest, tox: py39}
-          - {name: '3.8', python: '3.8', os: ubuntu-latest, tox: py38}
+        env: [py39, py310, py311, py312, py313]
     steps:
-      - uses: actions/checkout@v4
-
-      - name: Setup Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: ${{ matrix.python }}
-          cache: 'pip'
-
-      - name: Upgrade bootstrap packages
-        run: python -m pip install --upgrade pip tox
-
-      - name: Install tesseract
-        run: sudo apt-get -y update && sudo apt-get install -y tesseract-ocr tesseract-ocr-fra
-
-      - name: Print tesseract version
-        run: echo $(tesseract --version)
-
-      - name: Run tox
-        run: tox -e ${{ matrix.tox }}
-        env:
-          PY_COLORS: 1
-          TOX_TESTENV_PASSENV: PY_COLORS
-
-      - name: Test pytesseract package installation
-        run: pip install -U . && pip show pytesseract && python -c 'import pytesseract'
+      - uses: asottile/workflows/.github/actions/fast-checkout@v1.8.0
+        with:
+          submodules: false
+      - uses: actions/setup-python@v5
+        with:
+          python-version: |
+            ${{
+              (matrix.env == 'py39' || startsWith(matrix.env, 'py39-')) && '3.9' ||
+              (matrix.env == 'py310' || startsWith(matrix.env, 'py310-')) && '3.10' ||
+              (matrix.env == 'py311' || startsWith(matrix.env, 'py311-')) && '3.11' ||
+              (matrix.env == 'py312' || startsWith(matrix.env, 'py312-')) && '3.12' ||
+              (matrix.env == env.PYTHON_LATEST_TAG) && env.PYTHON_LATEST_VER
+            }}
+      - name: Install tesseract
+        run: sudo apt-get -y update && sudo apt-get install -y tesseract-ocr tesseract-ocr-fra
+      - name: Print tesseract version
+        run: echo $(tesseract --version)
+      - name: Update tools
+        run: python -mpip install --upgrade setuptools tox virtualenv
+      - name: Run tox
+        run: tox -e ${{ matrix.env != env.PYTHON_LATEST_TAG && matrix.env || 'pandas' }}
+      - name: Test pytesseract package installation
+        if: ${{ matrix.env == env.PYTHON_LATEST_TAG }}
+        run: python -mpip install -U . && python -mpip show pytesseract && python -c 'import pytesseract'
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 4f9f1471..6f6002c7 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -10,38 +10,38 @@ repos:
   hooks:
   - id: trailing-whitespace
   - id: end-of-file-fixer
-  - id: check-docstring-first
   - id: check-yaml
   - id: debug-statements
+  - id: double-quote-string-fixer
   - id: name-tests-test
   - id: requirements-txt-fixer
-  - id: double-quote-string-fixer
-- repo: https://github.com/PyCQA/flake8
-  rev: 7.1.1
-  hooks:
-  - id: flake8
-- repo: https://github.com/hhatto/autopep8
-  rev: v2.3.1
+  - id: check-docstring-first
+- repo: https://github.com/asottile/setup-cfg-fmt
+  rev: v2.7.0
   hooks:
-  - id: autopep8
+  - id: setup-cfg-fmt
 - repo: https://github.com/asottile/reorder-python-imports
   rev: v3.14.0
   hooks:
   - id: reorder-python-imports
-    args: [--py37-plus]
+    args: [--py39-plus, --add-import, 'from __future__ import annotations']
+- repo: https://github.com/asottile/add-trailing-comma
+  rev: v3.1.0
+  hooks:
+  - id: add-trailing-comma
 - repo: https://github.com/asottile/pyupgrade
   rev: v3.19.0
   hooks:
   - id: pyupgrade
-    args: [--py37-plus]
-- repo: https://github.com/asottile/add-trailing-comma
-  rev: v3.1.0
+    args: [--py39-plus]
+- repo: https://github.com/hhatto/autopep8
+  rev: v2.3.1
   hooks:
-  - id: add-trailing-comma
-# - repo: https://github.com/asottile/setup-cfg-fmt
-#   rev: v1.16.0
-#   hooks:
-#   - id: setup-cfg-fmt
+  - id: autopep8
+- repo: https://github.com/PyCQA/flake8
+  rev: 7.1.1
+  hooks:
+  - id: flake8
 - repo: local
   hooks:
   - id: rst
diff --git a/pyproject.toml b/pyproject.toml
deleted file mode 100644
index 74f860f7..00000000
--- a/pyproject.toml
+++ /dev/null
@@ -1,5 +0,0 @@
-[build-system]
-requires = [
-    "setuptools >= 40.0.4",
-    "wheel >= 0.29.0",
-]
diff --git a/pytesseract/__init__.py b/pytesseract/__init__.py
index 40164fa9..403bed4f 100644
--- a/pytesseract/__init__.py
+++ b/pytesseract/__init__.py
@@ -1,4 +1,6 @@
 # flake8: noqa: F401
+from __future__ import annotations
+
 from .pytesseract import ALTONotSupported
 from .pytesseract import get_languages
 from .pytesseract import get_tesseract_version
@@ -16,4 +18,4 @@
 from .pytesseract import TSVNotSupported
 
 
-__version__ = '0.3.13'
+__version__ = '0.3.14'
diff --git a/pytesseract/pytesseract.py b/pytesseract/pytesseract.py
index 37837f4c..cf487571 100644
--- a/pytesseract/pytesseract.py
+++ b/pytesseract/pytesseract.py
@@ -1,4 +1,6 @@
 #!/usr/bin/env python
+from __future__ import annotations
+
 import logging
 import re
 import shlex
@@ -20,8 +22,6 @@
 from os.path import realpath
 from tempfile import NamedTemporaryFile
 from time import sleep
-from typing import List
-from typing import Optional
 
 from packaging.version import InvalidVersion
 from packaging.version import parse
@@ -293,8 +293,8 @@ def _read_output(filename: str, return_bytes: bool = False):
 
 def run_and_get_multiple_output(
     image,
-    extensions: List[str],
-    lang: Optional[str] = None,
+    extensions: list[str],
+    lang: str | None = None,
     nice: int = 0,
     timeout: int = 0,
     return_bytes: bool = False,
diff --git a/setup.cfg b/setup.cfg
index 7145d1e1..16167f54 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,43 +1,37 @@
 [metadata]
 name = pytesseract
 version = attr: pytesseract.__version__
-author = Samuel Hoffstaetter
-author_email = samuel@hoffstaetter.com
-maintainer = Matthias Lee
-maintainer_email = pytesseract@madmaze.net
 description = Python-tesseract is a python wrapper for Google's Tesseract-OCR
 long_description = file: README.rst
 long_description_content_type = text/x-rst
-license = Apache License 2.0
-license_file = LICENSE
-keywords = python-tesseract OCR Python
 url = https://github.com/madmaze/pytesseract
+author = Samuel Hoffstaetter
+author_email = samuel@hoffstaetter.com
+maintainer = Matthias Lee
+maintainer_email = pytesseract@madmaze.net
+license = Apache-2.0
+license_files = LICENSE
 classifiers =
     License :: OSI Approved :: Apache Software License
     Programming Language :: Python :: 3
     Programming Language :: Python :: 3 :: Only
-    Programming Language :: Python :: 3.8
-    Programming Language :: Python :: 3.9
-    Programming Language :: Python :: 3.10
-    Programming Language :: Python :: 3.11
-    Programming Language :: Python :: 3.12
     Programming Language :: Python :: Implementation :: CPython
     Programming Language :: Python :: Implementation :: PyPy
+keywords = python-tesseract OCR Python
 
 [options]
 packages = find:
-include_package_data = True
 install_requires =
-    packaging>=21.3
     Pillow>=8.0.0
-
-python_requires = >=3.8
-
-[options.entry_points]
-console_scripts =
-    pytesseract = pytesseract.pytesseract:main
+    packaging>=21.3
+python_requires = >=3.9
+include_package_data = True
 
 [options.packages.find]
 exclude =
     tests*
     testing*
+
+[options.entry_points]
+console_scripts =
+    pytesseract = pytesseract.pytesseract:main
diff --git a/setup.py b/setup.py
index b024da80..536cf178 100644
--- a/setup.py
+++ b/setup.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 from setuptools import setup
 
 
diff --git a/tests/pytesseract_test.py b/tests/pytesseract_test.py
index 52efa2dd..0afaa6e1 100644
--- a/tests/pytesseract_test.py
+++ b/tests/pytesseract_test.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 from functools import partial
 from glob import iglob
 from multiprocessing import Pool
@@ -293,7 +295,7 @@ def test_image_to_alto_xml_support(test_file):
     TESSERACT_VERSION[:2] >= (3, 5),
     reason='requires tesseract < 3.05',
 )
-def test_image_to_data__pandas_support(test_file_small):
+def test_image_to_data_pandas_support(test_file_small):
     with pytest.raises(TSVNotSupported):
         image_to_data(test_file_small, output_type=Output.DATAFRAME)
 
@@ -303,7 +305,7 @@ def test_image_to_data__pandas_support(test_file_small):
     reason='requires tesseract >= 3.05',
 )
 @pytest.mark.skipif(pandas_installed is False, reason='requires pandas')
-def test_image_to_data__pandas_output(test_file_small):
+def test_image_to_data_pandas_output(test_file_small):
     """Test and compare the type and meta information of the result."""
     result = image_to_data(test_file_small, output_type=Output.DATAFRAME)
     assert isinstance(result, pandas.DataFrame)
diff --git a/tox.ini b/tox.ini
index 6be7d2ce..23884234 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,5 +1,5 @@
 [tox]
-envlist = py, py-slow
+envlist = py, pandas
 
 [pytest]
 addopts = --strict-markers --verbose --cache-clear --color=yes -p no:doctest
@@ -10,13 +10,15 @@ markers =
 [testenv]
 deps =
     -r{toxinidir}/requirements-dev.txt
+passenv = *
 commands =
     python -bb -m pytest {posargs:tests}
 
-[testenv:py-slow]
+[testenv:pandas]
 deps =
     numpy
     pandas
     -r{toxinidir}/requirements-dev.txt
+passenv = *
 commands =
     python -bb -m pytest {posargs:tests}