diff --git a/.moban.d/docs/source/conf.py b/.moban.d/docs/source/conf.py index 7dfb293..894a812 100644 --- a/.moban.d/docs/source/conf.py +++ b/.moban.d/docs/source/conf.py @@ -14,4 +14,3 @@ def setup(app): {%endblock%} - diff --git a/.moban.d/setup.py b/.moban.d/setup.py index 7ae48c7..94ba5bf 100644 --- a/.moban.d/setup.py +++ b/.moban.d/setup.py @@ -1,4 +1,4 @@ {%extends "setup.py.jj2"%} {%block platform_block%} -{%endblock%} \ No newline at end of file +{%endblock%} diff --git a/Makefile b/Makefile index 03fb3be..a61e48f 100644 --- a/Makefile +++ b/Makefile @@ -8,3 +8,11 @@ document: spelling: sphinx-build -b spelling docs/source/ docs/build/spelling + +format: + isort -y $(find pyexcel_pdfr -name "*.py"|xargs echo) $(find tests -name "*.py"|xargs echo) + black -l 79 pyexcel_pdfr + black -l 79 tests + +lint: + bash lint.sh diff --git a/pyexcel_pdfr/__init__.py b/pyexcel_pdfr/__init__.py index 78db77f..40ce08b 100644 --- a/pyexcel_pdfr/__init__.py +++ b/pyexcel_pdfr/__init__.py @@ -5,15 +5,18 @@ :copyright: (c) 2015-2017 by Onni Software Ltd & its contributors :license: New BSD License """ -from ._version import __version__, __author__ # flake8: noqa +# flake8: noqa +from pyexcel_io.io import get_data as read_data +from pyexcel_io.io import isstream from pyexcel_io.plugins import IOPluginInfoChain -from pyexcel_io.io import get_data as read_data, isstream -__FILE_TYPE__ = 'pdf' +from ._version import __author__, __version__ + +__FILE_TYPE__ = "pdf" IOPluginInfoChain(__name__).add_a_reader( - relative_plugin_class_path='pdfr.PdfFile', + relative_plugin_class_path="pdfr.PdfFile", file_types=[__FILE_TYPE__], - stream_type='binary' + stream_type="binary", ) diff --git a/pyexcel_pdfr/_version.py b/pyexcel_pdfr/_version.py index ef42364..74f429b 100644 --- a/pyexcel_pdfr/_version.py +++ b/pyexcel_pdfr/_version.py @@ -1,2 +1,2 @@ -__version__ = '0.5.0-rc1' -__author__ = 'C.W.' +__version__ = "0.5.0-rc1" +__author__ = "C.W." diff --git a/pyexcel_pdfr/pdfr.py b/pyexcel_pdfr/pdfr.py index 37236fd..9086314 100644 --- a/pyexcel_pdfr/pdfr.py +++ b/pyexcel_pdfr/pdfr.py @@ -6,18 +6,22 @@ :copyright: (c) 2015-2017 by Onni Software Ltd & its contributors :license: New BSD License """ +import pyexcel_io.service as service from pdftables import get_tables -from pyexcel_io.book import BookReader -from pyexcel_io.sheet import SheetReader, NamedContent from pyexcel_io._compact import OrderedDict -import pyexcel_io.service as service +from pyexcel_io.book import BookReader +from pyexcel_io.sheet import NamedContent, SheetReader class PdfTable(SheetReader): - def __init__(self, sheet, auto_detect_int=True, - auto_detect_float=True, - auto_detect_datetime=True, - **keywords): + def __init__( + self, + sheet, + auto_detect_int=True, + auto_detect_float=True, + auto_detect_datetime=True, + **keywords + ): SheetReader.__init__(self, sheet, **keywords) self.__auto_detect_int = auto_detect_int self.__auto_detect_float = auto_detect_float @@ -47,10 +51,10 @@ def column_iterator(self, row): self.__column_span[index] -= 1 if self.__column_span[index] == 0: del self.__column_span[index] - yield '' + yield "" index += 1 - if not hasattr(cell, 'topleft'): + if not hasattr(cell, "topleft"): yield cell index += 1 continue @@ -63,15 +67,15 @@ def column_iterator(self, row): for offset in range(row_span): if offset > 0: # for next cell, give full col span - self.__column_span[index+offset] = col_span + self.__column_span[index + offset] = col_span else: # for current cell, give -1 because it has been # yielded - self.__column_span[index+offset] = col_span - 1 + self.__column_span[index + offset] = col_span - 1 else: # no col span found, so just repeat in the same row - for _ in range(row_span-1): - yield '' + for _ in range(row_span - 1): + yield "" index += 1 else: if col_span > 1: @@ -86,9 +90,8 @@ def __convert_cell(self, cell_text): if ret is None and self.__auto_detect_float: ret = service.detect_float_value(cell_text) shall_we_ignore_the_conversion = ( - (ret in [float('inf'), float('-inf')]) and - self.__ignore_infinity - ) + ret in [float("inf"), float("-inf")] + ) and self.__ignore_infinity if shall_we_ignore_the_conversion: ret = None if ret is None and self.__auto_detect_datetime: @@ -122,7 +125,7 @@ def read_sheet(self, native_sheet): return {sheet.name: sheet.to_array()} def _load_from_file(self): - self._file_handle = open(self._file_name, 'rb') + self._file_handle = open(self._file_name, "rb") self._native_book = self._parse_pdf(self._file_handle) def _load_from_memory(self): @@ -134,7 +137,8 @@ def _parse_pdf(self, file_handle): table.table_number_on_page, table.total_tables_on_page, table.page_number, - table.total_pages) + table.total_pages, + ) yield NamedContent(name, table) def close(self): diff --git a/setup.py b/setup.py index cbda614..499c841 100644 --- a/setup.py +++ b/setup.py @@ -1,14 +1,14 @@ #!/usr/bin/env python3 -# Template by pypi-mobans -import os -import sys import codecs import locale +# Template by pypi-mobans +import os import platform +import sys from shutil import rmtree -from setuptools import Command, setup, find_packages +from setuptools import Command, find_packages, setup PY2 = sys.version_info[0] == 2 PY26 = PY2 and sys.version_info[1] < 7 diff --git a/tests/test_pdfr.py b/tests/test_pdfr.py index 0c38600..2ce005b 100644 --- a/tests/test_pdfr.py +++ b/tests/test_pdfr.py @@ -1,20 +1,43 @@ import os -from nose.tools import eq_ + import pyexcel as p +from nose.tools import eq_ def test_simple_pdf(): - book = p.get_book(file_name=get_fixtures('simple.pdf')) + book = p.get_book(file_name=get_fixtures("simple.pdf")) eq_(book.number_of_sheets(), 1) - eq_(book[0].name, 'Table 1 of 1 on page 1 of 1') + eq_(book[0].name, "Table 1 of 1 on page 1 of 1") def test_complex_pdf(): - book = p.get_book(file_name=get_fixtures('CBP-7857.pdf')) + book = p.get_book(file_name=get_fixtures("CBP-7857.pdf")) peer_look = [ - u'16 Higher education', u'stude', u'nt', u'numb', u'ers', - '', '', '', '', '', '', '', '', '', '', '', '', '', '', - '', '', '', '', ''] + u"16 Higher education", + u"stude", + u"nt", + u"numb", + u"ers", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + ] eq_(book.number_of_sheets(), 5) eq_(book.Table_1_of_1_on_page_16_of_17.row[0], peer_look)