Skip to content

Commit

Permalink
💄 beautify all the code
Browse files: browse the repository at this point in the history
  • Loading branch information
chfw committed Mar 14, 2019
1 parent 8ec1c8b commit e170aee
Show file tree
Hide file tree
Showing 8 changed files with 75 additions and 38 deletions.
1 change: 0 additions & 1 deletion .moban.d/docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,3 @@ def setup(app):


{%endblock%}

2 changes: 1 addition & 1 deletion .moban.d/setup.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{%extends "setup.py.jj2"%}

{%block platform_block%}
{%endblock%}
{%endblock%}
8 changes: 8 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,11 @@ document:

spelling:
sphinx-build -b spelling docs/source/ docs/build/spelling

# Sort imports with isort, then reformat the package and tests with black
# at a 79-column line length.
# `$$` escapes the dollar so that the *shell* performs the command
# substitution. A single `$(find ...)` is read by make as a reference to an
# undefined variable and expands to nothing, so isort would not receive the
# intended file list.
format:
	isort -y $$(find pyexcel_pdfr tests -name "*.py")
	black -l 79 pyexcel_pdfr
	black -l 79 tests

# Run the project's lint script.
lint:
	bash lint.sh
13 changes: 8 additions & 5 deletions pyexcel_pdfr/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,18 @@
:copyright: (c) 2015-2017 by Onni Software Ltd & its contributors
:license: New BSD License
"""
from ._version import __version__, __author__ # flake8: noqa
# flake8: noqa
from pyexcel_io.io import get_data as read_data
from pyexcel_io.io import isstream
from pyexcel_io.plugins import IOPluginInfoChain
from pyexcel_io.io import get_data as read_data, isstream

__FILE_TYPE__ = 'pdf'
from ._version import __author__, __version__

__FILE_TYPE__ = "pdf"
IOPluginInfoChain(__name__).add_a_reader(
relative_plugin_class_path='pdfr.PdfFile',
relative_plugin_class_path="pdfr.PdfFile",
file_types=[__FILE_TYPE__],
stream_type='binary'
stream_type="binary",
)


Expand Down
4 changes: 2 additions & 2 deletions pyexcel_pdfr/_version.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
__version__ = '0.5.0-rc1'
__author__ = 'C.W.'
__version__ = "0.5.0-rc1"
__author__ = "C.W."
40 changes: 22 additions & 18 deletions pyexcel_pdfr/pdfr.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,22 @@
:copyright: (c) 2015-2017 by Onni Software Ltd & its contributors
:license: New BSD License
"""
import pyexcel_io.service as service
from pdftables import get_tables
from pyexcel_io.book import BookReader
from pyexcel_io.sheet import SheetReader, NamedContent
from pyexcel_io._compact import OrderedDict
import pyexcel_io.service as service
from pyexcel_io.book import BookReader
from pyexcel_io.sheet import NamedContent, SheetReader


class PdfTable(SheetReader):
def __init__(self, sheet, auto_detect_int=True,
auto_detect_float=True,
auto_detect_datetime=True,
**keywords):
def __init__(
self,
sheet,
auto_detect_int=True,
auto_detect_float=True,
auto_detect_datetime=True,
**keywords
):
SheetReader.__init__(self, sheet, **keywords)
self.__auto_detect_int = auto_detect_int
self.__auto_detect_float = auto_detect_float
Expand Down Expand Up @@ -47,10 +51,10 @@ def column_iterator(self, row):
self.__column_span[index] -= 1
if self.__column_span[index] == 0:
del self.__column_span[index]
yield ''
yield ""
index += 1

if not hasattr(cell, 'topleft'):
if not hasattr(cell, "topleft"):
yield cell
index += 1
continue
Expand All @@ -63,15 +67,15 @@ def column_iterator(self, row):
for offset in range(row_span):
if offset > 0:
# for next cell, give full col span
self.__column_span[index+offset] = col_span
self.__column_span[index + offset] = col_span
else:
# for current cell, give -1 because it has been
# yielded
self.__column_span[index+offset] = col_span - 1
self.__column_span[index + offset] = col_span - 1
else:
# no col span found, so just repeat in the same row
for _ in range(row_span-1):
yield ''
for _ in range(row_span - 1):
yield ""
index += 1
else:
if col_span > 1:
Expand All @@ -86,9 +90,8 @@ def __convert_cell(self, cell_text):
if ret is None and self.__auto_detect_float:
ret = service.detect_float_value(cell_text)
shall_we_ignore_the_conversion = (
(ret in [float('inf'), float('-inf')]) and
self.__ignore_infinity
)
ret in [float("inf"), float("-inf")]
) and self.__ignore_infinity
if shall_we_ignore_the_conversion:
ret = None
if ret is None and self.__auto_detect_datetime:
Expand Down Expand Up @@ -122,7 +125,7 @@ def read_sheet(self, native_sheet):
return {sheet.name: sheet.to_array()}

def _load_from_file(self):
self._file_handle = open(self._file_name, 'rb')
self._file_handle = open(self._file_name, "rb")
self._native_book = self._parse_pdf(self._file_handle)

def _load_from_memory(self):
Expand All @@ -134,7 +137,8 @@ def _parse_pdf(self, file_handle):
table.table_number_on_page,
table.total_tables_on_page,
table.page_number,
table.total_pages)
table.total_pages,
)
yield NamedContent(name, table)

def close(self):
Expand Down
8 changes: 4 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
#!/usr/bin/env python3

# Template by pypi-mobans
import os
import sys
import codecs
import locale
# Template by pypi-mobans
import os
import platform
import sys
from shutil import rmtree

from setuptools import Command, setup, find_packages
from setuptools import Command, find_packages, setup

PY2 = sys.version_info[0] == 2
PY26 = PY2 and sys.version_info[1] < 7
Expand Down
37 changes: 30 additions & 7 deletions tests/test_pdfr.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,43 @@
import os
from nose.tools import eq_

import pyexcel as p
from nose.tools import eq_


def test_simple_pdf():
book = p.get_book(file_name=get_fixtures('simple.pdf'))
book = p.get_book(file_name=get_fixtures("simple.pdf"))
eq_(book.number_of_sheets(), 1)
eq_(book[0].name, 'Table 1 of 1 on page 1 of 1')
eq_(book[0].name, "Table 1 of 1 on page 1 of 1")


def test_complex_pdf():
book = p.get_book(file_name=get_fixtures('CBP-7857.pdf'))
book = p.get_book(file_name=get_fixtures("CBP-7857.pdf"))
peer_look = [
u'16 Higher education', u'stude', u'nt', u'numb', u'ers',
'', '', '', '', '', '', '', '', '', '', '', '', '', '',
'', '', '', '', '']
u"16 Higher education",
u"stude",
u"nt",
u"numb",
u"ers",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
]
eq_(book.number_of_sheets(), 5)
eq_(book.Table_1_of_1_on_page_16_of_17.row[0], peer_look)

Expand Down

0 comments on commit e170aee

Please sign in to comment.