From bb0028f5641dea737c36b436f5a871491ace0520 Mon Sep 17 00:00:00 2001 From: "mathieu.brunot" Date: Tue, 5 Nov 2019 17:38:48 +0100 Subject: [PATCH 1/4] :green_heart: Export build var from travis Signed-off-by: mathieu.brunot --- .travis.yml | 3 +++ test/docker-compose.mariadb.yml | 4 ++-- test/docker-compose.postgres.yml | 4 ++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 6d65d0e5..2996fedc 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,6 +14,8 @@ before_script: - env | sort - dir="test" - export IMAGE_NAME=docker-erpnext-ext:erpnext_ocr-travis + - export BUILD_BRANCH=${TRAVIS_PULL_REQUEST_BRANCH:-${TRAVIS_BRANCH}} + - export BUILD_URL=https://github.com/${TRAVIS_REPO_SLUG} script: - cd "$dir" @@ -41,6 +43,7 @@ script: - docker-compose -f docker-compose.${DATABASE}.yml logs "erpnext_web" - docker-compose -f docker-compose.${DATABASE}.yml ps "erpnext_web" | grep "Up" - echo 'Wait until test finished (1-2 minutes)' && sleep 90 + - docker-compose -f docker-compose.${DATABASE}.yml logs "sut" - docker-compose -f docker-compose.${DATABASE}.yml ps "sut" | grep "Exit 0" # Test container restart - docker-compose -f docker-compose.${DATABASE}.yml down diff --git a/test/docker-compose.mariadb.yml b/test/docker-compose.mariadb.yml index b6fd0f7c..220f370f 100644 --- a/test/docker-compose.mariadb.yml +++ b/test/docker-compose.mariadb.yml @@ -38,8 +38,8 @@ services: context: ./ dockerfile: Dockerfile.${VARIANT} args: - - BUILD_BRANCH=${TRAVIS_BRANCH} - - BUILD_URL=https://github.com/${TRAVIS_REPO_SLUG} + - BUILD_BRANCH=${BUILD_BRANCH} + - BUILD_URL=${BUILD_URL} image: ${IMAGE_NAME} container_name: erpnext_app command: app diff --git a/test/docker-compose.postgres.yml b/test/docker-compose.postgres.yml index 1315d245..a12bc306 100644 --- a/test/docker-compose.postgres.yml +++ b/test/docker-compose.postgres.yml @@ -38,8 +38,8 @@ services: context: ./ dockerfile: Dockerfile.${VARIANT} args: - - BUILD_BRANCH=${TRAVIS_BRANCH} - - 
BUILD_URL=https://github.com/${TRAVIS_REPO_SLUG} + - BUILD_BRANCH=${BUILD_BRANCH} + - BUILD_URL=${BUILD_URL} image: ${IMAGE_NAME} container_name: erpnext_app #restart: always From fe748dcb90e80cd01782ae46e0724150a29ca2d8 Mon Sep 17 00:00:00 2001 From: "mathieu.brunot" Date: Tue, 5 Nov 2019 17:48:32 +0100 Subject: [PATCH 2/4] :green_heart: Fix build URL for travis Signed-off-by: mathieu.brunot --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 2996fedc..9a2f1d86 100644 --- a/.travis.yml +++ b/.travis.yml @@ -15,7 +15,7 @@ before_script: - dir="test" - export IMAGE_NAME=docker-erpnext-ext:erpnext_ocr-travis - export BUILD_BRANCH=${TRAVIS_PULL_REQUEST_BRANCH:-${TRAVIS_BRANCH}} - - export BUILD_URL=https://github.com/${TRAVIS_REPO_SLUG} + - export BUILD_URL=https://github.com/${TRAVIS_PULL_REQUEST_SLUG:-${TRAVIS_REPO_SLUG}} script: - cd "$dir" From 309dde69406c9359850564d66b92eb75a38e3942 Mon Sep 17 00:00:00 2001 From: "mathieu.brunot" Date: Wed, 6 Nov 2019 21:28:45 +0100 Subject: [PATCH 3/4] :memo: Improve README documentation Signed-off-by: mathieu.brunot --- README.md | 88 +++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 63 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index 9ec39f0a..e9a79be9 100644 --- a/README.md +++ b/README.md @@ -8,45 +8,38 @@ ## ERPNext OCR -OCR with [tesseract](https://github.com/tesseract-ocr/tesseract). +> :alembic: **Experimental** Frappe OCR application with [tesseract](https://github.com/tesseract-ocr/tesseract). -#### License +This project is a fork of [ERPNext-OCR](https://github.com/jvfiel/ERPNext-OCR) by [John Vincent Fiel](https://github.com/jvfiel). Its aim is to fix and cleanup the original source code and add some new features. -MIT# ERPNext-OCR +https://discuss.erpnext.com/t/erpnext-ocr-app/33834/7 -## About this project -This project is a fork of [ERPNext-OCR](https://github.com/jvfiel/ERPNext-OCR) by John Vincent Fiel. 
-Its aim is to fix and cleanup the original source code and add some new features. +## :chart_with_upwards_trend: Changes -**Changes** -* See [CHANGELOG](./CHANGELOG.md) +See [CHANGELOG](./CHANGELOG.md) -**Roadmap** -* See [Taiga.io](https://tree.taiga.io/project/monogrammbot-monogrammerpnext_ocr/ "Taiga.io monogrammbot-monogrammerpnext_ocr") +## :bookmark: Roadmap +See [Taiga.io](https://tree.taiga.io/project/monogrammbot-monogrammerpnext_ocr/ "Taiga.io monogrammbot-monogrammerpnext_ocr") -## Sample Screenshot -![Sample Screenshot](https://github.com/jvfiel/ERPNext-OCR/blob/master/erpnext_ocr/erpnext_ocr/Selection_046.png) -## File Being Read -![Sample Screenshot 2](https://github.com/jvfiel/ERPNext-OCR/blob/master/erpnext_ocr/erpnext_ocr/Selection_047.png) +## :construction: Install +**Pre-requisites: tesseract-python and imagemagick** -## Pre-requisites: tesseract-python and imagemagick - -- Install tesseract-ocr, plus imagemagick and ghostscript (to work with pdf files) using this command on Debian: +Install tesseract-ocr, plus imagemagick and ghostscript (to work with pdf files) using this command on Debian: ``` sudo apt-get install tesseract-ocr imagemagick libmagickwand-dev ghostscript ``` -## Installation +**Install Frappe application** - ``` - bench get-app --branch develop erpnext_ocr https://github.com/Monogramm/erpnext_ocr - bench install-app erpnext_ocr - ``` +```sh +bench get-app --branch develop erpnext_ocr https://github.com/Monogramm/erpnext_ocr +bench install-app erpnext_ocr +``` When installing Frappe app, the following python requirements will be installed: * python binding for tesseract, [pytesseract](https://pypi.org/project/pytesseract/) @@ -54,12 +47,23 @@ When installing Frappe app, the following python requirements will be installed: * HTTP library in python, [requests](https://pypi.org/project/requests/) * python binding for imagemagick, [wand](https://pypi.org/project/Wand/) -## Tesseract trained data +## :rocket: Usage + +**Sample 
Screenshot**: + +![Sample Screenshot](./erpnext_ocr/erpnext_ocr/Selection_046.png) + + +**File Being Read**: + +![Sample Screenshot 2](./erpnext_ocr/erpnext_ocr/Selection_047.png) + +### Tesseract trained data In order to use OCR with different languages, you need to install the appropriate trained data files. -Check tesseract Wiki for details https://github.com/tesseract-ocr/tesseract/wiki/Data-Files +Check tesseract Wiki for details: https://github.com/tesseract-ocr/tesseract/wiki/Data-Files -## Known issues +### Known issues * `wand.exceptions.PolicyError: not authorized '/opt/sample.pdf' @ error/constitute.c/ReadImage/412` * This can happen due to security configuration in imagemagick, preventing it to read PDF files. @@ -73,3 +77,37 @@ Check tesseract Wiki for details https://github.com/tesseract-ocr/tesseract/wiki * `OSError: encoder error -2 when writing image file` * This might happen when trying to open a TIFF image, but the real error is "_hidden_" and only displayed in console. * If the original error in console is `Fax3SetupState: Bits/sample must be 1 for Group 3/4 encoding/decoding.` that usually happens when TIFF image compression is not valid / recognized. + +## :white_check_mark: Run tests + +```sh +bench run-tests --profile --app erpnext_ocr +``` + +## :bust_in_silhouette: Authors + +**Monogramm** + +* Website: https://www.monogramm.io +* Github: [@Monogramm](https://github.com/Monogramm) + +**John Vincent Fiel** + +* Github: [@jvfiel](https://github.com/jvfiel) + +## :handshake: Contributing + +Contributions, issues and feature requests are welcome!
Feel free to check the [issues page](https://github.com/Monogramm/erpnext_ocr/issues). +[Check the contributing guide](./CONTRIBUTING.md).
+ +## :thumbsup: Show your support + +Give a :star: if this project helped you! + +## :page_facing_up: License + +Copyright © 2019 [Monogramm](https://github.com/Monogramm).
+This project is [MIT](https://opensource.org/licenses/MIT) licensed. + +*** +_This README was generated with :heart: by [readme-md-generator](https://github.com/kefranabg/readme-md-generator)_ From 4edfee72954a6198af57a6647252fb950e5d80bb Mon Sep 17 00:00:00 2001 From: "mathieu.brunot" Date: Wed, 6 Nov 2019 23:30:31 +0100 Subject: [PATCH 4/4] :bookmark: Set version and update changelog Signed-off-by: mathieu.brunot --- CHANGELOG.md | 11 +++++++++++ erpnext_ocr/__init__.py | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b3030730..ef540a83 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,17 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +### Added + +### Changed + +### Fixed + +### Removed + + +## [0.9.0] - 2019-11-06 + ### Added - PDF management in `OCR Read` - `OCR Language` to manage available tesseract traindata files diff --git a/erpnext_ocr/__init__.py b/erpnext_ocr/__init__.py index 95d1338e..32840add 100644 --- a/erpnext_ocr/__init__.py +++ b/erpnext_ocr/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals -__version__ = '0.0.1' +__version__ = '0.9.0'