From 4a501369afc0f8ae4672de63b889bd882afa1a48 Mon Sep 17 00:00:00 2001 From: Josh Stegmaier <104993387+joshuastegmaier@users.noreply.github.com> Date: Thu, 20 Jun 2024 10:50:42 -0400 Subject: [PATCH] Install non-English languages with tesseract (#2421) --- .github/workflows/test.yml | 3 ++- Dockerfile | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 89f61a024..de9b6c208 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -72,7 +72,8 @@ jobs: run: | sudo apt-get update -qy && sudo apt-get dist-upgrade -qy && sudo apt-get install -qy \ libmemcached-dev libz-dev libfreetype6-dev libtiff-dev \ - libjpeg-dev libopenjp2-7-dev libwebp-dev zlib1g-dev libpq-dev + libjpeg-dev libopenjp2-7-dev libwebp-dev zlib1g-dev libpq-dev \ + tesseract-ocr tesseract-ocr-all - name: Install node and npm uses: actions/setup-node@v4 diff --git a/Dockerfile b/Dockerfile index e3b729e16..42e16d858 100644 --- a/Dockerfile +++ b/Dockerfile @@ -26,7 +26,7 @@ RUN apt-get update -qy && apt-get dist-upgrade -qy && apt-get install -o Dpkg::O # Weasyprint requirements libpango-1.0-0 libharfbuzz0b libpangoft2-1.0-0 \ # Tesseract - tesseract-ocr \ + tesseract-ocr tesseract-ocr-all \ nodejs node-gyp npm && apt-get -qy autoremove && apt-get -qy autoclean RUN locale-gen en_US.UTF-8