Skip to content

Commit

Permalink
Updated build to support non-WASM version
Browse files Browse the repository at this point in the history
  • Loading branch information
Balearica committed Aug 21, 2023
1 parent 99f1078 commit 24dbf22
Show file tree
Hide file tree
Showing 20 changed files with 820 additions and 768 deletions.
18 changes: 10 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,16 @@ tesseract.js-core
Core part of [tesseract.js](https://github.com/naptha/tesseract.js), which compiles original tesseract from C to JavaScript WebAssembly.


## Compiling

To build tesseract-core.js by yourself, please install [docker](https://www.docker.com/) and run:

```
$ bash build-with-docker.sh
```

The generated files will be stored in the root path. When compiling, errors sometimes occur due to race conditions (some dependencies do not appear to compile properly in parallel). Re-running the build generally resolves them.

## Structure

1. Build scripts are in `build-scripts` folder
Expand Down Expand Up @@ -48,11 +58,3 @@ As we leverage git-submodule to manage dependencies, remember to add recursive w
```
$ git clone --recursive https://github.com/naptha/tesseract.js-core
```

To build tesseract-core.js by yourself, please install [docker](https://www.docker.com/) and run:

```
$ bash build-with-docker.sh
```

The generated files will be stored in root path.
6 changes: 3 additions & 3 deletions build-scripts/build-giflib.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ CONF_FLAGS=(
)

echo "CONF_FLAGS=${CONF_FLAGS[@]}"
(cd $LIB_PATH && autoreconf -f -i && emconfigure ./configure -C "${CONF_FLAGS[@]}")
if [ $BUILD_CLEAN = 1 ]
then
emmake make -C $LIB_PATH clean
make -C $LIB_PATH distclean || echo "Failed to run make -C $LIB_PATH distclean"
fi
emmake make -C $LIB_PATH install -j$PROC
(cd $LIB_PATH && autoreconf -f -i && $CONFIGURE_CMD ./configure -C "${CONF_FLAGS[@]}")
$MAKE_CMD -C $LIB_PATH install -j$PROC
23 changes: 14 additions & 9 deletions build-scripts/build-leptonica.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,24 +6,29 @@ source $(dirname $0)/var.sh
LIB_PATH=third_party/leptonica
CXXFLAGS="-I$INCLUDE_DIR $OPTIM_FLAGS"
CM_FLAGS=(
-DBUILD_SHARED_LIBS=OFF
-DCMAKE_INSTALL_PREFIX=$BUILD_DIR
-DCMAKE_TOOLCHAIN_FILE=$TOOLCHAIN_FILE
-DCMAKE_PREFIX_PATH=$BUILD_DIR
-DGIF_LIBRARY=$LIB_DIR
-DGIF_LIBRARY=$LIB_DIR/libgif.a
-DGIF_INCLUDE_DIR=$INCLUDE_DIR
-DZLIB_LIBRARY=$LIB_DIR
-DZLIB_LIBRARY=$LIB_DIR/libz.a
-DZLIB_INCLUDE_DIR=$INCLUDE_DIR
-DPNG_LIBRARY=$LIB_DIR
-DPNG_LIBRARY=$LIB_DIR/libpng.a
-DPNG_PNG_INCLUDE_DIR=$INCLUDE_DIR
-DJPEG_LIBRARY=$LIB_DIR
-DJPEG_LIBRARY=$LIB_DIR/libjpeg.a
-DJPEG_INCLUDE_DIR=$INCLUDE_DIR
-DTIFF_LIBRARY=$LIB_DIR
-DTIFF_LIBRARY=$LIB_DIR/libtiff.a
-DTIFF_INCLUDE_DIR=$INCLUDE_DIR
-DWEBP_LIBRARY=$LIB_DIR
-DWEBP_LIBRARY=$LIB_DIR/libwebp.a
-DWEBP_INCLUDE_DIR=$INCLUDE_DIR
-DWEBPMUX_INCLUDE_DIR=$INCLUDE_DIR
-DHAVE_LIBJP2K=0
)

# Only .wasm builds pass the Emscripten toolchain file to CMake; it is omitted
# for the non-WASM build. The element is quoted so that a toolchain path
# containing spaces stays a single argument.
if [ "$BUILD_WASM" = 1 ]; then
  CM_FLAGS+=("-DCMAKE_TOOLCHAIN_FILE=$TOOLCHAIN_FILE")
fi

echo "CM_FLAGS=${CM_FLAGS[@]}"

cd $LIB_PATH
Expand All @@ -33,6 +38,6 @@ then
fi
mkdir -p build
cd build
emmake cmake .. -DCMAKE_C_FLAGS="$CXXFLAGS" ${CM_FLAGS[@]}
emmake make install -j$PROC
$CMAKE_CMD .. -DCMAKE_C_FLAGS="$CXXFLAGS" ${CM_FLAGS[@]}
$MAKE_CMD install -j$PROC
cd $ROOT_DIR
12 changes: 8 additions & 4 deletions build-scripts/build-libjpeg.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,13 @@ LIB_PATH=third_party/libjpeg
CXXFLAGS="$OPTIM_FLAGS"
CM_FLAGS=(
-DCMAKE_INSTALL_PREFIX=$BUILD_DIR
-DCMAKE_TOOLCHAIN_FILE=$TOOLCHAIN_FILE
-DBUILD_SHARED_LIBS=OFF
)

# Only .wasm builds pass the Emscripten toolchain file to CMake.
# CM_FLAGS is appended to as a plain array: bash cannot export array variables
# to child processes, so the former `export` had no effect, and the other build
# scripts append without it.
if [ "$BUILD_WASM" = 1 ]; then
  CM_FLAGS+=("-DCMAKE_TOOLCHAIN_FILE=$TOOLCHAIN_FILE")
fi

echo "CM_FLAGS=${CM_FLAGS[@]}"

cd $LIB_PATH
Expand All @@ -19,10 +23,10 @@ then
fi
mkdir -p build
cd build
emmake cmake .. -DCMAKE_C_FLAGS="$CXXFLAGS" ${CM_FLAGS[@]}
$CMAKE_CMD .. -DCMAKE_C_FLAGS="$CXXFLAGS" ${CM_FLAGS[@]}
if [ $BUILD_CLEAN = 1 ]
then
emmake make clean
$MAKE_CMD clean
fi
emmake make install -j$PROC
$MAKE_CMD install -j$PROC
cd $ROOT_DIR
6 changes: 3 additions & 3 deletions build-scripts/build-libpng.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@ then
fi
mkdir -p build
cd build
emmake cmake .. -DCMAKE_C_FLAGS="$CXXFLAGS" ${CM_FLAGS[@]}
$CMAKE_CMD .. -DCMAKE_C_FLAGS="$CXXFLAGS" ${CM_FLAGS[@]}
if [ $BUILD_CLEAN = 1 ]
then
emmake make clean
$MAKE_CMD clean
fi
emmake make install -j$PROC
$MAKE_CMD install -j$PROC
cd $ROOT_DIR
7 changes: 4 additions & 3 deletions build-scripts/build-libtiff.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,10 @@ CONF_FLAGS=(
)

echo "CONF_FLAGS=${CONF_FLAGS[@]}"
(cd $LIB_PATH && autoreconf -f -i && emconfigure ./configure -C "${CONF_FLAGS[@]}")

if [ $BUILD_CLEAN = 1 ]
then
emmake make -C $LIB_PATH clean
make -C $LIB_PATH distclean || echo "Failed to run make -C $LIB_PATH distclean"
fi
emmake make -C $LIB_PATH install -j$PROC
(cd $LIB_PATH && autoreconf -f -i && $CONFIGURE_CMD ./configure -C "${CONF_FLAGS[@]}")
$MAKE_CMD -C $LIB_PATH install -j$PROC
23 changes: 14 additions & 9 deletions build-scripts/build-libwebp.sh
Original file line number Diff line number Diff line change
@@ -1,24 +1,24 @@
#!/bin/bash

set -euo pipefail
echo $(dirname $0)/var.sh
source $(dirname $0)/var.sh

LIB_PATH=third_party/libwebp
CXXFLAGS="-I$INCLUDE_DIR $OPTIM_FLAGS"
LDFLAGS="-L$LIB_DIR"
CM_FLAGS=(
-DCMAKE_INSTALL_PREFIX=$BUILD_DIR
-DCMAKE_TOOLCHAIN_FILE=$TOOLCHAIN_FILE
-DBUILD_SHARED_LIBS=OFF
-DZLIB_LIBRARY=$LIB_DIR
-DZLIB_LIBRARY=$LIB_DIR/libz.a
-DZLIB_INCLUDE_DIR=$INCLUDE_DIR
-DPNG_LIBRARY=$LIB_DIR
-DPNG_LIBRARY=$LIB_DIR/libpng.a
-DPNG_PNG_INCLUDE_DIR=$INCLUDE_DIR
-DJPEG_LIBRARY=$LIB_DIR
-DJPEG_LIBRARY=$LIB_DIR/libjpeg.a
-DJPEG_INCLUDE_DIR=$INCLUDE_DIR
-DTIFF_LIBRARY=$LIB_DIR
-DTIFF_LIBRARY=$LIB_DIR/libtiff.a
-DTIFF_INCLUDE_DIR=$INCLUDE_DIR
-DGIF_LIBRARY=$LIB_DIR
-DGIF_LIBRARY=$LIB_DIR/libgif.a
-DGIF_INCLUDE_DIR=$INCLUDE_DIR
-DWEBP_ENABLE_SIMD=OFF
-DWEBP_BUILD_CWEBP=OFF
Expand All @@ -32,6 +32,11 @@ CM_FLAGS=(
-DWEBP_BUILD_ANIM_UTILS=OFF
-DWEBP_BUILD_EXTRAS=OFF
)

# Only .wasm builds pass the Emscripten toolchain file to CMake.
# CM_FLAGS is appended to as a plain array: bash cannot export array variables
# to child processes, so the former `export` had no effect, and the other build
# scripts append without it.
if [ "$BUILD_WASM" = 1 ]; then
  CM_FLAGS+=("-DCMAKE_TOOLCHAIN_FILE=$TOOLCHAIN_FILE")
fi

echo "CM_FLAGS=${CM_FLAGS[@]}"

cd $LIB_PATH
Expand All @@ -41,10 +46,10 @@ then
fi
mkdir -p build
cd build
emmake cmake .. -DCMAKE_C_FLAGS="$CXXFLAGS" ${CM_FLAGS[@]}
cmake .. -DCMAKE_C_FLAGS="$CXXFLAGS" ${CM_FLAGS[@]}
if [ $BUILD_CLEAN = 1 ]
then
emmake make clean
make clean
fi
emmake make install -j$PROC
make install -j$PROC
cd $ROOT_DIR
4 changes: 2 additions & 2 deletions build-scripts/build-openlibm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,6 @@ source $(dirname $0)/var.sh
LIB_PATH=third_party/openlibm
if [ $BUILD_CLEAN = 1 ]
then
emmake make -C $LIB_PATH clean
$MAKE_CMD -C $LIB_PATH clean
fi
emmake make -C $LIB_PATH prefix=$BUILD_DIR install -j$PROC
$MAKE_CMD -C $LIB_PATH prefix=$BUILD_DIR install-static -j$PROC
25 changes: 20 additions & 5 deletions build-scripts/build-tesseract.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,17 @@ LIB_PATH=third_party/tesseract
CXXFLAGS="$OPTIM_FLAGS"
CM_FLAGS=(
-DCMAKE_PREFIX_PATH=$BUILD_DIR
-DCMAKE_TOOLCHAIN_FILE=$TOOLCHAIN_FILE
-DLeptonica_DIR=../leptonica/build
-DOPENMP_BUILD=OFF
-DBUILD_TRAINING_TOOLS=OFF
-DGRAPHICS_DISABLED=ON
)

# Flags that apply to the .wasm build only: the Emscripten toolchain file and
# the WASM_BUILD switch. Both are appended in one step; the toolchain element
# is quoted so a path containing spaces stays a single argument.
if [ "$BUILD_WASM" = 1 ]; then
  CM_FLAGS+=(
    "-DCMAKE_TOOLCHAIN_FILE=$TOOLCHAIN_FILE"
    -DWASM_BUILD=ON
  )
fi

echo "CM_FLAGS=${CM_FLAGS[@]}"

cd $LIB_PATH
Expand All @@ -18,7 +26,14 @@ then
fi
mkdir -p build
cd build
emmake cmake .. -DCMAKE_CXX_FLAGS="$CXXFLAGS" ${CM_FLAGS[@]} -D HAVE_SSE4_1=1
emmake make -j${PROC}
emmake cmake .. -DCMAKE_CXX_FLAGS="$CXXFLAGS" ${CM_FLAGS[@]} -D HAVE_SSE4_1=0
emmake make -j${PROC}

## For the .wasm version, two variants are built in sequence: first with SIMD
## enabled (HAVE_SSE4_1=1), then with it disabled (HAVE_SSE4_1=0).
## The non-WASM version is configured and built once, without HAVE_SSE4_1.
##
## $CMAKE_CMD and $MAKE_CMD are intentionally left unquoted: they may hold a
## wrapper plus a command (e.g. "emmake cmake") that must split into words.
## CM_FLAGS is expanded quoted so each array element stays one argument.
if [ "$BUILD_WASM" = 1 ]; then
  for simd in 1 0; do
    $CMAKE_CMD .. -DCMAKE_CXX_FLAGS="$CXXFLAGS" "${CM_FLAGS[@]}" -D HAVE_SSE4_1="$simd"
    $MAKE_CMD -j"${PROC}"
  done
else
  $CMAKE_CMD .. -DCMAKE_CXX_FLAGS="$CXXFLAGS" "${CM_FLAGS[@]}"
  $MAKE_CMD -j"${PROC}"
fi
15 changes: 10 additions & 5 deletions build-scripts/build-zlib.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,23 +7,28 @@ LIB_PATH=third_party/zlib
CXXFLAGS="$OPTIM_FLAGS"
CM_FLAGS=(
-DCMAKE_INSTALL_PREFIX=$BUILD_DIR
-DCMAKE_TOOLCHAIN_FILE=$TOOLCHAIN_FILE
-DBUILD_SHARED_LIBS=OFF
-DSKIP_INSTALL_FILES=ON
)

# Only .wasm builds pass the Emscripten toolchain file to CMake.
# CM_FLAGS is appended to as a plain array: bash cannot export array variables
# to child processes, so the former `export` had no effect, and the other build
# scripts append without it.
if [ "$BUILD_WASM" = 1 ]; then
  CM_FLAGS+=("-DCMAKE_TOOLCHAIN_FILE=$TOOLCHAIN_FILE")
fi

echo "CM_FLAGS=${CM_FLAGS[@]}"

cd $LIB_PATH
if [ $BUILD_CLEAN = 1 ]
then
rm -rf build zconf.h
rm -rf build
fi
mkdir -p build
cd build
emmake cmake .. -DCMAKE_C_FLAGS="$CXXFLAGS" ${CM_FLAGS[@]}
$CMAKE_CMD .. -DCMAKE_C_FLAGS="$CXXFLAGS" ${CM_FLAGS[@]}
if [ $BUILD_CLEAN = 1 ]
then
emmake make clean
$MAKE_CMD clean
fi
emmake make install -j${PROC}
## Only using 1 thread for now, as errors are often thrown when more are used
$MAKE_CMD install -j1
cd $ROOT_DIR
28 changes: 22 additions & 6 deletions build-scripts/var.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,24 +4,38 @@

set -euo pipefail

## Build to .wasm
BUILD_WASM=1

# Include llvm binaries
export PATH=$PATH:$EMSDK/upstream/bin
# Select the configure/make/cmake invocations for the chosen target.
#   BUILD_WASM=1 : add $EMSDK/upstream/bin (llvm binaries) to PATH and run the
#                  tools through the Emscripten wrappers.
#   otherwise    : run make/cmake directly; CONFIGURE_CMD is empty so that
#                  "$CONFIGURE_CMD ./configure" runs ./configure itself.
# The build scripts expand these variables unquoted so that values such as
# "emmake make" split into the wrapper and the wrapped command.
if [ "$BUILD_WASM" = 1 ]; then
  export PATH="$PATH:$EMSDK/upstream/bin"
  export CONFIGURE_CMD="emconfigure"
  export MAKE_CMD="emmake make"
  export CMAKE_CMD="emmake cmake"
else
  export CONFIGURE_CMD=""
  export MAKE_CMD="make"
  export CMAKE_CMD="cmake"
fi


# Build everything from scratch (rather than any incremental changes)
# This should always be set to 1 in the Git repo, and builds should always be run with BUILD_CLEAN=1 before pushing.
# However, it reduces compile time during development to set BUILD_CLEAN=0.
BUILD_CLEAN=0
BUILD_CLEAN=1

# Number of parallel make jobs: one fewer than the number of "processor"
# entries in /proc/cpuinfo, but never less than 1. The plain "count - 1"
# formula produced 0 on a single-core machine, and make rejects -j0.
# A missing or unreadable /proc/cpuinfo is deliberately treated as one
# processor, hence the silenced grep and the `|| true`.
_cpu_count=$(grep -c '^processor' /proc/cpuinfo 2>/dev/null || true)
PROC=$(( ${_cpu_count:-1} > 1 ? ${_cpu_count:-1} - 1 : 1 ))

# Flags for code optimization, focus on speed instead
# of size
OPTIM_FLAGS=(
-O3
)

if [[ "$OSTYPE" == "linux-gnu"* ]]; then
if [[ "$OSTYPE" == "linux-gnu"* ]] && [ $BUILD_WASM = 1 ]; then
# Use closure compiler only in linux environment
OPTIM_FLAGS=(
"${OPTIM_FLAGS[@]}"
Expand All @@ -44,7 +58,9 @@ INCLUDE_DIR=$BUILD_DIR/include
EM_PKG_CONFIG_PATH=$BUILD_DIR/lib/pkgconfig

# Toolchain file path for cmake
TOOLCHAIN_FILE=$EMSDK/upstream/emscripten/cmake/Modules/Platform/Emscripten.cmake
# TOOLCHAIN_FILE is only defined for .wasm builds. Because this file runs with
# `set -u`, build scripts must reference it only under the same BUILD_WASM
# guard; an unguarded use in a non-WASM build aborts with "unbound variable".
if [ "$BUILD_WASM" = 1 ]; then
  TOOLCHAIN_FILE="$EMSDK/upstream/emscripten/cmake/Modules/Platform/Emscripten.cmake"
fi

CFLAGS="-I$BUILD_DIR/include $OPTIM_FLAGS"
CXXFLAGS="-I$BUILD_DIR/include $OPTIM_FLAGS"
Expand Down
3 changes: 1 addition & 2 deletions build-with-docker.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,8 @@ set -euo pipefail

EM_VERSION=3.1.18

docker pull emscripten/emsdk:$EM_VERSION
# docker pull emscripten/emsdk:$EM_VERSION
docker run \
--rm \
-v $PWD:/src \
-v $PWD/wasm/cache:/emsdk/upstream/emscripten/cache \
emscripten/emsdk:$EM_VERSION \
Expand Down
20 changes: 10 additions & 10 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,25 +6,25 @@ SCRIPT_ROOT=$(dirname $0)/build-scripts

# verify Emscripten version
emcc -v
# install dependencies
echo "installing dependencies"
$SCRIPT_ROOT/install-deps.sh
# build zlib
echo "building zlib"
$SCRIPT_ROOT/build-zlib.sh
# build libtiff
echo "building libtiff"
$SCRIPT_ROOT/build-libtiff.sh
# build openlibm
echo "building openlibm"
$SCRIPT_ROOT/build-openlibm.sh
# build giflib
echo "building giflib"
$SCRIPT_ROOT/build-giflib.sh
# build libpng
echo "building libpng"
$SCRIPT_ROOT/build-libpng.sh
# build libjpeg
echo "building libjpeg"
$SCRIPT_ROOT/build-libjpeg.sh
# build libwebp
echo "building libwebp"
$SCRIPT_ROOT/build-libwebp.sh
# build leptonica
echo "building leptonica"
$SCRIPT_ROOT/build-leptonica.sh
# build tesseract
echo "building tesseract"
$SCRIPT_ROOT/build-tesseract.sh

cp ./third_party/tesseract/build/bin/* .
Loading

0 comments on commit 24dbf22

Please sign in to comment.