forked from ad-freiburg/pdfact
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Dockerfile
34 lines (23 loc) · 1.19 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# Builds pdfact from source with Maven and, by default, starts the class
# pdfact.api.PdfApi from the resulting jar.
FROM ubuntu:20.04

# The project is copied to and built in the filesystem root; the ENTRYPOINT
# below expects the jar at /bin/pdfact.jar.
# NOTE(review): presumably the Maven build writes bin/pdfact.jar relative to
# the project root -- confirm against pom.xml before changing WORKDIR.
WORKDIR /

# Install Maven.
# - No blanket `apt-get upgrade`: rebuild on a freshly pulled base image to
#   pick up patched system packages instead.
# - DEBIAN_FRONTEND is set inline so package configuration can never wait for
#   interactive input, without leaking the variable into the runtime env.
# - The apt package lists are removed in the same layer that downloads them,
#   so they do not end up in the image.
# NOTE(review): recommended packages are intentionally still installed so the
# set of packages available to the build stays the same as before.
RUN apt-get update -y \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y maven \
    && rm -rf /var/lib/apt/lists/*

# Copy the Maven modules, the shared resources and the parent POM.
COPY pdfact-cli ./pdfact-cli
COPY pdfact-api ./pdfact-api
COPY pdfact-core ./pdfact-core
COPY resources ./resources
COPY pom.xml .

# Build all modules; tests are skipped during the image build.
RUN mvn install -DskipTests

# Documents the port of the service (EXPOSE does not publish it).
# NOTE(review): presumably the port pdfact.api.PdfApi listens on -- confirm.
EXPOSE 4567

# Define the entrypoint.
# The jar is put on the classpath by ENTRYPOINT and the main class is supplied
# by CMD. Arguments passed to `docker run` after the image name replace CMD,
# so they must start with the main class to run.
ENTRYPOINT ["java", "-cp", "/bin/pdfact.jar"]
CMD ["pdfact.api.PdfApi"]
# Build image.
# docker build -t pdfact .
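# NOTE(review): the ENTRYPOINT is `java -cp /bin/pdfact.jar`, and any arguments
# given after the image name replace the default CMD (the main class). The
# `docker run` examples below therefore presumably need the main class to run
# as their first argument -- confirm which class provides the command-line
# interface and update the examples.
# Get usage info.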
# docker run --rm --name pdfact pdfact --help
# Extract all paragraphs from a single PDF and print the output to stdout.
# docker run --rm --name pdfact -v <path-to-pdf>:/input.pdf pdfact input.pdf
# Extract the body paragraphs from a single PDF, write the output to output.xml (in XML format)
# and create a visualization file.
# NOTE: In the command below, <path-to-xml-file> and <path-to-visualization-pdf-file> must be paths to existing files on the host; otherwise, Docker creates and mounts directories (instead of files).
# docker run --rm --name pdfact -v <path-to-pdf>:/input.pdf -v <path-to-xml-file>:/output.xml -v <path-to-visualization-pdf-file>:/visualization.pdf pdfact input.pdf output.xml --include-roles body --visualize visualization.pdf --format xml