Skip to content

Commit

Permalink
eux-pdf-flattener
Browse files Browse the repository at this point in the history
  • Loading branch information
walberg committed Oct 18, 2024
1 parent 6320d78 commit 8201fc2
Show file tree
Hide file tree
Showing 6 changed files with 27 additions and 211 deletions.
6 changes: 3 additions & 3 deletions .nais/03-start-pm2-gulp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ pm2 start google-chrome \
--disable-crash-reporter \
--no-crashpad \
--disable-gpu \
--disable-extensions
--disable-extensions \
--run-all-compositor-stages-before-draw \
--disable-infobars

# remove root and sandbox-setting
echo "Copying"
Expand All @@ -45,5 +47,3 @@ cd /app

echo "$PWD"
ls /app
# npx gulp server
# tail -f /dev/null
37 changes: 5 additions & 32 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -52,26 +52,20 @@ RUN wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.d

RUN apt-get update && apt -y install ./google-chrome-stable_current_amd64.deb

# RUN wget https://mirror.cs.uchicago.edu/google-chrome/pool/main/g/google-chrome-stable/google-chrome-stable_129.0.6668.89-1_amd64.deb
# RUN apt-get update && apt -y install ./google-chrome-stable_129.0.6668.89-1_amd64.deb

RUN mkdir /home/apprunner
RUN mkdir pdf.js
RUN chmod 777 /home/apprunner
RUN chmod 777 pdf.js
RUN cd pdf.js && npm install -g gulp-cli
RUN npm install -g -save html-pdf-chrome
# RUN npm install -g chromedriver
RUN npm install -g pm2
RUN npm install -g -save pdfjs-dist@3.11.174
COPY ./print.js ./xfa.pdf ./count.js ./medical.pdf /
COPY ./count.js /
RUN mkdir tmppdf
RUN chmod 777 tmppdf
COPY ./xfa.pdf tmppdf/xfa.pdf
RUN chmod 777 /print.js
RUN chmod 777 /count.js
# RUN chmod 777 /chrome.sh
# RUN chmod 777 /entrypoint.sh
RUN chmod 777 /xfa.pdf
RUN chmod 777 /medical.pdf
RUN chmod 777 tmppdf/xfa.pdf
RUN chown apprunner pdf.js

# RUN ls -l
Expand All @@ -81,41 +75,20 @@ ENV PATH $NVM_DIR/versions/node/v$NODE_VERSION/bin:$PATH


RUN git clone https://github.com/mozilla/pdf.js.git
# RUN ls -l
RUN cd pdf.js && npm install

#RUN cd pdf.js && npm install
# RUN cd pdf.js && npm link module gulp

RUN cd pdf.js && mkdir out && mkdir in
# RUN cp tmppdf/xfa.pdf pdf.js/in/xfa.pdf
USER root
RUN cd pdf.js && npm link gulp
USER apprunner
RUN chmod -R 777 pdf.js
RUN google-chrome --version


# RUN file="$(ls -1 /usr/local/ -al)" && echo $file
# RUN file="$(ls -1 /usr/local/nvm/versions/node -al)" && echo $file





# ADD eux-pdf-flattener-webapp/target/eux-pdf-flattener.jar /app/app.jar

# CMD ["node" , "print.js"]
#ENTRYPOINT ["/entrypoint.sh"]
# CMD ["tail", "-f", "/dev/null"]
#CMD ["/chrome.sh"]
COPY ./.nais/03-start-pm2-gulp.sh /init-scripts/03-start-pm2-gulp.sh

COPY eux-pdf-flattener-webapp/target/eux-pdf-flattener.jar /app/app.jar
EXPOSE 8080
# EXPOSE 8888

WORKDIR /app

# ENTRYPOINT ["/entrypoint.sh"]

# CMD ["tail", "-f", "/dev/null"]
Original file line number Diff line number Diff line change
Expand Up @@ -13,42 +13,12 @@ import org.springframework.stereotype.Service
import java.io.*
import java.time.Duration
import java.util.*
import java.util.concurrent.TimeUnit


@Service
class FlattenPdfService(
val om: ObjectMapper,
) {
/*
val options = ChromeOptions()
.addArguments(
// "--headless=new",
"--headless",
"--disable-infobars",
"--disable-extensions",
"--disable-popup-blocking",
// "--remote-debugging-pipe",
"--run-all-compositor-stages-before-draw",
"--disable-gpu",
// "--virtual-time-budget=10000",
"--no-pdf-header-footer",
"--no-sandbox",
"--interpreter=none",
"--disable-translate",
"--disable-background-networking",
"--safebrowsing-disable-auto-update",
"--disable-sync",
"--metrics-recording-only",
"--disable-default-apps",
"--no-first-run",
"--mute-audio",
"--hide-scrollbars",
"--remote-debugging-port=9222",
"--disable-crash-reporter",
"--no-crashpad"
)
*/
val options = ChromeOptions()
.setExperimentalOption("debuggerAddress","127.0.0.1:9222")

Expand All @@ -58,39 +28,41 @@ class FlattenPdfService(
"paperHeight" to 11.69
)


val log = logger {}

val inPath= "/tmp/pdf.js/in/"
val outPath = "/tmp/pdf.js/out/"
val printJsPath = "/print.js"
val countJsPath = "/count.js"
val timeout = 55;
val charPool : List<Char> = ('a'..'z') + ('A'..'Z') + ('0'..'9')
val pdf = ".pdf"
val basePdfJsUrl = "http://localhost:8888/web/viewer.html"


fun flattenPdf(incomingPdf: ByteArray): ByteArray {
val randomName = randomStringByKotlinCollectionRandom()
val pdf = ".pdf"
File(inPath + randomName + pdf).writeBytes(incomingPdf)
val builder = ProcessBuilder().command("node", countJsPath, inPath + randomName + pdf)
val countProcess = builder.start()
val exitCode = countProcess.waitFor()
if (exitCode != 0) {
log.error { "Feilet å lese antall sider. Avsluttet med kode $exitCode" }
val completed = countProcess.waitFor(30L, TimeUnit.SECONDS)
if (!completed) {
log.error { "Timet ut å lese antall sider." }
throw IllegalArgumentException("Timet ut å lese antall sider.")
}
if (countProcess.exitValue() != 0) {
log.error { "Feilet å lese antall sider. Avsluttet med kode ${countProcess.exitValue()}" }
try {
val bufferedReader = BufferedReader(InputStreamReader(countProcess.errorStream))
var line : String? = null
do {
line = bufferedReader.readLine();
if (line != null) {
println(line)
log.info { "Count error log:\n$line" }
}

} while (line != null)
} catch (e: IOException) {
e.printStackTrace()
log.warn { "Feilet å parse error log fra count" }
}
throw IllegalArgumentException("Feilet å lese antall sider.")
}
val results = BufferedReader(InputStreamReader(countProcess.inputStream)).lines().toList()
var totalPages = 0
Expand All @@ -99,169 +71,47 @@ class FlattenPdfService(
log.info { "Count $totalPages" }
}
val driver: ChromeDriver = ChromeDriver(options)
val url = "http://localhost:8888/web/viewer.html?file=/in/$randomName$pdf#page=1"
val url = "$basePdfJsUrl?file=/in/$randomName$pdf#page=1"
driver.get(url)

for (i in 1..totalPages) {
log.info {"Processing page $i" }
// const url = 'http://localhost:8888/web/viewer.html?file=/in/' + args[0] + args[1] + '#page=' +args[2];
val url = "http://localhost:8888/web/viewer.html?file=/in/$randomName$pdf#page=$i"
val url = "$basePdfJsUrl?file=/in/$randomName$pdf#page=$i"
driver.navigate().to(url)
//Thread.sleep(10000L)
val wait: WebDriverWait =
WebDriverWait(driver, Duration.ofSeconds(20))
val wait: WebDriverWait = WebDriverWait(driver, Duration.ofSeconds(20))

/*
.withTimeout(Duration.ofSeconds(2))
.pollingEvery(Duration.ofMillis(300))
.ignoring(ElementNotInteractableException::class.java)
*/
/*
wait.until {
fun apply(driver: WebDriver): Boolean {
return (driver as JavascriptExecutor).executeScript("if (window.PDFViewerApplication.eventBus) { return true } else { return false } ") == "complete"
}
}
*/
// wait.until(ExpectedConditions.presenceOfElementLocated(By.xpath("//div[@class='textLayer']")))
// wait.until(ExpectedConditions.presenceOfElementLocated(By.xpath("//div[@id='outerContainer']")))
log.info { "Waiting for page $i"}
wait.until(ExpectedConditions.presenceOfElementLocated(By.xpath("//div[@data-page-number='$i']")))

// val executeScript = driver.executeScript("return window.PDFViewerApplication.eventBus")
// System.out.println("application: " + executeScript)
// val eventBus = driver.executeScript("return window.PDFViewerApplication.eventBus")
// System.out.println("eventbus: " + eventBus)
// val eventBusNotNull = driver.executeScript("if (window.PDFViewerApplication.eventBus) { return true } else { return false } ")
// System.out.println("eventbus check: " + eventBusNotNull)
// System.out.println("eventbus check type: " + eventBusNotNull.javaClass.name)
//System.out.println(driver.pageSource)
//driver.
//wait.until(ExpectedConditions.presenceOfElementLocated(By. .xpath(window.PDFViewerApplication.eventBus)))
/*
{ d: WebDriver? ->
revealed.sendKeys("Displayed")
true
}
//val mvm : MultiValueMap<String, String> = LinkedMultiValueMap(params);
*/
// window.PDFViewerApplication.eventBus.on('pagerendered', ...);

log.info { "Printing page $i "}
log.info { "Printing page $i to file"}
val output = driver.executeCdpCommand( "Page.printToPDF", printParams )
val filename = "$outPath$randomName$i$pdf"
val fileOutputStream: FileOutputStream = FileOutputStream(filename)
val byteArray = Base64.getDecoder().decode(output.get("data") as String)
fileOutputStream.write(byteArray)
fileOutputStream.close()
//driver.close()
}
driver.quit()

log.info { "Starting merge "}
log.info { "Starting page merge"}
val completeFilename = "$outPath$randomName$pdf"
// val completeFileOutputStream: FileOutputStream = FileOutputStream(completeFilename)
// completeFileOutputStream.write(byteArray)
// completeFileOutputStream.close()

val utility = PDFMergerUtility()
utility.destinationFileName = completeFilename

for (i in 1..totalPages) {
val filename = "$outPath$randomName$i$pdf"
log.info { "Adding $filename "}
log.info { "Adding $filename to merge"}
utility.addSource(filename)
}

// utility.addSource("testpg1.pdf")
// utility.addSource("testpg2.pdf")
log.info { "Finalizing merge "}
utility.mergeDocuments(MemoryUsageSetting.setupMixed(100_000_000L))


log.info { "Retrieving final file"}

val readBytes = File(completeFilename).readBytes()
return readBytes

/*
for (i in 1..count) {
val process = Runtime.getRuntime().exec(arrayOf("node", printJsPath, randomName, pdf, i.toString()))
val exitCode = process.waitFor()
try {
val bufferedReader = BufferedReader(InputStreamReader(process.inputStream))
var line : String? = null
do {
line = bufferedReader.readLine();
if (line != null) {
println(line)
}
} while (line != null)
} catch (e: IOException) {
e.printStackTrace()
}
if (exitCode == 0) {
val listFiles = File(outPath).listFiles()
listFiles.forEach { f -> log.info { "File " + f.canonicalPath } }
val readBytes = File(outPath + randomName + pdf).readBytes()
return readBytes
} else {
log.error { "Feilet å konvertere $randomName side $i. Avsluttet med kode $exitCode" }
try {
val bufferedReader = BufferedReader(InputStreamReader(process.errorStream))
var line : String? = null
do {
line = bufferedReader.readLine();
if (line != null) {
println(line)
}
} while (line != null)
} catch (e: IOException) {
e.printStackTrace()
}
throw IllegalArgumentException("Feilet å konvertere $randomName. Avsluttet med kode $exitCode")
}
}
*/
// return ByteArray(0)


// write to pdf.js/in
// call print.js
// read from pdf.js.out
// delete in and out


// val rinasak = hentRinsakNode(rinasakId)
// val kanSlettes = kanSlettes(rinasak)
// return incomingPdf
// return EuxRinasakStatus(kanSlettes = kanSlettes)
/*
val flattenerProperties: XFAFlattenerProperties = XFAFlattenerProperties()
.setPdfVersion(XFAFlattenerProperties.PDF_1_7)
.createXmpMetaData()
.setTagged()
.setMetaData(
MetaData()
.setAuthor("iText Samples")
.setLanguage("EN")
.setSubject("Showing off our flattening skills")
.setTitle("Flattened XFA")
)
val xfaf: XFAFlattener = XFAFlattener()
.setFlattenerProperties(flattenerProperties)
xfaf.flatten(FileInputStream("xfaform.pdf"), FileOutputStream("flat.pdf"))
*/
}

/**
 * Produces a 16-character string, choosing each character independently
 * from [charPool] via `random()` and concatenating the picks in order.
 */
fun randomStringByKotlinCollectionRandom(): String =
    (1..16).map { charPool.random() }.joinToString("")
Expand Down
Binary file removed medical.pdf
Binary file not shown.
7 changes: 0 additions & 7 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -178,18 +178,11 @@
<artifactId>junit-jupiter-engine</artifactId>
<scope>test</scope>
</dependency>
<!-- dependency>
<groupId>io.github.bonigarcia</groupId>
<artifactId>webdrivermanager</artifactId>
<version>5.9.2</version>
</dependency -->
<!-- https://mvnrepository.com/artifact/org.seleniumhq.selenium/selenium-chrome-driver -->
<dependency>
<groupId>org.seleniumhq.selenium</groupId>
<artifactId>selenium-chrome-driver</artifactId>
<version>4.14.1</version>
</dependency>

<dependency>
<groupId>org.seleniumhq.selenium</groupId>
<artifactId>selenium-java</artifactId>
Expand Down
Binary file removed xfa.pdf
Binary file not shown.

0 comments on commit 8201fc2

Please sign in to comment.