From 5f0c144643b32fceee1ec96cfe9fb8e2d1adb04a Mon Sep 17 00:00:00 2001 From: GihanAyesh Date: Wed, 27 Mar 2024 15:31:33 +0530 Subject: [PATCH] pdfbox bump --- components/apimgt/org.wso2.carbon.apimgt.impl/pom.xml | 4 ++++ .../apimgt/impl/indexing/indexer/DocumentIndexer.java | 6 +++--- .../carbon/apimgt/impl/indexing/indexer/PDFIndexer.java | 6 +++--- .../apimgt/impl/indexing/indexer/PDFIndexerTest.java | 5 ++++- .../pom.xml | 3 +++ pom.xml | 9 ++++++++- 6 files changed, 25 insertions(+), 8 deletions(-) diff --git a/components/apimgt/org.wso2.carbon.apimgt.impl/pom.xml b/components/apimgt/org.wso2.carbon.apimgt.impl/pom.xml index 3981b9c20973..100cac996b5d 100644 --- a/components/apimgt/org.wso2.carbon.apimgt.impl/pom.xml +++ b/components/apimgt/org.wso2.carbon.apimgt.impl/pom.xml @@ -228,6 +228,10 @@ org.apache.pdfbox pdfbox + + org.wso2.orbit.org.apache.pdfbox + pdfbox-io + org.wso2.carbon.event-processing diff --git a/components/apimgt/org.wso2.carbon.apimgt.impl/src/main/java/org/wso2/carbon/apimgt/impl/indexing/indexer/DocumentIndexer.java b/components/apimgt/org.wso2.carbon.apimgt.impl/src/main/java/org/wso2/carbon/apimgt/impl/indexing/indexer/DocumentIndexer.java index f19704cca1cf..6c690c958860 100644 --- a/components/apimgt/org.wso2.carbon.apimgt.impl/src/main/java/org/wso2/carbon/apimgt/impl/indexing/indexer/DocumentIndexer.java +++ b/components/apimgt/org.wso2.carbon.apimgt.impl/src/main/java/org/wso2/carbon/apimgt/impl/indexing/indexer/DocumentIndexer.java @@ -24,7 +24,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.pdfbox.cos.COSDocument; -import org.apache.pdfbox.io.RandomAccessBufferedFileInputStream; +import org.apache.pdfbox.io.RandomAccessReadBuffer; import org.apache.pdfbox.pdfparser.PDFParser; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.text.PDFTextStripper; @@ -181,9 +181,9 @@ private String fetchDocumentContent(Registry registry, Resource documentResource inputStream = contentResource.getContentStream(); switch (extension) { case APIConstants.PDF_EXTENSION: - PDFParser pdfParser = new PDFParser(new RandomAccessBufferedFileInputStream(inputStream)); + PDFParser pdfParser = new PDFParser(new RandomAccessReadBuffer(inputStream)); pdfParser.parse(); - COSDocument cosDocument = pdfParser.getDocument(); + COSDocument cosDocument = pdfParser.parse().getDocument(); PDFTextStripper stripper = new PDFTextStripper(); contentString = stripper.getText(new PDDocument(cosDocument)); break; diff --git a/components/apimgt/org.wso2.carbon.apimgt.impl/src/main/java/org/wso2/carbon/apimgt/impl/indexing/indexer/PDFIndexer.java b/components/apimgt/org.wso2.carbon.apimgt.impl/src/main/java/org/wso2/carbon/apimgt/impl/indexing/indexer/PDFIndexer.java index c65290cac015..803714535b9e 100644 --- a/components/apimgt/org.wso2.carbon.apimgt.impl/src/main/java/org/wso2/carbon/apimgt/impl/indexing/indexer/PDFIndexer.java +++ b/components/apimgt/org.wso2.carbon.apimgt.impl/src/main/java/org/wso2/carbon/apimgt/impl/indexing/indexer/PDFIndexer.java @@ -9,7 +9,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.pdfbox.io.RandomAccessBufferedFileInputStream; +import org.apache.pdfbox.io.RandomAccessReadBuffer; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException.ErrorCode; import org.apache.pdfbox.cos.COSDocument; @@ -30,7 +30,7 @@ public IndexDocument getIndexedDocument(File2Index fileData) throws SolrExceptio try { PDFParser parser = getPdfParser(fileData); parser.parse(); - cosDoc = parser.getDocument(); + cosDoc = parser.parse().getDocument(); PDFTextStripper stripper = getPdfTextStripper(); String docText = stripper.getText(new PDDocument(cosDoc)); @@ -70,7 +70,7 @@ protected PDFTextStripper getPdfTextStripper() throws IOException { } protected PDFParser getPdfParser(File2Index fileData) throws IOException { - return new PDFParser(new RandomAccessBufferedFileInputStream(new ByteArrayInputStream(fileData.data))); + return new PDFParser(new RandomAccessReadBuffer(new ByteArrayInputStream(fileData.data))); } } diff --git a/components/apimgt/org.wso2.carbon.apimgt.impl/src/test/java/org/wso2/carbon/apimgt/impl/indexing/indexer/PDFIndexerTest.java b/components/apimgt/org.wso2.carbon.apimgt.impl/src/test/java/org/wso2/carbon/apimgt/impl/indexing/indexer/PDFIndexerTest.java index 64fc21774e6a..2d0f200f7868 100644 --- a/components/apimgt/org.wso2.carbon.apimgt.impl/src/test/java/org/wso2/carbon/apimgt/impl/indexing/indexer/PDFIndexerTest.java +++ b/components/apimgt/org.wso2.carbon.apimgt.impl/src/test/java/org/wso2/carbon/apimgt/impl/indexing/indexer/PDFIndexerTest.java @@ -45,10 +45,13 @@ public void testShouldReturnIndexedDocumentWhenParameterCorrect() throws IOExcep String mediaType = "application/pdf+test"; final String MEDIA_TYPE = "mediaType"; PDFParser parser = Mockito.mock(PDFParser.class); + PDDocument pdDocument = Mockito.mock(PDDocument.class); COSDocument cosDoc = Mockito.mock(COSDocument.class); PDFTextStripper pdfTextStripper = Mockito.mock(PDFTextStripper.class); Mockito.doThrow(IOException.class).when(cosDoc).close(); - Mockito.when(parser.getDocument()).thenReturn(new COSDocument()).thenReturn(cosDoc); + Mockito.when(parser.parse()).thenReturn(new PDDocument()); + + Mockito.when(pdDocument.getDocument()).thenReturn(new COSDocument()).thenReturn(cosDoc); Mockito.when(pdfTextStripper.getText(new PDDocument())).thenReturn(""); PDFIndexer pdfIndexer = new PDFIndexerWrapper(parser, pdfTextStripper); diff --git a/features/apimgt/org.wso2.carbon.apimgt.rest.api.store.feature/pom.xml b/features/apimgt/org.wso2.carbon.apimgt.rest.api.store.feature/pom.xml index 23d7275c29b8..b831d39d3050 100644 --- a/features/apimgt/org.wso2.carbon.apimgt.rest.api.store.feature/pom.xml +++ b/features/apimgt/org.wso2.carbon.apimgt.rest.api.store.feature/pom.xml @@ -328,6 +328,9 @@ org.apache.pdfbox:pdfbox:${pdfbox.version} org.apache.pdfbox:fontbox:${pdfbox.version} org.apache.pdfbox:xmpbox:${pdfbox.version} + + org.wso2.orbit.org.apache.pdfbox:pdfbox-io:${pdfbox.io.version} + org.wso2.orbit.com.github.dblock.waffle:waffle-jna:${waffle-jna.version} diff --git a/pom.xml b/pom.xml index d643c935dac5..d0ee135ccb4c 100644 --- a/pom.xml +++ b/pom.xml @@ -1396,6 +1396,12 @@ ${pdfbox.version} + + org.wso2.orbit.org.apache.pdfbox + pdfbox-io + ${pdfbox.io.version} + + org.apache.pdfbox fontbox @@ -2166,8 +2172,9 @@ 3.6.2 1.3.12 - 2.0.25 2.3.10 + 3.0.1 + 3.0.1.wso2v1 1.0.16.wso2v1 2.0.5.wso2v2 2.1.20.wso2v1