diff --git a/Jenkinsfile b/Jenkinsfile
index cf07a10..64276f4 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -4,6 +4,6 @@
def repoName = "pdfOcr"
def dependencyRegex = "itextcore"
def solutionFile = "i7n-ocr.sln"
-def csprojFramework = "netcoreapp2.0"
+def csprojFramework = "net461"
automaticDotnetBuild(repoName, dependencyRegex, solutionFile, csprojFramework)
diff --git a/doxyfile b/doxyfile
index 014ef9b..e8db83f 100644
--- a/doxyfile
+++ b/doxyfile
@@ -32,7 +32,7 @@ DOXYFILE_ENCODING = UTF-8
# title of most generated pages and in a few other places.
# The default value is: My Project.
-PROJECT_NAME = "pdfOCR 1.0.3 API"
+PROJECT_NAME = "pdfOCR 2.0.0 API"
# The PROJECT_NUMBER tag can be used to enter a project or revision number. This
# could be handy for archiving the generated documentation or if some version
diff --git a/itext.tests/itext.pdfocr.api.tests/Properties/AssemblyInfo.cs b/itext.tests/itext.pdfocr.api.tests/Properties/AssemblyInfo.cs
index 535049e..22b13f9 100644
--- a/itext.tests/itext.pdfocr.api.tests/Properties/AssemblyInfo.cs
+++ b/itext.tests/itext.pdfocr.api.tests/Properties/AssemblyInfo.cs
@@ -15,6 +15,6 @@
[assembly: Guid("d6a6ea97-1f23-448f-b700-eff62971d234")]
-[assembly: AssemblyVersion("1.0.3.0")]
-[assembly: AssemblyFileVersion("1.0.3.0")]
-[assembly: AssemblyInformationalVersion("1.0.3")]
+[assembly: AssemblyVersion("2.0.0.0")]
+[assembly: AssemblyFileVersion("2.0.0.0")]
+[assembly: AssemblyInformationalVersion("2.0.0")]
diff --git a/itext.tests/itext.pdfocr.api.tests/itext.pdfocr.api.tests.csproj b/itext.tests/itext.pdfocr.api.tests/itext.pdfocr.api.tests.csproj
index 73d3ee2..8b25ae6 100644
--- a/itext.tests/itext.pdfocr.api.tests/itext.pdfocr.api.tests.csproj
+++ b/itext.tests/itext.pdfocr.api.tests/itext.pdfocr.api.tests.csproj
@@ -9,7 +9,7 @@
library
- net45
+ net461
true
@@ -25,7 +25,7 @@
-
+
diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/ApiTest.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/ApiTest.cs
index aa676a8..9077a3c 100644
--- a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/ApiTest.cs
+++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/ApiTest.cs
@@ -23,44 +23,129 @@ You should have received a copy of the GNU Affero General Public License
using System;
using System.Collections.Generic;
using System.IO;
+using iText.Commons.Actions.Contexts;
+using iText.Commons.Utils;
using iText.IO.Image;
-using iText.IO.Util;
using iText.Kernel.Colors;
using iText.Kernel.Font;
using iText.Kernel.Geom;
+using iText.Kernel.Pdf;
+using iText.Pdfa;
using iText.Pdfocr.Helpers;
+using iText.Pdfocr.Logs;
using iText.Test;
using iText.Test.Attributes;
namespace iText.Pdfocr {
public class ApiTest : ExtendedITextTest {
+        // Folder that receives the PDF files written by these tests; it ends with a separator so file names can be appended directly.
+        public static readonly String DESTINATION_FOLDER = NUnit.Framework.TestContext.CurrentContext.TestDirectory + "/test/itext/pdfocr/";
+
+        [NUnit.Framework.OneTimeSetUp]
+        public static void BeforeClass() {
+            CreateOrClearDestinationFolder(DESTINATION_FOLDER);
+        }
+
[NUnit.Framework.Test]
- public virtual void TestTextInfo() {
- String path = PdfHelper.GetDefaultImagePath();
- IDictionary> result = new CustomOcrEngine().DoImageOcr(new FileInfo(path));
- NUnit.Framework.Assert.AreEqual(1, result.Count);
- TextInfo textInfo = new TextInfo();
- textInfo.SetText("text");
- textInfo.SetBboxRect(new Rectangle(204.0f, 158.0f, 538.0f, 136.0f));
- int page = 2;
- result.Put(page, JavaCollectionsUtil.SingletonList(textInfo));
- NUnit.Framework.Assert.AreEqual(2, result.Count);
- NUnit.Framework.Assert.AreEqual(textInfo.GetText(), result.Get(page)[0].GetText());
+        // Creates a PDF document from the default test image and checks the first page's content stream for the expected hex string.
+        public virtual void CreatePdfWithFileTest() {
+            OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties().SetMetaInfo(new ApiTest.DummyMetaInfo());
+            OcrPdfCreator creator = new OcrPdfCreator(new CustomOcrEngine(), creatorProperties);
+            using (PdfDocument ocrResult = creator.CreatePdf(
+                JavaCollectionsUtil.SingletonList(new FileInfo(PdfHelper.GetDefaultImagePath())), PdfHelper.GetPdfWriter(),
+                new DocumentProperties().SetEventCountingMetaInfo(new ApiTest.DummyMetaInfo()))) {
+                String pageContent = iText.Commons.Utils.JavaUtil.GetStringForBytes(ocrResult.GetPage(1).GetContentBytes(), System.Text.Encoding.UTF8);
+                NUnit.Framework.Assert.IsTrue(pageContent.Contains("<00190014001c001400150014>"));
+            }
+        }
+
+        // Writes a PDF file for the default test image, reopens it and checks the first page's content stream for the expected hex string.
+        [NUnit.Framework.Test]
+        public virtual void CreatePdfFileWithFileTest() {
+            // Path.Combine supplies the directory separator whether or not DESTINATION_FOLDER ends with one.
+            String output = System.IO.Path.Combine(DESTINATION_FOLDER, "createPdfFileWithFileTest.pdf");
+            OcrPdfCreatorProperties props = new OcrPdfCreatorProperties().SetMetaInfo(new ApiTest.DummyMetaInfo());
+            OcrPdfCreator pdfCreator = new OcrPdfCreator(new CustomOcrEngine(), props);
+            pdfCreator.CreatePdfFile(JavaCollectionsUtil.SingletonList(new FileInfo(PdfHelper.GetDefaultImagePath())), new FileInfo(output));
+            using (PdfDocument pdf = new PdfDocument(new PdfReader(output))) {
+                String contentBytes = iText.Commons.Utils.JavaUtil.GetStringForBytes(pdf.GetPage(1).GetContentBytes(), System.Text.Encoding.UTF8);
+                NUnit.Framework.Assert.IsTrue(contentBytes.Contains("<00190014001c001400150014>"));
+            }
+        }
+
+        // Creates a PDF/A document from the default test image; checks the page content and that the result is a PdfADocument.
+        [NUnit.Framework.Test]
+        public virtual void CreatePdfAWithFileTest() {
+            OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties().SetMetaInfo(new ApiTest.DummyMetaInfo()).SetPdfLang("en-US");
+            OcrPdfCreator creator = new OcrPdfCreator(new CustomOcrEngine(), creatorProperties);
+            using (PdfDocument ocrResult = creator.CreatePdfA(
+                JavaCollectionsUtil.SingletonList(new FileInfo(PdfHelper.GetDefaultImagePath())), PdfHelper.GetPdfWriter(),
+                new DocumentProperties().SetEventCountingMetaInfo(new ApiTest.DummyMetaInfo()), PdfHelper.GetRGBPdfOutputIntent())) {
+                byte[] rawContent = ocrResult.GetPage(1).GetContentBytes();
+                String pageContent = iText.Commons.Utils.JavaUtil.GetStringForBytes(rawContent, System.Text.Encoding.UTF8);
+                NUnit.Framework.Assert.IsTrue(pageContent.Contains("<00190014001c001400150014>"));
+                NUnit.Framework.Assert.IsTrue(ocrResult is PdfADocument);
+            }
         }
[NUnit.Framework.Test]
- public virtual void TestTextInfoDeprecationMode() {
+        // Writes a PDF/A file for the default test image, reopens it and checks the page content and the PDF/A-3U conformance level.
+        public virtual void CreatePdfAFileWithFileTest() {
+            // Path.Combine supplies the directory separator whether or not DESTINATION_FOLDER ends with one.
+            String output = System.IO.Path.Combine(DESTINATION_FOLDER, "createPdfAFileWithFileTest.pdf");
+            OcrPdfCreatorProperties props = new OcrPdfCreatorProperties().SetMetaInfo(new ApiTest.DummyMetaInfo()).SetPdfLang("en-US");
+            OcrPdfCreator pdfCreator = new OcrPdfCreator(new CustomOcrEngine(), props);
+            pdfCreator.CreatePdfAFile(JavaCollectionsUtil.SingletonList(new FileInfo(PdfHelper.GetDefaultImagePath())),
+                new FileInfo(output), PdfHelper.GetRGBPdfOutputIntent());
+            using (PdfDocument pdf = new PdfDocument(new PdfReader(output))) {
+                String contentBytes = iText.Commons.Utils.JavaUtil.GetStringForBytes(pdf.GetPage(1).GetContentBytes(), System.Text.Encoding.UTF8);
+                NUnit.Framework.Assert.IsTrue(contentBytes.Contains("<00190014001c001400150014>"));
+                PdfAConformanceLevel cl = pdf.GetReader().GetPdfAConformanceLevel();
+                NUnit.Framework.Assert.AreEqual(PdfAConformanceLevel.PDF_A_3U.GetConformance(), cl.GetConformance());
+                NUnit.Framework.Assert.AreEqual(PdfAConformanceLevel.PDF_A_3U.GetPart(), cl.GetPart());
+            }
+        }
+
+        // Same check as the PDF/A file test, but the creator properties carry no explicitly set meta info.
+        [NUnit.Framework.Test]
+        public virtual void CreatePdfAFileWithFileNoMetaTest() {
+            // Path.Combine supplies the directory separator whether or not DESTINATION_FOLDER ends with one.
+            String output = System.IO.Path.Combine(DESTINATION_FOLDER, "createPdfAFileWithFileNoMetaTest.pdf");
+            OcrPdfCreatorProperties props = new OcrPdfCreatorProperties().SetPdfLang("en-US");
+            OcrPdfCreator pdfCreator = new OcrPdfCreator(new CustomOcrEngine(), props);
+            pdfCreator.CreatePdfAFile(JavaCollectionsUtil.SingletonList(new FileInfo(PdfHelper.GetDefaultImagePath())), new FileInfo(output), PdfHelper.GetRGBPdfOutputIntent());
+            using (PdfDocument pdf = new PdfDocument(new PdfReader(output))) {
+                String contentBytes = iText.Commons.Utils.JavaUtil.GetStringForBytes(pdf.GetPage(1).GetContentBytes(), System.Text.Encoding.UTF8);
+                NUnit.Framework.Assert.IsTrue(contentBytes.Contains("<00190014001c001400150014>"));
+                PdfAConformanceLevel cl = pdf.GetReader().GetPdfAConformanceLevel();
+                NUnit.Framework.Assert.AreEqual(PdfAConformanceLevel.PDF_A_3U.GetConformance(), cl.GetConformance());
+                NUnit.Framework.Assert.AreEqual(PdfAConformanceLevel.PDF_A_3U.GetPart(), cl.GetPart());
+            }
+        }
+
+        // Creates a PDF/A file with a product-aware engine and checks the engine reports that its meta info container was requested.
+        [NUnit.Framework.Test]
+        public virtual void CreatePdfAFileWithFileProductAwareEngineTest() {
+            String output = System.IO.Path.Combine(DESTINATION_FOLDER, "createPdfAFileWithFileProductAwareEngineTest.pdf");
+            OcrPdfCreatorProperties props = new OcrPdfCreatorProperties().SetPdfLang("en-US");
+            CustomProductAwareOcrEngine ocrEngine = new CustomProductAwareOcrEngine();
+            OcrPdfCreator pdfCreator = new OcrPdfCreator(ocrEngine, props);
+            pdfCreator.CreatePdfAFile(JavaCollectionsUtil.SingletonList(new FileInfo(PdfHelper.GetDefaultImagePath())), new FileInfo(output), PdfHelper.GetRGBPdfOutputIntent());
+            NUnit.Framework.Assert.IsTrue(ocrEngine.IsGetMetaInfoContainerTriggered());
+        }
+
+        [NUnit.Framework.Test]
+        public virtual void TestTextInfo() {
             String path = PdfHelper.GetDefaultImagePath();
-            IDictionary<int, IList<TextInfo>> result = new CustomOcrEngine(true).DoImageOcr(new FileInfo(path));
+            IDictionary<int, IList<TextInfo>> result = new CustomOcrEngine().DoImageOcr(new FileInfo(path)); // keyed by page number
             NUnit.Framework.Assert.AreEqual(1, result.Count);
             TextInfo textInfo = new TextInfo();
             textInfo.SetText("text");
-            textInfo.SetBbox(JavaUtil.ArraysAsList(204.0f, 158.0f, 742.0f, 294.0f));
+            textInfo.SetBboxRect(new Rectangle(204.0f, 158.0f, 538.0f, 136.0f));
             int page = 2;
             result.Put(page, JavaCollectionsUtil.SingletonList(textInfo));
             NUnit.Framework.Assert.AreEqual(2, result.Count);
             NUnit.Framework.Assert.AreEqual(textInfo.GetText(), result.Get(page)[0].GetText());
-            NUnit.Framework.Assert.AreEqual(textInfo.GetBbox().Count, result.Get(page)[0].GetBbox().Count);
         }
[LogMessage(PdfOcrLogMessageConstant.COULD_NOT_FIND_CORRESPONDING_GLYPH_TO_UNICODE_CHARACTER, Count = 7)]
@@ -113,18 +198,7 @@ public virtual ImageData ApplyRotation(ImageData imageData) {
}
}
- [LogMessage(PdfOcrLogMessageConstant.COULD_NOT_FIND_CORRESPONDING_GLYPH_TO_UNICODE_CHARACTER, Count = 7)]
- [NUnit.Framework.Test]
- public virtual void TestThaiImageWithNotDefGlyphsDeprecationMode() {
- String testName = "testThaiImageWithNotdefGlyphs";
- String path = PdfHelper.GetThaiImagePath();
- String pdfPath = PdfHelper.GetTargetDirectory() + testName + ".pdf";
- PdfHelper.CreatePdf(pdfPath, new FileInfo(path), new OcrPdfCreatorProperties().SetTextColor(DeviceRgb.BLACK
- ), true);
- ExtractionStrategy strategy = PdfHelper.GetExtractionStrategy(pdfPath);
- PdfFont font = strategy.GetPdfFont();
- String fontName = font.GetFontProgram().GetFontNames().GetFontName();
- NUnit.Framework.Assert.IsTrue(fontName.Contains("LiberationSans"));
+ private class DummyMetaInfo : IMetaInfo {
}
}
}
diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/OcrPdfCreatorEventHelperTest.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/OcrPdfCreatorEventHelperTest.cs
new file mode 100644
index 0000000..8e4a85c
--- /dev/null
+++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/OcrPdfCreatorEventHelperTest.cs
@@ -0,0 +1,137 @@
+/*
+This file is part of the iText (R) project.
+Copyright (c) 1998-2021 iText Group NV
+Authors: iText Software.
+
+This program is offered under a commercial and under the AGPL license.
+For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
+
+AGPL licensing:
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+using System;
+using System.Collections.Generic;
+using iText.Commons.Actions;
+using iText.Commons.Actions.Confirmations;
+using iText.Commons.Actions.Contexts;
+using iText.Commons.Actions.Data;
+using iText.Commons.Actions.Sequence;
+using iText.Commons.Utils;
+using iText.Kernel.Actions.Data;
+using iText.Pdfocr.Statistics;
+using iText.Test;
+
+namespace iText.Pdfocr {
+    // Exercises OcrPdfCreatorEventHelper.OnEvent and observes, via a handler registered with EventManager, which events arrive.
+    public class OcrPdfCreatorEventHelperTest : ExtendedITextTest {
+        // Arbitrary product data shared by the product and statistics events created in the tests below.
+        private static readonly ProductData DUMMY_PRODUCT_DATA = new ProductData("test-product", "inner_product", "1.0.0", 1900, 2100);
+
+        private OcrPdfCreatorEventHelperTest.StoreEventsHandler storeEventsHandler;
+
+        [NUnit.Framework.SetUp]
+        public virtual void Before() {
+            storeEventsHandler = new OcrPdfCreatorEventHelperTest.StoreEventsHandler();
+            EventManager.GetInstance().Register(storeEventsHandler);
+        }
+
+        [NUnit.Framework.TearDown]
+        public virtual void After() {
+            EventManager.GetInstance().Unregister(storeEventsHandler);
+            storeEventsHandler = null;
+        }
+
+        // A product-process event passed to the helper must reach the registered handler exactly once, as the same event.
+        [NUnit.Framework.Test]
+        public virtual void ProductContextBasedEventTest() {
+            OcrPdfCreatorEventHelper helper = new OcrPdfCreatorEventHelper(new SequenceId(), new OcrPdfCreatorEventHelperTest.DummyMetaInfo());
+            OcrPdfCreatorEventHelperTest.DummyITextEvent @event = new OcrPdfCreatorEventHelperTest.DummyITextEvent();
+            helper.OnEvent(@event);
+            NUnit.Framework.Assert.AreEqual(1, storeEventsHandler.GetEvents().Count);
+            NUnit.Framework.Assert.AreEqual(@event, storeEventsHandler.GetEvents()[0]);
+        }
+
+        // A PdfOcrOutputTypeStatisticsEvent passed to the helper must not reach the registered handler.
+        [NUnit.Framework.Test]
+        public virtual void PdfOcrStatisticsEventTest() {
+            OcrPdfCreatorEventHelper helper = new OcrPdfCreatorEventHelper(new SequenceId(), new OcrPdfCreatorEventHelperTest.DummyMetaInfo());
+            PdfOcrOutputTypeStatisticsEvent e = new PdfOcrOutputTypeStatisticsEvent(PdfOcrOutputType.PDF, DUMMY_PRODUCT_DATA);
+            helper.OnEvent(e);
+            NUnit.Framework.Assert.AreEqual(0, storeEventsHandler.GetEvents().Count);
+        }
+
+        // A custom AbstractProductITextEvent passed to the helper must reach the registered handler exactly once.
+        [NUnit.Framework.Test]
+        public virtual void CustomProductEventTest() {
+            OcrPdfCreatorEventHelper helper = new OcrPdfCreatorEventHelper(new SequenceId(), new OcrPdfCreatorEventHelperTest.DummyMetaInfo());
+            AbstractProductITextEvent @event = new OcrPdfCreatorEventHelperTest.CustomProductITextEvent(DUMMY_PRODUCT_DATA);
+            helper.OnEvent(@event);
+            NUnit.Framework.Assert.AreEqual(1, storeEventsHandler.GetEvents().Count);
+            NUnit.Framework.Assert.AreEqual(@event, storeEventsHandler.GetEvents()[0]);
+        }
+
+        // A custom AbstractStatisticsEvent passed to the helper must reach the registered handler exactly once.
+        [NUnit.Framework.Test]
+        public virtual void CustomStatisticsEventTest() {
+            OcrPdfCreatorEventHelper helper = new OcrPdfCreatorEventHelper(new SequenceId(), new OcrPdfCreatorEventHelperTest.DummyMetaInfo());
+            OcrPdfCreatorEventHelperTest.CustomStatisticsEvent @event = new OcrPdfCreatorEventHelperTest.CustomStatisticsEvent(DUMMY_PRODUCT_DATA);
+            helper.OnEvent(@event);
+            NUnit.Framework.Assert.AreEqual(1, storeEventsHandler.GetEvents().Count);
+            NUnit.Framework.Assert.AreEqual(@event, storeEventsHandler.GetEvents()[0]);
+        }
+
+        // Empty IMetaInfo implementation handed to the helper's constructor.
+        private class DummyMetaInfo : IMetaInfo {
+        }
+
+        private class DummyITextEvent : AbstractProductProcessITextEvent {
+            protected internal DummyITextEvent()
+                : base(ITextCoreProductData.GetInstance(), null, EventConfirmationType.ON_DEMAND) {
+            }
+
+            public override String GetEventType() {
+                return "test-event";
+            }
+        }
+
+        private class CustomProductITextEvent : AbstractProductITextEvent {
+            protected internal CustomProductITextEvent(ProductData productData)
+                : base(productData) {
+            }
+        }
+
+        private class CustomStatisticsEvent : AbstractStatisticsEvent {
+            protected internal CustomStatisticsEvent(ProductData productData)
+                : base(productData) {
+            }
+
+            public override IList<String> GetStatisticsNames() {
+                return JavaCollectionsUtil.SingletonList("custom-statistics");
+            }
+        }
+
+        // Event handler that appends every event it receives to a list the tests can inspect.
+        private class StoreEventsHandler : IEventHandler {
+            private IList<IEvent> events = new List<IEvent>();
+
+            public virtual IList<IEvent> GetEvents() {
+                return events;
+            }
+
+            public virtual void OnEvent(IEvent @event) {
+                events.Add(@event);
+            }
+        }
+    }
+}
diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/OcrProcessContextTest.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/OcrProcessContextTest.cs
new file mode 100644
index 0000000..d118333
--- /dev/null
+++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/OcrProcessContextTest.cs
@@ -0,0 +1,51 @@
+/*
+This file is part of the iText (R) project.
+Copyright (c) 1998-2021 iText Group NV
+Authors: iText Software.
+
+This program is offered under a commercial and under the AGPL license.
+For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
+
+AGPL licensing:
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+using iText.Commons.Actions;
+using iText.Commons.Actions.Confirmations;
+using iText.Commons.Actions.Sequence;
+using iText.Test;
+
+namespace iText.Pdfocr {
+    // Checks that OcrProcessContext returns the very event helper instance it was constructed with.
+    public class OcrProcessContextTest : ExtendedITextTest {
+        [NUnit.Framework.Test]
+        public virtual void SetOcrEventHelperTest() {
+            AbstractPdfOcrEventHelper helper = new OcrProcessContextTest.CustomEventHelper();
+            NUnit.Framework.Assert.AreSame(helper, new OcrProcessContext(helper).GetOcrEventHelper());
+        }
+
+        private class CustomEventHelper : AbstractPdfOcrEventHelper {
+            public override void OnEvent(AbstractProductITextEvent @event) {
+                // Do nothing
+            }
+
+            public override SequenceId GetSequenceId() {
+                return null;
+            }
+
+            public override EventConfirmationType GetConfirmationType() {
+                return EventConfirmationType.ON_DEMAND;
+            }
+        }
+    }
+}
diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfA3uTest.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfA3uTest.cs
index c10b442..5164c08 100644
--- a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfA3uTest.cs
+++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfA3uTest.cs
@@ -22,14 +22,16 @@ You should have received a copy of the GNU Affero General Public License
*/
using System;
using System.IO;
-using iText.IO.Util;
-using iText.Kernel;
+using iText.Commons.Utils;
using iText.Kernel.Colors;
+using iText.Kernel.Exceptions;
using iText.Kernel.Font;
using iText.Kernel.Pdf;
using iText.Layout.Font;
-using iText.Pdfa;
+using iText.Pdfa.Exceptions;
+using iText.Pdfocr.Exceptions;
using iText.Pdfocr.Helpers;
+using iText.Pdfocr.Logs;
using iText.Test;
using iText.Test.Attributes;
@@ -104,7 +106,7 @@ public virtual void TestPdfCustomMetadata() {
pdfDocument.Close();
}
- [LogMessage(OcrException.CANNOT_CREATE_PDF_DOCUMENT, Count = 1)]
+ [LogMessage(PdfOcrExceptionMessageConstant.CANNOT_CREATE_PDF_DOCUMENT, Count = 1)]
[NUnit.Framework.Test]
public virtual void TestNonCompliantThaiPdfA() {
NUnit.Framework.Assert.That(() => {
@@ -117,7 +119,7 @@ public virtual void TestNonCompliantThaiPdfA() {
PdfHelper.CreatePdfA(pdfPath, new FileInfo(path), ocrPdfCreatorProperties, PdfHelper.GetRGBPdfOutputIntent
());
}
- , NUnit.Framework.Throws.InstanceOf().With.Message.EqualTo(MessageFormatUtil.Format(OcrException.CANNOT_CREATE_PDF_DOCUMENT, MessageFormatUtil.Format(PdfOcrLogMessageConstant.COULD_NOT_FIND_CORRESPONDING_GLYPH_TO_UNICODE_CHARACTER, 3611))))
+ , NUnit.Framework.Throws.InstanceOf().With.Message.EqualTo(MessageFormatUtil.Format(PdfOcrExceptionMessageConstant.CANNOT_CREATE_PDF_DOCUMENT, MessageFormatUtil.Format(PdfOcrLogMessageConstant.COULD_NOT_FIND_CORRESPONDING_GLYPH_TO_UNICODE_CHARACTER, 3611))))
;
}
@@ -147,7 +149,7 @@ public virtual void TestCompliantThaiPdfA() {
NUnit.Framework.Assert.IsTrue(font.IsEmbedded());
}
- [LogMessage(OcrException.CANNOT_CREATE_PDF_DOCUMENT, Count = 1)]
+ [LogMessage(PdfOcrExceptionMessageConstant.CANNOT_CREATE_PDF_DOCUMENT, Count = 1)]
[NUnit.Framework.Test]
public virtual void TestPdfACreateWithoutPdfLangProperty() {
NUnit.Framework.Assert.That(() => {
@@ -157,7 +159,7 @@ public virtual void TestPdfACreateWithoutPdfLangProperty() {
PdfHelper.CreatePdfA(pdfPath, new FileInfo(path), new OcrPdfCreatorProperties(), PdfHelper.GetRGBPdfOutputIntent
());
}
- , NUnit.Framework.Throws.InstanceOf().With.Message.EqualTo(MessageFormatUtil.Format(OcrException.CANNOT_CREATE_PDF_DOCUMENT, PdfOcrLogMessageConstant.PDF_LANGUAGE_PROPERTY_IS_NOT_SET)))
+ , NUnit.Framework.Throws.InstanceOf().With.Message.EqualTo(MessageFormatUtil.Format(PdfOcrExceptionMessageConstant.CANNOT_CREATE_PDF_DOCUMENT, PdfOcrLogMessageConstant.PDF_LANGUAGE_PROPERTY_IS_NOT_SET)))
;
}
}
diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfCreatorUtilTest.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfCreatorUtilTest.cs
index 89625d6..c8238ee 100644
--- a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfCreatorUtilTest.cs
+++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfCreatorUtilTest.cs
@@ -22,9 +22,11 @@ You should have received a copy of the GNU Affero General Public License
*/
using System.Collections.Generic;
using System.IO;
+using iText.Commons.Utils;
using iText.IO.Image;
-using iText.IO.Util;
+using iText.Pdfocr.Exceptions;
using iText.Pdfocr.Helpers;
+using iText.Pdfocr.Logs;
using iText.Test;
using iText.Test.Attributes;
@@ -70,7 +72,7 @@ public virtual void GetImageDataFromNotExistingImageTest() {
NUnit.Framework.Assert.That(() => {
PdfCreatorUtil.GetImageData(new FileInfo("no such path"), null);
}
- , NUnit.Framework.Throws.InstanceOf())
+ , NUnit.Framework.Throws.InstanceOf())
;
}
@@ -80,7 +82,7 @@ public virtual void GetImageDataFromInvalidImageTest() {
NUnit.Framework.Assert.That(() => {
PdfCreatorUtil.GetImageData(new FileInfo(PdfHelper.GetImagesTestDirectory() + "corrupted.jpg"), null);
}
- , NUnit.Framework.Throws.InstanceOf().With.Message.EqualTo(MessageFormatUtil.Format(OcrException.CANNOT_READ_INPUT_IMAGE)))
+ , NUnit.Framework.Throws.InstanceOf().With.Message.EqualTo(MessageFormatUtil.Format(PdfOcrExceptionMessageConstant.CANNOT_READ_INPUT_IMAGE)))
;
}
}
diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfFontTest.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfFontTest.cs
index b71d408..4a29c44 100644
--- a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfFontTest.cs
+++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfFontTest.cs
@@ -22,12 +22,14 @@ You should have received a copy of the GNU Affero General Public License
*/
using System;
using System.IO;
+using iText.Commons.Utils;
using iText.IO.Font;
-using iText.IO.Util;
using iText.Kernel.Colors;
using iText.Kernel.Font;
using iText.Layout.Font;
+using iText.Pdfocr.Exceptions;
using iText.Pdfocr.Helpers;
+using iText.Pdfocr.Logs;
using iText.Test;
using iText.Test.Attributes;
@@ -51,7 +53,7 @@ public virtual void TestFontColor() {
}
[LogMessage(PdfOcrLogMessageConstant.PROVIDED_FONT_PROVIDER_IS_INVALID, Count = 1)]
- [LogMessage(OcrException.CANNOT_CREATE_PDF_DOCUMENT, Count = 1)]
+ [LogMessage(PdfOcrExceptionMessageConstant.CANNOT_CREATE_PDF_DOCUMENT, Count = 1)]
[NUnit.Framework.Test]
public virtual void TestInvalidFontWithInvalidDefaultFontFamily() {
NUnit.Framework.Assert.That(() => {
@@ -69,7 +71,7 @@ public virtual void TestInvalidFontWithInvalidDefaultFontFamily() {
NUnit.Framework.Assert.AreEqual(PdfHelper.DEFAULT_TEXT, result);
NUnit.Framework.Assert.AreEqual(ScaleMode.SCALE_TO_FIT, properties.GetScaleMode());
}
- , NUnit.Framework.Throws.InstanceOf().With.Message.EqualTo(MessageFormatUtil.Format(OcrException.CANNOT_CREATE_PDF_DOCUMENT, OcrException.CANNOT_RESOLVE_PROVIDED_FONTS)))
+ , NUnit.Framework.Throws.InstanceOf().With.Message.EqualTo(MessageFormatUtil.Format(PdfOcrExceptionMessageConstant.CANNOT_CREATE_PDF_DOCUMENT, PdfOcrExceptionMessageConstant.CANNOT_RESOLVE_PROVIDED_FONTS)))
;
}
diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfInputImageTest.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfInputImageTest.cs
index 65458df..1358041 100644
--- a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfInputImageTest.cs
+++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfInputImageTest.cs
@@ -22,7 +22,9 @@ You should have received a copy of the GNU Affero General Public License
*/
using System;
using System.IO;
+using iText.Pdfocr.Exceptions;
using iText.Pdfocr.Helpers;
+using iText.Pdfocr.Logs;
using iText.Test;
using iText.Test.Attributes;
@@ -37,7 +39,7 @@ public virtual void TestCorruptedImage() {
NUnit.Framework.Assert.IsNotNull(realOutput);
NUnit.Framework.Assert.AreEqual("", realOutput);
}
- , NUnit.Framework.Throws.InstanceOf())
+ , NUnit.Framework.Throws.InstanceOf())
;
}
@@ -50,7 +52,7 @@ public virtual void TestCorruptedImageWithoutExtension() {
NUnit.Framework.Assert.IsNotNull(realOutput);
NUnit.Framework.Assert.AreEqual("", realOutput);
}
- , NUnit.Framework.Throws.InstanceOf())
+ , NUnit.Framework.Throws.InstanceOf())
;
}
@@ -63,7 +65,7 @@ public virtual void TestInvalidImagePathWithoutDot() {
NUnit.Framework.Assert.IsNotNull(realOutput);
NUnit.Framework.Assert.AreEqual("", realOutput);
}
- , NUnit.Framework.Throws.InstanceOf())
+ , NUnit.Framework.Throws.InstanceOf())
;
}
@@ -76,7 +78,7 @@ public virtual void TestInvalidImagePathWithDot() {
NUnit.Framework.Assert.IsNotNull(realOutput);
NUnit.Framework.Assert.AreEqual("", realOutput);
}
- , NUnit.Framework.Throws.InstanceOf())
+ , NUnit.Framework.Throws.InstanceOf())
;
}
@@ -89,7 +91,7 @@ public virtual void TestValidImageWithoutExtension() {
NUnit.Framework.Assert.IsNotNull(realOutput);
NUnit.Framework.Assert.AreEqual("", realOutput);
}
- , NUnit.Framework.Throws.InstanceOf())
+ , NUnit.Framework.Throws.InstanceOf())
;
}
}
diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfLayersTest.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfLayersTest.cs
index 19eca01..2ba742b 100644
--- a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfLayersTest.cs
+++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfLayersTest.cs
@@ -23,7 +23,7 @@ You should have received a copy of the GNU Affero General Public License
using System;
using System.Collections.Generic;
using System.IO;
-using iText.IO.Util;
+using iText.Commons.Utils;
using iText.Kernel.Pdf;
using iText.Kernel.Pdf.Layer;
using iText.Pdfocr.Helpers;
diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfOcrMetaInfoContainerTest.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfOcrMetaInfoContainerTest.cs
new file mode 100644
index 0000000..dafb173
--- /dev/null
+++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfOcrMetaInfoContainerTest.cs
@@ -0,0 +1,38 @@
+/*
+This file is part of the iText (R) project.
+Copyright (c) 1998-2021 iText Group NV
+Authors: iText Software.
+
+This program is offered under a commercial and under the AGPL license.
+For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
+
+AGPL licensing:
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+using iText.Commons.Actions.Contexts;
+using iText.Test;
+
+namespace iText.Pdfocr {
+    // Checks that PdfOcrMetaInfoContainer hands back the exact IMetaInfo instance it was constructed with.
+    public class PdfOcrMetaInfoContainerTest : ExtendedITextTest {
+        [NUnit.Framework.Test]
+        public virtual void Test() {
+            PdfOcrMetaInfoContainerTest.DummyMetaInfo wrapped = new PdfOcrMetaInfoContainerTest.DummyMetaInfo();
+            NUnit.Framework.Assert.AreSame(wrapped, new PdfOcrMetaInfoContainer(wrapped).GetMetaInfo());
+        }
+
+        // Empty IMetaInfo implementation used as the wrapped value.
+        private class DummyMetaInfo : IMetaInfo { }
+    }
+}
diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/ScaleModeTest.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/ScaleModeTest.cs
index 3826278..9a018d5 100644
--- a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/ScaleModeTest.cs
+++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/ScaleModeTest.cs
@@ -22,8 +22,8 @@ You should have received a copy of the GNU Affero General Public License
*/
using System;
using System.IO;
+using iText.Commons.Utils;
using iText.IO.Image;
-using iText.IO.Util;
using iText.Kernel.Geom;
using iText.Kernel.Pdf;
using iText.Pdfocr.Helpers;
diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/events/EventCountingTest.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/events/EventCountingTest.cs
deleted file mode 100644
index c7fd4cd..0000000
--- a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/events/EventCountingTest.cs
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
-This file is part of the iText (R) project.
-Copyright (c) 1998-2021 iText Group NV
-Authors: iText Software.
-
-This program is offered under a commercial and under the AGPL license.
-For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
-
-AGPL licensing:
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with this program. If not, see <https://www.gnu.org/licenses/>.
-*/
-using System;
-using System.IO;
-using iText.IO.Util;
-using iText.Kernel.Pdf;
-using iText.Metainfo;
-using iText.Pdfocr;
-using iText.Pdfocr.Helpers;
-using iText.Test;
-
-namespace iText.Pdfocr.Events {
- public class EventCountingTest : ExtendedITextTest {
- protected internal static readonly String PROFILE_FOLDER = iText.Test.TestUtil.GetParentProjectDirectory(NUnit.Framework.TestContext
- .CurrentContext.TestDirectory) + "/resources/itext/pdfocr/profiles/";
-
- protected internal static readonly String SOURCE_FOLDER = iText.Test.TestUtil.GetParentProjectDirectory(NUnit.Framework.TestContext
- .CurrentContext.TestDirectory) + "/resources/itext/pdfocr/events/";
-
- private IOcrEngine tesseractReader;
-
- public EventCountingTest() {
- tesseractReader = new CustomOcrEngine();
- }
-
- [NUnit.Framework.Test]
- public virtual void TestEventCountingPdfEvent() {
- ((CustomOcrEngine)tesseractReader).SetThreadLocalMetaInfo(new TestMetaInfo());
- DoImageToPdfOcr(tesseractReader, GetTestImageFile());
- NUnit.Framework.Assert.IsTrue(((CustomOcrEngine)tesseractReader).GetThreadLocalMetaInfo() is TestMetaInfo);
- }
-
- [NUnit.Framework.Test]
- public virtual void TestEventCountingPdfAEvent() {
- ((CustomOcrEngine)tesseractReader).SetThreadLocalMetaInfo(new TestMetaInfo());
- DoImageToPdfAOcr(tesseractReader, GetTestImageFile());
- NUnit.Framework.Assert.IsTrue(((CustomOcrEngine)tesseractReader).GetThreadLocalMetaInfo() is TestMetaInfo);
- }
-
- [NUnit.Framework.Test]
- public virtual void TestEventCountingImageEvent() {
- ((CustomOcrEngine)tesseractReader).SetThreadLocalMetaInfo(new TestMetaInfo());
- DoImageOcr(tesseractReader, GetTestImageFile());
- NUnit.Framework.Assert.IsTrue(((CustomOcrEngine)tesseractReader).GetThreadLocalMetaInfo() is TestMetaInfo);
- }
-
- private static void DoImageOcr(IOcrEngine tesseractReader, FileInfo imageFile) {
- tesseractReader.DoImageOcr(imageFile);
- }
-
- private static void DoImageToPdfOcr(IOcrEngine tesseractReader, FileInfo imageFile) {
- OcrPdfCreator ocrPdfCreator = new OcrPdfCreator(tesseractReader);
- ocrPdfCreator.CreatePdf(JavaUtil.ArraysAsList(imageFile), new PdfWriter(new MemoryStream()));
- }
-
- private static void DoImageToPdfAOcr(IOcrEngine tesseractReader, FileInfo imageFile) {
- OcrPdfCreator ocrPdfCreator = new OcrPdfCreator(tesseractReader, new OcrPdfCreatorProperties().SetPdfLang(
- "en-US"));
- Stream @is = null;
- try {
- @is = new FileStream(PROFILE_FOLDER + "sRGB_CS_profile.icm", FileMode.Open, FileAccess.Read);
- }
- catch (FileNotFoundException) {
- }
- // No expected
- PdfOutputIntent outputIntent = new PdfOutputIntent("Custom", "", "http://www.color.org", "sRGB IEC61966-2.1"
- , @is);
- ocrPdfCreator.CreatePdfA(JavaUtil.ArraysAsList(imageFile), new PdfWriter(new MemoryStream()), outputIntent
- );
- }
-
- private static FileInfo GetTestImageFile() {
- String imgPath = SOURCE_FOLDER + "numbers_01.jpg";
- return new FileInfo(imgPath);
- }
- }
-}
diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/exceptions/PdfOcrExceptionTest.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/exceptions/PdfOcrExceptionTest.cs
new file mode 100644
index 0000000..4c9aea8
--- /dev/null
+++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/exceptions/PdfOcrExceptionTest.cs
@@ -0,0 +1,59 @@
+/*
+This file is part of the iText (R) project.
+Copyright (c) 1998-2021 iText Group NV
+Authors: iText Software.
+
+This program is offered under a commercial and under the AGPL license.
+For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
+
+AGPL licensing:
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+using System;
+using iText.Test;
+
+namespace iText.Pdfocr.Exceptions {
+    /// <summary>Checks how PdfOcrException and PdfOcrInputException expose their cause and message.</summary>
+    public class PdfOcrExceptionTest : ExtendedITextTest {
+        [NUnit.Framework.Test]
+        public virtual void OcrExceptionThrowableConstructorTest() {
+            // The cause handed to the constructor must be reported as the inner exception.
+            Exception rootCause = new System.IO.IOException();
+            NUnit.Framework.Assert.AreEqual(rootCause, new PdfOcrException(rootCause).InnerException);
+        }
+
+        [NUnit.Framework.Test]
+        public virtual void OcrInputExceptionThrowableConstructorTest() {
+            Exception rootCause = new System.IO.IOException();
+            PdfOcrException inputException = new PdfOcrInputException(rootCause);
+            NUnit.Framework.Assert.AreEqual(rootCause, inputException.InnerException);
+        }
+
+        [NUnit.Framework.Test]
+        public virtual void OcrInputExceptionStringConstructorTest() {
+            String text = "test message";
+            PdfOcrException inputException = new PdfOcrInputException(text);
+            NUnit.Framework.Assert.AreEqual(text, inputException.Message);
+        }
+
+        [NUnit.Framework.Test]
+        public virtual void OcrExceptiongetMessageParamsTest() {
+            // The "{0}" placeholder is expected to be filled with the value passed to SetMessageParams.
+            String template = "test message {0}";
+            PdfOcrException inputException = new PdfOcrInputException(template);
+            inputException.SetMessageParams("param");
+            NUnit.Framework.Assert.AreEqual("test message param", inputException.Message);
+        }
+    }
+}
diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/CustomOcrEngine.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/CustomOcrEngine.cs
index 470d03e..cf157c1 100644
--- a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/CustomOcrEngine.cs
+++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/CustomOcrEngine.cs
@@ -23,26 +23,15 @@ You should have received a copy of the GNU Affero General Public License
using System;
using System.Collections.Generic;
using System.IO;
-using iText.IO.Util;
-using iText.Kernel.Counter.Event;
+using iText.Commons.Utils;
using iText.Kernel.Geom;
using iText.Pdfocr;
-using iText.Pdfocr.Events;
namespace iText.Pdfocr.Helpers {
- public class CustomOcrEngine : IOcrEngine, IThreadLocalMetaInfoAware {
+ public class CustomOcrEngine : IOcrEngine {
private OcrEngineProperties ocrEngineProperties;
- private IMetaInfo threadLocalMetaInfo;
-
- private bool textInfoDeprecationMode = false;
-
- public CustomOcrEngine()
- : this(false) {
- }
-
- public CustomOcrEngine(bool textInfoDeprecationMode) {
- this.textInfoDeprecationMode = textInfoDeprecationMode;
+ public CustomOcrEngine() {
}
public CustomOcrEngine(OcrEngineProperties ocrEngineProperties) {
@@ -55,22 +44,21 @@ public virtual IDictionary> DoImageOcr(FileInfo input) {
if (input.FullName.Contains(PdfHelper.THAI_IMAGE_NAME)) {
text = PdfHelper.THAI_TEXT;
}
- TextInfo textInfo = this.textInfoDeprecationMode ? new TextInfo(text, JavaUtil.ArraysAsList(204.0f, 158.0f
- , 742.0f, 294.0f)) : new TextInfo(text, new Rectangle(204.0f, 158.0f, 538.0f, 136.0f));
+ TextInfo textInfo = new TextInfo(text, new Rectangle(204.0f, 158.0f, 538.0f, 136.0f));
result.Put(1, JavaCollectionsUtil.SingletonList(textInfo));
return result;
}
- public virtual void CreateTxtFile(IList inputImages, FileInfo txtFile) {
+        // Context-aware overload; this test engine ignores the context and delegates to DoImageOcr(FileInfo).
+        public virtual IDictionary<int, IList<TextInfo>> DoImageOcr(FileInfo input, OcrProcessContext ocrProcessContext) {
+            return DoImageOcr(input);
}
- public virtual IMetaInfo GetThreadLocalMetaInfo() {
- return threadLocalMetaInfo;
+        public virtual void CreateTxtFile(IList<FileInfo> inputImages, FileInfo txtFile) { // no-op in this test engine
}
- public virtual IThreadLocalMetaInfoAware SetThreadLocalMetaInfo(IMetaInfo metaInfo) {
- this.threadLocalMetaInfo = metaInfo;
- return this;
+        // Context-aware overload; a no-op in this test engine.
+        public virtual void CreateTxtFile(IList<FileInfo> inputImages, FileInfo txtFile, OcrProcessContext ocrProcessContext) {
}
public virtual OcrEngineProperties GetOcrEngineProperties() {
diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/CustomProductAwareOcrEngine.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/CustomProductAwareOcrEngine.cs
new file mode 100644
index 0000000..186268a
--- /dev/null
+++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/CustomProductAwareOcrEngine.cs
@@ -0,0 +1,73 @@
+/*
+This file is part of the iText (R) project.
+Copyright (c) 1998-2021 iText Group NV
+Authors: iText Software.
+
+This program is offered under a commercial and under the AGPL license.
+For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
+
+AGPL licensing:
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+using System.Collections.Generic;
+using System.IO;
+using iText.Commons.Actions.Contexts;
+using iText.Commons.Actions.Data;
+using iText.Commons.Utils;
+using iText.Pdfocr;
+
+namespace iText.Pdfocr.Helpers {
+    /// <summary>Test double implementing IOcrEngine and IProductAware that records whether its meta info container was requested.</summary>
+    public class CustomProductAwareOcrEngine : IOcrEngine, IProductAware {
+        private bool getMetaInfoContainerTriggered = false;
+
+        public CustomProductAwareOcrEngine() {
+        }
+
+        public virtual IDictionary<int, IList<TextInfo>> DoImageOcr(FileInfo input) {
+            return JavaCollectionsUtil.EmptyMap<int, IList<TextInfo>>();
+        }
+
+        public virtual IDictionary<int, IList<TextInfo>> DoImageOcr(FileInfo input, OcrProcessContext ocrProcessContext) {
+            return DoImageOcr(input);
+        }
+
+        public virtual void CreateTxtFile(IList<FileInfo> inputImages, FileInfo txtFile) {
+        }
+
+        public virtual void CreateTxtFile(IList<FileInfo> inputImages, FileInfo txtFile, OcrProcessContext ocrProcessContext) {
+        }
+
+        public virtual OcrEngineProperties GetOcrEngineProperties() {
+            return null;
+        }
+
+        public virtual PdfOcrMetaInfoContainer GetMetaInfoContainer() {
+            // Remember the call so it can be queried later through IsGetMetaInfoContainerTriggered().
+            getMetaInfoContainerTriggered = true;
+            return new PdfOcrMetaInfoContainer(new CustomProductAwareOcrEngine.DummyMetaInfo());
+        }
+
+        public virtual ProductData GetProductData() {
+            return null;
+        }
+
+        public virtual bool IsGetMetaInfoContainerTriggered() {
+            return getMetaInfoContainerTriggered;
+        }
+
+        private class DummyMetaInfo : IMetaInfo {
+        }
+    }
+}
diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/PdfHelper.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/PdfHelper.cs
index 5a84f75..e50ef15 100644
--- a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/PdfHelper.cs
+++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/PdfHelper.cs
@@ -22,8 +22,9 @@ You should have received a copy of the GNU Affero General Public License
*/
using System;
using System.IO;
-using Common.Logging;
-using iText.IO.Util;
+using Microsoft.Extensions.Logging;
+using iText.Commons;
+using iText.Commons.Utils;
using iText.Kernel.Pdf;
using iText.Kernel.Pdf.Canvas.Parser;
using iText.Pdfocr;
@@ -46,7 +47,7 @@ public class PdfHelper {
public static readonly String TARGET_DIRECTORY = NUnit.Framework.TestContext.CurrentContext.TestDirectory
+ "/test/resources/itext/pdfocr/";
- private static readonly ILog LOGGER = LogManager.GetLogger(typeof(PdfHelper));
+ private static readonly ILogger LOGGER = ITextLogManager.GetLogger(typeof(PdfHelper));
/// Returns images test directory.
public static String GetImagesTestDirectory() {
@@ -123,23 +124,14 @@ public static String GetTextFromPdfLayerUseActualText(String pdfPath, String lay
/// of properties and save to the given path.
///
public static void CreatePdf(String pdfPath, FileInfo inputFile, OcrPdfCreatorProperties properties) {
- CreatePdf(pdfPath, inputFile, properties, false);
- }
-
- ///
- /// Perform OCR with custom ocr engine using provided input image and set
- /// of properties and save to the given path.
- ///
- public static void CreatePdf(String pdfPath, FileInfo inputFile, OcrPdfCreatorProperties properties, bool
- textInfoDeprecationMode) {
- OcrPdfCreator ocrPdfCreator = new OcrPdfCreator(new CustomOcrEngine(textInfoDeprecationMode), properties);
+ OcrPdfCreator ocrPdfCreator = new OcrPdfCreator(new CustomOcrEngine(), properties);
try {
using (PdfWriter pdfWriter = GetPdfWriter(pdfPath)) {
ocrPdfCreator.CreatePdf(JavaCollectionsUtil.SingletonList(inputFile), pdfWriter).Close();
}
}
catch (System.IO.IOException e) {
- LOGGER.Error(e.Message);
+ LOGGER.LogError(e.Message);
}
}
@@ -157,7 +149,7 @@ public static void CreatePdfA(String pdfPath, FileInfo inputFile, OcrPdfCreatorP
}
}
catch (System.IO.IOException e) {
- LOGGER.Error(e.Message);
+ LOGGER.LogError(e.Message);
}
}
@@ -171,7 +163,7 @@ public static String GetTextFromPdf(FileInfo file, String testName) {
result = GetTextFromPdfLayer(pdfPath, "Text Layer");
}
catch (System.IO.IOException e) {
- LOGGER.Error(e.Message);
+ LOGGER.LogError(e.Message);
}
return result;
}
diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/statistics/PdfOcrOutputTypeStatisticsAggregatorTest.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/statistics/PdfOcrOutputTypeStatisticsAggregatorTest.cs
new file mode 100644
index 0000000..4a24516
--- /dev/null
+++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/statistics/PdfOcrOutputTypeStatisticsAggregatorTest.cs
@@ -0,0 +1,109 @@
+/*
+This file is part of the iText (R) project.
+Copyright (c) 1998-2021 iText Group NV
+Authors: iText Software.
+
+This program is offered under a commercial and under the AGPL license.
+For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
+
+AGPL licensing:
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+using System;
+using System.Collections.Generic;
+using iText.Commons.Actions;
+using iText.Commons.Actions.Data;
+using iText.Test;
+
+namespace iText.Pdfocr.Statistics {
+    /// <summary>Tests event aggregation and merging in PdfOcrOutputTypeStatisticsAggregator, including unrelated inputs.</summary>
+    public class PdfOcrOutputTypeStatisticsAggregatorTest : ExtendedITextTest {
+        private static readonly ProductData DUMMY_PRODUCT_DATA = new ProductData("test-product", "inner_product", "1.0.0", 1900, 2100);
+
+        [NUnit.Framework.Test]
+        public virtual void AggregateEventTest() {
+            PdfOcrOutputTypeStatisticsAggregator aggregator = new PdfOcrOutputTypeStatisticsAggregator();
+            aggregator.Aggregate(new PdfOcrOutputTypeStatisticsEvent(PdfOcrOutputType.PDF, DUMMY_PRODUCT_DATA));
+            aggregator.Aggregate(new PdfOcrOutputTypeStatisticsEvent(PdfOcrOutputType.PDFA, DUMMY_PRODUCT_DATA));
+            aggregator.Aggregate(new PdfOcrOutputTypeStatisticsEvent(PdfOcrOutputType.DATA, DUMMY_PRODUCT_DATA));
+            aggregator.Aggregate(new PdfOcrOutputTypeStatisticsEvent(PdfOcrOutputType.PDFA, DUMMY_PRODUCT_DATA));
+            aggregator.Aggregate(new PdfOcrOutputTypeStatisticsEvent(PdfOcrOutputType.PDF, DUMMY_PRODUCT_DATA));
+            IDictionary<String, long?> aggregation = (IDictionary<String, long?>)aggregator.RetrieveAggregation();
+            NUnit.Framework.Assert.AreEqual(3, aggregation.Count);
+            long? numberOfOcrProcessesWithGivenOutput = aggregation.Get("data");
+            NUnit.Framework.Assert.AreEqual(1L, numberOfOcrProcessesWithGivenOutput);
+            numberOfOcrProcessesWithGivenOutput = aggregation.Get("pdf");
+            NUnit.Framework.Assert.AreEqual(2L, numberOfOcrProcessesWithGivenOutput);
+            numberOfOcrProcessesWithGivenOutput = aggregation.Get("pdfa");
+            NUnit.Framework.Assert.AreEqual(2L, numberOfOcrProcessesWithGivenOutput);
+        }
+
+        [NUnit.Framework.Test]
+        public virtual void MergeTest() {
+            PdfOcrOutputTypeStatisticsAggregator firstAggregator = new PdfOcrOutputTypeStatisticsAggregator();
+            PdfOcrOutputTypeStatisticsAggregator secondAggregator = new PdfOcrOutputTypeStatisticsAggregator();
+            firstAggregator.Aggregate(new PdfOcrOutputTypeStatisticsEvent(PdfOcrOutputType.PDF, DUMMY_PRODUCT_DATA));
+            firstAggregator.Aggregate(new PdfOcrOutputTypeStatisticsEvent(PdfOcrOutputType.PDFA, DUMMY_PRODUCT_DATA));
+            secondAggregator.Aggregate(new PdfOcrOutputTypeStatisticsEvent(PdfOcrOutputType.DATA, DUMMY_PRODUCT_DATA));
+            secondAggregator.Aggregate(new PdfOcrOutputTypeStatisticsEvent(PdfOcrOutputType.PDFA, DUMMY_PRODUCT_DATA));
+            secondAggregator.Aggregate(new PdfOcrOutputTypeStatisticsEvent(PdfOcrOutputType.PDF, DUMMY_PRODUCT_DATA));
+            firstAggregator.Merge(secondAggregator);
+            IDictionary<String, long?> aggregation = (IDictionary<String, long?>)firstAggregator.RetrieveAggregation();
+            NUnit.Framework.Assert.AreEqual(3, aggregation.Count);
+            long? numberOfOcrProcessesWithGivenOutput = aggregation.Get("data");
+            NUnit.Framework.Assert.AreEqual(1L, numberOfOcrProcessesWithGivenOutput);
+            numberOfOcrProcessesWithGivenOutput = aggregation.Get("pdf");
+            NUnit.Framework.Assert.AreEqual(2L, numberOfOcrProcessesWithGivenOutput);
+            numberOfOcrProcessesWithGivenOutput = aggregation.Get("pdfa");
+            NUnit.Framework.Assert.AreEqual(2L, numberOfOcrProcessesWithGivenOutput);
+        }
+
+        [NUnit.Framework.Test]
+        public virtual void AggregateInvalidEventTest() {
+            // An event that is not a PdfOcrOutputTypeStatisticsEvent must leave the aggregation empty.
+            PdfOcrOutputTypeStatisticsAggregator aggregator = new PdfOcrOutputTypeStatisticsAggregator();
+            aggregator.Aggregate(new PdfOcrOutputTypeStatisticsAggregatorTest.DummyAbstractStatisticsEvent(DUMMY_PRODUCT_DATA));
+            NUnit.Framework.Assert.IsTrue(((IDictionary<String, long?>)aggregator.RetrieveAggregation()).IsEmpty());
+        }
+
+        [NUnit.Framework.Test]
+        public virtual void MergeInvalidAggregatorTest() {
+            PdfOcrOutputTypeStatisticsAggregator aggregator = new PdfOcrOutputTypeStatisticsAggregator();
+            aggregator.Merge(new PdfOcrOutputTypeStatisticsAggregatorTest.DummyAbstractStatisticsAggregator());
+            NUnit.Framework.Assert.IsTrue(((IDictionary<String, long?>)aggregator.RetrieveAggregation()).IsEmpty());
+        }
+
+        private class DummyAbstractStatisticsEvent : AbstractStatisticsEvent {
+            protected internal DummyAbstractStatisticsEvent(ProductData productData)
+                : base(productData) {
+            }
+
+            public override IList<String> GetStatisticsNames() {
+                return null;
+            }
+        }
+
+        private class DummyAbstractStatisticsAggregator : AbstractStatisticsAggregator {
+            public override void Aggregate(AbstractStatisticsEvent @event) {
+            }
+
+            public override Object RetrieveAggregation() {
+                return null;
+            }
+
+            public override void Merge(AbstractStatisticsAggregator aggregator) {
+            }
+        }
+    }
+}
diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/statistics/PdfOcrOutputTypeStatisticsEventTest.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/statistics/PdfOcrOutputTypeStatisticsEventTest.cs
new file mode 100644
index 0000000..48c3889
--- /dev/null
+++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/statistics/PdfOcrOutputTypeStatisticsEventTest.cs
@@ -0,0 +1,52 @@
+/*
+This file is part of the iText (R) project.
+Copyright (c) 1998-2021 iText Group NV
+Authors: iText Software.
+
+This program is offered under a commercial and under the AGPL license.
+For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
+
+AGPL licensing:
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+using iText.Commons.Actions.Data;
+using iText.Commons.Logs;
+using iText.Commons.Utils;
+using iText.Test;
+using iText.Test.Attributes;
+
+namespace iText.Pdfocr.Statistics {
+    /// <summary>Tests the default behaviour of PdfOcrOutputTypeStatisticsEvent and its aggregator lookup by name.</summary>
+    public class PdfOcrOutputTypeStatisticsEventTest : ExtendedITextTest {
+        private static readonly ProductData DUMMY_PRODUCT_DATA = new ProductData("test-product", "inner_product", "1.0.0", 1900, 2100);
+
+        [NUnit.Framework.Test]
+        public virtual void DefaultEventTest() {
+            string statisticsName = "ocrOutput";
+            PdfOcrOutputTypeStatisticsEvent statisticsEvent = new PdfOcrOutputTypeStatisticsEvent(PdfOcrOutputType.PDF, DUMMY_PRODUCT_DATA);
+            NUnit.Framework.Assert.AreEqual(PdfOcrOutputType.PDF, statisticsEvent.GetPdfOcrStatisticsEventType());
+            NUnit.Framework.Assert.AreEqual(JavaCollectionsUtil.SingletonList(statisticsName), statisticsEvent.GetStatisticsNames());
+            object aggregator = statisticsEvent.CreateStatisticsAggregatorFromName(statisticsName);
+            NUnit.Framework.Assert.AreEqual(typeof(PdfOcrOutputTypeStatisticsAggregator), aggregator.GetType());
+        }
+
+        [NUnit.Framework.Test]
+        [LogMessage(CommonsLogMessageConstant.INVALID_STATISTICS_NAME)]
+        public virtual void InvalidAggregatorNameTest() {
+            // An unknown statistics name must produce no aggregator.
+            PdfOcrOutputTypeStatisticsEvent statisticsEvent = new PdfOcrOutputTypeStatisticsEvent(PdfOcrOutputType.PDF, DUMMY_PRODUCT_DATA);
+            NUnit.Framework.Assert.IsNull(statisticsEvent.CreateStatisticsAggregatorFromName("dummy name"));
+        }
+    }
+}
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/Properties/AssemblyInfo.cs b/itext.tests/itext.pdfocr.tesseract4.tests/Properties/AssemblyInfo.cs
index 930819f..3613622 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/Properties/AssemblyInfo.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/Properties/AssemblyInfo.cs
@@ -15,6 +15,6 @@
[assembly: Guid("d6a6ea97-1f23-448f-b700-eff62971d234")]
-[assembly: AssemblyVersion("1.0.3.0")]
-[assembly: AssemblyFileVersion("1.0.3.0")]
-[assembly: AssemblyInformationalVersion("1.0.3")]
+[assembly: AssemblyVersion("2.0.0.0")]
+[assembly: AssemblyFileVersion("2.0.0.0")]
+[assembly: AssemblyInformationalVersion("2.0.0")]
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext.pdfocr.tesseract4.tests.csproj b/itext.tests/itext.pdfocr.tesseract4.tests/itext.pdfocr.tesseract4.tests.csproj
index 54e5bf9..fabc6dc 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext.pdfocr.tesseract4.tests.csproj
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext.pdfocr.tesseract4.tests.csproj
@@ -9,7 +9,7 @@
library
- net45
+ net461
true
@@ -26,7 +26,7 @@
-
+
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/IntegrationEventHandlingTestHelper.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/IntegrationEventHandlingTestHelper.cs
new file mode 100644
index 0000000..4b7e508
--- /dev/null
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/IntegrationEventHandlingTestHelper.cs
@@ -0,0 +1,186 @@
+/*
+This file is part of the iText (R) project.
+Copyright (c) 1998-2021 iText Group NV
+Authors: iText Software.
+
+This program is offered under a commercial and under the AGPL license.
+For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
+
+AGPL licensing:
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+using System;
+using System.Collections.Generic;
+using System.IO;
+using iText.Commons.Actions;
+using iText.Commons.Actions.Confirmations;
+using iText.Commons.Actions.Contexts;
+using iText.Commons.Actions.Processors;
+using iText.Commons.Actions.Producer;
+using iText.Commons.Actions.Sequence;
+using iText.Commons.Utils;
+using iText.Kernel.Actions.Events;
+using iText.Kernel.Pdf;
+using iText.Pdfocr.Statistics;
+using iText.Pdfocr.Tesseract4;
+using iText.Pdfocr.Tesseract4.Actions.Data;
+using iText.Pdfocr.Tesseract4.Actions.Events;
+
+namespace iText.Pdfocr {
+    /// <summary>Base class for Tesseract 4 integration tests that registers a handler storing the OCR-related events.</summary>
+    public abstract class IntegrationEventHandlingTestHelper : IntegrationTestHelper {
+        protected internal readonly AbstractTesseract4OcrEngine tesseractReader;
+
+        protected internal IntegrationEventHandlingTestHelper.StoreEventsHandler eventsHandler;
+
+        public IntegrationEventHandlingTestHelper(IntegrationTestHelper.ReaderType type) {
+            tesseractReader = GetTesseractReader(type);
+        }
+
+        [NUnit.Framework.SetUp]
+        public virtual void Before() {
+            // init ocr engine
+            Tesseract4OcrEngineProperties ocrEngineProperties = new Tesseract4OcrEngineProperties();
+            ocrEngineProperties.SetPathToTessData(GetTessDataDirectory());
+            tesseractReader.SetTesseract4OcrEngineProperties(ocrEngineProperties);
+            // register event handler
+            eventsHandler = new IntegrationEventHandlingTestHelper.StoreEventsHandler();
+            EventManager.GetInstance().Register(eventsHandler);
+        }
+
+        [NUnit.Framework.TearDown]
+        public virtual void After() {
+            EventManager.GetInstance().Unregister(eventsHandler);
+            eventsHandler = null;
+        }
+
+        /// <summary>Asserts that the event is a "process-image" Tesseract4 product event with the expected confirmation type.</summary>
+        protected internal static void ValidateUsageEvent(IEvent @event, EventConfirmationType expectedConfirmationType) {
+            NUnit.Framework.Assert.IsTrue(@event is PdfOcrTesseract4ProductEvent);
+            PdfOcrTesseract4ProductEvent usageEvent = (PdfOcrTesseract4ProductEvent)@event;
+            NUnit.Framework.Assert.AreEqual("process-image", usageEvent.GetEventType());
+            NUnit.Framework.Assert.AreEqual(expectedConfirmationType, usageEvent.GetConfirmationType());
+            NUnit.Framework.Assert.AreEqual(PdfOcrTesseract4ProductData.GetInstance(), usageEvent.GetProductData());
+        }
+
+        /// <summary>Asserts that the event is an output type statistics event carrying the given output type.</summary>
+        protected internal static void ValidateStatisticEvent(IEvent @event, PdfOcrOutputType outputType) {
+            NUnit.Framework.Assert.IsTrue(@event is PdfOcrOutputTypeStatisticsEvent);
+            PdfOcrOutputTypeStatisticsEvent statisticsEvent = (PdfOcrOutputTypeStatisticsEvent)@event;
+            NUnit.Framework.Assert.AreEqual(outputType, statisticsEvent.GetPdfOcrStatisticsEventType());
+            NUnit.Framework.Assert.AreEqual(PdfOcrTesseract4ProductData.GetInstance(), statisticsEvent.GetProductData());
+        }
+
+        /// <summary>Asserts that the event is a ConfirmEvent confirming exactly the expected event instance.</summary>
+        protected internal static void ValidateConfirmEvent(IEvent @event, IEvent expectedConfirmedEvent) {
+            NUnit.Framework.Assert.IsTrue(@event is ConfirmEvent);
+            NUnit.Framework.Assert.AreSame(expectedConfirmedEvent, ((ConfirmEvent)@event).GetConfirmedEvent());
+        }
+
+        // we expect core events in case of API methods returning PdfDocument
+        protected internal static void ValidateCoreConfirmEvent(IEvent @event) {
+            NUnit.Framework.Assert.IsTrue(@event is ConfirmEvent);
+            ConfirmEvent confirmEvent = (ConfirmEvent)@event;
+            NUnit.Framework.Assert.AreEqual(GetCoreEvent().GetEvent().GetEventType(), confirmEvent.GetConfirmedEvent().GetEventType());
+            NUnit.Framework.Assert.AreEqual(GetCoreEvent().GetEvent().GetConfirmationType(), confirmEvent.GetConfirmedEvent().GetConfirmationType());
+        }
+
+        /// <summary>Asserts that the producer entry of the PDF at the given path equals the expected line.</summary>
+        protected internal virtual void ValidatePdfProducerLine(String filePath, String expected) {
+            using (PdfDocument pdfDocument = new PdfDocument(new PdfReader(filePath))) {
+                NUnit.Framework.Assert.AreEqual(expected, pdfDocument.GetDocumentInfo().GetProducer());
+            }
+        }
+
+        /// <summary>Builds the producer line that ProducerBuilder generates for the given events.</summary>
+        protected internal static String CreateExpectedProducerLine(ConfirmedEventWrapper[] expectedEvents) {
+            IList<ConfirmedEventWrapper> listEvents = JavaUtil.ArraysAsList(expectedEvents);
+            return ProducerBuilder.ModifyProducer(listEvents, null);
+        }
+
+        protected internal static ConfirmedEventWrapper GetPdfOcrEvent() {
+            // NOTE(review): the processor is created for PDF_HTML although this wraps a pdfOCR event - confirm it is intended.
+            DefaultITextProductEventProcessor processor = new DefaultITextProductEventProcessor(ProductNameConstant.PDF_HTML);
+            return new ConfirmedEventWrapper(PdfOcrTesseract4ProductEvent.CreateProcessImageEvent(new SequenceId(), null
+                , EventConfirmationType.ON_CLOSE), processor.GetUsageType(), processor.GetProducer());
+        }
+
+        /// <summary>Creates a confirmed wrapper around an iText Core process-PDF event with ON_CLOSE confirmation.</summary>
+        protected internal static ConfirmedEventWrapper GetCoreEvent() {
+            DefaultITextProductEventProcessor processor = new DefaultITextProductEventProcessor(ProductNameConstant.ITEXT_CORE);
+            return new ConfirmedEventWrapper(ITextCoreProductEvent.CreateProcessPdfEvent(new SequenceId(), null, EventConfirmationType
+                .ON_CLOSE), processor.GetUsageType(), processor.GetProducer());
+        }
+
+        protected internal static PdfOutputIntent GetRGBPdfOutputIntent() {
+            // Dispose the profile stream after construction; the ICC data is consumed by the PdfOutputIntent constructor.
+            using (Stream @is = new FileStream(TEST_DIRECTORY + "profiles" + "/sRGB_CS_profile.icm", FileMode.Open, FileAccess.Read)) {
+                return new PdfOutputIntent("", "", "", "sRGB IEC61966-2.1", @is);
+            }
+        }
+
+        /// <summary>
+        /// Creates PDF document with
+        /// <c>OcrPdfCreator.CreatePdf</c>
+        /// and sets the event counting meta info.
+        /// </summary>
+        /// <param name="engine">
+        /// engine to set in the
+        /// <see cref="OcrPdfCreator"/>
+        /// </param>
+        /// <param name="outPdfFile">out pdf file</param>
+        /// <param name="imgFile">image file</param>
+        /// <param name="metaInfo">meta info</param>
+        protected internal virtual void CreatePdfAndSetEventCountingMetaInfo(IOcrEngine engine, FileInfo outPdfFile, FileInfo imgFile, IMetaInfo metaInfo) {
+            using (PdfWriter pdfWriter = new PdfWriter(outPdfFile)) {
+                PdfDocument pdfDocument = new OcrPdfCreator(engine).CreatePdf(JavaCollectionsUtil.SingletonList(imgFile),
+                    pdfWriter, new DocumentProperties().SetEventCountingMetaInfo(metaInfo));
+                pdfDocument.Close();
+            }
+        }
+
+        /// <summary>
+        /// Creates PDF document with
+        /// <c>OcrPdfCreator.CreatePdfFile</c>
+        /// and sets the meta info on
+        /// <see cref="OcrPdfCreatorProperties"/>.
+        /// </summary>
+        /// <param name="engine">
+        /// engine to set in the
+        /// <see cref="OcrPdfCreator"/>
+        /// </param>
+        /// <param name="outPdfFile">out pdf file</param>
+        /// <param name="imgFile">image file</param>
+        /// <param name="metaInfo">meta info</param>
+        protected internal virtual void CreatePdfFileAndSetMetaInfoToProps(IOcrEngine engine, FileInfo outPdfFile, FileInfo imgFile, IMetaInfo metaInfo) {
+            OcrPdfCreatorProperties properties = new OcrPdfCreatorProperties().SetMetaInfo(metaInfo);
+            new OcrPdfCreator(engine, properties).CreatePdfFile(JavaCollectionsUtil.SingletonList(imgFile), outPdfFile);
+        }
+
+        /// <summary>Event handler that stores the Tesseract4 product, output type statistics and confirm events it receives.</summary>
+        protected internal class StoreEventsHandler : IEventHandler {
+            private readonly IList<IEvent> events = new List<IEvent>();
+
+            public virtual IList<IEvent> GetEvents() {
+                return events;
+            }
+
+            public virtual void OnEvent(IEvent @event) {
+                if (@event is PdfOcrTesseract4ProductEvent || @event is PdfOcrOutputTypeStatisticsEvent || @event is ConfirmEvent) {
+                    events.Add(@event);
+                }
+            }
+        }
+    }
+}
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/IntegrationTestHelper.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/IntegrationTestHelper.cs
index 7b6cf0f..a01ec44 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/IntegrationTestHelper.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/IntegrationTestHelper.cs
@@ -23,9 +23,10 @@ You should have received a copy of the GNU Affero General Public License
using System;
using System.Collections.Generic;
using System.IO;
-using Common.Logging;
+using Microsoft.Extensions.Logging;
+using iText.Commons;
+using iText.Commons.Utils;
using iText.IO.Font;
-using iText.IO.Util;
using iText.Kernel.Colors;
using iText.Kernel.Font;
using iText.Kernel.Geom;
@@ -36,11 +37,13 @@ You should have received a copy of the GNU Affero General Public License
using iText.Kernel.Pdf.Canvas.Parser.Listener;
using iText.Layout.Font;
using iText.Pdfocr.Tesseract4;
+using iText.Pdfocr.Tesseract4.Logs;
using iText.Test;
namespace iText.Pdfocr {
public class IntegrationTestHelper : ExtendedITextTest {
- private static readonly ILog LOGGER = LogManager.GetLogger(typeof(iText.Pdfocr.IntegrationTestHelper));
+ private static readonly ILogger LOGGER = ITextLogManager.GetLogger(typeof(iText.Pdfocr.IntegrationTestHelper
+ ));
// directory with test files
public static readonly String TEST_DIRECTORY = iText.Test.TestUtil.GetParentProjectDirectory(NUnit.Framework.TestContext
@@ -164,7 +167,7 @@ protected internal virtual String GetTextFromPdf(AbstractTesseract4OcrEngine tes
result = GetTextFromPdfLayer(pdfPath, null, page);
}
catch (System.IO.IOException e) {
- LOGGER.Error(e.Message);
+ LOGGER.LogError(e.Message);
}
return result;
}
@@ -203,8 +206,7 @@ protected internal virtual String GetTextFromPdf(AbstractTesseract4OcrEngine tes
/// Get text from layer specified by name from page.
protected internal virtual String GetTextFromPdfLayer(String pdfPath, String layerName, int page, bool useActualText
) {
- PdfDocument pdfDocument = new PdfDocument(new PdfReader(pdfPath), new DocumentProperties().SetEventCountingMetaInfo
- (new PdfOcrMetaInfo()));
+ PdfDocument pdfDocument = new PdfDocument(new PdfReader(pdfPath));
IntegrationTestHelper.ExtractionStrategy textExtractionStrategy = new IntegrationTestHelper.ExtractionStrategy
(layerName);
textExtractionStrategy.SetUseActualText(useActualText);
@@ -243,7 +245,7 @@ protected internal virtual String GetRecognizedTextFromTextFile(AbstractTesserac
result = GetTextFromTextFile(new FileInfo(txtPath));
}
catch (Exception e) {
- LOGGER.Error(e.Message);
+ LOGGER.LogError(e.Message);
}
return result;
}
@@ -338,7 +340,7 @@ protected internal virtual void DoOcrAndSavePdfToPath(AbstractTesseract4OcrEngin
}
}
catch (System.IO.IOException e) {
- LOGGER.Error(e.Message);
+ LOGGER.LogError(e.Message);
}
}
@@ -383,11 +385,11 @@ protected internal virtual void DoOcrAndSavePdfToPath(AbstractTesseract4OcrEngin
protected internal virtual String GetTextFromTextFile(FileInfo file) {
String content = null;
try {
- content = iText.IO.Util.JavaUtil.GetStringForBytes(File.ReadAllBytes(file.FullName), System.Text.Encoding.
- UTF8);
+ content = iText.Commons.Utils.JavaUtil.GetStringForBytes(File.ReadAllBytes(file.FullName), System.Text.Encoding
+ .UTF8);
}
catch (System.IO.IOException e) {
- LOGGER.Error(MessageFormatUtil.Format(Tesseract4LogMessageConstant.CANNOT_READ_FILE, file.FullName, e.Message
+ LOGGER.LogError(MessageFormatUtil.Format(Tesseract4LogMessageConstant.CANNOT_READ_FILE, file.FullName, e.Message
));
}
return content;
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/TesseractExecutableIntegrationTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/TesseractExecutableIntegrationTest.cs
index 4ad2474..a4f6ac0 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/TesseractExecutableIntegrationTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/TesseractExecutableIntegrationTest.cs
@@ -22,11 +22,13 @@ You should have received a copy of the GNU Affero General Public License
*/
using System.IO;
using iText.Pdfocr.Tesseract4;
+using iText.Pdfocr.Tesseract4.Exceptions;
+using iText.Pdfocr.Tesseract4.Logs;
using iText.Test.Attributes;
namespace iText.Pdfocr {
public class TesseractExecutableIntegrationTest : IntegrationTestHelper {
- [LogMessage(Tesseract4OcrException.CANNOT_FIND_PATH_TO_TESSERACT_EXECUTABLE, Count = 1)]
+ [LogMessage(PdfOcrTesseract4ExceptionMessageConstant.CANNOT_FIND_PATH_TO_TESSERACT_EXECUTABLE, Count = 1)]
[NUnit.Framework.Test]
public virtual void TestNullPathToTesseractExecutable() {
NUnit.Framework.Assert.That(() => {
@@ -36,23 +38,23 @@ public virtual void TestNullPathToTesseractExecutable() {
tesseractExecutableReader.SetPathToExecutable(null);
GetTextFromPdf(tesseractExecutableReader, file);
}
- , NUnit.Framework.Throws.InstanceOf().With.Message.EqualTo(Tesseract4OcrException.CANNOT_FIND_PATH_TO_TESSERACT_EXECUTABLE))
+ , NUnit.Framework.Throws.InstanceOf().With.Message.EqualTo(PdfOcrTesseract4ExceptionMessageConstant.CANNOT_FIND_PATH_TO_TESSERACT_EXECUTABLE))
;
}
- [LogMessage(Tesseract4OcrException.CANNOT_FIND_PATH_TO_TESSERACT_EXECUTABLE, Count = 1)]
+ [LogMessage(PdfOcrTesseract4ExceptionMessageConstant.CANNOT_FIND_PATH_TO_TESSERACT_EXECUTABLE, Count = 1)]
[NUnit.Framework.Test]
public virtual void TestEmptyPathToTesseractExecutable() {
NUnit.Framework.Assert.That(() => {
FileInfo file = new FileInfo(TEST_IMAGES_DIRECTORY + "spanish_01.jpg");
GetTextFromPdf(new Tesseract4ExecutableOcrEngine("", new Tesseract4OcrEngineProperties()), file);
}
- , NUnit.Framework.Throws.InstanceOf().With.Message.EqualTo(Tesseract4OcrException.CANNOT_FIND_PATH_TO_TESSERACT_EXECUTABLE))
+ , NUnit.Framework.Throws.InstanceOf().With.Message.EqualTo(PdfOcrTesseract4ExceptionMessageConstant.CANNOT_FIND_PATH_TO_TESSERACT_EXECUTABLE))
;
}
[LogMessage(Tesseract4LogMessageConstant.COMMAND_FAILED, Count = 1)]
- [LogMessage(Tesseract4OcrException.TESSERACT_NOT_FOUND, Count = 1)]
+ [LogMessage(PdfOcrTesseract4ExceptionMessageConstant.TESSERACT_NOT_FOUND, Count = 1)]
[NUnit.Framework.Test]
public virtual void TestIncorrectPathToTesseractExecutable() {
NUnit.Framework.Assert.That(() => {
@@ -60,7 +62,7 @@ public virtual void TestIncorrectPathToTesseractExecutable() {
GetTextFromPdf(new Tesseract4ExecutableOcrEngine("path\\to\\executable\\", new Tesseract4OcrEngineProperties
()), file);
}
- , NUnit.Framework.Throws.InstanceOf().With.Message.EqualTo(Tesseract4OcrException.TESSERACT_NOT_FOUND))
+ , NUnit.Framework.Throws.InstanceOf().With.Message.EqualTo(PdfOcrTesseract4ExceptionMessageConstant.TESSERACT_NOT_FOUND))
;
}
}
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/events/multithreading/MultiThreadingExecutableTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/actions/Tesseract4EventHandlingExecutableTest.cs
similarity index 85%
rename from itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/events/multithreading/MultiThreadingExecutableTest.cs
rename to itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/actions/Tesseract4EventHandlingExecutableTest.cs
index b4d66fd..9eef692 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/events/multithreading/MultiThreadingExecutableTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/actions/Tesseract4EventHandlingExecutableTest.cs
@@ -22,9 +22,9 @@ You should have received a copy of the GNU Affero General Public License
*/
using iText.Pdfocr;
-namespace iText.Pdfocr.Events.Multithreading {
- public class MultiThreadingExecutableTest : MultiThreadingTest {
- public MultiThreadingExecutableTest()
+namespace iText.Pdfocr.Actions {
+ public class Tesseract4EventHandlingExecutableTest : Tesseract4EventHandlingTest {
+ public Tesseract4EventHandlingExecutableTest()
: base(IntegrationTestHelper.ReaderType.EXECUTABLE) {
}
}
diff --git a/itext/itext.pdfocr.api/itext/pdfocr/IMetaInfoWrapper.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/actions/Tesseract4EventHandlingLibTest.cs
similarity index 74%
rename from itext/itext.pdfocr.api/itext/pdfocr/IMetaInfoWrapper.cs
rename to itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/actions/Tesseract4EventHandlingLibTest.cs
index b432cc7..7945112 100644
--- a/itext/itext.pdfocr.api/itext/pdfocr/IMetaInfoWrapper.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/actions/Tesseract4EventHandlingLibTest.cs
@@ -20,13 +20,12 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see .
*/
-using iText.Kernel.Counter.Event;
+using iText.Pdfocr;
-namespace iText.Pdfocr {
- /// The meta info wrapper that holds some meta info
- public interface IMetaInfoWrapper {
- /// Gets the wrapped meta info
- /// the wrapped meta info
- IMetaInfo GetWrappedMetaInfo();
+namespace iText.Pdfocr.Actions {
+ public class Tesseract4EventHandlingLibTest : Tesseract4EventHandlingTest {
+ public Tesseract4EventHandlingLibTest()
+ : base(IntegrationTestHelper.ReaderType.LIB) {
+ }
}
}
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/actions/Tesseract4EventHandlingTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/actions/Tesseract4EventHandlingTest.cs
new file mode 100644
index 0000000..59aeb95
--- /dev/null
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/actions/Tesseract4EventHandlingTest.cs
@@ -0,0 +1,401 @@
+/*
+This file is part of the iText (R) project.
+Copyright (c) 1998-2021 iText Group NV
+Authors: iText Software.
+
+This program is offered under a commercial and under the AGPL license.
+For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
+
+AGPL licensing:
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+using System;
+using System.Collections.Generic;
+using System.IO;
+using iText.Commons.Actions;
+using iText.Commons.Actions.Confirmations;
+using iText.Commons.Actions.Contexts;
+using iText.Commons.Actions.Sequence;
+using iText.Commons.Utils;
+using iText.Kernel.Pdf;
+using iText.Pdfocr;
+using iText.Pdfocr.Exceptions;
+using iText.Pdfocr.Statistics;
+using iText.Pdfocr.Tesseract4.Exceptions;
+using iText.Pdfocr.Tesseract4.Logs;
+using iText.Test.Attributes;
+
+namespace iText.Pdfocr.Actions {
+ public abstract class Tesseract4EventHandlingTest : IntegrationEventHandlingTestHelper {
+ public Tesseract4EventHandlingTest(IntegrationTestHelper.ReaderType type)
+ : base(type) {
+ }
+
+ [NUnit.Framework.Test]
+ public virtual void OcrPdfCreatorCreatePdfFileTest() {
+ FileInfo imgFile = new FileInfo(TEST_IMAGES_DIRECTORY + "numbers_01.jpg");
+ FileInfo outPdfFile = FileUtil.CreateTempFile("test", ".pdf");
+ new OcrPdfCreator(tesseractReader).CreatePdfFile(JavaCollectionsUtil.SingletonList(imgFile), outPdfFile);
+ // check ocr events
+ NUnit.Framework.Assert.AreEqual(3, eventsHandler.GetEvents().Count);
+ IEvent ocrUsageEvent = eventsHandler.GetEvents()[0];
+ ValidateUsageEvent(ocrUsageEvent, EventConfirmationType.ON_CLOSE);
+ ValidateStatisticEvent(eventsHandler.GetEvents()[1], PdfOcrOutputType.PDF);
+ ValidateConfirmEvent(eventsHandler.GetEvents()[2], ocrUsageEvent);
+ // check producer line in the output pdf
+ String expectedProdLine = CreateExpectedProducerLine(new ConfirmedEventWrapper[] { GetPdfOcrEvent() });
+ ValidatePdfProducerLine(outPdfFile.FullName, expectedProdLine);
+ }
+
+ [NUnit.Framework.Test]
+ [LogMessage(Tesseract4LogMessageConstant.CANNOT_READ_INPUT_IMAGE)]
+ public virtual void OcrPdfCreatorCreatePdfFileNoImageTest() {
+ FileInfo imgFile = new FileInfo("unknown");
+ IList images = JavaCollectionsUtil.SingletonList(imgFile);
+ FileInfo outPdfFile = FileUtil.CreateTempFile("test", ".pdf");
+ OcrPdfCreator ocrPdfCreator = new OcrPdfCreator(tesseractReader);
+ NUnit.Framework.Assert.Catch(typeof(PdfOcrException), () => ocrPdfCreator.CreatePdfFile(images, outPdfFile
+ ));
+ // check ocr events
+ NUnit.Framework.Assert.AreEqual(0, eventsHandler.GetEvents().Count);
+ }
+
+ [NUnit.Framework.Test]
+ public virtual void OcrPdfCreatorCreatePdfFileNoOutputFileTest() {
+ FileInfo imgFile = new FileInfo(TEST_IMAGES_DIRECTORY + "numbers_01.jpg");
+ IList images = JavaCollectionsUtil.SingletonList(imgFile);
+ FileInfo outPdfFile = new FileInfo("no/no_file");
+ OcrPdfCreator ocrPdfCreator = new OcrPdfCreator(tesseractReader);
+ NUnit.Framework.Assert.Catch(typeof(System.IO.IOException), () => ocrPdfCreator.CreatePdfFile(images, outPdfFile
+ ));
+ NUnit.Framework.Assert.AreEqual(0, eventsHandler.GetEvents().Count);
+ }
+
+ [NUnit.Framework.Test]
+ public virtual void OcrPdfCreatorCreatePdfFileNullOutputFileTest() {
+ FileInfo imgFile = new FileInfo(TEST_IMAGES_DIRECTORY + "numbers_01.jpg");
+ IList images = JavaCollectionsUtil.SingletonList(imgFile);
+ OcrPdfCreator ocrPdfCreator = new OcrPdfCreator(tesseractReader);
+ NUnit.Framework.Assert.Catch(typeof(NullReferenceException), () => ocrPdfCreator.CreatePdfFile(images, null
+ ));
+ NUnit.Framework.Assert.AreEqual(0, eventsHandler.GetEvents().Count);
+ }
+
+        [NUnit.Framework.Test]
+        public virtual void OcrPdfCreatorCreatePdfFileTwoImagesTest() {
+            FileInfo imgFile = new FileInfo(TEST_IMAGES_DIRECTORY + "numbers_01.jpg");
+            FileInfo outPdfFile = FileUtil.CreateTempFile("test", ".pdf");
+            new OcrPdfCreator(tesseractReader).CreatePdfFile(JavaUtil.ArraysAsList(imgFile, imgFile), outPdfFile);
+            // check ocr events: the same image is passed twice, so two usage, one statistic and two confirm events are expected
+            NUnit.Framework.Assert.AreEqual(5, eventsHandler.GetEvents().Count);
+            IEvent ocrUsageEvent1 = eventsHandler.GetEvents()[0];
+            ValidateUsageEvent(ocrUsageEvent1, EventConfirmationType.ON_CLOSE);
+            IEvent ocrUsageEvent2 = eventsHandler.GetEvents()[1];
+            ValidateUsageEvent(ocrUsageEvent2, EventConfirmationType.ON_CLOSE);
+            ValidateStatisticEvent(eventsHandler.GetEvents()[2], PdfOcrOutputType.PDF);
+            ValidateConfirmEvent(eventsHandler.GetEvents()[3], ocrUsageEvent1);
+            ValidateConfirmEvent(eventsHandler.GetEvents()[4], ocrUsageEvent2);
+            // check producer line in the output pdf
+            String expectedProdLine = CreateExpectedProducerLine(new ConfirmedEventWrapper[] { GetPdfOcrEvent() });
+            ValidatePdfProducerLine(outPdfFile.FullName, expectedProdLine);
+        }
+
+        [NUnit.Framework.Test]
+        public virtual void OcrPdfCreatorCreatePdfFileTwoRunningsTest() {
+            FileInfo imgFile = new FileInfo(TEST_IMAGES_DIRECTORY + "numbers_01.jpg");
+            FileInfo outPdfFile = FileUtil.CreateTempFile("test", ".pdf");
+            new OcrPdfCreator(tesseractReader).CreatePdfFile(JavaCollectionsUtil.SingletonList(imgFile), outPdfFile);
+            new OcrPdfCreator(tesseractReader).CreatePdfFile(JavaCollectionsUtil.SingletonList(imgFile), outPdfFile);
+            NUnit.Framework.Assert.AreEqual(6, eventsHandler.GetEvents().Count); // two groups of usage, statistic, confirm
+            IEvent ocrUsageEvent = eventsHandler.GetEvents()[0];
+            ValidateUsageEvent(ocrUsageEvent, EventConfirmationType.ON_CLOSE);
+            ValidateStatisticEvent(eventsHandler.GetEvents()[1], PdfOcrOutputType.PDF);
+            ValidateConfirmEvent(eventsHandler.GetEvents()[2], ocrUsageEvent);
+            // second group of events (indices 3-5), validated the same way as the first group
+            ocrUsageEvent = eventsHandler.GetEvents()[3];
+            ValidateUsageEvent(ocrUsageEvent, EventConfirmationType.ON_CLOSE);
+            ValidateStatisticEvent(eventsHandler.GetEvents()[4], PdfOcrOutputType.PDF);
+            ValidateConfirmEvent(eventsHandler.GetEvents()[5], ocrUsageEvent);
+            // check producer line in the output pdf
+            String expectedProdLine = CreateExpectedProducerLine(new ConfirmedEventWrapper[] { GetPdfOcrEvent() });
+            ValidatePdfProducerLine(outPdfFile.FullName, expectedProdLine);
+        }
+
+ [NUnit.Framework.Test]
+ public virtual void OcrPdfCreatorCreatePdfTest() {
+ FileInfo imgFile = new FileInfo(TEST_IMAGES_DIRECTORY + "numbers_01.jpg");
+ FileInfo outPdfFile = FileUtil.CreateTempFile("test", ".pdf");
+ PdfWriter pdfWriter = new PdfWriter(outPdfFile);
+ PdfDocument pdfDocument = new OcrPdfCreator(tesseractReader).CreatePdf(JavaCollectionsUtil.SingletonList(imgFile
+ ), pdfWriter);
+ pdfDocument.Close();
+ NUnit.Framework.Assert.AreEqual(4, eventsHandler.GetEvents().Count);
+ IEvent ocrUsageEvent = eventsHandler.GetEvents()[0];
+ ValidateUsageEvent(ocrUsageEvent, EventConfirmationType.ON_CLOSE);
+ ValidateStatisticEvent(eventsHandler.GetEvents()[1], PdfOcrOutputType.PDF);
+ ValidateCoreConfirmEvent(eventsHandler.GetEvents()[2]);
+ ValidateConfirmEvent(eventsHandler.GetEvents()[3], ocrUsageEvent);
+ String expectedProdLine = CreateExpectedProducerLine(new ConfirmedEventWrapper[] { GetCoreEvent(), GetPdfOcrEvent
+ () });
+ ValidatePdfProducerLine(outPdfFile.FullName, expectedProdLine);
+ }
+
+ [NUnit.Framework.Test]
+ [LogMessage(Tesseract4LogMessageConstant.CANNOT_READ_INPUT_IMAGE)]
+ public virtual void OcrPdfCreatorCreatePdfNoImageTest() {
+ IList images = JavaCollectionsUtil.SingletonList(new FileInfo("no_image"));
+ FileInfo outPdfFile = FileUtil.CreateTempFile("test", ".pdf");
+ PdfWriter pdfWriter = new PdfWriter(outPdfFile);
+ OcrPdfCreator ocrPdfCreator = new OcrPdfCreator(tesseractReader);
+ NUnit.Framework.Assert.Catch(typeof(PdfOcrTesseract4Exception), () => ocrPdfCreator.CreatePdf(images, pdfWriter
+ ));
+ pdfWriter.Dispose();
+ NUnit.Framework.Assert.AreEqual(0, eventsHandler.GetEvents().Count);
+ }
+
+        [NUnit.Framework.Test]
+        public virtual void OcrPdfCreatorCreatePdfNullWriterTest() {
+            FileInfo imgFile = new FileInfo(TEST_IMAGES_DIRECTORY + "numbers_01.jpg");
+            IList images = JavaCollectionsUtil.SingletonList(imgFile);
+            OcrPdfCreator ocrPdfCreator = new OcrPdfCreator(tesseractReader);
+            NUnit.Framework.Assert.Catch(typeof(ArgumentException), () => ocrPdfCreator.CreatePdf(images, null));
+            NUnit.Framework.Assert.AreEqual(1, eventsHandler.GetEvents().Count); // only the usage event; no statistic or confirm event
+            ValidateUsageEvent(eventsHandler.GetEvents()[0], EventConfirmationType.ON_CLOSE);
+        }
+
+ [NUnit.Framework.Test]
+ public virtual void OcrPdfCreatorCreatePdfAFileTest() {
+ FileInfo imgFile = new FileInfo(TEST_IMAGES_DIRECTORY + "numbers_01.jpg");
+ FileInfo outPdfFile = FileUtil.CreateTempFile("test", ".pdf");
+ OcrPdfCreatorProperties props = new OcrPdfCreatorProperties().SetPdfLang("en-US");
+ new OcrPdfCreator(tesseractReader, props).CreatePdfAFile(JavaCollectionsUtil.SingletonList(imgFile), outPdfFile
+ , GetRGBPdfOutputIntent());
+ // check ocr events
+ NUnit.Framework.Assert.AreEqual(3, eventsHandler.GetEvents().Count);
+ IEvent ocrUsageEvent = eventsHandler.GetEvents()[0];
+ ValidateUsageEvent(ocrUsageEvent, EventConfirmationType.ON_CLOSE);
+ ValidateStatisticEvent(eventsHandler.GetEvents()[1], PdfOcrOutputType.PDFA);
+ ValidateConfirmEvent(eventsHandler.GetEvents()[2], ocrUsageEvent);
+ // check producer line in the output pdf
+ String expectedProdLine = CreateExpectedProducerLine(new ConfirmedEventWrapper[] { GetPdfOcrEvent() });
+ ValidatePdfProducerLine(outPdfFile.FullName, expectedProdLine);
+ }
+
+ [NUnit.Framework.Test]
+ public virtual void OcrPdfCreatorCreatePdfATest() {
+ FileInfo imgFile = new FileInfo(TEST_IMAGES_DIRECTORY + "numbers_01.jpg");
+ FileInfo outPdfFile = FileUtil.CreateTempFile("test", ".pdf");
+ PdfWriter pdfWriter = new PdfWriter(outPdfFile);
+ OcrPdfCreatorProperties props = new OcrPdfCreatorProperties().SetPdfLang("en-US");
+ PdfDocument pdfDocument = new OcrPdfCreator(tesseractReader, props).CreatePdfA(JavaCollectionsUtil.SingletonList
+ (imgFile), pdfWriter, GetRGBPdfOutputIntent());
+ pdfDocument.Close();
+ // check ocr events
+ NUnit.Framework.Assert.AreEqual(4, eventsHandler.GetEvents().Count);
+ IEvent ocrUsageEvent = eventsHandler.GetEvents()[0];
+ ValidateUsageEvent(ocrUsageEvent, EventConfirmationType.ON_CLOSE);
+ ValidateStatisticEvent(eventsHandler.GetEvents()[1], PdfOcrOutputType.PDFA);
+ ValidateCoreConfirmEvent(eventsHandler.GetEvents()[2]);
+ ValidateConfirmEvent(eventsHandler.GetEvents()[3], ocrUsageEvent);
+ // check producer line in the output pdf
+ String expectedProdLine = CreateExpectedProducerLine(new ConfirmedEventWrapper[] { GetCoreEvent(), GetPdfOcrEvent
+ () });
+ ValidatePdfProducerLine(outPdfFile.FullName, expectedProdLine);
+ }
+
+ [NUnit.Framework.Test]
+ public virtual void DoImageOcrTest() {
+ FileInfo imgFile = new FileInfo(TEST_IMAGES_DIRECTORY + "numbers_01.jpg");
+ tesseractReader.DoImageOcr(imgFile);
+ NUnit.Framework.Assert.AreEqual(3, eventsHandler.GetEvents().Count);
+ IEvent usageEvent = eventsHandler.GetEvents()[0];
+ ValidateUsageEvent(usageEvent, EventConfirmationType.ON_DEMAND);
+ ValidateStatisticEvent(eventsHandler.GetEvents()[1], PdfOcrOutputType.DATA);
+ ValidateConfirmEvent(eventsHandler.GetEvents()[2], usageEvent);
+ }
+
+ [NUnit.Framework.Test]
+ [LogMessage(Tesseract4LogMessageConstant.CANNOT_READ_INPUT_IMAGE)]
+ public virtual void DoImageOcrNoImageTest() {
+ FileInfo imgFile = new FileInfo("uncknown");
+ NUnit.Framework.Assert.Catch(typeof(PdfOcrException), () => tesseractReader.DoImageOcr(imgFile));
+ NUnit.Framework.Assert.AreEqual(0, eventsHandler.GetEvents().Count);
+ }
+
+ [NUnit.Framework.Test]
+ public virtual void DoImageOcrTwoRunningsTest() {
+ FileInfo imgFile = new FileInfo(TEST_IMAGES_DIRECTORY + "numbers_01.jpg");
+ tesseractReader.DoImageOcr(imgFile);
+ tesseractReader.DoImageOcr(imgFile);
+ NUnit.Framework.Assert.AreEqual(6, eventsHandler.GetEvents().Count);
+ IEvent usageEvent = eventsHandler.GetEvents()[0];
+ ValidateUsageEvent(usageEvent, EventConfirmationType.ON_DEMAND);
+ ValidateStatisticEvent(eventsHandler.GetEvents()[1], PdfOcrOutputType.DATA);
+ ValidateConfirmEvent(eventsHandler.GetEvents()[2], usageEvent);
+ usageEvent = eventsHandler.GetEvents()[3];
+ ValidateUsageEvent(usageEvent, EventConfirmationType.ON_DEMAND);
+ ValidateStatisticEvent(eventsHandler.GetEvents()[4], PdfOcrOutputType.DATA);
+ ValidateConfirmEvent(eventsHandler.GetEvents()[5], usageEvent);
+ }
+
+        [NUnit.Framework.Test]
+        public virtual void CreateTxtFileTest() {
+            FileInfo imgFile = new FileInfo(TEST_IMAGES_DIRECTORY + "numbers_01.jpg");
+            tesseractReader.CreateTxtFile(JavaUtil.ArraysAsList(imgFile, imgFile), FileUtil.CreateTempFile("test", ".txt"
+                ));
+            NUnit.Framework.Assert.AreEqual(4, eventsHandler.GetEvents().Count); // one usage, two statistic, one confirm
+            IEvent usageEvent = eventsHandler.GetEvents()[0];
+            ValidateUsageEvent(usageEvent, EventConfirmationType.ON_DEMAND);
+            ValidateStatisticEvent(eventsHandler.GetEvents()[1], PdfOcrOutputType.DATA);
+            ValidateStatisticEvent(eventsHandler.GetEvents()[2], PdfOcrOutputType.DATA);
+            ValidateConfirmEvent(eventsHandler.GetEvents()[3], usageEvent);
+        }
+
+ [NUnit.Framework.Test]
+ public virtual void CreateTxtFileNullEventHelperTest() {
+ FileInfo imgFile = new FileInfo(TEST_IMAGES_DIRECTORY + "numbers_01.jpg");
+ tesseractReader.CreateTxtFile(JavaUtil.ArraysAsList(imgFile, imgFile), FileUtil.CreateTempFile("test", ".txt"
+ ), new OcrProcessContext(null));
+ NUnit.Framework.Assert.AreEqual(4, eventsHandler.GetEvents().Count);
+ IEvent usageEvent = eventsHandler.GetEvents()[0];
+ ValidateUsageEvent(usageEvent, EventConfirmationType.ON_DEMAND);
+ ValidateStatisticEvent(eventsHandler.GetEvents()[1], PdfOcrOutputType.DATA);
+ ValidateStatisticEvent(eventsHandler.GetEvents()[2], PdfOcrOutputType.DATA);
+ ValidateConfirmEvent(eventsHandler.GetEvents()[3], usageEvent);
+ }
+
+ [NUnit.Framework.Test]
+ [LogMessage(Tesseract4LogMessageConstant.CANNOT_READ_INPUT_IMAGE)]
+ public virtual void CreateTxtFileNoImageTest() {
+ FileInfo imgFile = new FileInfo("no_image");
+ IList images = JavaUtil.ArraysAsList(imgFile, imgFile);
+ FileInfo outPdfFile = FileUtil.CreateTempFile("test", ".txt");
+ NUnit.Framework.Assert.Catch(typeof(PdfOcrException), () => tesseractReader.CreateTxtFile(images, outPdfFile
+ ));
+            // only one usage event is expected and it is not confirmed (no confirm event)
+ NUnit.Framework.Assert.AreEqual(1, eventsHandler.GetEvents().Count);
+ ValidateUsageEvent(eventsHandler.GetEvents()[0], EventConfirmationType.ON_DEMAND);
+ }
+
+        [NUnit.Framework.Test]
+        public virtual void CreateTxtFileNoFileTest() {
+            FileInfo imgFile = new FileInfo(TEST_IMAGES_DIRECTORY + "numbers_01.jpg");
+            IList images = JavaUtil.ArraysAsList(imgFile, imgFile);
+            FileInfo outPdfFile = new FileInfo("nopath/nofile"); // text output target; "nopath" presumably does not exist
+            Exception e = NUnit.Framework.Assert.Catch(typeof(PdfOcrTesseract4Exception), () => tesseractReader.CreateTxtFile
+                (images, outPdfFile));
+            NUnit.Framework.Assert.AreEqual(PdfOcrTesseract4ExceptionMessageConstant.CANNOT_WRITE_TO_FILE, e.Message);
+            NUnit.Framework.Assert.AreEqual(3, eventsHandler.GetEvents().Count); // usage + two statistic events, no confirm event
+            IEvent usageEvent = eventsHandler.GetEvents()[0];
+            ValidateUsageEvent(usageEvent, EventConfirmationType.ON_DEMAND);
+            ValidateStatisticEvent(eventsHandler.GetEvents()[1], PdfOcrOutputType.DATA);
+            ValidateStatisticEvent(eventsHandler.GetEvents()[2], PdfOcrOutputType.DATA);
+        }
+
+ [NUnit.Framework.Test]
+ public virtual void CreateTxtFileNullOutFileTest() {
+ FileInfo imgFile = new FileInfo(TEST_IMAGES_DIRECTORY + "numbers_01.jpg");
+ IList images = JavaUtil.ArraysAsList(imgFile, imgFile);
+ NUnit.Framework.Assert.Catch(typeof(NullReferenceException), () => tesseractReader.CreateTxtFile(images, null
+ ));
+ NUnit.Framework.Assert.AreEqual(3, eventsHandler.GetEvents().Count);
+ IEvent usageEvent = eventsHandler.GetEvents()[0];
+ ValidateUsageEvent(usageEvent, EventConfirmationType.ON_DEMAND);
+ ValidateStatisticEvent(eventsHandler.GetEvents()[1], PdfOcrOutputType.DATA);
+ ValidateStatisticEvent(eventsHandler.GetEvents()[2], PdfOcrOutputType.DATA);
+ }
+
+ // set meta info tests
+ [NUnit.Framework.Test]
+ public virtual void SetEventCountingMetaInfoTest() {
+ FileInfo imgFile = new FileInfo(TEST_IMAGES_DIRECTORY + "numbers_01.jpg");
+ FileInfo outPdfFile = FileUtil.CreateTempFile("test", ".pdf");
+ CreatePdfAndSetEventCountingMetaInfo(tesseractReader, outPdfFile, imgFile, new Tesseract4EventHandlingTest.TestMetaInfo
+ ());
+ NUnit.Framework.Assert.AreEqual(4, eventsHandler.GetEvents().Count);
+ IEvent ocrUsageEvent = eventsHandler.GetEvents()[0];
+ ValidateUsageEvent(ocrUsageEvent, EventConfirmationType.ON_CLOSE);
+ ValidateStatisticEvent(eventsHandler.GetEvents()[1], PdfOcrOutputType.PDF);
+ ValidateCoreConfirmEvent(eventsHandler.GetEvents()[2]);
+ ValidateConfirmEvent(eventsHandler.GetEvents()[3], ocrUsageEvent);
+ String expectedProdLine = CreateExpectedProducerLine(new ConfirmedEventWrapper[] { GetCoreEvent(), GetPdfOcrEvent
+ () });
+ ValidatePdfProducerLine(outPdfFile.FullName, expectedProdLine);
+ }
+
+ [NUnit.Framework.Test]
+ public virtual void CreatePdfFileTestMetaInfoTest() {
+ FileInfo imgFile = new FileInfo(TEST_IMAGES_DIRECTORY + "numbers_01.jpg");
+ FileInfo outPdfFile = FileUtil.CreateTempFile("test", ".pdf");
+ CreatePdfFileAndSetMetaInfoToProps(tesseractReader, outPdfFile, imgFile, new Tesseract4EventHandlingTest.TestMetaInfo
+ ());
+ // check ocr events
+ NUnit.Framework.Assert.AreEqual(4, eventsHandler.GetEvents().Count);
+ IEvent ocrUsageEvent = eventsHandler.GetEvents()[0];
+ ValidateUsageEvent(ocrUsageEvent, EventConfirmationType.ON_CLOSE);
+ ValidateStatisticEvent(eventsHandler.GetEvents()[1], PdfOcrOutputType.PDF);
+ ValidateCoreConfirmEvent(eventsHandler.GetEvents()[2]);
+ ValidateConfirmEvent(eventsHandler.GetEvents()[3], ocrUsageEvent);
+ // check producer line in the output pdf
+ String expectedProdLine = CreateExpectedProducerLine(new ConfirmedEventWrapper[] { GetCoreEvent(), GetPdfOcrEvent
+ () });
+ ValidatePdfProducerLine(outPdfFile.FullName, expectedProdLine);
+ }
+
+ [NUnit.Framework.Test]
+ public virtual void DoImageOcrCustomEventHelperTest() {
+ FileInfo imgFile = new FileInfo(TEST_IMAGES_DIRECTORY + "numbers_01.jpg");
+ tesseractReader.DoImageOcr(imgFile, new OcrProcessContext(new Tesseract4EventHandlingTest.CustomEventHelper
+ ()));
+ NUnit.Framework.Assert.AreEqual(3, eventsHandler.GetEvents().Count);
+ IEvent usageEvent = eventsHandler.GetEvents()[0];
+ ValidateUsageEvent(usageEvent, EventConfirmationType.ON_DEMAND);
+ ValidateStatisticEvent(eventsHandler.GetEvents()[1], PdfOcrOutputType.DATA);
+ ValidateConfirmEvent(eventsHandler.GetEvents()[2], usageEvent);
+ }
+
+ [NUnit.Framework.Test]
+ public virtual void CreateTxtFileCustomEventHelperTest() {
+ FileInfo imgFile = new FileInfo(TEST_IMAGES_DIRECTORY + "numbers_01.jpg");
+ tesseractReader.CreateTxtFile(JavaUtil.ArraysAsList(imgFile, imgFile), FileUtil.CreateTempFile("test", ".txt"
+ ), new OcrProcessContext(new Tesseract4EventHandlingTest.CustomEventHelper()));
+ NUnit.Framework.Assert.AreEqual(4, eventsHandler.GetEvents().Count);
+ IEvent usageEvent = eventsHandler.GetEvents()[0];
+ ValidateUsageEvent(usageEvent, EventConfirmationType.ON_DEMAND);
+ ValidateStatisticEvent(eventsHandler.GetEvents()[1], PdfOcrOutputType.DATA);
+ ValidateStatisticEvent(eventsHandler.GetEvents()[2], PdfOcrOutputType.DATA);
+ ValidateConfirmEvent(eventsHandler.GetEvents()[3], usageEvent);
+ }
+
+        private class CustomEventHelper : AbstractPdfOcrEventHelper { // test helper: tags context-based events with TestMetaInfo
+            public override void OnEvent(AbstractProductITextEvent @event) {
+                if (@event is AbstractContextBasedITextEvent) {
+                    ((AbstractContextBasedITextEvent)@event).SetMetaInfo(new Tesseract4EventHandlingTest.TestMetaInfo());
+                }
+                EventManager.GetInstance().OnEvent(@event); // forwarded for every event, whether or not meta info was set
+            }
+
+            public override SequenceId GetSequenceId() {
+                return new SequenceId(); // a new SequenceId instance on every call
+            }
+
+            public override EventConfirmationType GetConfirmationType() {
+                return EventConfirmationType.ON_DEMAND;
+            }
+        }
+
+ private class TestMetaInfo : IMetaInfo {
+ }
+ }
+}
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/actions/events/PdfOcrTesseract4ProductEventTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/actions/events/PdfOcrTesseract4ProductEventTest.cs
new file mode 100644
index 0000000..695a8bf
--- /dev/null
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/actions/events/PdfOcrTesseract4ProductEventTest.cs
@@ -0,0 +1,44 @@
+/*
+This file is part of the iText (R) project.
+Copyright (c) 1998-2021 iText Group NV
+Authors: iText Software.
+
+This program is offered under a commercial and under the AGPL license.
+For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
+
+AGPL licensing:
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+using iText.Commons.Actions.Confirmations;
+using iText.Commons.Actions.Sequence;
+using iText.Pdfocr.Tesseract4.Actions.Data;
+using iText.Pdfocr.Tesseract4.Actions.Events;
+using iText.Test;
+
+namespace iText.Pdfocr.Actions.Events {
+ public class PdfOcrTesseract4ProductEventTest : ExtendedITextTest {
+ [NUnit.Framework.Test]
+ public virtual void EventTypeTest() {
+ PdfOcrTesseract4ProductEvent e = PdfOcrTesseract4ProductEvent.CreateProcessImageEvent(new SequenceId(), null
+ , EventConfirmationType.ON_DEMAND);
+ NUnit.Framework.Assert.AreEqual(PdfOcrTesseract4ProductEvent.PROCESS_IMAGE, e.GetEventType());
+ }
+
+ [NUnit.Framework.Test]
+ public virtual void ProductDataNameTest() {
+ NUnit.Framework.Assert.AreEqual("pdfOcr-tesseract4", PdfOcrTesseract4ProductData.GetInstance().GetProductName
+ ());
+ }
+ }
+}
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/events/EventCountingTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/events/EventCountingTest.cs
deleted file mode 100644
index aa83ae2..0000000
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/events/EventCountingTest.cs
+++ /dev/null
@@ -1,280 +0,0 @@
-/*
-This file is part of the iText (R) project.
-Copyright (c) 1998-2021 iText Group NV
-Authors: iText Software.
-
-This program is offered under a commercial and under the AGPL license.
-For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
-
-AGPL licensing:
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with this program. If not, see <https://www.gnu.org/licenses/>.
-*/
-using System;
-using System.Collections.Generic;
-using System.IO;
-using iText.IO.Util;
-using iText.Kernel.Counter;
-using iText.Kernel.Counter.Event;
-using iText.Kernel.Pdf;
-using iText.Metainfo;
-using iText.Pdfocr;
-using iText.Pdfocr.Tesseract4;
-using iText.Pdfocr.Tesseract4.Events;
-
-namespace iText.Pdfocr.Events {
- public abstract class EventCountingTest : IntegrationTestHelper {
- protected internal static readonly String PROFILE_FOLDER = iText.Test.TestUtil.GetParentProjectDirectory(NUnit.Framework.TestContext
- .CurrentContext.TestDirectory) + "/resources/itext/pdfocr/events/";
-
- internal AbstractTesseract4OcrEngine tesseractReader;
-
- internal String testFileTypeName;
-
- private bool isExecutableReaderType;
-
- public EventCountingTest(IntegrationTestHelper.ReaderType type) {
- isExecutableReaderType = type.Equals(IntegrationTestHelper.ReaderType.EXECUTABLE);
- if (isExecutableReaderType) {
- testFileTypeName = "executable";
- }
- else {
- testFileTypeName = "lib";
- }
- tesseractReader = GetTesseractReader(type);
- }
-
- [NUnit.Framework.SetUp]
- public virtual void InitTesseractProperties() {
- Tesseract4OcrEngineProperties ocrEngineProperties = new Tesseract4OcrEngineProperties();
- ocrEngineProperties.SetPathToTessData(GetTessDataDirectory());
- tesseractReader.SetTesseract4OcrEngineProperties(ocrEngineProperties);
- }
-
- [NUnit.Framework.Test]
- public virtual void TestEventCountingPdfEvent() {
- String imgPath = TEST_IMAGES_DIRECTORY + "numbers_01.jpg";
- FileInfo file = new FileInfo(imgPath);
- EventCountingTest.TestEventCounter eventCounter = new EventCountingTest.TestEventCounter();
- IEventCounterFactory factory = new SimpleEventCounterFactory(eventCounter);
- EventCounterHandler.GetInstance().Register(factory);
- try {
- DoImageToPdfOcr(tesseractReader, JavaUtil.ArraysAsList(file));
- NUnit.Framework.Assert.AreEqual(1, eventCounter.GetEvents().Count);
- NUnit.Framework.Assert.AreSame(PdfOcrTesseract4Event.TESSERACT4_IMAGE_TO_PDF, eventCounter.GetEvents()[0]);
- NUnit.Framework.Assert.IsNull(eventCounter.GetMetaInfos()[0]);
- }
- finally {
- EventCounterHandler.GetInstance().Unregister(factory);
- }
- }
-
- [NUnit.Framework.Test]
- public virtual void TestEventCountingSeveralImagesOneImageToPdfEvent() {
- String imgPath = TEST_IMAGES_DIRECTORY + "numbers_01.jpg";
- FileInfo file = new FileInfo(imgPath);
- EventCountingTest.TestEventCounter eventCounter = new EventCountingTest.TestEventCounter();
- IEventCounterFactory factory = new SimpleEventCounterFactory(eventCounter);
- EventCounterHandler.GetInstance().Register(factory);
- try {
- DoImageToPdfOcr(tesseractReader, JavaUtil.ArraysAsList(file, file));
- NUnit.Framework.Assert.AreEqual(1, eventCounter.GetEvents().Count);
- NUnit.Framework.Assert.AreSame(PdfOcrTesseract4Event.TESSERACT4_IMAGE_TO_PDF, eventCounter.GetEvents()[0]);
- NUnit.Framework.Assert.IsNull(eventCounter.GetMetaInfos()[0]);
- }
- finally {
- EventCounterHandler.GetInstance().Unregister(factory);
- }
- }
-
- [NUnit.Framework.Test]
- public virtual void TestEventCountingPdfAEvent() {
- String imgPath = TEST_IMAGES_DIRECTORY + "numbers_01.jpg";
- FileInfo file = new FileInfo(imgPath);
- EventCountingTest.TestEventCounter eventCounter = new EventCountingTest.TestEventCounter();
- IEventCounterFactory factory = new SimpleEventCounterFactory(eventCounter);
- EventCounterHandler.GetInstance().Register(factory);
- try {
- DoImageToPdfAOcr(tesseractReader, JavaUtil.ArraysAsList(file));
- NUnit.Framework.Assert.AreEqual(1, eventCounter.GetEvents().Count);
- NUnit.Framework.Assert.AreSame(PdfOcrTesseract4Event.TESSERACT4_IMAGE_TO_PDFA, eventCounter.GetEvents()[0]
- );
- NUnit.Framework.Assert.IsNull(eventCounter.GetMetaInfos()[0]);
- }
- finally {
- EventCounterHandler.GetInstance().Unregister(factory);
- }
- }
-
- [NUnit.Framework.Test]
- public virtual void TestEventCountingTwoPdfEvents() {
- String imgPath = TEST_IMAGES_DIRECTORY + "numbers_01.jpg";
- FileInfo file = new FileInfo(imgPath);
- EventCountingTest.TestEventCounter eventCounter = new EventCountingTest.TestEventCounter();
- IEventCounterFactory factory = new SimpleEventCounterFactory(eventCounter);
- EventCounterHandler.GetInstance().Register(factory);
- try {
- DoImageToPdfOcr(tesseractReader, JavaUtil.ArraysAsList(file));
- DoImageToPdfOcr(tesseractReader, JavaUtil.ArraysAsList(file));
- NUnit.Framework.Assert.AreEqual(2, eventCounter.GetEvents().Count);
- for (int i = 0; i < eventCounter.GetEvents().Count; i++) {
- NUnit.Framework.Assert.AreSame(PdfOcrTesseract4Event.TESSERACT4_IMAGE_TO_PDF, eventCounter.GetEvents()[i]);
- NUnit.Framework.Assert.IsNull(eventCounter.GetMetaInfos()[i]);
- }
- }
- finally {
- EventCounterHandler.GetInstance().Unregister(factory);
- }
- }
-
- [NUnit.Framework.Test]
- public virtual void TestEventCountingImageEvent() {
- String imgPath = TEST_IMAGES_DIRECTORY + "numbers_01.jpg";
- FileInfo file = new FileInfo(imgPath);
- EventCountingTest.TestEventCounter eventCounter = new EventCountingTest.TestEventCounter();
- IEventCounterFactory factory = new SimpleEventCounterFactory(eventCounter);
- EventCounterHandler.GetInstance().Register(factory);
- try {
- DoImageOcr(tesseractReader, file);
- NUnit.Framework.Assert.AreEqual(1, eventCounter.GetEvents().Count);
- NUnit.Framework.Assert.AreSame(PdfOcrTesseract4Event.TESSERACT4_IMAGE_OCR, eventCounter.GetEvents()[0]);
- NUnit.Framework.Assert.IsNull(eventCounter.GetMetaInfos()[0]);
- }
- finally {
- EventCounterHandler.GetInstance().Unregister(factory);
- }
- }
-
- [NUnit.Framework.Test]
- public virtual void TestEventCountingImageEventCustomMetaInfo() {
- String imgPath = TEST_IMAGES_DIRECTORY + "numbers_01.jpg";
- FileInfo file = new FileInfo(imgPath);
- EventCountingTest.TestEventCounter eventCounter = new EventCountingTest.TestEventCounter();
- IEventCounterFactory factory = new SimpleEventCounterFactory(eventCounter);
- EventCounterHandler.GetInstance().Register(factory);
- try {
- tesseractReader.SetThreadLocalMetaInfo(new TestMetaInfo());
- DoImageOcr(tesseractReader, file);
- NUnit.Framework.Assert.AreEqual(1, eventCounter.GetEvents().Count);
- NUnit.Framework.Assert.AreSame(PdfOcrTesseract4Event.TESSERACT4_IMAGE_OCR, eventCounter.GetEvents()[0]);
- NUnit.Framework.Assert.IsTrue(eventCounter.GetMetaInfos()[0] is TestMetaInfo);
- }
- finally {
- EventCounterHandler.GetInstance().Unregister(factory);
- tesseractReader.SetThreadLocalMetaInfo(null);
- }
- }
-
- [NUnit.Framework.Test]
- public virtual void TestEventCountingPdfEventCustomMetaInfo() {
- String imgPath = TEST_IMAGES_DIRECTORY + "numbers_01.jpg";
- FileInfo file = new FileInfo(imgPath);
- EventCountingTest.TestEventCounter eventCounter = new EventCountingTest.TestEventCounter();
- IEventCounterFactory factory = new SimpleEventCounterFactory(eventCounter);
- EventCounterHandler.GetInstance().Register(factory);
- try {
- tesseractReader.SetThreadLocalMetaInfo(new TestMetaInfo());
- DoImageToPdfOcr(tesseractReader, JavaUtil.ArraysAsList(file));
- NUnit.Framework.Assert.AreEqual(1, eventCounter.GetEvents().Count);
- NUnit.Framework.Assert.AreSame(PdfOcrTesseract4Event.TESSERACT4_IMAGE_TO_PDF, eventCounter.GetEvents()[0]);
- NUnit.Framework.Assert.IsTrue(eventCounter.GetMetaInfos()[0] is TestMetaInfo);
- }
- finally {
- EventCounterHandler.GetInstance().Unregister(factory);
- tesseractReader.SetThreadLocalMetaInfo(null);
- }
- }
-
- [NUnit.Framework.Test]
- public virtual void TestEventCountingWithImprovedHocrParsing() {
- String imgPath = TEST_IMAGES_DIRECTORY + "thai_03.jpg";
- FileInfo file = new FileInfo(imgPath);
- EventCountingTest.TestEventCounter eventCounter = new EventCountingTest.TestEventCounter();
- IEventCounterFactory factory = new SimpleEventCounterFactory(eventCounter);
- EventCounterHandler.GetInstance().Register(factory);
- Tesseract4OcrEngineProperties properties = tesseractReader.GetTesseract4OcrEngineProperties();
- properties.SetTextPositioning(TextPositioning.BY_WORDS_AND_LINES);
- properties.SetUseTxtToImproveHocrParsing(true);
- properties.SetPathToTessData(new FileInfo(LANG_TESS_DATA_DIRECTORY));
- tesseractReader.SetTesseract4OcrEngineProperties(properties);
- tesseractReader.DoImageOcr(file);
- NUnit.Framework.Assert.AreEqual(1, eventCounter.GetEvents().Count);
- NUnit.Framework.Assert.AreEqual(PdfOcrTesseract4Event.TESSERACT4_IMAGE_OCR.GetEventType(), eventCounter.GetEvents
- ()[0].GetEventType());
- EventCounterHandler.GetInstance().Unregister(factory);
- }
-
- public virtual void TestEventCountingCustomMetaInfoError() {
- String imgPath = TEST_IMAGES_DIRECTORY + "numbers_101.jpg";
- FileInfo file = new FileInfo(imgPath);
- EventCountingTest.TestEventCounter eventCounter = new EventCountingTest.TestEventCounter();
- IEventCounterFactory factory = new SimpleEventCounterFactory(eventCounter);
- EventCounterHandler.GetInstance().Register(factory);
- IMetaInfo metaInfo = new TestMetaInfo();
- try {
- tesseractReader.SetThreadLocalMetaInfo(metaInfo);
- DoImageToPdfOcr(tesseractReader, JavaUtil.ArraysAsList(file));
- }
- finally {
- NUnit.Framework.Assert.AreEqual(metaInfo, tesseractReader.GetThreadLocalMetaInfo());
- EventCounterHandler.GetInstance().Unregister(factory);
- tesseractReader.SetThreadLocalMetaInfo(null);
- }
- }
-
- private static void DoImageOcr(AbstractTesseract4OcrEngine tesseractReader, FileInfo imageFile) {
- tesseractReader.DoImageOcr(imageFile);
- }
-
- private static void DoImageToPdfOcr(AbstractTesseract4OcrEngine tesseractReader, IList imageFiles
- ) {
- OcrPdfCreator ocrPdfCreator = new OcrPdfCreator(tesseractReader);
- ocrPdfCreator.CreatePdf(imageFiles, new PdfWriter(new MemoryStream()));
- }
-
- private static void DoImageToPdfAOcr(AbstractTesseract4OcrEngine tesseractReader, IList imageFiles
- ) {
- OcrPdfCreator ocrPdfCreator = new OcrPdfCreator(tesseractReader, new OcrPdfCreatorProperties().SetPdfLang(
- "en-US"));
- Stream @is = null;
- try {
- @is = new FileStream(PROFILE_FOLDER + "sRGB_CS_profile.icm", FileMode.Open, FileAccess.Read);
- }
- catch (FileNotFoundException) {
- }
- // No expected
- PdfOutputIntent outputIntent = new PdfOutputIntent("Custom", "", "http://www.color.org", "sRGB IEC61966-2.1"
- , @is);
- ocrPdfCreator.CreatePdfA(imageFiles, new PdfWriter(new MemoryStream()), outputIntent);
- }
-
- private class TestEventCounter : EventCounter {
- private IList events = new List();
-
- private IList metaInfos = new List();
-
- public virtual IList GetEvents() {
- return events;
- }
-
- public virtual IList GetMetaInfos() {
- return metaInfos;
- }
-
- protected override void OnEvent(IEvent @event, IMetaInfo metaInfo) {
- this.events.Add(@event);
- this.metaInfos.Add(metaInfo);
- }
- }
- }
-}
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/events/PdfOcrTesseract4EventTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/events/PdfOcrTesseract4EventTest.cs
deleted file mode 100644
index 4837719..0000000
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/events/PdfOcrTesseract4EventTest.cs
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
-This file is part of the iText (R) project.
-Copyright (c) 1998-2021 iText Group NV
-Authors: iText Software.
-
-This program is offered under a commercial and under the AGPL license.
-For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
-
-AGPL licensing:
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with this program. If not, see <https://www.gnu.org/licenses/>.
-*/
-using System;
-using iText.Pdfocr;
-using iText.Pdfocr.Tesseract4.Events;
-
-namespace iText.Pdfocr.Events {
- public class PdfOcrTesseract4EventTest : IntegrationTestHelper {
- private const String PDF_OCR_TESSERACT4_ORIGIN_ID = "iText.Pdfocr.Tesseract4";
-
- [NUnit.Framework.Test]
- public virtual void TestEventTypes() {
- String[] expectedTypes = new String[] { "pdfOcr-tesseract4-image-ocr", "pdfOcr-tesseract4-image-to-pdf", "pdfOcr-tesseract4-image-to-pdfa"
- };
- PdfOcrTesseract4Event[] testedEvents = new PdfOcrTesseract4Event[] { PdfOcrTesseract4Event.TESSERACT4_IMAGE_OCR
- , PdfOcrTesseract4Event.TESSERACT4_IMAGE_TO_PDF, PdfOcrTesseract4Event.TESSERACT4_IMAGE_TO_PDFA };
- for (int i = 0; i < testedEvents.Length; i++) {
- NUnit.Framework.Assert.AreEqual(expectedTypes[i], testedEvents[i].GetEventType());
- }
- }
-
- [NUnit.Framework.Test]
- public virtual void TestOriginId() {
- String expected = PDF_OCR_TESSERACT4_ORIGIN_ID;
- PdfOcrTesseract4Event[] testedEvents = new PdfOcrTesseract4Event[] { PdfOcrTesseract4Event.TESSERACT4_IMAGE_TO_PDF
- , PdfOcrTesseract4Event.TESSERACT4_IMAGE_TO_PDF, PdfOcrTesseract4Event.TESSERACT4_IMAGE_TO_PDFA };
- foreach (PdfOcrTesseract4Event @event in testedEvents) {
- NUnit.Framework.Assert.AreEqual(expected, @event.GetOriginId());
- }
- }
- }
-}
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/events/multithreading/DoImageOcrRunnable.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/events/multithreading/DoImageOcrRunnable.cs
deleted file mode 100644
index e4a91e7..0000000
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/events/multithreading/DoImageOcrRunnable.cs
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
-This file is part of the iText (R) project.
-Copyright (c) 1998-2021 iText Group NV
-Authors: iText Software.
-
-This program is offered under a commercial and under the AGPL license.
-For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
-
-AGPL licensing:
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with this program. If not, see .
-*/
-using System;
-using System.IO;
-using iText.IO.Util;
-using iText.Kernel.Counter.Event;
-using iText.Kernel.Pdf;
-using iText.Pdfocr;
-using iText.Pdfocr.Tesseract4;
-
-namespace iText.Pdfocr.Events.Multithreading {
- public class DoImageOcrRunnable : Object {
- private AbstractTesseract4OcrEngine tesseractReader;
-
- private FileInfo imgFile;
-
- private FileInfo outputFile;
-
- private bool createPdf;
-
- private IMetaInfo metaInfo;
-
- internal DoImageOcrRunnable(AbstractTesseract4OcrEngine tesseractReader, IMetaInfo metaInfo, FileInfo imgFile
- , FileInfo outputFile, bool createPdf) {
- this.tesseractReader = tesseractReader;
- this.metaInfo = metaInfo;
- this.imgFile = imgFile;
- this.outputFile = outputFile;
- this.createPdf = createPdf;
- }
-
- public virtual void Run() {
- try {
- tesseractReader.SetThreadLocalMetaInfo(metaInfo);
- if (createPdf) {
- new OcrPdfCreator(tesseractReader).CreatePdf(JavaUtil.ArraysAsList(imgFile), new PdfWriter(outputFile));
- }
- else {
- tesseractReader.DoTesseractOcr(imgFile, outputFile, OutputFormat.TXT);
- }
- // for test purposes
- System.Console.Out.WriteLine(imgFile.Name);
- }
- catch (Exception e) {
- throw new Exception(e.Message);
- }
- }
- }
-}
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/events/multithreading/MultiThreadingTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/events/multithreading/MultiThreadingTest.cs
deleted file mode 100644
index 9aa2489..0000000
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/events/multithreading/MultiThreadingTest.cs
+++ /dev/null
@@ -1,130 +0,0 @@
-/*
-This file is part of the iText (R) project.
-Copyright (c) 1998-2021 iText Group NV
-Authors: iText Software.
-
-This program is offered under a commercial and under the AGPL license.
-For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
-
-AGPL licensing:
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with this program. If not, see .
-*/
-using System;
-using System.Collections.Generic;
-using System.IO;
-using System.Threading;
-using iText.Kernel.Counter;
-using iText.Kernel.Counter.Event;
-using iText.Metainfo;
-using iText.Pdfocr;
-using iText.Pdfocr.Tesseract4;
-using iText.Pdfocr.Tesseract4.Events;
-
-namespace iText.Pdfocr.Events.Multithreading {
- public abstract class MultiThreadingTest : IntegrationTestHelper {
- protected internal static readonly String destinationFolder = NUnit.Framework.TestContext.CurrentContext.TestDirectory
- + "/test/itext/pdfocr/events/multithreading/";
-
- protected internal static readonly String sourceFolder = iText.Test.TestUtil.GetParentProjectDirectory(NUnit.Framework.TestContext
- .CurrentContext.TestDirectory) + "/resources/itext/pdfocr/events/multithreading/";
-
- internal AbstractTesseract4OcrEngine tesseractReader;
-
- public MultiThreadingTest(IntegrationTestHelper.ReaderType type) {
- tesseractReader = GetTesseractReader(type);
- }
-
- [NUnit.Framework.OneTimeSetUp]
- public static void BeforeClass() {
- CreateDestinationFolder(destinationFolder);
- }
-
- [NUnit.Framework.SetUp]
- public virtual void InitTesseractProperties() {
- Tesseract4OcrEngineProperties ocrEngineProperties = new Tesseract4OcrEngineProperties();
- ocrEngineProperties.SetPathToTessData(new FileInfo(sourceFolder + "../../tessdata"));
- tesseractReader.SetTesseract4OcrEngineProperties(ocrEngineProperties);
- }
-
- [NUnit.Framework.Test]
- public virtual void TestEventCountingPdfEvent() {
- MultiThreadingTest.TestEventCounter eventCounter = new MultiThreadingTest.TestEventCounter();
- IEventCounterFactory factory = new SimpleEventCounterFactory(eventCounter);
- EventCounterHandler.GetInstance().Register(factory);
- try {
- int n = 16;
- IMetaInfo metainfo = new TestMetaInfo();
- Thread[] threads = new Thread[n];
- for (int i = 0; i < n; i++) {
- // We do not use Runnable as the variable's type because of porting issues
- DoImageOcrRunnable runnable = new DoImageOcrRunnable(tesseractReader, metainfo, new FileInfo(sourceFolder
- + "numbers_01.jpg"), new FileInfo(destinationFolder + "ocr-result-" + (i + 1) + ".txt"), 0 == i % 2);
- threads[i] = GetThread(runnable);
- }
- for (int i = 0; i < n; i++) {
- threads[i].Start();
- }
- for (int i = 0; i < n; i++) {
- threads[i].Join();
- }
- NUnit.Framework.Assert.AreEqual(n, eventCounter.GetEvents().Count);
- int expectedPdfEvents = n / 2;
- int expectedImageEvents = n - expectedPdfEvents;
- int foundPdfEvents = 0;
- int foundImageEvents = 0;
- for (int i = 0; i < n; i++) {
- if (PdfOcrTesseract4Event.TESSERACT4_IMAGE_TO_PDF == eventCounter.GetEvents()[i]) {
- foundPdfEvents++;
- }
- else {
- if (PdfOcrTesseract4Event.TESSERACT4_IMAGE_OCR == eventCounter.GetEvents()[i]) {
- foundImageEvents++;
- }
- }
- NUnit.Framework.Assert.AreEqual(metainfo, eventCounter.GetMetaInfos()[i]);
- }
- NUnit.Framework.Assert.AreEqual(expectedImageEvents, foundImageEvents);
- NUnit.Framework.Assert.AreEqual(expectedPdfEvents, foundPdfEvents);
- }
- finally {
- EventCounterHandler.GetInstance().Unregister(factory);
- }
- }
-
- private static Thread GetThread(DoImageOcrRunnable runnable) {
- return new Thread(new ThreadStart(runnable.Run));
- }
-
- public class TestEventCounter : EventCounter {
- private IList events = new List();
-
- private IList metaInfos = new List();
-
- public virtual IList GetEvents() {
- return events;
- }
-
- public virtual IList GetMetaInfos() {
- return metaInfos;
- }
-
- [System.Runtime.CompilerServices.MethodImpl(System.Runtime.CompilerServices.MethodImplOptions.Synchronized
- )]
- protected override void OnEvent(IEvent @event, IMetaInfo metaInfo) {
- this.events.Add(@event);
- this.metaInfos.Add(metaInfo);
- }
- }
- }
-}
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/events/EventCountingLibTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/exceptions/PdfOcrTesseract4ExceptionTest.cs
similarity index 53%
rename from itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/events/EventCountingLibTest.cs
rename to itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/exceptions/PdfOcrTesseract4ExceptionTest.cs
index b556ab4..f5d2cd5 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/events/EventCountingLibTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/exceptions/PdfOcrTesseract4ExceptionTest.cs
@@ -20,27 +20,24 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
-using System.IO;
-using iText.IO.Util;
-using iText.Pdfocr;
-using iText.Pdfocr.Tesseract4;
-using iText.Test.Attributes;
+using System;
+using iText.Pdfocr.Tesseract4.Exceptions;
+using iText.Test;
-namespace iText.Pdfocr.Events {
- public class EventCountingLibTest : EventCountingTest {
- public EventCountingLibTest()
- : base(IntegrationTestHelper.ReaderType.LIB) {
+namespace iText.Pdfocr.Exceptions {
+ public class PdfOcrTesseract4ExceptionTest : ExtendedITextTest {
+ [NUnit.Framework.Test]
+ public virtual void Tesseract4PdfOcrExceptionThrowableConstructorTest() {
+ Exception cause = new System.IO.IOException();
+ PdfOcrTesseract4Exception exception = new PdfOcrTesseract4Exception(cause);
+ NUnit.Framework.Assert.AreEqual(cause, exception.InnerException);
}
[NUnit.Framework.Test]
- [LogMessage(Tesseract4LogMessageConstant.CANNOT_READ_INPUT_IMAGE)]
- public override void TestEventCountingCustomMetaInfoError() {
- FileInfo img = new FileInfo(TEST_IMAGES_DIRECTORY + "numbers_101.jpg");
- NUnit.Framework.Assert.That(() => {
- base.TestEventCountingCustomMetaInfoError();
- }
- , NUnit.Framework.Throws.InstanceOf().With.Message.EqualTo(MessageFormatUtil.Format(Tesseract4LogMessageConstant.CANNOT_READ_INPUT_IMAGE, img.FullName)))
-;
+ public virtual void Tesseract4PdfOcrInputExceptionThrowableConstructorTest() {
+ Exception cause = new System.IO.IOException();
+ PdfOcrTesseract4Exception exception = new PdfOcrInputTesseract4Exception(cause);
+ NUnit.Framework.Assert.AreEqual(cause, exception.InnerException);
}
}
}
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/general/BasicTesseractIntegrationTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/general/BasicTesseractIntegrationTest.cs
index 7534ebb..374e143 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/general/BasicTesseractIntegrationTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/general/BasicTesseractIntegrationTest.cs
@@ -24,14 +24,16 @@ You should have received a copy of the GNU Affero General Public License
using System.Collections.Generic;
using System.IO;
using System.Text;
+using iText.Commons.Utils;
using iText.IO.Source;
-using iText.IO.Util;
using iText.Kernel.Colors;
using iText.Kernel.Geom;
using iText.Kernel.Pdf;
using iText.Kernel.Pdf.Canvas.Parser;
using iText.Pdfocr;
using iText.Pdfocr.Tesseract4;
+using iText.Pdfocr.Tesseract4.Exceptions;
+using iText.Pdfocr.Tesseract4.Logs;
using iText.Test.Attributes;
namespace iText.Pdfocr.General {
@@ -129,7 +131,7 @@ public virtual void TestInputInvalidImage() {
OcrPdfCreator ocrPdfCreator = new OcrPdfCreator(tesseractReader);
ocrPdfCreator.CreatePdf(JavaUtil.ArraysAsList(file3, file1, file2, file3), GetPdfWriter());
}
- , NUnit.Framework.Throws.InstanceOf().With.Message.EqualTo(MessageFormatUtil.Format(Tesseract4OcrException.CANNOT_READ_PROVIDED_IMAGE, new FileInfo(TEST_IMAGES_DIRECTORY + "example.txt").FullName)))
+ , NUnit.Framework.Throws.InstanceOf().With.Message.EqualTo(MessageFormatUtil.Format(PdfOcrTesseract4ExceptionMessageConstant.CANNOT_READ_PROVIDED_IMAGE, new FileInfo(TEST_IMAGES_DIRECTORY + "example.txt").FullName)))
;
}
@@ -159,7 +161,7 @@ public virtual void TestNullPathToTessData() {
(null));
GetTextFromPdf(tesseractReader, file, JavaCollectionsUtil.SingletonList("eng"));
}
- , NUnit.Framework.Throws.InstanceOf().With.Message.EqualTo(Tesseract4OcrException.PATH_TO_TESS_DATA_DIRECTORY_IS_INVALID))
+ , NUnit.Framework.Throws.InstanceOf().With.Message.EqualTo(PdfOcrTesseract4ExceptionMessageConstant.PATH_TO_TESS_DATA_DIRECTORY_IS_INVALID))
;
}
@@ -171,11 +173,11 @@ public virtual void TestPathToTessDataWithoutData() {
(new FileInfo("test/")));
GetTextFromPdf(tesseractReader, file, JavaCollectionsUtil.SingletonList("eng"));
}
- , NUnit.Framework.Throws.InstanceOf().With.Message.EqualTo(Tesseract4OcrException.PATH_TO_TESS_DATA_DIRECTORY_IS_INVALID))
+ , NUnit.Framework.Throws.InstanceOf().With.Message.EqualTo(PdfOcrTesseract4ExceptionMessageConstant.PATH_TO_TESS_DATA_DIRECTORY_IS_INVALID))
;
}
- [LogMessage(Tesseract4OcrException.INCORRECT_LANGUAGE)]
+ [LogMessage(PdfOcrTesseract4ExceptionMessageConstant.INCORRECT_LANGUAGE)]
[NUnit.Framework.Test]
public virtual void TestEmptyPathToTessData() {
NUnit.Framework.Assert.That(() => {
@@ -187,33 +189,33 @@ public virtual void TestEmptyPathToTessData() {
NUnit.Framework.Assert.AreEqual(new FileInfo("").FullName, tesseractReader.GetTesseract4OcrEngineProperties
().GetPathToTessData().FullName);
}
- , NUnit.Framework.Throws.InstanceOf().With.Message.EqualTo(MessageFormatUtil.Format(Tesseract4OcrException.INCORRECT_LANGUAGE, "eng.traineddata", new FileInfo(".").FullName)))
+ , NUnit.Framework.Throws.InstanceOf().With.Message.EqualTo(MessageFormatUtil.Format(PdfOcrTesseract4ExceptionMessageConstant.INCORRECT_LANGUAGE, "eng.traineddata", new FileInfo(".").FullName)))
;
}
- [LogMessage(Tesseract4OcrException.INCORRECT_LANGUAGE, Count = 1)]
+ [LogMessage(PdfOcrTesseract4ExceptionMessageConstant.INCORRECT_LANGUAGE, Count = 1)]
[NUnit.Framework.Test]
public virtual void TestIncorrectLanguage() {
NUnit.Framework.Assert.That(() => {
FileInfo file = new FileInfo(TEST_IMAGES_DIRECTORY + "spanish_01.jpg");
GetTextFromPdf(tesseractReader, file, JavaCollectionsUtil.SingletonList("spa_new"));
}
- , NUnit.Framework.Throws.InstanceOf().With.Message.EqualTo(MessageFormatUtil.Format(Tesseract4OcrException.INCORRECT_LANGUAGE, "spa_new.traineddata", new FileInfo(LANG_TESS_DATA_DIRECTORY).FullName)))
+ , NUnit.Framework.Throws.InstanceOf().With.Message.EqualTo(MessageFormatUtil.Format(PdfOcrTesseract4ExceptionMessageConstant.INCORRECT_LANGUAGE, "spa_new.traineddata", new FileInfo(LANG_TESS_DATA_DIRECTORY).FullName)))
;
}
- [LogMessage(Tesseract4OcrException.INCORRECT_LANGUAGE, Count = 1)]
+ [LogMessage(PdfOcrTesseract4ExceptionMessageConstant.INCORRECT_LANGUAGE, Count = 1)]
[NUnit.Framework.Test]
public virtual void TestListOfLanguagesWithOneIncorrectLanguage() {
NUnit.Framework.Assert.That(() => {
FileInfo file = new FileInfo(TEST_IMAGES_DIRECTORY + "spanish_01.jpg");
GetTextFromPdf(tesseractReader, file, JavaUtil.ArraysAsList("spa", "spa_new", "spa_old"));
}
- , NUnit.Framework.Throws.InstanceOf().With.Message.EqualTo(MessageFormatUtil.Format(Tesseract4OcrException.INCORRECT_LANGUAGE, "spa_new.traineddata", new FileInfo(LANG_TESS_DATA_DIRECTORY).FullName)))
+ , NUnit.Framework.Throws.InstanceOf().With.Message.EqualTo(MessageFormatUtil.Format(PdfOcrTesseract4ExceptionMessageConstant.INCORRECT_LANGUAGE, "spa_new.traineddata", new FileInfo(LANG_TESS_DATA_DIRECTORY).FullName)))
;
}
- [LogMessage(Tesseract4OcrException.INCORRECT_LANGUAGE, Count = 1)]
+ [LogMessage(PdfOcrTesseract4ExceptionMessageConstant.INCORRECT_LANGUAGE, Count = 1)]
[NUnit.Framework.Test]
public virtual void TestIncorrectScriptsName() {
NUnit.Framework.Assert.That(() => {
@@ -222,11 +224,11 @@ public virtual void TestIncorrectScriptsName() {
(new FileInfo(SCRIPT_TESS_DATA_DIRECTORY)));
GetTextFromPdf(tesseractReader, file, JavaCollectionsUtil.SingletonList("English"));
}
- , NUnit.Framework.Throws.InstanceOf().With.Message.EqualTo(MessageFormatUtil.Format(Tesseract4OcrException.INCORRECT_LANGUAGE, "English.traineddata", new FileInfo(SCRIPT_TESS_DATA_DIRECTORY).FullName)))
+ , NUnit.Framework.Throws.InstanceOf().With.Message.EqualTo(MessageFormatUtil.Format(PdfOcrTesseract4ExceptionMessageConstant.INCORRECT_LANGUAGE, "English.traineddata", new FileInfo(SCRIPT_TESS_DATA_DIRECTORY).FullName)))
;
}
- [LogMessage(Tesseract4OcrException.INCORRECT_LANGUAGE, Count = 1)]
+ [LogMessage(PdfOcrTesseract4ExceptionMessageConstant.INCORRECT_LANGUAGE, Count = 1)]
[NUnit.Framework.Test]
public virtual void TestListOfScriptsWithOneIncorrect() {
NUnit.Framework.Assert.That(() => {
@@ -235,7 +237,7 @@ public virtual void TestListOfScriptsWithOneIncorrect() {
(new FileInfo(SCRIPT_TESS_DATA_DIRECTORY)));
GetTextFromPdf(tesseractReader, file, JavaUtil.ArraysAsList("Georgian", "Japanese", "English"));
}
- , NUnit.Framework.Throws.InstanceOf().With.Message.EqualTo(MessageFormatUtil.Format(Tesseract4OcrException.INCORRECT_LANGUAGE, "English.traineddata", new FileInfo(SCRIPT_TESS_DATA_DIRECTORY).FullName)))
+ , NUnit.Framework.Throws.InstanceOf().With.Message.EqualTo(MessageFormatUtil.Format(PdfOcrTesseract4ExceptionMessageConstant.INCORRECT_LANGUAGE, "English.traineddata", new FileInfo(SCRIPT_TESS_DATA_DIRECTORY).FullName)))
;
}
@@ -267,7 +269,7 @@ public virtual void TestTxtStringOutput() {
);
String result = tesseractReader.DoImageOcr(file, OutputFormat.TXT);
foreach (String line in expectedOutput) {
- NUnit.Framework.Assert.IsTrue(iText.IO.Util.StringUtil.ReplaceAll(result, "\r", "").Contains(line));
+ NUnit.Framework.Assert.IsTrue(iText.Commons.Utils.StringUtil.ReplaceAll(result, "\r", "").Contains(line));
}
}
@@ -280,7 +282,7 @@ public virtual void TestHocrStringOutput() {
);
String result = tesseractReader.DoImageOcr(file, OutputFormat.HOCR);
foreach (String line in expectedOutput) {
- NUnit.Framework.Assert.IsTrue(iText.IO.Util.StringUtil.ReplaceAll(result, "\r", "").Contains(line));
+ NUnit.Framework.Assert.IsTrue(iText.Commons.Utils.StringUtil.ReplaceAll(result, "\r", "").Contains(line));
}
}
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/imageformats/ImageFormatIntegrationTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/imageformats/ImageFormatIntegrationTest.cs
index 7b65a6d..fd99ae9 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/imageformats/ImageFormatIntegrationTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/imageformats/ImageFormatIntegrationTest.cs
@@ -22,11 +22,13 @@ You should have received a copy of the GNU Affero General Public License
*/
using System;
using System.IO;
-using iText.IO.Util;
+using iText.Commons.Utils;
using iText.Kernel.Colors;
using iText.Kernel.Utils;
using iText.Pdfocr;
using iText.Pdfocr.Tesseract4;
+using iText.Pdfocr.Tesseract4.Exceptions;
+using iText.Pdfocr.Tesseract4.Logs;
using iText.Test.Attributes;
namespace iText.Pdfocr.Imageformats {
@@ -66,8 +68,8 @@ public virtual void TestBMPText() {
String expectedOutput = "This is a test message for OCR Scanner Test";
String realOutputHocr = GetTextFromPdf(tesseractReader, new FileInfo(path), JavaCollectionsUtil.SingletonList
("eng"));
- realOutputHocr = iText.IO.Util.StringUtil.ReplaceAll(realOutputHocr, "[\n]", " ");
- realOutputHocr = iText.IO.Util.StringUtil.ReplaceAll(realOutputHocr, "[‘]", "");
+ realOutputHocr = iText.Commons.Utils.StringUtil.ReplaceAll(realOutputHocr, "[\n]", " ");
+ realOutputHocr = iText.Commons.Utils.StringUtil.ReplaceAll(realOutputHocr, "[‘]", "");
NUnit.Framework.Assert.IsTrue(realOutputHocr.Contains((expectedOutput)));
}
@@ -90,7 +92,7 @@ public virtual void TestBMPText02() {
String expectedOutput = "This is a test message for OCR Scanner Test BMPTest";
String realOutputHocr = GetTextFromPdf(tesseractReader, new FileInfo(path), JavaCollectionsUtil.SingletonList
("eng"));
- realOutputHocr = iText.IO.Util.StringUtil.ReplaceAll(realOutputHocr, "[\n]", " ");
+ realOutputHocr = iText.Commons.Utils.StringUtil.ReplaceAll(realOutputHocr, "[\n]", " ");
NUnit.Framework.Assert.IsTrue(realOutputHocr.Contains((expectedOutput)));
}
@@ -226,7 +228,7 @@ public virtual void TestInputWrongFormat() {
FileInfo file = new FileInfo(TEST_IMAGES_DIRECTORY + "wierdwords.gif");
GetTextFromPdf(tesseractReader, file);
}
- , NUnit.Framework.Throws.InstanceOf().With.Message.EqualTo(MessageFormatUtil.Format(Tesseract4OcrException.INCORRECT_INPUT_IMAGE_FORMAT, "wierdwords.gif")))
+ , NUnit.Framework.Throws.InstanceOf().With.Message.EqualTo(MessageFormatUtil.Format(PdfOcrTesseract4ExceptionMessageConstant.INCORRECT_INPUT_IMAGE_FORMAT, "wierdwords.gif")))
;
}
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdfa3u/PdfA3UIntegrationTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdfa3u/PdfA3UIntegrationTest.cs
index 0dfa64d..20f39a3 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdfa3u/PdfA3UIntegrationTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdfa3u/PdfA3UIntegrationTest.cs
@@ -22,7 +22,7 @@ You should have received a copy of the GNU Affero General Public License
*/
using System;
using System.IO;
-using iText.IO.Util;
+using iText.Commons.Utils;
using iText.Kernel.Colors;
using iText.Kernel.Pdf;
using iText.Kernel.Utils;
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdflayers/PdfLayersIntegrationTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdflayers/PdfLayersIntegrationTest.cs
index 9ad330a..2a2d450 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdflayers/PdfLayersIntegrationTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdflayers/PdfLayersIntegrationTest.cs
@@ -23,7 +23,7 @@ You should have received a copy of the GNU Affero General Public License
using System;
using System.Collections.Generic;
using System.IO;
-using iText.IO.Util;
+using iText.Commons.Utils;
using iText.Kernel.Pdf;
using iText.Kernel.Pdf.Layer;
using iText.Pdfocr;
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tessdata/TessDataIntegrationLibTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tessdata/TessDataIntegrationLibTest.cs
index 7026623..4d855b8 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tessdata/TessDataIntegrationLibTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tessdata/TessDataIntegrationLibTest.cs
@@ -21,11 +21,13 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see .
*/
using System;
-using iText.IO.Util;
+using iText.Commons.Utils;
using iText.Kernel.Colors;
using iText.Kernel.Utils;
using iText.Pdfocr;
+using iText.Pdfocr.Logs;
using iText.Pdfocr.Tesseract4;
+using iText.Pdfocr.Tesseract4.Exceptions;
using iText.Test.Attributes;
namespace iText.Pdfocr.Tessdata {
@@ -34,7 +36,8 @@ public TessDataIntegrationLibTest()
: base(IntegrationTestHelper.ReaderType.LIB) {
}
- [LogMessage(Tesseract4OcrException.PATH_TO_TESS_DATA_DIRECTORY_CONTAINS_NON_ASCII_CHARACTERS)]
+ [LogMessage(PdfOcrTesseract4ExceptionMessageConstant.PATH_TO_TESS_DATA_DIRECTORY_CONTAINS_NON_ASCII_CHARACTERS
+ )]
[NUnit.Framework.Test]
public virtual void TestTessDataWithNonAsciiPath() {
NUnit.Framework.Assert.That(() => {
@@ -43,7 +46,7 @@ public virtual void TestTessDataWithNonAsciiPath() {
NUnit.Framework.Assert.Fail("Should throw exception for the tesseract lib when tess data path contains non ASCII characters"
);
}
- , NUnit.Framework.Throws.InstanceOf().With.Message.EqualTo(Tesseract4OcrException.PATH_TO_TESS_DATA_DIRECTORY_CONTAINS_NON_ASCII_CHARACTERS))
+ , NUnit.Framework.Throws.InstanceOf().With.Message.EqualTo(PdfOcrTesseract4ExceptionMessageConstant.PATH_TO_TESS_DATA_DIRECTORY_CONTAINS_NON_ASCII_CHARACTERS))
;
}
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tessdata/TessDataIntegrationTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tessdata/TessDataIntegrationTest.cs
index d5dd493..8e91618 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tessdata/TessDataIntegrationTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tessdata/TessDataIntegrationTest.cs
@@ -23,18 +23,20 @@ You should have received a copy of the GNU Affero General Public License
using System;
using System.Collections.Generic;
using System.IO;
-using Common.Logging;
-using iText.IO.Util;
+using Microsoft.Extensions.Logging;
+using iText.Commons;
+using iText.Commons.Utils;
using iText.Kernel.Colors;
using iText.Kernel.Pdf;
using iText.Kernel.Utils;
using iText.Pdfocr;
+using iText.Pdfocr.Logs;
using iText.Pdfocr.Tesseract4;
using iText.Test.Attributes;
namespace iText.Pdfocr.Tessdata {
public abstract class TessDataIntegrationTest : IntegrationTestHelper {
- private static readonly ILog LOGGER = LogManager.GetLogger(typeof(iText.Pdfocr.Tessdata.TessDataIntegrationTest
+ private static readonly ILogger LOGGER = ITextLogManager.GetLogger(typeof(iText.Pdfocr.Tessdata.TessDataIntegrationTest
));
internal AbstractTesseract4OcrEngine tesseractReader;
@@ -155,7 +157,7 @@ public virtual void TextJapaneseOutputFromTxtFile() {
String expected = "日本語文法";
String result = GetRecognizedTextFromTextFile(tesseractReader, imgPath, JavaCollectionsUtil.SingletonList<
String>("jpn"));
- result = iText.IO.Util.StringUtil.ReplaceAll(result, "[\f\n]", "");
+ result = iText.Commons.Utils.StringUtil.ReplaceAll(result, "[\f\n]", "");
// correct result with specified japanese language
NUnit.Framework.Assert.IsTrue(result.Contains(expected));
}
@@ -166,8 +168,8 @@ public virtual void TestFrenchOutputFromTxtFile() {
String expectedFr = "RESTEZ\nCALME\nPARLEZ EN\nFRANÇAIS";
String result = GetRecognizedTextFromTextFile(tesseractReader, imgPath, JavaCollectionsUtil.SingletonList<
String>("fra"));
- result = iText.IO.Util.StringUtil.ReplaceAll(result, "(?:\\n\\f)+", "").Trim();
- result = iText.IO.Util.StringUtil.ReplaceAll(result, "\\n\\n", "\n").Trim();
+ result = iText.Commons.Utils.StringUtil.ReplaceAll(result, "(?:\\n\\f)+", "").Trim();
+ result = iText.Commons.Utils.StringUtil.ReplaceAll(result, "\\n\\n", "\n").Trim();
// correct result with specified spanish language
NUnit.Framework.Assert.IsTrue(result.EndsWith(expectedFr));
// incorrect result when languages are not specified
@@ -246,7 +248,7 @@ public virtual void TestArabicTextWithEng() {
String result = GetTextFromPdf(tesseractReader, file, JavaUtil.ArraysAsList("ara", "eng"), CAIRO_FONT_PATH
);
// correct result with specified arabic+english languages
- NUnit.Framework.Assert.AreEqual(expected, iText.IO.Util.StringUtil.ReplaceAll(result, "[?]", ""));
+ NUnit.Framework.Assert.AreEqual(expected, iText.Commons.Utils.StringUtil.ReplaceAll(result, "[?]", ""));
// incorrect result when languages are not specified
// or languages were specified in the wrong order
NUnit.Framework.Assert.AreNotEqual(expected, GetTextFromPdf(tesseractReader, file, JavaCollectionsUtil.SingletonList
@@ -572,9 +574,9 @@ private bool CompareTxtLines(IList expected, IList result) {
}
for (int i = 0; i < expected.Count; i++) {
String exp = expected[i].Replace("\n", "").Replace("\f", "");
- exp = iText.IO.Util.StringUtil.ReplaceAll(exp, "[^\\u0009\\u000A\\u000D\\u0020-\\u007E]", "");
+ exp = iText.Commons.Utils.StringUtil.ReplaceAll(exp, "[^\\u0009\\u000A\\u000D\\u0020-\\u007E]", "");
String res = result[i].Replace("\n", "").Replace("\f", "");
- res = iText.IO.Util.StringUtil.ReplaceAll(res, "[^\\u0009\\u000A\\u000D\\u0020-\\u007E]", "");
+ res = iText.Commons.Utils.StringUtil.ReplaceAll(res, "[^\\u0009\\u000A\\u000D\\u0020-\\u007E]", "");
if (expected[i] == null || result[i] == null) {
areEqual = false;
break;
@@ -599,7 +601,7 @@ private bool CompareTxtFiles(String expectedFilePath, String resultFilePath) {
}
catch (System.IO.IOException e) {
areEqual = false;
- LOGGER.Error(e.Message);
+ LOGGER.LogError(e.Message);
}
return areEqual;
}
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/ApiTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/ApiTest.cs
index 677a711..75b4695 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/ApiTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/ApiTest.cs
@@ -23,13 +23,15 @@ You should have received a copy of the GNU Affero General Public License
using System;
using System.Collections.Generic;
using System.IO;
-using iText.IO.Util;
+using iText.Commons.Utils;
using iText.Pdfocr;
+using iText.Pdfocr.Tesseract4.Exceptions;
+using iText.Pdfocr.Tesseract4.Logs;
using iText.Test.Attributes;
namespace iText.Pdfocr.Tesseract4 {
public class ApiTest : IntegrationTestHelper {
- [LogMessage(Tesseract4OcrException.PATH_TO_TESS_DATA_IS_NOT_SET)]
+ [LogMessage(PdfOcrTesseract4ExceptionMessageConstant.PATH_TO_TESS_DATA_IS_NOT_SET)]
[NUnit.Framework.Test]
public virtual void TestDefaultTessDataPathValidationForLib() {
NUnit.Framework.Assert.That(() => {
@@ -38,11 +40,11 @@ public virtual void TestDefaultTessDataPathValidationForLib() {
Tesseract4LibOcrEngine engine = new Tesseract4LibOcrEngine(new Tesseract4OcrEngineProperties());
engine.DoImageOcr(imgFile);
}
- , NUnit.Framework.Throws.InstanceOf().With.Message.EqualTo(Tesseract4OcrException.PATH_TO_TESS_DATA_IS_NOT_SET))
+ , NUnit.Framework.Throws.InstanceOf().With.Message.EqualTo(PdfOcrTesseract4ExceptionMessageConstant.PATH_TO_TESS_DATA_IS_NOT_SET))
;
}
- [LogMessage(Tesseract4OcrException.PATH_TO_TESS_DATA_IS_NOT_SET)]
+ [LogMessage(PdfOcrTesseract4ExceptionMessageConstant.PATH_TO_TESS_DATA_IS_NOT_SET)]
[NUnit.Framework.Test]
public virtual void TestDefaultTessDataPathValidationForExecutable() {
NUnit.Framework.Assert.That(() => {
@@ -52,7 +54,7 @@ public virtual void TestDefaultTessDataPathValidationForExecutable() {
());
engine.DoImageOcr(imgFile);
}
- , NUnit.Framework.Throws.InstanceOf().With.Message.EqualTo(Tesseract4OcrException.PATH_TO_TESS_DATA_IS_NOT_SET))
+ , NUnit.Framework.Throws.InstanceOf().With.Message.EqualTo(PdfOcrTesseract4ExceptionMessageConstant.PATH_TO_TESS_DATA_IS_NOT_SET))
;
}
@@ -66,12 +68,12 @@ public virtual void TestDoTesseractOcrForIncorrectImageForExecutable() {
().SetPathToTessData(GetTessDataDirectory()));
engine.DoTesseractOcr(imgFile, null, OutputFormat.HOCR);
}
- , NUnit.Framework.Throws.InstanceOf().With.Message.EqualTo(MessageFormatUtil.Format(Tesseract4OcrException.CANNOT_READ_PROVIDED_IMAGE, new FileInfo(TEST_IMAGES_DIRECTORY + "numbers_01").FullName)))
+ , NUnit.Framework.Throws.InstanceOf().With.Message.EqualTo(MessageFormatUtil.Format(PdfOcrTesseract4ExceptionMessageConstant.CANNOT_READ_PROVIDED_IMAGE, new FileInfo(TEST_IMAGES_DIRECTORY + "numbers_01").FullName)))
;
}
[LogMessage(Tesseract4LogMessageConstant.CANNOT_READ_INPUT_IMAGE)]
- [LogMessage(Tesseract4OcrException.TESSERACT_FAILED)]
+ [LogMessage(PdfOcrTesseract4ExceptionMessageConstant.TESSERACT_FAILED)]
[LogMessage(Tesseract4LogMessageConstant.TESSERACT_FAILED)]
[NUnit.Framework.Test]
public virtual void TestOcrResultForSinglePageForNullImage() {
@@ -82,7 +84,7 @@ public virtual void TestOcrResultForSinglePageForNullImage() {
tesseract4LibOcrEngine.InitializeTesseract(OutputFormat.TXT);
tesseract4LibOcrEngine.DoTesseractOcr(null, null, OutputFormat.HOCR);
}
- , NUnit.Framework.Throws.InstanceOf().With.Message.EqualTo(Tesseract4OcrException.TESSERACT_FAILED))
+ , NUnit.Framework.Throws.InstanceOf().With.Message.EqualTo(PdfOcrTesseract4ExceptionMessageConstant.TESSERACT_FAILED))
;
}
@@ -113,10 +115,6 @@ public virtual void TestDetectAndFixBrokenBBoxes() {
NUnit.Framework.Assert.AreEqual(136.5f, (float)textInfo.GetBboxRect().GetBottom(), 0.1);
NUnit.Framework.Assert.AreEqual(385.5, (float)textInfo.GetBboxRect().GetRight(), 0.1);
NUnit.Framework.Assert.AreEqual(162.75, (float)textInfo.GetBboxRect().GetTop(), 0.1);
- NUnit.Framework.Assert.AreEqual(383.0f, (float)textInfo.GetBbox()[0], 0.1);
- NUnit.Framework.Assert.AreEqual(101.0f, (float)textInfo.GetBbox()[1], 0.1);
- NUnit.Framework.Assert.AreEqual(514.0f, (float)textInfo.GetBbox()[2], 0.1);
- NUnit.Framework.Assert.AreEqual(136.0f, (float)textInfo.GetBbox()[3], 0.1);
}
}
}
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/ImageIntegrationTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/ImageIntegrationTest.cs
index 2d26206..62d10de 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/ImageIntegrationTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/ImageIntegrationTest.cs
@@ -23,14 +23,15 @@ You should have received a copy of the GNU Affero General Public License
using System;
using System.Collections.Generic;
using System.IO;
-using Common.Logging;
-using iText.IO.Util;
+using Microsoft.Extensions.Logging;
+using iText.Commons;
+using iText.Commons.Utils;
using iText.Kernel.Utils;
using iText.Pdfocr;
namespace iText.Pdfocr.Tesseract4 {
public abstract class ImageIntegrationTest : IntegrationTestHelper {
- private static readonly ILog LOGGER = LogManager.GetLogger(typeof(iText.Pdfocr.Tesseract4.ImageIntegrationTest
+ private static readonly ILogger LOGGER = ITextLogManager.GetLogger(typeof(iText.Pdfocr.Tesseract4.ImageIntegrationTest
));
internal AbstractTesseract4OcrEngine tesseractReader;
@@ -70,8 +71,8 @@ public virtual void TestHocrRotatedImage() {
NUnit.Framework.Assert.AreEqual("degrees", pageData.Get(1)[1].GetText());
NUnit.Framework.Assert.AreEqual("rotated", pageData.Get(1)[2].GetText());
NUnit.Framework.Assert.AreEqual("image", pageData.Get(1)[3].GetText());
- NUnit.Framework.Assert.IsTrue(pageData.Get(1)[1].GetBbox()[2] - pageData.Get(1)[0].GetBbox()[0] > 100);
- NUnit.Framework.Assert.IsTrue(pageData.Get(1)[1].GetBbox()[3] - pageData.Get(1)[0].GetBbox()[1] < 100);
+ NUnit.Framework.Assert.IsTrue(pageData.Get(1)[1].GetBboxRect().GetWidth() > 100);
+ NUnit.Framework.Assert.IsTrue(pageData.Get(1)[1].GetBboxRect().GetHeight() < 100);
}
[NUnit.Framework.Test]
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/ImagePreprocessingUtilTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/ImagePreprocessingUtilTest.cs
index 848860f..c163488 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/ImagePreprocessingUtilTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/ImagePreprocessingUtilTest.cs
@@ -22,9 +22,11 @@ You should have received a copy of the GNU Affero General Public License
*/
using System;
using System.IO;
+using iText.Commons.Utils;
using iText.IO.Image;
-using iText.IO.Util;
using iText.Pdfocr;
+using iText.Pdfocr.Tesseract4.Exceptions;
+using iText.Pdfocr.Tesseract4.Logs;
using iText.Test.Attributes;
namespace iText.Pdfocr.Tesseract4 {
@@ -44,7 +46,7 @@ public virtual void TestReadingInvalidImagePath() {
FileInfo imgFile = new FileInfo(path);
ImagePreprocessingUtil.PreprocessImage(imgFile, 1, new ImagePreprocessingOptions());
}
- , NUnit.Framework.Throws.InstanceOf())
+ , NUnit.Framework.Throws.InstanceOf())
;
}
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/Tesseract4FileResultEventHelperTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/Tesseract4FileResultEventHelperTest.cs
new file mode 100644
index 0000000..4e4ecb0
--- /dev/null
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/Tesseract4FileResultEventHelperTest.cs
@@ -0,0 +1,73 @@
+/*
+This file is part of the iText (R) project.
+Copyright (c) 1998-2021 iText Group NV
+Authors: iText Software.
+
+This program is offered under a commercial and under the AGPL license.
+For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
+
+AGPL licensing:
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+using System.Collections.Generic;
+using iText.Commons.Actions;
+using iText.Commons.Actions.Confirmations;
+using iText.Commons.Actions.Sequence;
+using iText.Pdfocr.Statistics;
+using iText.Pdfocr.Tesseract4.Actions.Data;
+using iText.Pdfocr.Tesseract4.Actions.Events;
+using iText.Test;
+
+namespace iText.Pdfocr.Tesseract4 {
+    /// <summary>Checks which events given to <c>Tesseract4FileResultEventHelper.OnEvent</c> reach a handler registered in <c>EventManager</c>.</summary>
+    public class Tesseract4FileResultEventHelperTest : ExtendedITextTest {
+        [NUnit.Framework.Test]
+        public virtual void DefaultProcessImageEventTest() {
+            StoreEventsHandler eventsHandler = new StoreEventsHandler();
+            EventManager.GetInstance().Register(eventsHandler);
+            try {
+                new Tesseract4FileResultEventHelper().OnEvent(PdfOcrTesseract4ProductEvent.CreateProcessImageEvent(new SequenceId(), null, EventConfirmationType.ON_CLOSE));
+                NUnit.Framework.Assert.AreEqual(0, eventsHandler.GetEvents().Count);
+            }
+            finally { // unregister even if the assertion throws, so the handler cannot leak into later tests
+                EventManager.GetInstance().Unregister(eventsHandler);
+            }
+        }
+
+        [NUnit.Framework.Test]
+        public virtual void DefaultStatisticsEventTest() {
+            StoreEventsHandler eventsHandler = new StoreEventsHandler();
+            EventManager.GetInstance().Register(eventsHandler);
+            try {
+                new Tesseract4FileResultEventHelper().OnEvent(new PdfOcrOutputTypeStatisticsEvent(PdfOcrOutputType.PDF, PdfOcrTesseract4ProductData.GetInstance()));
+                NUnit.Framework.Assert.AreEqual(1, eventsHandler.GetEvents().Count);
+            }
+            finally { // unregister even if the assertion throws, so the handler cannot leak into later tests
+                EventManager.GetInstance().Unregister(eventsHandler);
+            }
+        }
+        /// <summary>Stores PdfOcrTesseract4ProductEvent, PdfOcrOutputTypeStatisticsEvent and ConfirmEvent instances; ignores every other event.</summary>
+        protected internal class StoreEventsHandler : IEventHandler {
+            private readonly IList<IEvent> events = new List<IEvent>();
+            public virtual IList<IEvent> GetEvents() {
+                return events;
+            }
+            public virtual void OnEvent(IEvent @event) {
+                if (@event is PdfOcrTesseract4ProductEvent || @event is PdfOcrOutputTypeStatisticsEvent || @event is ConfirmEvent) {
+                    events.Add(@event);
+                }
+            }
+        }
+    }
+}
diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/PdfOcrTesseract4ProductInfo.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/Tesseract4MetaInfoEventHandlingExecutableTest.cs
similarity index 68%
rename from itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/PdfOcrTesseract4ProductInfo.cs
rename to itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/Tesseract4MetaInfoEventHandlingExecutableTest.cs
index fc1bd5e..8ee2a02 100644
--- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/PdfOcrTesseract4ProductInfo.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/Tesseract4MetaInfoEventHandlingExecutableTest.cs
@@ -20,18 +20,12 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see .
*/
-using System;
+using iText.Pdfocr;
namespace iText.Pdfocr.Tesseract4 {
- /// Product info about this iText add-on.
- public class PdfOcrTesseract4ProductInfo {
- /// The product name.
- public const String PRODUCT_NAME = "pdfOcr-Tesseract4";
-
- /// The major version number.
- public const int MAJOR_VERSION = 1;
-
- /// The minor version number.
- public const int MINOR_VERSION = 0;
+ public class Tesseract4MetaInfoEventHandlingExecutableTest : Tesseract4MetaInfoEventHandlingTest {
+ public Tesseract4MetaInfoEventHandlingExecutableTest()
+ : base(IntegrationTestHelper.ReaderType.EXECUTABLE) {
+ }
}
}
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/Tesseract4MetaInfoEventHandlingLibTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/Tesseract4MetaInfoEventHandlingLibTest.cs
new file mode 100644
index 0000000..f8b63bc
--- /dev/null
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/Tesseract4MetaInfoEventHandlingLibTest.cs
@@ -0,0 +1,31 @@
+/*
+This file is part of the iText (R) project.
+Copyright (c) 1998-2021 iText Group NV
+Authors: iText Software.
+
+This program is offered under a commercial and under the AGPL license.
+For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
+
+AGPL licensing:
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+using iText.Pdfocr;
+
+namespace iText.Pdfocr.Tesseract4 {
+    /// <summary>Runs the tests inherited from <c>Tesseract4MetaInfoEventHandlingTest</c> with <c>ReaderType.LIB</c>.</summary>
+    public class Tesseract4MetaInfoEventHandlingLibTest : Tesseract4MetaInfoEventHandlingTest {
+        public Tesseract4MetaInfoEventHandlingLibTest() : base(IntegrationTestHelper.ReaderType.LIB) {
+        }
+    }
+}
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/Tesseract4MetaInfoEventHandlingTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/Tesseract4MetaInfoEventHandlingTest.cs
new file mode 100644
index 0000000..c51024e
--- /dev/null
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/Tesseract4MetaInfoEventHandlingTest.cs
@@ -0,0 +1,74 @@
+/*
+This file is part of the iText (R) project.
+Copyright (c) 1998-2021 iText Group NV
+Authors: iText Software.
+
+This program is offered under a commercial and under the AGPL license.
+For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
+
+AGPL licensing:
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+using System;
+using System.IO;
+using iText.Commons.Actions;
+using iText.Commons.Actions.Confirmations;
+using iText.Commons.Actions.Contexts;
+using iText.Commons.Utils;
+using iText.Pdfocr;
+using iText.Pdfocr.Statistics;
+
+namespace iText.Pdfocr.Tesseract4 {
+    /// <summary>Runs OCR with a <c>TestMetaInfo</c> and checks the recorded events and the producer line of the output PDF.</summary>
+    public abstract class Tesseract4MetaInfoEventHandlingTest : IntegrationEventHandlingTestHelper {
+        public Tesseract4MetaInfoEventHandlingTest(IntegrationTestHelper.ReaderType type)
+            : base(type) {
+        }
+
+        // set meta info tests
+        [NUnit.Framework.Test]
+        public virtual void SetEventCountingMetaInfoTest() {
+            FileInfo imgFile = new FileInfo(TEST_IMAGES_DIRECTORY + "numbers_01.jpg");
+            FileInfo outPdfFile = FileUtil.CreateTempFile("test", ".pdf");
+            CreatePdfAndSetEventCountingMetaInfo(tesseractReader, outPdfFile, imgFile, new TestMetaInfo());
+            AssertOcrEventsAndProducerLine(outPdfFile);
+        }
+
+        [NUnit.Framework.Test]
+        public virtual void CreatePdfFileTestMetaInfoTest() {
+            FileInfo imgFile = new FileInfo(TEST_IMAGES_DIRECTORY + "numbers_01.jpg");
+            FileInfo outPdfFile = FileUtil.CreateTempFile("test", ".pdf");
+            CreatePdfFileAndSetMetaInfoToProps(tesseractReader, outPdfFile, imgFile, new TestMetaInfo());
+            AssertOcrEventsAndProducerLine(outPdfFile);
+        }
+
+        /// <summary>Asserts that exactly three events were recorded (usage with ON_CLOSE, statistics for PDF, confirm) and validates the producer line of <paramref name="outPdfFile"/>.</summary>
+        private void AssertOcrEventsAndProducerLine(FileInfo outPdfFile) {
+            // check ocr events
+            NUnit.Framework.Assert.AreEqual(3, eventsHandler.GetEvents().Count);
+            IEvent ocrUsageEvent = eventsHandler.GetEvents()[0];
+            ValidateUsageEvent(ocrUsageEvent, EventConfirmationType.ON_CLOSE);
+            ValidateStatisticEvent(eventsHandler.GetEvents()[1], PdfOcrOutputType.PDF);
+            ValidateConfirmEvent(eventsHandler.GetEvents()[2], ocrUsageEvent);
+
+            // check producer line in the output pdf
+            String expectedProdLine = CreateExpectedProducerLine(new ConfirmedEventWrapper[] { GetPdfOcrEvent() });
+            ValidatePdfProducerLine(outPdfFile.FullName, expectedProdLine);
+        }
+
+        /// <summary>Empty <c>IMetaInfo</c> implementation that the tests in this class pass to the OCR helpers.</summary>
+        private class TestMetaInfo : IMetaInfo {
+        }
+    }
+}
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/TesseractHelperLibTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/TesseractHelperLibTest.cs
index c01f347..b5717ed 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/TesseractHelperLibTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/TesseractHelperLibTest.cs
@@ -23,7 +23,7 @@ You should have received a copy of the GNU Affero General Public License
using System;
using System.Collections.Generic;
using System.IO;
-using iText.IO.Util;
+using iText.Commons.Utils;
using iText.Pdfocr;
namespace iText.Pdfocr.Tesseract4 {
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/TesseractHelperTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/TesseractHelperTest.cs
index 978c5f6..33b0cf0 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/TesseractHelperTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/TesseractHelperTest.cs
@@ -24,13 +24,14 @@ You should have received a copy of the GNU Affero General Public License
using System.Collections.Generic;
using System.IO;
using System.Text;
-using Common.Logging;
-using iText.IO.Util;
+using Microsoft.Extensions.Logging;
+using iText.Commons;
+using iText.Commons.Utils;
using iText.Pdfocr;
namespace iText.Pdfocr.Tesseract4 {
public abstract class TesseractHelperTest : IntegrationTestHelper {
- private static readonly ILog LOGGER = LogManager.GetLogger(typeof(iText.Pdfocr.Tesseract4.TesseractHelperTest
+ private static readonly ILogger LOGGER = ITextLogManager.GetLogger(typeof(iText.Pdfocr.Tesseract4.TesseractHelperTest
));
internal AbstractTesseract4OcrEngine tesseractReader;
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/TesseractOcrUtilTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/TesseractOcrUtilTest.cs
index 994307b..92f33d1 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/TesseractOcrUtilTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/TesseractOcrUtilTest.cs
@@ -25,6 +25,7 @@ You should have received a copy of the GNU Affero General Public License
using Tesseract;
using iText.IO.Image;
using iText.Pdfocr;
+using iText.Pdfocr.Tesseract4.Logs;
using iText.Test.Attributes;
namespace iText.Pdfocr.Tesseract4 {
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/UserWordsTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/UserWordsTest.cs
index b8c98a0..fc022d0 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/UserWordsTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/UserWordsTest.cs
@@ -23,8 +23,9 @@ You should have received a copy of the GNU Affero General Public License
using System;
using System.Collections.Generic;
using System.IO;
-using iText.IO.Util;
+using iText.Commons.Utils;
using iText.Pdfocr;
+using iText.Pdfocr.Tesseract4.Exceptions;
namespace iText.Pdfocr.Tesseract4 {
public abstract class UserWordsTest : IntegrationTestHelper {
@@ -76,7 +77,7 @@ public virtual void TestCustomUserWordsWithListOfLanguages() {
tesseractReader.SetTesseract4OcrEngineProperties(properties);
String result = GetRecognizedTextFromTextFile(tesseractReader, imgPath);
result = result.Replace("\n", "").Replace("\f", "");
- result = iText.IO.Util.StringUtil.ReplaceAll(result, "[^\\u0009\\u000A\\u000D\\u0020-\\u007E]", "");
+ result = iText.Commons.Utils.StringUtil.ReplaceAll(result, "[^\\u0009\\u000A\\u000D\\u0020-\\u007E]", "");
NUnit.Framework.Assert.IsTrue(result.StartsWith(expectedOutput));
NUnit.Framework.Assert.IsTrue(tesseractReader.GetTesseract4OcrEngineProperties().GetPathToUserWordsFile().
EndsWith(".user-words"));
@@ -90,7 +91,7 @@ public virtual void TestUserWordsWithLanguageNotInList() {
properties.SetUserWords("spa", new FileStream(userWords, FileMode.Open, FileAccess.Read));
properties.SetLanguages(new List());
}
- , NUnit.Framework.Throws.InstanceOf().With.Message.EqualTo(MessageFormatUtil.Format(Tesseract4OcrException.LANGUAGE_IS_NOT_IN_THE_LIST, "spa")))
+ , NUnit.Framework.Throws.InstanceOf().With.Message.EqualTo(MessageFormatUtil.Format(PdfOcrTesseract4ExceptionMessageConstant.LANGUAGE_IS_NOT_IN_THE_LIST, "spa")))
;
}
@@ -101,7 +102,7 @@ public virtual void TestIncorrectLanguageForUserWordsAsList() {
properties.SetUserWords("eng1", JavaUtil.ArraysAsList("word1", "word2"));
properties.SetLanguages(new List());
}
- , NUnit.Framework.Throws.InstanceOf().With.Message.EqualTo(MessageFormatUtil.Format(Tesseract4OcrException.LANGUAGE_IS_NOT_IN_THE_LIST, "eng1")))
+ , NUnit.Framework.Throws.InstanceOf().With.Message.EqualTo(MessageFormatUtil.Format(PdfOcrTesseract4ExceptionMessageConstant.LANGUAGE_IS_NOT_IN_THE_LIST, "eng1")))
;
}
diff --git a/itext/itext.pdfocr.api/PdfOcrExtensions.cs b/itext/itext.pdfocr.api/PdfOcrExtensions.cs
index 1bdaacb..c0a1166 100644
--- a/itext/itext.pdfocr.api/PdfOcrExtensions.cs
+++ b/itext/itext.pdfocr.api/PdfOcrExtensions.cs
@@ -50,4 +50,8 @@ public static TValue Put(this IDictionary col, TKey
return oldVal;
}
+ public static bool IsEmpty(this ICollection> collection) {
+ return collection.Count == 0;
+ }
+
}
diff --git a/itext/itext.pdfocr.api/Properties/AssemblyInfo.cs b/itext/itext.pdfocr.api/Properties/AssemblyInfo.cs
index 8bbd834..f387a9b 100644
--- a/itext/itext.pdfocr.api/Properties/AssemblyInfo.cs
+++ b/itext/itext.pdfocr.api/Properties/AssemblyInfo.cs
@@ -14,9 +14,9 @@
[assembly: Guid("0c4ceb00-9a56-4547-a925-5974a85a6048")]
-[assembly: AssemblyVersion("1.0.3.0")]
-[assembly: AssemblyFileVersion("1.0.3.0")]
-[assembly: AssemblyInformationalVersion("1.0.3")]
+[assembly: AssemblyVersion("2.0.0.0")]
+[assembly: AssemblyFileVersion("2.0.0.0")]
+[assembly: AssemblyInformationalVersion("2.0.0")]
[assembly: InternalsVisibleTo("itext.pdfocr.api.tests, PublicKey=" +
"00240000048000009400000006020000002400005253413100040000010001008b21ed5b3fc1c1" +
"1996390981fe22bbe71a39a9e11d3c2cefddd6ee92920fa871f9666ae0fa941af0280d0653df04" +
diff --git a/itext/itext.pdfocr.api/itext.pdfocr.api.csproj b/itext/itext.pdfocr.api/itext.pdfocr.api.csproj
index e46c9c4..00f0717 100644
--- a/itext/itext.pdfocr.api/itext.pdfocr.api.csproj
+++ b/itext/itext.pdfocr.api/itext.pdfocr.api.csproj
@@ -13,7 +13,7 @@
- net45
+ net461
CS1591;CS1570;CS1572;CS1573;CS1574;CS1580;CS1584;CS1658
@@ -30,7 +30,7 @@
-
+
diff --git a/itext/itext.pdfocr.api/itext/pdfocr/AbstractPdfOcrEventHelper.cs b/itext/itext.pdfocr.api/itext/pdfocr/AbstractPdfOcrEventHelper.cs
new file mode 100644
index 0000000..6d47763
--- /dev/null
+++ b/itext/itext.pdfocr.api/itext/pdfocr/AbstractPdfOcrEventHelper.cs
@@ -0,0 +1,43 @@
+/*
+This file is part of the iText (R) project.
+Copyright (c) 1998-2021 iText Group NV
+Authors: iText Software.
+
+This program is offered under a commercial and under the AGPL license.
+For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
+
+AGPL licensing:
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see .
+*/
+using iText.Commons.Actions;
+using iText.Commons.Actions.Confirmations;
+using iText.Commons.Actions.Sequence;
+
+namespace iText.Pdfocr {
+ /// <summary>Helper class for working with events.</summary>
+ /// <remarks>Helper class for working with events. This class is for internal usage.</remarks>
+ public abstract class AbstractPdfOcrEventHelper : AbstractITextEvent {
+ /// <summary>Handles the event.</summary>
+ /// <param name="event">event</param>
+ public abstract void OnEvent(AbstractProductITextEvent @event);
+
+ /// <summary>Returns the sequence id.</summary>
+ /// <returns>sequence id</returns>
+ public abstract SequenceId GetSequenceId();
+
+ /// <summary>Returns the confirmation type of event.</summary>
+ /// <returns>event confirmation type</returns>
+ public abstract EventConfirmationType GetConfirmationType();
+ }
+}
diff --git a/itext/itext.pdfocr.api/itext/pdfocr/IOcrEngine.cs b/itext/itext.pdfocr.api/itext/pdfocr/IOcrEngine.cs
index 751f7bc..5efdc43 100644
--- a/itext/itext.pdfocr.api/itext/pdfocr/IOcrEngine.cs
+++ b/itext/itext.pdfocr.api/itext/pdfocr/IOcrEngine.cs
@@ -63,6 +63,31 @@ public interface IOcrEngine {
///
IDictionary> DoImageOcr(FileInfo input);
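+ /// <summary>
+ /// Reads data from the provided input image file and returns retrieved data
+ /// in the format described below.
+ /// </summary>
+ /// <param name="input">
+ /// input image
+ /// <see cref="System.IO.FileInfo"/>
+ /// </param>
+ /// <param name="ocrProcessContext">ocr processing context</param>
+ /// <returns>
+ ///
+ /// <see cref="System.Collections.Generic.IDictionary{TKey, TValue}"/>
+ /// where key is
+ /// <see cref="int"/>
+ /// representing the number of the page and value is
+ /// <see cref="System.Collections.Generic.IList{T}"/>
+ /// of
+ /// <see cref="TextInfo"/>
+ /// elements where each
+ /// <see cref="TextInfo"/>
+ /// element contains a word or a line and its 4
+ /// coordinates (bbox)
+ /// </returns>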
+ IDictionary> DoImageOcr(FileInfo input, OcrProcessContext ocrProcessContext);
+
///
/// Performs OCR using provided
///
@@ -84,5 +109,28 @@ public interface IOcrEngine {
///
/// file to be created
void CreateTxtFile(IList inputImages, FileInfo txtFile);
+
+ /// <summary>
+ /// Performs OCR using provided
+ /// <see cref="IOcrEngine"/>
+ /// for the given list of
+ /// input images and saves output to a text file using provided path.
+ /// </summary>
+ /// <remarks>
+ /// Performs OCR using provided
+ /// <see cref="IOcrEngine"/>
+ /// for the given list of
+ /// input images and saves output to a text file using provided path.
+ /// Note that a human reading order is not guaranteed
+ /// due to possible specifics of input images (multi column layout, tables etc)
+ /// </remarks>
+ /// <param name="inputImages">
+ ///
+ /// <see cref="System.Collections.Generic.IList{T}"/>
+ /// of images to be OCRed
+ /// </param>
+ /// <param name="txtFile">file to be created</param>
+ /// <param name="ocrProcessContext">ocr processing context</param>
+ void CreateTxtFile(IList inputImages, FileInfo txtFile, OcrProcessContext ocrProcessContext);
}
}
diff --git a/itext.tests/itext.pdfocr.api.tests/itext/metainfo/TestMetaInfo.cs b/itext/itext.pdfocr.api/itext/pdfocr/IProductAware.cs
similarity index 63%
rename from itext.tests/itext.pdfocr.api.tests/itext/metainfo/TestMetaInfo.cs
rename to itext/itext.pdfocr.api/itext/pdfocr/IProductAware.cs
index 36601e1..b5e6864 100644
--- a/itext.tests/itext.pdfocr.api.tests/itext/metainfo/TestMetaInfo.cs
+++ b/itext/itext.pdfocr.api/itext/pdfocr/IProductAware.cs
@@ -20,15 +20,17 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see .
*/
-using iText.Kernel.Counter.Event;
+using iText.Commons.Actions.Data;
-namespace iText.Metainfo {
- /// This class is used for test purposes.
- ///
- /// This class is used for test purposes.
- /// Please be aware that it's put in the com.itextpdf.metainfo deliberately,
- /// so that it belongs neither to com.itextpdf.pdfocr nor com.itextpdf.pdfocr.tesseract4 packages
- ///
- public class TestMetaInfo : IMetaInfo {
+namespace iText.Pdfocr {
+ /// <summary>The interface that holds information about product data and meta info.</summary>
+ public interface IProductAware {
+ /// <summary>Gets the container with meta info.</summary>
+ /// <returns>the held meta info container</returns>
+ PdfOcrMetaInfoContainer GetMetaInfoContainer();
+
+ /// <summary>Gets object containing information about the product.</summary>
+ /// <returns>product data</returns>
+ ProductData GetProductData();
}
}
diff --git a/itext/itext.pdfocr.api/itext/pdfocr/OcrEngineProperties.cs b/itext/itext.pdfocr.api/itext/pdfocr/OcrEngineProperties.cs
index 090394b..de0cf6d 100644
--- a/itext/itext.pdfocr.api/itext/pdfocr/OcrEngineProperties.cs
+++ b/itext/itext.pdfocr.api/itext/pdfocr/OcrEngineProperties.cs
@@ -22,7 +22,7 @@ You should have received a copy of the GNU Affero General Public License
*/
using System;
using System.Collections.Generic;
-using iText.IO.Util;
+using iText.Commons.Utils;
namespace iText.Pdfocr {
public class OcrEngineProperties {
diff --git a/itext/itext.pdfocr.api/itext/pdfocr/OcrPdfCreator.cs b/itext/itext.pdfocr.api/itext/pdfocr/OcrPdfCreator.cs
index 9ca32e8..56c4b92 100644
--- a/itext/itext.pdfocr.api/itext/pdfocr/OcrPdfCreator.cs
+++ b/itext/itext.pdfocr.api/itext/pdfocr/OcrPdfCreator.cs
@@ -23,11 +23,14 @@ You should have received a copy of the GNU Affero General Public License
using System;
using System.Collections.Generic;
using System.IO;
-using Common.Logging;
+using Microsoft.Extensions.Logging;
+using iText.Commons;
+using iText.Commons.Actions;
+using iText.Commons.Actions.Sequence;
+using iText.Commons.Utils;
using iText.IO.Font.Otf;
using iText.IO.Image;
-using iText.IO.Util;
-using iText.Kernel.Counter.Event;
+using iText.Kernel.Actions.Events;
using iText.Kernel.Font;
using iText.Kernel.Geom;
using iText.Kernel.Pdf;
@@ -38,7 +41,9 @@ You should have received a copy of the GNU Affero General Public License
using iText.Layout.Font;
using iText.Layout.Properties;
using iText.Pdfa;
-using iText.Pdfocr.Events;
+using iText.Pdfocr.Exceptions;
+using iText.Pdfocr.Logs;
+using iText.Pdfocr.Statistics;
namespace iText.Pdfocr {
///
@@ -67,16 +72,7 @@ namespace iText.Pdfocr {
///
public class OcrPdfCreator {
/// The logger.
- private static readonly ILog LOGGER = LogManager.GetLogger(typeof(iText.Pdfocr.OcrPdfCreator));
-
- /// Indices in array representing bbox.
- private const int LEFT_IDX = 0;
-
- private const int TOP_IDX = 1;
-
- private const int RIGHT_IDX = 2;
-
- private const int BOTTOM_IDX = 3;
+ private static readonly ILogger LOGGER = ITextLogManager.GetLogger(typeof(iText.Pdfocr.OcrPdfCreator));
///
/// Selected
@@ -151,6 +147,8 @@ public void SetOcrPdfCreatorProperties(OcrPdfCreatorProperties ocrPdfCreatorProp
/// and
/// creates PDF using provided
///
+ /// ,
+ ///
/// and
/// .
///
@@ -160,12 +158,23 @@ public void SetOcrPdfCreatorProperties(OcrPdfCreatorProperties ocrPdfCreatorProp
/// and
/// creates PDF using provided
///
+ /// ,
+ ///
/// and
- /// .
- /// PDF/A-3u document will be created if
+ ///
+ /// . PDF/A-3u document will be created if
/// provided
///
/// is not null.
+ ///
+ /// NOTE that after executing this method you will have a product event from
+ /// both itextcore and pdfOcr. Therefore, use this method only if you need to work
+ /// with the generated
+ ///
+ /// . If you don't need this, use the
+ ///
+ /// method. In this case, only the pdfOcr event will be dispatched.
///
///
///
@@ -178,6 +187,7 @@ public void SetOcrPdfCreatorProperties(OcrPdfCreatorProperties ocrPdfCreatorProp
/// object
/// to write final PDF document to
///
+ /// document properties
///
///
///
@@ -188,34 +198,127 @@ public void SetOcrPdfCreatorProperties(OcrPdfCreatorProperties ocrPdfCreatorProp
///
/// object
///
- public PdfDocument CreatePdfA(IList inputImages, PdfWriter pdfWriter, PdfOutputIntent pdfOutputIntent
- ) {
- LOGGER.Info(MessageFormatUtil.Format(PdfOcrLogMessageConstant.START_OCR_FOR_IMAGES, inputImages.Count));
- IMetaInfo storedMetaInfo = null;
- if (ocrEngine is IThreadLocalMetaInfoAware) {
- storedMetaInfo = ((IThreadLocalMetaInfoAware)ocrEngine).GetThreadLocalMetaInfo();
- ((IThreadLocalMetaInfoAware)ocrEngine).SetThreadLocalMetaInfo(new OcrPdfCreatorMetaInfo(((IThreadLocalMetaInfoAware
- )ocrEngine).GetThreadLocalMetaInfo(), Guid.NewGuid(), null != pdfOutputIntent ? OcrPdfCreatorMetaInfo.PdfDocumentType
- .PDFA : OcrPdfCreatorMetaInfo.PdfDocumentType.PDF));
- }
+ public PdfDocument CreatePdfA(IList inputImages, PdfWriter pdfWriter, DocumentProperties documentProperties
+ , PdfOutputIntent pdfOutputIntent) {
+ LOGGER.LogInformation(MessageFormatUtil.Format(PdfOcrLogMessageConstant.START_OCR_FOR_IMAGES, inputImages.
+ Count));
+ // create event helper
+ SequenceId pdfSequenceId = new SequenceId();
+ OcrPdfCreatorEventHelper ocrEventHelper = new OcrPdfCreatorEventHelper(pdfSequenceId, ocrPdfCreatorProperties
+ .GetMetaInfo());
+ OcrProcessContext ocrProcessContext = new OcrProcessContext(ocrEventHelper);
// map contains:
// keys: image files
// values:
// map pageNumber -> retrieved text data(text and its coordinates)
IDictionary>> imagesTextData = new LinkedDictionary>>();
- try {
- foreach (FileInfo inputImage in inputImages) {
- imagesTextData.Put(inputImage, ocrEngine.DoImageOcr(inputImage));
- }
- }
- finally {
- if (ocrEngine is IThreadLocalMetaInfoAware) {
- ((IThreadLocalMetaInfoAware)ocrEngine).SetThreadLocalMetaInfo(storedMetaInfo);
- }
+ foreach (FileInfo inputImage in inputImages) {
+ imagesTextData.Put(inputImage, ocrEngine.DoImageOcr(inputImage, ocrProcessContext));
}
// create PdfDocument
- return CreatePdfDocument(pdfWriter, pdfOutputIntent, imagesTextData);
+ return CreatePdfDocument(pdfWriter, pdfOutputIntent, imagesTextData, pdfSequenceId, documentProperties);
+ }
+
+ ///
+ /// Performs OCR with set parameters using provided
+ ///
+ /// and
+ /// creates PDF using provided
+ ///
+ /// and
+ /// .
+ ///
+ ///
+ /// Performs OCR with set parameters using provided
+ ///
+ /// and
+ /// creates PDF using provided
+ ///
+ /// and
+ ///
+ /// . PDF/A-3u document will be created if
+ /// provided
+ ///
+ /// is not null.
+ ///
+ /// NOTE that after executing this method you will have a product event from
+ /// both itextcore and pdfOcr. Therefore, use this method only if you need to work
+ /// with the generated
+ ///
+ /// . If you don't need this, use the
+ ///
+ /// method. In this case, only the pdfOcr event will be dispatched.
+ ///
+ ///
+ ///
+ ///
+ /// of images to be OCRed
+ ///
+ ///
+ /// the
+ ///
+ /// object
+ /// to write final PDF document to
+ ///
+ ///
+ ///
+ ///
+ /// for PDF/A-3u document
+ ///
+ ///
+ /// result PDF/A-3u
+ ///
+ /// object
+ ///
+ public PdfDocument CreatePdfA(IList inputImages, PdfWriter pdfWriter, PdfOutputIntent pdfOutputIntent
+ ) {
+ return CreatePdfA(inputImages, pdfWriter, new DocumentProperties(), pdfOutputIntent);
+ }
+
+ ///
+ /// Performs OCR with set parameters using provided
+ ///
+ /// and
+ /// creates PDF using provided
+ /// .
+ ///
+ ///
+ /// Performs OCR with set parameters using provided
+ ///
+ /// and
+ /// creates PDF using provided
+ /// .
+ ///
+ /// NOTE that after executing this method you will have a product event from
+ /// both itextcore and pdfOcr. Therefore, use this method only if you need to work
+ /// with the generated
+ ///
+ /// . If you don't need this, use the
+ ///
+ /// method. In this case, only the pdfOcr event will be dispatched.
+ ///
+ ///
+ ///
+ ///
+ /// of images to be OCRed
+ ///
+ ///
+ /// the
+ ///
+ /// object
+ /// to write final PDF document to
+ ///
+ /// document properties
+ ///
+ /// result
+ ///
+ /// object
+ ///
+ public PdfDocument CreatePdf(IList inputImages, PdfWriter pdfWriter, DocumentProperties documentProperties
+ ) {
+ return CreatePdfA(inputImages, pdfWriter, documentProperties, null);
}
///
@@ -225,6 +328,21 @@ public PdfDocument CreatePdfA(IList inputImages, PdfWriter pdfWriter,
/// creates PDF using provided
/// .
///
+ ///
+ /// Performs OCR with set parameters using provided
+ ///
+ /// and
+ /// creates PDF using provided
+ /// .
+ ///
+ /// NOTE that after executing this method you will have a product event from
+ /// both itextcore and pdfOcr. Therefore, use this method only if you need to work
+ /// with the generated
+ ///
+ /// . If you don't need this, use the
+ ///
+ /// method. In this case, only the pdfOcr event will be dispatched.
+ ///
///
///
///
@@ -242,7 +360,82 @@ public PdfDocument CreatePdfA(IList inputImages, PdfWriter pdfWriter,
/// object
///
public PdfDocument CreatePdf(IList inputImages, PdfWriter pdfWriter) {
- return CreatePdfA(inputImages, pdfWriter, null);
+ return CreatePdfA(inputImages, pdfWriter, new DocumentProperties(), null);
+ }
+
+ ///
+ /// Performs OCR with set parameters using provided
+ ///
+ /// and
+ /// creates PDF using provided
+ /// .
+ ///
+ ///
+ ///
+ ///
+ /// of images to be OCRed
+ ///
+ ///
+ /// the
+ ///
+ /// object to write final PDF document to
+ ///
+ public virtual void CreatePdfFile(IList inputImages, FileInfo outPdfFile) {
+ CreatePdfAFile(inputImages, outPdfFile, null);
+ }
+
+ ///
+ /// Performs OCR with set parameters using provided
+ ///
+ /// and
+ /// creates PDF using provided
+ ///
+ /// and
+ /// .
+ ///
+ ///
+ /// Performs OCR with set parameters using provided
+ ///
+ /// and
+ /// creates PDF using provided
+ ///
+ /// and
+ /// .
+ /// PDF/A-3u document will be created if provided
+ ///
+ /// is not null.
+ ///
+ ///
+ ///
+ ///
+ /// of images to be OCRed
+ ///
+ ///
+ /// the
+ ///
+ /// object to write final PDF document to
+ ///
+ ///
+ ///
+ ///
+ /// for PDF/A-3u document
+ ///
+ public virtual void CreatePdfAFile(IList inputImages, FileInfo outPdfFile, PdfOutputIntent pdfOutputIntent
+ ) {
+ DocumentProperties documentProperties = new DocumentProperties();
+ if (ocrPdfCreatorProperties.GetMetaInfo() != null) {
+ documentProperties.SetEventCountingMetaInfo(ocrPdfCreatorProperties.GetMetaInfo());
+ }
+ else {
+ if (ocrEngine is IProductAware) {
+ documentProperties.SetEventCountingMetaInfo(((IProductAware)ocrEngine).GetMetaInfoContainer().GetMetaInfo(
+ ));
+ }
+ }
+ using (PdfWriter pdfWriter = new PdfWriter(outPdfFile.FullName)) {
+ PdfDocument pdfDocument = CreatePdfA(inputImages, pdfWriter, documentProperties, pdfOutputIntent);
+ pdfDocument.Close();
+ }
}
///
@@ -319,59 +512,39 @@ private void AddToCanvas(PdfDocument pdfDocument, Rectangle imageSize, IList
- /// Creates a new PDF document using provided properties, adds images with
- /// recognized text.
- ///
- ///
- /// the
- ///
- /// object
- /// to write final PDF document to
- ///
- ///
- ///
- ///
- /// for PDF/A-3u document
- ///
- ///
- /// map that contains input image files as keys,
- /// and as value: map pageNumber -> text for the page
- ///
- ///
- /// result
- ///
- /// object
- ///
private PdfDocument CreatePdfDocument(PdfWriter pdfWriter, PdfOutputIntent pdfOutputIntent, IDictionary>> imagesTextData) {
+ , IDictionary>> imagesTextData, SequenceId pdfSequenceId, DocumentProperties documentProperties
+ ) {
PdfDocument pdfDocument;
bool createPdfA3u = pdfOutputIntent != null;
if (createPdfA3u) {
- pdfDocument = new PdfADocument(pdfWriter, PdfAConformanceLevel.PDF_A_3U, pdfOutputIntent, new DocumentProperties
- ().SetEventCountingMetaInfo(new PdfOcrMetaInfo()));
+ pdfDocument = new PdfADocument(pdfWriter, PdfAConformanceLevel.PDF_A_3U, pdfOutputIntent, documentProperties
+ );
}
else {
- pdfDocument = new PdfDocument(pdfWriter, new DocumentProperties().SetEventCountingMetaInfo(new PdfOcrMetaInfo
- ()));
+ pdfDocument = new PdfDocument(pdfWriter, documentProperties);
}
+ LinkDocumentIdEvent linkDocumentIdEvent = new LinkDocumentIdEvent(pdfDocument, pdfSequenceId);
+ EventManager.GetInstance().OnEvent(linkDocumentIdEvent);
// pdfLang should be set in PDF/A mode
bool hasPdfLangProperty = ocrPdfCreatorProperties.GetPdfLang() != null && !ocrPdfCreatorProperties.GetPdfLang
().Equals("");
if (createPdfA3u && !hasPdfLangProperty) {
- LOGGER.Error(MessageFormatUtil.Format(OcrException.CANNOT_CREATE_PDF_DOCUMENT, PdfOcrLogMessageConstant.PDF_LANGUAGE_PROPERTY_IS_NOT_SET
- ));
- throw new OcrException(OcrException.CANNOT_CREATE_PDF_DOCUMENT).SetMessageParams(PdfOcrLogMessageConstant.
- PDF_LANGUAGE_PROPERTY_IS_NOT_SET);
+ LOGGER.LogError(MessageFormatUtil.Format(PdfOcrExceptionMessageConstant.CANNOT_CREATE_PDF_DOCUMENT, PdfOcrLogMessageConstant
+ .PDF_LANGUAGE_PROPERTY_IS_NOT_SET));
+ throw new PdfOcrException(PdfOcrExceptionMessageConstant.CANNOT_CREATE_PDF_DOCUMENT).SetMessageParams(PdfOcrLogMessageConstant
+ .PDF_LANGUAGE_PROPERTY_IS_NOT_SET);
}
// add metadata
if (hasPdfLangProperty) {
@@ -386,6 +559,13 @@ private PdfDocument CreatePdfDocument(PdfWriter pdfWriter, PdfOutputIntent pdfOu
// reset passed font provider
ocrPdfCreatorProperties.GetFontProvider().Reset();
AddDataToPdfDocument(imagesTextData, pdfDocument, createPdfA3u);
+ // statistics event about type of created pdf
+ if (ocrEngine is IProductAware && ((IProductAware)ocrEngine).GetProductData() != null) {
+ PdfOcrOutputType eventType = createPdfA3u ? PdfOcrOutputType.PDFA : PdfOcrOutputType.PDF;
+ PdfOcrOutputTypeStatisticsEvent docTypeStatisticsEvent = new PdfOcrOutputTypeStatisticsEvent(eventType, ((
+ IProductAware)ocrEngine).GetProductData());
+ EventManager.GetInstance().OnEvent(docTypeStatisticsEvent);
+ }
return pdfDocument;
}
@@ -406,8 +586,8 @@ private void AddDataToPdfDocument(IDictionary imageDataList = PdfCreatorUtil.GetImageData(inputImage, ocrPdfCreatorProperties.GetImageRotationHandler
());
- LOGGER.Info(MessageFormatUtil.Format(PdfOcrLogMessageConstant.NUMBER_OF_PAGES_IN_IMAGE, inputImage.ToString
- (), imageDataList.Count));
+ LOGGER.LogInformation(MessageFormatUtil.Format(PdfOcrLogMessageConstant.NUMBER_OF_PAGES_IN_IMAGE, inputImage
+ .ToString(), imageDataList.Count));
IDictionary> imageTextData = entry.Value;
if (imageTextData.Keys.Count > 0) {
for (int page = 0; page < imageDataList.Count; ++page) {
@@ -435,14 +615,14 @@ private void AddDataToPdfDocument(IDictionaryGet left bound of text chunk.
private static float GetLeft(TextInfo textInfo, float multiplier) {
- if (textInfo.GetBboxRect() == null) {
- return textInfo.GetBbox()[LEFT_IDX] * multiplier;
- }
- else {
- return textInfo.GetBboxRect().GetLeft() * multiplier;
- }
+ return textInfo.GetBboxRect().GetLeft() * multiplier;
}
/// Get right bound of text chunk.
private static float GetRight(TextInfo textInfo, float multiplier) {
- if (textInfo.GetBboxRect() == null) {
- return (textInfo.GetBbox()[RIGHT_IDX] + 1) * multiplier - 1;
- }
- else {
- return (textInfo.GetBboxRect().GetRight() + 1) * multiplier - 1;
- }
+ return (textInfo.GetBboxRect().GetRight() + 1) * multiplier - 1;
}
/// Get top bound of text chunk.
private static float GetTop(TextInfo textInfo, float multiplier) {
- if (textInfo.GetBboxRect() == null) {
- return textInfo.GetBbox()[TOP_IDX] * multiplier;
- }
- else {
- return textInfo.GetBboxRect().GetTop() * multiplier;
- }
+ return textInfo.GetBboxRect().GetTop() * multiplier;
}
/// Get bottom bound of text chunk.
private static float GetBottom(TextInfo textInfo, float multiplier) {
- if (textInfo.GetBboxRect() == null) {
- return (textInfo.GetBbox()[BOTTOM_IDX] + 1) * multiplier - 1;
- }
- else {
- return (textInfo.GetBboxRect().GetBottom() + 1) * multiplier - 1;
- }
+ return (textInfo.GetBboxRect().GetBottom() + 1) * multiplier - 1;
}
/// Check if line is not empty.
@@ -642,7 +802,7 @@ public override PdfCanvas ShowText(GlyphLine text) {
if (this.createPdfA3u) {
// exception is thrown only if PDF/A document is
// being created
- throw new OcrException(message);
+ throw new PdfOcrException(message);
}
// setting actual text to NotDef glyph
glyphLine.SetActualTextToGlyph(i, glyphLine.ToUnicodeString(i, i + 1));
@@ -656,7 +816,7 @@ public override PdfCanvas ShowText(GlyphLine text) {
}
// Warning is logged if not PDF/A document is being created
if (notDefGlyphsExists) {
- LOGGER.Warn(message);
+ LOGGER.LogWarning(message);
}
return this.ShowText(glyphLine, new ActualTextIterator(glyphLine));
}
diff --git a/itext/itext.pdfocr.api/itext/pdfocr/OcrPdfCreatorEventHelper.cs b/itext/itext.pdfocr.api/itext/pdfocr/OcrPdfCreatorEventHelper.cs
new file mode 100644
index 0000000..114d4f0
--- /dev/null
+++ b/itext/itext.pdfocr.api/itext/pdfocr/OcrPdfCreatorEventHelper.cs
@@ -0,0 +1,61 @@
+/*
+This file is part of the iText (R) project.
+Copyright (c) 1998-2021 iText Group NV
+Authors: iText Software.
+
+This program is offered under a commercial and under the AGPL license.
+For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
+
+AGPL licensing:
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see .
+*/
+using iText.Commons.Actions;
+using iText.Commons.Actions.Confirmations;
+using iText.Commons.Actions.Contexts;
+using iText.Commons.Actions.Sequence;
+using iText.Pdfocr.Statistics;
+
+namespace iText.Pdfocr {
+ internal class OcrPdfCreatorEventHelper : AbstractPdfOcrEventHelper {
+ private readonly SequenceId sequenceId;
+
+ private readonly IMetaInfo metaInfo;
+
+ internal OcrPdfCreatorEventHelper(SequenceId sequenceId, IMetaInfo metaInfo) {
+ this.sequenceId = sequenceId;
+ this.metaInfo = metaInfo;
+ }
+
+ public override void OnEvent(AbstractProductITextEvent @event) {
+ if (@event is AbstractContextBasedITextEvent) {
+ ((AbstractContextBasedITextEvent)@event).SetMetaInfo(this.metaInfo);
+ }
+ else {
+ if (@event is PdfOcrOutputTypeStatisticsEvent) {
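+ // do nothing: OcrPdfCreator dispatches the output type statistics event itself, so presumably this avoids reporting it twice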
+ return;
+ }
+ }
+ EventManager.GetInstance().OnEvent(@event);
+ }
+
+ public override SequenceId GetSequenceId() {
+ return sequenceId;
+ }
+
+ public override EventConfirmationType GetConfirmationType() {
+ return EventConfirmationType.ON_CLOSE;
+ }
+ }
+}
diff --git a/itext/itext.pdfocr.api/itext/pdfocr/OcrPdfCreatorMetaInfo.cs b/itext/itext.pdfocr.api/itext/pdfocr/OcrPdfCreatorMetaInfo.cs
deleted file mode 100644
index 4a539da..0000000
--- a/itext/itext.pdfocr.api/itext/pdfocr/OcrPdfCreatorMetaInfo.cs
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
-This file is part of the iText (R) project.
-Copyright (c) 1998-2021 iText Group NV
-Authors: iText Software.
-
-This program is offered under a commercial and under the AGPL license.
-For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
-
-AGPL licensing:
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with this program. If not, see .
-*/
-using System;
-using iText.Kernel.Counter.Event;
-
-namespace iText.Pdfocr {
- /// The meta info that is used internally by pdfOcr to pass a wrapped custom meta data
- public class OcrPdfCreatorMetaInfo : IMetaInfo, IMetaInfoWrapper {
- private IMetaInfo wrappedMetaInfo;
-
- private Guid uuid;
-
- private OcrPdfCreatorMetaInfo.PdfDocumentType pdfDocumentType;
-
- /// Creates an inner meta info wrapper
- /// the meta info to be wrapped
- /// a unique String which corresponds to the ocr event for which this meta info is passed
- /// a type of the document which is created during the corresponding ocr event
- public OcrPdfCreatorMetaInfo(IMetaInfo wrappedMetaInfo, Guid uuid, OcrPdfCreatorMetaInfo.PdfDocumentType pdfDocumentType
- ) {
- this.wrappedMetaInfo = wrappedMetaInfo;
- this.uuid = uuid;
- this.pdfDocumentType = pdfDocumentType;
- }
-
- /// Gets the unique String which corresponds to the ocr event for which this meta info is passed
- /// the unique String which corresponds to the ocr event for which this meta info is passed
- public virtual Guid GetDocumentId() {
- return uuid;
- }
-
- /// Gets the type of the document which is created during the corresponding ocr event
- /// the type of the document which is created during the corresponding ocr event
- public virtual OcrPdfCreatorMetaInfo.PdfDocumentType GetPdfDocumentType() {
- return pdfDocumentType;
- }
-
- public virtual IMetaInfo GetWrappedMetaInfo() {
- return wrappedMetaInfo;
- }
-
- /// The enum which represents types of documents, for which pdfOcr sends different events
- public enum PdfDocumentType {
- PDF,
- PDFA
- }
- }
-}
diff --git a/itext/itext.pdfocr.api/itext/pdfocr/OcrPdfCreatorProperties.cs b/itext/itext.pdfocr.api/itext/pdfocr/OcrPdfCreatorProperties.cs
index fdb254f..d1b8610 100644
--- a/itext/itext.pdfocr.api/itext/pdfocr/OcrPdfCreatorProperties.cs
+++ b/itext/itext.pdfocr.api/itext/pdfocr/OcrPdfCreatorProperties.cs
@@ -21,6 +21,7 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see .
*/
using System;
+using iText.Commons.Actions.Contexts;
using iText.Kernel.Colors;
using iText.Kernel.Geom;
using iText.Layout.Font;
@@ -113,6 +114,8 @@ public class OcrPdfCreatorProperties {
///
private IImageRotationHandler imageRotationHandler;
+ private IMetaInfo metaInfo;
+
///
/// Creates a new
///
@@ -465,5 +468,25 @@ public virtual iText.Pdfocr.OcrPdfCreatorProperties SetImageRotationHandler(IIma
this.imageRotationHandler = imageRotationDetector;
return this;
}
+
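+ /// <summary>
+ /// Set meta info for this
+ /// <see cref="OcrPdfCreatorProperties"/>.
+ /// </summary>
+ /// <param name="metaInfo">meta info</param>
+ /// <returns>
+ /// the instance of the current
+ /// <see cref="OcrPdfCreatorProperties"/>
+ /// </returns>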
+ public virtual iText.Pdfocr.OcrPdfCreatorProperties SetMetaInfo(IMetaInfo metaInfo) {
+ this.metaInfo = metaInfo;
+ return this;
+ }
+
+ /// <summary>Returns meta info.</summary>
+ /// <returns>meta info</returns>
+ internal virtual IMetaInfo GetMetaInfo() {
+ return metaInfo;
+ }
}
}
diff --git a/itext/itext.pdfocr.api/itext/pdfocr/OcrProcessContext.cs b/itext/itext.pdfocr.api/itext/pdfocr/OcrProcessContext.cs
new file mode 100644
index 0000000..ec65907
--- /dev/null
+++ b/itext/itext.pdfocr.api/itext/pdfocr/OcrProcessContext.cs
@@ -0,0 +1,49 @@
+/*
+This file is part of the iText (R) project.
+Copyright (c) 1998-2021 iText Group NV
+Authors: iText Software.
+
+This program is offered under a commercial and under the AGPL license.
+For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
+
+AGPL licensing:
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see .
+*/
+namespace iText.Pdfocr {
+ /// <summary>Class for storing ocr processing context.</summary>
+ public class OcrProcessContext {
+ private AbstractPdfOcrEventHelper ocrEventHelper;
+
+ /// <summary>Creates an instance of ocr process context.</summary>
+ /// <param name="eventHelper">helper class for working with events</param>
+ public OcrProcessContext(AbstractPdfOcrEventHelper eventHelper) {
+ this.ocrEventHelper = eventHelper;
+ }
+
+ /// <summary>Returns helper for working with events.</summary>
+ /// <returns>
+ /// an instance of
+ /// <see cref="AbstractPdfOcrEventHelper"/>
+ /// </returns>
+ public virtual AbstractPdfOcrEventHelper GetOcrEventHelper() {
+ return ocrEventHelper;
+ }
+
+ /// <summary>Sets ocr event helper.</summary>
+ /// <param name="eventHelper">event helper</param>
+ public virtual void SetOcrEventHelper(AbstractPdfOcrEventHelper eventHelper) {
+ this.ocrEventHelper = eventHelper;
+ }
+ }
+}
diff --git a/itext/itext.pdfocr.api/itext/pdfocr/PdfCreatorUtil.cs b/itext/itext.pdfocr.api/itext/pdfocr/PdfCreatorUtil.cs
index dbc2704..bcf8279 100644
--- a/itext/itext.pdfocr.api/itext/pdfocr/PdfCreatorUtil.cs
+++ b/itext/itext.pdfocr.api/itext/pdfocr/PdfCreatorUtil.cs
@@ -23,15 +23,18 @@ You should have received a copy of the GNU Affero General Public License
using System;
using System.Collections.Generic;
using System.IO;
-using Common.Logging;
+using Microsoft.Extensions.Logging;
+using iText.Commons;
+using iText.Commons.Utils;
using iText.IO.Image;
using iText.IO.Source;
-using iText.IO.Util;
using iText.Kernel.Geom;
using iText.Layout;
using iText.Layout.Element;
using iText.Layout.Layout;
using iText.Layout.Renderer;
+using iText.Pdfocr.Exceptions;
+using iText.Pdfocr.Logs;
namespace iText.Pdfocr {
internal class PdfCreatorUtil {
@@ -42,7 +45,7 @@ internal class PdfCreatorUtil {
private const float POINTS_PER_INCH = 72.0f;
/// The logger.
- private static readonly ILog LOGGER = LogManager.GetLogger(typeof(PdfCreatorUtil));
+ private static readonly ILogger LOGGER = ITextLogManager.GetLogger(typeof(PdfCreatorUtil));
///
/// Calculates font size according to given bbox height, width and selected
@@ -83,8 +86,8 @@ internal static float CalculateFontSize(Document document, String line, String f
}
}
catch (InvalidOperationException e) {
- LOGGER.Error(PdfOcrLogMessageConstant.PROVIDED_FONT_PROVIDER_IS_INVALID);
- throw new OcrException(OcrException.CANNOT_RESOLVE_PROVIDED_FONTS, e);
+ LOGGER.LogError(PdfOcrLogMessageConstant.PROVIDED_FONT_PROVIDER_IS_INVALID);
+ throw new PdfOcrInputException(PdfOcrExceptionMessageConstant.CANNOT_RESOLVE_PROVIDED_FONTS, e);
}
return fontSize;
}
@@ -176,12 +179,12 @@ internal static IList GetImageData(FileInfo inputImage, IImageRotatio
}
}
catch (System.IO.IOException e) {
- LOGGER.Error(MessageFormatUtil.Format(PdfOcrLogMessageConstant.CANNOT_READ_INPUT_IMAGE, e.Message));
- throw new OcrException(OcrException.CANNOT_READ_INPUT_IMAGE, e);
+ LOGGER.LogError(MessageFormatUtil.Format(PdfOcrLogMessageConstant.CANNOT_READ_INPUT_IMAGE, e.Message));
+ throw new PdfOcrInputException(PdfOcrExceptionMessageConstant.CANNOT_READ_INPUT_IMAGE, e);
}
- catch (iText.IO.IOException e) {
- LOGGER.Error(MessageFormatUtil.Format(PdfOcrLogMessageConstant.CANNOT_READ_INPUT_IMAGE, e.Message));
- throw new OcrException(OcrException.CANNOT_READ_INPUT_IMAGE, e);
+ catch (iText.IO.Exceptions.IOException e) {
+ LOGGER.LogError(MessageFormatUtil.Format(PdfOcrLogMessageConstant.CANNOT_READ_INPUT_IMAGE, e.Message));
+ throw new PdfOcrInputException(PdfOcrExceptionMessageConstant.CANNOT_READ_INPUT_IMAGE, e);
}
return images;
}
diff --git a/itext/itext.pdfocr.api/itext/pdfocr/PdfOcrFontProvider.cs b/itext/itext.pdfocr.api/itext/pdfocr/PdfOcrFontProvider.cs
index 2dd729b..ffe336e 100644
--- a/itext/itext.pdfocr.api/itext/pdfocr/PdfOcrFontProvider.cs
+++ b/itext/itext.pdfocr.api/itext/pdfocr/PdfOcrFontProvider.cs
@@ -22,10 +22,13 @@ You should have received a copy of the GNU Affero General Public License
*/
using System;
using System.IO;
-using Common.Logging;
+using Microsoft.Extensions.Logging;
+using iText.Commons;
+using iText.Commons.Utils;
using iText.IO.Font;
using iText.IO.Util;
using iText.Layout.Font;
+using iText.Pdfocr.Logs;
namespace iText.Pdfocr {
public class PdfOcrFontProvider : FontProvider {
@@ -70,7 +73,7 @@ private byte[] GetDefaultFont() {
}
}
catch (System.IO.IOException e) {
- LogManager.GetLogger(GetType()).Error(MessageFormatUtil.Format(PdfOcrLogMessageConstant.CANNOT_READ_DEFAULT_FONT
+ ITextLogManager.GetLogger(GetType()).LogError(MessageFormatUtil.Format(PdfOcrLogMessageConstant.CANNOT_READ_DEFAULT_FONT
, e.Message));
return new byte[0];
}
diff --git a/itext/itext.pdfocr.api/itext/pdfocr/PdfOcrMetaInfoContainer.cs b/itext/itext.pdfocr.api/itext/pdfocr/PdfOcrMetaInfoContainer.cs
new file mode 100644
index 0000000..750cdb9
--- /dev/null
+++ b/itext/itext.pdfocr.api/itext/pdfocr/PdfOcrMetaInfoContainer.cs
@@ -0,0 +1,40 @@
+/*
+This file is part of the iText (R) project.
+Copyright (c) 1998-2021 iText Group NV
+Authors: iText Software.
+
+This program is offered under a commercial and under the AGPL license.
+For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
+
+AGPL licensing:
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+using iText.Commons.Actions.Contexts;
+
+namespace iText.Pdfocr {
+ /// <summary>Container to keep meta info; the reference is set once in the constructor and read through <c>GetMetaInfo()</c>.</summary>
+ public class PdfOcrMetaInfoContainer {
+ private readonly IMetaInfo metaInfo;
+
+ /// <summary>Creates instance of container to keep passed meta info.</summary>
+ /// <param name="metaInfo">meta info</param>
+ public PdfOcrMetaInfoContainer(IMetaInfo metaInfo) {
+ this.metaInfo = metaInfo;
+ }
+
+ internal virtual IMetaInfo GetMetaInfo() {
+ return metaInfo;
+ }
+ }
+}
diff --git a/itext/itext.pdfocr.api/itext/pdfocr/TextInfo.cs b/itext/itext.pdfocr.api/itext/pdfocr/TextInfo.cs
index 1d0a49b..5e57fab 100644
--- a/itext/itext.pdfocr.api/itext/pdfocr/TextInfo.cs
+++ b/itext/itext.pdfocr.api/itext/pdfocr/TextInfo.cs
@@ -21,8 +21,6 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see .
*/
using System;
-using System.Collections.Generic;
-using iText.IO.Util;
using iText.Kernel.Geom;
namespace iText.Pdfocr {
@@ -40,16 +38,6 @@ public class TextInfo {
///
private Rectangle bboxRect;
- /// Contains 4 float coordinates: bbox parameters.
- ///
- /// Contains 4 float coordinates: bbox parameters.
- /// Alike bboxRect described by
- ///
- /// coordinates are upper-left based and expressed in pixels.
- ///
- [System.ObsoleteAttribute(@"since 1.0.1. Use bboxRect instead")]
- private IList bbox = JavaCollectionsUtil.EmptyList();
-
///
/// Creates a new
///
@@ -67,7 +55,6 @@ public TextInfo() {
public TextInfo(iText.Pdfocr.TextInfo textInfo) {
this.text = textInfo.text;
this.bboxRect = new Rectangle(textInfo.bboxRect);
- this.bbox = JavaCollectionsUtil.UnmodifiableList(textInfo.bbox);
}
///
@@ -86,48 +73,6 @@ public TextInfo(String text, Rectangle bbox) {
this.bboxRect = new Rectangle(bbox);
}
- ///
- /// Creates a new
- ///
- /// instance.
- ///
- /// any text
- ///
- ///
- ///
- /// of bbox parameters
- ///
- [System.ObsoleteAttribute(@"since 1.0.1. Use TextInfo(System.String, iText.Kernel.Geom.Rectangle) instead"
- )]
- public TextInfo(String text, IList bbox) {
- this.text = text;
- this.bbox = JavaCollectionsUtil.UnmodifiableList(bbox);
- }
-
- ///
- /// Creates a new
- ///
- /// instance.
- ///
- /// any text
- ///
- ///
- ///
- /// describing text bbox
- ///
- ///
- ///
- ///
- /// of bbox parameters
- ///
- [System.ObsoleteAttribute(@"since 1.0.1. Use TextInfo(System.String, iText.Kernel.Geom.Rectangle) instead"
- )]
- public TextInfo(String text, Rectangle bboxRect, IList bbox) {
- this.text = text;
- this.bboxRect = bboxRect;
- this.bbox = JavaCollectionsUtil.UnmodifiableList(bbox);
- }
-
/// Gets text element.
/// String
public virtual String GetText() {
@@ -158,30 +103,6 @@ public virtual Rectangle GetBboxRect() {
///
public virtual void SetBboxRect(Rectangle bbox) {
this.bboxRect = new Rectangle(bbox);
- this.bbox = JavaCollectionsUtil.EmptyList();
- }
-
- /// Gets bbox coordinates.
- ///
- ///
- ///
- /// of bbox parameters
- ///
- [System.ObsoleteAttribute(@"since 1.0.1. Use GetBboxRect() instead")]
- public virtual IList GetBbox() {
- return new List(bbox);
- }
-
- /// Sets bbox coordinates.
- ///
- ///
- ///
- /// of bbox parameters
- ///
- [System.ObsoleteAttribute(@"since 1.0.1. Use SetBboxRect(iText.Kernel.Geom.Rectangle) instead")]
- public virtual void SetBbox(IList bbox) {
- this.bbox = JavaCollectionsUtil.UnmodifiableList(bbox);
- this.bboxRect = null;
}
}
}
diff --git a/itext/itext.pdfocr.api/itext/pdfocr/events/IThreadLocalMetaInfoAware.cs b/itext/itext.pdfocr.api/itext/pdfocr/events/IThreadLocalMetaInfoAware.cs
deleted file mode 100644
index 4a851eb..0000000
--- a/itext/itext.pdfocr.api/itext/pdfocr/events/IThreadLocalMetaInfoAware.cs
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
-This file is part of the iText (R) project.
-Copyright (c) 1998-2021 iText Group NV
-Authors: iText Software.
-
-This program is offered under a commercial and under the AGPL license.
-For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
-
-AGPL licensing:
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with this program. If not, see .
-*/
-using iText.Kernel.Counter.Event;
-
-namespace iText.Pdfocr.Events {
- ///
- /// The interface which holds a thread local meta info,
- /// meaning different threads operate with independent and different meta infos.
- ///
- public interface IThreadLocalMetaInfoAware {
- /// Gets the meta info which is held by the interface.
- /// the held thread local meta info
- IMetaInfo GetThreadLocalMetaInfo();
-
- /// Sets a thread local meta info.
- /// a thread local meta info to be held
- ///
- /// this
- ///
- ///
- IThreadLocalMetaInfoAware SetThreadLocalMetaInfo(IMetaInfo metaInfo);
- }
-}
diff --git a/itext/itext.pdfocr.api/itext/pdfocr/OcrException.cs b/itext/itext.pdfocr.api/itext/pdfocr/exceptions/PdfOcrException.cs
similarity index 71%
rename from itext/itext.pdfocr.api/itext/pdfocr/OcrException.cs
rename to itext/itext.pdfocr.api/itext/pdfocr/exceptions/PdfOcrException.cs
index 1d4dc9b..6e8a6fe 100644
--- a/itext/itext.pdfocr.api/itext/pdfocr/OcrException.cs
+++ b/itext/itext.pdfocr.api/itext/pdfocr/exceptions/PdfOcrException.cs
@@ -22,20 +22,18 @@ You should have received a copy of the GNU Affero General Public License
*/
using System;
using System.Collections.Generic;
-using iText.IO.Util;
+using iText.Commons.Exceptions;
+using iText.Commons.Utils;
-namespace iText.Pdfocr {
+namespace iText.Pdfocr.Exceptions {
/// Exception class for custom exceptions.
- public class OcrException : Exception {
- public const String CANNOT_READ_INPUT_IMAGE = "Cannot read input image";
-
- public const String CANNOT_RESOLVE_PROVIDED_FONTS = "Cannot resolve " + "any of provided fonts. Please check provided FontProvider.";
-
- public const String CANNOT_CREATE_PDF_DOCUMENT = "Cannot create " + "PDF document: {0}";
-
+ public class PdfOcrException : ITextException {
private IList messageParams;
- /// Creates a new OcrException.
+ ///
+ /// Creates a new
+ /// .
+ ///
/// the detail message.
///
/// the cause
@@ -44,16 +42,34 @@ public class OcrException : Exception {
///
/// method).
///
- public OcrException(String msg, Exception e)
+ public PdfOcrException(String msg, Exception e)
: base(msg, e) {
}
- /// Creates a new OcrException.
+ ///
+ /// Creates a new
+ /// .
+ ///
/// the detail message.
- public OcrException(String msg)
+ public PdfOcrException(String msg)
: base(msg) {
}
+ /// <summary>
+ /// Creates a new
+ /// <see cref="PdfOcrException"/>.
+ /// </summary>
+ /// <param name="e">
+ /// the cause,
+ /// which is forwarded unchanged
+ /// to the
+ /// <see cref="ITextException"/>
+ /// base constructor.
+ /// </param>
+ public PdfOcrException(Exception e)
+ : base(e) {
+ }
+
///
public override String Message {
get {
@@ -74,7 +90,7 @@ protected internal virtual Object[] GetMessageParams() {
/// Sets additional params for Exception message.
/// additional params.
/// object itself.
- public virtual iText.Pdfocr.OcrException SetMessageParams(params String[] messageParams) {
+ public virtual iText.Pdfocr.Exceptions.PdfOcrException SetMessageParams(params String[] messageParams) {
this.messageParams = JavaUtil.ArraysAsList(messageParams);
return this;
}
diff --git a/itext/itext.pdfocr.api/itext/pdfocr/exceptions/PdfOcrExceptionMessageConstant.cs b/itext/itext.pdfocr.api/itext/pdfocr/exceptions/PdfOcrExceptionMessageConstant.cs
new file mode 100644
index 0000000..e6f883d
--- /dev/null
+++ b/itext/itext.pdfocr.api/itext/pdfocr/exceptions/PdfOcrExceptionMessageConstant.cs
@@ -0,0 +1,41 @@
+/*
+This file is part of the iText (R) project.
+Copyright (c) 1998-2021 iText Group NV
+Authors: iText Software.
+
+This program is offered under a commercial and under the AGPL license.
+For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
+
+AGPL licensing:
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see .
+*/
+using System;
+
+namespace iText.Pdfocr.Exceptions {
+ public class PdfOcrExceptionMessageConstant {
+ public const String CANNOT_READ_INPUT_IMAGE = "Cannot read input image";
+
+ public const String CANNOT_RESOLVE_PROVIDED_FONTS = "Cannot resolve any of provided fonts. Please check provided FontProvider.";
+
+ public const String CANNOT_CREATE_PDF_DOCUMENT = "Cannot create PDF document: {0}";
+
+ public const String STATISTICS_EVENT_TYPE_CANT_BE_NULL = "Statistics event type can't be null";
+
+ public const String STATISTICS_EVENT_TYPE_IS_NOT_DETECTED = "Statistics event type is not detected.";
+
+ // Private constructor prevents direct instantiation: this class only holds message constants.
+ private PdfOcrExceptionMessageConstant() {
+ }
+ }
+}
diff --git a/itext/itext.pdfocr.api/itext/pdfocr/exceptions/PdfOcrInputException.cs b/itext/itext.pdfocr.api/itext/pdfocr/exceptions/PdfOcrInputException.cs
new file mode 100644
index 0000000..871f802
--- /dev/null
+++ b/itext/itext.pdfocr.api/itext/pdfocr/exceptions/PdfOcrInputException.cs
@@ -0,0 +1,68 @@
+/*
+This file is part of the iText (R) project.
+Copyright (c) 1998-2021 iText Group NV
+Authors: iText Software.
+
+This program is offered under a commercial and under the AGPL license.
+For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
+
+AGPL licensing:
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see .
+*/
+using System;
+
+namespace iText.Pdfocr.Exceptions {
+ /// <summary>Exception class for input related exceptions.</summary>
+ public class PdfOcrInputException : PdfOcrException {
+ /// <summary>
+ /// Creates a new
+ /// <see cref="PdfOcrInputException"/>.
+ /// </summary>
+ /// <param name="msg">the detail message.</param>
+ /// <param name="e">
+ /// the cause
+ /// (which is forwarded,
+ /// together with the message, to the
+ /// <see cref="PdfOcrException"/>
+ /// base constructor).
+ /// </param>
+ public PdfOcrInputException(String msg, Exception e)
+ : base(msg, e) {
+ }
+
+ /// <summary>
+ /// Creates a new
+ /// <see cref="PdfOcrInputException"/>.
+ /// </summary>
+ /// <param name="msg">the detail message.</param>
+ public PdfOcrInputException(String msg)
+ : base(msg) {
+ }
+
+ /// <summary>
+ /// Creates a new
+ /// <see cref="PdfOcrInputException"/>.
+ /// </summary>
+ /// <param name="e">
+ /// the cause
+ /// (which is forwarded
+ /// to the
+ /// <see cref="PdfOcrException"/>
+ /// base constructor).
+ /// </param>
+ public PdfOcrInputException(Exception e)
+ : base(e) {
+ }
+ }
+}
diff --git a/itext/itext.pdfocr.api/itext/pdfocr/PdfOcrLogMessageConstant.cs b/itext/itext.pdfocr.api/itext/pdfocr/logs/PdfOcrLogMessageConstant.cs
similarity index 90%
rename from itext/itext.pdfocr.api/itext/pdfocr/PdfOcrLogMessageConstant.cs
rename to itext/itext.pdfocr.api/itext/pdfocr/logs/PdfOcrLogMessageConstant.cs
index c01cde3..7b9bc01 100644
--- a/itext/itext.pdfocr.api/itext/pdfocr/PdfOcrLogMessageConstant.cs
+++ b/itext/itext.pdfocr.api/itext/pdfocr/logs/PdfOcrLogMessageConstant.cs
@@ -22,12 +22,11 @@ You should have received a copy of the GNU Affero General Public License
*/
using System;
-namespace iText.Pdfocr {
+namespace iText.Pdfocr.Logs {
public class PdfOcrLogMessageConstant {
public const String CANNOT_READ_INPUT_IMAGE = "Cannot read input image {0}";
- public const String PROVIDED_FONT_PROVIDER_IS_INVALID = "Provided FontProvider is invalid. Please check that it contains "
- + "valid fonts and default font family name.";
+ public const String PROVIDED_FONT_PROVIDER_IS_INVALID = "Provided FontProvider is invalid. Please check that it contains valid fonts and default font family name.";
public const String CANNOT_READ_DEFAULT_FONT = "Cannot default read font: {0}";
@@ -37,12 +36,12 @@ public class PdfOcrLogMessageConstant {
public const String NUMBER_OF_PAGES_IN_IMAGE = "Image {0} contains {1} page(s)";
- public const String COULD_NOT_FIND_CORRESPONDING_GLYPH_TO_UNICODE_CHARACTER = "Could not find a glyph corresponding to Unicode character {0} "
- + "in any of the fonts";
+ public const String COULD_NOT_FIND_CORRESPONDING_GLYPH_TO_UNICODE_CHARACTER = "Could not find a glyph corresponding to Unicode character {0} in any of the fonts";
public const String PDF_LANGUAGE_PROPERTY_IS_NOT_SET = "PDF language property is not set";
private PdfOcrLogMessageConstant() {
}
+ //Private constructor will prevent the instantiation of this class directly
}
}
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/metainfo/TestMetaInfo.cs b/itext/itext.pdfocr.api/itext/pdfocr/statistics/PdfOcrOutputType.cs
similarity index 68%
rename from itext.tests/itext.pdfocr.tesseract4.tests/itext/metainfo/TestMetaInfo.cs
rename to itext/itext.pdfocr.api/itext/pdfocr/statistics/PdfOcrOutputType.cs
index 36601e1..0a49a8c 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/metainfo/TestMetaInfo.cs
+++ b/itext/itext.pdfocr.api/itext/pdfocr/statistics/PdfOcrOutputType.cs
@@ -20,15 +20,14 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see .
*/
-using iText.Kernel.Counter.Event;
-
-namespace iText.Metainfo {
- /// This class is used for test purposes.
- ///
- /// This class is used for test purposes.
- /// Please be aware that it's put in the com.itextpdf.metainfo deliberately,
- /// so that it belongs neither to com.itextpdf.pdfocr nor com.itextpdf.pdfocr.tesseract4 packages
- ///
- public class TestMetaInfo : IMetaInfo {
+namespace iText.Pdfocr.Statistics {
+ /// <summary>pdfOcr output types for statistics.</summary>
+ public enum PdfOcrOutputType {
+ /// <summary>Processing of an image in the engine with data output</summary>
+ DATA,
+ /// <summary>Creating a PDF file</summary>
+ PDF,
+ /// <summary>Creating a PDF-A file</summary>
+ PDFA
}
}
diff --git a/itext/itext.pdfocr.api/itext/pdfocr/statistics/PdfOcrOutputTypeStatisticsAggregator.cs b/itext/itext.pdfocr.api/itext/pdfocr/statistics/PdfOcrOutputTypeStatisticsAggregator.cs
new file mode 100644
index 0000000..10779a0
--- /dev/null
+++ b/itext/itext.pdfocr.api/itext/pdfocr/statistics/PdfOcrOutputTypeStatisticsAggregator.cs
@@ -0,0 +1,114 @@
+/*
+This file is part of the iText (R) project.
+Copyright (c) 1998-2021 iText Group NV
+Authors: iText Software.
+
+This program is offered under a commercial and under the AGPL license.
+For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
+
+AGPL licensing:
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see .
+*/
+using System;
+using System.Collections.Generic;
+using iText.Commons.Actions;
+using iText.Commons.Utils;
+using iText.Pdfocr.Exceptions;
+
+namespace iText.Pdfocr.Statistics {
+ /// <summary>Statistics aggregator which aggregates types of ocr processing.</summary>
+ internal class PdfOcrOutputTypeStatisticsAggregator : AbstractStatisticsAggregator {
+     private const String STRING_FOR_DATA = "data";
+
+     private const String STRING_FOR_PDF = "pdf";
+
+     private const String STRING_FOR_PDFA = "pdfa";
+
+     /// <summary>Maps each supported output type to the key under which its usages are counted.</summary>
+     private static readonly IDictionary<PdfOcrOutputType, String> OCR_OUTPUT_TYPES;
+
+     static PdfOcrOutputTypeStatisticsAggregator() {
+         IDictionary<PdfOcrOutputType, String> temp = new Dictionary<PdfOcrOutputType, String>();
+         temp.Put(PdfOcrOutputType.DATA, STRING_FOR_DATA);
+         temp.Put(PdfOcrOutputType.PDF, STRING_FOR_PDF);
+         temp.Put(PdfOcrOutputType.PDFA, STRING_FOR_PDFA);
+         OCR_OUTPUT_TYPES = JavaCollectionsUtil.UnmodifiableMap(temp);
+     }
+
+     /// <summary>Held by <c>Aggregate</c> and <c>Merge</c> while they modify <c>numberOfUsagesPerType</c>.</summary>
+     private readonly Object Lock = new Object();
+
+     /// <summary>Number of aggregated events per output type key.</summary>
+     private readonly IDictionary<String, long?> numberOfUsagesPerType = new LinkedDictionary<String, long?>();
+
+     /// <summary>Aggregates pdfOcr event type.</summary>
+     /// <param name="event">
+     /// <see cref="PdfOcrOutputTypeStatisticsEvent"/>
+     /// instance; an event of any other
+     /// type is ignored
+     /// </param>
+     public override void Aggregate(AbstractStatisticsEvent @event) {
+         if (!(@event is PdfOcrOutputTypeStatisticsEvent)) {
+             return;
+         }
+         // the event's properties are required to be not null
+         PdfOcrOutputType type = ((PdfOcrOutputTypeStatisticsEvent)@event).GetPdfOcrStatisticsEventType();
+         String fileTypeKey = GetKeyForType(type);
+         if (null == fileTypeKey) {
+             // this line is not expected to be reached, since an exception should have been thrown on event creation
+             throw new PdfOcrException(PdfOcrExceptionMessageConstant.STATISTICS_EVENT_TYPE_IS_NOT_DETECTED);
+         }
+         lock (Lock) {
+             long? previousCount = numberOfUsagesPerType.Get(fileTypeKey);
+             long? updatedCount = previousCount == null ? 1L : (previousCount + 1L);
+             numberOfUsagesPerType.Put(fileTypeKey, updatedCount);
+         }
+     }
+
+     /// <summary>Retrieves Map where keys are pdfOcr event types and values are the amounts of such events.</summary>
+     /// <returns>
+     /// aggregated usages per output type key, wrapped by
+     /// <c>JavaCollectionsUtil.UnmodifiableMap</c>
+     /// </returns>
+     public override Object RetrieveAggregation() {
+         return JavaCollectionsUtil.UnmodifiableMap(numberOfUsagesPerType);
+     }
+
+     /// <summary>Merges data about amounts of pdfOcr event types from the provided aggregator into this aggregator.</summary>
+     /// <param name="aggregator">
+     /// <see cref="PdfOcrOutputTypeStatisticsAggregator"/>
+     /// from which data will be taken;
+     /// an aggregator of any other type
+     /// is ignored.
+     /// </param>
+     public override void Merge(AbstractStatisticsAggregator aggregator) {
+         if (!(aggregator is PdfOcrOutputTypeStatisticsAggregator)) {
+             return;
+         }
+         IDictionary<String, long?> otherUsages = ((PdfOcrOutputTypeStatisticsAggregator)aggregator).numberOfUsagesPerType;
+         lock (Lock) {
+             // NOTE(review): presumably el1 is this aggregator's count and el2 the other one's;
+             // a null el2 leaves el1 unchanged - confirm against MapUtil.Merge.
+             MapUtil.Merge(this.numberOfUsagesPerType, otherUsages, (el1, el2) => el2 == null ? el1 : el1 + el2);
+         }
+     }
+
+     /// <summary>Looks up the statistics key registered for the given output type.</summary>
+     /// <param name="type">pdfOcr output type</param>
+     /// <returns>the key stored for the type in <c>OCR_OUTPUT_TYPES</c>; callers compare the result with <c>null</c> to detect an unregistered type</returns>
+     internal static String GetKeyForType(PdfOcrOutputType type) {
+         return OCR_OUTPUT_TYPES.Get(type);
+     }
+ }
+}
diff --git a/itext/itext.pdfocr.api/itext/pdfocr/statistics/PdfOcrOutputTypeStatisticsEvent.cs b/itext/itext.pdfocr.api/itext/pdfocr/statistics/PdfOcrOutputTypeStatisticsEvent.cs
new file mode 100644
index 0000000..103a25a
--- /dev/null
+++ b/itext/itext.pdfocr.api/itext/pdfocr/statistics/PdfOcrOutputTypeStatisticsEvent.cs
@@ -0,0 +1,74 @@
+/*
+This file is part of the iText (R) project.
+Copyright (c) 1998-2021 iText Group NV
+Authors: iText Software.
+
+This program is offered under a commercial and under the AGPL license.
+For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
+
+AGPL licensing:
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see .
+*/
+using System;
+using System.Collections.Generic;
+using iText.Commons.Actions;
+using iText.Commons.Actions.Data;
+using iText.Commons.Utils;
+using iText.Pdfocr.Exceptions;
+
+namespace iText.Pdfocr.Statistics {
+ /// <summary>Class which represents an event for specifying type of an ocr processing.</summary>
+ /// <remarks>For internal usage only.</remarks>
+ public class PdfOcrOutputTypeStatisticsEvent : AbstractStatisticsEvent {
+     private const String OCR_OUTPUT_TYPE = "ocrOutput";
+
+     private readonly PdfOcrOutputType type;
+
+     /// <summary>Creates instance of pdfOcr statistics event.</summary>
+     /// <param name="type">pdfOcr output type</param>
+     /// <param name="productData">product data</param>
+     /// <exception cref="PdfOcrException">if no statistics key is registered for <paramref name="type"/></exception>
+     public PdfOcrOutputTypeStatisticsEvent(PdfOcrOutputType type, ProductData productData)
+         : base(productData) {
+         // PdfOcrOutputType is an enum (value type), so this comparison is always false;
+         // unsupported values are rejected by the key lookup below.
+         if (type == null) {
+             throw new PdfOcrException(PdfOcrExceptionMessageConstant.STATISTICS_EVENT_TYPE_CANT_BE_NULL);
+         }
+         if (null == PdfOcrOutputTypeStatisticsAggregator.GetKeyForType(type)) {
+             throw new PdfOcrException(PdfOcrExceptionMessageConstant.STATISTICS_EVENT_TYPE_IS_NOT_DETECTED);
+         }
+         this.type = type;
+     }
+
+     /// <inheritdoc/>
+     public override AbstractStatisticsAggregator CreateStatisticsAggregatorFromName(String statisticsName) {
+         if (OCR_OUTPUT_TYPE.Equals(statisticsName)) {
+             return new PdfOcrOutputTypeStatisticsAggregator();
+         }
+         return base.CreateStatisticsAggregatorFromName(statisticsName);
+     }
+
+     /// <inheritdoc/>
+     public override IList<String> GetStatisticsNames() {
+         return JavaCollectionsUtil.SingletonList(OCR_OUTPUT_TYPE);
+     }
+
+     /// <summary>Gets the type of statistic event.</summary>
+     /// <returns>the statistics event type</returns>
+     public virtual PdfOcrOutputType GetPdfOcrStatisticsEventType() {
+         return type;
+     }
+ }
+}
diff --git a/itext/itext.pdfocr.api/pdfocr-api.nuspec b/itext/itext.pdfocr.api/pdfocr-api.nuspec
index a488365..ed5bf78 100644
--- a/itext/itext.pdfocr.api/pdfocr-api.nuspec
+++ b/itext/itext.pdfocr.api/pdfocr-api.nuspec
@@ -2,7 +2,7 @@
itext7.pdfocr.api
- 1.0.3
+ 2.0.0
iText 7 pdfOcr
iText Software
iText Software
@@ -17,14 +17,14 @@
OCR PDF ligatures text glyphs iText Optical Character Recognition PDF/A ISO-compliant Tesseract open-source opensource English Mandarin Chinese Hindi Spanish French Arabic Bengali Russian Portuguese Indonesian scan image extractable data searchable diacritic sdk c# .net
-
-
+
+
-
-
+
+
diff --git a/itext/itext.pdfocr.tesseract4/Properties/AssemblyInfo.cs b/itext/itext.pdfocr.tesseract4/Properties/AssemblyInfo.cs
index f414d70..3ebb9ec 100644
--- a/itext/itext.pdfocr.tesseract4/Properties/AssemblyInfo.cs
+++ b/itext/itext.pdfocr.tesseract4/Properties/AssemblyInfo.cs
@@ -14,9 +14,9 @@
[assembly: Guid("0c4ceb00-9a56-4547-a925-5974a85a6048")]
-[assembly: AssemblyVersion("1.0.3.0")]
-[assembly: AssemblyFileVersion("1.0.3.0")]
-[assembly: AssemblyInformationalVersion("1.0.3")]
+[assembly: AssemblyVersion("2.0.0.0")]
+[assembly: AssemblyFileVersion("2.0.0.0")]
+[assembly: AssemblyInformationalVersion("2.0.0")]
[assembly: InternalsVisibleTo("itext.pdfocr.tesseract4.tests, PublicKey=" +
"00240000048000009400000006020000002400005253413100040000010001008b21ed5b3fc1c1" +
"1996390981fe22bbe71a39a9e11d3c2cefddd6ee92920fa871f9666ae0fa941af0280d0653df04" +
@@ -24,5 +24,4 @@
"009746bbdafcb75bcdbcecb7caf1f0f4b6e7d013906ba60b66eb1c8298e4efb052caf6cece4bf1" +
"816902cc")]
-[assembly: Versions.Attributes.KeyVersion("3.1.5.0")]
-[assembly: Versions.Attributes.KernelVersion("7.1.16.0")]
\ No newline at end of file
+[assembly: Versions.Attributes.KernelVersion("7.2.0.0")]
\ No newline at end of file
diff --git a/itext/itext.pdfocr.tesseract4/Properties/KeyVersionAttribute.cs b/itext/itext.pdfocr.tesseract4/Properties/KeyVersionAttribute.cs
deleted file mode 100644
index 9a8a754..0000000
--- a/itext/itext.pdfocr.tesseract4/Properties/KeyVersionAttribute.cs
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
-This file is part of the iText (R) project.
-Copyright (c) 1998-2021 iText Group NV
-Authors: iText Software.
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License version 3
-as published by the Free Software Foundation with the addition of the
-following permission added to Section 15 as permitted in Section 7(a):
-FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
-ITEXT GROUP. ITEXT GROUP DISCLAIMS THE WARRANTY OF NON INFRINGEMENT
-OF THIRD PARTY RIGHTS
-
-This program is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.
-See the GNU Affero General Public License for more details.
-You should have received a copy of the GNU Affero General Public License
-along with this program; if not, see http://www.gnu.org/licenses or write to
-the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
-Boston, MA, 02110-1301 USA, or download the license from the following URL:
-http://itextpdf.com/terms-of-use/
-
-The interactive user interfaces in modified source and object code versions
-of this program must display Appropriate Legal Notices, as required under
-Section 5 of the GNU Affero General Public License.
-
-In accordance with Section 7(b) of the GNU Affero General Public License,
-a covered work must retain the producer line in every PDF that is created
-or manipulated using iText.
-
-You can be released from the requirements of the license by purchasing
-a commercial license. Buying such a license is mandatory as soon as you
-develop commercial activities involving the iText software without
-disclosing the source code of your own applications.
-These activities include: offering paid services to customers as an ASP,
-serving PDFs on the fly in a web application, shipping iText with a closed
-source product.
-
-For more information, please contact iText Software Corp. at this
-address: sales@itextpdf.com
- */
-using System;
-
-namespace Versions.Attributes {
- [AttributeUsage(AttributeTargets.Assembly)]
- internal class KeyVersionAttribute : Attribute {
- internal string KeyVersion { get; private set; }
-
- internal KeyVersionAttribute(string keyVersion) {
- this.KeyVersion = keyVersion;
- }
- }
-}
diff --git a/itext/itext.pdfocr.tesseract4/itext.pdfocr.tesseract4.csproj b/itext/itext.pdfocr.tesseract4/itext.pdfocr.tesseract4.csproj
index 0866c86..fe4abf8 100644
--- a/itext/itext.pdfocr.tesseract4/itext.pdfocr.tesseract4.csproj
+++ b/itext/itext.pdfocr.tesseract4/itext.pdfocr.tesseract4.csproj
@@ -13,7 +13,7 @@
- net45
+ net461
CS1591;CS1570;CS1572;CS1573;CS1574;CS1580;CS1584;CS1658
diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/AbstractTesseract4OcrEngine.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/AbstractTesseract4OcrEngine.cs
index 5046c68..a17635d 100644
--- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/AbstractTesseract4OcrEngine.cs
+++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/AbstractTesseract4OcrEngine.cs
@@ -25,14 +25,19 @@ You should have received a copy of the GNU Affero General Public License
using System.IO;
using System.Text;
using System.Threading;
-using Common.Logging;
+using Microsoft.Extensions.Logging;
+using iText.Commons;
+using iText.Commons.Actions.Confirmations;
+using iText.Commons.Actions.Contexts;
+using iText.Commons.Actions.Data;
+using iText.Commons.Utils;
using iText.IO.Image;
-using iText.IO.Util;
-using iText.Kernel.Counter;
-using iText.Kernel.Counter.Event;
using iText.Pdfocr;
-using iText.Pdfocr.Events;
-using iText.Pdfocr.Tesseract4.Events;
+using iText.Pdfocr.Statistics;
+using iText.Pdfocr.Tesseract4.Actions.Data;
+using iText.Pdfocr.Tesseract4.Actions.Events;
+using iText.Pdfocr.Tesseract4.Exceptions;
+using iText.Pdfocr.Tesseract4.Logs;
namespace iText.Pdfocr.Tesseract4 {
///
@@ -47,7 +52,7 @@ namespace iText.Pdfocr.Tesseract4 {
/// Also there are possibilities to use features of "tesseract"
/// (optical character recognition engine for various operating systems).
///
- public abstract class AbstractTesseract4OcrEngine : IOcrEngine, IThreadLocalMetaInfoAware {
+ public abstract class AbstractTesseract4OcrEngine : IOcrEngine, IProductAware {
/// Supported image formats.
private static readonly ICollection SUPPORTED_IMAGE_FORMATS = JavaCollectionsUtil.UnmodifiableSet
(new HashSet(JavaUtil.ArraysAsList(ImageType.BMP, ImageType.PNG, ImageType.TIFF, ImageType.
@@ -76,7 +81,25 @@ public AbstractTesseract4OcrEngine(Tesseract4OcrEngineProperties tesseract4OcrEn
/// for tesseract
///
public virtual void DoTesseractOcr(FileInfo inputImage, FileInfo outputFile, OutputFormat outputFormat) {
- DoTesseractOcr(inputImage, JavaCollectionsUtil.SingletonList(outputFile), outputFormat, 1);
+ DoTesseractOcr(inputImage, outputFile, outputFormat, new OcrProcessContext(new Tesseract4EventHelper()));
+ }
+
+ /// Performs tesseract OCR for the first (or for the only) image page.
+ ///
+ /// input image
+ ///
+ ///
+ /// output file for the result for the first page
+ ///
+ /// selected
+ ///
+ /// for tesseract
+ ///
+ /// ocr process context
+ public virtual void DoTesseractOcr(FileInfo inputImage, FileInfo outputFile, OutputFormat outputFormat, OcrProcessContext
+ ocrProcessContext) {
+ DoTesseractOcr(inputImage, JavaCollectionsUtil.SingletonList(outputFile), outputFormat, 1, ocrProcessContext
+ .GetOcrEventHelper());
}
///
@@ -92,14 +115,52 @@ public virtual void DoTesseractOcr(FileInfo inputImage, FileInfo outputFile, Out
///
/// file to be created
public virtual void CreateTxtFile(IList inputImages, FileInfo txtFile) {
- LogManager.GetLogger(GetType()).Info(MessageFormatUtil.Format(Tesseract4LogMessageConstant.START_OCR_FOR_IMAGES
- , inputImages.Count));
- StringBuilder content = new StringBuilder();
- foreach (FileInfo inputImage in inputImages) {
- content.Append(DoImageOcr(inputImage, OutputFormat.TXT));
+ CreateTxtFile(inputImages, txtFile, new OcrProcessContext(new Tesseract4EventHelper()));
+ }
+
+ ///
+ /// Performs OCR using provided
+ ///
+ /// for the given list of
+ /// input images and saves output to a text file using provided path.
+ ///
+ ///
+ ///
+ ///
+ /// of images to be OCRed
+ ///
+ /// file to be created
+ /// ocr process context
+ public virtual void CreateTxtFile(IList inputImages, FileInfo txtFile, OcrProcessContext ocrProcessContext
+ ) {
+ ITextLogManager.GetLogger(GetType()).LogInformation(MessageFormatUtil.Format(Tesseract4LogMessageConstant.
+ START_OCR_FOR_IMAGES, inputImages.Count));
+ AbstractPdfOcrEventHelper storedEventHelper;
+ if (ocrProcessContext.GetOcrEventHelper() == null) {
+ storedEventHelper = new Tesseract4EventHelper();
+ }
+ else {
+ storedEventHelper = ocrProcessContext.GetOcrEventHelper();
+ }
+ PdfOcrTesseract4ProductEvent @event = PdfOcrTesseract4ProductEvent.CreateProcessImageEvent(storedEventHelper
+ .GetSequenceId(), null, storedEventHelper.GetConfirmationType());
+ storedEventHelper.OnEvent(@event);
+ try {
+ // set Tesseract4FileResultEventHelper
+ ocrProcessContext.SetOcrEventHelper(new Tesseract4FileResultEventHelper(storedEventHelper));
+ StringBuilder content = new StringBuilder();
+ foreach (FileInfo inputImage in inputImages) {
+ content.Append(DoImageOcr(inputImage, OutputFormat.TXT, ocrProcessContext));
+ }
+ // write to file
+ TesseractHelper.WriteToTextFile(txtFile.FullName, content.ToString());
+ if (@event.GetConfirmationType() == EventConfirmationType.ON_DEMAND) {
+ storedEventHelper.OnEvent(new ConfirmEvent(@event));
+ }
+ }
+ finally {
+ ocrProcessContext.SetOcrEventHelper(storedEventHelper);
}
- // write to file
- TesseractHelper.WriteToTextFile(txtFile.FullName, content.ToString());
}
///
@@ -171,7 +232,36 @@ public String GetLanguagesAsString() {
public IDictionary> DoImageOcr(FileInfo input) {
VerifyImageFormatValidity(input);
return ((AbstractTesseract4OcrEngine.TextInfoTesseractOcrResult)ProcessInputFiles(input, OutputFormat.HOCR
- )).GetTextInfos();
+ , new Tesseract4EventHelper())).GetTextInfos();
+ }
+
+ ///
+ /// Reads data from the provided input image file and returns retrieved
+ /// data in the format described below.
+ ///
+ ///
+ /// input image
+ ///
+ ///
+ /// ocr process context
+ ///
+ ///
+ ///
+ /// where key is
+ ///
+ /// representing the number of the page and value is
+ ///
+ /// of
+ ///
+ /// elements where each
+ ///
+ /// element contains a word or a line and its 4
+ /// coordinates(bbox)
+ ///
+ public IDictionary> DoImageOcr(FileInfo input, OcrProcessContext ocrProcessContext) {
+ VerifyImageFormatValidity(input);
+ return ((AbstractTesseract4OcrEngine.TextInfoTesseractOcrResult)ProcessInputFiles(input, OutputFormat.HOCR
+ , ocrProcessContext.GetOcrEventHelper())).GetTextInfos();
}
///
@@ -187,16 +277,18 @@ public IDictionary> DoImageOcr(FileInfo input) {
///
/// result
///
+ /// ocr process context
///
/// OCR result as a
///
/// that is
/// returned after processing the given image
///
- public String DoImageOcr(FileInfo input, OutputFormat outputFormat) {
+ public String DoImageOcr(FileInfo input, OutputFormat outputFormat, OcrProcessContext ocrProcessContext) {
String result = "";
VerifyImageFormatValidity(input);
- AbstractTesseract4OcrEngine.ITesseractOcrResult processedData = ProcessInputFiles(input, outputFormat);
+ AbstractTesseract4OcrEngine.ITesseractOcrResult processedData = ProcessInputFiles(input, outputFormat, ocrProcessContext
+ .GetOcrEventHelper());
if (processedData != null) {
if (outputFormat.Equals(OutputFormat.TXT)) {
result = ((AbstractTesseract4OcrEngine.StringTesseractOcrResult)processedData).GetData();
@@ -220,6 +312,29 @@ public String DoImageOcr(FileInfo input, OutputFormat outputFormat) {
return result;
}
+ ///
+ /// Reads data from the provided input image file and returns retrieved
+ /// data as string.
+ ///
+ ///
+ /// input image
+ ///
+ ///
+ ///
+ /// return
+ ///
+ /// result
+ ///
+ ///
+ /// OCR result as a
+ ///
+ /// that is
+ /// returned after processing the given image
+ ///
+ public String DoImageOcr(FileInfo input, OutputFormat outputFormat) {
+ return DoImageOcr(input, outputFormat, new OcrProcessContext(new Tesseract4EventHelper()));
+ }
+
/// Checks current os type.
/// boolean true is current os is windows, otherwise - false
public virtual bool IsWindows() {
@@ -251,29 +366,27 @@ public virtual void ValidateLanguages(IList languagesList) {
if (languagesList.Count == 0) {
if (!new FileInfo(GetTessData() + System.IO.Path.DirectorySeparatorChar + GetTesseract4OcrEngineProperties
().GetDefaultLanguage() + suffix).Exists) {
- throw new Tesseract4OcrException(Tesseract4OcrException.INCORRECT_LANGUAGE).SetMessageParams(GetTesseract4OcrEngineProperties
- ().GetDefaultLanguage() + suffix, GetTessData());
+ throw new PdfOcrInputTesseract4Exception(PdfOcrTesseract4ExceptionMessageConstant.INCORRECT_LANGUAGE).SetMessageParams
+ (GetTesseract4OcrEngineProperties().GetDefaultLanguage() + suffix, GetTessData());
}
}
else {
foreach (String lang in languagesList) {
if (!new FileInfo(GetTessData() + System.IO.Path.DirectorySeparatorChar + lang + suffix).Exists) {
- throw new Tesseract4OcrException(Tesseract4OcrException.INCORRECT_LANGUAGE).SetMessageParams(lang + suffix
- , GetTessData());
+ throw new PdfOcrInputTesseract4Exception(PdfOcrTesseract4ExceptionMessageConstant.INCORRECT_LANGUAGE).SetMessageParams
+ (lang + suffix, GetTessData());
}
}
}
}
///
- public virtual IMetaInfo GetThreadLocalMetaInfo() {
- return threadLocalMetaInfo.Value;
+ public virtual PdfOcrMetaInfoContainer GetMetaInfoContainer() {
+ return new PdfOcrMetaInfoContainer(new Tesseract4MetaInfo());
}
- ///
- public virtual IThreadLocalMetaInfoAware SetThreadLocalMetaInfo(IMetaInfo metaInfo) {
- this.threadLocalMetaInfo.Value = metaInfo;
- return this;
+ public virtual ProductData GetProductData() {
+ return PdfOcrTesseract4ProductData.GetInstance();
}
///
@@ -307,8 +420,8 @@ public virtual IThreadLocalMetaInfoAware SetThreadLocalMetaInfo(IMetaInfo metaIn
///
/// number of page to be processed
internal virtual void DoTesseractOcr(FileInfo inputImage, IList outputFiles, OutputFormat outputFormat
- , int pageNumber) {
- DoTesseractOcr(inputImage, outputFiles, outputFormat, pageNumber, true);
+ , int pageNumber, AbstractPdfOcrEventHelper eventHelper) {
+ DoTesseractOcr(inputImage, outputFiles, outputFormat, pageNumber, true, eventHelper);
}
///
@@ -341,13 +454,10 @@ internal virtual void DoTesseractOcr(FileInfo inputImage, IList output
/// for tesseract
///
/// number of page to be processed
- ///
- /// indicates if
- ///
- /// needs to be dispatched
- ///
+ /// indicates if event needs to be dispatched
+ /// event helper
internal abstract void DoTesseractOcr(FileInfo inputImage, IList outputFiles, OutputFormat outputFormat
- , int pageNumber, bool dispatchEvent);
+ , int pageNumber, bool dispatchEvent, AbstractPdfOcrEventHelper eventHelper);
/// Gets path to provided tess data directory.
///
@@ -356,32 +466,24 @@ internal abstract void DoTesseractOcr(FileInfo inputImage, IList outpu
///
internal virtual String GetTessData() {
if (GetTesseract4OcrEngineProperties().GetPathToTessData() == null) {
- throw new Tesseract4OcrException(Tesseract4OcrException.PATH_TO_TESS_DATA_IS_NOT_SET);
+ throw new PdfOcrTesseract4Exception(PdfOcrTesseract4ExceptionMessageConstant.PATH_TO_TESS_DATA_IS_NOT_SET);
}
else {
return GetTesseract4OcrEngineProperties().GetPathToTessData().FullName;
}
}
- internal virtual void ScheduledCheck() {
- ReflectionUtils.ScheduledCheck();
+ internal virtual PdfOcrTesseract4ProductEvent OnEvent(AbstractPdfOcrEventHelper eventHelper) {
+ // usage event
+ PdfOcrTesseract4ProductEvent @event = PdfOcrTesseract4ProductEvent.CreateProcessImageEvent(eventHelper.GetSequenceId
+ (), null, eventHelper.GetConfirmationType());
+ eventHelper.OnEvent(@event);
+ return @event;
}
- internal virtual void OnEvent() {
- IMetaInfo metaInfo = this.GetThreadLocalMetaInfo();
- if (!(metaInfo is OcrPdfCreatorMetaInfo)) {
- EventCounterHandler.GetInstance().OnEvent(PdfOcrTesseract4Event.TESSERACT4_IMAGE_OCR, this.GetThreadLocalMetaInfo
- (), GetType());
- }
- else {
- Guid uuid = ((OcrPdfCreatorMetaInfo)metaInfo).GetDocumentId();
- if (!processedUUID.Contains(uuid)) {
- processedUUID.Add(uuid);
- EventCounterHandler.GetInstance().OnEvent(OcrPdfCreatorMetaInfo.PdfDocumentType.PDFA.Equals(((OcrPdfCreatorMetaInfo
- )metaInfo).GetPdfDocumentType()) ? PdfOcrTesseract4Event.TESSERACT4_IMAGE_TO_PDFA : PdfOcrTesseract4Event
- .TESSERACT4_IMAGE_TO_PDF, ((OcrPdfCreatorMetaInfo)metaInfo).GetWrappedMetaInfo(), GetType());
- }
- }
+ internal virtual void OnEventStatistics(AbstractPdfOcrEventHelper eventHelper) {
+ eventHelper.OnEvent(new PdfOcrOutputTypeStatisticsEvent(PdfOcrOutputType.DATA, PdfOcrTesseract4ProductData
+ .GetInstance()));
}
/// Reads data from the provided input image file.
@@ -396,6 +498,7 @@ internal virtual void OnEvent() {
/// by
///
///
+ /// event helper
///
///
///
@@ -406,7 +509,7 @@ internal virtual void OnEvent() {
/// if the output format is HOCR
///
private AbstractTesseract4OcrEngine.ITesseractOcrResult ProcessInputFiles(FileInfo input, OutputFormat outputFormat
- ) {
+ , AbstractPdfOcrEventHelper eventHelper) {
IDictionary> imageData = new LinkedDictionary>();
StringBuilder data = new StringBuilder();
IList tempFiles = new List();
@@ -423,7 +526,7 @@ private AbstractTesseract4OcrEngine.ITesseractOcrResult ProcessInputFiles(FileIn
for (int i = 0; i < numOfFiles; i++) {
tempFiles.Add(CreateTempFile(extension));
}
- DoTesseractOcr(input, tempFiles, outputFormat, page);
+ DoTesseractOcr(input, tempFiles, outputFormat, page, true, eventHelper);
if (outputFormat.Equals(OutputFormat.HOCR)) {
IList tempTxtFiles = null;
if (GetTesseract4OcrEngineProperties().IsUseTxtToImproveHocrParsing()) {
@@ -431,7 +534,7 @@ private AbstractTesseract4OcrEngine.ITesseractOcrResult ProcessInputFiles(FileIn
for (int i = 0; i < numOfFiles; i++) {
tempTxtFiles.Add(CreateTempFile(".txt"));
}
- DoTesseractOcr(input, tempTxtFiles, OutputFormat.TXT, page, false);
+ DoTesseractOcr(input, tempTxtFiles, OutputFormat.TXT, page, false, eventHelper);
}
IDictionary> pageData = TesseractHelper.ParseHocrFile(tempFiles, tempTxtFiles, GetTesseract4OcrEngineProperties
());
@@ -454,7 +557,7 @@ private AbstractTesseract4OcrEngine.ITesseractOcrResult ProcessInputFiles(FileIn
}
}
catch (System.IO.IOException e) {
- LogManager.GetLogger(GetType()).Error(MessageFormatUtil.Format(Tesseract4LogMessageConstant.CANNOT_OCR_INPUT_FILE
+ ITextLogManager.GetLogger(GetType()).LogError(MessageFormatUtil.Format(Tesseract4LogMessageConstant.CANNOT_OCR_INPUT_FILE
, e.Message));
}
finally {
@@ -495,10 +598,10 @@ private void VerifyImageFormatValidity(FileInfo image) {
ImageType type = ImagePreprocessingUtil.GetImageType(image);
bool isValid = SUPPORTED_IMAGE_FORMATS.Contains(type);
if (!isValid) {
- LogManager.GetLogger(GetType()).Error(MessageFormatUtil.Format(Tesseract4LogMessageConstant.CANNOT_READ_INPUT_IMAGE
+ ITextLogManager.GetLogger(GetType()).LogError(MessageFormatUtil.Format(Tesseract4LogMessageConstant.CANNOT_READ_INPUT_IMAGE
, image.FullName));
- throw new Tesseract4OcrException(Tesseract4OcrException.INCORRECT_INPUT_IMAGE_FORMAT).SetMessageParams(image
- .Name);
+ throw new PdfOcrInputTesseract4Exception(PdfOcrTesseract4ExceptionMessageConstant.INCORRECT_INPUT_IMAGE_FORMAT
+ ).SetMessageParams(image.Name);
}
}
diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/ImagePreprocessingUtil.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/ImagePreprocessingUtil.cs
index 856b043..76a7021 100644
--- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/ImagePreprocessingUtil.cs
+++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/ImagePreprocessingUtil.cs
@@ -22,11 +22,15 @@ You should have received a copy of the GNU Affero General Public License
*/
using System;
using System.IO;
-using Common.Logging;
+using Microsoft.Extensions.Logging;
using Tesseract;
+using iText.Commons;
+using iText.Commons.Utils;
using iText.IO.Image;
using iText.IO.Source;
using iText.IO.Util;
+using iText.Pdfocr.Tesseract4.Exceptions;
+using iText.Pdfocr.Tesseract4.Logs;
namespace iText.Pdfocr.Tesseract4 {
/// Utilities class to work with images.
@@ -83,10 +87,10 @@ internal static ImageType GetImageType(FileInfo inputImage) {
}
catch (Exception e) {
// NOSONAR
- LogManager.GetLogger(typeof(iText.Pdfocr.Tesseract4.ImagePreprocessingUtil)).Error(MessageFormatUtil.Format
- (Tesseract4LogMessageConstant.CANNOT_READ_INPUT_IMAGE, e.Message));
- throw new Tesseract4OcrException(Tesseract4OcrException.CANNOT_READ_PROVIDED_IMAGE).SetMessageParams(inputImage
- .FullName);
+ ITextLogManager.GetLogger(typeof(iText.Pdfocr.Tesseract4.ImagePreprocessingUtil)).LogError(MessageFormatUtil
+ .Format(Tesseract4LogMessageConstant.CANNOT_READ_INPUT_IMAGE, e.Message));
+ throw new PdfOcrInputTesseract4Exception(PdfOcrTesseract4ExceptionMessageConstant.CANNOT_READ_PROVIDED_IMAGE
+ ).SetMessageParams(inputImage.FullName);
}
return type;
}
@@ -158,8 +162,8 @@ internal static Pix PreprocessImage(FileInfo inputFile, int pageNumber, ImagePre
pix = TesseractOcrUtil.ReadPix(inputFile);
}
if (pix == null) {
- throw new Tesseract4OcrException(Tesseract4OcrException.CANNOT_READ_PROVIDED_IMAGE).SetMessageParams(inputFile
- .FullName);
+ throw new PdfOcrInputTesseract4Exception(PdfOcrTesseract4ExceptionMessageConstant.CANNOT_READ_PROVIDED_IMAGE
+ ).SetMessageParams(inputFile.FullName);
}
return TesseractOcrUtil.PreprocessPix(pix, imagePreprocessingOptions);
}
@@ -190,12 +194,12 @@ internal static System.Drawing.Bitmap ReadImage(FileInfo inputImage) {
bufferedImage = iText.Pdfocr.Tesseract4.ImagePreprocessingUtil.ReadImageFromFile(inputImage);
}
catch (ArgumentException ex) {
- LogManager.GetLogger(typeof(iText.Pdfocr.Tesseract4.ImagePreprocessingUtil)).Info(MessageFormatUtil.Format
- (Tesseract4LogMessageConstant.CANNOT_CREATE_BUFFERED_IMAGE, ex.Message));
+ ITextLogManager.GetLogger(typeof(iText.Pdfocr.Tesseract4.ImagePreprocessingUtil)).LogInformation(MessageFormatUtil
+ .Format(Tesseract4LogMessageConstant.CANNOT_CREATE_BUFFERED_IMAGE, ex.Message));
}
catch (System.IO.IOException ex) {
- LogManager.GetLogger(typeof(iText.Pdfocr.Tesseract4.ImagePreprocessingUtil)).Info(MessageFormatUtil.Format
- (Tesseract4LogMessageConstant.CANNOT_CREATE_BUFFERED_IMAGE, ex.Message));
+ ITextLogManager.GetLogger(typeof(iText.Pdfocr.Tesseract4.ImagePreprocessingUtil)).LogInformation(MessageFormatUtil
+ .Format(Tesseract4LogMessageConstant.CANNOT_CREATE_BUFFERED_IMAGE, ex.Message));
}
if (bufferedImage == null) {
try {
@@ -203,8 +207,8 @@ internal static System.Drawing.Bitmap ReadImage(FileInfo inputImage) {
);
}
catch (System.IO.IOException ex) {
- LogManager.GetLogger(typeof(iText.Pdfocr.Tesseract4.ImagePreprocessingUtil)).Info(MessageFormatUtil.Format
- (Tesseract4LogMessageConstant.CANNOT_READ_INPUT_IMAGE, ex.Message));
+ ITextLogManager.GetLogger(typeof(iText.Pdfocr.Tesseract4.ImagePreprocessingUtil)).LogInformation(MessageFormatUtil
+ .Format(Tesseract4LogMessageConstant.CANNOT_READ_INPUT_IMAGE, ex.Message));
}
}
return bufferedImage;
diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/ReflectionUtils.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/ReflectionUtils.cs
deleted file mode 100644
index 7ec8b0f..0000000
--- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/ReflectionUtils.cs
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
-
-This file is part of the iText (R) project.
- Copyright (c) 1998-2021 iText Group NV
-Authors: Bruno Lowagie, Paulo Soares, et al.
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License version 3
-as published by the Free Software Foundation with the addition of the
-following permission added to Section 15 as permitted in Section 7(a):
-FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
-ITEXT GROUP. ITEXT GROUP DISCLAIMS THE WARRANTY OF NON INFRINGEMENT
-OF THIRD PARTY RIGHTS
-
-This program is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.
-See the GNU Affero General Public License for more details.
-You should have received a copy of the GNU Affero General Public License
-along with this program; if not, see http://www.gnu.org/licenses or write to
-the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
-Boston, MA, 02110-1301 USA, or download the license from the following URL:
-http://itextpdf.com/terms-of-use/
-
-The interactive user interfaces in modified source and object code versions
-of this program must display Appropriate Legal Notices, as required under
-Section 5 of the GNU Affero General Public License.
-
-In accordance with Section 7(b) of the GNU Affero General Public License,
-a covered work must retain the producer line in every PDF that is created
-or manipulated using iText.
-
-You can be released from the requirements of the license by purchasing
-a commercial license. Buying such a license is mandatory as soon as you
-develop commercial activities involving the iText software without
-disclosing the source code of your own applications.
-These activities include: offering paid services to customers as an ASP,
-serving PDFs on the fly in a web application, shipping iText with a closed
-source product.
-
-For more information, please contact iText Software Corp. at this
-address: sales@itextpdf.com
-*/
-using System;
-using System.Collections;
-using System.Collections.Generic;
-using System.IO;
-using System.Reflection;
-using Common.Logging;
-using iText.IO.Util;
-using iText.Kernel.Counter;
-using Versions.Attributes;
-
-namespace iText.Pdfocr.Tesseract4 {
- public sealed class ReflectionUtils {
-
- private const String NO_PDFOCR_TESSERACT4 = "No license loaded for product pdfOcr-Tesseract4. Please use LicenseKey.loadLicense(...) to load one.";
-
- private ReflectionUtils() {
- }
-
- public static void ScheduledCheck() {
- try {
- String licenseKeyClassName = "iText.License.LicenseKey, itext.licensekey";
- String licenseKeyProductClassName = "iText.License.LicenseKeyProduct, itext.licensekey";
- String checkLicenseKeyMethodName = "ScheduledCheck";
- Type licenseKeyClass = GetLicenseKeyClass(licenseKeyClassName);
- if (licenseKeyClass != null)
- {
- Type licenseKeyProductClass = GetLicenseKeyClass(licenseKeyProductClassName);
- object[] objects = new object[]
- {
- PdfOcrTesseract4ProductInfo.PRODUCT_NAME,
- PdfOcrTesseract4ProductInfo.MAJOR_VERSION.ToString(),
- PdfOcrTesseract4ProductInfo.MINOR_VERSION.ToString()
- };
- Object productObject = System.Activator.CreateInstance(licenseKeyProductClass, objects);
- MethodInfo m = licenseKeyClass.GetMethod(checkLicenseKeyMethodName);
- m.Invoke(System.Activator.CreateInstance(licenseKeyClass), new object[] { productObject });
- }
- }
- catch (Exception e) {
- if (null != e && null != e.InnerException) {
- String message = e.InnerException.Message;
- if (NO_PDFOCR_TESSERACT4.Equals(message)) {
- throw new Exception(message, e.InnerException);
- }
- }
- if (!iText.Kernel.Version.IsAGPLVersion()) {
- throw;
- }
- }
- }
-
- private static Type GetLicenseKeyClass(string className)
- {
- String licenseKeyClassFullName = null;
- Assembly assembly = typeof(ReflectionUtils).GetAssembly();
- Attribute keyVersionAttr = assembly.GetCustomAttribute(typeof(KeyVersionAttribute));
- if (keyVersionAttr is KeyVersionAttribute)
- {
- String keyVersion = ((KeyVersionAttribute)keyVersionAttr).KeyVersion;
- String format = "{0}, Version={1}, Culture=neutral, PublicKeyToken=8354ae6d2174ddca";
- licenseKeyClassFullName = String.Format(format, className, keyVersion);
- }
- Type type = null;
- if (licenseKeyClassFullName != null)
- {
- String fileLoadExceptionMessage = null;
- try
- {
- type = System.Type.GetType(licenseKeyClassFullName);
- }
- catch (FileLoadException fileLoadException)
- {
- fileLoadExceptionMessage = fileLoadException.Message;
- }
- if (type == null)
- {
- try
- {
- type = System.Type.GetType(className);
- }
- catch
- {
- // empty
- }
- if (type == null && fileLoadExceptionMessage != null)
- {
- LogManager.GetLogger(typeof(ReflectionUtils)).Error(fileLoadExceptionMessage);
- }
- }
- }
- return type;
- }
- }
-}
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/events/EventCountingExecutableTest.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4EventHelper.cs
similarity index 52%
rename from itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/events/EventCountingExecutableTest.cs
rename to itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4EventHelper.cs
index bbc6be9..4b33f96 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/events/EventCountingExecutableTest.cs
+++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4EventHelper.cs
@@ -20,28 +20,31 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
-using System;
-using System.IO;
-using iText.IO.Util;
+using iText.Commons.Actions;
+using iText.Commons.Actions.Confirmations;
+using iText.Commons.Actions.Sequence;
using iText.Pdfocr;
-using iText.Pdfocr.Tesseract4;
-using iText.Test.Attributes;
-namespace iText.Pdfocr.Events {
- public class EventCountingExecutableTest : EventCountingTest {
- public EventCountingExecutableTest()
- : base(IntegrationTestHelper.ReaderType.EXECUTABLE) {
+namespace iText.Pdfocr.Tesseract4 {
+ /// Helper class for working with events.
+ internal class Tesseract4EventHelper : AbstractPdfOcrEventHelper {
+ internal Tesseract4EventHelper() {
}
- [NUnit.Framework.Test]
- [LogMessage(Tesseract4LogMessageConstant.CANNOT_READ_INPUT_IMAGE)]
- public override void TestEventCountingCustomMetaInfoError() {
- String imgPath = new FileInfo(TEST_IMAGES_DIRECTORY + "numbers_101.jpg").FullName;
- NUnit.Framework.Assert.That(() => {
- base.TestEventCountingCustomMetaInfoError();
+ // do nothing
+ public override void OnEvent(AbstractProductITextEvent @event) {
+ if (@event is AbstractContextBasedITextEvent) {
+ ((AbstractContextBasedITextEvent)@event).SetMetaInfo(new Tesseract4MetaInfo());
}
- , NUnit.Framework.Throws.InstanceOf().With.Message.EqualTo(MessageFormatUtil.Format(Tesseract4LogMessageConstant.CANNOT_READ_INPUT_IMAGE, imgPath)))
-;
+ EventManager.GetInstance().OnEvent(@event);
+ }
+
+ public override SequenceId GetSequenceId() {
+ return new SequenceId();
+ }
+
+ public override EventConfirmationType GetConfirmationType() {
+ return EventConfirmationType.ON_DEMAND;
}
}
}
diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4ExecutableOcrEngine.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4ExecutableOcrEngine.cs
index 21c941f..16a2803 100644
--- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4ExecutableOcrEngine.cs
+++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4ExecutableOcrEngine.cs
@@ -24,9 +24,15 @@ You should have received a copy of the GNU Affero General Public License
using System.Collections.Generic;
using System.IO;
using System.Security;
-using Common.Logging;
+using Microsoft.Extensions.Logging;
using Tesseract;
-using iText.IO.Util;
+using iText.Commons;
+using iText.Commons.Actions.Confirmations;
+using iText.Commons.Utils;
+using iText.Pdfocr;
+using iText.Pdfocr.Tesseract4.Actions.Events;
+using iText.Pdfocr.Tesseract4.Exceptions;
+using iText.Pdfocr.Tesseract4.Logs;
namespace iText.Pdfocr.Tesseract4 {
///
@@ -121,23 +127,27 @@ public void SetPathToExecutable(String path) {
/// for tesseract
///
/// number of page to be processed
- ///
- /// indicates if
- ///
- /// needs to be dispatched
- ///
+ /// indicates if event needs to be dispatched
+ /// event helper
internal override void DoTesseractOcr(FileInfo inputImage, IList outputFiles, OutputFormat outputFormat
- , int pageNumber, bool dispatchEvent) {
- ScheduledCheck();
+ , int pageNumber, bool dispatchEvent, AbstractPdfOcrEventHelper eventHelper) {
IList @params = new List();
String execPath = null;
String imagePath = null;
String workingDirectory = null;
+ PdfOcrTesseract4ProductEvent @event = null;
+ if (eventHelper == null) {
+ eventHelper = new Tesseract4EventHelper();
+ }
+ if (dispatchEvent) {
+ @event = OnEvent(eventHelper);
+ }
try {
imagePath = inputImage.FullName;
// path to tesseract executable
if (GetPathToExecutable() == null || String.IsNullOrEmpty(GetPathToExecutable())) {
- throw new Tesseract4OcrException(Tesseract4OcrException.CANNOT_FIND_PATH_TO_TESSERACT_EXECUTABLE);
+ throw new PdfOcrTesseract4Exception(PdfOcrTesseract4ExceptionMessageConstant.CANNOT_FIND_PATH_TO_TESSERACT_EXECUTABLE
+ );
}
else {
if (IsWindows()) {
@@ -156,7 +166,7 @@ internal override void DoTesseractOcr(FileInfo inputImage, IList outpu
imagePath = PreprocessImage(inputImage, pageNumber);
// get the input file parent directory as working directory
// as tesseract cannot parse non ascii characters in input path
- String imageParentDir = TesseractOcrUtil.GetParentDirectory(imagePath);
+ String imageParentDir = TesseractOcrUtil.GetParentDirectoryFile(imagePath);
String replacement = IsWindows() ? "" : "/";
workingDirectory = imageParentDir.Replace("file:///", replacement).Replace("file:/", replacement);
// input file
@@ -173,15 +183,18 @@ internal override void DoTesseractOcr(FileInfo inputImage, IList outpu
AddPreserveInterwordSpaces(@params);
// set default user defined dpi
AddDefaultDpi(@params);
- if (dispatchEvent) {
- OnEvent();
- }
// run tesseract process
TesseractHelper.RunCommand(execPath, @params, workingDirectory);
+ // statistics event
+ OnEventStatistics(eventHelper);
+ // confirm on_demand event
+ if (@event != null && @event.GetConfirmationType() == EventConfirmationType.ON_DEMAND) {
+ eventHelper.OnEvent(new ConfirmEvent(@event));
+ }
}
- catch (Tesseract4OcrException e) {
- LogManager.GetLogger(GetType()).Error(e.Message);
- throw new Tesseract4OcrException(e.Message, e);
+ catch (PdfOcrTesseract4Exception e) {
+ ITextLogManager.GetLogger(GetType()).LogError(e.Message);
+ throw new PdfOcrTesseract4Exception(e.Message, e);
}
finally {
try {
@@ -190,7 +203,7 @@ internal override void DoTesseractOcr(FileInfo inputImage, IList outpu
}
}
catch (SecurityException e) {
- LogManager.GetLogger(GetType()).Error(MessageFormatUtil.Format(Tesseract4LogMessageConstant.CANNOT_DELETE_FILE
+ ITextLogManager.GetLogger(GetType()).LogError(MessageFormatUtil.Format(Tesseract4LogMessageConstant.CANNOT_DELETE_FILE
, imagePath, e.Message));
}
try {
@@ -200,7 +213,7 @@ internal override void DoTesseractOcr(FileInfo inputImage, IList outpu
}
}
catch (SecurityException e) {
- LogManager.GetLogger(GetType()).Error(MessageFormatUtil.Format(Tesseract4LogMessageConstant.CANNOT_DELETE_FILE
+ ITextLogManager.GetLogger(GetType()).LogError(MessageFormatUtil.Format(Tesseract4LogMessageConstant.CANNOT_DELETE_FILE
, GetTesseract4OcrEngineProperties().GetPathToUserWordsFile(), e.Message));
}
}
@@ -307,13 +320,13 @@ private void AddOutputFile(IList command, FileInfo outputFile, OutputFor
.FullName;
String fileName = new String(filePath.ToCharArray(), 0, filePath.IndexOf(extension, StringComparison.Ordinal
));
- LogManager.GetLogger(GetType()).Info(MessageFormatUtil.Format(Tesseract4LogMessageConstant.CREATED_TEMPORARY_FILE
- , outputFile.FullName));
+ ITextLogManager.GetLogger(GetType()).LogInformation(MessageFormatUtil.Format(Tesseract4LogMessageConstant.
+ CREATED_TEMPORARY_FILE, outputFile.FullName));
command.Add(AddQuotes(fileName));
}
catch (Exception) {
// NOSONAR
- throw new Tesseract4OcrException(Tesseract4OcrException.TESSERACT_FAILED);
+ throw new PdfOcrTesseract4Exception(PdfOcrTesseract4ExceptionMessageConstant.TESSERACT_FAILED);
}
}
@@ -364,7 +377,7 @@ private String PreprocessImage(FileInfo inputImage, int pageNumber) {
}
}
catch (System.IO.IOException e) {
- LogManager.GetLogger(GetType()).Error(MessageFormatUtil.Format(Tesseract4LogMessageConstant.CANNOT_READ_INPUT_IMAGE
+ ITextLogManager.GetLogger(GetType()).LogError(MessageFormatUtil.Format(Tesseract4LogMessageConstant.CANNOT_READ_INPUT_IMAGE
, e.Message));
}
return path;
@@ -379,8 +392,8 @@ private void CheckTesseractInstalled(String execPath) {
try {
TesseractHelper.RunCommand(execPath, JavaCollectionsUtil.SingletonList("--version"));
}
- catch (Tesseract4OcrException e) {
- throw new Tesseract4OcrException(Tesseract4OcrException.TESSERACT_NOT_FOUND, e);
+ catch (PdfOcrTesseract4Exception e) {
+ throw new PdfOcrTesseract4Exception(PdfOcrTesseract4ExceptionMessageConstant.TESSERACT_NOT_FOUND, e);
}
}
@@ -407,8 +420,8 @@ private String GetExtension(FileInfo inputImage) {
/// path to the second file
/// true if parent directories are equal, otherwise - false
private bool AreEqualParentDirectories(String firstPath, String secondPath) {
- String firstParentDir = TesseractOcrUtil.GetParentDirectory(firstPath);
- String secondParentDir = TesseractOcrUtil.GetParentDirectory(secondPath);
+ String firstParentDir = TesseractOcrUtil.GetParentDirectoryFile(firstPath);
+ String secondParentDir = TesseractOcrUtil.GetParentDirectoryFile(secondPath);
return firstParentDir != null && firstParentDir.Equals(secondParentDir);
}
}
diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4FileResultEventHelper.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4FileResultEventHelper.cs
new file mode 100644
index 0000000..f81437b
--- /dev/null
+++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4FileResultEventHelper.cs
@@ -0,0 +1,67 @@
+/*
+This file is part of the iText (R) project.
+Copyright (c) 1998-2021 iText Group NV
+Authors: iText Software.
+
+This program is offered under a commercial and under the AGPL license.
+For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
+
+AGPL licensing:
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+using iText.Commons.Actions;
+using iText.Commons.Actions.Confirmations;
+using iText.Commons.Actions.Sequence;
+using iText.Pdfocr;
+using iText.Pdfocr.Tesseract4.Actions.Events;
+
+namespace iText.Pdfocr.Tesseract4 {
+    /// <summary>Delegating event helper that drops PROCESS_IMAGE events and their confirmations.</summary>
+    internal class Tesseract4FileResultEventHelper : AbstractPdfOcrEventHelper {
+        private AbstractPdfOcrEventHelper wrappedEventHelper;
+
+        internal Tesseract4FileResultEventHelper() : this(null) { }
+        /// <summary>Wraps the given helper; a null argument is replaced by a new Tesseract4EventHelper.</summary>
+        internal Tesseract4FileResultEventHelper(AbstractPdfOcrEventHelper wrappedEventHelper) {
+            this.wrappedEventHelper = wrappedEventHelper ?? new Tesseract4EventHelper();
+        }
+
+        /// <summary>Forwards the event to the wrapped helper unless it is a PROCESS_IMAGE event or confirms one.</summary>
+        public override void OnEvent(AbstractProductITextEvent @event) {
+            if (!(IsProcessImageEvent(@event) || IsConfirmForProcessImageEvent(@event))) {
+                wrappedEventHelper.OnEvent(@event);
+            }
+        }
+
+        // The remaining overrides delegate straight to the wrapped helper.
+        public override SequenceId GetSequenceId() {
+            return wrappedEventHelper.GetSequenceId();
+        }
+        public override EventConfirmationType GetConfirmationType() {
+            return wrappedEventHelper.GetConfirmationType();
+        }
+
+        /// <summary>True when the event is a PdfOcrTesseract4ProductEvent whose event type equals PROCESS_IMAGE.</summary>
+        private static bool IsProcessImageEvent(AbstractProductITextEvent @event) {
+            PdfOcrTesseract4ProductEvent productEvent = @event as PdfOcrTesseract4ProductEvent;
+            return productEvent != null && PdfOcrTesseract4ProductEvent.PROCESS_IMAGE.Equals(productEvent.GetEventType());
+        }
+        /// <summary>True when the event is a ConfirmEvent whose confirmed event is a Tesseract4 PROCESS_IMAGE event.</summary>
+        private static bool IsConfirmForProcessImageEvent(AbstractProductITextEvent @event) {
+            ConfirmEvent confirmation = @event as ConfirmEvent;
+            return confirmation != null && confirmation.GetConfirmedEvent() is PdfOcrTesseract4ProductEvent
+                && PdfOcrTesseract4ProductEvent.PROCESS_IMAGE.Equals(confirmation.GetConfirmedEvent().GetEventType());
+        }
+    }
+}
diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4LibOcrEngine.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4LibOcrEngine.cs
index f0ee1b9..2163e1e 100644
--- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4LibOcrEngine.cs
+++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4LibOcrEngine.cs
@@ -24,9 +24,15 @@ You should have received a copy of the GNU Affero General Public License
using System.Collections.Generic;
using System.IO;
using System.Text.RegularExpressions;
-using Common.Logging;
+using Microsoft.Extensions.Logging;
using Tesseract;
-using iText.IO.Util;
+using iText.Commons;
+using iText.Commons.Actions.Confirmations;
+using iText.Commons.Utils;
+using iText.Pdfocr;
+using iText.Pdfocr.Tesseract4.Actions.Events;
+using iText.Pdfocr.Tesseract4.Exceptions;
+using iText.Pdfocr.Tesseract4.Logs;
namespace iText.Pdfocr.Tesseract4 {
///
@@ -56,7 +62,7 @@ public class Tesseract4LibOcrEngine : AbstractTesseract4OcrEngine {
private TesseractEngine tesseractInstance = null;
/// Pattern for matching ASCII string.
- private static readonly Regex ASCII_STRING_PATTERN = iText.IO.Util.StringUtil.RegexCompile("^[\\u0000-\\u007F]*$"
+ private static readonly Regex ASCII_STRING_PATTERN = iText.Commons.Utils.StringUtil.RegexCompile("^[\\u0000-\\u007F]*$"
);
///
@@ -144,22 +150,23 @@ public virtual void InitializeTesseract(OutputFormat outputFormat) {
/// for tesseract
///
/// number of page to be processed
- ///
- /// indicates if
- ///
- /// needs to be dispatched
- ///
+ /// <param name="dispatchEvent">indicates if event needs to be dispatched</param>
+ /// <param name="eventHelper">event helper</param>
internal override void DoTesseractOcr(FileInfo inputImage, IList outputFiles, OutputFormat outputFormat
- , int pageNumber, bool dispatchEvent) {
- ScheduledCheck();
+ , int pageNumber, bool dispatchEvent, AbstractPdfOcrEventHelper eventHelper) {
+ PdfOcrTesseract4ProductEvent @event = null;
+ if (eventHelper == null) {
+ eventHelper = new Tesseract4EventHelper();
+ }
+ // usage event
+ if (dispatchEvent) {
+ @event = OnEvent(eventHelper);
+ }
try {
// check tess data path for non ASCII characters
ValidateTessDataPath(GetTessData());
ValidateLanguages(GetTesseract4OcrEngineProperties().GetLanguages());
InitializeTesseract(outputFormat);
- if (dispatchEvent) {
- OnEvent();
- }
// if preprocessing is not needed and provided image is tiff,
// the image will be paginated and separate pages will be OCRed
IList resultList = new List();
@@ -183,16 +190,20 @@ internal override void DoTesseractOcr(FileInfo inputImage, IList outpu
}
}
catch (System.IO.IOException e) {
- LogManager.GetLogger(GetType()).Error(MessageFormatUtil.Format(Tesseract4LogMessageConstant.CANNOT_WRITE_TO_FILE
- , e.Message));
- throw new Tesseract4OcrException(Tesseract4OcrException.TESSERACT_FAILED);
+ throw new PdfOcrInputTesseract4Exception(PdfOcrTesseract4ExceptionMessageConstant.CANNOT_WRITE_TO_FILE, e);
}
}
}
+ // statistics event
+ OnEventStatistics(eventHelper);
+ // confirm on_demand event
+ if (@event != null && @event.GetConfirmationType() == EventConfirmationType.ON_DEMAND) {
+ eventHelper.OnEvent(new ConfirmEvent(@event));
+ }
}
- catch (Tesseract4OcrException e) {
- LogManager.GetLogger(GetType()).Error(e.Message);
- throw new Tesseract4OcrException(e.Message, e);
+ catch (PdfOcrTesseract4Exception e) {
+ ITextLogManager.GetLogger(GetType()).LogError(e.Message);
+ throw new PdfOcrTesseract4Exception(e.Message, e);
}
finally {
if (tesseractInstance != null) {
@@ -220,9 +231,9 @@ internal override void DoTesseractOcr(FileInfo inputImage, IList outpu
/// path to tess data
///
private static void ValidateTessDataPath(String tessDataPath) {
- Matcher asciiStringMatcher = iText.IO.Util.Matcher.Match(ASCII_STRING_PATTERN, tessDataPath);
+ Matcher asciiStringMatcher = iText.Commons.Utils.Matcher.Match(ASCII_STRING_PATTERN, tessDataPath);
if (!asciiStringMatcher.Matches()) {
- throw new Tesseract4OcrException(Tesseract4OcrException.PATH_TO_TESS_DATA_DIRECTORY_CONTAINS_NON_ASCII_CHARACTERS
+ throw new PdfOcrTesseract4Exception(PdfOcrTesseract4ExceptionMessageConstant.PATH_TO_TESS_DATA_DIRECTORY_CONTAINS_NON_ASCII_CHARACTERS
);
}
}
@@ -263,8 +274,8 @@ private IList GetOcrResultForMultiPage(FileInfo inputImage, OutputFormat
}
catch (TesseractException e) {
String msg = MessageFormatUtil.Format(Tesseract4LogMessageConstant.TESSERACT_FAILED, e.Message);
- LogManager.GetLogger(GetType()).Error(msg);
- throw new Tesseract4OcrException(Tesseract4OcrException.TESSERACT_FAILED);
+ ITextLogManager.GetLogger(GetType()).LogError(msg);
+ throw new PdfOcrTesseract4Exception(PdfOcrTesseract4ExceptionMessageConstant.TESSERACT_FAILED);
}
finally {
TesseractOcrUtil.DisposeTesseractInstance(GetTesseractInstance());
@@ -305,8 +316,8 @@ private String GetOcrResultForSinglePage(FileInfo inputImage, OutputFormat outpu
}
catch (Exception e) {
// NOSONAR
- LogManager.GetLogger(GetType()).Info(MessageFormatUtil.Format(Tesseract4LogMessageConstant.CANNOT_PROCESS_IMAGE
- , e.Message));
+ ITextLogManager.GetLogger(GetType()).LogInformation(MessageFormatUtil.Format(Tesseract4LogMessageConstant.
+ CANNOT_PROCESS_IMAGE, e.Message));
}
}
if (result == null) {
@@ -317,9 +328,9 @@ private String GetOcrResultForSinglePage(FileInfo inputImage, OutputFormat outpu
}
catch (Exception e) {
// NOSONAR
- LogManager.GetLogger(GetType()).Error(MessageFormatUtil.Format(Tesseract4LogMessageConstant.TESSERACT_FAILED
+ ITextLogManager.GetLogger(GetType()).LogError(MessageFormatUtil.Format(Tesseract4LogMessageConstant.TESSERACT_FAILED
, e.Message));
- throw new Tesseract4OcrException(Tesseract4OcrException.TESSERACT_FAILED);
+ throw new PdfOcrTesseract4Exception(PdfOcrTesseract4ExceptionMessageConstant.TESSERACT_FAILED);
}
return result;
}
diff --git a/itext/itext.pdfocr.api/itext/pdfocr/PdfOcrMetaInfo.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4MetaInfo.cs
similarity index 88%
rename from itext/itext.pdfocr.api/itext/pdfocr/PdfOcrMetaInfo.cs
rename to itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4MetaInfo.cs
index f60da53..268657f 100644
--- a/itext/itext.pdfocr.api/itext/pdfocr/PdfOcrMetaInfo.cs
+++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4MetaInfo.cs
@@ -20,9 +20,9 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see .
*/
-using iText.Kernel.Counter.Event;
+using iText.Commons.Actions.Contexts;
-namespace iText.Pdfocr {
- public class PdfOcrMetaInfo : IMetaInfo {
+namespace iText.Pdfocr.Tesseract4 {
+ internal class Tesseract4MetaInfo : IMetaInfo {
}
}
diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4OcrEngineProperties.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4OcrEngineProperties.cs
index b5bc93a..e02ad00 100644
--- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4OcrEngineProperties.cs
+++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4OcrEngineProperties.cs
@@ -23,9 +23,12 @@ You should have received a copy of the GNU Affero General Public License
using System;
using System.Collections.Generic;
using System.IO;
-using Common.Logging;
-using iText.IO.Util;
+using Microsoft.Extensions.Logging;
+using iText.Commons;
+using iText.Commons.Utils;
using iText.Pdfocr;
+using iText.Pdfocr.Tesseract4.Exceptions;
+using iText.Pdfocr.Tesseract4.Logs;
namespace iText.Pdfocr.Tesseract4 {
///
@@ -151,7 +154,8 @@ public FileInfo GetPathToTessData() {
///
public iText.Pdfocr.Tesseract4.Tesseract4OcrEngineProperties SetPathToTessData(FileInfo tessData) {
if (tessData == null || !FileUtil.DirectoryExists(tessData.FullName)) {
- throw new Tesseract4OcrException(Tesseract4OcrException.PATH_TO_TESS_DATA_DIRECTORY_IS_INVALID);
+ throw new PdfOcrTesseract4Exception(PdfOcrTesseract4ExceptionMessageConstant.PATH_TO_TESS_DATA_DIRECTORY_IS_INVALID
+ );
}
this.tessDataDir = tessData;
return this;
@@ -291,7 +295,7 @@ internal virtual iText.Pdfocr.Tesseract4.Tesseract4OcrEngineProperties SetUserWo
SetUserWords(language, inputStream);
}
catch (System.IO.IOException e) {
- LogManager.GetLogger(GetType()).Warn(MessageFormatUtil.Format(Tesseract4LogMessageConstant.CANNOT_USE_USER_WORDS
+ ITextLogManager.GetLogger(GetType()).LogWarning(MessageFormatUtil.Format(Tesseract4LogMessageConstant.CANNOT_USE_USER_WORDS
, e.Message));
}
}
@@ -339,8 +343,8 @@ internal virtual iText.Pdfocr.Tesseract4.Tesseract4OcrEngineProperties SetUserWo
SetLanguages(languagesList);
}
else {
- throw new Tesseract4OcrException(Tesseract4OcrException.LANGUAGE_IS_NOT_IN_THE_LIST).SetMessageParams(language
- );
+ throw new PdfOcrInputTesseract4Exception(PdfOcrTesseract4ExceptionMessageConstant.LANGUAGE_IS_NOT_IN_THE_LIST
+ ).SetMessageParams(language);
}
}
String userWordsFileName = TesseractOcrUtil.GetTempFilePath(language, "." + DEFAULT_USER_WORDS_SUFFIX);
@@ -357,7 +361,7 @@ internal virtual iText.Pdfocr.Tesseract4.Tesseract4OcrEngineProperties SetUserWo
}
catch (System.IO.IOException e) {
SetPathToUserWordsFile(null);
- LogManager.GetLogger(GetType()).Warn(MessageFormatUtil.Format(Tesseract4LogMessageConstant.CANNOT_USE_USER_WORDS
+ ITextLogManager.GetLogger(GetType()).LogWarning(MessageFormatUtil.Format(Tesseract4LogMessageConstant.CANNOT_USE_USER_WORDS
, e.Message));
}
return this;
diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/TesseractHelper.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/TesseractHelper.cs
index fba8a58..c82f609 100644
--- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/TesseractHelper.cs
+++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/TesseractHelper.cs
@@ -25,11 +25,15 @@ You should have received a copy of the GNU Affero General Public License
using System.IO;
using System.Linq;
using System.Security;
+using System.Text;
using System.Text.RegularExpressions;
-using Common.Logging;
-using iText.IO.Util;
+using Microsoft.Extensions.Logging;
+using iText.Commons;
+using iText.Commons.Utils;
using iText.Kernel.Geom;
using iText.Pdfocr;
+using iText.Pdfocr.Tesseract4.Exceptions;
+using iText.Pdfocr.Tesseract4.Logs;
using iText.StyledXmlParser.Jsoup.Nodes;
using iText.StyledXmlParser.Jsoup.Select;
@@ -37,17 +41,17 @@ namespace iText.Pdfocr.Tesseract4 {
/// Helper class.
public class TesseractHelper {
/// The logger.
- private static readonly ILog LOGGER = LogManager.GetLogger(typeof(iText.Pdfocr.Tesseract4.TesseractHelper)
- );
+ private static readonly ILogger LOGGER = ITextLogManager.GetLogger(typeof(iText.Pdfocr.Tesseract4.TesseractHelper
+ ));
/// Patterns for matching hOCR element bboxes.
- private static readonly Regex BBOX_PATTERN = iText.IO.Util.StringUtil.RegexCompile(".*bbox(\\s+\\d+){4}.*"
+ private static readonly Regex BBOX_PATTERN = iText.Commons.Utils.StringUtil.RegexCompile(".*bbox(\\s+\\d+){4}.*"
);
- private static readonly Regex BBOX_COORDINATE_PATTERN = iText.IO.Util.StringUtil.RegexCompile(".*\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+).*"
+ private static readonly Regex BBOX_COORDINATE_PATTERN = iText.Commons.Utils.StringUtil.RegexCompile(".*\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+).*"
);
- private static readonly Regex WCONF_PATTERN = iText.IO.Util.StringUtil.RegexCompile("^.*(x_wconf *\\d+).*$"
+ private static readonly Regex WCONF_PATTERN = iText.Commons.Utils.StringUtil.RegexCompile("^.*(x_wconf *\\d+).*$"
);
/// Size of the array containing bbox.
@@ -93,37 +97,6 @@ public class TesseractHelper {
private TesseractHelper() {
}
- ///
- /// Parses each hocr file from the provided list, retrieves text, and
- /// returns data in the format described below.
- ///
- /// list of input files
- ///
- ///
- ///
- ///
- ///
- ///
- ///
- /// where key is
- ///
- /// representing the number of the page and value is
- ///
- /// of
- ///
- /// elements where each
- ///
- /// element contains a word or a line and its 4
- /// coordinates(bbox)
- ///
- [System.ObsoleteAttribute(@"since 1.0.2. Use ParseHocrFile(System.Collections.Generic.IList{E}, System.Collections.Generic.IList{E}, Tesseract4OcrEngineProperties) instead"
- )]
- public static IDictionary> ParseHocrFile(IList inputFiles, TextPositioning
- textPositioning) {
- return ParseHocrFile(inputFiles, null, new Tesseract4OcrEngineProperties().SetTextPositioning(textPositioning
- ));
- }
-
///
/// Parses each hocr file from the provided list, retrieves text, and
/// returns data in the format described below.
@@ -171,7 +144,7 @@ internal static IDictionary> ParseHocrFile(IList
inputFile.FullName);
Elements pages = doc.GetElementsByClass(OCR_PAGE);
foreach (iText.StyledXmlParser.Jsoup.Nodes.Element page in pages) {
- String[] pageNum = iText.IO.Util.StringUtil.Split(page.Id(), PAGE_PREFIX_PATTERN);
+ String[] pageNum = iText.Commons.Utils.StringUtil.Split(page.Id(), PAGE_PREFIX_PATTERN);
int pageNumber = Convert.ToInt32(pageNum[pageNum.Length - 1], System.Globalization.CultureInfo.InvariantCulture
);
IList textData = GetTextData(page, tesseract4OcrEngineProperties, txt, unparsedBBoxes);
@@ -186,8 +159,8 @@ internal static IDictionary> ParseHocrFile(IList
}
}
foreach (iText.StyledXmlParser.Jsoup.Nodes.Node node in unparsedBBoxes.Values) {
- LOGGER.Warn(MessageFormatUtil.Format(Tesseract4LogMessageConstant.CANNOT_PARSE_NODE_BBOX, node.ToString())
- );
+ LOGGER.LogWarning(MessageFormatUtil.Format(Tesseract4LogMessageConstant.CANNOT_PARSE_NODE_BBOX, node.ToString
+ ()));
}
return imageData;
}
@@ -216,9 +189,10 @@ internal static Rectangle GetAlignedBBox(iText.StyledXmlParser.Jsoup.Nodes.Eleme
internal static Rectangle ParseBBox(iText.StyledXmlParser.Jsoup.Nodes.Node node, Rectangle pageBBox, IDictionary
unparsedBBoxes) {
IList bbox = new List();
- Matcher bboxMatcher = iText.IO.Util.Matcher.Match(BBOX_PATTERN, node.Attr(TITLE));
+ Matcher bboxMatcher = iText.Commons.Utils.Matcher.Match(BBOX_PATTERN, node.Attr(TITLE));
if (bboxMatcher.Matches()) {
- Matcher bboxCoordinateMatcher = iText.IO.Util.Matcher.Match(BBOX_COORDINATE_PATTERN, bboxMatcher.Group());
+ Matcher bboxCoordinateMatcher = iText.Commons.Utils.Matcher.Match(BBOX_COORDINATE_PATTERN, bboxMatcher.Group
+ ());
if (bboxCoordinateMatcher.Matches()) {
for (int i = 0; i < BBOX_ARRAY_SIZE; i++) {
String coord = bboxCoordinateMatcher.Group(i + 1);
@@ -293,12 +267,12 @@ internal static void DeleteFile(String pathToFile) {
}
}
catch (System.IO.IOException e) {
- LOGGER.Info(MessageFormatUtil.Format(Tesseract4LogMessageConstant.CANNOT_DELETE_FILE, pathToFile, e.Message
- ));
+ LOGGER.LogInformation(MessageFormatUtil.Format(Tesseract4LogMessageConstant.CANNOT_DELETE_FILE, pathToFile
+ , e.Message));
}
catch (SecurityException e) {
- LOGGER.Info(MessageFormatUtil.Format(Tesseract4LogMessageConstant.CANNOT_DELETE_FILE, pathToFile, e.Message
- ));
+ LOGGER.LogInformation(MessageFormatUtil.Format(Tesseract4LogMessageConstant.CANNOT_DELETE_FILE, pathToFile
+ , e.Message));
}
}
@@ -316,12 +290,12 @@ internal static void DeleteFile(String pathToFile) {
internal static String ReadTxtFile(FileInfo txtFile) {
String content = null;
try {
- content = iText.IO.Util.JavaUtil.GetStringForBytes(File.ReadAllBytes(txtFile.FullName), System.Text.Encoding
+ content = iText.Commons.Utils.JavaUtil.GetStringForBytes(File.ReadAllBytes(txtFile.FullName), System.Text.Encoding
.UTF8);
}
catch (System.IO.IOException e) {
- LOGGER.Error(MessageFormatUtil.Format(Tesseract4LogMessageConstant.CANNOT_READ_FILE, txtFile.FullName, e.Message
- ));
+ LOGGER.LogError(MessageFormatUtil.Format(Tesseract4LogMessageConstant.CANNOT_READ_FILE, txtFile.FullName,
+ e.Message));
}
return content;
}
@@ -349,7 +323,7 @@ internal static void WriteToTextFile(String path, String data) {
}
}
catch (System.IO.IOException e) {
- LOGGER.Error(MessageFormatUtil.Format(Tesseract4LogMessageConstant.CANNOT_WRITE_TO_FILE, path, e.Message));
+ throw new PdfOcrInputTesseract4Exception(PdfOcrTesseract4ExceptionMessageConstant.CANNOT_WRITE_TO_FILE, e);
}
}
@@ -377,19 +351,19 @@ internal static void RunCommand(String execPath, IList paramsList, Strin
String @params = String.Join(" ", paramsList);
bool cmdSucceeded = SystemUtil.RunProcessAndWait(execPath, @params, workingDirPath);
if (!cmdSucceeded) {
- LOGGER.Error(MessageFormatUtil.Format(Tesseract4LogMessageConstant.COMMAND_FAILED, execPath + " " + @params
+ LOGGER.LogError(MessageFormatUtil.Format(Tesseract4LogMessageConstant.COMMAND_FAILED, execPath + " " + @params
));
- throw new Tesseract4OcrException(Tesseract4OcrException.TESSERACT_FAILED);
+ throw new PdfOcrTesseract4Exception(PdfOcrTesseract4ExceptionMessageConstant.TESSERACT_FAILED);
}
}
catch (System.IO.IOException e) {
// NOSONAR
- LOGGER.Error(MessageFormatUtil.Format(Tesseract4LogMessageConstant.COMMAND_FAILED, e.Message));
- throw new Tesseract4OcrException(Tesseract4OcrException.TESSERACT_FAILED);
+ LOGGER.LogError(MessageFormatUtil.Format(Tesseract4LogMessageConstant.COMMAND_FAILED, e.Message));
+ throw new PdfOcrTesseract4Exception(PdfOcrTesseract4ExceptionMessageConstant.TESSERACT_FAILED);
}
catch (Exception e) {
- LOGGER.Error(MessageFormatUtil.Format(Tesseract4LogMessageConstant.COMMAND_FAILED, e.Message));
- throw new Tesseract4OcrException(Tesseract4OcrException.TESSERACT_FAILED);
+ LOGGER.LogError(MessageFormatUtil.Format(Tesseract4LogMessageConstant.COMMAND_FAILED, e.Message));
+ throw new PdfOcrTesseract4Exception(PdfOcrTesseract4ExceptionMessageConstant.TESSERACT_FAILED);
}
}
@@ -451,7 +425,7 @@ private static bool IsElementConfident(iText.StyledXmlParser.Jsoup.Nodes.Element
foreach (iText.StyledXmlParser.Jsoup.Nodes.Node node in lineOrCaption.ChildNodes()) {
if (node is iText.StyledXmlParser.Jsoup.Nodes.Element) {
String title = ((iText.StyledXmlParser.Jsoup.Nodes.Element)node).Attr(TITLE);
- Matcher matcher = iText.IO.Util.Matcher.Match(WCONF_PATTERN, title);
+ Matcher matcher = iText.Commons.Utils.Matcher.Match(WCONF_PATTERN, title);
if (matcher.Matches()) {
String wconf = null;
try {
@@ -461,7 +435,7 @@ private static bool IsElementConfident(iText.StyledXmlParser.Jsoup.Nodes.Element
}
//No need to do anything here
if (wconf != null) {
- wconf = iText.IO.Util.StringUtil.ReplaceAll(wconf, X_WCONF, "").Trim();
+ wconf = iText.Commons.Utils.StringUtil.ReplaceAll(wconf, X_WCONF, "").Trim();
wconfTotal += Convert.ToInt32(wconf, System.Globalization.CultureInfo.InvariantCulture);
wconfCount++;
}
@@ -485,21 +459,21 @@ private static IList GetTextDataForWords(iText.StyledXmlParser.Jsoup.N
if (txtLine == null) {
foreach (iText.StyledXmlParser.Jsoup.Nodes.Element word in lineOrCaption.GetElementsByClass(OCRX_WORD)) {
Rectangle bboxRect = GetAlignedBBox(word, textPositioning, pageBbox, unparsedBBoxes);
- AddToTextData(textData, word.Text(), bboxRect, pageBbox);
+ AddToTextData(textData, word.Text(), bboxRect);
}
}
else {
IList textInfos = new List();
- String txtLine1 = iText.IO.Util.StringUtil.ReplaceAll(txtLine, NEW_LINE_PATTERN, "");
- String txtLine2 = iText.IO.Util.StringUtil.ReplaceAll(txtLine1, SPACE_PATTERN, " ");
- String[] lineItems = iText.IO.Util.StringUtil.Split(txtLine2, " ");
+ String txtLine1 = iText.Commons.Utils.StringUtil.ReplaceAll(txtLine, NEW_LINE_PATTERN, "");
+ String txtLine2 = iText.Commons.Utils.StringUtil.ReplaceAll(txtLine1, SPACE_PATTERN, " ");
+ String[] lineItems = iText.Commons.Utils.StringUtil.Split(txtLine2, " ");
foreach (iText.StyledXmlParser.Jsoup.Nodes.Element word in lineOrCaption.GetElementsByClass(OCRX_WORD)) {
Rectangle bboxRect = GetAlignedBBox(word, textPositioning, pageBbox, unparsedBBoxes);
textInfos.Add(new TextInfo(word.Text(), bboxRect));
- if (iText.IO.Util.StringUtil.ReplaceAll(lineItems[0], NEW_LINE_OR_SPACE_PATTERN, "").Equals(iText.IO.Util.StringUtil.ReplaceAll
+ if (iText.Commons.Utils.StringUtil.ReplaceAll(lineItems[0], NEW_LINE_OR_SPACE_PATTERN, "").Equals(iText.Commons.Utils.StringUtil.ReplaceAll
(GetTextInfosText(textInfos), SPACE_PATTERN, ""))) {
lineItems = JavaUtil.ArraysCopyOfRange(lineItems, 1, lineItems.Length);
- AddToTextData(textData, MergeTextInfos(textInfos), pageBbox);
+ AddToTextData(textData, MergeTextInfos(textInfos));
textInfos.Clear();
}
}
@@ -514,37 +488,34 @@ private static IList GetTextDataForLines(iText.StyledXmlParser.Jsoup.N
IList textData = new List();
Rectangle bboxRect = GetAlignedBBox(lineOrCaption, TextPositioning.BY_LINES, pageBbox, unparsedBBoxes);
if (txtLine == null) {
- AddToTextData(textData, lineOrCaption.Text(), bboxRect, pageBbox);
+ AddToTextData(textData, lineOrCaption.Text(), bboxRect);
}
else {
- AddToTextData(textData, txtLine, bboxRect, pageBbox);
+ AddToTextData(textData, txtLine, bboxRect);
}
return textData;
}
/// Add text chunk represented by text and bbox to list of text infos.
- private static void AddToTextData(IList textData, String text, Rectangle bboxRect, Rectangle pageBbox
- ) {
- IList bbox = JavaUtil.ArraysAsList(ToPixels(bboxRect.GetLeft()), ToPixels(pageBbox.GetTop() - bboxRect
- .GetTop()), ToPixels(bboxRect.GetRight()), ToPixels(pageBbox.GetTop() - bboxRect.GetBottom()));
- TextInfo textInfo = new TextInfo(text, bboxRect, bbox);
+ private static void AddToTextData(IList textData, String text, Rectangle bboxRect) {
+ TextInfo textInfo = new TextInfo(text, bboxRect);
textData.Add(textInfo);
}
/// Add text chunk represented by text info to list of text infos.
- private static void AddToTextData(IList textData, TextInfo textInfo, Rectangle pageBbox) {
+ private static void AddToTextData(IList textData, TextInfo textInfo) {
String text = textInfo.GetText();
Rectangle bboxRect = textInfo.GetBboxRect();
- AddToTextData(textData, text, bboxRect, pageBbox);
+ AddToTextData(textData, text, bboxRect);
}
/// Gets common text for list of text infos.
private static String GetTextInfosText(IList textInfos) {
- String text = "";
+ StringBuilder text = new StringBuilder();
foreach (TextInfo textInfo in textInfos) {
- text = text + textInfo.GetText();
+ text.Append(textInfo.GetText());
}
- return text;
+ return text.ToString();
}
/// Merges text infos.
@@ -568,12 +539,12 @@ private static String FindHocrLineInTxt(iText.StyledXmlParser.Jsoup.Nodes.Elemen
if (txt == null) {
return null;
}
- String hocrLineText = iText.IO.Util.StringUtil.ReplaceAll(line.Text(), SPACE_PATTERN, "");
+ String hocrLineText = iText.Commons.Utils.StringUtil.ReplaceAll(line.Text(), SPACE_PATTERN, "");
if (String.IsNullOrEmpty(hocrLineText)) {
return null;
}
foreach (String txtLine in txt) {
- if (iText.IO.Util.StringUtil.ReplaceAll(txtLine, SPACE_PATTERN, "").Equals(hocrLineText)) {
+ if (iText.Commons.Utils.StringUtil.ReplaceAll(txtLine, SPACE_PATTERN, "").Equals(hocrLineText)) {
return txtLine;
}
}
diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/TesseractOcrUtil.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/TesseractOcrUtil.cs
index 5a3341d..f4b9786 100644
--- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/TesseractOcrUtil.cs
+++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/TesseractOcrUtil.cs
@@ -28,9 +28,13 @@ You should have received a copy of the GNU Affero General Public License
using System.IO;
using System.Linq;
using System.Runtime.InteropServices;
-using Common.Logging;
+using iText.Commons;
+using iText.Commons.Utils;
using iText.IO.Image;
using iText.IO.Util;
+using iText.Pdfocr.Tesseract4.Logs;
+using Microsoft.Extensions.Logging;
+using iText.Pdfocr.Tesseract4.Exceptions;
using Tesseract;
namespace iText.Pdfocr.Tesseract4 {
@@ -153,8 +157,8 @@ internal static Pix ConvertToGrayscale(Pix pix) {
}
else
{
- LogManager.GetLogger(typeof(TesseractOcrUtil))
- .Info(MessageFormatUtil.Format(Tesseract4LogMessageConstant.CANNOT_CONVERT_IMAGE_TO_GRAYSCALE, depth));
+ ITextLogManager.GetLogger(typeof(TesseractOcrUtil))
+ .LogInformation(MessageFormatUtil.Format(Tesseract4LogMessageConstant.CANNOT_CONVERT_IMAGE_TO_GRAYSCALE, depth));
return pix;
}
}
@@ -207,16 +211,16 @@ internal static Pix OtsuImageThresholding(Pix pix, ImagePreprocessingOptions ima
}
else
{
- LogManager.GetLogger(typeof(TesseractOcrUtil))
- .Info(MessageFormatUtil.Format(Tesseract4LogMessageConstant.CANNOT_BINARIZE_IMAGE, pix.Depth));
+ ITextLogManager.GetLogger(typeof(TesseractOcrUtil))
+ .LogInformation(MessageFormatUtil.Format(Tesseract4LogMessageConstant.CANNOT_BINARIZE_IMAGE, pix.Depth));
DestroyPix(thresholdPix);
return pix;
}
}
else
{
- LogManager.GetLogger(typeof(TesseractOcrUtil))
- .Info(MessageFormatUtil.Format(Tesseract4LogMessageConstant.CANNOT_BINARIZE_IMAGE, pix.Depth));
+ ITextLogManager.GetLogger(typeof(TesseractOcrUtil))
+ .LogInformation(MessageFormatUtil.Format(Tesseract4LogMessageConstant.CANNOT_BINARIZE_IMAGE, pix.Depth));
return pix;
}
}
@@ -324,7 +328,7 @@ internal static void SetTesseractProperties(TesseractEngine tesseractInstance, S
/// method. In .Net all these properties
/// are needed to be provided in tesseract constructor in order to
/// initialize tesseract instance.Thus, tesseract initialization takes
- /// place in constructor in
+ /// place in constructor in
/// java, but in .Net it happens only after all properties are validated,
/// i.e. just before OCR process.
///
@@ -355,9 +359,9 @@ internal static TesseractEngine InitializeTesseractInstance(bool isWindows, Stri
}
catch (Exception e)
{
- throw new Tesseract4OcrException(isWindows ?
- Tesseract4OcrException.TESSERACT_LIB_NOT_INSTALLED_WIN :
- Tesseract4OcrException.TESSERACT_LIB_NOT_INSTALLED, e);
+ throw new PdfOcrTesseract4Exception(isWindows ?
+ PdfOcrTesseract4ExceptionMessageConstant.TESSERACT_LIB_NOT_INSTALLED_WIN :
+ PdfOcrTesseract4ExceptionMessageConstant.TESSERACT_LIB_NOT_INSTALLED, e);
}
}
}
@@ -437,7 +441,7 @@ internal static String GetTempFilePath(string name, string suffix) {
/// Returns parent directory for the passed path.
/// path path to file
/// parent directory where the file is located
- internal static String GetParentDirectory(string path)
+ internal static String GetParentDirectoryFile(string path)
{
return Directory.GetParent(path).FullName;
}
@@ -475,8 +479,8 @@ internal void InitializeImagesListFromTiff(FileInfo inputFile)
SetListOfPages(bitmapList);
} catch (Exception e)
{
- LogManager.GetLogger(typeof(TesseractOcrUtil))
- .Error(MessageFormatUtil.Format(
+ ITextLogManager.GetLogger(typeof(TesseractOcrUtil))
+ .LogError(MessageFormatUtil.Format(
Tesseract4LogMessageConstant.CANNOT_RETRIEVE_PAGES_FROM_IMAGE,
inputFile.FullName,
e.Message));
@@ -505,8 +509,8 @@ internal static Bitmap GetImagePage(FileInfo input, int page)
int pages = image.GetFrameCount(FrameDimension.Page);
if (page >= pages)
{
- LogManager.GetLogger(typeof(TesseractOcrUtil))
- .Warn(MessageFormatUtil.Format(
+ ITextLogManager.GetLogger(typeof(TesseractOcrUtil))
+ .LogWarning(MessageFormatUtil.Format(
Tesseract4LogMessageConstant.PAGE_NUMBER_IS_INCORRECT,
page,
input.FullName));
@@ -516,8 +520,8 @@ internal static Bitmap GetImagePage(FileInfo input, int page)
img = new Bitmap(image);
} catch (Exception e)
{
- LogManager.GetLogger(typeof(TesseractOcrUtil))
- .Error(MessageFormatUtil.Format(
+ ITextLogManager.GetLogger(typeof(TesseractOcrUtil))
+ .LogError(MessageFormatUtil.Format(
Tesseract4LogMessageConstant.CANNOT_RETRIEVE_PAGES_FROM_IMAGE,
input.FullName,
e.Message));
@@ -727,7 +731,7 @@ internal static void SaveImageToTempPngFile(string tmpFileName, Bitmap image)
}
catch (Exception e)
{
- LogManager.GetLogger(typeof(TesseractOcrUtil)).Error(MessageFormatUtil.Format(
+ ITextLogManager.GetLogger(typeof(TesseractOcrUtil)).LogError(MessageFormatUtil.Format(
Tesseract4LogMessageConstant.CANNOT_PROCESS_IMAGE,
e.Message));
}
@@ -754,7 +758,7 @@ internal static void SavePixToPngFile(string filename, Pix pix)
}
catch (Exception e)
{
- LogManager.GetLogger(typeof(TesseractOcrUtil)).Info(MessageFormatUtil.Format(
+ ITextLogManager.GetLogger(typeof(TesseractOcrUtil)).LogInformation(MessageFormatUtil.Format(
Tesseract4LogMessageConstant.CANNOT_PROCESS_IMAGE,
e.Message));
}
@@ -835,7 +839,7 @@ internal static Pix ReadPix(FileInfo inputFile)
catch (Exception e)
{
// NOSONAR
- LogManager.GetLogger(typeof(TesseractOcrUtil)).Error(MessageFormatUtil.Format
+ ITextLogManager.GetLogger(typeof(TesseractOcrUtil)).LogError(MessageFormatUtil.Format
(Tesseract4LogMessageConstant.CANNOT_READ_INPUT_IMAGE, e.Message));
}
if (pix != null)
@@ -870,7 +874,7 @@ internal static Pix ReadPix(byte[] imageBytes)
catch (Exception e)
{
// NOSONAR
- LogManager.GetLogger(typeof(TesseractOcrUtil)).Error(MessageFormatUtil.Format
+ ITextLogManager.GetLogger(typeof(TesseractOcrUtil)).LogError(MessageFormatUtil.Format
(Tesseract4LogMessageConstant.CANNOT_READ_INPUT_IMAGE, e.Message));
return null;
}
@@ -895,7 +899,7 @@ internal static int DetectRotation(FileInfo inputFile)
catch (Exception e)
{
// NOSONAR
- LogManager.GetLogger(typeof(TesseractOcrUtil)).Error(MessageFormatUtil.Format
+ ITextLogManager.GetLogger(typeof(TesseractOcrUtil)).LogError(MessageFormatUtil.Format
(Tesseract4LogMessageConstant.CANNOT_READ_INPUT_IMAGE, e.Message));
return ROTATION_0;
}
@@ -935,7 +939,7 @@ internal static int DetectRotation(byte[] imageBytes)
catch (Exception e)
{
// NOSONAR
- LogManager.GetLogger(typeof(TesseractOcrUtil)).Error(MessageFormatUtil.Format
+ ITextLogManager.GetLogger(typeof(TesseractOcrUtil)).LogError(MessageFormatUtil.Format
(Tesseract4LogMessageConstant.CANNOT_READ_INPUT_IMAGE, e.Message));
return ROTATION_0;
}
@@ -970,7 +974,7 @@ internal static int ReadRotationFromMetadata(System.Drawing.Image image)
case EXIF_ROTATION_270:
return ROTATION_270;
default:
- LogManager.GetLogger(typeof(TesseractOcrUtil)).Warn(MessageFormatUtil.Format(
+ ITextLogManager.GetLogger(typeof(TesseractOcrUtil)).LogWarning(MessageFormatUtil.Format(
Tesseract4LogMessageConstant.UNSUPPORTED_EXIF_ORIENTATION_VALUE,
orientation));
return ROTATION_0;
diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/actions/data/PdfOcrTesseract4ProductData.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/actions/data/PdfOcrTesseract4ProductData.cs
new file mode 100644
index 0000000..070ad1e
--- /dev/null
+++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/actions/data/PdfOcrTesseract4ProductData.cs
@@ -0,0 +1,56 @@
+/*
+This file is part of the iText (R) project.
+Copyright (c) 1998-2021 iText Group NV
+Authors: iText Software.
+
+This program is offered under a commercial and under the AGPL license.
+For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
+
+AGPL licensing:
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+using System;
+using iText.Commons.Actions.Data;
+
+namespace iText.Pdfocr.Tesseract4.Actions.Data {
+ /// <summary>
+ /// Stores an instance of
+ /// <see cref="iText.Commons.Actions.Data.ProductData"/>
+ /// related to iText pdfOcr Tesseract4 module.
+ /// </summary>
+ public class PdfOcrTesseract4ProductData {
+ private const String PDF_OCR_TESSERACT4_PRODUCT_NAME = "pdfOcr-tesseract4";
+
+ private const String PDF_OCR_TESSERACT4_PUBLIC_PRODUCT_NAME = "pdfOCR-Tesseract4";
+
+ private const String PDF_OCR_VERSION = "2.0.0";
+
+ private const int PDF_OCR_COPYRIGHT_SINCE = 2000;
+
+ private const int PDF_OCR_COPYRIGHT_TO = 2021;
+
+ private static readonly ProductData PDF_OCR_PRODUCT_DATA = new ProductData(PDF_OCR_TESSERACT4_PUBLIC_PRODUCT_NAME
+ , PDF_OCR_TESSERACT4_PRODUCT_NAME, PDF_OCR_VERSION, PDF_OCR_COPYRIGHT_SINCE, PDF_OCR_COPYRIGHT_TO);
+
+ /// <summary>
+ /// Getter for an instance of
+ /// <see cref="iText.Commons.Actions.Data.ProductData"/>
+ /// related to iText pdfOcr Tesseract4 module.
+ /// </summary>
+ /// <returns>iText pdfOcr Tesseract4 product description</returns>
+ public static ProductData GetInstance() {
+ return PDF_OCR_PRODUCT_DATA;
+ }
+ }
+}
diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/actions/events/PdfOcrTesseract4ProductEvent.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/actions/events/PdfOcrTesseract4ProductEvent.cs
new file mode 100644
index 0000000..14ef410
--- /dev/null
+++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/actions/events/PdfOcrTesseract4ProductEvent.cs
@@ -0,0 +1,64 @@
+/*
+This file is part of the iText (R) project.
+Copyright (c) 1998-2021 iText Group NV
+Authors: iText Software.
+
+This program is offered under a commercial and under the AGPL license.
+For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
+
+AGPL licensing:
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+using System;
+using iText.Commons.Actions;
+using iText.Commons.Actions.Confirmations;
+using iText.Commons.Actions.Contexts;
+using iText.Commons.Actions.Sequence;
+using iText.Pdfocr.Tesseract4.Actions.Data;
+
+namespace iText.Pdfocr.Tesseract4.Actions.Events {
+ /// <summary>Class represents events registered in iText pdfOcr Tesseract4 module.</summary>
+ public class PdfOcrTesseract4ProductEvent : AbstractProductProcessITextEvent {
+ /// <summary>Process image event type.</summary>
+ public const String PROCESS_IMAGE = "process-image";
+
+ private readonly String eventType;
+
+ /// <summary>Creates an event associated with a general identifier and additional meta data.</summary>
+ /// <param name="sequenceId">is an identifier associated with the event</param>
+ /// <param name="metaInfo">is an additional meta info</param>
+ /// <param name="eventType">is a string description of the event</param>
+ /// <param name="eventConfirmationType">is an event confirmation type</param>
+ private PdfOcrTesseract4ProductEvent(SequenceId sequenceId, IMetaInfo metaInfo, String eventType, EventConfirmationType
+ eventConfirmationType)
+ : base(sequenceId, PdfOcrTesseract4ProductData.GetInstance(), metaInfo, eventConfirmationType) {
+ this.eventType = eventType;
+ }
+
+ /// <summary>Creates process-image event.</summary>
+ /// <param name="sequenceId">is an identifier associated with the event</param>
+ /// <param name="metaInfo">is an additional meta info</param>
+ /// <param name="eventConfirmationType">is an event confirmation type</param>
+ /// <returns>process-image event</returns>
+ public static iText.Pdfocr.Tesseract4.Actions.Events.PdfOcrTesseract4ProductEvent CreateProcessImageEvent(
+ SequenceId sequenceId, IMetaInfo metaInfo, EventConfirmationType eventConfirmationType) {
+ return new iText.Pdfocr.Tesseract4.Actions.Events.PdfOcrTesseract4ProductEvent(sequenceId, metaInfo, PROCESS_IMAGE
+ , eventConfirmationType);
+ }
+
+ public override String GetEventType() {
+ return eventType;
+ }
+ }
+}
diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/events/PdfOcrTesseract4Event.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/events/PdfOcrTesseract4Event.cs
deleted file mode 100644
index 0e2eb7f..0000000
--- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/events/PdfOcrTesseract4Event.cs
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
-This file is part of the iText (R) project.
-Copyright (c) 1998-2021 iText Group NV
-Authors: iText Software.
-
-This program is offered under a commercial and under the AGPL license.
-For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
-
-AGPL licensing:
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with this program. If not, see .
-*/
-using System;
-using iText.Kernel.Counter.Event;
-
-namespace iText.Pdfocr.Tesseract4.Events {
- /// Class for ocr events
- public class PdfOcrTesseract4Event : IGenericEvent {
- public static readonly iText.Pdfocr.Tesseract4.Events.PdfOcrTesseract4Event TESSERACT4_IMAGE_OCR = new iText.Pdfocr.Tesseract4.Events.PdfOcrTesseract4Event
- ("tesseract4-image-ocr");
-
- public static readonly iText.Pdfocr.Tesseract4.Events.PdfOcrTesseract4Event TESSERACT4_IMAGE_TO_PDF = new
- iText.Pdfocr.Tesseract4.Events.PdfOcrTesseract4Event("tesseract4-image-to-pdf");
-
- public static readonly iText.Pdfocr.Tesseract4.Events.PdfOcrTesseract4Event TESSERACT4_IMAGE_TO_PDFA = new
- iText.Pdfocr.Tesseract4.Events.PdfOcrTesseract4Event("tesseract4-image-to-pdfa");
-
- private const String PDF_OCR_TESSERACT4_ORIGIN_ID = "iText.Pdfocr.Tesseract4";
-
- private readonly String subtype;
-
- private PdfOcrTesseract4Event(String subtype) {
- this.subtype = subtype;
- }
-
- public virtual String GetEventType() {
- return "pdfOcr-" + subtype;
- }
-
- public virtual String GetOriginId() {
- return PDF_OCR_TESSERACT4_ORIGIN_ID;
- }
- }
-}
diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/exceptions/PdfOcrInputTesseract4Exception.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/exceptions/PdfOcrInputTesseract4Exception.cs
new file mode 100644
index 0000000..d951176
--- /dev/null
+++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/exceptions/PdfOcrInputTesseract4Exception.cs
@@ -0,0 +1,67 @@
+/*
+This file is part of the iText (R) project.
+Copyright (c) 1998-2021 iText Group NV
+Authors: iText Software.
+
+This program is offered under a commercial and under the AGPL license.
+For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
+
+AGPL licensing:
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+using System;
+
+namespace iText.Pdfocr.Tesseract4.Exceptions {
+ public class PdfOcrInputTesseract4Exception : PdfOcrTesseract4Exception {
+ /// <summary>
+ /// Creates a new
+ /// <see cref="PdfOcrInputTesseract4Exception"/>.
+ /// </summary>
+ /// <param name="msg">the detail message.</param>
+ /// <param name="e">
+ /// the cause
+ /// (which is saved for later retrieval
+ /// by
+ /// <see cref="System.Exception.InnerException"/>
+ /// method).
+ /// </param>
+ public PdfOcrInputTesseract4Exception(String msg, Exception e)
+ : base(msg, e) {
+ }
+
+ /// <summary>
+ /// Creates a new
+ /// <see cref="PdfOcrInputTesseract4Exception"/>.
+ /// </summary>
+ /// <param name="msg">the detail message.</param>
+ public PdfOcrInputTesseract4Exception(String msg)
+ : base(msg) {
+ }
+
+ /// <summary>
+ /// Creates a new
+ /// <see cref="PdfOcrInputTesseract4Exception"/>.
+ /// </summary>
+ /// <param name="e">
+ /// the cause
+ /// (which is saved for later retrieval
+ /// by
+ /// <see cref="System.Exception.InnerException"/>
+ /// method).
+ /// </param>
+ public PdfOcrInputTesseract4Exception(Exception e)
+ : base(e) {
+ }
+ }
+}
diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/exceptions/PdfOcrTesseract4Exception.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/exceptions/PdfOcrTesseract4Exception.cs
new file mode 100644
index 0000000..a159fd6
--- /dev/null
+++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/exceptions/PdfOcrTesseract4Exception.cs
@@ -0,0 +1,68 @@
+/*
+This file is part of the iText (R) project.
+Copyright (c) 1998-2021 iText Group NV
+Authors: iText Software.
+
+This program is offered under a commercial and under the AGPL license.
+For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
+
+AGPL licensing:
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+using System;
+using iText.Pdfocr.Exceptions;
+
+namespace iText.Pdfocr.Tesseract4.Exceptions {
+ public class PdfOcrTesseract4Exception : PdfOcrException {
+ /// <summary>
+ /// Creates a new
+ /// <see cref="PdfOcrTesseract4Exception"/>.
+ /// </summary>
+ /// <param name="msg">the detail message.</param>
+ /// <param name="e">
+ /// the cause
+ /// (which is saved for later retrieval
+ /// by
+ /// <see cref="System.Exception.InnerException"/>
+ /// method).
+ /// </param>
+ public PdfOcrTesseract4Exception(String msg, Exception e)
+ : base(msg, e) {
+ }
+
+ /// <summary>
+ /// Creates a new
+ /// <see cref="PdfOcrTesseract4Exception"/>.
+ /// </summary>
+ /// <param name="msg">the detail message.</param>
+ public PdfOcrTesseract4Exception(String msg)
+ : base(msg) {
+ }
+
+ /// <summary>
+ /// Creates a new
+ /// <see cref="PdfOcrTesseract4Exception"/>.
+ /// </summary>
+ /// <param name="e">
+ /// the cause
+ /// (which is saved for later retrieval
+ /// by
+ /// <see cref="System.Exception.InnerException"/>
+ /// method).
+ /// </param>
+ public PdfOcrTesseract4Exception(Exception e)
+ : base(e) {
+ }
+ }
+}
diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4OcrException.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/exceptions/PdfOcrTesseract4ExceptionMessageConstant.cs
similarity index 64%
rename from itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4OcrException.cs
rename to itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/exceptions/PdfOcrTesseract4ExceptionMessageConstant.cs
index 4994688..31aecb1 100644
--- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4OcrException.cs
+++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/exceptions/PdfOcrTesseract4ExceptionMessageConstant.cs
@@ -21,13 +21,9 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see .
*/
using System;
-using iText.Pdfocr;
-
-namespace iText.Pdfocr.Tesseract4 {
- public class Tesseract4OcrException : OcrException {
- public const String TESSERACT_LIB_NOT_INSTALLED_WIN = "Tesseract failed. "
- + "Please ensure you have at least Visual C++ 2015 Redistributable installed";
+namespace iText.Pdfocr.Tesseract4.Exceptions {
+ public class PdfOcrTesseract4ExceptionMessageConstant {
public const String INCORRECT_INPUT_IMAGE_FORMAT = "{0} format is not supported.";
public const String INCORRECT_LANGUAGE = "{0} does not exist in {1}";
@@ -36,40 +32,27 @@ public class Tesseract4OcrException : OcrException {
public const String CANNOT_READ_PROVIDED_IMAGE = "Cannot read input image {0}";
- public const String TESSERACT_FAILED = "Tesseract failed. " + "Please check provided parameters";
+ public const String CANNOT_WRITE_TO_FILE = "Cannot write to file {0}: {1}";
+
+ public const String TESSERACT_FAILED = "Tesseract failed. Please check provided parameters";
+
+ public const String TESSERACT_LIB_NOT_INSTALLED = "Tesseract failed. Please ensure you have tesseract library installed";
- public const String TESSERACT_LIB_NOT_INSTALLED = "Tesseract failed. " + "Please ensure you have tesseract library installed";
+ public const String TESSERACT_LIB_NOT_INSTALLED_WIN = "Tesseract failed. Please ensure you have latest Visual C++ Redistributable installed";
- public const String TESSERACT_NOT_FOUND = "Tesseract failed. " + "Please check that tesseract is installed and provided path to "
+ public const String TESSERACT_NOT_FOUND = "Tesseract failed. Please check that tesseract is installed and provided path to "
+ "tesseract executable directory is correct";
public const String CANNOT_FIND_PATH_TO_TESSERACT_EXECUTABLE = "Cannot find path to tesseract executable.";
- public const String PATH_TO_TESS_DATA_DIRECTORY_IS_INVALID = "Provided path to tess data directory does not exist or it is "
- + "an invalid directory";
+ public const String PATH_TO_TESS_DATA_DIRECTORY_IS_INVALID = "Provided path to tess data directory does not exist or it is an invalid directory";
- public const String PATH_TO_TESS_DATA_IS_NOT_SET = "Path to tess data directory cannot be null and must be set "
- + "to a valid directory";
+ public const String PATH_TO_TESS_DATA_IS_NOT_SET = "Path to tess data directory cannot be null and must be set to a valid directory";
public const String PATH_TO_TESS_DATA_DIRECTORY_CONTAINS_NON_ASCII_CHARACTERS = "Path to tess data should contain only ASCII characters";
- /// Creates a new TesseractException.
- /// the detail message.
- ///
- /// the cause
- /// (which is saved for later retrieval
- /// by
- ///
- /// method).
- ///
- public Tesseract4OcrException(String msg, Exception e)
- : base(msg, e) {
- }
-
- /// Creates a new TesseractException.
- /// the detail message.
- public Tesseract4OcrException(String msg)
- : base(msg) {
+ private PdfOcrTesseract4ExceptionMessageConstant() {
}
+ //Private constructor will prevent the instantiation of this class directly
}
}
diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4LogMessageConstant.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/logs/Tesseract4LogMessageConstant.cs
similarity index 83%
rename from itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4LogMessageConstant.cs
rename to itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/logs/Tesseract4LogMessageConstant.cs
index fa8de35..e9df364 100644
--- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4LogMessageConstant.cs
+++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/logs/Tesseract4LogMessageConstant.cs
@@ -22,7 +22,7 @@ You should have received a copy of the GNU Affero General Public License
*/
using System;
-namespace iText.Pdfocr.Tesseract4 {
+namespace iText.Pdfocr.Tesseract4.Logs {
public class Tesseract4LogMessageConstant {
public const String TESSERACT_FAILED = "Tesseract failed: {0}";
@@ -30,7 +30,7 @@ public class Tesseract4LogMessageConstant {
public const String CANNOT_READ_FILE = "Cannot read file {0}: {1}";
- public const String CANNOT_OCR_INPUT_FILE = "Cannot ocr input file: {1}";
+ public const String CANNOT_OCR_INPUT_FILE = "Cannot ocr input file: {0}";
public const String CANNOT_USE_USER_WORDS = "Cannot use custom user words: {0}";
@@ -40,14 +40,11 @@ public class Tesseract4LogMessageConstant {
public const String CANNOT_DELETE_FILE = "File {0} cannot be deleted: {1}";
- public const String CANNOT_PROCESS_IMAGE = "Cannot process " + "image: {0}";
-
- public const String CANNOT_WRITE_TO_FILE = "Cannot write to file {0}: {1}";
+ public const String CANNOT_PROCESS_IMAGE = "Cannot process image: {0}";
public const String CREATED_TEMPORARY_FILE = "Created temp file {0}";
- /// Constant is not used.
- [System.ObsoleteAttribute(@"since 1.0.1. Will be removed in 2.0.0")]
+ // Constant is used only in .NET version, but it's kept here for the sake of consistency and autoporting.
public const String CANNOT_CONVERT_IMAGE_TO_GRAYSCALE = "Cannot convert to gray image with depth {0}";
public const String CANNOT_BINARIZE_IMAGE = "Cannot binarize image with depth {0}";
@@ -58,11 +55,7 @@ public class Tesseract4LogMessageConstant {
public const String CANNOT_READ_INPUT_IMAGE = "Cannot read input image {0}";
- public const String CANNOT_GET_TEMPORARY_DIRECTORY = "Cannot get " + "temporary directory: {0}";
-
- /// Constant is not used.
- [System.ObsoleteAttribute(@"since 1.0.1. Will be removed in 2.0.0")]
- public const String CANNOT_CONVERT_IMAGE_TO_PIX = "Cannot convert image to pix: {0}";
+ public const String CANNOT_GET_TEMPORARY_DIRECTORY = "Cannot get temporary directory: {0}";
public const String CANNOT_PARSE_NODE_BBOX = "Cannot parse node BBox, defaults to 0, 0, 0, 0. Node: {0}";
@@ -72,5 +65,6 @@ public class Tesseract4LogMessageConstant {
private Tesseract4LogMessageConstant() {
}
+ //Private constructor will prevent the instantiation of this class directly
}
}
diff --git a/itext/itext.pdfocr.tesseract4/pdfocr-tesseract4.nuspec b/itext/itext.pdfocr.tesseract4/pdfocr-tesseract4.nuspec
index 113211e..9358518 100644
--- a/itext/itext.pdfocr.tesseract4/pdfocr-tesseract4.nuspec
+++ b/itext/itext.pdfocr.tesseract4/pdfocr-tesseract4.nuspec
@@ -2,7 +2,7 @@
itext7.pdfocr.tesseract4
- 1.0.3
+ 2.0.0
iText 7 pdfOcr
iText Software
iText Software
@@ -17,8 +17,8 @@
OCR PDF ligatures text glyphs iText Optical Character Recognition PDF/A ISO-compliant Tesseract open-source opensource English Mandarin Chinese Hindi Spanish French Arabic Bengali Russian Portuguese Indonesian scan image extractable data searchable diacritic sdk c# .net
-
-
+
+
@@ -27,10 +27,10 @@
-
-
+
+
-
+
diff --git a/port-hash b/port-hash
index 1fdc3a4..381754d 100644
--- a/port-hash
+++ b/port-hash
@@ -1 +1 @@
-c438260f7e5f29ec0bfe0306e06fb1a5ce0bd6db
+fb9aa93bb391504fd844c0010192124ce0d7fc49