Skip to content

Commit

Permalink
Merge branch 'release_branch_DEVSIX-5576' into master-rc
Browse files Browse the repository at this point in the history
  • Loading branch information
iText-CI committed Sep 29, 2021
2 parents 20eb242 + b1293d6 commit 6480e74
Show file tree
Hide file tree
Showing 100 changed files with 3,249 additions and 1,757 deletions.
2 changes: 1 addition & 1 deletion Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@
def repoName = "pdfOcr"
def dependencyRegex = "itextcore"
def solutionFile = "i7n-ocr.sln"
def csprojFramework = "netcoreapp2.0"
def csprojFramework = "net461"

automaticDotnetBuild(repoName, dependencyRegex, solutionFile, csprojFramework)
2 changes: 1 addition & 1 deletion doxyfile
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ DOXYFILE_ENCODING = UTF-8
# title of most generated pages and in a few other places.
# The default value is: My Project.

PROJECT_NAME = "pdfOCR 1.0.3 API"
PROJECT_NAME = "pdfOCR 2.0.0 API"

# The PROJECT_NUMBER tag can be used to enter a project or revision number. This
# could be handy for archiving the generated documentation or if some version
Expand Down
6 changes: 3 additions & 3 deletions itext.tests/itext.pdfocr.api.tests/Properties/AssemblyInfo.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,6 @@

[assembly: Guid("d6a6ea97-1f23-448f-b700-eff62971d234")]

[assembly: AssemblyVersion("1.0.3.0")]
[assembly: AssemblyFileVersion("1.0.3.0")]
[assembly: AssemblyInformationalVersion("1.0.3")]
[assembly: AssemblyVersion("2.0.0.0")]
[assembly: AssemblyFileVersion("2.0.0.0")]
[assembly: AssemblyInformationalVersion("2.0.0")]
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
<OutputType>library</OutputType>
</PropertyGroup>
<PropertyGroup>
<TargetFramework>net45</TargetFramework>
<TargetFramework>net461</TargetFramework>
</PropertyGroup>
<PropertyGroup>
<SignAssembly>true</SignAssembly>
Expand All @@ -25,7 +25,7 @@
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\..\..\itextcore\itext\itext.pdftest\itext.pdftest.netstandard.csproj" Condition="Exists('..\..\..\itextcore\itext\itext.pdftest\itext.pdftest.netstandard.csproj')" />
<PackageReference Include="itext7.pdftest" Version="7.1.16" Condition="!Exists('..\..\..\itextcore\itext\itext.pdftest\itext.pdftest.netstandard.csproj')" />
<PackageReference Include="itext7.pdftest" Version="7.2.0" Condition="!Exists('..\..\..\itextcore\itext\itext.pdftest\itext.pdftest.netstandard.csproj')" />
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="16.6.0" />
<PackageReference Include="NUnit" Version="3.12.0" />
<PackageReference Include="NUnit3TestAdapter" Version="3.16.1">
Expand Down
130 changes: 102 additions & 28 deletions itext.tests/itext.pdfocr.api.tests/itext/pdfocr/ApiTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -23,44 +23,129 @@ You should have received a copy of the GNU Affero General Public License
using System;
using System.Collections.Generic;
using System.IO;
using iText.Commons.Actions.Contexts;
using iText.Commons.Utils;
using iText.IO.Image;
using iText.IO.Util;
using iText.Kernel.Colors;
using iText.Kernel.Font;
using iText.Kernel.Geom;
using iText.Kernel.Pdf;
using iText.Pdfa;
using iText.Pdfocr.Helpers;
using iText.Pdfocr.Logs;
using iText.Test;
using iText.Test.Attributes;

namespace iText.Pdfocr {
public class ApiTest : ExtendedITextTest {
/// <summary>
/// Output folder for this test class, located under the NUnit test directory.
/// </summary>
/// <remarks>
/// The value does not end with a directory separator, so appending a bare file name to it
/// by string concatenation produces a path next to this folder rather than inside it.
/// </remarks>
public static readonly String DESTINATION_FOLDER = NUnit.Framework.TestContext.CurrentContext.TestDirectory
+ "/test/itext/pdfocr";

/// <summary>
/// One-time fixture setup: passes <see cref="DESTINATION_FOLDER"/> to
/// <c>CreateOrClearDestinationFolder</c> before the tests of this class run.
/// </summary>
[NUnit.Framework.OneTimeSetUp]
public static void BeforeClass() {
CreateOrClearDestinationFolder(DESTINATION_FOLDER);
}

[NUnit.Framework.Test]
public virtual void TestTextInfo() {
String path = PdfHelper.GetDefaultImagePath();
IDictionary<int, IList<TextInfo>> result = new CustomOcrEngine().DoImageOcr(new FileInfo(path));
NUnit.Framework.Assert.AreEqual(1, result.Count);
TextInfo textInfo = new TextInfo();
textInfo.SetText("text");
textInfo.SetBboxRect(new Rectangle(204.0f, 158.0f, 538.0f, 136.0f));
int page = 2;
result.Put(page, JavaCollectionsUtil.SingletonList<TextInfo>(textInfo));
NUnit.Framework.Assert.AreEqual(2, result.Count);
NUnit.Framework.Assert.AreEqual(textInfo.GetText(), result.Get(page)[0].GetText());
public virtual void CreatePdfWithFileTest() {
    // Runs the custom OCR engine over the default image, builds a PdfDocument through
    // OcrPdfCreator.CreatePdf and checks that the first page's content bytes, decoded as
    // UTF-8, contain the expected string.
    const String expectedContent = "<00190014001c001400150014>";
    OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties().SetMetaInfo(new ApiTest.DummyMetaInfo());
    OcrPdfCreator creator = new OcrPdfCreator(new CustomOcrEngine(), creatorProperties);
    IList<FileInfo> inputImages = JavaCollectionsUtil.SingletonList<FileInfo>(
        new FileInfo(PdfHelper.GetDefaultImagePath()));
    using (PdfDocument document = creator.CreatePdf(inputImages, PdfHelper.GetPdfWriter(),
        new DocumentProperties().SetEventCountingMetaInfo(new ApiTest.DummyMetaInfo()))) {
        byte[] firstPageBytes = document.GetPage(1).GetContentBytes();
        String firstPageContent = iText.Commons.Utils.JavaUtil.GetStringForBytes(firstPageBytes,
            System.Text.Encoding.UTF8);
        NUnit.Framework.Assert.IsTrue(firstPageContent.Contains(expectedContent));
    }
}

/// <summary>
/// Writes the OCR result for the default image to a PDF file via
/// <c>OcrPdfCreator.CreatePdfFile</c>, reopens that file and checks that the first page's
/// content bytes, decoded as UTF-8, contain the expected string.
/// </summary>
[NUnit.Framework.Test]
public virtual void CreatePdfFileWithFileTest() {
    // DESTINATION_FOLDER has no trailing separator, so plain concatenation would place the
    // file next to the folder instead of inside it. System.IO.Path is fully qualified
    // because iText.Kernel.Geom also declares a Path type.
    String output = System.IO.Path.Combine(DESTINATION_FOLDER, "createPdfFileWithFileTest.pdf");
    OcrPdfCreatorProperties props = new OcrPdfCreatorProperties().SetMetaInfo(new ApiTest.DummyMetaInfo());
    OcrPdfCreator pdfCreator = new OcrPdfCreator(new CustomOcrEngine(), props);
    pdfCreator.CreatePdfFile(
        JavaCollectionsUtil.SingletonList<FileInfo>(new FileInfo(PdfHelper.GetDefaultImagePath())),
        new FileInfo(output));
    // Read the produced file back from disk to verify what was actually written.
    using (PdfDocument pdf = new PdfDocument(new PdfReader(output))) {
        String contentBytes = iText.Commons.Utils.JavaUtil.GetStringForBytes(pdf.GetPage(1).GetContentBytes(),
            System.Text.Encoding.UTF8);
        NUnit.Framework.Assert.IsTrue(contentBytes.Contains("<00190014001c001400150014>"));
    }
}

/// <summary>
/// Builds a document through <c>OcrPdfCreator.CreatePdfA</c> for the default image and checks
/// that the first page's content bytes contain the expected string and that the returned
/// document is a <see cref="PdfADocument"/>.
/// </summary>
[NUnit.Framework.Test]
public virtual void CreatePdfAWithFileTest() {
    const String expectedContent = "<00190014001c001400150014>";
    OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties()
        .SetMetaInfo(new ApiTest.DummyMetaInfo())
        .SetPdfLang("en-US");
    OcrPdfCreator creator = new OcrPdfCreator(new CustomOcrEngine(), creatorProperties);
    IList<FileInfo> inputImages = JavaCollectionsUtil.SingletonList<FileInfo>(
        new FileInfo(PdfHelper.GetDefaultImagePath()));
    using (PdfDocument document = creator.CreatePdfA(inputImages, PdfHelper.GetPdfWriter(),
        new DocumentProperties().SetEventCountingMetaInfo(new ApiTest.DummyMetaInfo()),
        PdfHelper.GetRGBPdfOutputIntent())) {
        byte[] firstPageBytes = document.GetPage(1).GetContentBytes();
        String firstPageContent = iText.Commons.Utils.JavaUtil.GetStringForBytes(firstPageBytes,
            System.Text.Encoding.UTF8);
        NUnit.Framework.Assert.IsTrue(firstPageContent.Contains(expectedContent));
        NUnit.Framework.Assert.IsTrue(document is PdfADocument);
    }
}

[NUnit.Framework.Test]
public virtual void TestTextInfoDeprecationMode() {
public virtual void CreatePdfAFileWithFileTest() {
    // Writes the OCR result for the default image to a file via OcrPdfCreator.CreatePdfAFile,
    // reopens it and checks the first page's content plus the conformance level reported by
    // the reader (expected to match PDF_A_3U in both conformance and part).
    //
    // DESTINATION_FOLDER has no trailing separator, so plain concatenation would place the
    // file next to the folder instead of inside it. System.IO.Path is fully qualified
    // because iText.Kernel.Geom also declares a Path type.
    String output = System.IO.Path.Combine(DESTINATION_FOLDER, "createPdfAFileWithFileTest.pdf");
    OcrPdfCreatorProperties props = new OcrPdfCreatorProperties().SetMetaInfo(new ApiTest.DummyMetaInfo())
        .SetPdfLang("en-US");
    OcrPdfCreator pdfCreator = new OcrPdfCreator(new CustomOcrEngine(), props);
    pdfCreator.CreatePdfAFile(
        JavaCollectionsUtil.SingletonList<FileInfo>(new FileInfo(PdfHelper.GetDefaultImagePath())),
        new FileInfo(output), PdfHelper.GetRGBPdfOutputIntent());
    // Read the produced file back from disk to verify what was actually written.
    using (PdfDocument pdf = new PdfDocument(new PdfReader(output))) {
        String contentBytes = iText.Commons.Utils.JavaUtil.GetStringForBytes(pdf.GetPage(1).GetContentBytes(),
            System.Text.Encoding.UTF8);
        NUnit.Framework.Assert.IsTrue(contentBytes.Contains("<00190014001c001400150014>"));
        PdfAConformanceLevel cl = pdf.GetReader().GetPdfAConformanceLevel();
        NUnit.Framework.Assert.AreEqual(PdfAConformanceLevel.PDF_A_3U.GetConformance(), cl.GetConformance());
        NUnit.Framework.Assert.AreEqual(PdfAConformanceLevel.PDF_A_3U.GetPart(), cl.GetPart());
    }
}

/// <summary>
/// Same scenario as the PDF/A file test, but the creator properties carry no meta info:
/// writes the file via <c>OcrPdfCreator.CreatePdfAFile</c>, reopens it and checks the first
/// page's content and the conformance level reported by the reader.
/// </summary>
[NUnit.Framework.Test]
public virtual void CreatePdfAFileWithFileNoMetaTest() {
    // DESTINATION_FOLDER has no trailing separator, so plain concatenation would place the
    // file next to the folder instead of inside it. System.IO.Path is fully qualified
    // because iText.Kernel.Geom also declares a Path type.
    String output = System.IO.Path.Combine(DESTINATION_FOLDER, "createPdfAFileWithFileNoMetaTest.pdf");
    OcrPdfCreatorProperties props = new OcrPdfCreatorProperties().SetPdfLang("en-US");
    OcrPdfCreator pdfCreator = new OcrPdfCreator(new CustomOcrEngine(), props);
    pdfCreator.CreatePdfAFile(
        JavaCollectionsUtil.SingletonList<FileInfo>(new FileInfo(PdfHelper.GetDefaultImagePath())),
        new FileInfo(output), PdfHelper.GetRGBPdfOutputIntent());
    // Read the produced file back from disk to verify what was actually written.
    using (PdfDocument pdf = new PdfDocument(new PdfReader(output))) {
        String contentBytes = iText.Commons.Utils.JavaUtil.GetStringForBytes(pdf.GetPage(1).GetContentBytes(),
            System.Text.Encoding.UTF8);
        NUnit.Framework.Assert.IsTrue(contentBytes.Contains("<00190014001c001400150014>"));
        PdfAConformanceLevel cl = pdf.GetReader().GetPdfAConformanceLevel();
        NUnit.Framework.Assert.AreEqual(PdfAConformanceLevel.PDF_A_3U.GetConformance(), cl.GetConformance());
        NUnit.Framework.Assert.AreEqual(PdfAConformanceLevel.PDF_A_3U.GetPart(), cl.GetPart());
    }
}

/// <summary>
/// Runs <c>OcrPdfCreator.CreatePdfAFile</c> with a <c>CustomProductAwareOcrEngine</c> and
/// checks that the engine reports its meta info container was requested.
/// </summary>
[NUnit.Framework.Test]
public virtual void CreatePdfAFileWithFileProductAwareEngineTest() {
    // DESTINATION_FOLDER has no trailing separator, so plain concatenation would place the
    // file next to the folder instead of inside it. System.IO.Path is fully qualified
    // because iText.Kernel.Geom also declares a Path type.
    String output = System.IO.Path.Combine(DESTINATION_FOLDER,
        "createPdfAFileWithFileProductAwareEngineTest.pdf");
    OcrPdfCreatorProperties props = new OcrPdfCreatorProperties().SetPdfLang("en-US");
    CustomProductAwareOcrEngine ocrEngine = new CustomProductAwareOcrEngine();
    OcrPdfCreator pdfCreator = new OcrPdfCreator(ocrEngine, props);
    pdfCreator.CreatePdfAFile(
        JavaCollectionsUtil.SingletonList<FileInfo>(new FileInfo(PdfHelper.GetDefaultImagePath())),
        new FileInfo(output), PdfHelper.GetRGBPdfOutputIntent());
    NUnit.Framework.Assert.IsTrue(ocrEngine.IsGetMetaInfoContainerTriggered());
}

[NUnit.Framework.Test]
public virtual void TestTextInfo() {
String path = PdfHelper.GetDefaultImagePath();
IDictionary<int, IList<TextInfo>> result = new CustomOcrEngine(true).DoImageOcr(new FileInfo(path));
IDictionary<int, IList<TextInfo>> result = new CustomOcrEngine().DoImageOcr(new FileInfo(path));
NUnit.Framework.Assert.AreEqual(1, result.Count);
TextInfo textInfo = new TextInfo();
textInfo.SetText("text");
textInfo.SetBbox(JavaUtil.ArraysAsList(204.0f, 158.0f, 742.0f, 294.0f));
textInfo.SetBboxRect(new Rectangle(204.0f, 158.0f, 538.0f, 136.0f));
int page = 2;
result.Put(page, JavaCollectionsUtil.SingletonList<TextInfo>(textInfo));
NUnit.Framework.Assert.AreEqual(2, result.Count);
NUnit.Framework.Assert.AreEqual(textInfo.GetText(), result.Get(page)[0].GetText());
NUnit.Framework.Assert.AreEqual(textInfo.GetBbox().Count, result.Get(page)[0].GetBbox().Count);
}

[LogMessage(PdfOcrLogMessageConstant.COULD_NOT_FIND_CORRESPONDING_GLYPH_TO_UNICODE_CHARACTER, Count = 7)]
Expand Down Expand Up @@ -113,18 +198,7 @@ public virtual ImageData ApplyRotation(ImageData imageData) {
}
}

[LogMessage(PdfOcrLogMessageConstant.COULD_NOT_FIND_CORRESPONDING_GLYPH_TO_UNICODE_CHARACTER, Count = 7)]
[NUnit.Framework.Test]
public virtual void TestThaiImageWithNotDefGlyphsDeprecationMode() {
String testName = "testThaiImageWithNotdefGlyphs";
String path = PdfHelper.GetThaiImagePath();
String pdfPath = PdfHelper.GetTargetDirectory() + testName + ".pdf";
PdfHelper.CreatePdf(pdfPath, new FileInfo(path), new OcrPdfCreatorProperties().SetTextColor(DeviceRgb.BLACK
), true);
ExtractionStrategy strategy = PdfHelper.GetExtractionStrategy(pdfPath);
PdfFont font = strategy.GetPdfFont();
String fontName = font.GetFontProgram().GetFontNames().GetFontName();
NUnit.Framework.Assert.IsTrue(fontName.Contains("LiberationSans"));
/// <summary>
/// Empty <see cref="IMetaInfo"/> implementation that the tests in this class pass wherever
/// a meta info instance is required.
/// </summary>
private class DummyMetaInfo : IMetaInfo {
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
/*
This file is part of the iText (R) project.
Copyright (c) 1998-2021 iText Group NV
Authors: iText Software.
This program is offered under a commercial and under the AGPL license.
For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
AGPL licensing:
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
using System;
using System.Collections.Generic;
using iText.Commons.Actions;
using iText.Commons.Actions.Confirmations;
using iText.Commons.Actions.Contexts;
using iText.Commons.Actions.Data;
using iText.Commons.Actions.Sequence;
using iText.Commons.Utils;
using iText.Kernel.Actions.Data;
using iText.Pdfocr.Statistics;
using iText.Test;

namespace iText.Pdfocr {
    /// <summary>
    /// Checks which events handed to <see cref="OcrPdfCreatorEventHelper"/> arrive at an
    /// <see cref="IEventHandler"/> registered with the <see cref="EventManager"/>.
    /// </summary>
    public class OcrPdfCreatorEventHelperTest : ExtendedITextTest {
        private static readonly ProductData DUMMY_PRODUCT_DATA = new ProductData(
            "test-product", "inner_product", "1.0.0", 1900, 2100);

        // Handler that records every event it receives; recreated for each test.
        private OcrPdfCreatorEventHelperTest.StoreEventsHandler storeEventsHandler;

        /// <summary>Registers a fresh recording handler with the event manager.</summary>
        [NUnit.Framework.SetUp]
        public virtual void Before() {
            storeEventsHandler = new OcrPdfCreatorEventHelperTest.StoreEventsHandler();
            EventManager.GetInstance().Register(storeEventsHandler);
        }

        /// <summary>Unregisters the recording handler and drops the reference to it.</summary>
        [NUnit.Framework.TearDown]
        public virtual void After() {
            EventManager.GetInstance().Unregister(storeEventsHandler);
            storeEventsHandler = null;
        }

        /// <summary>A product process event is expected to reach the registered handler.</summary>
        [NUnit.Framework.Test]
        public virtual void ProductContextBasedEventTest() {
            OcrPdfCreatorEventHelper eventHelper = NewEventHelper();
            OcrPdfCreatorEventHelperTest.DummyITextEvent sentEvent = new OcrPdfCreatorEventHelperTest.DummyITextEvent();

            eventHelper.OnEvent(sentEvent);

            AssertSingleStoredEvent(sentEvent);
        }

        /// <summary>
        /// A <see cref="PdfOcrOutputTypeStatisticsEvent"/> is expected not to reach the
        /// registered handler.
        /// </summary>
        [NUnit.Framework.Test]
        public virtual void PdfOcrStatisticsEventTest() {
            OcrPdfCreatorEventHelper eventHelper = NewEventHelper();
            PdfOcrOutputTypeStatisticsEvent statisticsEvent = new PdfOcrOutputTypeStatisticsEvent(
                PdfOcrOutputType.PDF, DUMMY_PRODUCT_DATA);

            eventHelper.OnEvent(statisticsEvent);

            NUnit.Framework.Assert.AreEqual(0, storeEventsHandler.GetEvents().Count);
        }

        /// <summary>A custom product event is expected to reach the registered handler.</summary>
        [NUnit.Framework.Test]
        public virtual void CustomProductEventTest() {
            OcrPdfCreatorEventHelper eventHelper = NewEventHelper();
            AbstractProductITextEvent sentEvent = new OcrPdfCreatorEventHelperTest.CustomProductITextEvent(
                DUMMY_PRODUCT_DATA);

            eventHelper.OnEvent(sentEvent);

            AssertSingleStoredEvent(sentEvent);
        }

        /// <summary>A custom statistics event is expected to reach the registered handler.</summary>
        [NUnit.Framework.Test]
        public virtual void CustomStatisticsEventTest() {
            OcrPdfCreatorEventHelper eventHelper = NewEventHelper();
            OcrPdfCreatorEventHelperTest.CustomStatisticsEvent sentEvent =
                new OcrPdfCreatorEventHelperTest.CustomStatisticsEvent(DUMMY_PRODUCT_DATA);

            eventHelper.OnEvent(sentEvent);

            AssertSingleStoredEvent(sentEvent);
        }

        // Builds the helper under test with a new sequence id and a dummy meta info.
        private static OcrPdfCreatorEventHelper NewEventHelper() {
            return new OcrPdfCreatorEventHelper(new SequenceId(), new OcrPdfCreatorEventHelperTest.DummyMetaInfo());
        }

        // Asserts that exactly one event was recorded and that it equals the given one.
        private void AssertSingleStoredEvent(Object expectedEvent) {
            NUnit.Framework.Assert.AreEqual(1, storeEventsHandler.GetEvents().Count);
            NUnit.Framework.Assert.AreEqual(expectedEvent, storeEventsHandler.GetEvents()[0]);
        }

        /// <summary>Empty meta info used to construct the helper under test.</summary>
        private class DummyMetaInfo : IMetaInfo {
        }

        /// <summary>
        /// Product process event built on the iText core product data, with no meta info and
        /// on-demand confirmation.
        /// </summary>
        private class DummyITextEvent : AbstractProductProcessITextEvent {
            protected internal DummyITextEvent()
                : base(ITextCoreProductData.GetInstance(), null, EventConfirmationType.ON_DEMAND) {
            }

            public override String GetEventType() {
                return "test-event";
            }
        }

        /// <summary>Minimal product event that only carries the given product data.</summary>
        private class CustomProductITextEvent : AbstractProductITextEvent {
            protected internal CustomProductITextEvent(ProductData productData)
                : base(productData) {
            }
        }

        /// <summary>Statistics event that reports a single statistics name.</summary>
        private class CustomStatisticsEvent : AbstractStatisticsEvent {
            protected internal CustomStatisticsEvent(ProductData productData)
                : base(productData) {
            }

            public override IList<String> GetStatisticsNames() {
                return JavaCollectionsUtil.SingletonList("custom-statistics");
            }
        }

        /// <summary>Event handler that appends every received event to a list.</summary>
        private class StoreEventsHandler : IEventHandler {
            private IList<IEvent> receivedEvents = new List<IEvent>();

            /// <summary>Returns the list of events received so far, in arrival order.</summary>
            public virtual IList<IEvent> GetEvents() {
                return receivedEvents;
            }

            public virtual void OnEvent(IEvent @event) {
                receivedEvents.Add(@event);
            }
        }
    }
}
Loading

0 comments on commit 6480e74

Please sign in to comment.