Improvements and new examples
Make Android work without Xalan (use stax instead)
Remove usage of thread locals in AlternativeProperty (use references instead)
Show how to get streaming behavior with an iterator and how to stream XML.
Pass xml instruction via XML instead of tag metadata.
zapov committed Apr 22, 2022
1 parent 2b65f3b commit 4bb7cce
Showing 35 changed files with 529 additions and 137 deletions.
7 changes: 0 additions & 7 deletions Advanced/CsvStreaming/Readme.md

This file was deleted.

7 changes: 4 additions & 3 deletions Advanced/README.md
@@ -44,11 +44,12 @@ Consuming embedded CSV or Excel table via Power Query (Requires Excel 2010+)

[template](PowerQuery/template/PowerQuery.xlsx?raw=true) - [result](PowerQuery/result.xlsx?raw=true)

### [CSV streaming](CsvStreaming/Readme.md)
### [CSV streaming](Streaming/Readme.md)

Stream CSV while processing to support huge exports
Stream CSV/XML while processing to support huge exports

[template](CsvStreaming/template/input.csv) - [result](CsvStreaming/result.csv)
[csv template](Streaming/template/input.csv) - [result](Streaming/result.csv)
[xml template](Streaming/template/input.xml) - [result](Streaming/result.xml)

### [Various JSON examples](TemplaterServer/Readme.md)

9 changes: 9 additions & 0 deletions Advanced/Streaming/Readme.md
@@ -0,0 +1,9 @@
## Streaming of large documents

Streaming in Templater is supported out of the box if a streaming type is used (ResultSet/Iterator/Enumerator).
Alternatively, streaming can be simulated manually via multiple calls to the process API.

Both methods allow Templater to flush the content of the populated stream and reuse the memory on the next call to the process API.

Content can be flushed only up to the first row that still contains tags. This means that non-streaming tags should be processed first (if there are any);
streaming tags can then be processed, which will perform the flushing.
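
A minimal C# sketch of the out-of-the-box style described above, based only on the API exercised in the Program.cs changes further down (the `filter` and `data` tag names, the `csv` format and the 50,000 flush threshold all mirror that example):

```csharp
using System.Collections.Generic;
using System.IO;
using NGS.Templater;

static class StreamingSketch
{
    //rows can be any enumerator - Templater flushes populated output as it advances it
    static void Stream(IEnumerator<object> rows, Stream output)
    {
        var builder = Configuration.Builder;
        builder.Streaming(50000); //flush threshold; the default is 16k
        var factory = builder.Build();
        using (var doc = factory.Open(File.OpenRead("template/input.csv"), "csv", output))
        {
            //tags outside the streamed collection must be processed first
            doc.Process(new { filter = new { date = "All", user = "All" } });
            //passing the enumerator itself lets Templater flush while rows are read
            doc.Process(new { data = rows });
        }
    }
}
```

The manual simulation (repeated Process calls over chunks, with Resize to keep a template row alive) is shown in ManualStreaming in the Program.cs diff below.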
Advanced/CsvStreaming/CsvStreaming.csproj → Advanced/Streaming/Streaming.csproj
@@ -8,8 +8,8 @@
<ProjectGuid>{5BB2AABB-A28F-404F-8C37-DBE122E893F5}</ProjectGuid>
<OutputType>Exe</OutputType>
<AppDesignerFolder>Properties</AppDesignerFolder>
<RootNamespace>CsvStreaming</RootNamespace>
<AssemblyName>CsvStreaming</AssemblyName>
<RootNamespace>Streaming</RootNamespace>
<AssemblyName>Streaming</AssemblyName>
<TargetFrameworkVersion>v4.0</TargetFrameworkVersion>
<TargetFrameworkProfile>Client</TargetFrameworkProfile>
<FileAlignment>512</FileAlignment>
@@ -39,7 +39,7 @@
<HintPath>..\..\packages\DotNetZip.1.13.0\lib\net40\DotNetZip.dll</HintPath>
<Private>True</Private>
</Reference>
<Reference Include="NGS.Templater">
<HintPath>..\..\packages\Templater.7.0.0\lib\Net40\NGS.Templater.dll</HintPath>
<SpecificVersion>False</SpecificVersion>
</Reference>
@@ -65,6 +65,11 @@
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
</ItemGroup>
<ItemGroup>
<None Include="template\input.xml">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
</ItemGroup>
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
<Import Project="$(SolutionDir)\.nuget\NuGet.targets" Condition="Exists('$(SolutionDir)\.nuget\NuGet.targets')" />
<Target Name="EnsureNuGetPackageBuildImports" BeforeTargets="PrepareForBuild">
File renamed without changes.
4 changes: 2 additions & 2 deletions Advanced/CsvStreaming/pom.xml → Advanced/Streaming/pom.xml
@@ -2,10 +2,10 @@
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>hr.ngs.templater.example</groupId>
<artifactId>csv-streaming-example</artifactId>
<artifactId>streaming-example</artifactId>
<packaging>jar</packaging>
<version>7.0.0</version>
<name>CSV streaming</name>
<name>Streaming</name>
<url>https://github.com/ngs-doo/TemplaterExamples</url>

<properties>
File renamed without changes.
165 changes: 165 additions & 0 deletions Advanced/Streaming/result.xml
@@ -0,0 +1,165 @@
<?xml version="1.0" encoding="utf-8"?><root>
<filter date="All" user="All"/>
<items>
<data id="1000000" status="">
<amount>260</amount>
<date>27. 07. 2019.</date>
<created on="2019-07-27 13:22:55.117"/>
<reference>reference0</reference>
<branch>branch0</branch>
<verified by="suzane" on="2019-07-27 13:22:55.117"/>

</data><data id="1000001" status="APPROVED">
<amount>260</amount>
<date>27. 07. 2019.</date>
<created by="" on="2019-07-27 13:22:55.117"/>
<reference>reference1</reference>
<branch>branch1</branch>
<verified by="eric" on="2019-07-27 13:22:55.117"/>

</data><data id="1000002" status="">
<amount>260</amount>
<date>27. 07. 2019.</date>
<created by="rick" on="2019-07-27 13:22:55.117"/>
<reference>reference2</reference>
<branch>branch2</branch>
<verified by="mick" on="2019-07-27 13:22:55.117"/>
-
</data><data id="1000003" status="APPROVED">
<amount>260</amount>
<date>27. 07. 2019.</date>
<created by="marty" on="2019-07-27 13:22:55.117"/>
<reference>reference3</reference>
<branch>branch3</branch>
<verified by="admin" on="2019-07-27 13:22:55.117"/>
...
</data><data id="1000004" status="">
<amount>261</amount>
<date>27. 07. 2019.</date>
<created by="suzane" on="2019-07-27 13:22:55.117"/>
<reference>reference4</reference>
<branch>branch4</branch>
<verified on="2019-07-27 13:22:55.117"/>
IMPORTANT
</data><data id="1000005" status="APPROVED">
<amount>261</amount>
<date>27. 07. 2019.</date>
<created by="eric" on="2019-07-27 13:22:55.117"/>
<reference>reference5</reference>
<branch>branch5</branch>
<verified by="" on="2019-07-27 13:22:55.117"/>
REMINDER
</data><data id="1000006" status="VERIFIED">
<amount>261</amount>
<date>27. 07. 2019.</date>
<created by="mick" on="2019-07-27 13:22:55.117"/>
<reference>reference6</reference>
<branch>branch6</branch>
<verified by="rick" on="2019-07-27 13:22:55.117"/>
something to look "into later
</data><data id="1000007" status="CANCELED">
<amount>261</amount>
<date>27. 07. 2019.</date>
<created by="admin" on="2019-07-27 13:22:55.117"/>
<reference>reference7</reference>
<branch>branch7</branch>
<verified by="marty" on="2019-07-27 13:22:55.117"/>
special" char,
</data><data id="1000008" status="">
<amount>261</amount>
<date>27. 07. 2019.</date>
<created on="2019-07-27 13:22:55.117"/>
<reference>reference8</reference>
<branch>branch8</branch>
<verified by="suzane" on="2019-07-27 13:22:55.117"/>

</data><data id="1000009" status="APPROVED">
<amount>261</amount>
<date>27. 07. 2019.</date>
<created by="" on="2019-07-27 13:22:55.117"/>
<reference>reference9</reference>
<branch>branch9</branch>
<verified by="eric" on="2019-07-27 13:22:55.117"/>

</data><data id="1000010" status="">
<amount>261</amount>
<date>27. 07. 2019.</date>
<created by="rick" on="2019-07-27 13:22:55.117"/>
<reference>reference10</reference>
<branch>branch10</branch>
<verified by="mick" on="2019-07-27 13:22:55.117"/>
-
</data><data id="1000011" status="APPROVED">
<amount>261</amount>
<date>27. 07. 2019.</date>
<created by="marty" on="2019-07-27 13:22:55.117"/>
<reference>reference11</reference>
<branch>branch11</branch>
<verified by="admin" on="2019-07-27 13:22:55.117"/>
...
</data><data id="1000012" status="">
<amount>262</amount>
<date>27. 07. 2019.</date>
<created by="suzane" on="2019-07-27 13:22:55.117"/>
<reference>reference12</reference>
<branch>branch12</branch>
<verified on="2019-07-27 13:22:55.117"/>
IMPORTANT
</data><data id="1000013" status="APPROVED">
<amount>262</amount>
<date>27. 07. 2019.</date>
<created by="eric" on="2019-07-27 13:22:55.117"/>
<reference>reference13</reference>
<branch>branch13</branch>
<verified by="" on="2019-07-27 13:22:55.117"/>
REMINDER
</data><data id="1000014" status="VERIFIED">
<amount>262</amount>
<date>27. 07. 2019.</date>
<created by="mick" on="2019-07-27 13:22:55.117"/>
<reference>reference14</reference>
<branch>branch14</branch>
<verified by="rick" on="2019-07-27 13:22:55.117"/>
something to look "into later
</data><data id="1000015" status="CANCELED">
<amount>262</amount>
<date>27. 07. 2019.</date>
<created by="admin" on="2019-07-27 13:22:55.117"/>
<reference>reference15</reference>
<branch>branch15</branch>
<verified by="marty" on="2019-07-27 13:22:55.117"/>
special" char,
</data><data id="1000016" status="">
<amount>262</amount>
<date>27. 07. 2019.</date>
<created on="2019-07-27 13:22:55.117"/>
<reference>reference16</reference>
<branch>branch16</branch>
<verified by="suzane" on="2019-07-27 13:22:55.117"/>

</data><data id="1000017" status="APPROVED">
<amount>262</amount>
<date>27. 07. 2019.</date>
<created by="" on="2019-07-27 13:22:55.117"/>
<reference>reference17</reference>
<branch>branch17</branch>
<verified by="eric" on="2019-07-27 13:22:55.117"/>

</data><data id="1000018" status="">
<amount>262</amount>
<date>27. 07. 2019.</date>
<created by="rick" on="2019-07-27 13:22:55.117"/>
<reference>reference18</reference>
<branch>branch18</branch>
<verified by="mick" on="2019-07-27 13:22:55.117"/>
-
</data><data id="1000019" status="APPROVED">
<amount>262</amount>
<date>27. 07. 2019.</date>
<created by="marty" on="2019-07-27 13:22:55.117"/>
<reference>reference19</reference>
<branch>branch19</branch>
<verified by="admin" on="2019-07-27 13:22:55.117"/>
...
</data>
</items></root>
Advanced/CsvStreaming/Program.cs → Advanced/Streaming/Program.cs
@@ -1,4 +1,5 @@
using System;
using System.Collections;
using System.Collections.Generic;
using System.Data;
using System.Diagnostics;
@@ -8,7 +9,7 @@
using Ionic.Zip;
using NGS.Templater;

namespace CsvStreaming
namespace Streaming
{
public class Program
{
@@ -45,7 +46,7 @@ struct StreamingRow
public string verifiedBy;
public DateTime verifiedOn;

public StreamingRow(DataTableReader reader)
public StreamingRow(IDataReader reader)
{
id = reader.GetInt32(0);
amount = reader.GetDecimal(1);
@@ -59,6 +60,22 @@ public StreamingRow(DataTableReader reader)
verifiedBy = reader.IsDBNull(9) ? null : reader.GetString(9);
verifiedOn = reader.GetDateTime(10);
}

public class ReaderIterator : IEnumerator<StreamingRow>
{
private readonly IDataReader Reader;

public ReaderIterator(IDataReader reader)
{
this.Reader = reader;
}

public StreamingRow Current { get { return new StreamingRow(Reader); } }
object IEnumerator.Current { get { return Current; } }
public bool MoveNext() { return Reader.Read(); }
public void Reset() { }
public void Dispose() { }
}
}

public static void Main(string[] args)
@@ -97,42 +114,77 @@ public static void Main(string[] args)
startTimestamp.AddMinutes(i)
);
}
var reader1 = table.CreateDataReader();
var reader2 = table.CreateDataReader();
var reader3 = table.CreateDataReader();
var csvConfig = Configuration.Builder.Include(Quoter); //we need quoting as we are simulating CSV
var xmlConfig = Configuration.Builder; //we don't need quoting as XML is natively supported
//if we are using a culture which has a comma as the decimal separator, change the output to a dot
//we could apply this always, but it adds a bit of overhead, so let's apply it conditionally
if (Thread.CurrentThread.CurrentCulture.NumberFormat.NumberDecimalSeparator.Contains(","))
{
    csvConfig.Include(NumberAsDot);
    xmlConfig.Include(NumberAsDot);
}
csvConfig.Streaming(50000); //by default the streaming threshold is 16k; let's leave the default for XML
var csvFactory = csvConfig.Build();
var xmlFactory = xmlConfig.Build();
//for example purposes we will stream it into a zip file
using (var zip = new ZipOutputStream("output.zip"))
{
    zip.PutNextEntry("manual.csv");
    var sw = Stopwatch.StartNew();
    ManualStreaming(reader1, csvFactory, zip);
    Console.WriteLine("manual csv took: " + sw.ElapsedMilliseconds);
    zip.PutNextEntry("automatic.csv");
    sw = Stopwatch.StartNew();
    AutomaticStreaming(reader2, csvFactory, "csv", zip);
    Console.WriteLine("automatic csv took: " + sw.ElapsedMilliseconds);
    zip.PutNextEntry("data.xml");
    sw = Stopwatch.StartNew();
    AutomaticStreaming(reader3, xmlFactory, "xml", zip);
    Console.WriteLine("automatic xml took: " + sw.ElapsedMilliseconds);
}
Process.Start(new ProcessStartInfo("output.zip") { UseShellExecute = true });
}

private static void ManualStreaming(IDataReader reader, IDocumentFactory factory, ZipOutputStream zip)
{
using (var doc = factory.Open(File.OpenRead("template/input.csv"), "csv", zip))
{
    //streaming processing assumes we have only a single collection, which means we first need to process all other tags
    doc.Process(new { filter = new { date = "All", user = "All" } });
    //to do streaming processing we need to process the collection in chunks
    var chunk = new List<StreamingRow>(50000);
    var hasData = reader.Read();
    while (hasData)
    {
        //one way of doing streaming is to first duplicate the template row (context)
        doc.Templater.Resize(doc.Templater.Tags, 2);
        //and then process that row with all known data
        //this way we will have an additional row to process (or remove) later
        do
        {
            chunk.Add(new StreamingRow(reader));
            hasData = reader.Read();
        } while (chunk.Count < 50000 && hasData);
        doc.Process(new { data = chunk });
        chunk.Clear();
    }
    //remove the remaining rows
    doc.Templater.Resize(doc.Templater.Tags, 0);
}
}

private static void AutomaticStreaming(IDataReader reader, IDocumentFactory factory, string extension, ZipOutputStream zip)
{
    using (var doc = factory.Open(File.OpenRead("template/input." + extension), extension, zip))
    {
        //we still want to make sure all non-collection tags are processed first (or that they are at the end of the document)
        doc.Process(new { filter = new { date = "All", user = "All" } });
        //for streaming let's just pass an enumerator for processing
        doc.Process(new { data = new StreamingRow.ReaderIterator(reader) });
    }
}
}
}
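
The ReaderIterator above hard-codes StreamingRow; the same pattern generalizes to any row type. A hypothetical sketch (the DataReaderEnumerator name and the mapping delegate are illustrative, not part of this commit):

```csharp
using System;
using System.Collections;
using System.Collections.Generic;
using System.Data;

//generic take on ReaderIterator: adapts any IDataReader into an IEnumerator<T>
//so the document can be flushed while rows are still being fetched
public sealed class DataReaderEnumerator<T> : IEnumerator<T>
{
    private readonly IDataReader reader;
    private readonly Func<IDataReader, T> map;

    public DataReaderEnumerator(IDataReader reader, Func<IDataReader, T> map)
    {
        this.reader = reader;
        this.map = map;
    }

    public T Current { get { return map(reader); } } //materialize only the current row
    object IEnumerator.Current { get { return Current; } }
    public bool MoveNext() { return reader.Read(); }
    public void Reset() { } //forward-only, like ReaderIterator above
    public void Dispose() { }
}
```

With it, `doc.Process(new { data = new DataReaderEnumerator<StreamingRow>(reader, r => new StreamingRow(r)) })` would behave like the AutomaticStreaming call above.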