From 05841da0b0b4668c19b7560a8dd358ac7e1f7cbd Mon Sep 17 00:00:00 2001 From: i2van Date: Sat, 26 Jun 2021 00:01:52 +0300 Subject: [PATCH] Add encoding auto-detection. (#4) Bugfixes. Co-authored-by: Ivan Ivon --- Deploy/buildlpx.cmd | 2 +- README.md | 8 +- .../CodeGen/CsvCSharpCodeGenerator.cs | 3 +- Src/CsvLINQPadDriver/CodeGen/CsvTableBase.cs | 5 +- .../CodeGen/CsvTableEnumerable.cs | 3 +- .../CodeGen/CsvTableFactory.cs | 5 +- Src/CsvLINQPadDriver/CodeGen/CsvTableList.cs | 3 +- Src/CsvLINQPadDriver/ConnectionDialog.xaml | 119 +++++++++++------- Src/CsvLINQPadDriver/ConnectionDialog.xaml.cs | 36 ++++-- .../CsvDataContextDriverProperties.cs | 6 + Src/CsvLINQPadDriver/CsvLINQPadDriver.csproj | 15 +-- .../DataModel/CsvDataModelGenerator.cs | 5 +- Src/CsvLINQPadDriver/Directory.Build.props | 6 +- .../Extensions/CodeGenExtensions.cs | 39 +++--- .../Extensions/FileExtensions.cs | 88 ++++++++++--- Src/CsvLINQPadDriver/Extensions/Functions.cs | 8 ++ .../ICsvDataContextDriverProperties.cs | 5 + Src/CsvLINQPadDriver/SchemaBuilder.cs | 8 +- Src/CsvLINQPadDriver/Wpf/ControlExtensions.cs | 65 ++++++++++ Src/CsvLINQPadDriver/Wpf/DialogExtensions.cs | 4 +- Src/CsvLINQPadDriver/app.manifest | 2 +- Src/LPRun/LPRun.csproj | 4 +- .../CsvLINQPadDriverTest.csproj | 7 +- Tests/CsvLINQPadDriverTest/LPRunTests.cs | 32 +++-- 24 files changed, 342 insertions(+), 136 deletions(-) create mode 100644 Src/CsvLINQPadDriver/Extensions/Functions.cs create mode 100644 Src/CsvLINQPadDriver/Wpf/ControlExtensions.cs diff --git a/Deploy/buildlpx.cmd b/Deploy/buildlpx.cmd index 14a3d2f..bb2412e 100644 --- a/Deploy/buildlpx.cmd +++ b/Deploy/buildlpx.cmd @@ -1,4 +1,4 @@ -@set version=6.12.0 +@set version=6.13.0 @set zip="%ProgramFiles%\7-Zip\7z.exe" @set output="CsvLINQPadDriver.%version%.lpx6" diff --git a/README.md b/README.md index 48518ea..cf4c0ab 100644 --- a/README.md +++ b/README.md @@ -159,15 +159,16 @@ CSV files connection can be added to LINQPad 6 the same way as any other connect * `c:\Books\Books?.csv`: `Books.csv`, `Books1.csv`, etc. files in folder `c:\Books` * `c:\Books\*.csv`: all `*.csv` files in folder `c:\Books` * `c:\Books\**.csv`: all `*.csv` files in folder `c:\Books` and its sub-folders. -* Order files by: specifies files sort order. Affects similar files order. -* No BOM encoding: specifies encoding for files without [BOM](https://en.wikipedia.org/wiki/Byte_order_mark). `UTF-8` is default. +* Order files by: specify files sort order. Affects similar files order. +* Fallback encoding: specify encoding to use if file encoding could not be detected, e.g. due to missing [BOM](https://en.wikipedia.org/wiki/Byte_order_mark). `UTF-8` is default. +* Auto-detect file encodings: try to detect file encodings. * Validate file paths: check if file paths are valid. * Ignore files with invalid format: files with content which does not resemble CSV will be ignored. ### Format ### * CSV separator: character used to separate columns in files. Can be `,`, `\t`, etc. Auto-detected if empty. -* Use CsvHelper library separator auto-detection: use CsvHelper library separator auto-detection instead of internal one. +* Use [CsvHelper](https://joshclose.github.io/CsvHelper) library separator auto-detection: use CsvHelper library separator auto-detection instead of internal one. * Ignore bad data: ignore malformed CSV data. * Allow comments: lines starting with `#` will be ignored. @@ -402,6 +403,7 @@ TimeSpan? ToTimeSpan( * [Moq](https://github.com/moq/moq4) * [NUnit](https://github.com/nunit/nunit) * [Windows API Code Pack](https://github.com/contre/Windows-API-Code-Pack-1.1) +* [UnicodeCharsetDetector](https://github.com/i2van/UnicodeCharsetDetector) ## License ## diff --git a/Src/CsvLINQPadDriver/CodeGen/CsvCSharpCodeGenerator.cs b/Src/CsvLINQPadDriver/CodeGen/CsvCSharpCodeGenerator.cs index 9c26305..71d5ba2 100644 --- a/Src/CsvLINQPadDriver/CodeGen/CsvCSharpCodeGenerator.cs +++ b/Src/CsvLINQPadDriver/CodeGen/CsvCSharpCodeGenerator.cs @@ -68,6 +68,7 @@ public class {_contextTypeName} : {typeof(CsvDataContextBase).GetCodeTypeClassNa {typeof(NoBomEncoding).GetCodeTypeClassName()}.{_properties.NoBomEncoding}, {GetBoolConst(_properties.AllowComments)}, {GetBoolConst(_properties.IgnoreBadData)}, + {GetBoolConst(_properties.AutoDetectEncoding)}, {table.FilePath.AsValidCSharpCode()}, new {typeof(CsvColumnInfoList<>).GetCodeTypeClassName(GetClassName(table))} {{ {string.Join(string.Empty, table.Columns.Select(c => $@"{{ {c.Index}, x => x.{c.CodeName} }}, "))} @@ -81,7 +82,7 @@ public class {_contextTypeName} : {typeof(CsvDataContextBase).GetCodeTypeClassNa }} }} // context class - // Data types {string.Join(Environment.NewLine, groups.Select(grouping => grouping.First().Code))} // data types + // Data types {string.Join(Environment.NewLine, groups.Select(grouping => grouping.OrderByDescending(code => code.Code.Length).First().Code))} // data types }} // namespace ", groups); diff --git a/Src/CsvLINQPadDriver/CodeGen/CsvTableBase.cs b/Src/CsvLINQPadDriver/CodeGen/CsvTableBase.cs index b43a8b1..af21ec7 100644 --- a/Src/CsvLINQPadDriver/CodeGen/CsvTableBase.cs +++ b/Src/CsvLINQPadDriver/CodeGen/CsvTableBase.cs @@ -25,6 +25,7 @@ public abstract class CsvTableBase : CsvTableBase, IEnumerable private readonly NoBomEncoding _noBomEncoding; private readonly bool _allowComments; private readonly bool _ignoreBadData; + private readonly bool _autoDetectEncoding; protected readonly string FilePath; @@ -34,6 +35,7 @@ protected CsvTableBase( NoBomEncoding noBomEncoding, bool allowComments, bool ignoreBadData, + bool autoDetectEncoding, string filePath, IEnumerable propertiesInfo, Action relationsInit) @@ -43,6 +45,7 @@ protected CsvTableBase( _noBomEncoding = noBomEncoding; _allowComments = allowComments; _ignoreBadData = ignoreBadData; + _autoDetectEncoding = autoDetectEncoding; FilePath = filePath; @@ -50,7 +53,7 @@ protected CsvTableBase( } protected IEnumerable ReadData() => - FilePath.CsvReadRows(_csvSeparator, IsStringInternEnabled, _noBomEncoding, _allowComments, _ignoreBadData, _cachedCsvRowMappingBase!); + FilePath.CsvReadRows(_csvSeparator, IsStringInternEnabled, _noBomEncoding, _allowComments, _ignoreBadData, _autoDetectEncoding, _cachedCsvRowMappingBase!); // ReSharper disable once UnusedMember.Global public abstract IEnumerable WhereIndexed(Func getProperty, string propertyName, params string[] values); diff --git a/Src/CsvLINQPadDriver/CodeGen/CsvTableEnumerable.cs b/Src/CsvLINQPadDriver/CodeGen/CsvTableEnumerable.cs index b99d3ab..4ae0801 100644 --- a/Src/CsvLINQPadDriver/CodeGen/CsvTableEnumerable.cs +++ b/Src/CsvLINQPadDriver/CodeGen/CsvTableEnumerable.cs @@ -13,10 +13,11 @@ public CsvTableEnumerable( NoBomEncoding noBomEncoding, bool allowComments, bool ignoreBadData, + bool autoDetectEncoding, string filePath, IEnumerable propertiesInfo, Action relationsInit) - : base(isStringInternEnabled, csvSeparator, noBomEncoding, allowComments, ignoreBadData, filePath, propertiesInfo, relationsInit) + : base(isStringInternEnabled, csvSeparator, noBomEncoding, allowComments, ignoreBadData, autoDetectEncoding, filePath, propertiesInfo, relationsInit) { } diff --git a/Src/CsvLINQPadDriver/CodeGen/CsvTableFactory.cs b/Src/CsvLINQPadDriver/CodeGen/CsvTableFactory.cs index 5fecea8..fc60119 100644 --- a/Src/CsvLINQPadDriver/CodeGen/CsvTableFactory.cs +++ b/Src/CsvLINQPadDriver/CodeGen/CsvTableFactory.cs @@ -13,12 +13,13 @@ public static CsvTableBase CreateTable( NoBomEncoding noBomEncoding, bool allowComments, bool ignoreBadData, + bool autoDetectEncoding, string filePath, IEnumerable propertiesInfo, Action relationsInit) where TRow : ICsvRowBase, new() => isCacheEnabled - ? new CsvTableList(isStringInternEnabled, csvSeparator, noBomEncoding, allowComments, ignoreBadData, filePath, propertiesInfo, relationsInit) - : new CsvTableEnumerable(isStringInternEnabled, csvSeparator, noBomEncoding, allowComments, ignoreBadData, filePath, propertiesInfo, relationsInit); + ? new CsvTableList(isStringInternEnabled, csvSeparator, noBomEncoding, allowComments, ignoreBadData, autoDetectEncoding, filePath, propertiesInfo, relationsInit) + : new CsvTableEnumerable(isStringInternEnabled, csvSeparator, noBomEncoding, allowComments, ignoreBadData, autoDetectEncoding, filePath, propertiesInfo, relationsInit); } } \ No newline at end of file diff --git a/Src/CsvLINQPadDriver/CodeGen/CsvTableList.cs b/Src/CsvLINQPadDriver/CodeGen/CsvTableList.cs index 9345a57..3eb384c 100644 --- a/Src/CsvLINQPadDriver/CodeGen/CsvTableList.cs +++ b/Src/CsvLINQPadDriver/CodeGen/CsvTableList.cs @@ -18,10 +18,11 @@ public CsvTableList( NoBomEncoding noBomEncoding, bool allowComments, bool ignoreBadData, + bool autoDetectEncoding, string filePath, IEnumerable propertiesInfo, Action relationsInit) - : base(isStringInternEnabled, csvSeparator, noBomEncoding, allowComments, ignoreBadData, filePath, propertiesInfo, relationsInit) => + : base(isStringInternEnabled, csvSeparator, noBomEncoding, allowComments, ignoreBadData, autoDetectEncoding, filePath, propertiesInfo, relationsInit) => _dataCache = new Lazy>(() => ReadData().Cache($"{typeof(TRow).Name}:{FilePath}")); private IList DataCache => diff --git a/Src/CsvLINQPadDriver/ConnectionDialog.xaml b/Src/CsvLINQPadDriver/ConnectionDialog.xaml index ebd9aa0..399f355 100644 --- a/Src/CsvLINQPadDriver/ConnectionDialog.xaml +++ b/Src/CsvLINQPadDriver/ConnectionDialog.xaml @@ -20,17 +20,20 @@ - - + + + + + + - @@ -102,14 +108,18 @@ Command="{x:Static this:ConnectionDialog.PasteFromClipboardForFolderAndProceedCommand}" Executed="PasteFromClipboardForFolderAndProceedCommandBinding_OnExecuted" CanExecute="ClipboardCommandBinding_OnCanExecute"/> + + Command="{x:Static this:ConnectionDialog.WrapFilesTextCommand}" + Executed="WrapFilesTextCommand_OnExecuted" + CanExecute="CommandBinding_OnCanAlwaysExecute"/> - + - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + Add files + + + Add folder + + + Add folder and its sub-folders + + + Browse + + + Clear + - - - + + + + + + + + + + - - + + + @@ -246,20 +277,22 @@ SelectedValue="{Binding FilesOrderBy}" ItemsSource="{Binding Source={StaticResource FilesOrderByData}}"/> -