diff --git a/Deploy/buildlpx.cmd b/Deploy/buildlpx.cmd
index 21a659b..bd0c871 100644
--- a/Deploy/buildlpx.cmd
+++ b/Deploy/buildlpx.cmd
@@ -1,6 +1,6 @@
@echo off
-set version=6.15.1
+set version=6.16.0
set fileName=CsvLINQPadDriver.%version%.lpx
set zip="%ProgramFiles%\7-Zip\7z.exe"
diff --git a/README.md b/README.md
index e145153..f3946f1 100644
--- a/README.md
+++ b/README.md
@@ -174,8 +174,8 @@ CSV files connection can be added to LINQPad 6/5 the same way as any other conne
* `c:\Books\Books?.csv`: `Books.csv`, `Books1.csv`, etc. files in folder `c:\Books`
* `c:\Books\*.csv`: all `*.csv` files in folder `c:\Books`
* `c:\Books\**.csv`: all `*.csv` files in folder `c:\Books` and its sub-folders.
-* Order files by: specify files sort order. Affects similar files order.
-* Fallback encoding: specify encoding to use if file encoding could not be detected, e.g. due to missing [BOM](https://en.wikipedia.org/wiki/Byte_order_mark). `UTF-8` is default.
+* Order files by: sort order of the files. Affects the order of similar files.
+* Fallback encoding: [encoding](https://docs.microsoft.com/en-us/windows/win32/intl/code-page-identifiers) to use if the file encoding could not be detected. `UTF-8` is the default.
* Auto-detect file encodings: try to detect file encodings.
* Validate file paths: check if file paths are valid.
* Ignore files with invalid format: files with content which does not resemble CSV will be ignored.
diff --git a/Src/CsvLINQPadDriver/ConnectionDialog.xaml b/Src/CsvLINQPadDriver/ConnectionDialog.xaml
index ee14cbb..5aab72f 100644
--- a/Src/CsvLINQPadDriver/ConnectionDialog.xaml
+++ b/Src/CsvLINQPadDriver/ConnectionDialog.xaml
@@ -327,11 +327,11 @@
-
+
diff --git a/Src/CsvLINQPadDriver/Directory.Build.props b/Src/CsvLINQPadDriver/Directory.Build.props
index 583bd27..ecb0987 100644
--- a/Src/CsvLINQPadDriver/Directory.Build.props
+++ b/Src/CsvLINQPadDriver/Directory.Build.props
@@ -1,7 +1,7 @@
- 6.15.1
- Updated dependencies.
+ 6.16.0
+ Added more encodings.
diff --git a/Src/CsvLINQPadDriver/Extensions/FileExtensions.cs b/Src/CsvLINQPadDriver/Extensions/FileExtensions.cs
index c526c51..2118993 100644
--- a/Src/CsvLINQPadDriver/Extensions/FileExtensions.cs
+++ b/Src/CsvLINQPadDriver/Extensions/FileExtensions.cs
@@ -49,7 +49,7 @@ private static
#endif
StringInternCache = null!;
- private static readonly Lazy> NoBomEncodings = new(CalculateNoBomEncodings);
+ private static readonly Dictionary NoBomEncodings = new();
private record SupportedFileType(FileType FileType, string Extension, string Description)
{
@@ -82,6 +82,11 @@ public string Mask
public static readonly string DefaultMask = GetMask(DefaultFileType);
public static readonly string DefaultRecursiveMask = GetMask(DefaultFileType, true);
+#if NETCOREAPP
+ static FileExtensions() => // Static constructor, compiled only when NETCOREAPP is defined.
+ Encoding.RegisterProvider(CodePagesEncodingProvider.Instance); // Registers the code-pages encoding provider; presumably needed so the Encoding.GetEncoding(int) calls in GetFallbackEncoding can resolve code page numbers - confirm against the target frameworks.
+#endif
+
public static string GetMask(this FileType fileType, bool recursive = false) =>
$"{(recursive ? RecursiveMaskMarker : "*")}.{fileType.GetSupportedFileType().Mask}";
@@ -369,7 +374,7 @@ private static CsvParser CreateCsvParser(
csvConfiguration.Delimiter = csvSeparator?.ToString() ?? csvConfiguration.Delimiter;
csvConfiguration.BadDataFound = ignoreBadData ? null : csvConfiguration.BadDataFound;
- var encoding = (autoDetectEncoding ? DetectEncoding(fileName) : null) ?? NoBomEncodings.Value[noBomEncoding];
+ var encoding = (autoDetectEncoding ? DetectEncoding(fileName) : null) ?? GetFallbackEncoding(noBomEncoding);
return new CsvParser(new StreamReader(fileName, encoding, !autoDetectEncoding, bufferSize / sizeof(char)), csvConfiguration);
}
@@ -450,32 +455,43 @@ private static IEnumerable CsvReadRows(
}
}
- [DllImport("kernel32.dll")]
- private static extern int GetSystemDefaultLCID();
-
- [DllImport("kernel32.dll")]
- private static extern int GetUserDefaultLCID();
-
- private static IReadOnlyDictionary CalculateNoBomEncodings()
+ private static Encoding GetFallbackEncoding(NoBomEncoding noBomEncoding) // Returns the Encoding for the given enum member, creating it on first request and caching it in NoBomEncodings.
{
-#if NETCOREAPP
- Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
-#endif
-
- return new Dictionary
+ if (!NoBomEncodings.TryGetValue(noBomEncoding, out var encoding)) // Cache lookup; the body below runs only on a miss.
{
- [NoBomEncoding.UTF8] = Encoding.UTF8,
- [NoBomEncoding.Unicode] = Encoding.Unicode,
- [NoBomEncoding.BigEndianUnicode] = Encoding.BigEndianUnicode,
- [NoBomEncoding.UTF32] = Encoding.UTF32,
- [NoBomEncoding.BigEndianUTF32] = new UTF32Encoding(true, true),
- [NoBomEncoding.ASCII] = Encoding.ASCII,
- [NoBomEncoding.SystemCodePage] = GetCodePageEncoding(false),
- [NoBomEncoding.UserCodePage] = GetCodePageEncoding(true)
- };
+ NoBomEncodings.Add(noBomEncoding, encoding = GetEncoding()); // NOTE(review): NoBomEncodings is a plain static Dictionary mutated here with no visible synchronization - confirm this is never reached from multiple threads.
+ }
- static Encoding GetCodePageEncoding(bool user) =>
- Encoding.GetEncoding(CultureInfo.GetCultureInfo(user ? GetUserDefaultLCID() : GetSystemDefaultLCID()).TextInfo.ANSICodePage);
+ return encoding!; // Non-null at this point: either found in the cache or assigned just above.
+
+ Encoding GetEncoding() // Local helper: maps the captured noBomEncoding to an Encoding instance.
+ {
+ return noBomEncoding switch
+ {
+ NoBomEncoding.UTF8 => Encoding.UTF8,
+ NoBomEncoding.Unicode => Encoding.Unicode,
+ NoBomEncoding.BigEndianUnicode => Encoding.BigEndianUnicode,
+ NoBomEncoding.UTF32 => Encoding.UTF32,
+ NoBomEncoding.BigEndianUTF32 => new UTF32Encoding(true, true),
+ NoBomEncoding.UTF7 => Encoding.UTF7, // NOTE(review): Encoding.UTF7 is documented as obsolete on .NET 5+ (SYSLIB0001) - confirm the warning is handled for the NETCOREAPP build.
+ NoBomEncoding.ASCII => Encoding.ASCII,
+ NoBomEncoding.SystemCodePage => GetCodePageEncoding(false),
+ NoBomEncoding.UserCodePage => GetCodePageEncoding(true),
+ _ => Encoding.GetEncoding(FromCodePage()) // Every other member: look the encoding up by the number parsed from the member name.
+ };
+
+ static Encoding GetCodePageEncoding(bool user) => // Encoding for the ANSI code page of the user (true) or system (false) default locale ID returned by kernel32.
+ Encoding.GetEncoding(CultureInfo.GetCultureInfo(user ? GetUserDefaultLCID() : GetSystemDefaultLCID()).TextInfo.ANSICodePage);
+
+ int FromCodePage() => // Drops the first two characters of the member name and parses the remainder as the code page number (Cp1252 becomes 1252).
+ Convert.ToInt32(noBomEncoding.ToString()[2..], CultureInfo.InvariantCulture);
+
+ [DllImport("kernel32.dll")]
+ static extern int GetSystemDefaultLCID();
+
+ [DllImport("kernel32.dll")]
+ static extern int GetUserDefaultLCID();
+ }
}
private static Encoding? DetectEncoding(string fileName)
diff --git a/Src/CsvLINQPadDriver/NoBomEncoding.cs b/Src/CsvLINQPadDriver/NoBomEncoding.cs
index de87c5c..2937872 100644
--- a/Src/CsvLINQPadDriver/NoBomEncoding.cs
+++ b/Src/CsvLINQPadDriver/NoBomEncoding.cs
@@ -1,23 +1,468 @@
using System.ComponentModel;
+// ReSharper disable UnusedMember.Global
+
namespace CsvLINQPadDriver
{
public enum NoBomEncoding
{
- [Description("UTF-8")]
+ [Description("UTF-8|65001")] // Description text looks like "display name|code page number" - TODO confirm how the consumer splits it.
UTF8,
- [Description("UTF-16")]
+
+ [Description("UTF-16|1200")]
Unicode,
- [Description("UTF-16 Big Endian")]
+
+ [Description("UTF-16 Big Endian|1201")]
BigEndianUnicode,
- [Description("UTF-32")]
+
+ [Description("UTF-32|12000")]
UTF32,
- [Description("UTF-32 Big Endian")]
+
+ [Description("UTF-32 Big Endian|12001")]
BigEndianUTF32,
+
+ [Description("UTF-7|65000")]
+ UTF7, // NOTE(review): inserted before ASCII with no explicit values, so the implicit numeric values of ASCII, SystemCodePage and UserCodePage each move up by one - confirm nothing persists this enum as a number.
+
+ [Description("ASCII|20127")]
ASCII,
+ // NOTE(review): in the two descriptions below, the first letter of the word after "System"/"User" appears to be Cyrillic U+0441 rather than Latin c - verify.
[Description("System сode page")]
SystemCodePage,
+
[Description("User сode page")]
- UserCodePage
+ UserCodePage,
+ // Members named Cp<N> from here on: GetFallbackEncoding parses N from the member name and uses it as the code page number.
+ [Description("Arabic (ASMO 708)|708")]
+ Cp708,
+
+ [Description("Arabic (ASMO-449+)|709")]
+ Cp709,
+
+ [Description("Arabic (DOS)|720")]
+ Cp720,
+
+ [Description("Arabic (Mac)|10004")]
+ Cp10004,
+
+ [Description("Arabic (OEM)|864")]
+ Cp864,
+
+ [Description("Arabic (Windows)|1256")]
+ Cp1256,
+
+ [Description("Arabic - Transparent Arabic|710")]
+ Cp710,
+
+ [Description("Baltic (DOS)|775")]
+ Cp775,
+
+ [Description("Baltic (Windows)|1257")]
+ Cp1257,
+
+ [Description("Central European (DOS)|852")]
+ Cp852,
+
+ [Description("Central European (Mac)|10029")]
+ Cp10029,
+
+ [Description("Central European (Windows)|1250")]
+ Cp1250,
+
+ [Description("Chinese Simplified (EUC)|51936")]
+ Cp51936,
+
+ [Description("Chinese Simplified (GB18030)|54936")]
+ Cp54936,
+
+ [Description("Chinese Simplified (GB2312)|936")]
+ Cp936,
+
+ [Description("Chinese Simplified (GB2312-80)|20936")]
+ Cp20936,
+
+ [Description("Chinese Simplified (HZ)|52936")]
+ Cp52936,
+
+ [Description("Chinese Simplified (ISO 2022)|50227")]
+ Cp50227,
+
+ [Description("Chinese Simplified (Mac)|10008")]
+ Cp10008,
+
+ [Description("Chinese Traditional (Big5)|950")]
+ Cp950,
+
+ [Description("Chinese Traditional (CNS)|20000")]
+ Cp20000,
+
+ [Description("Chinese Traditional (Eten)|20002")]
+ Cp20002,
+
+ [Description("Chinese Traditional (Mac)|10002")]
+ Cp10002,
+
+ [Description("Croatian (Mac)|10082")]
+ Cp10082,
+
+ [Description("Cyrillic (DOS)|866")]
+ Cp866,
+
+ [Description("Cyrillic (KOI8-R)|20866")]
+ Cp20866,
+
+ [Description("Cyrillic (KOI8-U)|21866")]
+ Cp21866,
+
+ [Description("Cyrillic (Mac)|10007")]
+ Cp10007,
+
+ [Description("Cyrillic (OEM)|855")]
+ Cp855,
+
+ [Description("Cyrillic (Windows)|1251")]
+ Cp1251,
+
+ [Description("EBCDIC Japanese (Katakana) Extended|50930")]
+ Cp50930,
+
+ [Description("EBCDIC Japanese (Latin) Extended and Japanese|50939")]
+ Cp50939,
+
+ [Description("EBCDIC Korean Extended and Korean|50933")]
+ Cp50933,
+
+ [Description("EBCDIC Simplified Chinese|50936")]
+ Cp50936,
+
+ [Description("EBCDIC Simplified Chinese Extended|50935")]
+ Cp50935,
+
+ [Description("EBCDIC US-Canada and Japanese|50931")]
+ Cp50931,
+
+ [Description("EBCDIC US-Canada and Traditional Chinese|50937")]
+ Cp50937,
+
+ [Description("Europa 3|29001")]
+ Cp29001,
+
+ [Description("French Canadian (DOS)|863")]
+ Cp863,
+
+ [Description("Greek (DOS)|737")]
+ Cp737,
+
+ [Description("Greek (Mac)|10006")]
+ Cp10006,
+
+ [Description("Greek (Windows)|1253")]
+ Cp1253,
+
+ [Description("Greek Modern (DOS)|869")]
+ Cp869,
+
+ [Description("Hebrew (DOS)|862")]
+ Cp862,
+
+ [Description("Hebrew (ISO-Logical)|38598")]
+ Cp38598,
+
+ [Description("Hebrew (Mac)|10005")]
+ Cp10005,
+
+ [Description("Hebrew (Windows)|1255")]
+ Cp1255,
+
+ [Description("IA5 German (7-bit)|20106")]
+ Cp20106,
+
+ [Description("IA5 Norwegian (7-bit)|20108")]
+ Cp20108,
+
+ [Description("IA5 Swedish (7-bit)|20107")]
+ Cp20107,
+
+ [Description("IBM EBCDIC (Denmark-Norway-Euro)|1142")]
+ Cp1142,
+
+ [Description("IBM EBCDIC (Finland-Sweden-Euro)|1143")]
+ Cp1143,
+
+ [Description("IBM EBCDIC (France-Euro)|1147")]
+ Cp1147,
+
+ [Description("IBM EBCDIC (Germany-Euro)|1141")]
+ Cp1141,
+
+ [Description("IBM EBCDIC (Icelandic-Euro)|1149")]
+ Cp1149,
+
+ [Description("IBM EBCDIC (International-Euro)|1148")]
+ Cp1148,
+
+ [Description("IBM EBCDIC (Italy-Euro)|1144")]
+ Cp1144,
+
+ [Description("IBM EBCDIC (Spain-Euro)|1145")]
+ Cp1145,
+
+ [Description("IBM EBCDIC (UK-Euro)|1146")]
+ Cp1146,
+
+ [Description("IBM EBCDIC (US-Canada-Euro)|1140")]
+ Cp1140,
+
+ [Description("IBM EBCDIC Arabic|20420")]
+ Cp20420,
+
+ [Description("IBM EBCDIC Cyrillic Russian|20880")]
+ Cp20880,
+
+ [Description("IBM EBCDIC Cyrillic Serbian-Bulgarian|21025")]
+ Cp21025,
+
+ [Description("IBM EBCDIC Denmark-Norway|20277")]
+ Cp20277,
+
+ [Description("IBM EBCDIC Finland-Sweden|20278")]
+ Cp20278,
+
+ [Description("IBM EBCDIC France|20297")]
+ Cp20297,
+
+ [Description("IBM EBCDIC Germany|20273")]
+ Cp20273,
+
+ [Description("IBM EBCDIC Greek|20423")]
+ Cp20423,
+
+ [Description("IBM EBCDIC Greek Modern|875")]
+ Cp875,
+
+ [Description("IBM EBCDIC Hebrew|20424")]
+ Cp20424,
+
+ [Description("IBM EBCDIC Icelandic|20871")]
+ Cp20871,
+
+ [Description("IBM EBCDIC International|500")]
+ Cp500,
+
+ [Description("IBM EBCDIC Italy|20280")]
+ Cp20280,
+
+ [Description("IBM EBCDIC Japanese Katakana Extended|20290")]
+ Cp20290,
+
+ [Description("IBM EBCDIC Korean Extended|20833")]
+ Cp20833,
+
+ [Description("IBM EBCDIC Latin 1/Open System|1047")]
+ Cp1047,
+
+ [Description("IBM EBCDIC Latin 1/Open System (1047 + Euro)|20924")]
+ Cp20924,
+
+ [Description("IBM EBCDIC Latin America-Spain|20284")]
+ Cp20284,
+
+ [Description("IBM EBCDIC Multilingual Latin 2|870")]
+ Cp870,
+
+ [Description("IBM EBCDIC Thai|20838")]
+ Cp20838,
+
+ [Description("IBM EBCDIC Turkish|20905")]
+ Cp20905,
+
+ [Description("IBM EBCDIC Turkish (Latin 5)|1026")]
+ Cp1026,
+
+ [Description("IBM EBCDIC United Kingdom|20285")]
+ Cp20285,
+
+ [Description("IBM EBCDIC US-Canada|037")]
+ Cp037, // The name's numeric part is "037"; Convert.ToInt32 in GetFallbackEncoding reads it as 37.
+
+ [Description("IBM5550 Taiwan|20003")]
+ Cp20003,
+
+ [Description("Icelandic (DOS)|861")]
+ Cp861,
+
+ [Description("Icelandic (Mac)|10079")]
+ Cp10079,
+
+ [Description("ISCII Assamese|57006")]
+ Cp57006,
+
+ [Description("ISCII Bangla|57003")]
+ Cp57003,
+
+ [Description("ISCII Devanagari|57002")]
+ Cp57002,
+
+ [Description("ISCII Gujarati|57010")]
+ Cp57010,
+
+ [Description("ISCII Kannada|57008")]
+ Cp57008,
+
+ [Description("ISCII Malayalam|57009")]
+ Cp57009,
+
+ [Description("ISCII Odia|57007")]
+ Cp57007,
+
+ [Description("ISCII Punjabi|57011")]
+ Cp57011,
+
+ [Description("ISCII Tamil|57004")]
+ Cp57004,
+
+ [Description("ISCII Telugu|57005")]
+ Cp57005,
+
+ [Description("ISO 6937 Non-Spacing Accent|20269")]
+ Cp20269,
+
+ [Description("ISO 8859-1 Latin 1|28591")]
+ Cp28591,
+
+ [Description("ISO 8859-2 Central European|28592")]
+ Cp28592,
+
+ [Description("ISO 8859-3 Latin 3|28593")]
+ Cp28593,
+
+ [Description("ISO 8859-4 Baltic|28594")]
+ Cp28594,
+
+ [Description("ISO 8859-5 Cyrillic|28595")]
+ Cp28595,
+
+ [Description("ISO 8859-6 Arabic|28596")]
+ Cp28596,
+
+ [Description("ISO 8859-7 Greek|28597")]
+ Cp28597,
+
+ [Description("ISO 8859-8 Hebrew|28598")]
+ Cp28598,
+
+ [Description("ISO 8859-9 Turkish|28599")]
+ Cp28599,
+
+ [Description("ISO 8859-13 Estonian|28603")]
+ Cp28603,
+
+ [Description("ISO 8859-15 Latin 9|28605")]
+ Cp28605,
+
+ [Description("Japanese (EUC)|51932")]
+ Cp51932,
+
+ [Description("Japanese (JIS 0208-1990 and 0212-1990)|20932")]
+ Cp20932,
+
+ [Description("Japanese (JIS)|50220")]
+ Cp50220,
+
+ [Description("Japanese (JIS-Allow 1 byte Kana - SO/SI)|50222")]
+ Cp50222,
+
+ [Description("Japanese (JIS-Allow 1 byte Kana)|50221")]
+ Cp50221,
+
+ [Description("Japanese (Mac)|10001")]
+ Cp10001,
+
+ [Description("Japanese (Shift-JIS)|932")]
+ Cp932,
+
+ [Description("Korean|50225")]
+ Cp50225,
+
+ [Description("Korean (EUC)|51949")]
+ Cp51949,
+
+ [Description("Korean (Johab)|1361")]
+ Cp1361,
+
+ [Description("Korean (Mac)|10003")]
+ Cp10003,
+
+ [Description("Korean (Unified Hangul Code)|949")]
+ Cp949,
+
+ [Description("Korean Wansung|20949")]
+ Cp20949,
+
+ [Description("Multilingual Latin 1 (OEM)|858")]
+ Cp858,
+
+ [Description("Nordic (DOS)|865")]
+ Cp865,
+
+ [Description("Portuguese (DOS)|860")]
+ Cp860,
+
+ [Description("Romanian (Mac)|10010")]
+ Cp10010,
+
+ [Description("T.61|20261")]
+ Cp20261,
+
+ [Description("TCA Taiwan|20001")]
+ Cp20001,
+
+ [Description("TeleText Taiwan|20004")]
+ Cp20004,
+
+ [Description("Thai (Mac)|10021")]
+ Cp10021,
+
+ [Description("Thai (Windows)|874")]
+ Cp874,
+
+ [Description("Traditional Chinese|50229")]
+ Cp50229,
+
+ [Description("Traditional Chinese (EUC)|51950")]
+ Cp51950,
+
+ [Description("Turkish (DOS)|857")]
+ Cp857,
+
+ [Description("Turkish (Mac)|10081")]
+ Cp10081,
+
+ [Description("Turkish (Windows)|1254")]
+ Cp1254,
+
+ [Description("Ukrainian (Mac)|10017")]
+ Cp10017,
+
+ [Description("United States (OEM)|437")]
+ Cp437,
+
+ [Description("Vietnamese (Windows)|1258")]
+ Cp1258,
+
+ [Description("Wang Taiwan|20005")]
+ Cp20005,
+
+ [Description("Western European (DOS)|850")]
+ Cp850,
+
+ [Description("Western European (IA5)|20105")]
+ Cp20105,
+
+ [Description("Western European (Mac)|10000")]
+ Cp10000,
+
+ [Description("Western European (Windows)|1252")]
+ Cp1252
}
}
diff --git a/Src/CsvLINQPadDriver/app.manifest b/Src/CsvLINQPadDriver/app.manifest
index 3338ef0..16f6bb2 100644
--- a/Src/CsvLINQPadDriver/app.manifest
+++ b/Src/CsvLINQPadDriver/app.manifest
@@ -8,7 +8,7 @@