Skip to content

Commit

Permalink
Add single class generation for the similar CSV files.
Browse files Browse the repository at this point in the history
  • Loading branch information
Ivan Ivon committed Mar 8, 2021
1 parent 6b00dd6 commit a672685
Show file tree
Hide file tree
Showing 11 changed files with 146 additions and 61 deletions.
9 changes: 5 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@ CsvLINQPadDriver For LINQPad 6

CsvLINQPadDriver is a LINQPad 6 data context dynamic driver for querying CSV files.

You can query data in CSV files with LINQ, just like it would be regular database. No need to write custom data model, mappings etc.

Driver automatically generates new data types for every CSV file with corresponding properties and mappings for all columns.
Based on column and file names, possible relations between CSV tables are detected and generated.
- You can query data in CSV files with LINQ, just as you would a regular database. There is no need to write a custom data model, mappings, etc.
- Driver automatically generates new data types for every CSV file with corresponding properties and mappings for all columns.
- Based on column and file names, possible relations between CSV tables are detected and generated.
- Single class generation allows joining similar files and querying over them. It might not work well for files with relations.

Website
--
Expand Down Expand Up @@ -154,6 +154,7 @@ Known Issues
--
- Some strange Unicode characters in column names may cause errors in generated data context source code.
- Writing changed objects back to CSV is not directly supported: there is no `.SubmitChanges()`. However, you can use LINQPad's `Util.WriteCsv`.
- Single class generation for similar files might not work well for files with relations.

Author
--
Expand Down
52 changes: 37 additions & 15 deletions Src/CsvLINQPadDriver/CodeGen/CsvCSharpCodeGenerator.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using System;
using System.CodeDom;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Security;
Expand Down Expand Up @@ -29,29 +30,39 @@ private CsvCSharpCodeGenerator(string contextNameSpace, string contextTypeName,
}

// ReSharper disable once RedundantAssignment
public static string GenerateCode(CsvDatabase db, ref string nameSpace, ref string typeName, ICsvDataContextDriverProperties props) =>
/// <summary>
/// Generates the data context source code for <paramref name="db"/>.
/// </summary>
/// <param name="db">CSV database model handed to <c>GenerateSrcFile</c>.</param>
/// <param name="nameSpace">Namespace passed to the generator constructor; this method never assigns it.</param>
/// <param name="typeName">Always overwritten with <c>DefaultContextTypeName</c>.</param>
/// <param name="props">Driver properties passed to the generator constructor.</param>
/// <returns>The tuple produced by <c>GenerateSrcFile</c>: the generated code and the code groups.</returns>
public static (string Code, IReadOnlyCollection<IGrouping<string, (string Type, string Code, string CodeName)>> CodeGroups)
GenerateCode(CsvDatabase db, ref string nameSpace, ref string typeName, ICsvDataContextDriverProperties props)
{
    // The incoming type name is discarded: callers always get the default one back.
    typeName = DefaultContextTypeName;

    var generator = new CsvCSharpCodeGenerator(nameSpace, typeName, props);

    return generator.GenerateSrcFile(db);
}

private string GenerateSrcFile(CsvDatabase csvDatabase) =>
$@"using System;
private (string, IReadOnlyCollection<IGrouping<string, (string Type, string Code, string CodeName)>>) GenerateSrcFile(CsvDatabase csvDatabase)
{
var (_, csvTables) = csvDatabase;

var groups = csvTables
.Select(table => GenerateTableRowDataTypeClass(table, _properties.HideRelationsFromDump))
.GroupBy(typeCode => typeCode.Type)
.ToList();

return ($@"
using System;
using System.Linq;
using System.Collections.Generic;
namespace {_contextNameSpace}
{{
/// <summary>CSV Data Context</summary>
public class {_contextTypeName} : {typeof(CsvDataContextBase).GetCodeTypeClassName()}
{{ {string.Join(string.Empty, csvDatabase.Tables.Select(table => $@"
{{ {string.Join(string.Empty, csvTables.Select(table => $@"
/// <summary>File: {SecurityElement.Escape(table.FilePath)}</summary>
public {typeof(CsvTableBase<>).GetCodeTypeClassName(table.GetCodeRowClassName())} {table.CodeName} {{ get; private set; }}")
)}
public {_contextTypeName}()
{{
//Init tables data {string.Join(string.Empty, csvDatabase.Tables.Select(table => $@"
//Init tables data {string.Join(string.Empty, csvTables.Select(table => $@"
this.{table.CodeName} = {typeof(CsvTableFactory).GetCodeTypeClassName()}.CreateTable<{table.GetCodeRowClassName()}>(
{(_properties.IsStringInternEnabled ? "true" : "false")},
{(_properties.IsCacheEnabled ? "true" : "false")},
{GetBoolConst(_properties.IsStringInternEnabled)},
{GetBoolConst(_properties.IsCacheEnabled)},
{table.CsvSeparator.AsValidCSharpCode()},
{table.FilePath.AsValidCSharpCode()},
new {typeof(CsvColumnInfoList<>).GetCodeTypeClassName(table.GetCodeRowClassName())}() {{
Expand All @@ -66,12 +77,16 @@ public class {_contextTypeName} : {typeof(CsvDataContextBase).GetCodeTypeClassNa
}}
}}//context class
//Data types {string.Join(string.Empty, csvDatabase.Tables.Select(table => GenerateTableRowDataTypeClass(table, _properties.HideRelationsFromDump)))}
//Data types {string.Join(string.Empty, groups.Select(grouping => grouping.First().Code))}
}}//namespace
";
", groups);

private static string GenerateTableRowDataTypeClass(CsvTable table, bool hideRelationsFromDump) =>
$@"
// Returns the lowercase C# keyword spelling ("true"/"false") for a boolean value.
static string GetBoolConst(bool val)
{
    if (val)
    {
        return "true";
    }

    return "false";
}
}

private static (string Type, string Code, string CodeName) GenerateTableRowDataTypeClass(CsvTable table, bool hideRelationsFromDump) =>
(table.GetCodeRowClassName(), $@"
public class {table.GetCodeRowClassName()} : {typeof(ICsvRowBase).GetCodeTypeClassName()}
{{{string.Join(string.Empty, table.Columns.Select(c => $@"
public string {c.CodeName} {{ get; set; }} ")
Expand All @@ -80,16 +95,23 @@ public class {table.GetCodeRowClassName()} : {typeof(ICsvRowBase).GetCodeTypeCla
[{typeof(HideFromDumpAttribute).GetCodeTypeClassName()}]" : string.Empty)}
public IEnumerable<{csvRelation.TargetTable.GetCodeRowClassName()}> {csvRelation.CodeName} {{ get; set; }} ")
)}
}} ";
}} ", table.CodeName!);
}

internal static class CsvCSharpCodeGeneratorExtensions
{
public static string GetCodeRowClassName(this CsvTable table) =>
$"T{table.CodeName}";
/// <summary>
/// Builds the row class name for <paramref name="table"/> by prefixing its <c>ClassName</c> with <c>T</c>.
/// </summary>
/// <param name="table">Table whose <c>ClassName</c> is used.</param>
/// <returns>The class name prefixed with <c>T</c>.</returns>
/// <exception cref="ArgumentNullException">The table's <c>ClassName</c> is null or empty.</exception>
public static string GetCodeRowClassName(this CsvTable table)
{
    return ToClassName(table.ClassName);

    static string ToClassName(string? name)
    {
        // Guard first so the happy path is a plain return.
        if (string.IsNullOrEmpty(name))
        {
            throw new ArgumentNullException(nameof(name), "Name is null or empty");
        }

        return $"T{name}";
    }
}

public static string GetCodeTypeClassName(this Type type, params string[] genericParameters) =>
type!.FullName!.Split('`')[0] + (genericParameters.Any() ? $"<{string.Join(",", genericParameters)}>" : string.Empty);
type.FullName!.Split('`').First() + (genericParameters.Any() ? $"<{string.Join(",", genericParameters)}>" : string.Empty);

public static string AsValidCSharpCode<T>(this T input)
{
Expand Down
19 changes: 10 additions & 9 deletions Src/CsvLINQPadDriver/ConnectionDialog.xaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
HorizontalScrollBarVisibility="Auto"
VerticalScrollBarVisibility="Auto"
Text="{Binding Files, UpdateSourceTrigger=PropertyChanged}"
ToolTip="CSV files. Drag&amp;drop (use Ctrl to add files) or type one file per line. Supports mask '*.csv' or recursive '**.csv'."
ToolTip="CSV files. Drag&amp;drop (use Ctrl to add files) or type one file per line. Supports mask '*.csv' or recursive '**.csv'"
AllowDrop="True"
PreviewDragEnter="FilesTextBox_DragEnter"
PreviewDragOver="FilesTextBox_DragEnter"
Expand All @@ -48,23 +48,24 @@
<StackPanel Margin="2" Height="Auto">
<DockPanel>
<Label Padding="0,0,0,3" Content="CSV separator (autodetect if empty) "/>
<TextBox MaxLength="6" MaxLines="1" Text="{Binding CsvSeparator}" ToolTip="Character used to separate columns in csv file. If empty, separator is auto-detected for each file."/>
<TextBox MaxLength="6" MaxLines="1" Text="{Binding CsvSeparator}" ToolTip="Character used to separate columns in CSV file. If empty, separator is auto-detected for each file"/>
</DockPanel>
<CheckBox IsChecked="{Binding IgnoreInvalidFiles}" ToolTip="If checked, files with suspicious format will be ignored. (Only one column, variable count of columns in rows...)">Ignore files with invalid format</CheckBox>
<CheckBox IsChecked="{Binding IgnoreInvalidFiles}" ToolTip="Ignore files with invalid format">Ignore files with invalid format</CheckBox>
</StackPanel>
</GroupBox>

<GroupBox Header="Memory">
<StackPanel Margin="2" Height="Auto">
<CheckBox IsChecked="{Binding IsCacheEnabled}" ToolTip="If checked - Parsed rows from file are cached. This cache survives multiple query runs, even when query is changed. Cache is cleared as soon as LINQPad clears Application Domain of query. If unchecked - disable cache. Multiple enumerations of file content results in multiple reads and parsing of file. Can be significantly slower for complex queries. Significantly reduces memory usage. Useful when reading very large files.">Cache CSV data in memory</CheckBox>
<CheckBox IsChecked="{Binding IsStringInternEnabled}" ToolTip="If checked, all string values are interned. Can significantly reduce memory consumption, when values in CSV are repeated many times.">String interning</CheckBox>
<CheckBox IsChecked="{Binding IsCacheEnabled}" ToolTip="Cache parsed rows. This cache survives multiple query runs, even when query is changed">Cache CSV data in memory</CheckBox>
<CheckBox IsChecked="{Binding IsStringInternEnabled}" ToolTip="Intern strings. Significantly reduce memory consumption when CSV contains repeatable values">Intern CSV strings</CheckBox>
</StackPanel>
</GroupBox>
<CheckBox IsChecked="{Binding DetectRelations}" ToolTip="If checked, relations between csv files/tables will be detected and created. (Based on files and column names.)">Detect relations</CheckBox>
<CheckBox IsChecked="{Binding HideRelationsFromDump}" IsEnabled="{Binding DetectRelations}" ToolTip="If checked - LINQPad will not show relations content in .Dump(). This prevents loading too many data." Margin="16,0,0,0">Hide relations from .Dump()</CheckBox>
<CheckBox IsChecked="{Binding DebugInfo}" ToolTip="Show/hide additional driver debug info.">Debug info</CheckBox>
<CheckBox IsChecked="{Binding UseSingleClassForSameFiles}" ToolTip="Single class generation allows to join similar files and query over them. Might not work well for files with relations">Generate single class for similar files</CheckBox>
<CheckBox IsChecked="{Binding DetectRelations}" ToolTip="Detect relations between CSV files/tables (based on files and column names)">Detect relations</CheckBox>
<CheckBox IsChecked="{Binding HideRelationsFromDump}" IsEnabled="{Binding DetectRelations}" ToolTip="LINQPad will not show relations content in .Dump(). This prevents from loading too many data" Margin="16,0,0,0">Hide relations from .Dump()</CheckBox>
<CheckBox IsChecked="{Binding DebugInfo}" ToolTip="Show additional driver debug info">Debug info</CheckBox>
<Separator/>
<CheckBox IsChecked="{Binding Persist}">Remember this connection</CheckBox>
<CheckBox IsChecked="{Binding Persist}" ToolTip="Persist connection">Remember this connection</CheckBox>
<StackPanel Margin="0,8,0,0" Orientation="Horizontal" HorizontalAlignment="Right">
<Button Name="OkButton" Content="OK" Margin="5,0,0,0" Padding="8,3" Width="85" IsDefault="True" Click="OkButton_Click" />
<Button Content="Cancel" Margin="5,0,0,0" Padding="8,3" Width="85" IsCancel="True" />
Expand Down
6 changes: 6 additions & 0 deletions Src/CsvLINQPadDriver/CsvDataContextDriverProperties.cs
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,12 @@ public char? CsvSeparatorChar
}
}

/// <summary>
/// Whether a single class is generated for similar CSV files.
/// </summary>
/// <remarks>
/// NOTE(review): the <c>true</c> passed to <c>GetValue</c> is presumably the default used
/// when no value is stored; confirm against <c>GetValue</c>.
/// </remarks>
public bool UseSingleClassForSameFiles
{
    get
    {
        return GetValue(true);
    }
    set
    {
        SetValue(value);
    }
}

public bool DetectRelations
{
get => GetValue(true);
Expand Down
2 changes: 1 addition & 1 deletion Src/CsvLINQPadDriver/CsvLINQPadDriver.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
<PackageLicenseExpression>MIT</PackageLicenseExpression>
<RepositoryUrl>https://github.com/i2van/CsvLINQPadDriver</RepositoryUrl>
<PackageIcon>NuGetIcon.png</PackageIcon>
<PackageReleaseNotes>Updated dependencies.</PackageReleaseNotes>
<PackageReleaseNotes>Added single class generation for the similar CSV files.</PackageReleaseNotes>
<Copyright>Copyright © Martin Dobroucký 2013-2014, Ivan Ivon 2021</Copyright>
<AssemblyVersion>6.5.0.0</AssemblyVersion>
<FileVersion>6.5.0.0</FileVersion>
Expand Down
50 changes: 34 additions & 16 deletions Src/CsvLINQPadDriver/DataModel/CsvDataModelGenerator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ private CsvDatabase CreateModel()

IEnumerable<CsvTable> CreateTables()
{
var tableCodeNames = new Dictionary<string, string>();

foreach (var file in files.Where(File.Exists))
{
var csvSeparator = _csvDataContextDriverProperties.CsvSeparatorChar ?? FileUtils.CsvDetectSeparator(file);
Expand All @@ -70,25 +72,41 @@ IEnumerable<CsvTable> CreateTables()

var fileName = Path.GetFileName(file);
var fileDir = (Path.GetDirectoryName($"{file.Remove(0, baseDir.Length)}x") ?? string.Empty).TrimStart(Path.DirectorySeparatorChar);

yield return new CsvTable
(
file,
csvSeparator,
FileUtils.CsvReadHeader(file, csvSeparator)
.Select((value, index) => (value, index))
.Select(col => new CsvColumn(col.value ?? string.Empty, col.index)
{
CodeName = CodeGenHelper.GetSafeCodeName(col.value),
DisplayName = string.Empty
}
).ToList(),
new List<CsvRelation>()
)
var codeName = CodeGenHelper.GetSafeCodeName(Path.GetFileNameWithoutExtension(fileName) + (string.IsNullOrWhiteSpace(fileDir) ? string.Empty : $"_{fileDir}"));

var columns = FileUtils.CsvReadHeader(file, csvSeparator)
.Select((value, index) => (value, index))
.Select(col => new CsvColumn(col.value ?? string.Empty, col.index)
{
CodeName = CodeGenHelper.GetSafeCodeName(col.value),
DisplayName = string.Empty
})
.ToList();

yield return new CsvTable(file, csvSeparator, columns, new List<CsvRelation>())
{
CodeName = CodeGenHelper.GetSafeCodeName(Path.GetFileNameWithoutExtension(fileName) + (string.IsNullOrWhiteSpace(fileDir) ? string.Empty : $"_{fileDir}")),
CodeName = codeName,
ClassName = GetClassName(),
DisplayName = $"{fileName}{(string.IsNullOrWhiteSpace(fileDir) ? string.Empty : $" in {fileDir}")} {FileUtils.GetHumanizedFileSize(file)}"
};

// Picks the class name for the current file.
// Returns null when single class generation is turned off; otherwise returns the code name
// of the first file seen with the same header (same column names at the same indexes).
string? GetClassName()
{
    if (!_csvDataContextDriverProperties.UseSingleClassForSameFiles)
    {
        return null;
    }

    // Header signature: every column name paired with its index.
    var signature = string.Concat(columns.Select(column => $"{column.CsvColumnName}\t{column.CsvColumnIndex}\n"));

    if (tableCodeNames.TryGetValue(signature, out var sharedName))
    {
        // A file with an identical header was already processed: reuse its name.
        return sharedName;
    }

    // First file with this header: its own code name becomes the shared class name.
    tableCodeNames.Add(signature, codeName);

    return codeName;
}
}
}

Expand Down
8 changes: 8 additions & 0 deletions Src/CsvLINQPadDriver/DataModel/CsvTable.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,13 @@ IList<CsvRelation> Relations
{
public string? CodeName { get; set; }
public string? DisplayName { get; set; }

/// <summary>
/// Class name of the table. Reads as <see cref="CodeName"/> when the initialized value
/// is <c>null</c> or the property was never initialized.
/// </summary>
public string? ClassName
{
    get
    {
        return _className ?? CodeName;
    }
    init
    {
        _className = value;
    }
}

// Backing storage for ClassName; null means "fall back to CodeName".
private readonly string? _className;
}
}
4 changes: 2 additions & 2 deletions Src/CsvLINQPadDriver/Helpers/FileUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,8 @@ public static bool IsCsvFormatValid(string fileName, char csvSeparator)

var headerRow = csvParser.Record;

// 0 or 1 column.
if (headerRow.Length <= 1)
// No columns.
if (!headerRow.Any())
{
CsvDataContextDriver.WriteToLog($"{header} CSV header had no columns");

Expand Down
7 changes: 6 additions & 1 deletion Src/CsvLINQPadDriver/ICsvDataContextDriverProperties.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,12 @@ public interface ICsvDataContextDriverProperties
char? CsvSeparatorChar { get; }

/// <summary>
/// If <c>true</c> - relations between CSV files/tables will be detected and created. (based on files and column names)
/// If <c>true</c> - generates single class for similar CSV files.
/// </summary>
bool UseSingleClassForSameFiles { get; set; }

/// <summary>
/// If <c>true</c> - relations between CSV files/tables will be detected and created (based on file and column names).
/// </summary>
bool DetectRelations { get; set; }

Expand Down
Loading

0 comments on commit a672685

Please sign in to comment.