Skip to content

Commit

Permalink
+ Levenshtein distance
Browse files Browse the repository at this point in the history
+Tanimoto coefficient
  • Loading branch information
NickRimmer committed Oct 14, 2015
1 parent f90ed44 commit 96c96aa
Show file tree
Hide file tree
Showing 16 changed files with 577 additions and 0 deletions.
28 changes: 28 additions & 0 deletions StringCompare.sln
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 14
VisualStudioVersion = 14.0.23107.0
MinimumVisualStudioVersion = 10.0.40219.1
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "StringCompare", "StringCompare\StringCompare.csproj", "{0DBA6B3E-A274-4EF6-8818-6F298240FEF5}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "StringCompareTests", "StringCompareTests\StringCompareTests.csproj", "{1DE80C20-3175-4FC8-AEBF-FB148968CE58}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Release|Any CPU = Release|Any CPU
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{0DBA6B3E-A274-4EF6-8818-6F298240FEF5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{0DBA6B3E-A274-4EF6-8818-6F298240FEF5}.Debug|Any CPU.Build.0 = Debug|Any CPU
{0DBA6B3E-A274-4EF6-8818-6F298240FEF5}.Release|Any CPU.ActiveCfg = Release|Any CPU
{0DBA6B3E-A274-4EF6-8818-6F298240FEF5}.Release|Any CPU.Build.0 = Release|Any CPU
{1DE80C20-3175-4FC8-AEBF-FB148968CE58}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{1DE80C20-3175-4FC8-AEBF-FB148968CE58}.Debug|Any CPU.Build.0 = Debug|Any CPU
{1DE80C20-3175-4FC8-AEBF-FB148968CE58}.Release|Any CPU.ActiveCfg = Release|Any CPU
{1DE80C20-3175-4FC8-AEBF-FB148968CE58}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal
7 changes: 7 additions & 0 deletions StringCompare.sln.DotSettings
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<wpf:ResourceDictionary xml:space="preserve" xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml" xmlns:s="clr-namespace:System;assembly=mscorlib" xmlns:ss="urn:shemas-jetbrains-com:settings-storage-xaml" xmlns:wpf="http://schemas.microsoft.com/winfx/2006/xaml/presentation">
<s:String x:Key="/Default/CodeStyle/CodeCleanup/Profiles/=add_0020headers/@EntryIndexedValue">&lt;?xml version="1.0" encoding="utf-16"?&gt;&lt;Profile name="add headers"&gt;&lt;CSUpdateFileHeader&gt;True&lt;/CSUpdateFileHeader&gt;&lt;/Profile&gt;</s:String>
<s:String x:Key="/Default/CodeStyle/FileHeader/FileHeaderText/@EntryValue">Library for comparing strings&#xD;
Copyright (C) $CURRENT_YEAR$ Nick Rimmer. Contacts: &lt;xan@dipteam.com&gt;&#xD;
&#xD;
This file is part of StringCompare library.&#xD;
Licensed under the MIT License (MIT)</s:String></wpf:ResourceDictionary>
50 changes: 50 additions & 0 deletions StringCompare/Algorithms/Levenshtein/LevenshteinAlgorithm.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// Library for comparing strings
// Copyright (C) 2015 Nick Rimmer. Contacts: <xan@dipteam.com>
//
// This file is part of StringCompare library.
// Licensed under the MIT License (MIT)

using System;
using StringCompare.Structures.Interfaces;

namespace StringCompare.Algorithms.Levenshtein
{
/// <summary>
/// Compares two strings using the Levenshtein (edit) distance and reports the
/// outcome as a normalized similarity score.
/// </summary>
/// <remarks>
/// Both inputs are lower-cased with the invariant culture and trimmed before
/// they are compared; <c>null</c> is treated as an empty string.
/// </remarks>
public class LevenshteinAlgorithm : ICompareAlgorithm
{
    /// <summary>
    /// Calculates how similar <paramref name="source"/> and <paramref name="target"/> are.
    /// </summary>
    /// <param name="source">First string to compare; may be <c>null</c> or empty.</param>
    /// <param name="target">Second string to compare; may be <c>null</c> or empty.</param>
    /// <returns>
    /// <c>1 - distance / max(length)</c> computed on the normalized strings:
    /// 1.0 when they are identical (including when both are empty), 0.0 when
    /// every character has to be edited (including when exactly one is empty).
    /// </returns>
    public double GetCompareResult(string source, string target)
    {
        // Normalize BEFORE measuring: the lengths drive both the matrix size and
        // the loop bounds, so they must describe the strings that are actually indexed.
        source = Normalize(source);
        target = Normalize(target);

        var sourceLength = source.Length;
        var targetLength = target.Length;

        // Two empty strings are identical; returning here also avoids 0 / 0 below.
        var longestLength = Math.Max(sourceLength, targetLength);
        if (longestLength == 0)
        {
            return 1.0;
        }

        // distance[i, j] = edits needed to turn the first i chars of source
        // into the first j chars of target.
        var distance = new int[sourceLength + 1, targetLength + 1];

        for (var i = 0; i <= sourceLength; i++)
        {
            distance[i, 0] = i;
        }

        for (var j = 0; j <= targetLength; j++)
        {
            distance[0, j] = j;
        }

        for (var i = 1; i <= sourceLength; i++)
        {
            for (var j = 1; j <= targetLength; j++)
            {
                var cost = (target[j - 1] == source[i - 1]) ? 0 : 1;

                var deletion = distance[i - 1, j] + 1;
                var insertion = distance[i, j - 1] + 1;
                var substitution = distance[i - 1, j - 1] + cost;

                distance[i, j] = Math.Min(Math.Min(deletion, insertion), substitution);
            }
        }

        double stepsToSame = distance[sourceLength, targetLength];
        return 1.0 - (stepsToSame / longestLength);
    }

    /// <summary>
    /// Returns the lower-cased (invariant culture), trimmed form of
    /// <paramref name="value"/>, or an empty string when it is <c>null</c> or empty.
    /// </summary>
    private static string Normalize(string value)
    {
        return string.IsNullOrEmpty(value)
            ? string.Empty
            : value.ToLowerInvariant().Trim();
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// Library for comparing strings
// Copyright (C) 2015 Nick Rimmer. Contacts: <xan@dipteam.com>
//
// This file is part of StringCompare library.
// Licensed under the MIT License (MIT)

namespace StringCompare.Algorithms.Levenshtein
{
/// <summary>
/// Extension methods that expose <see cref="LevenshteinAlgorithm"/> directly on strings.
/// </summary>
public static class LevenshteinAlgorithmExtension
{
    /// <summary>
    /// Compares <paramref name="source"/> with <paramref name="target"/> using a
    /// new <see cref="LevenshteinAlgorithm"/> instance.
    /// </summary>
    /// <param name="source">The string the method is invoked on.</param>
    /// <param name="target">The string to compare against.</param>
    /// <returns>
    /// The value produced by <see cref="LevenshteinAlgorithm.GetCompareResult"/>
    /// for the two strings.
    /// </returns>
    public static double CompareLevenshtein(this string source, string target)
    {
        var algorithm = new LevenshteinAlgorithm();
        var result = algorithm.GetCompareResult(source, target);

        return result;
    }
}
}
27 changes: 27 additions & 0 deletions StringCompare/Algorithms/Tanimoto/TanimotoAlgorithm.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// Library for comparing strings
// Copyright (C) 2015 Nick Rimmer. Contacts: <xan@dipteam.com>
//
// This file is part of StringCompare library.
// Licensed under the MIT License (MIT)

using StringCompare.Structures.Interfaces;

namespace StringCompare.Algorithms.Tanimoto
{
/// <summary>
/// Compares two strings using the Tanimoto coefficient computed over their characters.
/// </summary>
/// <remarks>
/// Both inputs are lower-cased with the invariant culture and trimmed before
/// they are compared; <c>null</c> is treated as an empty string.
/// </remarks>
public class TanimotoAlgorithm : ICompareAlgorithm
{
    /// <summary>
    /// Calculates how similar <paramref name="source"/> and <paramref name="target"/> are.
    /// </summary>
    /// <param name="source">First string to compare; may be <c>null</c> or empty.</param>
    /// <param name="target">Second string to compare; may be <c>null</c> or empty.</param>
    /// <returns>
    /// <c>common / (sourceLength + targetLength - common)</c>, where
    /// <c>common</c> is the number of characters the normalized strings share,
    /// each target character being matched at most once. The result lies in
    /// [0, 1]; it is 1.0 when both strings are empty.
    /// </returns>
    public double GetCompareResult(string source, string target)
    {
        // Normalize once, up front, so the lengths in the denominator describe
        // the same strings the matching below runs on.
        source = Normalize(source);
        target = Normalize(target);

        // Two empty strings are identical; returning here also avoids 0 / 0 below.
        if (source.Length == 0 && target.Length == 0)
        {
            return 1.0;
        }

        // Number of still-unmatched occurrences of every target character.
        var unmatched = new System.Collections.Generic.Dictionary<char, int>();
        foreach (var targetSymbol in target)
        {
            int occurrences;
            unmatched.TryGetValue(targetSymbol, out occurrences);
            unmatched[targetSymbol] = occurrences + 1;
        }

        // Consume one target occurrence per match so that repeated characters
        // cannot be counted more often than they really occur in target;
        // otherwise the coefficient could exceed 1.
        var commonsCount = 0;
        foreach (var sourceSymbol in source)
        {
            int remaining;
            if (unmatched.TryGetValue(sourceSymbol, out remaining) && remaining > 0)
            {
                unmatched[sourceSymbol] = remaining - 1;
                commonsCount++;
            }
        }

        // commonsCount never exceeds the shorter length, so the denominator is
        // at least the longer length and therefore positive here.
        double unionCount = source.Length + target.Length - commonsCount;
        return commonsCount / unionCount;
    }

    /// <summary>
    /// Returns the lower-cased (invariant culture), trimmed form of
    /// <paramref name="value"/>, or an empty string when it is <c>null</c> or empty.
    /// </summary>
    private static string Normalize(string value)
    {
        return string.IsNullOrEmpty(value)
            ? string.Empty
            : value.ToLowerInvariant().Trim();
    }
}
}
16 changes: 16 additions & 0 deletions StringCompare/Algorithms/Tanimoto/TanimotoAlgorithmExtension.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// Library for comparing strings
// Copyright (C) 2015 Nick Rimmer. Contacts: <xan@dipteam.com>
//
// This file is part of StringCompare library.
// Licensed under the MIT License (MIT)

namespace StringCompare.Algorithms.Tanimoto
{
/// <summary>
/// Extension methods that expose <see cref="TanimotoAlgorithm"/> directly on strings.
/// </summary>
public static class TanimotoAlgorithmExtension
{
    /// <summary>
    /// Compares <paramref name="source"/> with <paramref name="target"/> using a
    /// new <see cref="TanimotoAlgorithm"/> instance.
    /// </summary>
    /// <param name="source">The string the method is invoked on.</param>
    /// <param name="target">The string to compare against.</param>
    /// <returns>
    /// The value produced by <see cref="TanimotoAlgorithm.GetCompareResult"/>
    /// for the two strings.
    /// </returns>
    public static double CompareTanimoto(this string source, string target)
    {
        var algorithm = new TanimotoAlgorithm();
        var result = algorithm.GetCompareResult(source, target);

        return result;
    }
}
}
21 changes: 21 additions & 0 deletions StringCompare/LICENSE.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
The MIT License (MIT)

Copyright (c) 2015 Nick Rimmer

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
36 changes: 36 additions & 0 deletions StringCompare/Properties/AssemblyInfo.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
using System.Reflection;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

// General Information about an assembly is controlled through the following
// set of attributes. Change these attribute values to modify the information
// associated with an assembly.
[assembly: AssemblyTitle("StringCompare")]
[assembly: AssemblyDescription("")]
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("Nick Rimmer")]
[assembly: AssemblyProduct("StringCompare")]
[assembly: AssemblyCopyright("Copyright (C) 2015 Nick Rimmer. Contacts: <xan@dipteam.com>")]
[assembly: AssemblyTrademark("")]
[assembly: AssemblyCulture("")]

// Setting ComVisible to false makes the types in this assembly not visible
// to COM components. If you need to access a type in this assembly from
// COM, set the ComVisible attribute to true on that type.
[assembly: ComVisible(false)]

// The following GUID is for the ID of the typelib if this project is exposed to COM
[assembly: Guid("0dba6b3e-a274-4ef6-8818-6f298240fef5")]

// Version information for an assembly consists of the following four values:
//
// Major Version
// Minor Version
// Build Number
// Revision
//
// You can specify all the values or you can default the Build and Revision Numbers
// by using the '*' as shown below:
// [assembly: AssemblyVersion("1.0.*")]
[assembly: AssemblyVersion("1.0.0.0")]
[assembly: AssemblyFileVersion("1.0.0.0")]
61 changes: 61 additions & 0 deletions StringCompare/StringCompare.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
<ProjectGuid>{0DBA6B3E-A274-4EF6-8818-6F298240FEF5}</ProjectGuid>
<OutputType>Library</OutputType>
<AppDesignerFolder>Properties</AppDesignerFolder>
<RootNamespace>StringCompare</RootNamespace>
<AssemblyName>StringCompare</AssemblyName>
<TargetFrameworkVersion>v4.0</TargetFrameworkVersion>
<FileAlignment>512</FileAlignment>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
<DebugSymbols>true</DebugSymbols>
<DebugType>full</DebugType>
<Optimize>false</Optimize>
<OutputPath>bin\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
<DebugType>none</DebugType>
<Optimize>true</Optimize>
<OutputPath>bin\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
<DocumentationFile>bin\Release\StringCompare.XML</DocumentationFile>
</PropertyGroup>
<ItemGroup>
<Reference Include="System" />
<Reference Include="System.Core" />
<Reference Include="System.Xml.Linq" />
<Reference Include="System.Data.DataSetExtensions" />
<Reference Include="Microsoft.CSharp" />
<Reference Include="System.Data" />
<Reference Include="System.Xml" />
</ItemGroup>
<ItemGroup>
<Compile Include="Algorithms\Levenshtein\LevenshteinAlgorithm.cs" />
<Compile Include="Algorithms\Levenshtein\LevenshteinAlgorithmExtension.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
<Compile Include="Structures\Interfaces\ICompareAlgorithm.cs" />
<Compile Include="Algorithms\Tanimoto\TanimotoAlgorithm.cs" />
<Compile Include="Algorithms\Tanimoto\TanimotoAlgorithmExtension.cs" />
</ItemGroup>
<ItemGroup>
<Content Include="LICENSE.txt" />
</ItemGroup>
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
<!-- To modify your build process, add your task inside one of the targets below and uncomment it.
Other similar extension points exist, see Microsoft.Common.targets.
<Target Name="BeforeBuild">
</Target>
<Target Name="AfterBuild">
</Target>
-->
</Project>
12 changes: 12 additions & 0 deletions StringCompare/Structures/Interfaces/ICompareAlgorithm.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
// Library for comparing strings
// Copyright (C) 2015 Nick Rimmer. Contacts: <xan@dipteam.com>
//
// This file is part of StringCompare library.
// Licensed under the MIT License (MIT)
namespace StringCompare.Structures.Interfaces
{
/// <summary>
/// Common contract for the string comparison algorithms in this library
/// (implemented by LevenshteinAlgorithm and TanimotoAlgorithm).
/// </summary>
public interface ICompareAlgorithm
{
/// <summary>
/// Compares two strings and returns the algorithm's numeric result.
/// </summary>
/// <param name="source">First string to compare.</param>
/// <param name="target">Second string to compare.</param>
/// <returns>
/// A <see cref="double"/> whose scale and meaning are defined by the
/// implementing algorithm.
/// </returns>
double GetCompareResult(string source, string target);
}
}
21 changes: 21 additions & 0 deletions StringCompareTests/LICENSE.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
The MIT License (MIT)

Copyright (c) 2015 Nick Rimmer

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
Loading

0 comments on commit 96c96aa

Please sign in to comment.