Skip to content

Commit

Permalink
Refactored namespace and updated class names.
Browse files Browse the repository at this point in the history
Added explanations to README.md.
  • Loading branch information
cidrugHug8 committed Dec 16, 2023
1 parent 4f18eff commit 308d8eb
Show file tree
Hide file tree
Showing 5 changed files with 139 additions and 83 deletions.
37 changes: 1 addition & 36 deletions BleuNet/BleuScore.cs → BleuNet/Metrics.cs
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Text;
using System.Text.RegularExpressions;

namespace BleuNet
{
public static class BleuScore
public static class Metrics
{
/// <summary>
/// Fraction class.
Expand Down Expand Up @@ -348,38 +346,6 @@ private static Dictionary<string, int> Ngrams(string[] words, int n)
return ngrams;
}

/// <summary>
/// Normalizes a line of text and splits it into tokens.
/// Markup remnants and HTML entities are cleaned up first, then punctuation is
/// separated from the surrounding words by spaces (Western-language rules).
/// </summary>
/// <param name="line">The input string to tokenize.</param>
/// <param name="lc">When true (the default), the input is lower-cased before tokenizing.</param>
/// <returns>The tokens in order of appearance; an empty array when the line contains no tokens.</returns>
public static string[] Tokenize(string line, bool lc = true)
{
    string norm = line;

    if (lc)
    {
        // Invariant casing keeps the tokens identical regardless of the
        // machine's current culture (e.g. Turkish dotted/dotless i).
        norm = norm.ToLowerInvariant();
    }

    // language-independent part:
    norm = norm.Replace("<skipped>", "");
    norm = norm.Replace("-\n", "");     // re-join words hyphenated across a line break
    norm = norm.Replace("\n", " ");
    norm = norm.Replace("&quot;", "\"");
    norm = norm.Replace("&amp;", "&");
    norm = norm.Replace("&lt;", "<");
    norm = norm.Replace("&gt;", ">");

    // language-dependent part (assuming Western languages):
    norm = " " + norm + " ";
    // put spaces around punctuation and symbol characters
    norm = Regex.Replace(norm, "([\\{-\\~\\[-\\` -\\&\\(-\\+\\:-\\@\\/])", " $1 ");
    // split off a period or comma unless it is preceded by a digit
    norm = Regex.Replace(norm, "([^0-9])([\\.,])", "$1 $2 ");
    // split off a period or comma unless it is followed by a digit
    norm = Regex.Replace(norm, "([\\.,])([^0-9])", " $1 $2");
    // split off a dash that directly follows a digit
    norm = Regex.Replace(norm, "([0-9])(-)", "$1 $2 ");
    norm = Regex.Replace(norm, "\\s+", " "); // one space only between words
    norm = norm.Trim(); // no leading or trailing space

    // Only single spaces remain at this point. RemoveEmptyEntries makes an empty
    // or whitespace-only line produce zero tokens instead of one empty-string token.
    return norm.Split(' ', StringSplitOptions.RemoveEmptyEntries);
}

public static (double nkt, double precision, double bp) CalculateKendallsTau(string[] reference, string[] hypothesis)
{
static string MapWordsToUnicode(string[] words, Dictionary<string, int> wordDict)
Expand Down Expand Up @@ -500,7 +466,6 @@ static string GetNgram(string text, int start, int length)

var p = n / (double)hypothesis.Length;

// 結果を返す
return (nkt, p, bp);
}

Expand Down
72 changes: 72 additions & 0 deletions BleuNet/Utility.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
using System.Text.RegularExpressions;

namespace BleuNet
{
/// <summary>
/// The Utility class in the BleuNet namespace.
/// This class provides utility methods for tokenizing strings and benchmarking.
/// </summary>
public static class Utility
{
/// <summary>
/// Tokenizes the input string into an array of words.
/// The line is normalized first (markup remnants removed, HTML entities decoded,
/// punctuation separated from words) and then split on spaces.
/// </summary>
/// <param name="line">The input string to tokenize.</param>
/// <param name="lc">A boolean value indicating whether to convert the input string to lower case. Default is true.</param>
/// <returns>An array of words; empty when the line contains no tokens.</returns>
public static string[] Tokenize(string line, bool lc = true)
{
    string norm = line;

    // Convert the string to lower case if lc is true.
    if (lc)
    {
        // Invariant casing keeps the tokens identical regardless of the
        // machine's current culture (e.g. Turkish dotted/dotless i).
        norm = norm.ToLowerInvariant();
    }

    // language-independent part:
    // Replace certain characters and strings with others.
    norm = norm.Replace("<skipped>", "");
    norm = norm.Replace("-\n", "");     // re-join words hyphenated across a line break
    norm = norm.Replace("\n", " ");
    norm = norm.Replace("&quot;", "\"");
    norm = norm.Replace("&amp;", "&");
    norm = norm.Replace("&lt;", "<");
    norm = norm.Replace("&gt;", ">");

    // language-dependent part (assuming Western languages):
    // Add spaces around certain characters and strings.
    norm = " " + norm + " ";
    // put spaces around punctuation and symbol characters
    norm = Regex.Replace(norm, "([\\{-\\~\\[-\\` -\\&\\(-\\+\\:-\\@\\/])", " $1 ");
    // split off a period or comma unless it is preceded by a digit
    norm = Regex.Replace(norm, "([^0-9])([\\.,])", "$1 $2 ");
    // split off a period or comma unless it is followed by a digit
    norm = Regex.Replace(norm, "([\\.,])([^0-9])", " $1 $2");
    // split off a dash that directly follows a digit
    norm = Regex.Replace(norm, "([0-9])(-)", "$1 $2 ");
    norm = Regex.Replace(norm, "\\s+", " "); // one space only between words
    norm = norm.Trim(); // no leading or trailing space

    // Split the normalized string into words. Only single spaces remain at this
    // point; RemoveEmptyEntries makes an empty or whitespace-only line produce
    // zero tokens instead of a single empty-string token.
    return norm.Split(' ', StringSplitOptions.RemoveEmptyEntries);
}

/// <summary>
/// Runs a benchmark on the CorpusBleu method: both files are read and tokenized
/// once, then <c>Metrics.CorpusBleu</c> is called 100 times on that input.
/// The method measures and returns nothing itself; timing is left to the caller.
/// </summary>
/// <param name="referenceFilepath">The file path to the reference text file. Default is "./reference.txt".</param>
/// <param name="hypothesisFilepath">The file path to the hypothesis text file. Default is "./hypothesis.txt".</param>
public static void Benchmark(string referenceFilepath="./reference.txt", string hypothesisFilepath= "./hypothesis.txt")
{
    const int Iterations = 100;

    // The inputs do not change between iterations, so load and tokenize them
    // once; repeating the file I/O inside the loop would dominate the run
    // and hide the cost of CorpusBleu itself.
    var references = File.ReadAllLines(referenceFilepath)
        .Select(x => Utility.Tokenize(x))
        .ToArray();
    var hypotheses = File.ReadAllLines(hypothesisFilepath)
        .Select(x => Utility.Tokenize(x))
        .ToArray();

    for (var i = 0; i < Iterations; i++)
    {
        // The score is irrelevant here; only the work of computing it matters.
        _ = Metrics.CorpusBleu(references, hypotheses);
    }
}
}
}
62 changes: 31 additions & 31 deletions BleuNetTest/BleuTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@ public void TestModifiedPrecision1()

var references = new string[][] { ref1, ref2 };

var hyp1UnigramPrecision = BleuScore.ModifiedPrecision(references, hyp1, 1);
var hyp1UnigramPrecision = Metrics.ModifiedPrecision(references, hyp1, 1);
Assert.Equal(0.2857, Math.Round(hyp1UnigramPrecision, 4));

Assert.Equal(0.28571428, hyp1UnigramPrecision, 0.0001);

Assert.Equal(0.0, BleuScore.ModifiedPrecision(references, hyp1, 2));
Assert.Equal(0.0, Metrics.ModifiedPrecision(references, hyp1, 2));
}

[Fact]
Expand All @@ -33,9 +33,9 @@ public void TestModifiedPrecision2()

var references = new string[][] { ref1, ref2, ref3 };

Assert.Equal(1.0, BleuScore.ModifiedPrecision(references, hyp1, 1));
Assert.Equal(1.0, Metrics.ModifiedPrecision(references, hyp1, 1));

Assert.Equal(1.0, BleuScore.ModifiedPrecision(references, hyp1, 2));
Assert.Equal(1.0, Metrics.ModifiedPrecision(references, hyp1, 2));
}

[Fact]
Expand All @@ -50,17 +50,17 @@ public void TestModifiedPrecision3()

var references = new string[][] { ref1, ref2, ref3 };

var hyp1UnigramPrecision = BleuScore.ModifiedPrecision(references, hyp1, 1);
var hyp2UnigramPrecision = BleuScore.ModifiedPrecision(references, hyp2, 1);
var hyp1UnigramPrecision = Metrics.ModifiedPrecision(references, hyp1, 1);
var hyp2UnigramPrecision = Metrics.ModifiedPrecision(references, hyp2, 1);

Assert.Equal(0.94444444, hyp1UnigramPrecision, 0.0001);
Assert.Equal(0.57142857, hyp2UnigramPrecision, 0.0001);

Assert.Equal(0.9444, Math.Round(hyp1UnigramPrecision, 4));
Assert.Equal(0.5714, Math.Round(hyp2UnigramPrecision, 4));

var hyp1BigramPrecision = BleuScore.ModifiedPrecision(references, hyp1, 2);
var hyp2BigramPrecision = BleuScore.ModifiedPrecision(references, hyp2, 2);
var hyp1BigramPrecision = Metrics.ModifiedPrecision(references, hyp1, 2);
var hyp2BigramPrecision = Metrics.ModifiedPrecision(references, hyp2, 2);

Assert.Equal(0.58823529, hyp1BigramPrecision, 0.0001);
Assert.Equal(0.07692307, hyp2BigramPrecision, 0.0001);
Expand All @@ -75,13 +75,13 @@ public void TestBrevityPenalty()
var references = new string[][] { Enumerable.Repeat("a", 11).ToArray(), Enumerable.Repeat("a", 8).ToArray() };
var hypothesis = Enumerable.Repeat("a", 7).ToArray();
var hypLen = hypothesis.Length;
var closestRefLen = BleuScore.ClosestRefLength(references, hypLen);
Assert.Equal(0.8669, BleuScore.BrevityPenalty(closestRefLen, hypLen), 0.0001);
var closestRefLen = Metrics.ClosestRefLength(references, hypLen);
Assert.Equal(0.8669, Metrics.BrevityPenalty(closestRefLen, hypLen), 0.0001);

references = [Enumerable.Repeat("a", 11).ToArray(), Enumerable.Repeat("a", 8).ToArray(), Enumerable.Repeat("a", 6).ToArray(), Enumerable.Repeat("a", 7).ToArray()];
hypLen = hypothesis.Length;
closestRefLen = BleuScore.ClosestRefLength(references, hypLen);
Assert.Equal(1.0, BleuScore.BrevityPenalty(closestRefLen, hypLen));
closestRefLen = Metrics.ClosestRefLength(references, hypLen);
Assert.Equal(1.0, Metrics.BrevityPenalty(closestRefLen, hypLen));
}

[Fact]
Expand All @@ -95,7 +95,7 @@ public void TestZeroMatches()
for (int n = 1; n < hypothesis.Length; n++)
{
double[] weights = Enumerable.Repeat(1.0 / n, n).ToArray(); // Uniform weights.
Assert.Equal(0.0, BleuScore.SentenceBleu(references, hypothesis, weights));
Assert.Equal(0.0, Metrics.SentenceBleu(references, hypothesis, weights));
}
}

Expand All @@ -110,7 +110,7 @@ public void TestFullMatches()
for (int n = 1; n < hypothesis.Length; n++)
{
double[] weights = Enumerable.Repeat(1.0 / n, n).ToArray(); // Uniform weights.
Assert.Equal(1.0, BleuScore.SentenceBleu(references, hypothesis, weights));
Assert.Equal(1.0, Metrics.SentenceBleu(references, hypothesis, weights));
}
}

Expand All @@ -122,7 +122,7 @@ public void TestPartialMatchesHypothesisLongerThanReference()

// Since no 4-grams matches were found the result should be zero
// exp(w_1 * 1 * w_2 * 1 * w_3 * 1 * w_4 * -inf) = 0
Assert.Equal(0.0, BleuScore.SentenceBleu(references, hypothesis), 0.0001);
Assert.Equal(0.0, Metrics.SentenceBleu(references, hypothesis), 0.0001);

// Checks that the warning has been raised because len(reference) < 4.
// In C#, there's no direct equivalent for Python's warnings, so this part is omitted.
Expand All @@ -144,14 +144,14 @@ public void TestCaseWhereNIsBiggerThanHypothesisLength()
weights[i] = 1.0 / n;
}

double bleuScore = BleuScore.SentenceBleu(references, hypothesis, weights);
double bleuScore = Metrics.SentenceBleu(references, hypothesis, weights);

Assert.Equal(0.0, bleuScore, 4);

references = ["John", "loves", "Mary"];
hypothesis = ["John", "loves", "Mary"];

bleuScore = BleuScore.SentenceBleu(references, hypothesis, weights);
bleuScore = Metrics.SentenceBleu(references, hypothesis, weights);

Assert.Equal(0.0, bleuScore, 4);
}
Expand All @@ -162,7 +162,7 @@ public void TestEmptyHypothesis()
var references = new string[] { "The", "candidate", "has", "no", "alignment", "to", "any", "of", "the", "references" };
string[] hypothesis = [];

double bleuScore = BleuScore.SentenceBleu(references, hypothesis);
double bleuScore = Metrics.SentenceBleu(references, hypothesis);

Assert.Equal(0.0, bleuScore);
}
Expand All @@ -189,7 +189,7 @@ public void TestEmptyReferences()
var references = new string[][] { [] };
var hypothesis = new string[] { "John", "loves", "Mary" };

double bleuScore = BleuScore.SentenceBleu(references, hypothesis);
double bleuScore = Metrics.SentenceBleu(references, hypothesis);

Assert.Equal(0.0, bleuScore);
}
Expand All @@ -200,7 +200,7 @@ public void TestEmptyReferencesAndHypothesis()
string[][] references = [[]];
string[] hypothesis = [];

double bleuScore = BleuScore.SentenceBleu(references, hypothesis);
double bleuScore = Metrics.SentenceBleu(references, hypothesis);

Assert.Equal(0.0, bleuScore);
}
Expand All @@ -211,7 +211,7 @@ public void TestReferenceOrHypothesisShorterThanFourgrams()
var references = new string[] { "let", "it", "go" };
var hypothesis = new string[] { "let", "go", "it" };

double bleuScore = BleuScore.SentenceBleu(references, hypothesis);
double bleuScore = Metrics.SentenceBleu(references, hypothesis);

Assert.Equal(0.0, bleuScore, 4);
}
Expand All @@ -228,7 +228,7 @@ public void TestNumpyWeights()
weights[i] = 0.25;
}

double bleuScore = BleuScore.SentenceBleu(references, hypothesis, weights);
double bleuScore = Metrics.SentenceBleu(references, hypothesis, weights);

Assert.Equal(0.0, bleuScore);
}
Expand All @@ -249,7 +249,7 @@ public void TestCorpusBleuWithBadSentence()

// Check that the warning is raised since no. of 2-grams < 0.
// Verify that the BLEU output is undesired since no. of 2-grams < 0.
Assert.Equal(0.0, BleuScore.CorpusBleu(references, hypotheses, new double[] { 0.25, 0.25, 0.25, 0.25 }), 0.0001);
Assert.Equal(0.0, Metrics.CorpusBleu(references, hypotheses, new double[] { 0.25, 0.25, 0.25, 0.25 }), 0.0001);
}
}

Expand Down Expand Up @@ -364,25 +364,25 @@ public void TestCorpusBleuWithMultipleWeights()
var weight2 = new double[] { 0.25, 0.25, 0.25, 0.25 };
var weight3 = new double[] { 0.0, 0.0, 0.0, 1.0 };

double[] bleuScores = BleuScore.CorpusBleu(
double[] bleuScores = Metrics.CorpusBleu(
[[ref1a, ref1b, ref1c], [ref2a]],
[hyp1, hyp2],
new double[][] { weight1, weight2, weight3 }
);

Assert.Equal(bleuScores[0], BleuScore.CorpusBleu(
Assert.Equal(bleuScores[0], Metrics.CorpusBleu(
[[ref1a, ref1b, ref1c], [ref2a]],
[hyp1, hyp2],
weight1
));

Assert.Equal(bleuScores[1], BleuScore.CorpusBleu(
Assert.Equal(bleuScores[1], Metrics.CorpusBleu(
[[ref1a, ref1b, ref1c], [ref2a]],
[hyp1, hyp2],
weight2
));

Assert.Equal(bleuScores[2], BleuScore.CorpusBleu(
Assert.Equal(bleuScores[2], Metrics.CorpusBleu(
[[ref1a, ref1b, ref1c], [ref2a]],
[hyp1, hyp2],
weight3
Expand All @@ -399,15 +399,15 @@ public void TestCorpusRibes0()
{
string[][][] references = [["The candidate has no alignment to any of the references".Split()]];
string[][] hypothesis = ["John loves Mary".Split()];
Assert.Equal(0.0, BleuScore.CorppusRibes(references, hypothesis));
Assert.Equal(0.0, Metrics.CorppusRibes(references, hypothesis));
}

// Checks that CorppusRibes returns 0.883743 (within a tolerance of 0.000001)
// for a single hypothesis sentence compared against a single reference sentence.
[Fact]
public void TestCorpusRibes1()
{
// One segment with one whitespace-split reference sentence.
string[][][] ref1 = [["He enjoys taking a walk in the park every day .".Split()]];
// One whitespace-split hypothesis sentence for that segment.
string[][] hyp1 = ["He likes to walk in the park daily .".Split()];
// NOTE(review): the two assertions below differ only in the class name (BleuScore vs. Metrics);
// presumably they are the before/after forms of one line in this rename — confirm only the Metrics call remains.
Assert.Equal(0.883743, BleuScore.CorppusRibes(ref1, hyp1), 0.000001);
Assert.Equal(0.883743, Metrics.CorppusRibes(ref1, hyp1), 0.000001);
}

[Fact]
Expand All @@ -419,7 +419,7 @@ public void TestCorpusRibes2()
string[][] hyp1 = [
"He likes to walk in the park daily, and then enjoys his coffee at a cafe while reading the newspaper, which is his daily routine .".Split()
];
Assert.Equal(0.678417, BleuScore.CorppusRibes(ref1, hyp1), 0.000001);
Assert.Equal(0.678417, Metrics.CorppusRibes(ref1, hyp1), 0.000001);
}

[Fact]
Expand All @@ -435,7 +435,7 @@ public void TestCorpusRibes3()
//Assert.Equal(0.634183, BleuScore.CorppusRibes([[ref2]], hyp1), 0.000001);

string[][][] references = [[ref1, ref2]];
Assert.Equal(0.634183, BleuScore.CorppusRibes(references, hyp1), 0.000001);
Assert.Equal(0.634183, Metrics.CorppusRibes(references, hyp1), 0.000001);
}
}
}
Loading

0 comments on commit 308d8eb

Please sign in to comment.