Skip to content

Commit

Permalink
add files-diff benchmark
Browse files Browse the repository at this point in the history
  • Loading branch information
msakamoto-sf committed Jan 14, 2018
1 parent 7d61492 commit 4c9e3a1
Show file tree
Hide file tree
Showing 21 changed files with 225 additions and 16 deletions.
5 changes: 5 additions & 0 deletions files_diff_base_dir/a.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
aaaaaa

aaaaaa

aaaaaa
3 changes: 3 additions & 0 deletions files_diff_base_dir/b.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
bbbbbb
bbbbbb
bbbbbb
5 changes: 5 additions & 0 deletions files_diff_base_dir/c.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
ccccccccccc

ccccccccccc

ccccccccccc
5 changes: 5 additions & 0 deletions files_diff_base_dir/dir1/dir1_1/dir1_1_1/1_1_1a.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
1_1_1a1_1_1a1_1_1a
1_1_1a1_1_1a1_1_1a
1_1_1a1_1_1a1_1_1a
1_1_1a1_1_1a1_1_1a
1_1_1a1_1_1a1_1_1a
5 changes: 5 additions & 0 deletions files_diff_base_dir/dir1/dir1_1/dir1_1_1/1_1_1b.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
1_1_b1_1_b1_1_b1_1_b
1_1_b1_1_b1_1_b1_1_b
1_1_b1_1_b1_1_b1_1_b
1_1_b1_1_b1_1_b1_1_b
1_1_b1_1_b1_1_b1_1_b
6 changes: 6 additions & 0 deletions files_diff_base_dir/dir1/dir1_2/1_2a.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
1_2a1_2a1_2a1_2a1_2a
1_2a1_2a1_2a1_2a1_2a
1_2a1_2a1_2a1_2a1_2a
1_2a1_2a1_2a1_2a1_2a
1_2a1_2a1_2a1_2a1_2a
1_2a1_2a1_2a1_2a1_2a
5 changes: 5 additions & 0 deletions files_diff_base_dir/dir2/2a.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
2a2a2a2a2a2a
2a2a2a2a2a2a
2a2a2a2a2a2a
2a2a2a2a2a2a
2a2a2a2a2a2a
9 changes: 9 additions & 0 deletions files_diff_base_dir/dir2/2b.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
2b2b2b2b2b2b
2b2b2b2b2b2b
2b2b2b2b2b2b
2b2b2b2b2b2b
2b2b2b2b2b2b
2b2b2b2b2b2b
2b2b2b2b2b2b
2b2b2b2b2b2b
2b2b2b2b2b2b
4 changes: 4 additions & 0 deletions files_diff_base_dir/dir3/3a.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
3a3a3a3a3a3a
3a3a3a3a3a3a
3a3a3a3a3a3a
3a3a3a3a3a3a
3 changes: 3 additions & 0 deletions files_diff_cmp_dir/b.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
bbbbbbx
bbbbbbx
bbbbbbx
5 changes: 5 additions & 0 deletions files_diff_cmp_dir/c.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
cccccccccccx

cccccccccccx

cccccccccccx
4 changes: 4 additions & 0 deletions files_diff_cmp_dir/d.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
dddddddddd
dddddddddd
dddddddddd
dddddddddd
5 changes: 5 additions & 0 deletions files_diff_cmp_dir/dir1/dir1_1/dir1_1_1/1_1_1a.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
1_1_1a1_1_1a1_1_1a
1_1_1a1_1_1a1_1_1ax
1_1_1a1_1_1a1_1_1a
1_1_1a1_1_1a1_1_1ax
1_1_1a1_1_1a1_1_1a
5 changes: 5 additions & 0 deletions files_diff_cmp_dir/dir1/dir1_1/dir1_1_1/1_1_1b.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
1_1_b1_1_b1_1_b1_1_b
1_1_b1_1_b1_1_b1_1_bx
1_1_b1_1_b1_1_b1_1_b
1_1_b1_1_b1_1_b1_1_bx
1_1_b1_1_b1_1_b1_1_b
5 changes: 5 additions & 0 deletions files_diff_cmp_dir/dir1/dir1_1/dir1_1_1/1_1_1c.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
1_1_1c1_1_1c1_1_1c1_1_1c
1_1_1c1_1_1c1_1_1c1_1_1c
1_1_1c1_1_1c1_1_1c1_1_1c
1_1_1c1_1_1c1_1_1c1_1_1c
1_1_1c1_1_1c1_1_1c1_1_1c
4 changes: 4 additions & 0 deletions files_diff_cmp_dir/dir1/dir1_3/1_3a.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
1-3a1-3a1-3a1-3a
1-3a1-3a1-3a1-3a
1-3a1-3a1-3a1-3a

9 changes: 9 additions & 0 deletions files_diff_cmp_dir/dir2/2b.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
2b2b2b2b2b2b
2b2b2b2b2b2bx
2b2b2b2b2b2b
2b2b2b2b2b2bx
2b2b2b2b2b2b
2b2b2b2b2b2bx
2b2b2b2b2b2b
2b2b2b2b2b2bx
2b2b2b2b2b2b
4 changes: 4 additions & 0 deletions files_diff_cmp_dir/dir2/2c.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
2c2c2c2c
2c2c2c2c
2c2c2c2c
2c2c2c2c
4 changes: 4 additions & 0 deletions files_diff_cmp_dir/dir4/4a.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
4a4a4a4a4a4a
4a4a4a4a4a4a
4a4a4a4a4a4a
4a4a4a4a4a4a
Original file line number Diff line number Diff line change
@@ -1,9 +1,16 @@
package net.glamenvseptzen.javadifflibsbenchmark;

import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;

import com.cloudbees.diff.Diff;
import com.sksamuel.diffpatch.DiffMatchPatch;
Expand All @@ -12,6 +19,26 @@

public class BenchamrkDiffFamily {
private final Map<String, Map<String, BenchmarkResult>> brdata = new LinkedHashMap<>();
private final File filesDiffBaseDir;
private final File filesDiffCmpDir;
private final boolean enableFilesDiff;
private final String filesDiffBaseDirAbs;
private final String filesDiffCmpDirAbs;

public BenchamrkDiffFamily(final File filesDiffBaseDir, final File filesDiffCmpDir) {
this.filesDiffBaseDir = filesDiffBaseDir;
this.filesDiffCmpDir = filesDiffCmpDir;
if (Objects.nonNull(this.filesDiffBaseDir) && Objects.nonNull(this.filesDiffCmpDir)
&& this.filesDiffBaseDir.isDirectory() && this.filesDiffCmpDir.isDirectory()) {
enableFilesDiff = true;
filesDiffBaseDirAbs = this.filesDiffBaseDir.getAbsolutePath();
filesDiffCmpDirAbs = this.filesDiffCmpDir.getAbsolutePath();
} else {
enableFilesDiff = false;
filesDiffBaseDirAbs = null;
filesDiffCmpDirAbs = null;
}
}

private BenchmarkResult getBenchmarkResult(final String patternName, final String algName) {
Map<String, BenchmarkResult> algToBr = brdata.getOrDefault(patternName, new LinkedHashMap<>());
Expand Down Expand Up @@ -52,6 +79,70 @@ public void benchmark(final String patternName, final RandomStringPair p) {
tick1.tack();
}

class FilesDiffPair {
final File base;
final File cmp;

FilesDiffPair(final File base, final File cmp) {
this.base = base;
this.cmp = cmp;
}
}

// File pairs collected by traverse(); each call appends to this list.
private List<FilesDiffPair> filesDiffPairs = new ArrayList<>();

/**
 * Recursively walks {@code dir} and, for every regular file found, records a
 * {@link FilesDiffPair} when a file with the same relative path exists under the
 * compare directory.
 *
 * <p>Does nothing when the files-diff benchmark is disabled, or when {@code dir}
 * cannot be listed. Files whose absolute path does not start with the base
 * directory's absolute path are skipped, because no counterpart path can be
 * derived for them.
 *
 * @param dir directory to walk; expected to be the base directory or one of its descendants
 */
public void traverse(File dir) {
    if (!enableFilesDiff) {
        // The cached absolute paths are null in this state.
        return;
    }
    final File[] entries = dir.listFiles();
    if (entries == null) {
        // listFiles() returns null when dir is not a directory or an I/O error occurs.
        return;
    }
    for (File f : entries) {
        if (f.isDirectory()) {
            traverse(f);
            continue;
        }
        final String absBasePath = f.getAbsolutePath();
        if (!absBasePath.startsWith(filesDiffBaseDirAbs)) {
            continue;
        }
        // Swap only the leading base-directory prefix; String.replace would also
        // rewrite any later occurrence of the same text inside the path.
        final String relativePath = absBasePath.substring(filesDiffBaseDirAbs.length());
        final File cmpFile = new File(filesDiffCmpDirAbs, relativePath);
        if (!cmpFile.isFile()) {
            // Missing counterpart, or the counterpart is a directory and cannot be read as a file.
            continue;
        }
        filesDiffPairs.add(new FilesDiffPair(f, cmpFile));
    }
}

/**
 * Runs every diff implementation against each base/compare file pair found under
 * the configured directories and records the timings under the pattern name
 * {@code "files-diff"}.
 *
 * <p>Returns immediately when the files-diff benchmark is disabled. The pair list
 * is rebuilt on every call so that repeated calls do not benchmark the same pair
 * more than once per call.
 *
 * @throws IOException if a file of a pair cannot be read
 */
public void benchmarkFiles() throws IOException {
    if (!enableFilesDiff) {
        return;
    }
    // traverse() appends, so start from an empty list to avoid duplicated pairs.
    filesDiffPairs.clear();
    traverse(this.filesDiffBaseDir);

    // ISO-8859-1 maps every byte to a char, so decoding never fails on arbitrary content.
    final Charset cs = StandardCharsets.ISO_8859_1;
    final String patternName = "files-diff";

    for (FilesDiffPair pair : filesDiffPairs) {
        // Whole-file strings feed the character-based diff; line lists feed the line-based diffs.
        final String baseStr = new String(Files.readAllBytes(pair.base.toPath()), cs);
        final String cmpStr = new String(Files.readAllBytes(pair.cmp.toPath()), cs);
        final List<String> baseByLine = Files.readAllLines(pair.base.toPath(), cs);
        final List<String> cmpByLine = Files.readAllLines(pair.cmp.toPath(), cs);

        IBenchmark bm = new BenchmarkJavaDiffUtilsByLine();
        bm.diffByLine(getBenchmarkResult(patternName, "java-diff-utils:myers:by-line"), baseByLine, cmpByLine);

        bm = new BenchmarkJGitMyers();
        bm.diffByLine(getBenchmarkResult(patternName, "jgit:myers:by-line"), baseByLine, cmpByLine);

        bm = new BenchmarkJGitHistogram();
        bm.diffByLine(getBenchmarkResult(patternName, "jgit:histogram:by-line"), baseByLine, cmpByLine);

        BenchmarkResult br = getBenchmarkResult(patternName, "diff4j:HuntDiff");
        final Tick tick0 = br.tick();
        Diff.diff(baseByLine, cmpByLine, false);
        tick0.tack();

        br = getBenchmarkResult(patternName, "google-diff-match-patch");
        final Tick tick1 = br.tick();
        final DiffMatchPatch dfp = new DiffMatchPatch();
        dfp.Diff_Timeout = 0.0f; // set no-timeout
        dfp.diff_main(baseStr, cmpStr);
        tick1.tack();
    }
}

public void printResults() {
for (Map<String, BenchmarkResult> patternV : brdata.values()) {
for (BenchmarkResult br : patternV.values()) {
Expand Down
55 changes: 39 additions & 16 deletions src/main/java/net/glamenvseptzen/javadifflibsbenchmark/Main.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package net.glamenvseptzen.javadifflibsbenchmark;

import java.io.File;
import java.io.IOException;
import java.security.SecureRandom;
import java.util.Random;

Expand All @@ -12,70 +14,91 @@ public class Main {
}
}

static final int LOOP_PER_PATTERNS = 5;

public static void doBenchmark() {
public static void doBenchmark(final int patternLoopCount, final File filesDiffBaseDir,
final File filesDiffCmpDir) {
SecureRandom sr = new SecureRandom();
Random rand = new Random(sr.nextLong());
BenchamrkDiffFamily bdf = new BenchamrkDiffFamily();
BenchamrkDiffFamily bdf = new BenchamrkDiffFamily(filesDiffBaseDir, filesDiffCmpDir);

System.out.println("p1");
for (int i = 0; i < LOOP_PER_PATTERNS; i++) {
for (int i = 0; i < patternLoopCount; i++) {
RandomStringPair p = RandomStringPair.nextEmptyRandomPair(rand);
bdf.benchmark("base:empty <> cmp:random", p);
System.out.print(".");
}
System.out.println("");

System.out.println("p2");
for (int i = 0; i < LOOP_PER_PATTERNS; i++) {
for (int i = 0; i < patternLoopCount; i++) {
RandomStringPair p = RandomStringPair.nextRandomEmptyPair(rand);
bdf.benchmark("base:random <> cmp:empty", p);
System.out.print(".");
}
System.out.println("");

System.out.println("p3");
for (int i = 0; i < LOOP_PER_PATTERNS; i++) {
for (int i = 0; i < patternLoopCount; i++) {
RandomStringPair p = RandomStringPair.nextRandomPair(rand);
bdf.benchmark("base:random <> cmp:random", p);
System.out.print(".");
}
System.out.println("");

System.out.println("p4");
for (int i = 0; i < LOOP_PER_PATTERNS; i++) {
for (int i = 0; i < patternLoopCount; i++) {
RandomStringPair p = RandomStringPair.nextPrefixedRandomPair(rand);
bdf.benchmark("base:random <> cmp:(base+)random", p);
System.out.print(".");
}
System.out.println("");

System.out.println("p5");
for (int i = 0; i < LOOP_PER_PATTERNS; i++) {
for (int i = 0; i < patternLoopCount; i++) {
RandomStringPair p = RandomStringPair.nextRandomSuffixedPair(rand);
bdf.benchmark("base:random <> cmp:random(+base)", p);
System.out.print(".");
}
System.out.println("");

System.out.println("p6");
for (int i = 0; i < LOOP_PER_PATTERNS; i++) {
for (int i = 0; i < patternLoopCount; i++) {
RandomStringPair p = RandomStringPair.nextRandomPatchedPair(rand);
bdf.benchmark("base:random <> cmp:(base x random patch)", p);
System.out.print(".");
}
System.out.println("");

System.out.println("files-diff");
try {
bdf.benchmarkFiles();
} catch (IOException e) {
e.printStackTrace();
}

bdf.printResults();
}

/**
 * Command-line entry point.
 *
 * <p>Arguments, all optional and positional:
 * {@code <session_count> <pattern_loop_count> <files_diff_base_dir> <files_diff_cmp_dir>}.
 * Without arguments a usage line is printed and the defaults (3 sessions, 5 loops
 * per pattern, no directories) are used. Directories are passed on only when both
 * are given.
 *
 * @param args command-line arguments as described above
 * @throws NumberFormatException if a given count argument is not a valid integer
 */
public static void main(String[] args) {
    int sessionCount = 3;
    int patternLoopCount = 5;
    if (args.length == 0) {
        System.out.println("args: <session_count> <pattern_loop_count> <files_diff_base_dir> <files_diff_cmp_dir>");
    }
    if (args.length > 0) {
        sessionCount = Integer.parseInt(args[0]);
    }
    if (args.length > 1) {
        patternLoopCount = Integer.parseInt(args[1]);
    }
    File filesDiffBaseDir = null;
    File filesDiffCmpDir = null;
    if (args.length > 3) {
        filesDiffBaseDir = new File(args[2]);
        filesDiffCmpDir = new File(args[3]);
    } else if (args.length == 3) {
        // A lone base directory cannot be compared against anything; say so instead of ignoring it silently.
        System.err.println("files-diff disabled: <files_diff_cmp_dir> is missing");
    }
    for (int i = 1; i <= sessionCount; i++) {
        System.out.println("[Benchmark Session" + i + "]");
        doBenchmark(patternLoopCount, filesDiffBaseDir, filesDiffCmpDir);
    }
}
}

0 comments on commit 4c9e3a1

Please sign in to comment.