From b870bbd11aa24c171709528e88e8911012c778dc Mon Sep 17 00:00:00 2001 From: Vladimir Panteleev Date: Thu, 19 Sep 2024 09:39:20 +0000 Subject: [PATCH] Update DustMite Update DustMite, from the upstream commit cb0855d (+ the backported fix) to the upstream commit ab68756. * 6634109 splitter: Add null split mode * 1f40c63 dustmite: Add --reject * bba58a4 dustmite: Add --dump-json * ca93991 dustmite: Fix a placeholder variable name in --help * 4e38836 dustmite: Add --json * e504ccc dustmite: Remove use of newer D features * b173b89 Add support for reading from stdin * bb73f5e tests: Run dustmite in the test's directory * f4d2fa7 Remove the extension stripping for single-file input * 5b5e261 Move JSON loading into splitter module * 1bf5b4b dustmite: Print informational messages to standard error * 61b41ae splitter: Don't replace \ with / on POSIX * 461271f dustmite: Allow omitting the mask portion of --split * faee9d3 splitter: Refactor * fd28eb1 splitter: Fix compilation on 32 bits * 67e676f dustmite: Add --temp-dir * f594d0a dustmite: More explicit File finalization in DiskWriter * 372b4a0 dustmite: Fix stdin on MSVCRT * 4e4a303 README: Update badges * 9696d83 README: Add illustration * 56cdd01 README: Show CI status for master branch * 4da8847 Use skipOver * b8703eb polyhash: Update GDC assembler syntax * fd8c598 fuzz: Cast allAddresses length to float * 62f7899 dustmite: Fix lookback and pingpong strategies reducing to empty set * bc5f5d6 splitter: Print used splitter when loading files * c855596 Refactor Entity file properties into separate structure * 190c180 dustmite: Refactor look-ahead thread handling * 07ba696 dustmite: Handle errors * 71362ea Preserve file attributes * ed78a2e Add support for symbolic links * 675c757 dustmite: Remove explicit Writer destructors * ec5c25d Preserve file timestamps * 7482c9d Preserve directories (incl. empty directories) and their attributes * 4688772 splitter: Add lisp splitter * 5b65ade dustmite: Add some assertions * d0af4a5 splitter: Don't try to reduce module names in D source code * 178f916 splitter: Smarter diff splitting * 5f2ac60 splitter: Add debug check to ensure splitting preserves contents * a093fae splitter: Fix firstHead * 00c2400 splitter: Improve indent splitter * 1c54227 splitter: Refactor parseIndent * ab68756 dustmite: Add --man --- DustMite/dustmite.d | 668 ++++++++++++++++++++++++++++++++----------- DustMite/polyhash.d | 8 +- DustMite/splitter.d | 677 ++++++++++++++++++++++++++++++++++++++------ 3 files changed, 1094 insertions(+), 259 deletions(-) diff --git a/DustMite/dustmite.d b/DustMite/dustmite.d index ec7abf262d..e8876b7f90 100644 --- a/DustMite/dustmite.d +++ b/DustMite/dustmite.d @@ -24,7 +24,7 @@ import std.process; import std.random; import std.range; import std.regex; -import std.stdio; +import std.stdio : stdout, stderr, File; import std.string; import std.typecons; @@ -33,26 +33,33 @@ import splitter; alias Splitter = splitter.Splitter; // Issue 314 workarounds -alias std.string.join join; -alias std.string.startsWith startsWith; +alias join = std.string.join; +alias startsWith = std.algorithm.searching.startsWith; -string dir, resultDir, tester, globalCache; -string dirSuffix(string suffix) { return (dir.absolutePath().buildNormalizedPath() ~ "." ~ suffix).relativePath(); } +string dir, resultDir, tmpDir, tester, globalCache; +string dirSuffix(string suffix, Flag!q{temp} temp) +{ + return ( + (temp && tmpDir ? tmpDir.buildPath(dir.baseName) : dir) + .absolutePath().buildNormalizedPath() ~ "." 
~ suffix + ).relativePath(); +} size_t maxBreadth; size_t origDescendants; int tests, maxSteps = -1; bool foundAnything; bool noSave, trace, noRedirect, doDump, whiteout; +RemoveRule[] rejectRules; string strategy = "inbreadth"; struct Times { StopWatch total, load, testSave, resultSave, apply, lookaheadApply, lookaheadWaitThread, lookaheadWaitProcess, test, clean, globalCache, misc; } Times times; static this() { times.total.start(); times.misc.start(); } -void measure(string what)(scope void delegate() p) +T measure(string what, T)(scope T delegate() p) { times.misc.stop(); mixin("times."~what~".start();"); - p(); - mixin("times."~what~".stop();"); times.misc.start(); + scope(exit) { mixin("times."~what~".stop();"); times.misc.start(); } + return p(); } struct Reduction @@ -140,29 +147,31 @@ struct RemoveRule { Regex!char regexp; string shellGlob; bool remove; } int main(string[] args) { - bool force, dumpHtml, showTimes, stripComments, obfuscate, fuzz, keepLength, showHelp, showVersion, noOptimize, inPlace; + bool force, dumpHtml, dumpJson, readJson, showTimes, stripComments, obfuscate, fuzz, keepLength, showHelp, openWiki, showVersion, noOptimize, inPlace; string coverageDir; RemoveRule[] removeRules; string[] splitRules; uint lookaheadCount, tabWidth = 8; - args = args.filter!( - (arg) - { - if (arg.startsWith("-j")) + args = args + .filter!((string arg) { + if (arg.skipOver("-j")) { - arg = arg[2..$]; lookaheadCount = arg.length ? arg.to!uint : totalCPUs; return false; } return true; - }).array(); + }) + // Work around getopt's inability to handle "-" in 2.080.0 + .map!((string arg) => arg == "-" ? "\0" ~ arg : arg) + .array(); getopt(args, "force", &force, "reduceonly|reduce-only", (string opt, string value) { removeRules ~= RemoveRule(Regex!char.init, value, true); }, "remove" , (string opt, string value) { removeRules ~= RemoveRule(regex(value, "mg"), null, true); }, "noremove|no-remove" , (string opt, string value) { removeRules ~= RemoveRule(regex(value, "mg"), null, false); }, + "reject" , (string opt, string value) { rejectRules ~= RemoveRule(regex(value, "mg"), null, true); }, "strip-comments", &stripComments, "whiteout|white-out", &whiteout, "coverage", &coverageDir, @@ -173,6 +182,7 @@ int main(string[] args) "split", &splitRules, "dump", &doDump, "dump-html", &dumpHtml, + "dump-json", &dumpJson, "times", &showTimes, "noredirect|no-redirect", &noRedirect, "cache", &globalCache, // for research @@ -180,11 +190,16 @@ int main(string[] args) "nosave|no-save", &noSave, // for research "nooptimize|no-optimize", &noOptimize, // for research "tab-width", &tabWidth, + "temp-dir", &tmpDir, "max-steps", &maxSteps, // for research / benchmarking "i|in-place", &inPlace, + "json", &readJson, "h|help", &showHelp, + "man", &openWiki, "V|version", &showVersion, ); + foreach (ref arg; args) + arg.skipOver("\0"); // Undo getopt hack if (showVersion) { @@ -195,7 +210,14 @@ int main(string[] args) enum source = import("source"); else enum source = "upstream"; - writeln("DustMite build ", __DATE__, " (", source, "), built with ", __VENDOR__, " ", __VERSION__); + stdout.writeln("DustMite build ", __DATE__, " (", source, "), built with ", __VENDOR__, " ", __VERSION__); + if (args.length == 1) + return 0; + } + + if (openWiki) + { + browse("https://github.com/CyberShadow/DustMite/wiki"); if (args.length == 1) return 0; } @@ -204,8 +226,7 @@ int main(string[] args) { stderr.writef(q"EOS Usage: %s [OPTION]... PATH TESTER -PATH should be a directory containing a clean copy of the file-set to reduce. 
-A file path can also be specified. NAME.EXT will be treated like NAME/NAME.EXT. +PATH should contain a clean copy of the file-set to reduce. TESTER should be a shell command which returns 0 for a correct reduction, and anything else otherwise. Supported options: @@ -216,6 +237,8 @@ Supported options: (may be used multiple times) --no-remove REGEXP Do not reduce blocks containing REGEXP (may be used multiple times) + --reject REGEXP Reject reductions which cause REGEXP to occur in output + (may be used multiple times) --strip-comments Attempt to remove comments from source code --white-out Replace deleted text with spaces to preserve line numbers --coverage DIR Load .lst files corresponding to source files from DIR @@ -227,7 +250,9 @@ Supported options: --split MASK:MODE Parse and reduce files specified by MASK using the given splitter. Can be repeated. MODE must be one of: %-(%s, %) + --json Load PATH as a JSON file (same syntax as --dump-json) --no-redirect Don't redirect stdout/stderr streams of test command + --temp-dir Write and run reduction candidates in this directory -j[N] Use N look-ahead processes (%d by default) EOS", args[0], splitterNames, totalCPUs); @@ -242,10 +267,12 @@ EOS"); stderr.write(q"EOS -h, --help Show this message Less interesting options: + --man Launch the project wiki web page in a web browser -V, --version Show program version --strategy STRAT Set strategy (careful/lookback/pingpong/indepth/inbreadth) - --dump Dump parsed tree to DIR.dump file - --dump-html Dump parsed tree to DIR.html file + --dump Dump parsed tree to PATH.dump file + --dump-html Dump parsed tree to PATH.html file + --dump-json Dump parsed tree to PATH.json file --times Display verbose spent time breakdown --cache DIR Use DIR as persistent disk cache (in addition to memory cache) @@ -278,23 +305,34 @@ EOS"); bool isDotName(string fn) { return fn.startsWith(".") && !(fn=="." || fn==".."); } - bool suspiciousFilesFound; - if (!force && isDir(dir)) + if (!readJson && !force && dir.exists && dir.isDir()) + { + bool suspiciousFilesFound; foreach (string path; dirEntries(dir, SpanMode.breadth)) if (isDotName(baseName(path)) || isDotName(baseName(dirName(path))) || extension(path)==".o" || extension(path)==".obj" || extension(path)==".exe") { stderr.writeln("Warning: Suspicious file found: ", path); suspiciousFilesFound = true; } - if (suspiciousFilesFound) - stderr.writeln("You should use a clean copy of the source tree.\nIf it was your intention to include this file in the file-set to be reduced,\nyou can use --force to silence this message."); + if (suspiciousFilesFound) + stderr.writeln("You should use a clean copy of the source tree.\nIf it was your intention to include this file in the file-set to be reduced,\nyou can use --force to silence this message."); + } ParseRule parseSplitRule(string rule) { auto p = rule.lastIndexOf(':'); - enforce(p > 0, "Invalid parse rule: " ~ rule); - auto pattern = rule[0..p]; - auto splitterName = rule[p+1..$]; + string pattern, splitterName; + if (p < 0) + { + pattern = "*"; + splitterName = rule; + } + else + { + enforce(p > 0, "Invalid parse rule: " ~ rule); + pattern = rule[0 .. p]; + splitterName = rule[p + 1 .. $]; + } auto splitterIndex = splitterNames.countUntil(splitterName); enforce(splitterIndex >= 0, "Unknown splitter: " ~ splitterName); return ParseRule(pattern, cast(Splitter)splitterIndex); @@ -304,7 +342,10 @@ EOS"); ParseOptions parseOptions; parseOptions.stripComments = stripComments; - parseOptions.mode = obfuscate ? 
ParseOptions.Mode.words : ParseOptions.Mode.source; + parseOptions.mode = + readJson ? ParseOptions.Mode.json : + obfuscate ? ParseOptions.Mode.words : + ParseOptions.Mode.source; parseOptions.rules = splitRules.map!parseSplitRule().array(); parseOptions.tabWidth = tabWidth; measure!"load"({root = loadFiles(dir, parseOptions);}); @@ -324,13 +365,15 @@ EOS"); resetProgress(root); if (doDump) - dumpSet(root, dirSuffix("dump")); + dumpSet(root, dirSuffix("dump", No.temp)); if (dumpHtml) - dumpToHtml(root, dirSuffix("html")); + dumpToHtml(root, dirSuffix("html", No.temp)); + if (dumpJson) + dumpToJson(root, dirSuffix("json", No.temp)); if (tester is null) { - writeln("No tester specified, exiting"); + stderr.writeln("No tester specified, exiting"); return 0; } @@ -338,10 +381,10 @@ EOS"); resultDir = dir; else { - resultDir = dirSuffix("reduced"); + resultDir = dirSuffix("reduced", No.temp); if (resultDir.exists) { - writeln("Hint: read https://github.com/CyberShadow/DustMite/wiki#result-directory-already-exists"); + stderr.writeln("Hint: read https://github.com/CyberShadow/DustMite/wiki#result-directory-already-exists"); throw new Exception("Result directory already exists"); } } @@ -355,20 +398,20 @@ EOS"); version (Posix) { if (testerFile.exists && (testerFile.getAttributes() & octal!111) == 0) - writeln("Hint: test program seems to be a non-executable file, try: chmod +x " ~ testerFile.escapeShellFileName()); + stderr.writeln("Hint: test program seems to be a non-executable file, try: chmod +x " ~ testerFile.escapeShellFileName()); } if (!testerFile.exists && tester.exists) - writeln("Hint: test program path should be relative to the source directory, try " ~ + stderr.writeln("Hint: test program path should be relative to the source directory, try " ~ tester.absolutePath.relativePath(dir.absolutePath).escapeShellFileName() ~ " instead of " ~ tester.escapeShellFileName()); if (!noRedirect) - writeln("Hint: use --no-redirect to see test script output"); - writeln("Hint: read https://github.com/CyberShadow/DustMite/wiki#initial-test-fails"); + stderr.writeln("Hint: use --no-redirect to see test script output"); + stderr.writeln("Hint: read https://github.com/CyberShadow/DustMite/wiki#initial-test-fails"); throw new Exception("Initial test fails: " ~ nullResult.reason); } } - lookaheadProcesses = new Lookahead[lookaheadCount]; + lookaheadProcessSlots = new LookaheadSlot[lookaheadCount]; foundAnything = false; string resultAdjective; @@ -397,20 +440,20 @@ EOS"); { if (noSave) measure!"resultSave"({safeSave(root, resultDir);}); - writefln("Done in %s tests and %s; %s version is in %s", tests, duration, resultAdjective, resultDir); + stderr.writefln("Done in %s tests and %s; %s version is in %s", tests, duration, resultAdjective, resultDir); } else { - writeln("Hint: read https://github.com/CyberShadow/DustMite/wiki#reduced-to-empty-set"); - writefln("Done in %s tests and %s; %s to empty set", tests, duration, resultAdjective); + stderr.writeln("Hint: read https://github.com/CyberShadow/DustMite/wiki#reduced-to-empty-set"); + stderr.writefln("Done in %s tests and %s; %s to empty set", tests, duration, resultAdjective); } } else - writefln("Done in %s tests and %s; no reductions found", tests, duration); + stderr.writefln("Done in %s tests and %s; no reductions found", tests, duration); if (showTimes) foreach (i, t; times.tupleof) - writefln("%s: %s", times.tupleof[i].stringof, times.tupleof[i].peek()); + stderr.writefln("%s: %s", times.tupleof[i].stringof, times.tupleof[i].peek()); return 0; } @@ 
-494,7 +537,8 @@ void recalculate(Entity root) e.deadHash.put(c.isWhite ? c : ' '); } - putString(e.filename); + if (e.file) + putString(e.file.name); putString(e.head); void addDependents(R)(R range, bool fresh) @@ -602,7 +646,7 @@ void recalculate(Entity root) return; } - inFile |= e.isFile; + inFile |= e.file !is null; assert(e.hash.length == e.deadHash.length); @@ -618,7 +662,8 @@ void recalculate(Entity root) auto start = pos; - putString(e.filename); + if (e.file) + putString(e.file.name); putString(e.head); foreach (c; e.children) passWO(c, inFile); @@ -778,7 +823,7 @@ struct ReductionIterator // Try next reduction type type = Reduction.Type.Concat; - if (e.isFile) + if (e.file) return; // Try this else { @@ -971,7 +1016,7 @@ bool nextAddress(ref size_t[] address, Entity root, bool descend) class LevelStrategy : IterativeStrategy { - bool levelChanged; + bool levelChanged; // We found some reductions while traversing this level bool invalid; override int getDepth() { return cast(int)address.length; } @@ -1084,22 +1129,18 @@ final class LookbackStrategy : LevelStrategy if (!nextInLevel()) { // End of level - if (levelChanged) - { - setLevel(currentLevel ? currentLevel - 1 : 0); - } - else - if (setLevel(maxLevel + 1)) - { - maxLevel = currentLevel; - } - else + auto nextLevel = levelChanged + ? currentLevel ? currentLevel - 1 : 0 + : maxLevel + 1; + if (!setLevel(nextLevel)) { if (iterationChanged) nextIteration(); else done = true; } + else + maxLevel = max(maxLevel, currentLevel); } } } @@ -1119,12 +1160,10 @@ final class PingPongStrategy : LevelStrategy if (!nextInLevel()) { // End of level - if (levelChanged) - { - setLevel(currentLevel ? currentLevel - 1 : 0); - } - else - if (!setLevel(currentLevel + 1)) + auto nextLevel = levelChanged + ? currentLevel ? currentLevel - 1 : 0 + : currentLevel + 1; + if (!setLevel(nextLevel)) { if (iterationChanged) nextIteration(); @@ -1202,12 +1241,12 @@ void reduceByStrategy(Strategy strategy) if (lastIteration != strategy.getIteration()) { - writefln("############### ITERATION %d ################", strategy.getIteration()); + stderr.writefln("############### ITERATION %d ################", strategy.getIteration()); lastIteration = strategy.getIteration(); } if (lastDepth != strategy.getDepth()) { - writefln("============= Depth %d =============", strategy.getDepth()); + stderr.writefln("============= Depth %d =============", strategy.getDepth()); lastDepth = strategy.getDepth(); } if (lastProgressGeneration != strategy.progressGeneration) @@ -1259,7 +1298,7 @@ void obfuscate(ref Entity root, bool keepLength) foreach (f; root.children) { - foreach (entity; parseToWords(f.filename) ~ f.children) + foreach (entity; parseToWords(f.file ? f.file.name : null) ~ f.children) if (entity.head.length && !isDigit(entity.head[0])) if (entity.head !in wordSet) { @@ -1373,20 +1412,24 @@ void dump(Writer)(Entity root, Writer writer) if (e.dead) { if (inFile && e.contents.length) - writer.handleText(e.contents[e.filename.length .. $]); + writer.handleText(e.contents[(e.file ? e.file.name : null).length .. 
$]); } else - if (!inFile && e.isFile) + if (!inFile && e.file) { - writer.handleFile(e.filename); + writer.handleFile(e.file); foreach (c; e.children) dumpEntity!true(c); } else { if (inFile && e.head.length) writer.handleText(e.head); - foreach (c; e.children) - dumpEntity!inFile(c); + if (inFile) + foreach (c; e.children) + dumpEntity!inFile(c); + else // Create files in reverse order, so that directories' timestamps get set last + foreach_reverse (c; e.children) + dumpEntity!inFile(c); if (inFile && e.tail.length) writer.handleText(e.tail); } } @@ -1398,59 +1441,194 @@ static struct FastWriter(Next) /// Accelerates Writer interface by bulking conti { Next next; immutable(char)* start, end; - void finish() + + private void flush() { if (start != end) next.handleText(start[0 .. end - start]); start = end = null; } - void handleFile(string s) + + void handleFile(const(Entity.FileProperties)* fileProperties) { - finish(); - next.handleFile(s); + flush(); + next.handleFile(fileProperties); } + void handleText(string s) { if (s.ptr != end) { - finish(); + flush(); start = s.ptr; } end = s.ptr + s.length; } - ~this() { finish(); } + + void finish() + { + flush(); + next.finish(); + } } -void save(Entity root, string savedir) +// Workaround for https://issues.dlang.org/show_bug.cgi?id=23683 +// Remove when moving to a DMD version incorporating a fix +version (Windows) { - safeDelete(savedir); - safeMkdir(savedir); + import core.sys.windows.winbase; + import core.sys.windows.winnt; + import std.windows.syserror; - static struct DiskWriter + alias AliasSeq(Args...) = Args; + alias FSChar = WCHAR; + void setTimes(const(char)[] name, + SysTime accessTime, + SysTime modificationTime) { - string dir; + auto namez = (name ~ "\0").to!(FSChar[]).ptr; - File o; - typeof(o.lockingBinaryWriter()) binaryWriter; + import std.datetime.systime : SysTimeToFILETIME; + const ta = SysTimeToFILETIME(accessTime); + const tm = SysTimeToFILETIME(modificationTime); + alias defaults = + AliasSeq!(FILE_WRITE_ATTRIBUTES, + 0, + null, + OPEN_EXISTING, + FILE_ATTRIBUTE_NORMAL | + FILE_ATTRIBUTE_DIRECTORY | + FILE_FLAG_BACKUP_SEMANTICS, + HANDLE.init); + auto h = CreateFileW(namez, defaults); - void handleFile(string fn) - { - static Appender!(char[]) pathBuf; - pathBuf.clear(); - pathBuf.put(dir.chainPath(fn)); - auto path = pathBuf.data; - if (!exists(dirName(path))) - safeMkdir(dirName(path)); + wenforce(h != INVALID_HANDLE_VALUE, "CreateFileW: " ~ name); + + scope(exit) + wenforce(CloseHandle(h), "CloseHandle: " ~ name); + + wenforce(SetFileTime(h, null, &ta, &tm), "SetFileTime: " ~ name); + } +} + +static struct DiskWriter +{ + string dir; + + const(Entity.FileProperties)* fileProperties; + // Regular files + File o; + typeof(o.lockingBinaryWriter()) binaryWriter; + // Symlinks + Appender!(char[]) symlinkBuf; + + @property const(char)[] currentFilePath() + { + static Appender!(char[]) pathBuf; + pathBuf.clear(); + pathBuf.put(dir.chainPath(fileProperties.name)); + return pathBuf.data; + } + + void handleFile(const(Entity.FileProperties)* fileProperties) + { + finish(); + + this.fileProperties = fileProperties; + scope(failure) this.fileProperties = null; + + auto path = currentFilePath; + if (!exists(dirName(path))) + safeMkdir(dirName(path)); // TODO make directories nested instead + + if (attrIsSymlink(fileProperties.mode.get(0))) + symlinkBuf.clear(); + else + if (attrIsDir(fileProperties.mode.get(0))) + {} + else // regular file + { o.open(cast(string)path, "wb"); binaryWriter = o.lockingBinaryWriter; } + } - void 
handleText(string s) + void handleText(string s) + { + if (attrIsSymlink(fileProperties.mode.get(0))) + symlinkBuf.put(s); + else + if (attrIsDir(fileProperties.mode.get(0))) + enforce(s.length == 0, "Directories cannot have contents"); + else // regular file { + assert(o.isOpen); binaryWriter.put(s); } } + + void finish() + { + if (fileProperties) + { + scope(exit) fileProperties = null; + + auto path = currentFilePath; + + if (attrIsSymlink(fileProperties.mode.get(0))) + symlink(symlinkBuf.data, path); + else + if (attrIsDir(fileProperties.mode.get(0))) + mkdirRecurse(path); + else // regular file + { + assert(o.isOpen); + binaryWriter = typeof(binaryWriter).init; + o.close(); + o = File.init; // Avoid crash on Windows + } + + if (!fileProperties.mode.isNull) + { + auto mode = fileProperties.mode.get(); + if (!attrIsSymlink(mode)) + setAttributes(path, mode); + } + if (!fileProperties.times.isNull) + setTimes(path, fileProperties.times.get()[0], fileProperties.times.get()[1]); + } + } +} + +struct MemoryWriter +{ + char[] buf; + size_t pos; + + void handleFile(const(Entity.FileProperties)* fileProperties) {} + + void handleText(string s) + { + auto end = pos + s.length; + if (buf.length < end) + { + buf.length = end; + buf.length = buf.capacity; + } + buf[pos .. end] = s; + pos = end; + } + + void reset() { pos = 0; } + char[] data() { return buf[0 .. pos]; } +} + +void save(Entity root, string savedir) +{ + safeDelete(savedir); + safeMkdir(savedir); + FastWriter!DiskWriter writer; writer.next.dir = savedir; dump(root, &writer); @@ -1518,7 +1696,7 @@ bool tryReduction(ref Entity root, Reduction r) if (newRoot is root) { assert(r.type != Reduction.Type.None); - writeln(r, " => N/A"); + stderr.writeln(r, " => N/A"); return false; } if (test(newRoot, [r]).success) @@ -1606,7 +1784,8 @@ Entity applyReductionImpl(Entity origRoot, ref Reduction r) { auto fa = rootAddress.children[i]; auto f = edit(fa); - f.filename = applyReductionToPath(f.filename, r); + if (f.file) + f.file.name = applyReductionToPath(f.file.name, r); foreach (j, const word; f.children) if (word.head == r.from) edit(fa.children[j]).head = r.to; @@ -1660,7 +1839,7 @@ Entity applyReductionImpl(Entity origRoot, ref Reduction r) { if (e.dead) return; - if (e.isFile) + if (e.file) { // Skip noRemove files, except when they are the target // (in which case they will keep their contents after the reduction). 
@@ -1850,10 +2029,10 @@ RoundRobinCache!(ReductionCacheKey, Entity) reductionCache; Entity applyReduction(Entity origRoot, ref Reduction r) { - if (lookaheadProcesses.length) + if (lookaheadProcessSlots.length) { if (!reductionCache.keys) - reductionCache.requireSize(1 + lookaheadProcesses.length); + reductionCache.requireSize(1 + lookaheadProcessSlots.length); auto cacheKey = ReductionCacheKey(origRoot, r); return reductionCache.get(cacheKey, applyReductionImpl(origRoot, r)); @@ -1894,10 +2073,10 @@ void autoRetry(scope void delegate() fun, lazy const(char)[] operation) } catch (Exception e) { - writeln("Error while attempting to " ~ operation ~ ": " ~ e.msg); + stderr.writeln("Error while attempting to " ~ operation ~ ": " ~ e.msg); import core.thread; Thread.sleep(dur!"seconds"(1)); - writeln("Retrying..."); + stderr.writeln("Retrying..."); } } @@ -1955,14 +2134,15 @@ void saveResult(Entity root) measure!"resultSave"({safeSave(root, resultDir);}); } -struct Lookahead +struct LookaheadSlot { + bool active; Thread thread; shared Pid pid; string testdir; EntityHash digest; } -Lookahead[] lookaheadProcesses; +LookaheadSlot[] lookaheadProcessSlots; TestResult[EntityHash] lookaheadResults; @@ -2002,10 +2182,14 @@ struct TestResult lookahead, diskCache, ramCache, + reject, + error, } Source source; int status; + string error; + string reason() { final switch (source) @@ -2022,6 +2206,10 @@ struct TestResult return "Test result was cached on disk as " ~ (success ? "success" : "failure"); case Source.ramCache: return "Test result was cached in memory as " ~ (success ? "success" : "failure"); + case Source.reject: + return "Test result was rejected by a --reject rule"; + case Source.error: + return "Error: " ~ error; } } } @@ -2031,7 +2219,7 @@ TestResult test( Reduction[] reductions, /// For display purposes only ) { - writef("%-(%s, %) => ", reductions); stdout.flush(); + stderr.writef("%-(%s, %) => ", reductions); stdout.flush(); EntityHash digest = root.hash; @@ -2041,7 +2229,7 @@ TestResult test( if (cacheResult) { // Note: as far as I can see, a cache hit for a positive reduction is not possible (except, perhaps, for a no-op reduction) - writeln(*cacheResult ? "Yes" : "No", " (cached)"); + stderr.writeln(*cacheResult ? 
"Yes" : "No", " (cached)"); return TestResult(*cacheResult, TestResult.Source.ramCache); } auto result = fallback; @@ -2062,13 +2250,13 @@ TestResult test( measure!"globalCache"({ found = exists(cacheBase~"0"); }); if (found) { - writeln("No (disk cache)"); + stderr.writeln("No (disk cache)"); return TestResult(false, TestResult.Source.diskCache); } measure!"globalCache"({ found = exists(cacheBase~"1"); }); if (found) { - writeln("Yes (disk cache)"); + stderr.writeln("Yes (disk cache)"); return TestResult(true, TestResult.Source.diskCache); } auto result = fallback; @@ -2085,34 +2273,57 @@ TestResult test( { // Handle existing lookahead jobs - TestResult reap(ref Lookahead process, int status) + Nullable!TestResult reapThread(ref LookaheadSlot slot) { - scope(success) process = Lookahead.init; - safeDelete(process.testdir); - if (process.thread) - process.thread.join(/*rethrow:*/true); - return lookaheadResults[process.digest] = TestResult(status == 0, TestResult.Source.lookahead, status); + try + { + slot.thread.join(/*rethrow:*/true); + slot.thread = null; + return typeof(return)(); + } + catch (Exception e) + { + scope(success) slot = LookaheadSlot.init; + safeDelete(slot.testdir); + auto result = TestResult(false, TestResult.Source.error); + result.error = e.msg; + lookaheadResults[slot.digest] = result; + return typeof(return)(result); + } + } + + TestResult reapProcess(ref LookaheadSlot slot, int status) + { + scope(success) slot = LookaheadSlot.init; + safeDelete(slot.testdir); + if (slot.thread) + reapThread(slot); // should be null + return lookaheadResults[slot.digest] = TestResult(status == 0, TestResult.Source.lookahead, status); } - foreach (ref process; lookaheadProcesses) - if (process.thread) + foreach (ref slot; lookaheadProcessSlots) // Reap threads + if (slot.thread) { debug (DETERMINISTIC_LOOKAHEAD) - { - process.thread.join(/*rethrow:*/true); - process.thread = null; - } + reapThread(slot); + else + if (!slot.thread.isRunning) + reapThread(slot); + } - auto pid = cast()atomicLoad(process.pid); + foreach (ref slot; lookaheadProcessSlots) // Reap processes + if (slot.active) + { + auto pid = cast()atomicLoad(slot.pid); if (pid) { debug (DETERMINISTIC_LOOKAHEAD) - reap(process, pid.wait()); + reapProcess(slot, pid.wait()); else { auto waitResult = pid.tryWait(); if (waitResult.terminated) - reap(process, waitResult.status); + reapProcess(slot, waitResult.status); } } } @@ -2132,8 +2343,8 @@ TestResult test( size_t numSteps; - foreach (ref process; lookaheadProcesses) - while (!process.thread && !predictionTree.empty) + foreach (ref slot; lookaheadProcessSlots) + while (!slot.active && !predictionTree.empty) { auto state = predictionTree.front; predictionTree.removeFront(); @@ -2141,7 +2352,7 @@ TestResult test( retryIter: if (state.iter.done) continue; - reductionCache.requireSize(lookaheadProcesses.length + ++numSteps); + reductionCache.requireSize(lookaheadProcessSlots.length + ++numSteps); auto reduction = state.iter.front; Entity newRoot; measure!"lookaheadApply"({ newRoot = state.iter.root.applyReduction(reduction); }); @@ -2154,7 +2365,7 @@ TestResult test( auto digest = newRoot.hash; double prediction; - if (digest in cache || digest in lookaheadResults || lookaheadProcesses[].canFind!(p => p.thread && p.digest == digest)) + if (digest in cache || digest in lookaheadResults || lookaheadProcessSlots[].canFind!(p => p.thread && p.digest == digest)) { if (digest in cache) prediction = cache[digest] ? 
1 : 0; @@ -2166,25 +2377,26 @@ TestResult test( } else { - process.digest = digest; + slot.active = true; + slot.digest = digest; static int counter; - process.testdir = dirSuffix("lookahead.%d".format(counter++)); + slot.testdir = dirSuffix("lookahead.%d".format(counter++), Yes.temp); // Saving and process creation are expensive. // Don't block the main thread, use a worker thread instead. - static void runThread(Entity newRoot, ref Lookahead process, string tester) + static void runThread(Entity newRoot, ref LookaheadSlot slot, string tester) { - process.thread = new Thread({ - save(newRoot, process.testdir); + slot.thread = new Thread({ + save(newRoot, slot.testdir); auto nul = File(nullFileName, "w+"); - auto pid = spawnShell(tester, nul, nul, nul, null, Config.none, process.testdir); - atomicStore(process.pid, cast(shared)pid); + auto pid = spawnShell(tester, nul, nul, nul, null, Config.none, slot.testdir); + atomicStore(slot.pid, cast(shared)pid); }); - process.thread.start(); + slot.thread.start(); } - runThread(newRoot, process, tester); + runThread(newRoot, slot, tester); prediction = state.predictor.predict(); } @@ -2209,26 +2421,35 @@ TestResult test( auto plookaheadResult = digest in lookaheadResults; if (plookaheadResult) { - writeln(plookaheadResult.success ? "Yes" : "No", " (lookahead)"); + stderr.writeln(plookaheadResult.success ? "Yes" : "No", " (lookahead)"); return *plookaheadResult; } - foreach (ref process; lookaheadProcesses) + foreach (ref slot; lookaheadProcessSlots) { - if (process.thread && process.digest == digest) + if (slot.active && slot.digest == digest) { // Current test is already being tested in the background, wait for its result. // Join the thread first, to guarantee that there is a pid - measure!"lookaheadWaitThread"({ process.thread.join(/*rethrow:*/true); }); - process.thread = null; + if (slot.thread) + { + auto result = measure!"lookaheadWaitThread"({ + return reapThread(slot); + }); + if (!result.isNull) + { + stderr.writefln("%s (lookahead-wait: %s)", result.get().success ? "Yes" : "No", result.get().source); + return result.get(); + } + } - auto pid = cast()atomicLoad(process.pid); + auto pid = cast()atomicLoad(slot.pid); int exitCode; measure!"lookaheadWaitProcess"({ exitCode = pid.wait(); }); - auto result = reap(process, exitCode); - writeln(result.success ? "Yes" : "No", " (lookahead-wait)"); + auto result = reapProcess(slot, exitCode); + stderr.writeln(result.success ? "Yes" : "No", " (lookahead-wait)"); return result; } } @@ -2237,29 +2458,91 @@ TestResult test( return fallback; } + TestResult testReject(lazy TestResult fallback) + { + if (rejectRules.length) + { + bool defaultReject = !rejectRules.front.remove; + + bool scan(Entity e) + { + if (e.file) + { + static MemoryWriter writer; + writer.reset(); + dump(e, &writer); + + static bool[] removeCharBuf; + if (removeCharBuf.length < writer.data.length) + removeCharBuf.length = writer.data.length; + auto removeChar = removeCharBuf[0 .. writer.data.length]; + removeChar[] = defaultReject; + + foreach (ref rule; rejectRules) + if (rule.regexp !is Regex!char.init) + foreach (m; writer.data.matchAll(rule.regexp)) + { + auto start = m.hit.ptr - writer.data.ptr; + auto end = start + m.hit.length; + removeChar[start .. 
end] = rule.remove; + } + + if (removeChar.canFind(true)) + return true; + } + else + foreach (c; e.children) + if (scan(c)) + return true; + return false; + } + + if (scan(root)) + { + stderr.writeln("No (rejected)"); + return TestResult(false, TestResult.Source.reject); + } + } + return fallback; + } + + TestResult handleError(lazy TestResult fallback) + { + try + return fallback; + catch (Exception e) + { + auto result = TestResult(false, TestResult.Source.error); + result.error = e.msg; + stderr.writefln("No (error: %s)", e.msg); + return result; + } + } + TestResult doTest() { - string testdir = dirSuffix("test"); + string testdir = dirSuffix("test", Yes.temp); measure!"testSave"({save(root, testdir);}); scope(exit) measure!"clean"({safeDelete(testdir);}); + auto nullRead = File(nullFileName, "rb"); Pid pid; if (noRedirect) - pid = spawnShell(tester, null, Config.none, testdir); + pid = spawnShell(tester, nullRead, stdout , stderr , null, Config.none, testdir); else { - auto nul = File(nullFileName, "w+"); - pid = spawnShell(tester, nul, nul, nul, null, Config.none, testdir); + auto nullWrite = File(nullFileName, "wb"); + pid = spawnShell(tester, nullRead, nullWrite, nullWrite, null, Config.none, testdir); } int status; measure!"test"({status = pid.wait();}); auto result = TestResult(status == 0, TestResult.Source.tester, status); - writeln(result.success ? "Yes" : "No"); + stderr.writeln(result.success ? "Yes" : "No"); return result; } - auto result = ramCached(diskCached(lookahead(doTest()))); - if (trace) saveTrace(root, reductions, dirSuffix("trace"), result.success); + auto result = ramCached(diskCached(testReject(lookahead(handleError(doTest()))))); + if (trace) saveTrace(root, reductions, dirSuffix("trace", No.temp), result.success); return result; } @@ -2323,20 +2606,20 @@ void applyNoRemoveRules(Entity root, RemoveRule[] removeRules) // don't remove anything except what's specified by the rule. bool defaultRemove = !removeRules.front.remove; - auto files = root.isFile ? [root] : root.children; + auto files = root.file ? [root] : root.children; foreach (f; files) { - assert(f.isFile); + assert(f.file); // Check file name bool removeFile = defaultRemove; foreach (rule; removeRules) { if ( - (rule.shellGlob && f.filename.globMatch(rule.shellGlob)) + (rule.shellGlob && f.file.name.globMatch(rule.shellGlob)) || - (rule.regexp !is Regex!char.init && f.filename.match(rule.regexp)) + (rule.regexp !is Regex!char.init && f.file.name.match(rule.regexp)) ) removeFile = rule.remove; } @@ -2359,6 +2642,7 @@ void applyNoRemoveRules(Entity root, RemoveRule[] removeRules) return true; auto start = s.ptr - f.contents.ptr; auto end = start + s.length; + assert(start <= end && end <= f.contents.length, "String is not a slice of the file"); return removeChar[start .. 
end].all; } @@ -2406,10 +2690,10 @@ void loadCoverage(Entity root, string dir) { void scanFile(Entity f) { - auto fn = buildPath(dir, setExtension(baseName(f.filename), "lst")); + auto fn = buildPath(dir, setExtension(baseName(f.file.name), "lst")); if (!exists(fn)) return; - writeln("Loading coverage file ", fn); + stderr.writeln("Loading coverage file ", fn); static bool covered(string line) { @@ -2451,7 +2735,7 @@ void loadCoverage(Entity root, string dir) void scanFiles(Entity e) { - if (e.isFile) + if (e.file) scanFile(e); else foreach (c; e.children) @@ -2492,7 +2776,8 @@ void convertRefs(Entity root) void convertRef(ref EntityRef r) { assert(r.entity && !r.address); - r.address = addresses[r.entity.id]; + r.address = addresses.get(r.entity.id, null); + assert(r.address, "Dependent not in tree"); r.entity = null; } @@ -2597,7 +2882,7 @@ void dumpSet(Entity root, string fn) f.write( " ", e.redirect ? "-> " ~ text(findEntityEx(root, e.redirect).entity.id) ~ " " : "", - e.isFile ? e.filename ? printableFN(e.filename) ~ " " : null : e.head ? printable(e.head) ~ " " : null, + e.file ? e.file.name ? printableFN(e.file.name) ~ " " : null : e.head ? printable(e.head) ~ " " : null, e.tail ? printable(e.tail) ~ " " : null, e.comment ? "/* " ~ e.comment ~ " */ " : null, "]" @@ -2606,7 +2891,7 @@ void dumpSet(Entity root, string fn) else { f.writeln(e.comment ? " // " ~ e.comment : null); - if (e.isFile) f.writeln(prefix, " ", printableFN(e.filename)); + if (e.file) f.writeln(prefix, " ", printableFN(e.file.name)); if (e.head) f.writeln(prefix, " ", printable(e.head)); foreach (c; e.children) print(c, depth+1); @@ -2654,10 +2939,10 @@ void dumpToHtml(Entity root, string fn) void dump(Entity e) { - if (e.isFile) + if (e.file) { buf.put("
<h1>");
-			dumpText(e.filename);
+			dumpText(e.file.name);
 			buf.put("</h1><pre>");
 			foreach (c; e.children)
 				dump(c);
@@ -2683,6 +2968,61 @@ EOT");
 	std.file.write(fn, buf.data());
 }
 
+void dumpToJson(Entity root, string fn)
+{
+	import std.json : JSONValue;
+
+	bool[const(Address)*] needLabel;
+
+	void scan(Entity e, const(Address)* addr)
+	{
+		foreach (dependent; e.dependents)
+		{
+			assert(dependent.address);
+			needLabel[dependent.address] = true;
+		}
+		foreach (i, child; e.children)
+			scan(child, addr.child(i));
+	}
+	scan(root, &rootAddress);
+
+	JSONValue toJson(Entity e, const(Address)* addr)
+	{
+		JSONValue[string] o;
+
+		if (e.file)
+			o["filename"] = e.file.name;
+
+		if (e.head.length)
+			o["head"] = e.head;
+		if (e.children.length)
+			o["children"] = e.children.length.iota.map!(i =>
+				toJson(e.children[i], addr.child(i))
+			).array;
+		if (e.tail.length)
+			o["tail"] = e.tail;
+
+		if (e.noRemove)
+			o["noRemove"] = true;
+
+		if (addr in needLabel)
+			o["label"] = e.id.to!string;
+		if (e.dependents.length)
+			o["dependents"] = e.dependents.map!((ref dependent) =>
+				root.findEntity(dependent.address).entity.id.to!string
+			).array;
+
+		return JSONValue(o);
+	}
+
+	auto jsonDoc = JSONValue([
+		"version" : JSONValue(1),
+		"root" : toJson(root, &rootAddress),
+	]);
+
+	std.file.write(fn, jsonDoc.toPrettyString());
+}
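A minimal sketch (not part of the patch; file names and contents are invented) of the document shape that dumpToJson emits and that loadJson in splitter.d accepts back via --json. The field names match the code above; "dependents" entries refer to another entity's "label":

	import std.json;

	auto doc = JSONValue([
		"version" : JSONValue(1),
		"root" : JSONValue([
			"children" : JSONValue([
				JSONValue([
					"filename" : JSONValue("a.d"),
					"head"     : JSONValue("int x;\n"),
					"label"    : JSONValue("1"),
				]),
				JSONValue([
					"filename"   : JSONValue("b.d"),
					"head"       : JSONValue("int y;\n"),
					"dependents" : JSONValue([JSONValue("1")]),
				]),
			]),
		]),
	]);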
+
 // void dumpText(string fn, ref Reduction r = nullReduction)
 // {
 // 	auto f = File(fn, "wt");
@@ -2694,7 +3034,7 @@ version(testsuite)
 shared static this()
 {
 	import core.runtime;
-	"../cov".mkdir.collectException();
-	dmd_coverDestPath("../cov");
+	"../../cov".mkdir.collectException();
+	dmd_coverDestPath("../../cov");
 	dmd_coverSetMerge(true);
 }
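A usage note for the reworked measure template near the top of this file (now `T measure(string what, T)(scope T delegate() p)`): it forwards the delegate's return value, so timed calls compose as expressions. A minimal sketch, where `pid` stands for some already-running child process (an assumption for illustration):

	// The wait time is attributed to times.test:
	int exitCode = measure!"test"({ return pid.wait(); });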
diff --git a/DustMite/polyhash.d b/DustMite/polyhash.d
index 5fa9766f62..13ab910d17 100644
--- a/DustMite/polyhash.d
+++ b/DustMite/polyhash.d
@@ -290,8 +290,8 @@ if (is(T : long) && T.sizeof >= 2)
 				asm
 				{
 					"`~x86SignedOpPrefix!T~`mul`~x86SizeOpSuffix!T~` %3"
-					: "=a" low, "=d" high
-					: "a" a, "rm" b;
+					: "=a"(low), "=d"(high)
+					: "a"(a), "rm"(b);
 				}
 			`);
 			return typeof(return)(low, high);
@@ -363,8 +363,8 @@ if (is(T : long) && T.sizeof >= 2 && is(L == LongInt!T))
 				asm
 				{
 					"`~x86SignedOpPrefix!T~`div`~x86SizeOpSuffix!T~` %4"
-					: "=a" quotient, "=d" remainder
-					: "a" low, "d" high, "rm" b;
+					: "=a"(quotient), "=d"(remainder)
+					: "a"(low), "d"(high), "rm"(b);
 				}
 			`);
 			return typeof(return)(quotient, remainder);
diff --git a/DustMite/splitter.d b/DustMite/splitter.d
index ab5da91cc6..be8d5bf372 100644
--- a/DustMite/splitter.d
+++ b/DustMite/splitter.d
@@ -8,14 +8,18 @@ import std.ascii;
 import std.algorithm;
 import std.array;
 import std.conv;
+import std.datetime.systime;
 import std.exception;
 import std.file;
 import std.functional;
 import std.path;
 import std.range;
+import std.stdio : File, stdin;
 import std.string;
 import std.traits;
 import std.stdio : stderr;
+import std.typecons;
+import std.utf : byChar;
 
 import polyhash;
 
@@ -65,8 +69,15 @@ final class Entity
 	Entity[] children;     /// This node's children nodes, e.g. the statements of the statement block.
 	string tail;           /// This node's "tail", e.g. "}" for a statement block.
 
-	string filename, contents;
-	@property bool isFile() { return filename != ""; }
+	string contents;
+
+	struct FileProperties
+	{
+		string name;       /// Relative to the reduction root
+		Nullable!uint mode; /// OS-specific (std.file.getAttributes)
+		Nullable!(SysTime[2]) times; /// Access and modification times
+	}
+	FileProperties* file;  /// If non-null, this node represents a file
 
 	bool isPair;           /// Internal hint for --dump output
 	bool noRemove;         /// Don't try removing this entity (children OK)
@@ -133,22 +144,18 @@ private: // Used during parsing only
 	debug string[] comments;  /// Used to debug the splitter
 }
 
-enum Mode
-{
-	source,
-	words,     /// split identifiers, for obfuscation
-}
-
 enum Splitter
 {
 	files,     /// Load entire files only
 	lines,     /// Split by line ends
+	null_,     /// Split by the \0 (NUL) character
 	words,     /// Split by whitespace
 	D,         /// Parse D source code
 	diff,      /// Unified diffs
 	indent,    /// Indentation (Python, YAML...)
+	lisp,      /// Lisp and similar languages
 }
-immutable string[] splitterNames = [EnumMembers!Splitter].map!(e => e.text().toLower()).array();
+immutable string[] splitterNames = [EnumMembers!Splitter].map!(e => e.text().toLower().chomp("_")).array();
 
 struct ParseRule
 {
@@ -158,7 +165,12 @@ struct ParseRule
 
 struct ParseOptions
 {
-	enum Mode { source, words }
+	enum Mode
+	{
+		source,
+		words,     /// split identifiers, for obfuscation
+		json,
+	}
 
 	bool stripComments;
 	ParseRule[] rules;
@@ -166,21 +178,22 @@ struct ParseOptions
 	uint tabWidth;
 }
 
+version (Posix) {} else
+{
+	// Non-POSIX symlink stubs
+	string readLink(const(char)[]) { throw new Exception("Sorry, symbolic links are only supported on POSIX systems"); }
+	void symlink(const(char)[], const(char)[]) { throw new Exception("Sorry, symbolic links are only supported on POSIX systems"); }
+}
+
 /// Parse the given file/directory.
-/// For files, modifies path to be the base name for .test / .reduced directories.
+/// For files, modifies `path` to be the base name for .test / .reduced directories.
 Entity loadFiles(ref string path, ParseOptions options)
 {
-	if (isFile(path))
-	{
-		auto filePath = path;
-		path = stripExtension(path);
-		return loadFile(filePath.baseName(), filePath, options);
-	}
-	else
+	if (path != "-" && !path.isSymlink && path.exists && path.isDir)
 	{
 		auto set = new Entity();
-		foreach (string entry; dirEntries(path, SpanMode.breadth).array.sort!((a, b) => a.name < b.name))
-			if (isFile(entry))
+		foreach (string entry; dirEntries(path, SpanMode.breadth, /*followSymlink:*/false).array.sort!((a, b) => a.name < b.name))
+			if (isSymlink(entry) || isFile(entry) || isDir(entry))
 			{
 				assert(entry.startsWith(path));
 				auto name = entry[path.length+1..$];
@@ -188,6 +201,16 @@ Entity loadFiles(ref string path, ParseOptions options)
 			}
 		return set;
 	}
+	else
+	{
+		auto realPath = path;
+		string name; // For Entity.FileProperties.name
+		if (path == "-" || path == "/dev/stdin")
+			name = path = "stdin";
+		else
+			name = realPath.baseName();
+		return loadFile(name, realPath, options);
+	}
 }
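A hedged usage sketch of the stdin support above: passing "-" loads the input from standard input, and `path` is rewritten to "stdin", which then names the .test / .reduced directories:

	string path = "-";
	auto root = loadFiles(path, ParseOptions.init); // blocks, reading stdin
	assert(path == "stdin");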
 
 enum BIN_SIZE = 2;
@@ -239,61 +262,117 @@ immutable ParseRule[] defaultRules =
 [
 	{ "*.d"    , Splitter.D     },
 	{ "*.di"   , Splitter.D     },
+
 	{ "*.diff" , Splitter.diff  },
 	{ "*.patch", Splitter.diff  },
+
+	{ "*.lisp" , Splitter.lisp  },
+	{ "*.cl"   , Splitter.lisp  },
+	{ "*.lsp"  , Splitter.lisp  },
+	{ "*.el"   , Splitter.lisp  },
+
 	{ "*"      , Splitter.files },
 ];
 
+void[] readFile(File f)
+{
+	import std.range.primitives : put;
+	auto result = appender!(ubyte[]);
+	auto size = f.size;
+	if (size <= uint.max)
+		result.reserve(cast(size_t)size);
+	put(result, f.byChunk(64 * 1024));
+	return result.data;
+}
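readFile reads through byChunk, so it also works on unseekable streams such as stdin, where std.file.read is not applicable; File.size can report ulong.max for such streams, hence the `size <= uint.max` guard before reserving. E.g. (sketch):

	auto text = cast(string) readFile(stdin); // slurp a stream of unknown size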
+
 Entity loadFile(string name, string path, ParseOptions options)
 {
-	stderr.writeln("Loading ", path);
+	auto base = name.baseName();
+	Splitter splitterType = chain(options.rules, defaultRules).find!(rule => base.globMatch(rule.pattern)).front.splitter;
+
+	Nullable!uint mode;
+	if (path != "-")
+	{
+		mode = getLinkAttributes(path);
+		if (attrIsSymlink(mode.get()) || attrIsDir(mode.get()))
+			splitterType = Splitter.files;
+	}
+
+	stderr.writeln("Loading ", path, " [", splitterType, "]");
+	auto contents =
+		attrIsSymlink(mode.get(0)) ? path.readLink() :
+		attrIsDir(mode.get(0)) ? null :
+		cast(string)readFile(path == "-" ? stdin : File(path, "rb"));
+
+	if (options.mode == ParseOptions.Mode.json)
+		return loadJson(contents);
+
 	auto result = new Entity();
-	result.filename = name.replace(`\`, `/`);
-	result.contents = cast(string)read(path);
+	result.file = new Entity.FileProperties;
+	result.file.name = name.replace(dirSeparator, `/`);
+	result.file.mode = mode;
+	if (!mode.isNull() && !attrIsSymlink(mode.get()) && path != "-")
+	{
+		SysTime accessTime, modificationTime;
+		getTimes(path, accessTime, modificationTime);
+		result.file.times = [accessTime, modificationTime];
+	}
+	result.contents = contents;
 
-	auto base = name.baseName();
-	foreach (rule; chain(options.rules, defaultRules))
-		if (base.globMatch(rule.pattern))
-		{
-			final switch (rule.splitter)
+	final switch (splitterType)
+	{
+		case Splitter.files:
+			result.children = [new Entity(result.contents, null, null)];
+			break;
+		case Splitter.lines:
+			result.children = parseToLines(result.contents);
+			break;
+		case Splitter.words:
+			result.children = parseToWords(result.contents);
+			break;
+		case Splitter.null_:
+			result.children = parseToNull(result.contents);
+			break;
+		case Splitter.D:
+			if (result.contents.startsWith("Ddoc"))
+				goto case Splitter.files;
+
+			DSplitter splitter;
+			if (options.stripComments)
+				result.contents = splitter.stripComments(result.contents);
+
+			final switch (options.mode)
 			{
-				case Splitter.files:
-					result.children = [new Entity(result.contents, null, null)];
-					return result;
-				case Splitter.lines:
-					result.children = parseToLines(result.contents);
-					return result;
-				case Splitter.words:
-					result.children = parseToWords(result.contents);
-					return result;
-				case Splitter.D:
-				{
-					if (result.contents.startsWith("Ddoc"))
-						goto case Splitter.files;
+				case ParseOptions.Mode.json:
+					assert(false);
+				case ParseOptions.Mode.source:
+					result.children = splitter.parse(result.contents);
+					break;
+				case ParseOptions.Mode.words:
+					result.children = splitter.parseToWords(result.contents);
+					break;
+			}
+			break;
+		case Splitter.diff:
+			result.children = parseDiff(result.contents);
+			break;
+		case Splitter.indent:
+			result.children = parseIndent(result.contents, options.tabWidth);
+			break;
+		case Splitter.lisp:
+			result.children = parseLisp(result.contents);
+			break;
+	}
 
-					DSplitter splitter;
-					if (options.stripComments)
-						result.contents = splitter.stripComments(result.contents);
+	debug
+	{
+		string resultContents;
+		void walk(Entity[] entities) { foreach (e; entities) { resultContents ~= e.head; walk(e.children); resultContents ~= e.tail; }}
+		walk(result.children);
+		assert(result.contents == resultContents, "Contents mismatch after splitting:\n" ~ resultContents);
+	}
 
-					final switch (options.mode)
-					{
-						case ParseOptions.Mode.source:
-							result.children = splitter.parse(result.contents);
-							return result;
-						case ParseOptions.Mode.words:
-							result.children = splitter.parseToWords(result.contents);
-							return result;
-					}
-				}
-				case Splitter.diff:
-					result.children = parseDiff(result.contents);
-					return result;
-				case Splitter.indent:
-					result.children = parseIndent(result.contents, options.tabWidth);
-					return result;
-			}
-		}
-	assert(false); // default * rule should match everything
+	return result;
 }
 
 // *****************************************************************************************************************************************************************************
@@ -866,6 +945,49 @@ struct DSplitter
 		}
 	}
 
+	// Join together module names. We should not attempt to reduce "import std.stdio" to "import std" (or "import stdio").
+	static void postProcessImports(ref Entity[] entities)
+	{
+		if (entities.length && entities[0].head.strip == "import" && !entities[0].children.length && !entities[0].tail.length)
+			foreach (entity; entities[1 .. $])
+			{
+				static void visit(Entity entity)
+				{
+					static bool isValidModuleName(string s) { return s.byChar.all!(c => isWordChar(c) || isWhite(c) || c == '.'); }
+					static bool canBeMerged(Entity entity)
+					{
+						return
+							isValidModuleName(entity.head) &&
+							entity.children.all!(child => canBeMerged(child)) &&
+							isValidModuleName(entity.tail);
+					}
+
+					if (canBeMerged(entity))
+					{
+						auto root = entity;
+						// Link all ancestors to the root, and in reverse, therefore making them inextricable.
+						void link(Entity entity)
+						{
+							entity.dependents ~= EntityRef(root);
+							// root.dependents ~= EntityRef(entity);
+							foreach (child; entity.children)
+								link(child);
+						}
+						foreach (child; entity.children)
+							link(child);
+					}
+					else
+					{
+						foreach (child; entity.children)
+							visit(child);
+					}
+				}
+
+				foreach (child; entity.children)
+					visit(child);
+			}
+	}
+
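An editorial illustration of the effect (not part of the patch):

	// Given `import std.stdio;`, every token of the dotted module name gets
	// the name's root entity added to its dependents, so removing "std" or
	// "stdio" in isolation would cascade into removing the whole name.
	// DustMite thus never reduces it to "import std;" or "import stdio;".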
 	static void postProcessDependency(ref Entity[] entities)
 	{
 		if (entities.length < 2)
@@ -1014,7 +1136,7 @@ struct DSplitter
 		{
 			if (parenKeywordTokens.canFind(entities[i].token))
 			{
-				auto pparen = firstHead(entities[i+1]);
+				auto pparen = firstNonEmpty(entities[i+1]);
 				if (pparen
 				 && *pparen !is entities[i+1]
 				 && pparen.token == tokenLookup!"(")
@@ -1086,6 +1208,7 @@ struct DSplitter
 				postProcessRecursive(e.children);
 
 		postProcessSimplify(entities);
+		postProcessImports(entities);
 		postProcessTemplates(entities);
 		postProcessDependency(entities);
 		postProcessBlockKeywords(entities);
@@ -1222,16 +1345,18 @@ struct DSplitter
 		postProcessArgs(entities);
 	}
 
-	static Entity* firstHead(ref return Entity e)
+	static Entity* firstNonEmpty(ref return Entity e)
 	{
 		if (e.head.length)
 			return &e;
 		foreach (ref c; e.children)
 		{
-			auto r = firstHead(c);
+			auto r = firstNonEmpty(c);
 			if (r)
 				return r;
 		}
+		if (e.tail.length)
+			return &e;
 		return null;
 	}
 
@@ -1265,6 +1390,7 @@ Entity[] parseSplit(alias fun)(string text)
 
 alias parseToWords = parseSplit!isNotAlphaNum;
 alias parseToLines = parseSplit!isNewline;
+alias parseToNull  = parseSplit!(c => c == '\0');
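A sketch of the new NUL split mode (selectable as `--split MASK:null`), assuming find -print0 style input; the result is grouped the same way as with the lines splitter, just with '\0' as the separator:

	auto parts = parseToNull("alpha\0beta\0");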
 
 /// Split s on end~start, preserving end and start on each chunk
 private string[] split2(string end, string start)(string s)
@@ -1295,9 +1421,45 @@ unittest
 	assert(split2!("]", "[")("[foo] [bar]") == ["[foo] [bar]"]);
 }
 
+// From ae.utils.array
+template skipWhile(alias pred)
+{
+	T[] skipWhile(T)(ref T[] source, bool orUntilEnd = false)
+	{
+		enum bool isSlice = is(typeof(pred(source[0..1])));
+		enum bool isElem  = is(typeof(pred(source[0]   )));
+		static assert(isSlice || isElem, "Can't skip " ~ T.stringof ~ " until " ~ pred.stringof);
+		static assert(isSlice != isElem, "Ambiguous types for skipWhile: " ~ T.stringof ~ " and " ~ pred.stringof);
+
+		foreach (i; 0 .. source.length)
+		{
+			bool match;
+			static if (isSlice)
+				match = pred(source[i .. $]);
+			else
+				match = pred(source[i]);
+			if (!match)
+			{
+				auto result = source[0..i];
+				source = source[i .. $];
+				return result;
+			}
+		}
+
+		if (orUntilEnd)
+		{
+			auto result = source;
+			source = null;
+			return result;
+		}
+		else
+			return null;
+	}
+}
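A hedged usage sketch of skipWhile (vendored above from ae.utils.array): it consumes the matching prefix from `source` in place and returns it:

	string s = "123abc";
	auto digits = s.skipWhile!(c => '0' <= c && c <= '9')(true);
	assert(digits == "123" && s == "abc");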
+
 Entity[] parseDiff(string s)
 {
-	return s
+	auto entities = s
 		.split2!("\n", "diff ")
 		.map!(
 			(string file)
@@ -1308,54 +1470,387 @@ Entity[] parseDiff(string s)
 		)
 		.array
 	;
+
+	// If a word occurs only in two or more (but not all) hunks,
+	// create dependency nodes which make Dustmite try reducing these
+	// hunks simultaneously.
+	{
+		auto allHunks = entities.map!(entity => entity.children).join;
+		auto hunkWords = allHunks
+			.map!(hunk => hunk.head)
+			.map!((text) {
+				bool[string] words;
+				while (text.length)
+				{
+					alias isWordChar = c => isAlphaNum(c) || c == '_';
+					text.skipWhile!(not!isWordChar)(true);
+					auto word = text.skipWhile!isWordChar(true);
+					if (word.length)
+						words[word] = true;
+				}
+				return words;
+			})
+			.array;
+
+		auto allWords = hunkWords
+			.map!(words => words.byPair)
+			.joiner
+			.assocArray;
+		string[bool[]] sets; // Deduplicated sets of hunks to try to remove at once
+		foreach (word; allWords.byKey)
+		{
+			immutable bool[] hunkHasWord = hunkWords.map!(c => !!(word in c)).array.assumeUnique;
+			auto numHunksWithWord = hunkHasWord.count!(b => b);
+			if (numHunksWithWord > 1 && numHunksWithWord < allHunks.length)
+				sets[hunkHasWord] = word;
+		}
+
+		foreach (set, word; sets)
+		{
+			auto e = new Entity();
+			debug e.comments ~= word;
+			e.dependents ~= allHunks.length.iota
+				.filter!(i => set[i])
+				.map!(i => EntityRef(allHunks[i]))
+				.array;
+			entities ~= e;
+		}
+	}
+
+	return entities;
+}
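An editorial illustration of the hunk-dependency pass above:

	// If the identifier "tmp" occurs in hunks #1 and #3 of a five-hunk diff
	// (in more than one hunk, but not in all of them), one extra contentless
	// Entity is appended whose dependents reference exactly those two hunks,
	// letting DustMite try removing both hunks in a single step.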
+
+size_t getIndent(string line, uint tabWidth, size_t lastIndent)
+{
+	size_t indent = 0;
+charLoop:
+	foreach (c; line)
+		switch (c)
+		{
+			case ' ':
+				indent++;
+				break;
+			case '\t':
+				indent += tabWidth;
+				break;
+			case '\r':
+			case '\n':
+				// Treat empty (whitespace-only) lines as belonging to the
+				// immediately higher (most-nested) block.
+				indent = lastIndent;
+				break charLoop;
+			default:
+				break charLoop;
+		}
+	return indent;
 }
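Hedged examples of getIndent behavior, assuming tabWidth = 4:

	assert(getIndent("    x", 4, 0) == 4); // four spaces
	assert(getIndent("\tx",   4, 0) == 4); // one tab counts as tabWidth
	assert(getIndent("\n",    4, 7) == 7); // blank lines keep the last indent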
 
 Entity[] parseIndent(string s, uint tabWidth)
 {
 	Entity[] root;
-	Entity[]*[] stack;
+	Entity[] stack;
 
 	foreach (line; s.split2!("\n", ""))
 	{
-		size_t indent = 0;
-	charLoop:
-		foreach (c; line)
-			switch (c)
-			{
-				case ' ':
-					indent++;
-					break;
-				case '\t':
-					indent += tabWidth;
-					break;
-				case '\r':
-				case '\n':
-					// Treat empty (whitespace-only) lines as belonging to the
-					// immediately higher (most-nested) block.
-					indent = stack.length;
-					break charLoop;
-				default:
-					break charLoop;
-			}
-
+		auto indent = getIndent(line, tabWidth, stack.length);
 		auto e = new Entity(line);
 		foreach_reverse (i; 0 .. min(indent, stack.length)) // non-inclusively up to indent
 			if (stack[i])
 			{
-				*stack[i] ~= e;
+				stack[i].children ~= e;
 				goto parentFound;
 			}
 		root ~= e;
 	parentFound:
 		stack.length = indent + 1;
-		stack[indent] = &e.children;
+		stack[indent] = new Entity;
+		e.children ~= stack[indent];
 	}
 
 	return root;
 }
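A small sketch of parseIndent: indentation depth becomes tree depth, with the sentinel child entity introduced above collecting the nested lines:

	auto nodes = parseIndent("a\n  b\n  c\n", 8);
	assert(nodes.length == 1); // "b" and "c" land in the subtree of "a"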
 
+Entity[] parseLisp(string s)
+{
+	// leaf head: token (non-whitespace)
+	// leaf tail: whitespace
+	// non-leaf head: "(" and any whitespace
+	// non-leaf tail: ")" and any whitespace
+
+	size_t i;
+
+	size_t last;
+	scope(success) assert(last == s.length, "Incomplete slice");
+	string slice(void delegate() advance)
+	{
+		assert(last == i, "Non-contiguous slices");
+		auto start = i;
+		advance();
+		last = i;
+		return s[start .. i];
+	}
+
+	/// How many characters did `advance` move forward by?
+	size_t countAdvance(void delegate() advance)
+	{
+		auto start = i;
+		advance();
+		return i - start;
+	}
+
+	void advanceWhitespace()
+	{
+		while (i < s.length)
+		{
+			switch (s[i])
+			{
+				case ' ':
+				case '\t':
+				case '\r':
+				case '\n':
+				case '\f':
+				case '\v':
+					i++;
+					continue;
+
+				case ';':
+					i++;
+					while (i < s.length && s[i] != '\n')
+						i++;
+					continue;
+
+				default:
+					return; // stop
+			}
+			assert(false); // unreachable
+		}
+	}
+
+	void advanceToken()
+	{
+		assert(countAdvance(&advanceWhitespace) == 0);
+		assert(i < s.length);
+
+		switch (s[i])
+		{
+			case '(':
+			case ')':
+			case '[':
+			case ']':
+				assert(false);
+			case '"':
+				i++;
+				while (i < s.length)
+				{
+					switch (s[i])
+					{
+						case '"':
+							i++;
+							return; // stop
+
+						case '\\':
+							i++;
+							if (i < s.length)
+								i++;
+							continue;
+
+						default:
+							i++;
+							continue;
+					}
+					assert(false); // unreachable
+				}
+				break;
+			default:
+				while (i < s.length)
+				{
+					switch (s[i])
+					{
+						case ' ':
+						case '\t':
+						case '\r':
+						case '\n':
+						case '\f':
+						case '\v':
+						case ';':
+
+						case '"':
+						case '(':
+						case ')':
+						case '[':
+						case ']':
+							return; // stop
+
+						case '\\':
+							i++;
+							if (i < s.length)
+								i++;
+							continue;
+
+						default:
+							i++;
+							continue;
+					}
+					assert(false); // unreachable
+				}
+				break;
+		}
+	}
+
+	void advanceParen(char paren)
+	{
+		assert(i < s.length && s[i] == paren);
+		i++;
+		advanceWhitespace();
+	}
+
+	Entity[] parse(bool topLevel)
+	{
+		Entity[] result;
+		if (topLevel) // Handle reading whitespace at top-level
+		{
+			auto ws = slice(&advanceWhitespace);
+			if (ws.length)
+				result ~= new Entity(ws);
+		}
+
+		Entity parseParen(char open, char close)
+		{
+			auto entity = new Entity(slice({ advanceParen(open); }));
+			entity.children = parse(false);
+			if (i < s.length)
+				entity.tail = slice({ advanceParen(close); });
+			return entity;
+		}
+
+		while (i < s.length)
+		{
+			switch (s[i])
+			{
+				case '(':
+					result ~= parseParen('(', ')');
+					continue;
+				case '[':
+					result ~= parseParen('[', ']');
+					continue;
+
+				case ')':
+				case ']':
+					if (!topLevel)
+						break;
+					result ~= new Entity(slice({ advanceParen(s[i]); }));
+					continue;
+
+				default:
+					result ~= new Entity(
+						slice(&advanceToken),
+						null,
+						slice(&advanceWhitespace),
+					);
+					continue;
+			}
+			break;
+		}
+		return result;
+	}
+
+	return parse(true);
+}
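A hedged example of the head/tail conventions described at the top of parseLisp:

	auto forms = parseLisp("(foo bar) ; comment\n");
	assert(forms.length == 1 && forms[0].head == "(");
	assert(forms[0].tail == ") ; comment\n"); // ")" plus whitespace and comment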
+
 private:
 
+Entity loadJson(string contents)
+{
+	import std.json : JSONValue, parseJSON;
+
+	auto jsonDoc = parseJSON(contents);
+	enforce(jsonDoc["version"].integer == 1, "Unknown JSON version");
+
+	// Pass 1: calculate the total size of all data.
+	// --no-remove and some optimizations require that entity strings
+	// are arranged in contiguous memory.
+	size_t totalSize;
+	void scanSize(ref JSONValue v)
+	{
+		if (auto p = "head" in v.object)
+			totalSize += p.str.length;
+		if (auto p = "children" in v.object)
+			p.array.each!scanSize();
+		if (auto p = "tail" in v.object)
+			totalSize += p.str.length;
+	}
+	scanSize(jsonDoc["root"]);
+
+	auto buf = new char[totalSize];
+	size_t pos = 0;
+
+	Entity[string] labeledEntities;
+	JSONValue[][Entity] entityDependents;
+
+	// Pass 2: Create the entity tree
+	Entity parse(ref JSONValue v)
+	{
+		auto e = new Entity;
+
+		if (auto p = "filename" in v.object)
+		{
+			e.file = new Entity.FileProperties;
+			e.file.name = p.str.buildNormalizedPath;
+			enforce(e.file.name.length &&
+				!e.file.name.isAbsolute &&
+				!e.file.name.pathSplitter.canFind(`..`),
+				"Invalid filename in JSON file: " ~ p.str);
+		}
+
+		if (auto p = "head" in v.object)
+		{
+			auto end = pos + p.str.length;
+			buf[pos .. end] = p.str;
+			e.head = buf[pos .. end].assumeUnique;
+			pos = end;
+		}
+		if (auto p = "children" in v.object)
+			e.children = p.array.map!parse.array;
+		if (auto p = "tail" in v.object)
+		{
+			auto end = pos + p.str.length;
+			buf[pos .. end] = p.str;
+			e.tail = buf[pos .. end].assumeUnique;
+			pos = end;
+		}
+
+		if (auto p = "noRemove" in v.object)
+			e.noRemove = (){
+				if (*p == JSONValue(true)) return true;
+				if (*p == JSONValue(false)) return false;
+				throw new Exception("noRemove is not a boolean");
+			}();
+
+		if (auto p = "label" in v.object)
+		{
+			enforce(p.str !in labeledEntities, "Duplicate label in JSON file: " ~ p.str);
+			labeledEntities[p.str] = e;
+		}
+		if (auto p = "dependents" in v.object)
+			entityDependents[e] = p.array;
+
+		return e;
+	}
+	auto root = parse(jsonDoc["root"]);
+
+	// Pass 3: Resolve dependents
+	foreach (e, dependents; entityDependents)
+		e.dependents = dependents
+			.map!((ref d) => labeledEntities
+				.get(d.str, null)
+				.enforce("Unknown label in dependents: " ~ d.str)
+				.EntityRef
+			)
+			.array;
+
+	return root;
+}
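A round-trip sketch: loadJson accepts what dumpToJson in dustmite.d produces, e.g. this minimal document:

	auto root = loadJson(`{"version":1,"root":{"head":"int x;"}}`);
	assert(root.head == "int x;");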
+
 bool isNewline(char c) { return c == '\r' || c == '\n'; }
 alias isNotAlphaNum = not!isAlphaNum;