Skip to content

Commit

Permalink
metadce fixes (#1329)
Browse files Browse the repository at this point in the history
* ignore missing imports (the wasm may have already had them optimized out)

 * handle segments that hold on to globals (root them, for now, as we can't remove segments)

 * run reorder-functions, as the optimal order may have changed after we dce

* fix global, global init, and segment offset reachability

* fix import rooting and processing - imports may be imported more than once
  • Loading branch information
kripken committed Dec 8, 2017
1 parent 22f1ce8 commit 9c51f2b
Show file tree
Hide file tree
Showing 10 changed files with 193 additions and 65 deletions.
151 changes: 114 additions & 37 deletions src/tools/wasm-metadce.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,16 +51,31 @@ struct MetaDCEGraph {
std::unordered_map<Name, DCENode> nodes;
std::unordered_set<Name> roots;

std::unordered_map<Name, Name> importToDCENode; // import internal name => DCE name
std::unordered_map<Name, Name> exportToDCENode; // export exported name => DCE name
std::unordered_map<Name, Name> functionToDCENode; // function name => DCE name
std::unordered_map<Name, Name> globalToDCENode; // global name => DCE name

std::unordered_map<Name, Name> DCENodeToImport; // reverse maps
std::unordered_map<Name, Name> DCENodeToExport;
std::unordered_map<Name, Name> DCENodeToExport; // reverse maps
std::unordered_map<Name, Name> DCENodeToFunction;
std::unordered_map<Name, Name> DCENodeToGlobal;

// imports are not mapped 1:1 to DCE nodes in the wasm, since env.X might
// be imported twice, for example. So we don't map a DCE node to an Import,
// but rather the module.base pair ("id") for the import.
// TODO: implement this in a safer way, not a string with a magic separator
typedef Name ImportId;

// Builds the identifier for an import from its module and base names by
// joining them with the " (*) " separator.
ImportId getImportId(Name module, Name base) {
  std::string id(module.str);
  id += " (*) ";
  id += base.str;
  return id;
}

// Looks up the import with the given internal name in the module and returns
// the identifier derived from its module and base names.
ImportId getImportId(Name name) {
  const auto& found = *wasm.getImport(name);
  return getImportId(found.module, found.base);
}

std::unordered_map<Name, Name> importIdToDCENode; // import module.base => DCE name

Module& wasm;

MetaDCEGraph(Module& wasm) : wasm(wasm) {}
Expand All @@ -84,11 +99,13 @@ struct MetaDCEGraph {
nodes[dceName] = DCENode(dceName);
}
for (auto& imp : wasm.imports) {
if (importToDCENode.find(imp->name) == importToDCENode.end()) {
auto dceName = getName("import", imp->name.str);
DCENodeToImport[dceName] = imp->name;
importToDCENode[imp->name] = dceName;
nodes[dceName] = DCENode(dceName);
// only process function and global imports - the table and memory are always there
if (imp->kind == ExternalKind::Function || imp->kind == ExternalKind::Global) {
auto id = getImportId(imp->module, imp->base);
if (importIdToDCENode.find(id) == importIdToDCENode.end()) {
auto dceName = getName("importId", imp->name.str);
importIdToDCENode[id] = dceName;
}
}
}
for (auto& exp : wasm.exports) {
Expand All @@ -104,16 +121,72 @@ struct MetaDCEGraph {
if (wasm.getFunctionOrNull(exp->value)) {
node.reaches.push_back(functionToDCENode[exp->value]);
} else {
node.reaches.push_back(importToDCENode[exp->value]);
node.reaches.push_back(importIdToDCENode[getImportId(exp->value)]);
}
} else if (exp->kind == ExternalKind::Global) {
if (wasm.getGlobalOrNull(exp->value)) {
node.reaches.push_back(globalToDCENode[exp->value]);
} else {
node.reaches.push_back(importToDCENode[exp->value]);
node.reaches.push_back(importIdToDCENode[getImportId(exp->value)]);
}
}
}
// Add initializer dependencies
// if we provide a parent DCE name, that is who can reach what we see
// if none is provided, then it is something we must root
// Walks an expression and records every global it gets or sets in the
// parent graph.
//
// parent:        the graph whose name maps, nodes and roots are updated.
// parentDceName: the DCE node that reaches whatever is seen. If it is a null
//                Name, there is no such node, and whatever is seen is added
//                to the graph's roots instead.
struct InitScanner : public PostWalker<InitScanner> {
  InitScanner(MetaDCEGraph* parent, Name parentDceName) : parent(parent), parentDceName(parentDceName) {}

  void visitGetGlobal(GetGlobal* curr) {
    handleGlobal(curr->name);
  }
  void visitSetGlobal(SetGlobal* curr) {
    handleGlobal(curr->name);
  }

private:
  MetaDCEGraph* parent;
  Name parentDceName;

  // Resolves the DCE node for the named global (either a global defined in
  // the module, or an imported one) and either roots it or links it from
  // the parent node.
  void handleGlobal(Name name) {
    Name dceName;
    if (getModule()->getGlobalOrNull(name)) {
      // it's a global
      dceName = parent->globalToDCENode[name];
    } else {
      // it's an import.
      dceName = parent->importIdToDCENode[parent->getImportId(name)];
    }
    if (parentDceName.isNull()) {
      // No parent can reach this, so the thing we found must itself be a
      // root. (Inserting parentDceName here would only add a null name and
      // leave the reached global/import unrooted.)
      parent->roots.insert(dceName);
    } else {
      parent->nodes[parentDceName].reaches.push_back(dceName);
    }
  }
};
for (auto& global : wasm.globals) {
InitScanner scanner(this, globalToDCENode[global->name]);
scanner.setModule(&wasm);
scanner.walk(global->init);
}
// we can't remove segments, so root what they need
InitScanner rooter(this, Name());
rooter.setModule(&wasm);
for (auto& segment : wasm.table.segments) {
// TODO: currently, all functions in the table are roots, but we
// should add an option to refine that
for (auto& name : segment.data) {
if (wasm.getFunctionOrNull(name)) {
roots.insert(functionToDCENode[name]);
} else {
roots.insert(importIdToDCENode[getImportId(name)]);
}
}
rooter.walk(segment.offset);
}
for (auto& segment : wasm.memory.segments) {
rooter.walk(segment.offset);
}

// A parallel scanner for function bodies
struct Scanner : public WalkerPass<PostWalker<Scanner>> {
Expand All @@ -133,7 +206,7 @@ struct MetaDCEGraph {
void visitCallImport(CallImport* curr) {
assert(parent->functionToDCENode.count(getFunction()->name) > 0);
parent->nodes[parent->functionToDCENode[getFunction()->name]].reaches.push_back(
parent->importToDCENode[curr->target]
parent->importIdToDCENode[parent->getImportId(curr->target)]
);
}
void visitGetGlobal(GetGlobal* curr) {
Expand All @@ -147,33 +220,23 @@ struct MetaDCEGraph {
MetaDCEGraph* parent;

void handleGlobal(Name name) {
if (getModule()->getGlobalOrNull(name)) return;
// it's an import
parent->nodes[parent->functionToDCENode[getFunction()->name]].reaches.push_back(
parent->importToDCENode[name]
);
if (!getFunction()) return; // non-function stuff (initializers) are handled separately
Name dceName;
if (getModule()->getGlobalOrNull(name)) {
// it's a global
dceName = parent->globalToDCENode[name];
} else {
// it's an import.
dceName = parent->importIdToDCENode[parent->getImportId(name)];
}
parent->nodes[parent->functionToDCENode[getFunction()->name]].reaches.push_back(dceName);
}
};

PassRunner runner(&wasm);
runner.setIsNested(true);
runner.add<Scanner>(this);
runner.run();

// also scan segment offsets
Scanner scanner(this);
scanner.setModule(&wasm);
for (auto& segment : wasm.table.segments) {
scanner.walk(segment.offset);
// TODO: currently, all functions in the table are roots, but we
// should add an option to refine that
for (auto& name : segment.data) {
roots.insert(functionToDCENode[name]);
}
}
for (auto& segment : wasm.memory.segments) {
scanner.walk(segment.offset);
}
}

private:
Expand Down Expand Up @@ -229,6 +292,7 @@ struct MetaDCEGraph {
// Now they are gone, standard optimization passes can do the rest!
PassRunner passRunner(&wasm);
passRunner.add("remove-unused-module-elements");
passRunner.add("reorder-functions"); // removing functions may alter the optimum order, as # of calls can change
passRunner.run();
}

Expand All @@ -253,13 +317,18 @@ struct MetaDCEGraph {
for (auto root : roots) {
std::cout << "root: " << root.str << '\n';
}
std::map<Name, ImportId> importMap;
for (auto& pair : importIdToDCENode) {
auto& id = pair.first;
auto dceName = pair.second;
importMap[dceName] = id;
}
for (auto& pair : nodes) {
auto name = pair.first;
auto& node = pair.second;
std::cout << "node: " << name.str << '\n';
if (DCENodeToImport.find(name) != DCENodeToImport.end()) {
auto* imp = wasm.getImport(DCENodeToImport[name]);
std::cout << " is import " << DCENodeToImport[name] << ", " << imp->module.str << '.' << imp->base.str << '\n';
if (importMap.find(name) != importMap.end()) {
std::cout << " is import " << importMap[name] << '\n';
}
if (DCENodeToExport.find(name) != DCENodeToExport.end()) {
std::cout << " is export " << DCENodeToExport[name].str << ", " << wasm.getExport(DCENodeToExport[name])->value << '\n';
Expand Down Expand Up @@ -288,6 +357,7 @@ int main(int argc, const char* argv[]) {
bool emitBinary = true;
bool debugInfo = false;
std::string graphFile;
bool dump = false;

Options options("wasm-metadce", "This tool performs dead code elimination (DCE) on a larger space "
"that the wasm module is just a part of. For example, if you have "
Expand Down Expand Up @@ -350,6 +420,9 @@ int main(int argc, const char* argv[]) {
[&](Options* o, const std::string& argument) {
graphFile = argument;
})
.add("--dump", "-d", "Dump the combined graph file (useful for debugging)",
Options::Arguments::Zero,
[&](Options *o, const std::string &arguments) { dump = true; })
.add_positional("INFILE", Options::Arguments::One,
[](Options* o, const std::string& argument) {
o->extra["infile"] = argument;
Expand Down Expand Up @@ -439,9 +512,8 @@ int main(int argc, const char* argv[]) {
if (!imp->isArray() || imp->size() != 2 || !imp[0]->isString() || !imp[1]->isString()) {
Fatal() << "node.import, if it exists, must be an array of two strings. see --help for the form";
}
auto importName = ImportUtils::getImport(wasm, imp[0]->getIString(), imp[1]->getIString())->name;
graph.importToDCENode[importName] = node.name;
graph.DCENodeToImport[node.name] = importName;
auto id = graph.getImportId(imp[0]->getIString(), imp[1]->getIString());
graph.importIdToDCENode[id] = node.name;
}
// TODO: optimize this copy with a clever move
graph.nodes[node.name] = node;
Expand All @@ -450,6 +522,11 @@ int main(int argc, const char* argv[]) {
// The external graph is now populated. Scan the module
graph.scanWebAssembly();

// Debug dump the graph, if requested
if (dump) {
graph.dump();
}

// Perform the DCE
graph.deadCodeElimination();

Expand Down
23 changes: 23 additions & 0 deletions test/metadce/corners.wast
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
(module
(import "env" "STACKTOP" (global $STACKTOP$asm2wasm$import i32))
(global $STACKTOP (mut i32) (get_global $STACKTOP$asm2wasm$import))

(import "env" "UNUSEDTOP" (global $UNUSEDTOP$asm2wasm$import i32))
(global $UNUSEDTOP (mut i32) (get_global $UNUSEDTOP$asm2wasm$import))

(import "env" "imported_twice" (func $imported_twice_a)) ;; and used just once,
(import "env" "imported_twice" (func $imported_twice_b)) ;; but the other should not kill the import for both!

(import "env" "an-imported-table-func" (func $imported_table_func))

(import "env" "table" (table 10 10 anyfunc))
(elem (i32.const 0) $imported_table_func)

(export "stackAlloc" (func $stackAlloc))

(func $stackAlloc
(drop (get_global $STACKTOP))
(call $imported_twice_a)
)
)

17 changes: 17 additions & 0 deletions test/metadce/corners.wast.dced
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
(module
(type $FUNCSIG$v (func))
(import "env" "STACKTOP" (global $STACKTOP$asm2wasm$import i32))
(import "env" "imported_twice" (func $imported_twice_a))
(import "env" "an-imported-table-func" (func $imported_table_func))
(import "env" "table" (table 10 10 anyfunc))
(global $STACKTOP (mut i32) (get_global $STACKTOP$asm2wasm$import))
(elem (i32.const 0) $imported_table_func)
(memory $0 0)
(export "stackAlloc" (func $stackAlloc))
(func $stackAlloc (; 2 ;) (type $FUNCSIG$v)
(drop
(get_global $STACKTOP)
)
(call $imported_twice_a)
)
)
2 changes: 2 additions & 0 deletions test/metadce/corners.wast.dced.stdout
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
unused: global$UNUSEDTOP$2
unused: ignorable import
16 changes: 16 additions & 0 deletions test/metadce/corners.wast.graph.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
[
{
"name": "ignorable import",
"import": ["non", "existent"]
},
{
"name": "imported_twice",
"import": ["env", "imported_twice"]
},
{
"name": "stackAllocGood",
"export": "stackAlloc",
"root": true
}
]

2 changes: 0 additions & 2 deletions test/metadce/no-outside.wast.dced.stdout
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,3 @@ unused: export$wasm_func$4
unused: export$wasm_func_unused$5
unused: func$a_wasm_func$0
unused: func$an_unused_wasm_func$1
unused: import$a_js_func$2
unused: import$an_unused_js_func$3
8 changes: 4 additions & 4 deletions test/metadce/outside.wast.dced
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@
(data (i32.const 1024) "abcd")
(data (get_global $from_segment) "abcd")
(export "wasm_func" (func $a_wasm_func))
(func $a_wasm_func (; 1 ;) (type $FUNCSIG$v)
(func $table_func (; 1 ;) (type $FUNCSIG$v)
(nop)
)
(func $a_wasm_func (; 2 ;) (type $FUNCSIG$v)
(call $a_js_func)
(drop
(get_global $DYNAMICTOP_PTR$asm2wasm$import)
Expand All @@ -20,7 +23,4 @@
(get_global $__THREW__)
)
)
(func $table_func (; 2 ;) (type $FUNCSIG$v)
(nop)
)
)
7 changes: 1 addition & 6 deletions test/metadce/outside.wast.dced.stdout
Original file line number Diff line number Diff line change
@@ -1,11 +1,6 @@
unused: export$wasm_func_unused$14
unused: export$wasm_func_unused$12
unused: func$an_unused_wasm_func$1
unused: global$__THREW__$3
unused: global$__THREW__unused$4
unused: global$from_segment$5
unused: global$from_segment_2$6
unused: global$from_segment_never_used$7
unused: import$0$12
unused: import$DYNAMICTOP_PTR$asm2wasm$import_unused$11
unused: import$an_unused_js_func$9
unused: import$import$table$0$13
16 changes: 8 additions & 8 deletions test/metadce/threaded.wast.dced
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,16 @@
(export "wasm_func2" (func $wasm_func_2))
(export "wasm_func3" (func $wasm_func_3))
(export "wasm_func4" (func $wasm_func_4))
(func $wasm_func_1 (; 3 ;) (type $FUNCSIG$v)
(call $js_func_2)
)
(func $wasm_func_2 (; 4 ;) (type $FUNCSIG$v)
(call $js_func_3)
(func $wasm_func_4 (; 3 ;) (type $FUNCSIG$v)
(nop)
)
(func $wasm_func_3 (; 5 ;) (type $FUNCSIG$v)
(func $wasm_func_3 (; 4 ;) (type $FUNCSIG$v)
(call $js_func_4)
)
(func $wasm_func_4 (; 6 ;) (type $FUNCSIG$v)
(nop)
(func $wasm_func_2 (; 5 ;) (type $FUNCSIG$v)
(call $js_func_3)
)
(func $wasm_func_1 (; 6 ;) (type $FUNCSIG$v)
(call $js_func_2)
)
)
Loading

0 comments on commit 9c51f2b

Please sign in to comment.