Skip to content

Commit

Permalink
Use IRBuilder in the binary parser (#6963)
Browse files Browse the repository at this point in the history
IRBuilder is a utility for turning arbitrary valid streams of Wasm
instructions into valid Binaryen IR. It is already used in the text
parser, so now use it in the binary parser as well. Since the IRBuilder
API for building each instruction requires only the information that the
binary and text formats include as immediates to that instruction, the
parser is now much simpler than before. In particular, it does not need
to manage a stack of instructions to figure out what the children of
each expression should be; IRBuilder handles this instead.

There are some differences between the IR constructed by IRBuilder and
the IR the binary parser constructed before this change. Most
importantly, IRBuilder generates better multivalue code because it
avoids eagerly breaking up multivalue results into individual components
that might need to be immediately reassembled into a tuple. It also
parses try-delegate more correctly, allowing the delegate to target
arbitrary labels, not just other `try`s. There are also a couple
superficial differences in the generated label and scratch local names.

As part of this change, add support for recording binary source
locations in IRBuilder.
  • Loading branch information
tlively authored Nov 27, 2024
1 parent 6f0f2e0 commit f8e1622
Show file tree
Hide file tree
Showing 70 changed files with 5,003 additions and 7,938 deletions.
182 changes: 9 additions & 173 deletions src/wasm-binary.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "ir/module-utils.h"
#include "parsing.h"
#include "wasm-builder.h"
#include "wasm-ir-builder.h"
#include "wasm-traversal.h"
#include "wasm-validator.h"
#include "wasm.h"
Expand Down Expand Up @@ -1543,8 +1544,6 @@ class WasmBinaryReader {
Signature getSignatureByTypeIndex(Index index);
Signature getSignatureByFunctionIndex(Index index);

size_t nextLabel;

Name getNextLabel();

// We read the names section first so we know in advance what names various
Expand Down Expand Up @@ -1573,67 +1572,19 @@ class WasmBinaryReader {
void readVars();
void setLocalNames(Function& func, Index i);

Result<> readInst();

void readExports();

// The strings in the strings section (which are referred to by StringConst).
std::vector<Name> strings;
void readStrings();
Name getIndexedString();

Expression* readExpression();
void readGlobals();

// A branch target as tracked by the reader: the label `name` paired with the
// `type` associated with that target. The constructor simply stores both
// arguments.
struct BreakTarget {
Name name;
Type type;
BreakTarget(Name name, Type type) : name(name), type(type) {}
};
std::vector<BreakTarget> breakStack;
// the names that breaks target. this lets us know if a block has breaks to it
// or not.
std::unordered_set<Name> breakTargetNames;
// the names that delegates target.
std::unordered_set<Name> exceptionTargetNames;

std::vector<Expression*> expressionStack;

// Control flow structure parsing: these have not just the normal binary
// data for an instruction, but also some bytes later on like "end" or "else".
// We must be aware of the connection between those things, for debug info.
std::vector<Expression*> controlFlowStack;

// Called when we parse the beginning of a control flow structure.
void startControlFlow(Expression* curr);

// set when we know code is unreachable in the sense of the wasm spec: we are
// in a block and after an unreachable element. this helps parse stacky wasm
// code, which can be unsuitable for our IR when unreachable.
bool unreachableInTheWasmSense;

// set when the current code being processed will not be emitted in the
// output, which is the case when it is literally unreachable, for example,
// (block $a
// (unreachable)
// (block $b
// ;; code here is reachable in the wasm sense, even though $b as a whole
// ;; is not
// (unreachable)
// ;; code here is unreachable in the wasm sense
// )
// )
bool willBeIgnored;

BinaryConsts::ASTNodes lastSeparator = BinaryConsts::End;

// process a block-type scope, until an end or else marker, or the end of the
// function
void processExpressions();
void skipUnreachableCode();

void pushExpression(Expression* curr);
Expression* popExpression();
Expression* popNonVoidExpression();
Expression* popTuple(size_t numElems);
Expression* popTypedExpression(Type type);
IRBuilder builder;

// validations that cannot be performed on the Module
void validateBinary();
Expand Down Expand Up @@ -1663,127 +1614,12 @@ class WasmBinaryReader {
void readNextDebugLocation();
void readSourceMapHeader();

// AST reading
int depth = 0; // only for debugging

BinaryConsts::ASTNodes readExpression(Expression*& curr);
void pushBlockElements(Block* curr, Type type, size_t start);
void visitBlock(Block* curr);

// Gets a block of expressions. If it's just one, return that singleton.
Expression* getBlockOrSingleton(Type type);

BreakTarget getBreakTarget(int32_t offset);
Name getExceptionTargetName(int32_t offset);

Index readMemoryAccess(Address& alignment, Address& offset);
std::tuple<Name, Address, Address> getMemarg();

void visitIf(If* curr);
void visitLoop(Loop* curr);
void visitBreak(Break* curr, uint8_t code);
void visitSwitch(Switch* curr);
void visitCall(Call* curr);
void visitCallIndirect(CallIndirect* curr);
void visitLocalGet(LocalGet* curr);
void visitLocalSet(LocalSet* curr, uint8_t code);
void visitGlobalGet(GlobalGet* curr);
void visitGlobalSet(GlobalSet* curr);
bool maybeVisitLoad(Expression*& out,
uint8_t code,
std::optional<BinaryConsts::ASTNodes> prefix);
bool maybeVisitStore(Expression*& out,
uint8_t code,
std::optional<BinaryConsts::ASTNodes> prefix);
bool maybeVisitNontrappingTrunc(Expression*& out, uint32_t code);
bool maybeVisitAtomicRMW(Expression*& out, uint8_t code);
bool maybeVisitAtomicCmpxchg(Expression*& out, uint8_t code);
bool maybeVisitAtomicWait(Expression*& out, uint8_t code);
bool maybeVisitAtomicNotify(Expression*& out, uint8_t code);
bool maybeVisitAtomicFence(Expression*& out, uint8_t code);
bool maybeVisitConst(Expression*& out, uint8_t code);
bool maybeVisitUnary(Expression*& out, uint8_t code);
bool maybeVisitBinary(Expression*& out, uint8_t code);
bool maybeVisitTruncSat(Expression*& out, uint32_t code);
bool maybeVisitSIMDBinary(Expression*& out, uint32_t code);
bool maybeVisitSIMDUnary(Expression*& out, uint32_t code);
bool maybeVisitSIMDConst(Expression*& out, uint32_t code);
bool maybeVisitSIMDStore(Expression*& out, uint32_t code);
bool maybeVisitSIMDExtract(Expression*& out, uint32_t code);
bool maybeVisitSIMDReplace(Expression*& out, uint32_t code);
bool maybeVisitSIMDShuffle(Expression*& out, uint32_t code);
bool maybeVisitSIMDTernary(Expression*& out, uint32_t code);
bool maybeVisitSIMDShift(Expression*& out, uint32_t code);
bool maybeVisitSIMDLoad(Expression*& out, uint32_t code);
bool maybeVisitSIMDLoadStoreLane(Expression*& out, uint32_t code);
bool maybeVisitMemoryInit(Expression*& out, uint32_t code);
bool maybeVisitDataDrop(Expression*& out, uint32_t code);
bool maybeVisitMemoryCopy(Expression*& out, uint32_t code);
bool maybeVisitMemoryFill(Expression*& out, uint32_t code);
bool maybeVisitTableSize(Expression*& out, uint32_t code);
bool maybeVisitTableGrow(Expression*& out, uint32_t code);
bool maybeVisitTableFill(Expression*& out, uint32_t code);
bool maybeVisitTableCopy(Expression*& out, uint32_t code);
bool maybeVisitTableInit(Expression*& out, uint32_t code);
bool maybeVisitRefI31(Expression*& out, uint32_t code);
bool maybeVisitI31Get(Expression*& out, uint32_t code);
bool maybeVisitRefTest(Expression*& out, uint32_t code);
bool maybeVisitRefCast(Expression*& out, uint32_t code);
bool maybeVisitBrOn(Expression*& out, uint32_t code);
bool maybeVisitStructNew(Expression*& out, uint32_t code);
bool maybeVisitStructGet(Expression*& out, uint32_t code);
bool maybeVisitStructSet(Expression*& out, uint32_t code);
bool maybeVisitArrayNewData(Expression*& out, uint32_t code);
bool maybeVisitArrayNewElem(Expression*& out, uint32_t code);
bool maybeVisitArrayNewFixed(Expression*& out, uint32_t code);
bool maybeVisitArrayGet(Expression*& out, uint32_t code);
bool maybeVisitArraySet(Expression*& out, uint32_t code);
bool maybeVisitArrayLen(Expression*& out, uint32_t code);
bool maybeVisitArrayCopy(Expression*& out, uint32_t code);
bool maybeVisitArrayFill(Expression*& out, uint32_t code);
bool maybeVisitArrayInit(Expression*& out, uint32_t code);
bool maybeVisitStringNew(Expression*& out, uint32_t code);
bool maybeVisitStringAsWTF16(Expression*& out, uint32_t code);
bool maybeVisitStringConst(Expression*& out, uint32_t code);
bool maybeVisitStringMeasure(Expression*& out, uint32_t code);
bool maybeVisitStringEncode(Expression*& out, uint32_t code);
bool maybeVisitStringConcat(Expression*& out, uint32_t code);
bool maybeVisitStringEq(Expression*& out, uint32_t code);
bool maybeVisitStringWTF16Get(Expression*& out, uint32_t code);
bool maybeVisitStringSliceWTF(Expression*& out, uint32_t code);
void visitSelect(Select* curr, uint8_t code);
void visitReturn(Return* curr);
void visitMemorySize(MemorySize* curr);
void visitMemoryGrow(MemoryGrow* curr);
void visitNop(Nop* curr);
void visitUnreachable(Unreachable* curr);
void visitDrop(Drop* curr);
void visitRefNull(RefNull* curr);
void visitRefIsNull(RefIsNull* curr);
void visitRefFunc(RefFunc* curr);
void visitRefEq(RefEq* curr);
void visitTableGet(TableGet* curr);
void visitTableSet(TableSet* curr);
void visitTryOrTryInBlock(Expression*& out);
void visitTryTable(TryTable* curr);
void visitThrow(Throw* curr);
void visitRethrow(Rethrow* curr);
void visitThrowRef(ThrowRef* curr);
void visitCallRef(CallRef* curr);
void visitRefAsCast(RefCast* curr, uint32_t code);
void visitRefAs(RefAs* curr, uint8_t code);
void visitContNew(ContNew* curr);
void visitContBind(ContBind* curr);
void visitResume(Resume* curr);
void visitSuspend(Suspend* curr);

[[noreturn]] void throwError(std::string text);

// Struct/Array instructions have an unnecessary heap type that is just for
// validation (except for the case of unreachability, but that's not a problem
// anyhow, we can ignore it there). That is, we also have a reference typed
// child from which we can infer the type anyhow, and we just need to check
// that type is the same.
void validateHeapTypeUsingChild(Expression* child, HeapType heapType);
// Abort parsing by throwing a ParseException built from `text` and the current
// value of `pos`. This function never returns.
// NOTE(review): the literal 0 and `pos` presumably fill ParseException's
// line/column arguments -- confirm against its constructor.
[[noreturn]] void throwError(std::string text) {
throw ParseException(text, 0, pos);
}

private:
bool hasDWARFSections();
Expand Down
25 changes: 25 additions & 0 deletions src/wasm-ir-builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@ class IRBuilder : public UnifiedExpressionVisitor<IRBuilder, Result<>> {
// of instructions after this is called.
Result<Expression*> build();

// Report whether `scopeStack` is empty. If the IRBuilder is empty, then it's
// ready to parse a new self-contained sequence of instructions. The result is
// marked [[nodiscard]] because calling this has no other effect.
[[nodiscard]] bool empty() { return scopeStack.empty(); }

// Call visit() on an existing Expression with its non-child fields
// initialized to initialize the child fields and refinalize it.
Result<> visit(Expression*);
Expand All @@ -59,6 +63,15 @@ class IRBuilder : public UnifiedExpressionVisitor<IRBuilder, Result<>> {
// pushed instruction.
void setDebugLocation(const std::optional<Function::DebugLocation>&);

// Hand the builder a pointer to the counter that tracks the current location
// in the binary, along with the offset of the code section. While the stored
// pointer is non-null, the builder records binary locations, relative to
// `codeSectionOffset`, for all instructions and delimiters inside functions.
void setBinaryLocation(size_t* binaryPos, size_t codeSectionOffset) {
  this->codeSectionOffset = codeSectionOffset;
  this->binaryPos = binaryPos;
}

// Set the function used to add scratch locals when constructing an isolated
// sequence of IR. The pointer is stored as-is in `func`; nothing else is
// updated by this call.
void setFunction(Function* func) { this->func = func; }
Expand Down Expand Up @@ -232,6 +245,11 @@ class IRBuilder : public UnifiedExpressionVisitor<IRBuilder, Result<>> {
Function* func = nullptr;
Builder builder;

// Used for setting DWARF expression locations.
size_t* binaryPos = nullptr;
size_t lastBinaryPos = 0;
size_t codeSectionOffset = 0;

// The location lacks debug info as it was marked as not having it.
struct NoDebug : public std::monostate {};
// The location lacks debug info, but was not marked as not having
Expand Down Expand Up @@ -316,6 +334,9 @@ class IRBuilder : public UnifiedExpressionVisitor<IRBuilder, Result<>> {
// stack-polymorphic unreachable mode.
bool unreachable = false;

// The binary location of the start of the scope, used to set debug info.
size_t startPos = 0;

// Default construction: `scope` is initialized to `NoScope{}`.
ScopeCtx() : scope(NoScope{}) {}
// Construct a context for the given scope. Members not named here keep their
// in-class defaults (e.g. `unreachable = false`, `startPos = 0`).
ScopeCtx(Scope scope) : scope(scope) {}
ScopeCtx(Scope scope, Name label, bool labelUsed)
Expand Down Expand Up @@ -529,6 +550,10 @@ class IRBuilder : public UnifiedExpressionVisitor<IRBuilder, Result<>> {
// Record the original label to handle references to it correctly.
labelDepths[label].push_back(scopeStack.size() + 1);
}
if (binaryPos) {
scope.startPos = lastBinaryPos;
lastBinaryPos = *binaryPos;
}
scopeStack.push_back(scope);
}

Expand Down
Loading

0 comments on commit f8e1622

Please sign in to comment.