From efb62b773266a46c5d4af9a82a3bdec16a6ed31f Mon Sep 17 00:00:00 2001 From: Raoul Schaffranek Date: Wed, 18 Sep 2024 13:57:44 +0200 Subject: [PATCH] Add instruction offsets --- pyevmasm/evmasm.py | 66 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 51 insertions(+), 15 deletions(-) diff --git a/pyevmasm/evmasm.py b/pyevmasm/evmasm.py index 668b4ba..ee0257b 100644 --- a/pyevmasm/evmasm.py +++ b/pyevmasm/evmasm.py @@ -61,6 +61,7 @@ def __init__( description, operand=None, pc=0, + offset=0, ): """ This represents an EVM instruction. @@ -75,6 +76,7 @@ def __init__( :param description: textual description of the instruction :param operand: optional immediate operand :param pc: optional program counter of this instruction in the program + :param offset: optional offset of this instruction in the bytecode Example use:: @@ -83,6 +85,7 @@ def __init__( >>> print('\tdescription:', instruction.description) >>> print('\tgroup:', instruction.group) >>> print('\tpc:', instruction.pc) + >>> print('\toffset:', instruction.offset) >>> print('\tsize:', instruction.size) >>> print('\thas_operand:', instruction.has_operand) >>> print('\toperand_size:', instruction.operand_size) @@ -110,6 +113,7 @@ def __init__( self._description = description self._operand = operand # Immediate operand if any self._pc = pc + self._offset = offset def __eq__(self, other): """Instructions are equal if all features match""" @@ -122,11 +126,12 @@ def __eq__(self, other): and self._pushes == other._pushes and self._fee == other._fee and self._pc == other._pc + and self._offset == other._offset and self._description == other._description ) def __repr__(self): - output = "Instruction(0x{:x}, {}, {:d}, {:d}, {:d}, {:d}, {}, {}, {})".format( + output = "Instruction(0x{:x}, {}, {:d}, {:d}, {:d}, {:d}, {}, {}, {}, {})".format( self._opcode, self._name, self._operand_size, @@ -136,6 +141,7 @@ def __repr__(self): self._description, self._operand, self._pc, + self._offset ) return output @@ -261,6 +267,15 @@ def pc(self, value): """Location in the program (optional)""" self._pc = value + @property + def offset(self): + return self._offset + + @offset.setter + def offset(self, value): + """Offset in the bytecode (optional)""" + self._offset = value + @property def group(self): """Instruction classification as per the yellow paper""" @@ -407,13 +422,15 @@ def is_arithmetic(self): } -def assemble_one(asmcode, pc=0, fork=DEFAULT_FORK): +def assemble_one(asmcode, pc=0, offset=0, fork=DEFAULT_FORK): """Assemble one EVM instruction from its textual representation. :param asmcode: assembly code for one instruction :type asmcode: str :param pc: program counter of the instruction(optional) :type pc: int + :param offset: offset of the instruction in the bytecode(optional) + :type offset: int :param fork: fork name (optional) :type fork: str :return: An Instruction object @@ -431,6 +448,8 @@ def assemble_one(asmcode, pc=0, fork=DEFAULT_FORK): instr = instruction_table[asmcode[0].upper()] if pc: instr.pc = pc + if offset: + instr.offset = offset if instr.operand_size > 0: assert len(asmcode) == 2 instr.operand = int(asmcode[1], 0) @@ -439,13 +458,15 @@ def assemble_one(asmcode, pc=0, fork=DEFAULT_FORK): raise AssembleError("Something wrong at pc {:d}".format(pc)) -def assemble_all(asmcode, pc=0, fork=DEFAULT_FORK): +def assemble_all(asmcode, pc=0, offset=0, fork=DEFAULT_FORK): """ Assemble a sequence of textual representation of EVM instructions :param asmcode: assembly code for any number of instructions :type asmcode: str :param pc: program counter of the first instruction(optional) :type pc: int + :param offset: offset of the first instruction in the bytecode(optional) + :type offset: int :param fork: fork name (optional) :type fork: str :return: An generator of Instruction objects @@ -471,18 +492,21 @@ def assemble_all(asmcode, pc=0, fork=DEFAULT_FORK): for line in asmcode: if not line.strip(): continue - instr = assemble_one(line, pc=pc, fork=fork) + instr = assemble_one(line, pc=pc, offset=offset, fork=fork) yield instr pc += instr.size + offset += 1 -def disassemble_one(bytecode, pc=0, fork=DEFAULT_FORK): +def disassemble_one(bytecode, pc=0, offset=0, fork=DEFAULT_FORK): """Disassemble a single instruction from a bytecode :param bytecode: the bytecode stream :type bytecode: str | bytes | bytearray | iterator :param pc: program counter of the instruction(optional) :type pc: int + :param offset: offset of the instruction in the bytecode(optional) + :type offset: int :param fork: fork name (optional) :type fork: str :return: an Instruction object @@ -513,6 +537,7 @@ def disassemble_one(bytecode, pc=0, fork=DEFAULT_FORK): opcode, "INVALID", 0, 0, 0, 0, "Unspecified invalid instruction." ) instruction.pc = pc + instruction.offset = offset try: if instruction.has_operand: @@ -523,13 +548,15 @@ def disassemble_one(bytecode, pc=0, fork=DEFAULT_FORK): return instruction -def disassemble_all(bytecode, pc=0, fork=DEFAULT_FORK): +def disassemble_all(bytecode, pc=0, offset=0, fork=DEFAULT_FORK): """Disassemble all instructions in bytecode :param bytecode: an evm bytecode (binary) :type bytecode: str | bytes | bytearray | iterator :param pc: program counter of the first instruction(optional) :type pc: int + :param offset: offset of the first instruction in the bytecode(optional) + :type offset: int :param fork: fork name (optional) :type fork: str :return: An generator of Instruction objects @@ -561,20 +588,23 @@ def disassemble_all(bytecode, pc=0, fork=DEFAULT_FORK): bytecode = iter(bytecode) while True: - instr = disassemble_one(bytecode, pc=pc, fork=fork) + instr = disassemble_one(bytecode, pc=pc, offset=offset, fork=fork) if not instr: return pc += instr.size + offset += 1 yield instr -def disassemble(bytecode, pc=0, fork=DEFAULT_FORK): +def disassemble(bytecode, pc=0, offset=0, fork=DEFAULT_FORK): """Disassemble an EVM bytecode :param bytecode: binary representation of an evm bytecode :type bytecode: str | bytes | bytearray :param pc: program counter of the first instruction(optional) :type pc: int + :param offset: offset of the first instruction in the bytecode(optional) + :type offset: int :param fork: fork name (optional) :type fork: str :return: the text representation of the assembler code @@ -590,16 +620,18 @@ def disassemble(bytecode, pc=0, fork=DEFAULT_FORK): PUSH2 0x100 """ - return "\n".join(map(str, disassemble_all(bytecode, pc=pc, fork=fork))) + return "\n".join(map(str, disassemble_all(bytecode, pc=pc, offset=offset, fork=fork))) -def assemble(asmcode, pc=0, fork=DEFAULT_FORK): +def assemble(asmcode, pc=0, offset=0, fork=DEFAULT_FORK): """ Assemble an EVM program :param asmcode: an evm assembler program :type asmcode: str :param pc: program counter of the first instruction(optional) :type pc: int + :param offset: offset of the first instruction in the bytecode(optional) + :type offset: int :param fork: fork name (optional) :type fork: str :return: the hex representation of the bytecode @@ -616,16 +648,18 @@ def assemble(asmcode, pc=0, fork=DEFAULT_FORK): ... b"\x60\x60\x60\x40\x52\x60\x02\x61\x01\x00" """ - return b"".join(x.bytes for x in assemble_all(asmcode, pc=pc, fork=fork)) + return b"".join(x.bytes for x in assemble_all(asmcode, pc=pc, offset=offset, fork=fork)) -def disassemble_hex(bytecode, pc=0, fork=DEFAULT_FORK): +def disassemble_hex(bytecode, pc=0, offset=0, fork=DEFAULT_FORK): """Disassemble an EVM bytecode :param bytecode: canonical representation of an evm bytecode (hexadecimal) :type bytecode: str :param pc: program counter of the first instruction(optional) :type pc: int + :param offset: offset of the first instruction in the bytecode(optional) + :type offset: int :param fork: fork name (optional) :type fork: str :return: the text representation of the assembler code @@ -645,16 +679,18 @@ def disassemble_hex(bytecode, pc=0, fork=DEFAULT_FORK): if bytecode.startswith("0x"): bytecode = bytecode[2:] bytecode = unhexlify(bytecode) - return disassemble(bytecode, pc=pc, fork=fork) + return disassemble(bytecode, pc=pc, offset=offset, fork=fork) -def assemble_hex(asmcode, pc=0, fork=DEFAULT_FORK): +def assemble_hex(asmcode, pc=0, offset=0, fork=DEFAULT_FORK): """ Assemble an EVM program :param asmcode: an evm assembler program :type asmcode: str | iterator[Instruction] :param pc: program counter of the first instruction(optional) :type pc: int + :param offset: offset of the first instruction in the bytecode(optional) + :type offset: int :param fork: fork name (optional) :type fork: str :return: the hex representation of the bytecode @@ -673,7 +709,7 @@ def assemble_hex(asmcode, pc=0, fork=DEFAULT_FORK): """ if isinstance(asmcode, list): return "0x" + hexlify(b"".join([x.bytes for x in asmcode])).decode("ascii") - return "0x" + hexlify(assemble(asmcode, pc=pc, fork=fork)).decode("ascii") + return "0x" + hexlify(assemble(asmcode, pc=pc, offset=offset, fork=fork)).decode("ascii") class InstructionTable: