From c1398dd4c6dab8b8d6f7e2ed9029cec27de63e24 Mon Sep 17 00:00:00 2001 From: Han Dai Date: Fri, 6 May 2022 14:42:22 +0000 Subject: [PATCH 1/4] initialize radix to 10 --- llvm/keystone/ks.cpp | 1 - llvm/lib/MC/MCAsmInfo.cpp | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/keystone/ks.cpp b/llvm/keystone/ks.cpp index 736a0fbc..af2b5991 100644 --- a/llvm/keystone/ks.cpp +++ b/llvm/keystone/ks.cpp @@ -554,7 +554,6 @@ ks_err ks_close(ks_engine *ks) KEYSTONE_EXPORT ks_err ks_option(ks_engine *ks, ks_opt_type type, size_t value) { - ks->MAI->setRadix(16); switch(type) { case KS_OPT_SYNTAX: if (ks->arch != KS_ARCH_X86) diff --git a/llvm/lib/MC/MCAsmInfo.cpp b/llvm/lib/MC/MCAsmInfo.cpp index 0939eb86..234cf3d8 100644 --- a/llvm/lib/MC/MCAsmInfo.cpp +++ b/llvm/lib/MC/MCAsmInfo.cpp @@ -49,6 +49,7 @@ MCAsmInfo::MCAsmInfo() { Code32Directive = ".code32"; Code64Directive = ".code64"; AssemblerDialect = 0; + Radix = 10; AllowAtInName = false; SupportsQuotedNames = true; UseDataRegionDirectives = false; From ed814a9f6767c49728888f22d864b8b81e0d9827 Mon Sep 17 00:00:00 2001 From: Tobias Svensson Date: Sun, 24 Sep 2023 16:21:12 +0100 Subject: [PATCH 2/4] Fix sym_resolver symbol offsets and fix Python examples --- bindings/python/sample.py | 77 ++++++++++++++++++++++--------- llvm/lib/MC/MCAssembler.cpp | 2 +- suite/regress/x64_sym_resolver.py | 8 ++-- 3 files changed, 61 insertions(+), 26 deletions(-) diff --git a/bindings/python/sample.py b/bindings/python/sample.py index 2e2830ee..6b26445e 100755 --- a/bindings/python/sample.py +++ b/bindings/python/sample.py @@ -19,50 +19,70 @@ def test_ks(arch, mode, code, syntax=0): print("%02x " % i, end='') print("]") + return bytes(encoding) + # test symbol resolver -def test_sym_resolver(): +def test_sym_resolver(arch, mode, code, base, symbol_table): def sym_resolver(symbol, value): # is this the missing symbol we want to handle? 
- if symbol == "_l1": + if symbol in symbol_table: # put value of this symbol in @value - value = 0x1002 + value[0] = symbol_table[symbol] # we handled this symbol, so return true return True # we did not handle this symbol, so return false return False - ks = Ks(KS_ARCH_X86, KS_MODE_32) + ks = Ks(arch, mode) # register callback for symbol resolver ks.sym_resolver = sym_resolver - CODE = b"jmp _l1; nop; _l1:" - encoding, count = ks.asm(CODE, 0x1000) + encoding, count = ks.asm(code, base) - print("%s = [ " % CODE, end='') + print("%s = [ " % code, end='') for i in encoding: print("%02x " % i, end='') print("]") + return bytes(encoding) + if __name__ == '__main__': # X86 - test_ks(KS_ARCH_X86, KS_MODE_16, b"add eax, ecx") - test_ks(KS_ARCH_X86, KS_MODE_32, b"add eax, ecx") - test_ks(KS_ARCH_X86, KS_MODE_64, b"add rax, rcx") - test_ks(KS_ARCH_X86, KS_MODE_32, b"add %ecx, %eax", KS_OPT_SYNTAX_ATT) - test_ks(KS_ARCH_X86, KS_MODE_64, b"add %rcx, %rax", KS_OPT_SYNTAX_ATT) - - test_ks(KS_ARCH_X86, KS_MODE_32, b"add eax, 0x15") - test_ks(KS_ARCH_X86, KS_MODE_32, b"add eax, 15h"); - test_ks(KS_ARCH_X86, KS_MODE_32, b"add eax, 15") - - # RADIX16 syntax Intel (default syntax) - test_ks(KS_ARCH_X86, KS_MODE_32, b"add eax, 15", KS_OPT_SYNTAX_RADIX16) + encoding = test_ks(KS_ARCH_X86, KS_MODE_16, b"add eax, ecx") + assert encoding == bytes.fromhex("66 01 c8") + + encoding = test_ks(KS_ARCH_X86, KS_MODE_32, b"add eax, ecx") + assert encoding == bytes.fromhex("01 c8") + + encoding = test_ks(KS_ARCH_X86, KS_MODE_64, b"add rax, rcx") + assert encoding == bytes.fromhex("48 01 c8") + + encoding = test_ks(KS_ARCH_X86, KS_MODE_32, b"add %ecx, %eax", KS_OPT_SYNTAX_ATT) + assert encoding == bytes.fromhex("01 c8") + + encoding = test_ks(KS_ARCH_X86, KS_MODE_64, b"add %rcx, %rax", KS_OPT_SYNTAX_ATT) + assert encoding == bytes.fromhex("48 01 c8") + + encoding = test_ks(KS_ARCH_X86, KS_MODE_32, b"add eax, 0x15") + assert encoding == bytes.fromhex("83 c0 15") + + encoding = test_ks(KS_ARCH_X86, 
KS_MODE_32, b"add eax, 15h"); + assert encoding == bytes.fromhex("83 c0 15") + + encoding = test_ks(KS_ARCH_X86, KS_MODE_32, b"add eax, 15") + assert encoding == bytes.fromhex("83 c0 0f") + + # RADIX16 syntax for Intel + encoding = test_ks(KS_ARCH_X86, KS_MODE_32, b"add eax, 15", KS_OPT_SYNTAX_RADIX16) + assert encoding == bytes.fromhex("83 c0 15") + # RADIX16 syntax for AT&T - test_ks(KS_ARCH_X86, KS_MODE_32, b"add $15, %eax", KS_OPT_SYNTAX_RADIX16 | KS_OPT_SYNTAX_ATT) + encoding = test_ks(KS_ARCH_X86, KS_MODE_32, b"add $15, %eax", KS_OPT_SYNTAX_RADIX16 | KS_OPT_SYNTAX_ATT) + assert encoding == bytes.fromhex("83 c0 15") # ARM test_ks(KS_ARCH_ARM, KS_MODE_ARM, b"sub r1, r2, r5") @@ -95,4 +115,19 @@ def sym_resolver(symbol, value): test_ks(KS_ARCH_SYSTEMZ, KS_MODE_BIG_ENDIAN, b"a %r0, 4095(%r15,%r1)") # test symbol resolver - test_sym_resolver() + + encoding = test_sym_resolver(KS_ARCH_X86, KS_MODE_64, b"mov rax, 80", 0x1000, {}) + assert encoding == bytes.fromhex("48 c7 c0 50 00 00 00") + + # X64 - Backward jump + encoding = test_sym_resolver(KS_ARCH_X86, KS_MODE_64, b"jmp _l1; nop", 0x1000, {b"_l1": 0x1000}) + assert encoding == bytes.fromhex("eb fe 90") + + # X64 - Forward jump + encoding = test_sym_resolver(KS_ARCH_X86, KS_MODE_64, b"jmp _l1; nop", 0x1000, {b"_l1": 0x1002}) + assert encoding == bytes.fromhex("eb 00 90") + + # X64 - Absolute address + encoding = test_sym_resolver(KS_ARCH_X86, KS_MODE_64, b"jmp _l1; nop", 0x1000, {b"_l1": 0xAABBCCDD}) + assert encoding == bytes.fromhex("e9 d8 bc bb aa 90") + diff --git a/llvm/lib/MC/MCAssembler.cpp b/llvm/lib/MC/MCAssembler.cpp index 2e656e7d..4972cbc5 100644 --- a/llvm/lib/MC/MCAssembler.cpp +++ b/llvm/lib/MC/MCAssembler.cpp @@ -208,7 +208,7 @@ bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout, ks_sym_resolver resolver = (ks_sym_resolver)KsSymResolver; if (resolver(Sym.getName().str().c_str(), &imm)) { // resolver handled this symbol - Value = imm; + Value += imm; IsResolved = true; } else { // resolver 
did not handle this symbol diff --git a/suite/regress/x64_sym_resolver.py b/suite/regress/x64_sym_resolver.py index 1bf6a121..e9f9144f 100755 --- a/suite/regress/x64_sym_resolver.py +++ b/suite/regress/x64_sym_resolver.py @@ -13,13 +13,13 @@ class TestX86(regress.RegressTest): def runTest(self): def sym_resolver(symbol, value): # is this the missing symbol we want to handle? - if symbol == "ZwQueryInformationProcess": + if symbol == b"ZwQueryInformationProcess": # put value of this symbol in @value - value = 0x7FF98A050840 + value[0] = 0x7FF98A050840 # we handled this symbol, so return true - print 'sym_resolver called!' + print("sym_resolver called!") return True - + # we did not handle this symbol, so return false return False From 4e595bdbaa0deed77deb8e98cfa8b2744f7cf958 Mon Sep 17 00:00:00 2001 From: Tobias Svensson Date: Sun, 24 Sep 2023 16:59:13 +0100 Subject: [PATCH 3/4] Move sym_resolver offset assertions into regression tests --- bindings/python/sample.py | 11 ----------- suite/regress/x64_sym_resolver.py | 32 +++++++++++++++++++++++++------ 2 files changed, 26 insertions(+), 17 deletions(-) diff --git a/bindings/python/sample.py b/bindings/python/sample.py index 6b26445e..db3e70dc 100755 --- a/bindings/python/sample.py +++ b/bindings/python/sample.py @@ -116,18 +116,7 @@ def sym_resolver(symbol, value): # test symbol resolver - encoding = test_sym_resolver(KS_ARCH_X86, KS_MODE_64, b"mov rax, 80", 0x1000, {}) - assert encoding == bytes.fromhex("48 c7 c0 50 00 00 00") - # X64 - Backward jump encoding = test_sym_resolver(KS_ARCH_X86, KS_MODE_64, b"jmp _l1; nop", 0x1000, {b"_l1": 0x1000}) assert encoding == bytes.fromhex("eb fe 90") - # X64 - Forward jump - encoding = test_sym_resolver(KS_ARCH_X86, KS_MODE_64, b"jmp _l1; nop", 0x1000, {b"_l1": 0x1002}) - assert encoding == bytes.fromhex("eb 00 90") - - # X64 - Absolute address - encoding = test_sym_resolver(KS_ARCH_X86, KS_MODE_64, b"jmp _l1; nop", 0x1000, {b"_l1": 0xAABBCCDD}) - assert encoding == 
bytes.fromhex("e9 d8 bc bb aa 90") - diff --git a/suite/regress/x64_sym_resolver.py b/suite/regress/x64_sym_resolver.py index e9f9144f..5b7189fa 100755 --- a/suite/regress/x64_sym_resolver.py +++ b/suite/regress/x64_sym_resolver.py @@ -4,20 +4,28 @@ # Github issue: #244 # Author: Duncan (mrexodia) +# Author: endofunky from keystone import * import regress + class TestX86(regress.RegressTest): def runTest(self): + symbol_table = { + b"ZwQueryInformationProcess": 0x7FF98A050840, + b"_l1": 0x1000, + b"_l2": 0x1002, + b"_l3": 0xAABBCCDD, + } + def sym_resolver(symbol, value): # is this the missing symbol we want to handle? - if symbol == b"ZwQueryInformationProcess": + if symbol in symbol_table: # put value of this symbol in @value - value[0] = 0x7FF98A050840 + value[0] = symbol_table[symbol] # we handled this symbol, so return true - print("sym_resolver called!") return True # we did not handle this symbol, so return false @@ -28,11 +36,23 @@ def sym_resolver(symbol, value): ks.sym_resolver = sym_resolver encoding, _ = ks.asm(b"call 0x7FF98A050840", 0x7FF98A081A38) - self.assertEqual(encoding, [ 0xE8, 0x03, 0xEE, 0xFC, 0xFF ]) + self.assertEqual(encoding, [0xE8, 0x03, 0xEE, 0xFC, 0xFF]) encoding, _ = ks.asm(b"call ZwQueryInformationProcess", 0x7FF98A081A38) - self.assertEqual(encoding, [ 0xE8, 0x03, 0xEE, 0xFC, 0xFF ]) + self.assertEqual(encoding, [0xE8, 0x03, 0xEE, 0xFC, 0xFF]) + + encoding, _ = ks.asm(b"mov rax, 80", 0x1000) + self.assertEqual(encoding, [0x48, 0xC7, 0xC0, 0x50, 0x00, 0x00, 0x00]) + + encoding, _ = ks.asm(b"jmp _l1; nop", 0x1000) + self.assertEqual(encoding, [0xEB, 0xFE, 0x90]) + + encoding, _ = ks.asm(b"jmp _l2; nop", 0x1000) + self.assertEqual(encoding, [0xEB, 0x00, 0x90]) + + encoding, _ = ks.asm(b"jmp _l3; nop", 0x1000) + self.assertEqual(encoding, [0xE9, 0xD8, 0xBC, 0xBB, 0xAA, 0x90]) -if __name__ == '__main__': +if __name__ == "__main__": regress.main() From 4cefe542c7a90c197e73d978743cd4151a1ea6ef Mon Sep 17 00:00:00 2001 From: Tobias Svensson 
Date: Sun, 24 Sep 2023 17:12:29 +0100 Subject: [PATCH 4/4] Fix switching radix from 16 to 10 again --- llvm/keystone/ks.cpp | 7 +++++ suite/regress/x64_radix.py | 63 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+) create mode 100644 suite/regress/x64_radix.py diff --git a/llvm/keystone/ks.cpp b/llvm/keystone/ks.cpp index af2b5991..7e7a41a6 100644 --- a/llvm/keystone/ks.cpp +++ b/llvm/keystone/ks.cpp @@ -558,6 +558,13 @@ ks_err ks_option(ks_engine *ks, ks_opt_type type, size_t value) case KS_OPT_SYNTAX: if (ks->arch != KS_ARCH_X86) return KS_ERR_OPT_INVALID; + + // Reset to radix 10, the default, first. When + // KS_OPT_SYNTAX_RADIX16 is given, this will be set to 16 again. + // This allows switching the radix on a keystone instance from 16 + // back to 10. + ks->MAI->setRadix(10); + switch(value) { default: return KS_ERR_OPT_INVALID; diff --git a/suite/regress/x64_radix.py b/suite/regress/x64_radix.py new file mode 100644 index 00000000..da646da1 --- /dev/null +++ b/suite/regress/x64_radix.py @@ -0,0 +1,63 @@ +#!/usr/bin/python + +# Test radix configuration for X86 + +# Github issue: #481 #436 #538 +# Author: endofunky + +from keystone import * + +import regress + + +class TestX86(regress.RegressTest): + def runTest(self): + # Default value without ks_option + ks = Ks(KS_ARCH_X86, KS_MODE_64) + + encoding, _ = ks.asm(b"add eax, 0x15", 0x1000) + self.assertEqual(encoding, [0x83, 0xC0, 0x15]) + + encoding, _ = ks.asm(b"add eax, 15h", 0x1000) + self.assertEqual(encoding, [0x83, 0xC0, 0x15]) + + encoding, _ = ks.asm(b"add eax, 15", 0x1000) + self.assertEqual(encoding, [0x83, 0xC0, 0x0F]) + + # NASM + RADIX16 + ks = Ks(KS_ARCH_X86, KS_MODE_64) + ks.syntax = KS_OPT_SYNTAX_NASM | KS_OPT_SYNTAX_RADIX16 + encoding, _ = ks.asm(b"add eax, 15", 0x1000) + self.assertEqual(encoding, [0x83, 0xC0, 0x15]) + + # AT&T + RADIX16 + ks = Ks(KS_ARCH_X86, KS_MODE_64) + ks.syntax = KS_OPT_SYNTAX_ATT | KS_OPT_SYNTAX_RADIX16 + encoding, _ = ks.asm(b"add $15, %eax",
0x1000) + self.assertEqual(encoding, [0x83, 0xC0, 0x15]) + + # Default with symbol resolver set (#481) + def sym_resolver(symbol, value): + return False + + ks = Ks(KS_ARCH_X86, KS_MODE_64) + ks.sym_resolver = sym_resolver + + encoding, _ = ks.asm(b"add eax, 15", 0x1000) + self.assertEqual(encoding, [0x83, 0xC0, 0x0F]) + + # Switching from 16 to 10 + ks = Ks(KS_ARCH_X86, KS_MODE_64) + ks.syntax = KS_OPT_SYNTAX_NASM | KS_OPT_SYNTAX_RADIX16 + + encoding, _ = ks.asm(b"add eax, 15", 0x1000) + self.assertEqual(encoding, [0x83, 0xC0, 0x15]) + + ks.syntax = KS_OPT_SYNTAX_NASM + + encoding, _ = ks.asm(b"add eax, 15", 0x1000) + self.assertEqual(encoding, [0x83, 0xC0, 0x0F]) + + +if __name__ == "__main__": + regress.main()