Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix symbol resolver issues #562

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 45 additions & 21 deletions bindings/python/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,50 +19,70 @@ def test_ks(arch, mode, code, syntax=0):
print("%02x " % i, end='')
print("]")

return bytes(encoding)


# test symbol resolver
def test_sym_resolver():
def test_sym_resolver(arch, mode, code, base, symbol_table):
def sym_resolver(symbol, value):
# is this the missing symbol we want to handle?
if symbol == "_l1":
if symbol in symbol_table:
# put value of this symbol in @value
value = 0x1002
value[0] = symbol_table[symbol]
# we handled this symbol, so return true
return True

# we did not handle this symbol, so return false
return False

ks = Ks(KS_ARCH_X86, KS_MODE_32)
ks = Ks(arch, mode)

# register callback for symbol resolver
ks.sym_resolver = sym_resolver

CODE = b"jmp _l1; nop; _l1:"
encoding, count = ks.asm(CODE, 0x1000)
encoding, count = ks.asm(code, base)

print("%s = [ " % CODE, end='')
print("%s = [ " % code, end='')
for i in encoding:
print("%02x " % i, end='')
print("]")

return bytes(encoding)


if __name__ == '__main__':
# X86
test_ks(KS_ARCH_X86, KS_MODE_16, b"add eax, ecx")
test_ks(KS_ARCH_X86, KS_MODE_32, b"add eax, ecx")
test_ks(KS_ARCH_X86, KS_MODE_64, b"add rax, rcx")
test_ks(KS_ARCH_X86, KS_MODE_32, b"add %ecx, %eax", KS_OPT_SYNTAX_ATT)
test_ks(KS_ARCH_X86, KS_MODE_64, b"add %rcx, %rax", KS_OPT_SYNTAX_ATT)

test_ks(KS_ARCH_X86, KS_MODE_32, b"add eax, 0x15")
test_ks(KS_ARCH_X86, KS_MODE_32, b"add eax, 15h");
test_ks(KS_ARCH_X86, KS_MODE_32, b"add eax, 15")

# RADIX16 syntax Intel (default syntax)
test_ks(KS_ARCH_X86, KS_MODE_32, b"add eax, 15", KS_OPT_SYNTAX_RADIX16)
encoding = test_ks(KS_ARCH_X86, KS_MODE_16, b"add eax, ecx")
assert encoding == bytes.fromhex("66 01 c8")

encoding = test_ks(KS_ARCH_X86, KS_MODE_32, b"add eax, ecx")
assert encoding == bytes.fromhex("01 c8")

encoding = test_ks(KS_ARCH_X86, KS_MODE_64, b"add rax, rcx")
assert encoding == bytes.fromhex("48 01 c8")

encoding = test_ks(KS_ARCH_X86, KS_MODE_32, b"add %ecx, %eax", KS_OPT_SYNTAX_ATT)
assert encoding == bytes.fromhex("01 c8")

encoding = test_ks(KS_ARCH_X86, KS_MODE_64, b"add %rcx, %rax", KS_OPT_SYNTAX_ATT)
assert encoding == bytes.fromhex("48 01 c8")

encoding = test_ks(KS_ARCH_X86, KS_MODE_32, b"add eax, 0x15")
assert encoding == bytes.fromhex("83 c0 15")

encoding = test_ks(KS_ARCH_X86, KS_MODE_32, b"add eax, 15h");
assert encoding == bytes.fromhex("83 c0 15")

encoding = test_ks(KS_ARCH_X86, KS_MODE_32, b"add eax, 15")
assert encoding == bytes.fromhex("83 c0 0f")

# RADIX16 syntax for Intel
encoding = test_ks(KS_ARCH_X86, KS_MODE_32, b"add eax, 15", KS_OPT_SYNTAX_RADIX16)
assert encoding == bytes.fromhex("83 c0 15")

# RADIX16 syntax for AT&T
test_ks(KS_ARCH_X86, KS_MODE_32, b"add $15, %eax", KS_OPT_SYNTAX_RADIX16 | KS_OPT_SYNTAX_ATT)
encoding = test_ks(KS_ARCH_X86, KS_MODE_32, b"add $15, %eax", KS_OPT_SYNTAX_RADIX16 | KS_OPT_SYNTAX_ATT)
assert encoding == bytes.fromhex("83 c0 15")

# ARM
test_ks(KS_ARCH_ARM, KS_MODE_ARM, b"sub r1, r2, r5")
Expand Down Expand Up @@ -95,4 +115,8 @@ def sym_resolver(symbol, value):
test_ks(KS_ARCH_SYSTEMZ, KS_MODE_BIG_ENDIAN, b"a %r0, 4095(%r15,%r1)")

# test symbol resolver
test_sym_resolver()

# X64 - Backward jump
encoding = test_sym_resolver(KS_ARCH_X86, KS_MODE_64, b"jmp _l1; nop", 0x1000, {b"_l1": 0x1000})
assert encoding == bytes.fromhex("eb fe 90")

8 changes: 7 additions & 1 deletion llvm/keystone/ks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -554,11 +554,17 @@ ks_err ks_close(ks_engine *ks)
KEYSTONE_EXPORT
ks_err ks_option(ks_engine *ks, ks_opt_type type, size_t value)
{
ks->MAI->setRadix(16);
switch(type) {
case KS_OPT_SYNTAX:
if (ks->arch != KS_ARCH_X86)
return KS_ERR_OPT_INVALID;

// Reset to the default radix (10) first; it is set to 16 again when
// KS_OPT_SYNTAX_RADIX16 is given. This allows a keystone instance to be
// switched from radix 16 back to radix 10.
ks->MAI->setRadix(10);

switch(value) {
default:
return KS_ERR_OPT_INVALID;
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/MC/MCAsmInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ MCAsmInfo::MCAsmInfo() {
Code32Directive = ".code32";
Code64Directive = ".code64";
AssemblerDialect = 0;
Radix = 10;
AllowAtInName = false;
SupportsQuotedNames = true;
UseDataRegionDirectives = false;
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/MC/MCAssembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout,
ks_sym_resolver resolver = (ks_sym_resolver)KsSymResolver;
if (resolver(Sym.getName().str().c_str(), &imm)) {
// resolver handled this symbol
Value = imm;
Value += imm;
IsResolved = true;
} else {
// resolver did not handle this symbol
Expand Down
63 changes: 63 additions & 0 deletions suite/regress/x64_radix.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#!/usr/bin/python

# Test radix configuration for X86

# Github issue: #481 #436 #538
# Author: endofunky

from keystone import *

import regress


class TestX86(regress.RegressTest):
    """Regression checks for how X86 integer literals are assembled under each radix setting."""

    def runTest(self):
        load_address = 0x1000
        # `add eax, imm8` encodings for the two immediates exercised below.
        add_eax_0x15 = [0x83, 0xC0, 0x15]
        add_eax_0x0f = [0x83, 0xC0, 0x0F]

        def fresh_engine():
            # Every scenario starts from a newly created 64-bit X86 engine.
            return Ks(KS_ARCH_X86, KS_MODE_64)

        def expect(engine, source, expected):
            # Assemble one statement and compare the emitted bytes.
            encoding, _ = engine.asm(source, load_address)
            self.assertEqual(encoding, expected)

        # No syntax option set: explicit hex spellings stay hex, and a bare
        # "15" is assembled as 0x0F.
        engine = fresh_engine()
        for source, expected in (
            (b"add eax, 0x15", add_eax_0x15),
            (b"add eax, 15h", add_eax_0x15),
            (b"add eax, 15", add_eax_0x0f),
        ):
            expect(engine, source, expected)

        # NASM syntax combined with RADIX16: a bare "15" is assembled as 0x15.
        engine = fresh_engine()
        engine.syntax = KS_OPT_SYNTAX_NASM | KS_OPT_SYNTAX_RADIX16
        expect(engine, b"add eax, 15", add_eax_0x15)

        # AT&T syntax combined with RADIX16.
        engine = fresh_engine()
        engine.syntax = KS_OPT_SYNTAX_ATT | KS_OPT_SYNTAX_RADIX16
        expect(engine, b"add $15, %eax", add_eax_0x15)

        # Registering a symbol resolver must leave the default radix alone (#481).
        def decline_symbol(symbol, value):
            return False

        engine = fresh_engine()
        engine.sym_resolver = decline_symbol
        expect(engine, b"add eax, 15", add_eax_0x0f)

        # The same engine can be switched to RADIX16 and back again.
        engine = fresh_engine()
        engine.syntax = KS_OPT_SYNTAX_NASM | KS_OPT_SYNTAX_RADIX16
        expect(engine, b"add eax, 15", add_eax_0x15)

        engine.syntax = KS_OPT_SYNTAX_NASM
        expect(engine, b"add eax, 15", add_eax_0x0f)


if __name__ == "__main__":
regress.main()
34 changes: 27 additions & 7 deletions suite/regress/x64_sym_resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,30 @@

# Github issue: #244
# Author: Duncan (mrexodia)
# Author: endofunky

from keystone import *

import regress


class TestX86(regress.RegressTest):
def runTest(self):
symbol_table = {
b"ZwQueryInformationProcess": 0x7FF98A050840,
b"_l1": 0x1000,
b"_l2": 0x1002,
b"_l3": 0xAABBCCDD,
}

def sym_resolver(symbol, value):
# is this the missing symbol we want to handle?
if symbol == "ZwQueryInformationProcess":
if symbol in symbol_table:
# put value of this symbol in @value
value = 0x7FF98A050840
value[0] = symbol_table[symbol]
# we handled this symbol, so return true
print 'sym_resolver called!'
return True

# we did not handle this symbol, so return false
return False

Expand All @@ -28,11 +36,23 @@ def sym_resolver(symbol, value):
ks.sym_resolver = sym_resolver

encoding, _ = ks.asm(b"call 0x7FF98A050840", 0x7FF98A081A38)
self.assertEqual(encoding, [ 0xE8, 0x03, 0xEE, 0xFC, 0xFF ])
self.assertEqual(encoding, [0xE8, 0x03, 0xEE, 0xFC, 0xFF])

encoding, _ = ks.asm(b"call ZwQueryInformationProcess", 0x7FF98A081A38)
self.assertEqual(encoding, [ 0xE8, 0x03, 0xEE, 0xFC, 0xFF ])
self.assertEqual(encoding, [0xE8, 0x03, 0xEE, 0xFC, 0xFF])

encoding, _ = ks.asm(b"mov rax, 80", 0x1000)
self.assertEqual(encoding, [0x48, 0xC7, 0xC0, 0x50, 0x00, 0x00, 0x00])

encoding, _ = ks.asm(b"jmp _l1; nop", 0x1000)
self.assertEqual(encoding, [0xEB, 0xFE, 0x90])

encoding, _ = ks.asm(b"jmp _l2; nop", 0x1000)
self.assertEqual(encoding, [0xEB, 0x00, 0x90])

encoding, _ = ks.asm(b"jmp _l3; nop", 0x1000)
self.assertEqual(encoding, [0xE9, 0xD8, 0xBC, 0xBB, 0xAA, 0x90])


if __name__ == '__main__':
if __name__ == "__main__":
regress.main()