From 4bb25ea4c9bef4fd51453a4156049b4aaad7e7a4 Mon Sep 17 00:00:00 2001 From: Anton Kochkov Date: Fri, 23 Aug 2019 18:27:03 +0800 Subject: [PATCH] Dirty, very dirty initial draft --- oasis/dalvik | 18 + plugins/dalvik/dalvik.ml | 12 + plugins/dalvik/dalvik_dex.ml | 1025 ++++++++++++++++++ plugins/dalvik/dalvik_dex.mli | 643 ++++++++++++ plugins/dalvik/dalvik_disasm.ml | 1712 +++++++++++++++++++++++++++++++ plugins/dalvik/dalvik_java.ml | 40 + plugins/dalvik/dalvik_lifter.ml | 716 +++++++++++++ plugins/dalvik/dalvik_parse.ml | 897 ++++++++++++++++ 8 files changed, 5063 insertions(+) create mode 100644 oasis/dalvik create mode 100644 plugins/dalvik/dalvik.ml create mode 100644 plugins/dalvik/dalvik_dex.ml create mode 100644 plugins/dalvik/dalvik_dex.mli create mode 100644 plugins/dalvik/dalvik_disasm.ml create mode 100644 plugins/dalvik/dalvik_java.ml create mode 100644 plugins/dalvik/dalvik_lifter.ml create mode 100644 plugins/dalvik/dalvik_parse.ml diff --git a/oasis/dalvik b/oasis/dalvik new file mode 100644 index 0000000000..c3752a8c46 --- /dev/null +++ b/oasis/dalvik @@ -0,0 +1,18 @@ +Flag mips + Description: Build Dalvik/DEX lifter + Default: true + +Library mips_plugin + XMETADescription: provide Dalvik/DEX lifter + Path: plugins/dalvik + FindlibName: bap-plugin-dalvik + Build$: flag(everything) || flag (dalvik) + BuildDepends: bap, bap-abi, bap-c, bap-core-theory + InternalModules: + Dalvik, + Dalvik_disasm, + Dalvik_dex, + Dalvik_java, + Dalvik_lifter + + XMETAExtraLines: tags="dalvik,lifter,arch,abi" diff --git a/plugins/dalvik/dalvik.ml b/plugins/dalvik/dalvik.ml new file mode 100644 index 0000000000..dfb73058d5 --- /dev/null +++ b/plugins/dalvik/dalvik.ml @@ -0,0 +1,12 @@ +open Core_kernel +open Bap_core_theory +open KB.Syntax + +let package = "dalvik" + +let () = Project.Input.register_loader "dex" @@ fun filename -> + let empty = Memmap.empty in + let prog = prog_of_dex filename in + Project.Input.create `dalvik filename ~code:empty ~data:empty + ~finish:(fun proj -> Project.with_program prog) + diff --git a/plugins/dalvik/dalvik_dex.ml b/plugins/dalvik/dalvik_dex.ml new file mode 100644 index 0000000000..a0d91302ba --- /dev/null +++ b/plugins/dalvik/dalvik_dex.ml @@ -0,0 +1,1025 @@ +(* + * Copyright (c) 2010-2014, + * Jinseong Jeon + * Kris Micinski + * Jeff Foster + * All rights reserved. + * + * Based on the src/dex.ml from https://github.com/plum-umd/redexer + *) + +(* Format reference is https://source.android.com/devices/tech/dalvik/dex-format.html + *) + +(***********************************************************************) +(* Dex *) +(***********************************************************************) + +module DA = DynArray + +module I = Dalvik_disasm + +module J = Java + +(***********************************************************************) +(* Basic Types *) +(***********************************************************************) + +exception Wrong_dex of string +exception Wrong_match of string +exception No_return +exception NOT_YET of string + +type dex = { + header : dex_header; + d_string_ids : link DA.t; + d_type_ids : link DA.t; + d_proto_ids : proto_id_item DA.t; + d_field_ids : field_id_item DA.t; + d_method_ids : method_id_item DA.t; + d_class_defs : class_def_item DA.t; + mutable d_data : data_item IM.t; +} + +and link = + | Idx of int + | Off of I.offset + +and dex_header = { + magic : string; + checksum : int64; + signature : char list; + mutable file_size : int; + header_size : int; (* usually, 0x70 *) + endian_tag : endian; (* usually, ENDIAN_CONSTANT *) + link : section; + map_off : link; + h_string_ids : section; + h_type_ids : section; + h_proto_ids : section; + h_field_ids : section; + h_method_ids : section; + h_class_defs : section; + h_data : section; +} + +and endian = + | LITTLE (* ENDIAN_CONSTANT = 0x12345678 *) + | BIG (* REVERSE_ENDIAN_CONSTANT = 0x78563412 *) + +and section = { + size : int; + offset : link; +} + +and proto_id_item = { + shorty : link; + mutable return_type : link; + parameter_off : link; +} + +and field_id_item = { + f_class_id : link; + mutable f_type_id : link; + f_name_id : link; +} + +and method_id_item = { + m_class_id : link; + m_proto_id : link; + m_name_id : link; +} + +and class_def_item = { + c_class_id : link; + mutable c_access_flag : int; + mutable superclass : link; + mutable interfaces : link; + source_file : link; + annotations : link; + mutable class_data : link; + static_values : link; +} + +and data_item = + | MAP_LIST of map_item list + | TYPE_LIST of link list + | ANNO_SET_REF of link list + | ANNO_SET of link list + | CLASS_DATA of class_data_item + | CODE_ITEM of code_item + | STRING_DATA of UTF8.t + | DEBUG_INFO of debug_info_item + | ANNOTATION of annotation_item + | STATIC_VALUE of encoded_value list + | ANNO_DIR of anno_dir_item + | INSTRUCTION of I.instr + | FILL_ARRAY of fill_array_data + | SWITCH of switch + +and map_item = { + type_of_item : type_code; + (* unsigned short padding *) + mi_size : int; + mi_offset : link; +} + +and type_code = + | TYPE_HEADER_ITEM (* 0x0000 *) + | TYPE_STRING_ITEM (* 0x0001 *) + | TYPE_TYPE_ID_ITEM (* 0x0002 *) + | TYPE_PROTO_ID_ITEM (* 0x0003 *) + | TYPE_FIELD_ID_ITEM (* 0x0004 *) + | TYPE_METHOD_ID_ITEM (* 0x0005 *) + | TYPE_CLASS_DEF_ITEM (* 0x0006 *) + | TYPE_MAP_LIST (* 0x1000 *) + | TYPE_TYPE_LIST (* 0x1001 *) + | TYPE_ANNOTATION_SET_REF_LIST (* 0x1002 *) + | TYPE_ANNOTATION_SET_ITEM (* 0x1003 *) + | TYPE_CLASS_DATA_ITEM (* 0x2000 *) + | TYPE_CODE_ITEM (* 0x2001 *) + | TYPE_STRING_DATA_ITEM (* 0x2002 *) + | TYPE_DEBUG_INFO_ITEM (* 0x2003 *) + | TYPE_ANNOTATION_ITEM (* 0x2004 *) + | TYPE_ENCODED_ARRAY_ITEM (* 0x2005 *) + | TYPE_ANNOTATION_DIRECTORY_ITEM (* 0x2006 *) + +and class_data_item = { + mutable static_fields : encoded_field list; + mutable instance_fields : encoded_field list; + mutable direct_methods : encoded_method list; + mutable virtual_methods : encoded_method list; +} + +and encoded_field = { + field_idx : link; + f_access_flag : int; +} + +and encoded_method = { + method_idx : link; + mutable m_access_flag : int; + code_off : link; +} + +and code_item = { + mutable registers_size : int; + mutable ins_size : int; + mutable outs_size : int; + mutable tries_size : int; + mutable debug_info_off : link; + mutable insns_size : int; + insns : link DA.t; + (* unsigned short padding *) + mutable tries : try_item list; + mutable c_handlers : encoded_catch_handler list; +} + +and switch = { + mutable sw_base : link; + sw_size : int; + sw_keys : int list; + sw_targets : link list; +} + +and fill_array_data = { + ad_width : int; + ad_size : int; + ad_data : I.operand list; +} + +and try_item = { + start_addr : link; + end_addr : link; + handler_off : link; +} + +and encoded_catch_handler = { + e_handlers : type_addr_pair list; + catch_all_addr : link; +} + +and type_addr_pair = { + mutable ch_type_idx : link; + addr : link; +} + +and debug_info_item = { + line_start : int; + parameter_name : link list; + mutable state_machine : (state_machine_instr * I.operand list) list; +} + +and state_machine_instr = + | DBG_END_SEQUENCE (* 0x00 *) + | DBG_ADVANCE_PC (* 0x01 *) + | DBG_ADVANCE_LINE (* 0x02 *) + | DBG_START_LOCAL (* 0x03 *) + | DBG_START_LOCAL_EXTENDED (* 0x04 *) + | DBG_END_LOCAL (* 0x05 *) + | DBG_RESTART_LOCAL (* 0x06 *) + | DBG_SET_PROLOGUE_END (* 0x07 *) + | DBG_SET_EPILOGUE_BEGIN (* 0x08 *) + | DBG_SET_FILE (* 0x09 *) + | DBG_SPECIAL (* 0x0a..0xff *) + +and anno_dir_item = { + class_anno_off : link; + fields : anno_off list; + methods : anno_off list; + parameters : anno_off list; +} + +and anno_off = { + target : link; + annotation_off : link; +} + +and annotation_item = { + visible : visibility; + annotation : encoded_annotation; +} + +and visibility = + | VISIBILITY_BUILD (* 0x00 *) + | VISIBILITY_RUNTIME (* 0x01 *) + | VISIBILITY_SYSTEM (* 0x02 *) + +and encoded_annotation = { + mutable an_type_idx : link; + elements : annotation_element list; +} + +and annotation_element = { + name_idx : link; + mutable value : encoded_value; +} + +and encoded_value = + | VALUE_BYTE of int64 (* 0x00 *) + | VALUE_SHORT of int64 (* 0x02 *) + | VALUE_CHAR of int64 (* 0x03 *) + | VALUE_INT of int64 (* 0x04 *) + | VALUE_LONG of int64 (* 0x06 *) + | VALUE_FLOAT of int64 (* 0x10 *) + | VALUE_DOUBLE of int64 (* 0x11 *) + | VALUE_STRING of int (* 0x17 *) + | VALUE_TYPE of int (* 0x18 *) + | VALUE_FIELD of int (* 0x19 *) + | VALUE_METHOD of int (* 0x1a *) + | VALUE_ENUM of int (* 0x1b *) + | VALUE_ARRAY of encoded_value list (* 0x1c *) + | VALUE_ANNOTATION of encoded_annotation (* 0x1d *) + | VALUE_NULL (* 0x1e *) + | VALUE_BOOLEAN of bool (* 0x1f *) + +(***********************************************************************) +(* Utilities *) +(***********************************************************************) + +exception Wrong_link of string + +(* to_idx : int -> link *) +let to_idx (i: int) : link = Idx i + +(* to_off : int -> link *) +let to_off (i: int) : link = Off (Int32.of_int i) + +(* of_idx : link -> int *) +let of_idx (l: link) : int = + match l with + | Idx idx -> idx + | _ -> raise (Wrong_link "of_idx") + +(* of_off : link -> int *) +let of_off (l: link) : int = + match l with + | Off off -> Int32.to_int off + | _ -> raise (Wrong_link "of_off") + +module IdxKey = +struct + type t = link + let compare id1 id2 = Pervasives.compare (of_idx id1) (of_idx id2) +end + +module OffKey = +struct + type t = link + let compare o1 o2 = Pervasives.compare (of_off o1) (of_off o2) +end + +(* opr2idx : I.operand -> link *) +let opr2idx (opr: I.operand) : link = + match opr with + | I.OPR_INDEX i -> to_idx i + | _ -> raise (Wrong_match "opr2idx") + +(* opr2off : I.operand -> link *) +let opr2off (opr: I.operand) : link = + match opr with + | I.OPR_OFFSET i32 -> Off i32 + | _ -> raise (Wrong_match "opr2off") + +(* idx2opr : link -> I.operand *) +let idx2opr (l: link) : I.operand = + match l with + | Idx idx -> I.OPR_INDEX idx + | _ -> raise (Wrong_match "idx2opr") + +(* off2opr : link -> I.operand *) +let off2opr (l: link) : I.operand = + match l with + | Off off -> I.OPR_OFFSET off + | _ -> raise (Wrong_match "off2opr") + +(* get_off : link -> I.offset *) +let get_off (l: link) : I.offset = + match l with + | Off off -> off + | _ -> raise (Wrong_link "get_off") + +(* str_to_endian : string -> endian *) +let str_to_endian (str: string) : endian = + if (str = "0x12345678L") then LITTLE + else if (str = "0x78563412L") then BIG + else raise (Wrong_dex "str_to_endian") + +(* endian_to_str : endian -> string *) +let endian_to_str (e: endian) : string = + match e with + | LITTLE -> "0x12345678L" + | BIG -> "0x78563412L" + +(* to_type_code : int -> type_code *) +let to_type_code (i: int) : type_code = + match i with + | 0x0000 -> TYPE_HEADER_ITEM + | 0x0001 -> TYPE_STRING_ITEM + | 0x0002 -> TYPE_TYPE_ID_ITEM + | 0x0003 -> TYPE_PROTO_ID_ITEM + | 0x0004 -> TYPE_FIELD_ID_ITEM + | 0x0005 -> TYPE_METHOD_ID_ITEM + | 0x0006 -> TYPE_CLASS_DEF_ITEM + | 0x1000 -> TYPE_MAP_LIST + | 0x1001 -> TYPE_TYPE_LIST + | 0x1002 -> TYPE_ANNOTATION_SET_REF_LIST + | 0x1003 -> TYPE_ANNOTATION_SET_ITEM + | 0x2000 -> TYPE_CLASS_DATA_ITEM + | 0x2001 -> TYPE_CODE_ITEM + | 0x2002 -> TYPE_STRING_DATA_ITEM + | 0x2003 -> TYPE_DEBUG_INFO_ITEM + | 0x2004 -> TYPE_ANNOTATION_ITEM + | 0x2005 -> TYPE_ENCODED_ARRAY_ITEM + | 0x2006 -> TYPE_ANNOTATION_DIRECTORY_ITEM + | _ -> raise (Wrong_dex ("illegal type code: "^(string_of_int i))) + +(* of_type_code : type_code -> int *) +let of_type_code (t: type_code) : int = + match t with + | TYPE_HEADER_ITEM -> 0x0000 + | TYPE_STRING_ITEM -> 0x0001 + | TYPE_TYPE_ID_ITEM -> 0x0002 + | TYPE_PROTO_ID_ITEM -> 0x0003 + | TYPE_FIELD_ID_ITEM -> 0x0004 + | TYPE_METHOD_ID_ITEM -> 0x0005 + | TYPE_CLASS_DEF_ITEM -> 0x0006 + | TYPE_MAP_LIST -> 0x1000 + | TYPE_TYPE_LIST -> 0x1001 + | TYPE_ANNOTATION_SET_REF_LIST -> 0x1002 + | TYPE_ANNOTATION_SET_ITEM -> 0x1003 + | TYPE_CLASS_DATA_ITEM -> 0x2000 + | TYPE_CODE_ITEM -> 0x2001 + | TYPE_STRING_DATA_ITEM -> 0x2002 + | TYPE_DEBUG_INFO_ITEM -> 0x2003 + | TYPE_ANNOTATION_ITEM -> 0x2004 + | TYPE_ENCODED_ARRAY_ITEM -> 0x2005 + | TYPE_ANNOTATION_DIRECTORY_ITEM -> 0x2006 + +(* type_code_to_str : type_code -> string *) +let type_code_to_str (tc: type_code) : string = + match tc with + | TYPE_HEADER_ITEM -> "HDR_ITM" + | TYPE_STRING_ITEM -> "STR_ITM" + | TYPE_TYPE_ID_ITEM -> "TYP_ITM" + | TYPE_PROTO_ID_ITEM -> "PRT_ITM" + | TYPE_FIELD_ID_ITEM -> "FLD_ITM" + | TYPE_METHOD_ID_ITEM -> "MTD_ITM" + | TYPE_CLASS_DEF_ITEM -> "CLS_DEF" + | TYPE_MAP_LIST -> "MAP_LST" + | TYPE_TYPE_LIST -> "TYP_LST" + | TYPE_ANNOTATION_SET_REF_LIST -> "ANN_REF" + | TYPE_ANNOTATION_SET_ITEM -> "ANN_SET" + | TYPE_CLASS_DATA_ITEM -> "CLS_ITM" + | TYPE_CODE_ITEM -> "COD_ITM" + | TYPE_STRING_DATA_ITEM -> "STR_DAT" + | TYPE_DEBUG_INFO_ITEM -> "DBG_INF" + | TYPE_ANNOTATION_ITEM -> "ANN_ITM" + | TYPE_ENCODED_ARRAY_ITEM -> "ENC_ARR" + | TYPE_ANNOTATION_DIRECTORY_ITEM -> "ANN_DIR" + +(* machine_instr_to_str : state_machine_instr -> string *) +let machine_instr_to_str (op: state_machine_instr) : string = + match op with + | DBG_END_SEQUENCE -> "0x00" + | DBG_ADVANCE_PC -> "0x01" + | DBG_ADVANCE_LINE -> "0x02" + | DBG_START_LOCAL -> "0x03" + | DBG_START_LOCAL_EXTENDED -> "0x04" + | DBG_END_LOCAL -> "0x05" + | DBG_RESTART_LOCAL -> "0x06" + | DBG_SET_PROLOGUE_END -> "0x07" + | DBG_SET_EPILOGUE_BEGIN -> "0x08" + | DBG_SET_FILE -> "0x09" + | DBG_SPECIAL -> "0x??" + +(***********************************************************************) +(* Access flags *) +(***********************************************************************) + +type access_flag = + | ACC_PUBLIC + | ACC_PRIVATE + | ACC_PROTECTED + | ACC_STATIC + | ACC_FINAL + | ACC_SYNCHRONIZED + | ACC_VOLATILE + | ACC_BRIDGE + | ACC_TRANSIENT + | ACC_VARARGS + | ACC_NATIVE + | ACC_INTERFACE + | ACC_ABSTRACT + | ACC_STRICT + | ACC_SYNTHETIC + | ACC_ANNOTATION + | ACC_ENUM + | ACC_CONSTRUCTOR + | ACC_DECLARED_SYNCHRONIZED + +type acc_kind = + | ACC_FOR_CLASSES + | ACC_FOR_FIELDS + | ACC_FOR_METHODS + +(* to_acc_flag : acc_kind -> access_flag list -> int *) +let rec to_acc_flag (k: acc_kind) = function + | [] -> 0 + | h::t -> + ( + let v = + match k, h with + | _, ACC_PUBLIC -> 0x1 + | _, ACC_PRIVATE -> 0x2 + | _, ACC_PROTECTED -> 0x4 + | _, ACC_STATIC -> 0x8 + | _, ACC_FINAL -> 0x10 + | ACC_FOR_METHODS, ACC_SYNCHRONIZED -> 0x20 + | ACC_FOR_FIELDS , ACC_VOLATILE -> 0x40 + | ACC_FOR_METHODS, ACC_BRIDGE -> 0x40 + | ACC_FOR_FIELDS , ACC_TRANSIENT -> 0x80 + | ACC_FOR_METHODS, ACC_VARARGS -> 0x80 + | ACC_FOR_METHODS, ACC_NATIVE -> 0x100 + | ACC_FOR_CLASSES, ACC_INTERFACE -> 0x200 + | ACC_FOR_CLASSES, ACC_ABSTRACT + | ACC_FOR_METHODS, ACC_ABSTRACT -> 0x400 + | ACC_FOR_METHODS, ACC_STRICT -> 0x800 + | _, ACC_SYNTHETIC -> 0x1000 + | ACC_FOR_CLASSES, ACC_ANNOTATION -> 0x2000 + | ACC_FOR_CLASSES, ACC_ENUM + | ACC_FOR_FIELDS , ACC_ENUM -> 0x4000 + | ACC_FOR_METHODS, ACC_CONSTRUCTOR -> 0x10000 + | ACC_FOR_METHODS, ACC_DECLARED_SYNCHRONIZED -> 0x20000 + in + v lor (to_acc_flag k t) + ) + +(* chk_acc_flag : acc_kind -> access_flag list -> int -> bool *) +let chk_acc_flag (k: acc_kind) (flags: access_flag list) (flag: int) : bool = + 0 <> flag land (to_acc_flag k flags) + +(* is_static : int -> bool *) +let is_static (flag: int) : bool = + chk_acc_flag ACC_FOR_METHODS [ACC_STATIC] flag + +(* is_interface : int -> bool *) +let is_interface (flag: int) : bool = + chk_acc_flag ACC_FOR_CLASSES [ACC_INTERFACE] flag + +(* is_synthetic : int -> bool *) +let is_synthetic (flag: int) : bool = + chk_acc_flag ACC_FOR_METHODS [ACC_SYNTHETIC] flag + +(* pub : access_flag list *) +let pub = [ACC_PUBLIC] + +(* spub : access_flag list *) +let spub = ACC_STATIC :: pub + +(***********************************************************************) +(* DEX Navigation *) +(***********************************************************************) + +let no_index = -1 (* NO_INDEX = 0xffffffff (= -1 if signed int) *) +let no_offset = 0 + +let no_idx = to_idx no_index +let no_off = to_off no_offset + +(* get_data_item : dex -> link -> data_item *) +let get_data_item dx (l: link) : data_item = + let off = get_off l in + try IM.find off dx.d_data with Not_found -> + let hx = Printf.sprintf "0x%08X" (Int32.to_int off) in + raise (Wrong_dex ("get_data_item, no such offset: "^hx)) + +(* get_ins : dex -> link -> I.instr *) +let get_ins dx (l: link) : I.instr = + match get_data_item dx l with + | INSTRUCTION ins -> ins + | _ -> raise (Wrong_match "get_ins") + +(* is_ins : dex -> link -> bool *) +let is_ins dx (l: link) : bool = + try ignore (get_ins dx l); true with _ -> false + +(* get_str : dex -> link -> string *) +let get_str dx (sid: link) : string = + let ditm = get_data_item dx (DA.get dx.d_string_ids (of_idx sid)) in + match ditm with + | STRING_DATA str -> str + | _ -> raise (Wrong_match "get_str") + +(* find_str : dex -> string -> link *) +let find_str dx (str: string) : link = + let finder off : bool = + match get_data_item dx off with + | STRING_DATA str' when 0 = String.compare str' str -> true + | _ -> false + in + try to_idx (DA.index_of finder dx.d_string_ids) + with Not_found -> no_idx + +(* get_ty_str : dex -> link -> string *) +let get_ty_str dx (tid: link) : string = + if tid = no_idx then "" else + get_str dx (DA.get dx.d_type_ids (of_idx tid)) + +(* find_ty_str : dex -> string -> link *) +let find_ty_str dx (str: string) : link = + let finder sid : bool = + 0 = String.compare str (get_str dx sid) + in + try to_idx (DA.index_of finder dx.d_type_ids) + with Not_found -> no_idx + +let ty_comp_possibly_relaxed dx tid1 tid2 r = + let s1 = get_ty_str dx tid1 + and s2 = get_ty_str dx tid2 in + if r then + try + let c1 = J.get_class_name s1 + and c2 = J.get_class_name s2 + in String.compare c1 c2 + with _ -> String.compare s1 s2 + else + String.compare s1 s2 + +(* ty_comp : dex -> link -> link -> int *) +let ty_comp dx (tid1: link) (tid2: link) : int = + ty_comp_possibly_relaxed dx tid1 tid2 false + +(* get_ty_lst : dex -> link -> link list *) +let get_ty_lst dx (off: link) : link list = + if off = no_off then [] else + match get_data_item dx off with + | TYPE_LIST tl -> tl + | _ -> raise (Wrong_match "get_ty_lst") + +(* get_fit : dex -> link -> field_id_item *) +let get_fit dx (fid: link) : field_id_item = + DA.get dx.d_field_ids (of_idx fid) + +(* get_mit : dex -> link -> method_id_item *) +let get_mit dx (mid: link) : method_id_item = + DA.get dx.d_method_ids (of_idx mid) + +(* get_pit : dex -> method_id_item -> proto_id_item *) +let get_pit dx (mit: method_id_item) : proto_id_item = + DA.get dx.d_proto_ids (of_idx mit.m_proto_id) + +(* get_fty : dex -> field_id_item -> link *) +let get_fty dx (fit: field_id_item) : link = + fit.f_type_id + +(* get_argv : dex -> method_id_item -> link list *) +let get_argv dx (mit: method_id_item) : link list = + let pit = get_pit dx mit in + get_ty_lst dx pit.parameter_off + +(* get_rety : dex -> method_id_item -> link *) +let get_rety dx (mit: method_id_item) : link = + (get_pit dx mit).return_type + +(* fld_comp : dex -> field_id_item -> field_id_item -> int *) +let fld_comp dx fit1 fit2 : int = + let fname1 = get_str dx fit1.f_name_id + and fname2 = get_str dx fit2.f_name_id in + let c1 = compare fname1 fname2 in + if c1 <> 0 then c1 else ty_comp dx fit1.f_type_id fit2.f_type_id + +let rec ty_lst_comp_possibly_relaxed dx l1 l2 r = + match l1, l2 with + | [], [] -> 0 | [], _ -> -1 | _, [] -> 1 + | h1::t1, h2::t2 -> + let c = ty_comp_possibly_relaxed dx h1 h2 r in + if c <> 0 then c else ty_lst_comp_possibly_relaxed dx t1 t2 r + +(* ty_lst_comp : dex -> link list -> link list -> int *) +let ty_lst_comp dx (l1: link list) (l2: link list) : int = + ty_lst_comp_possibly_relaxed dx l1 l2 false + +(* ty_lst_comp_relaxed : dex -> link list -> link list -> int *) +let ty_lst_comp_relaxed dx (l1: link list) (l2: link list) : int = + ty_lst_comp_possibly_relaxed dx l1 l2 true + +let mtd_comp_possibly_relaxed dx mit1 mit2 r : int = + let mname1 = get_str dx mit1.m_name_id + and mname2 = get_str dx mit2.m_name_id in + let c1 = compare mname1 mname2 in + if c1 <> 0 then c1 else + let rety1 = get_rety dx mit1 + and rety2 = get_rety dx mit2 + and argv1 = get_argv dx mit1 + and argv2 = get_argv dx mit2 in + ty_lst_comp_possibly_relaxed dx (rety1::argv1) (rety2::argv2) r + +(* mtd_comp : dex -> method_id_item -> method_id_item -> int *) +let mtd_comp dx mit1 mit2 : int = + mtd_comp_possibly_relaxed dx mit1 mit2 false + +(* mtd_comp_relaxed : dex -> method_id_item -> method_id_item -> int *) +let mtd_comp_relaxed dx mit1 mit2 : int = + mtd_comp_possibly_relaxed dx mit1 mit2 true + +(* get_cid_from_fid : dex -> link -> link *) +let get_cid_from_fid dx (fid: link) : link = + (get_fit dx fid).f_class_id + +(* get_cid_from_mid : dex -> link -> link *) +let get_cid_from_mid dx (mid: link) : link = + (get_mit dx mid).m_class_id + +(* get_fld_name : dex -> link -> string *) +let get_fld_name dx (fid: link) : string = + get_str dx (get_fit dx fid).f_name_id + +(* get_mtd_name : dex -> link -> string *) +let get_mtd_name dx (mid: link) : string = + get_str dx (get_mit dx mid).m_name_id + +(* get_fld_full_name : dex -> link -> string *) +let get_fld_full_name dx (fid: link) : string = + let cid = get_cid_from_fid dx fid in + let cname = get_ty_str dx cid + and fname = get_fld_name dx fid in + cname^"."^fname + +(* get_mtd_full_name : dex -> link -> string *) +let get_mtd_full_name dx (mid: link) : string = + let cid = get_cid_from_mid dx mid in + let cname = get_ty_str dx cid + and mname = get_mtd_name dx mid in + cname^"->"^mname + +(* get_mtd_sig : dex -> link -> string *) +let get_mtd_sig dx (mid: link) : string = + let mname = get_mtd_full_name dx mid + and mit = get_mit dx mid in + let argv = List.map (get_ty_str dx) (get_argv dx mit) + and rety = get_ty_str dx (get_rety dx mit) in + mname^"("^(List.fold_left (^) "" argv)^")"^rety + +(* get_cid : dex -> string -> link *) +let get_cid dx (name: string) : link = + find_ty_str dx name + +(* get_cdef : dex -> link -> class_def_item *) +let get_cdef dx (cid: link) : class_def_item = + let finder (cdef: class_def_item) : bool = + cid = cdef.c_class_id + in + DA.get dx.d_class_defs (DA.index_of finder dx.d_class_defs) + +let get_classes dx (f: link -> bool) : link list = + let folder acc (cdef: class_def_item) = + let cid = cdef.c_class_id in + if f cid then cid :: acc else acc + in + DA.fold_left folder [] dx.d_class_defs + +(* get_interfaces : dex -> link -> link list *) +let get_interfaces dx (cid: link) : link list = + try get_ty_lst dx (get_cdef dx cid).interfaces + with Not_found -> [] + +(* get_implementers : dex -> link -> link list *) +let get_implementers dx (cid: link) : link list = + let f cid' = + List.mem cid (get_interfaces dx cid') + in + get_classes dx f + +(* get_superclass : dex -> link -> link *) +let get_superclass dx cid : link = + try (get_cdef dx cid).superclass + with Not_found -> no_idx + +(* get_superclasses : dex -> link -> link list *) +let get_superclasses dx (cid: link) : link list = + let rec h acc = function x -> + if x = no_idx then acc + else h (x::acc) (get_superclass dx x) + in h [] cid + +(* in_hierarchy : dex -> (link -> bool) -> link -> bool *) +let rec in_hierarchy dx (f: link -> bool) (cid: link) : bool = + if cid = no_idx then false else + if f cid then true else in_hierarchy dx f (get_superclass dx cid) + +(* is_superclass : dex -> link -> link -> bool *) +let is_superclass dx (cid: link) (sup: link) : bool = + in_hierarchy dx ((=) sup) cid + +(* is_innerclass : dex -> link -> link -> bool *) +let is_innerclass dx (cid: link) (inner: link) : bool = + let cname = get_ty_str dx cid + and inner_name = get_ty_str dx inner in + 0 = String.compare cname (J.get_owning_class inner_name) + +(* get_innerclasses : dex -> link -> link list *) +let get_innerclasses dx (cid: link) : link list = + let f cid' = + J.is_inner_class (get_ty_str dx cid') + && is_innerclass dx cid cid' + in + get_classes dx f + +(* get_owning_class : dex -> link -> link *) +let get_owning_class dx (cid: link) : link = + let cname = get_ty_str dx cid in + if not (J.is_inner_class cname) then no_idx else + get_cid dx (J.get_owning_class cname) + +(* get_flds : dex -> link -> (link * field_id_item) list *) +let get_flds dx (cid: link) : (link * field_id_item) list = + let folder (id, acc) (fit: field_id_item) = + let nid = id + 1 in + if fit.f_class_id = cid + then (nid, (to_idx id, fit)::acc) else (nid, acc) + in + snd (DA.fold_left folder (0,[]) dx.d_field_ids) + +(* get_fldS : dex -> link -> (link * field_id_item) list *) +let get_fldS dx (cid: link) : (link * field_id_item) list = + let rec collect_super cid' prv = + let flds = get_flds dx cid' in + let prv' = List.rev_append (L.rev flds) prv in + let sid = get_superclass dx cid' in + if sid <> no_idx then collect_super sid prv' else prv' + in + collect_super cid [] + +(* get_the_fld : dex -> link -> string -> link * field_id_item *) +let rec get_the_fld dx (cid: link) (fname: string) : link * field_id_item = + let finder (cid': link) (fit: field_id_item) : bool = + fit.f_class_id = cid' && get_str dx fit.f_name_id = fname + in + try + let fid = DA.index_of (finder cid) dx.d_field_ids in + to_idx fid, get_fit dx (to_idx fid) + with Not_found -> + ( + let sid = get_superclass dx cid in + if sid <> no_idx then get_the_fld dx sid fname + else raise (Wrong_dex ("get_the_fld: can't find the field: "^fname)) + ) + +(* own_the_fld : dex -> link -> link -> bool *) +let own_the_fld dx (cid: link) (fid: link) : bool = + get_cid_from_fid dx fid = cid + +(* get_mtds : dex -> link -> (link * method_id_item) list *) +let get_mtds dx (cid: link) : (link * method_id_item) list = + let folder (id, acc) (mit: method_id_item) = + let nid = id + 1 in + if 0 = ty_comp dx mit.m_class_id cid + then (nid, (to_idx id, mit)::acc) else (nid, acc) + in + snd (DA.fold_left folder (0,[]) dx.d_method_ids) + +(* get_mtdS : dex -> link -> (link * method_id_item) list *) +let get_mtdS dx (cid: link) : (link * method_id_item) list = + let rec collect_super cid' prv = + let mtds = get_mtds dx cid' in + let folder acc (id, mit) = + let m_finder (_, mit') : bool = + mtd_comp dx mit mit' = 0 + in (* to avoid redundancy *) + try ignore (List.find m_finder acc); acc + with Not_found -> (id, mit)::acc + in + let prv' = List.fold_left folder prv mtds in + let sid = get_superclass dx cid' in + if sid <> no_idx then collect_super sid prv' else prv' + in + collect_super cid [] + +(* get_supermethod: dex -> link -> link -> link *) +let rec get_supermethod dx (cid: link) (mid: link) : link = + let mit = get_mit dx mid in + let finder (_, mit') : bool = + mtd_comp dx mit mit' = 0 + in + let sid = get_superclass dx cid in + if sid = no_idx then no_idx else + try fst (List.find finder (get_mtds dx sid)) + with Not_found -> get_supermethod dx sid mid + +(* get_the_mtd_abstr : dex -> link -> (link -> method_id_item -> bool) + -> link * method_id_item *) +let rec get_the_mtd_abstr dx (cid: link) finder : link * method_id_item = + try + let mid = DA.index_of (finder cid) dx.d_method_ids in + to_idx mid, get_mit dx (to_idx mid) + with Not_found -> + ( + let sid = get_superclass dx cid in + if sid <> no_idx then get_the_mtd_abstr dx sid finder + else raise (Wrong_dex ("get_the_mtd: can't find the method")) + ) + +(* get_the_mtd : dex -> link -> string -> link * method_id_item *) +let get_the_mtd dx (cid: link) (mname: string) : link * method_id_item = + let finder (cid': link) (mit: method_id_item) : bool = + 0 = ty_comp dx mit.m_class_id cid' && + 0 = String.compare mname (get_str dx mit.m_name_id) + in + try + get_the_mtd_abstr dx cid finder + with (Wrong_dex _) -> + raise (Wrong_dex ("get_the_mtd: can't find method: "^mname)) + +(* get_the_mtd_shorty : dex -> link -> string -> string + -> link * method_id_item *) +let get_the_mtd_shorty dx cid mname shorty : link * method_id_item = + let finder (cid': link) (mit: method_id_item) : bool = + let pit = get_pit dx mit in + 0 = String.compare shorty (get_str dx pit.shorty) && + 0 = ty_comp dx mit.m_class_id cid' && + 0 = String.compare mname (get_str dx mit.m_name_id) + in + get_the_mtd_abstr dx cid finder + +(* own_the_mtd : dex -> link -> link -> bool *) +let own_the_mtd dx (cid: link) (mid: link) : bool = + get_cid_from_mid dx mid = cid + +(* get_cdata : dex -> link -> link * class_data_item *) +let get_cdata dx (cid: link) : link * class_data_item = + let off = (get_cdef dx cid).class_data in + match get_data_item dx off with + | CLASS_DATA cdat -> off, cdat + | _ -> raise (Wrong_match "get_cdata: not CLASS_DATA") + +(* get_stt_flds : dex -> link -> (link * encoded_value option) list *) +let get_stt_flds dx (cid: link) : (link * encoded_value option) list = + let cdef = get_cdef dx cid in + let _, cdat = get_cdata dx cid in + let evs = + if no_off = cdef.static_values then [] else + match get_data_item dx cdef.static_values with + | STATIC_VALUE evs -> evs + | _ -> raise (Wrong_match "get_stt_flds: not STATIC_VALUE") + in + let rec iter fids evs = + match fids, evs with + | fid::tl1, ev::tl2 -> (fid, Some ev) :: (iter tl1 tl2) + | fid::tl1, [] -> (fid, None) :: (iter tl1 []) + | [], [] -> [] + in + iter (List.map (fun efld -> efld.field_idx) cdat.static_fields) evs + +(* get_emtd : dex -> link -> link -> encoded_method *) +let get_emtd dx (cid: link) (mid: link) : encoded_method = + try + let emth_finder (emth: encoded_method) : bool = + mid = emth.method_idx + and _, cdat = get_cdata dx cid in + let mtds = cdat.direct_methods @ cdat.virtual_methods in + List.find emth_finder mtds + with Not_found -> + let mname = get_mtd_name dx mid in + raise (Wrong_dex ("get_emtd: not defined: "^mname)) + +(* get_citm : dex -> link -> link -> link * code_item *) +let get_citm dx (cid: link) (mid: link) : link * code_item = + let emth = get_emtd dx cid mid in + let off = emth.code_off in + match get_data_item dx off with + | CODE_ITEM citm -> off, citm + | _ -> raise (Wrong_match "get_citm: not CODE_ITEM") + +(* calc_this : code_item -> int *) +let calc_this (citm: code_item) : int = + citm.registers_size - citm.ins_size + +(* is_param : code_item -> int -> bool *) +let is_param (citm: code_item) (r: int) : bool = + let this = calc_this citm in + this <= r && r < citm.registers_size + +(***********************************************************************) +(* DEX modification helper *) +(***********************************************************************) + +(* empty_section : unit -> section *) +let empty_section () : section = + { + size = 0; + offset = no_off; + } + +(* empty_dex : unit -> dex *) +let empty_dex () : dex = + let hd = { + magic = "dex\n035"; + checksum = Int64.zero; + signature = []; + file_size = 0; + header_size = 0x70; + endian_tag = LITTLE; + link = empty_section (); + map_off = no_off; + h_string_ids = empty_section (); + h_type_ids = empty_section (); + h_proto_ids = empty_section (); + h_field_ids = empty_section (); + h_method_ids = empty_section (); + h_class_defs = empty_section (); + h_data = empty_section (); + } in + { + header = hd; + d_string_ids = DA.create (); + d_type_ids = DA.create (); + d_proto_ids = DA.create (); + d_field_ids = DA.create (); + d_method_ids = DA.create (); + d_class_defs = DA.create (); + d_data = IM.empty; + } + +(* empty_citm : unit -> code_item *) +let empty_citm () : code_item = + { + registers_size = 0; + ins_size = 0; + outs_size = 0; + tries_size = 0; + debug_info_off = no_off; + insns_size = 0; + insns = DA.create (); + tries = []; + c_handlers = []; + } + +(* insrt_data : dex -> link -> data_item -> unit *) +let insrt_data dx (off: link) (data: data_item) : unit = + dx.d_data <- IM.add (get_off off) data dx.d_data + +(* rm_data : dex -> link -> unit *) +let rm_data dx (off: link) : unit = + dx.d_data <- IM.remove (get_off off) dx.d_data + +(* insrt_ins : dex -> link -> I.instr -> unit *) +let insrt_ins dx (off: link) (ins: I.instr) : unit = + insrt_data dx off (INSTRUCTION ins) + +(* insrt_str : dex -> link -> string -> unit *) +let insrt_str dx (off: link) (str: string) : unit = + insrt_data dx off (STRING_DATA str) + +(* insrt_ty_lst : dex -> link -> link list -> unit *) +let insrt_ty_lst dx (off: link) (tl: link list) : unit = + insrt_data dx off (TYPE_LIST tl) + +(* insrt_stt : dex -> link -> encoded_value list -> unit *) +let insrt_stt dx (off: link) (evl : encoded_value list) : unit = + insrt_data dx off (STATIC_VALUE evl) + +(* insrt_citm : dex -> link -> code_item -> unit *) +let insrt_citm dx (off: link) (citm: code_item) : unit = + insrt_data dx off (CODE_ITEM citm) + diff --git a/plugins/dalvik/dalvik_dex.mli b/plugins/dalvik/dalvik_dex.mli new file mode 100644 index 0000000000..e14cdc0528 --- /dev/null +++ b/plugins/dalvik/dalvik_dex.mli @@ -0,0 +1,643 @@ +(* + * Copyright (c) 2010-2014, + * Jinseong Jeon + * Kris Micinski + * Jeff Foster + * All rights reserved. + * + * Based on the src/dex.mli from https://github.com/plum-umd/redexer + *) + +(* Format reference is https://source.android.com/devices/tech/dalvik/dex-format.html + *) + +(***********************************************************************) +(* Dex *) +(***********************************************************************) + +(** This module defines types for DEX binary and provides utility functions + for traversing DEX file and getting info from DEX file. *) + +(** {2 Types} *) + +(** raise if something is logically incorrect *) +exception Wrong_dex of string + +(** raise if there is no other cases for match block *) +exception Wrong_match of string + +(** raise when attempting to get last instruction of method that has + no return *) +exception No_return + +(** raise if something is not implemented yet *) +exception NOT_YET of string + +(** The top-level representation of a DEX binary file *) +type dex = { + header : dex_header; + d_string_ids : link DynArray.t; + d_type_ids : link DynArray.t; + d_proto_ids : proto_id_item DynArray.t; + d_field_ids : field_id_item DynArray.t; + d_method_ids : method_id_item DynArray.t; + d_class_defs : class_def_item DynArray.t; + mutable d_data : data_item Instr.IM.t; +} + +(** encapsulation of in/direct access *) +and link = + | Idx of int + | Off of Instr.offset + +(** header_item format *) +and dex_header = { + magic : string; + checksum : int64; + signature : char list; + mutable file_size : int; + header_size : int; + endian_tag : endian; + link : section; + map_off : link; + h_string_ids : section; + h_type_ids : section; + h_proto_ids : section; + h_field_ids : section; + h_method_ids : section; + h_class_defs : section; + h_data : section; +} + +(** [endian_tag] within {!dex_header} *) +and endian = + | LITTLE (** ENDIAN_CONSTANT = 0x12345678 *) + | BIG (** REVERSE_ENDIAN_CONSTANT = 0x78563412 *) + +(** a pair of size and offset, used at {!dex_header} *) +and section = { + size : int; + offset : link; +} + +(** {!proto_id_item} appears in the [d_proto_ids] *) +and proto_id_item = { + shorty : link; + mutable return_type : link; + parameter_off : link; +} + +(** {!field_id_item} appears in the [d_field_ids] *) +and field_id_item = { + f_class_id : link; + mutable f_type_id : link; + f_name_id : link; +} + +(** {!method_id_item} appears in the [d_method_ids] *) +and method_id_item = { + m_class_id : link; + m_proto_id : link; + m_name_id : link; +} + +(** {!class_def_item} appears in the [d_class_defs] *) +and class_def_item = { + c_class_id : link; + mutable c_access_flag : int; + mutable superclass : link; + mutable interfaces : link; + source_file : link; + annotations : link; + mutable class_data : link; + static_values : link; +} + +(** items in the data pool, which appears in the [d_data] *) +and data_item = + | MAP_LIST of map_item list + | TYPE_LIST of link list + | ANNO_SET_REF of link list (** [annotation_set_ref_list] *) + | ANNO_SET of link list (** [annotation_set_item] *) + | CLASS_DATA of class_data_item + | CODE_ITEM of code_item + | STRING_DATA of UTF8.t (** same as [string] *) + | DEBUG_INFO of debug_info_item + | ANNOTATION of annotation_item + | STATIC_VALUE of encoded_value list (** [encoded_array] *) + | ANNO_DIR of anno_dir_item + | INSTRUCTION of Instr.instr + | FILL_ARRAY of fill_array_data + | SWITCH of switch + +(** [map_item] format for [map_list], which appears in the [d_data] *) +and map_item = { + type_of_item : type_code; + (* unsigned short padding *) + mi_size : int; + mi_offset : link; +} + +(** type of the items, used at {!map_item} *) +and type_code = + | TYPE_HEADER_ITEM (** 0x0000 *) + | TYPE_STRING_ITEM (** 0x0001 *) + | TYPE_TYPE_ID_ITEM (** 0x0002 *) + | TYPE_PROTO_ID_ITEM (** 0x0003 *) + | TYPE_FIELD_ID_ITEM (** 0x0004 *) + | TYPE_METHOD_ID_ITEM (** 0x0005 *) + | TYPE_CLASS_DEF_ITEM (** 0x0006 *) + | TYPE_MAP_LIST (** 0x1000 *) + | TYPE_TYPE_LIST (** 0x1001 *) + | TYPE_ANNOTATION_SET_REF_LIST (** 0x1002 *) + | TYPE_ANNOTATION_SET_ITEM (** 0x1003 *) + | TYPE_CLASS_DATA_ITEM (** 0x2000 *) + | TYPE_CODE_ITEM (** 0x2001 *) + | TYPE_STRING_DATA_ITEM (** 0x2002 *) + | TYPE_DEBUG_INFO_ITEM (** 0x2003 *) + | TYPE_ANNOTATION_ITEM (** 0x2004 *) + | TYPE_ENCODED_ARRAY_ITEM (** 0x2005 *) + | TYPE_ANNOTATION_DIRECTORY_ITEM (** 0x2006 *) + +(** {!class_data_item} referenced from {!class_def_item} *) +and class_data_item = { + mutable static_fields : encoded_field list; + mutable instance_fields : encoded_field list; + mutable direct_methods : encoded_method list; + mutable virtual_methods : encoded_method list; +} + +(** {!encoded_field} format used at {!class_data_item} *) +and encoded_field = { + field_idx : link; + f_access_flag : int; +} + +(** {!encoded_method} format used at {!class_data_item} *) +and encoded_method = { + method_idx : link; + mutable m_access_flag : int; + code_off : link; +} + +(** {!code_item} referenced from {!encoded_method} *) +and code_item = { + mutable registers_size : int; + mutable ins_size : int; + mutable outs_size : int; + mutable tries_size : int; + mutable debug_info_off : link; + mutable insns_size : int; + insns : link DynArray.t; + (* unsigned short padding *) + mutable tries : try_item list; + mutable c_handlers : encoded_catch_handler list; +} + +(** [packed-switch] and [sparse-switch] format in [insns] of {!code_item} *) +and switch = { + mutable sw_base : link; + sw_size : int; + sw_keys : int list; + sw_targets : link list; +} + +(** [fill-array-data] format in [insns] of {!code_item} *) +and fill_array_data = { + ad_width : int; + ad_size : int; + ad_data : Instr.operand list; +} + +(** {!try_item} format referenced from {!code_item} *) +and try_item = { + start_addr : link; + end_addr : link; + handler_off : link; +} + +(** {!encoded_catch_handler} format referenced from {!code_item} *) +and encoded_catch_handler = { + e_handlers : type_addr_pair list; + catch_all_addr : link; +} + +(** [encoded_type_addr_pair] format referenced from {!encoded_catch_handler} *) +and type_addr_pair = { + mutable ch_type_idx : link; + addr : link; +} + +(** {!debug_info_item} referenced from {!code_item} *) +and debug_info_item = { + line_start : int; + parameter_name : link list; + mutable state_machine : (state_machine_instr * Instr.operand list) list; +} + +(** byte code values for [state_machine] inside {!debug_info_item} *) +and state_machine_instr = + | DBG_END_SEQUENCE (** 0x00 *) + | DBG_ADVANCE_PC (** 0x01 *) + | DBG_ADVANCE_LINE (** 0x02 *) + | DBG_START_LOCAL (** 0x03 *) + | DBG_START_LOCAL_EXTENDED (** 0x04 *) + | DBG_END_LOCAL (** 0x05 *) + | DBG_RESTART_LOCAL (** 0x06 *) + | DBG_SET_PROLOGUE_END (** 0x07 *) + | DBG_SET_EPILOGUE_BEGIN (** 0x08 *) + | DBG_SET_FILE (** 0x09 *) + | DBG_SPECIAL (** 0x0a..0xff *) + +(** [annotations_directory_item] referenced from {!class_def_item} *) +and anno_dir_item = { + class_anno_off : link; + fields : anno_off list; + methods : anno_off list; + parameters : anno_off list; +} + +(** [(field|method|parameter)_annotation] format used at {!anno_dir_item} *) +and anno_off = { + target : link; + annotation_off : link; +} + +(** {!annotation_item} referenced from [ANNO_SET] *) +and annotation_item = { + visible : visibility; + annotation : encoded_annotation; +} + +(** [Visibility] values *) +and visibility = + | VISIBILITY_BUILD (** 0x00 *) + | VISIBILITY_RUNTIME (** 0x01 *) + | VISIBILITY_SYSTEM (** 0x02 *) + +(** {!encoded_annotation} format referenced from {!encoded_value} *) +and encoded_annotation = { + mutable an_type_idx : link; + elements : annotation_element list; +} + +(** {!annotation_element} format referenced from {!encoded_annotation} *) +and annotation_element = { + name_idx : link; + mutable value : encoded_value; +} + +(** {!encoded_value} encoding + embedded in {!annotation_element} and [encoded_array] *) +and encoded_value = + | VALUE_BYTE of int64 (** 0x00 *) + | VALUE_SHORT of int64 (** 0x02 *) + | VALUE_CHAR of int64 (** 0x03 *) + | VALUE_INT of int64 (** 0x04 *) + | VALUE_LONG of int64 (** 0x06 *) + | VALUE_FLOAT of int64 (** 0x10 *) + | VALUE_DOUBLE of int64 (** 0x11 *) + | VALUE_STRING of int (** 0x17 *) + | VALUE_TYPE of int (** 0x18 *) + | VALUE_FIELD of int (** 0x19 *) + | VALUE_METHOD of int (** 0x1a *) + | VALUE_ENUM of int (** 0x1b *) + | VALUE_ARRAY of encoded_value list (** 0x1c *) + | VALUE_ANNOTATION of encoded_annotation (** 0x1d *) + | VALUE_NULL (** 0x1e *) + | VALUE_BOOLEAN of bool (** 0x1f *) + +(** {2 Utilities} *) + +(** wrapping with [Idx] *) +val to_idx : int -> link + +(** wrapping with [Off] *) +val to_off : int -> link + +(** unwrapping [Idx] *) +val of_idx : link -> int + +(** unwrapping [Off] *) +val of_off : link -> int + +module IdxKey : +sig + type t = link + val compare : t -> t -> int +end + +module OffKey : +sig + type t = link + val compare : t -> t -> int +end + +(** from [OPR_INDEX] to [Idx] *) +val opr2idx : Instr.operand -> link + +(** from [OPR_OFFSET] to [Off] *) +val opr2off : Instr.operand -> link + +(** from [Idx] to [OPR_INDEX] *) +val idx2opr : link -> Instr.operand + +(** from [Off] to [OPR_OFFSET] *) +val off2opr : link -> Instr.operand + +(** obtain 32-bits offset from [Off] *) +val get_off : link -> Instr.offset + +(** obtain {!endian} from [string] representation *) +val str_to_endian : string -> endian + +(** [string] representation of {!endian} *) +val endian_to_str : endian -> string + +(** convert [int] to corresponding {!type_code} *) +val to_type_code : int -> type_code + +(** get [int] value of given {!type_code} *) +val of_type_code : type_code -> int + +(** get [string] notation of given {!type_code} *) +val type_code_to_str : type_code -> string + +(** get [string] notation of given {!state_machine_instr} *) +val machine_instr_to_str : state_machine_instr -> string + +(** {2 Access flags} *) + +(** indicate the accessibility *) +type access_flag = + | ACC_PUBLIC (** 0x1, for all kinds *) + | ACC_PRIVATE (** 0x2, for all kinds *) + | ACC_PROTECTED (** 0x4, for all kinds *) + | ACC_STATIC (** 0x8, for all kinds *) + | ACC_FINAL (** 0x10, for all kinds *) + | ACC_SYNCHRONIZED (** 0x20, only for methods *) + | ACC_VOLATILE (** 0x40, only for fields *) + | ACC_BRIDGE (** 0x40, only for methods *) + | ACC_TRANSIENT (** 0x80, only for fields *) + | ACC_VARARGS (** 0x80, only for methods *) + | ACC_NATIVE (** 0x100, only for methods *) + | ACC_INTERFACE (** 0x200, only for classes *) + | ACC_ABSTRACT (** 0x400, except for fields *) + | ACC_STRICT (** 0x800, only for methods *) + | ACC_SYNTHETIC (** 0x1000, for all kinds *) + | ACC_ANNOTATION (** 0x2000, only for classes *) + | ACC_ENUM (** 0x4000, except for methods *) + | ACC_CONSTRUCTOR (** 0x10000, only for methods *) + | ACC_DECLARED_SYNCHRONIZED (** 0x20000, only for methods *) + +(** distinguish targets for {!access_flag} *) +type acc_kind = + | ACC_FOR_CLASSES + | ACC_FOR_FIELDS + | ACC_FOR_METHODS + +(** make [int] representation from bitfields of {!access_flag} *) +val to_acc_flag : acc_kind -> access_flag list -> int + +(** check certain flags are set *) +val chk_acc_flag : acc_kind -> access_flag list -> int -> bool + +(** [true] if [ACC_STATIC] is set *) +val is_static : int -> bool + +(** [true] if [ACC_INTERFACE] is set *) +val is_interface : int -> bool + +(** [true] if [ACC_SYNTHETIC] is set *) +val is_synthetic : int -> bool + +(** [ACC_FOR_PUBLIC] *) +val pub : access_flag list + +(** [ACC_STATIC] along with {!pub} *) +val spub : access_flag list + +(** {2 Navigation} *) + +(** 0xffffffff (= -1 if signed int) *) +val no_index : int + +(** 0x00000000 *) +val no_offset : int + +(** wrapping {!no_index} with [Idx] *) +val no_idx : link + +(** wrapping {!no_offset} with [Off] *) +val no_off : link + +(** get {!data_item} for given offset *) +val get_data_item : dex -> link -> data_item + +(** get {!Instr.instr} for given offset, + raise {!Wrong_match} unless [INSTRUCTION] *) +val get_ins : dex -> link -> Instr.instr + +(** [true] if the item for given offset is {!Instr.instr} *) +val is_ins : dex -> link -> bool + +(** get [string] for given string id, + raise {!Wrong_match} unless [STRING_DATA] *) +val get_str : dex -> link -> string + +(** find string id for given [string], + {!no_idx} unless found *) +val find_str : dex -> string -> link + +(** get type name for given type id *) +val get_ty_str : dex -> link -> string + +(** find type id for given [string], + {!no_idx} unless found *) +val find_ty_str : dex -> string -> link + +(** comparator for type ids *) +val ty_comp : dex -> link -> link -> int + +(** get [TYPE_LIST] for given offset, + raise {!Wrong_match} unless [TYPE_LIST] *) +val get_ty_lst : dex -> link -> link list + +(** get {!field_id_item} for given field id *) +val get_fit : dex -> link -> field_id_item + +(** get {!method_id_item} for given method id *) +val get_mit : dex -> link -> method_id_item + +(** get {!proto_id_item} for a given method. *) +val get_pit : dex -> method_id_item -> proto_id_item + +(** get type for given field *) +val get_fty : dex -> field_id_item -> link + +(** get a [list] of arguments for given method *) +val get_argv : dex -> method_id_item -> link list + +(** get return type for given method *) +val get_rety : dex -> method_id_item -> link + +(** comparator for field signatures: field name and type *) +val fld_comp : dex -> field_id_item -> field_id_item -> int + +(** comparator for a [list] of type ids *) +val ty_lst_comp : dex -> link list -> link list -> int + +(** comparator for a [list] of type ids, + but ignore the package name for types. *) +val ty_lst_comp_relaxed : dex -> link list -> link list -> int + +(** comparator for method signatures: method name, return type, and arguments *) +val mtd_comp : dex -> method_id_item -> method_id_item -> int + +(** comparator for method signatures: method name, return type, and arguments, + but ignore the package name for return types and arguments. *) +val mtd_comp_relaxed : dex -> method_id_item -> method_id_item -> int + +(** get class id from field id *) +val get_cid_from_fid : dex -> link -> link + +(** get class id from method id *) +val get_cid_from_mid : dex -> link -> link + +(** get name for given field *) +val get_fld_name : dex -> link -> string + +(** get name for given method *) +val get_mtd_name : dex -> link -> string + +(** get name for given field, along with class name *) +val get_fld_full_name : dex -> link -> string + +(** get name for given method, along with class name *) +val get_mtd_full_name : dex -> link -> string + +(** get method signature, e.g., [Lpkg/cls;->mtd(arg1;arg2;...)rety] *) +val get_mtd_sig : dex -> link -> string + +(** get class id from name, {!no_idx} unless found *) +val get_cid : dex -> string -> link + +(** get {!class_def_item} for given class id, + raise [Not_found] unless found *) +val get_cdef : dex -> link -> class_def_item + +(** get interfaces implemented by the given class *) +val get_interfaces : dex -> link -> link list + +(** get classes that implement the given interface *) +val get_implementers : dex -> link -> link list + +(** get super class id for given class, + {!no_idx} if it's at the top level *) +val get_superclass : dex -> link -> link + +(** get super classes for a given class *) +val get_superclasses : dex -> link -> link list + +(** check that some property (given as a function {!link} to [bool]) + holds in hierarchy starting from the given class *) +val in_hierarchy : dex -> (link -> bool) -> link -> bool + +(** check whether some class is a super class (up through the hierarchy) + of a given class *) +val is_superclass : dex -> link -> link -> bool + +(** check whether some class is an inner class of the given class *) +val is_innerclass : dex -> link -> link -> bool + +(** get inner classes for the given class *) +val get_innerclasses : dex -> link -> link list + +(** get owning class if the given class is an inner class *) +val get_owning_class : dex -> link -> link + +(** get all fields, along with ids, for given class *) +val get_flds : dex -> link -> (link * field_id_item) list + +(** get all fields, along with ids, for given class and superclasses *) +val get_fldS : dex -> link -> (link * field_id_item) list + +(** get the specific field of given class and given field name *) +val get_the_fld : dex -> link -> string -> link * field_id_item + +(** [true] if the class owns the field *) +val own_the_fld : dex -> link -> link -> bool + +(** get all methods, along with ids, for given class *) +val get_mtds : dex -> link -> (link * method_id_item) list + +(** get all methods, along with ids, for given class and superclasses *) +val get_mtdS : dex -> link -> (link * method_id_item) list + +(** get overriden method at the superclass for given class and method, + {!no_idx} unless overridable *) +val get_supermethod : dex -> link -> link -> link + +(** get the specific method of given class and given method name *) +val get_the_mtd : dex -> link -> string -> link * method_id_item + +(** get the specific method of given class, method name, + and shorty descriptor (useful for overloading) *) +val get_the_mtd_shorty : dex -> link -> string -> string -> link * method_id_item + +(** [true] if the class owns the method *) +val own_the_mtd : dex -> link -> link -> bool + +(** get {!class_data_item} for given class, + raise {!Wrong_match} unless [CLASS_DATA] *) +val get_cdata : dex -> link -> link * class_data_item + +(** get static fields for given class, along with initial values if exists *) +val get_stt_flds : dex -> link -> (link * encoded_value option) list + +(** get {!encoded_method} for given class and method, + raise {!Wrong_dex} if such method is not defined *) +val get_emtd : dex -> link -> link -> encoded_method + +(** get {!code_item} for given class and method, + raise {!Wrong_match} unless [CODE_ITEM] *) +val get_citm : dex -> link -> link -> link * code_item + +(** calculate a register number that holds [this] pointer *) +val calc_this : code_item -> int + +(** [true] if the given register is used as a parameter *) +val is_param : code_item -> int -> bool + +(** {2 Modification helper} *) + +(** empty {!section} *) +val empty_section : unit -> section + +(** empty {!dex} *) +val empty_dex : unit -> dex + +(** empty {!code_item} *) +val empty_citm : unit -> code_item + +(** insert {!data_item} into the data pool *) +val insrt_data : dex -> link -> data_item -> unit + +(** remove {!data_item} in the data pool *) +val rm_data : dex -> link -> unit + +(** insert {!Instr.instr} into the data pool *) +val insrt_ins : dex -> link -> Instr.instr -> unit + +(** insert [string] into the data pool *) +val insrt_str : dex -> link -> string -> unit + +(** insert [TYPE_LIST] into the data pool *) +val insrt_ty_lst : dex -> link -> link list -> unit + +(** insert [STATIC_VALUE] into the data pool *) +val insrt_stt : dex -> link -> encoded_value list -> unit + +(** insert {!code_item} into the data pool *) +val insrt_citm : dex -> link -> code_item -> unit + diff --git a/plugins/dalvik/dalvik_disasm.ml b/plugins/dalvik/dalvik_disasm.ml new file mode 100644 index 0000000000..d947568519 --- /dev/null +++ b/plugins/dalvik/dalvik_disasm.ml @@ -0,0 +1,1712 @@ +(* + * Copyright (c) 2010-2014, + * Jinseong Jeon + * Kris Micinski + * Jeff Foster + * All rights reserved. + * + * Based on the src/instr.ml from https://github.com/plum-umd/redexer + *) + +(* Instruction reference is based on https://source.android.com/devices/tech/dalvik/dalvik-bytecode + *) + +(* let's pull a little bit redexer *) + +type offset = int32 + +type instr = opcode * operand list + +and operand = + | OPR_CONST of int64 + (* TODO: Maybe isolate the OPR_REGPAIR type too? *) + | OPR_REGISTER of int + | OPR_INDEX of int + | OPR_OFFSET of offset + +and opcode = + | OP_NOP (* 0x00 *) + + | OP_MOVE (* 0x01 *) + | OP_MOVE_FROM16 (* 0x02 *) + | OP_MOVE_16 (* 0x03 *) + | OP_MOVE_WIDE (* 0x04 *) + | OP_MOVE_WIDE_FROM16 (* 0x05 *) + | OP_MOVE_WIDE_16 (* 0x06 *) + | OP_MOVE_OBJECT (* 0x07 *) + | OP_MOVE_OBJECT_FROM16 (* 0x08 *) + | OP_MOVE_OBJECT_16 (* 0x09 *) + + | OP_MOVE_RESULT (* 0x0a *) + | OP_MOVE_RESULT_WIDE (* 0x0b *) + | OP_MOVE_RESULT_OBJECT (* 0x0c *) + | OP_MOVE_EXCEPTION (* 0x0d *) + + | OP_RETURN_VOID (* 0x0e *) + | OP_RETURN (* 0x0f *) + | OP_RETURN_WIDE (* 0x10 *) + | OP_RETURN_OBJECT (* 0x11 *) + + | OP_CONST_4 (* 0x12 *) + | OP_CONST_16 (* 0x13 *) + | OP_CONST (* 0x14 *) + | OP_CONST_HIGH16 (* 0x15 *) + | OP_CONST_WIDE_16 (* 0x16 *) + | OP_CONST_WIDE_32 (* 0x17 *) + | OP_CONST_WIDE (* 0x18 *) + | OP_CONST_WIDE_HIGH16 (* 0x19 *) + | OP_CONST_STRING (* 0x1a *) + | OP_CONST_STRING_JUMBO (* 0x1b *) + | OP_CONST_CLASS (* 0x1c *) + + | OP_MONITOR_ENTER (* 0x1d *) + | OP_MONITOR_EXIT (* 0x1e *) + + | OP_CHECK_CAST (* 0x1f *) + | OP_INSTANCE_OF (* 0x20 *) + + | OP_ARRAY_LENGTH (* 0x21 *) + + | OP_NEW_INSTANCE (* 0x22 *) + | OP_NEW_ARRAY (* 0x23 *) + + | OP_FILLED_NEW_ARRAY (* 0x24 *) + | OP_FILLED_NEW_ARRAY_RANGE (* 0x25 *) + | OP_FILL_ARRAY_DATA (* 0x26 *) + + | OP_THROW (* 0x27 *) + | OP_GOTO (* 0x28 *) + | OP_GOTO_16 (* 0x29 *) + | OP_GOTO_32 (* 0x2a *) + | OP_PACKED_SWITCH (* 0x2b *) + | OP_SPARSE_SWITCH (* 0x2c *) + + | OP_CMPL_FLOAT (* 0x2d *) + | OP_CMPG_FLOAT (* 0x2e *) + | OP_CMPL_DOUBLE (* 0x2f *) + | OP_CMPG_DOUBLE (* 0x30 *) + | OP_CMP_LONG (* 0x31 *) + + | OP_IF_EQ (* 0x32 *) + | OP_IF_NE (* 0x33 *) + | OP_IF_LT (* 0x34 *) + | OP_IF_GE (* 0x35 *) + | OP_IF_GT (* 0x36 *) + | OP_IF_LE (* 0x37 *) + | OP_IF_EQZ (* 0x38 *) + | OP_IF_NEZ (* 0x39 *) + | OP_IF_LTZ (* 0x3a *) + | OP_IF_GEZ (* 0x3b *) + | OP_IF_GTZ (* 0x3c *) + | OP_IF_LEZ (* 0x3d *) + | OP_UNUSED_3E (* 0x3e *) + | OP_UNUSED_3F (* 0x3f *) + | OP_UNUSED_40 (* 0x40 *) + | OP_UNUSED_41 (* 0x41 *) + | OP_UNUSED_42 (* 0x42 *) + | OP_UNUSED_43 (* 0x43 *) + | OP_AGET (* 0x44 *) + | OP_AGET_WIDE (* 0x45 *) + | OP_AGET_OBJECT (* 0x46 *) + | OP_AGET_BOOLEAN (* 0x47 *) + | OP_AGET_BYTE (* 0x48 *) + | OP_AGET_CHAR (* 0x49 *) + | OP_AGET_SHORT (* 0x4a *) + | OP_APUT (* 0x4b *) + | OP_APUT_WIDE (* 0x4c *) + | OP_APUT_OBJECT (* 0x4d *) + | OP_APUT_BOOLEAN (* 0x4e *) + | OP_APUT_BYTE (* 0x4f *) + | OP_APUT_CHAR (* 0x50 *) + | OP_APUT_SHORT (* 0x51 *) + + | OP_IGET (* 0x52 *) + | OP_IGET_WIDE (* 0x53 *) + | OP_IGET_OBJECT (* 0x54 *) + | OP_IGET_BOOLEAN (* 0x55 *) + | OP_IGET_BYTE (* 0x56 *) + | OP_IGET_CHAR (* 0x57 *) + | OP_IGET_SHORT (* 0x58 *) + | OP_IPUT (* 0x59 *) + | OP_IPUT_WIDE (* 0x5a *) + | OP_IPUT_OBJECT (* 0x5b *) + | OP_IPUT_BOOLEAN (* 0x5c *) + | OP_IPUT_BYTE (* 0x5d *) + | OP_IPUT_CHAR (* 0x5e *) + | OP_IPUT_SHORT (* 0x5f *) + + | OP_SGET (* 0x60 *) + | OP_SGET_WIDE (* 0x61 *) + | OP_SGET_OBJECT (* 0x62 *) + | OP_SGET_BOOLEAN (* 0x63 *) + | OP_SGET_BYTE (* 0x64 *) + | OP_SGET_CHAR (* 0x65 *) + | OP_SGET_SHORT (* 0x66 *) + | OP_SPUT (* 0x67 *) + | OP_SPUT_WIDE (* 0x68 *) + | OP_SPUT_OBJECT (* 0x69 *) + | OP_SPUT_BOOLEAN (* 0x6a *) + | OP_SPUT_BYTE (* 0x6b *) + | OP_SPUT_CHAR (* 0x6c *) + | OP_SPUT_SHORT (* 0x6d *) + + | OP_INVOKE_VIRTUAL (* 0x6e *) + | OP_INVOKE_SUPER (* 0x6f *) + | OP_INVOKE_DIRECT (* 0x70 *) + | OP_INVOKE_STATIC (* 0x71 *) + | OP_INVOKE_INTERFACE (* 0x72 *) + | OP_UNUSED_73 (* 0x73 *) + | OP_INVOKE_VIRTUAL_RANGE (* 0x74 *) + | OP_INVOKE_SUPER_RANGE (* 0x75 *) + | OP_INVOKE_DIRECT_RANGE (* 0x76 *) + | OP_INVOKE_STATIC_RANGE (* 0x77 *) + | OP_INVOKE_INTERFACE_RANGE (* 0x78 *) + | OP_UNUSED_79 (* 0x79 *) + | OP_UNUSED_7A (* 0x7a *) + | OP_NEG_INT (* 0x7b *) + | OP_NOT_INT (* 0x7c *) + | OP_NEG_LONG (* 0x7d *) + | OP_NOT_LONG (* 0x7e *) + | OP_NEG_FLOAT (* 0x7f *) + | OP_NEG_DOUBLE (* 0x80 *) + | OP_INT_TO_LONG (* 0x81 *) + | OP_INT_TO_FLOAT (* 0x82 *) + | OP_INT_TO_DOUBLE (* 0x83 *) + | OP_LONG_TO_INT (* 0x84 *) + | OP_LONG_TO_FLOAT (* 0x85 *) + | OP_LONG_TO_DOUBLE (* 0x86 *) + | OP_FLOAT_TO_INT (* 0x87 *) + | OP_FLOAT_TO_LONG (* 0x88 *) + | OP_FLOAT_TO_DOUBLE (* 0x89 *) + | OP_DOUBLE_TO_INT (* 0x8a *) + | OP_DOUBLE_TO_LONG (* 0x8b *) + | OP_DOUBLE_TO_FLOAT (* 0x8c *) + | OP_INT_TO_BYTE (* 0x8d *) + | OP_INT_TO_CHAR (* 0x8e *) + | OP_INT_TO_SHORT (* 0x8f *) + + | OP_ADD_INT (* 0x90 *) + | OP_SUB_INT (* 0x91 *) + | OP_MUL_INT (* 0x92 *) + | OP_DIV_INT (* 0x93 *) + | OP_REM_INT (* 0x94 *) + | OP_AND_INT (* 0x95 *) + | OP_OR_INT (* 0x96 *) + | OP_XOR_INT (* 0x97 *) + | OP_SHL_INT (* 0x98 *) + | OP_SHR_INT (* 0x99 *) + | OP_USHR_INT (* 0x9a *) + + | OP_ADD_LONG (* 0x9b *) + | OP_SUB_LONG (* 0x9c *) + | OP_MUL_LONG (* 0x9d *) + | OP_DIV_LONG (* 0x9e *) + | OP_REM_LONG (* 0x9f *) + | OP_AND_LONG (* 0xa0 *) + | OP_OR_LONG (* 0xa1 *) + | OP_XOR_LONG (* 0xa2 *) + | OP_SHL_LONG (* 0xa3 *) + | OP_SHR_LONG (* 0xa4 *) + | OP_USHR_LONG (* 0xa5 *) + + | OP_ADD_FLOAT (* 0xa6 *) + | OP_SUB_FLOAT (* 0xa7 *) + | OP_MUL_FLOAT (* 0xa8 *) + | OP_DIV_FLOAT (* 0xa9 *) + | OP_REM_FLOAT (* 0xaa *) + | OP_ADD_DOUBLE (* 0xab *) + | OP_SUB_DOUBLE (* 0xac *) + | OP_MUL_DOUBLE (* 0xad *) + | OP_DIV_DOUBLE (* 0xae *) + | OP_REM_DOUBLE (* 0xaf *) + + | OP_ADD_INT_2ADDR (* 0xb0 *) + | OP_SUB_INT_2ADDR (* 0xb1 *) + | OP_MUL_INT_2ADDR (* 0xb2 *) + | OP_DIV_INT_2ADDR (* 0xb3 *) + | OP_REM_INT_2ADDR (* 0xb4 *) + | OP_AND_INT_2ADDR (* 0xb5 *) + | OP_OR_INT_2ADDR (* 0xb6 *) + | OP_XOR_INT_2ADDR (* 0xb7 *) + | OP_SHL_INT_2ADDR (* 0xb8 *) + | OP_SHR_INT_2ADDR (* 0xb9 *) + | OP_USHR_INT_2ADDR (* 0xba *) + + | OP_ADD_LONG_2ADDR (* 0xbb *) + | OP_SUB_LONG_2ADDR (* 0xbc *) + | OP_MUL_LONG_2ADDR (* 0xbd *) + | OP_DIV_LONG_2ADDR (* 0xbe *) + | OP_REM_LONG_2ADDR (* 0xbf *) + | OP_AND_LONG_2ADDR (* 0xc0 *) + | OP_OR_LONG_2ADDR (* 0xc1 *) + | OP_XOR_LONG_2ADDR (* 0xc2 *) + | OP_SHL_LONG_2ADDR (* 0xc3 *) + | OP_SHR_LONG_2ADDR (* 0xc4 *) + | OP_USHR_LONG_2ADDR (* 0xc5 *) + + | OP_ADD_FLOAT_2ADDR (* 0xc6 *) + | OP_SUB_FLOAT_2ADDR (* 0xc7 *) + | OP_MUL_FLOAT_2ADDR (* 0xc8 *) + | OP_DIV_FLOAT_2ADDR (* 0xc9 *) + | OP_REM_FLOAT_2ADDR (* 0xca *) + | OP_ADD_DOUBLE_2ADDR (* 0xcb *) + | OP_SUB_DOUBLE_2ADDR (* 0xcc *) + | OP_MUL_DOUBLE_2ADDR (* 0xcd *) + | OP_DIV_DOUBLE_2ADDR (* 0xce *) + | OP_REM_DOUBLE_2ADDR (* 0xcf *) + + | OP_ADD_INT_LIT16 (* 0xd0 *) + | OP_RSUB_INT (* 0xd1 *) (* no _LIT16 suffix for this *) + | OP_MUL_INT_LIT16 (* 0xd2 *) + | OP_DIV_INT_LIT16 (* 0xd3 *) + | OP_REM_INT_LIT16 (* 0xd4 *) + | OP_AND_INT_LIT16 (* 0xd5 *) + | OP_OR_INT_LIT16 (* 0xd6 *) + | OP_XOR_INT_LIT16 (* 0xd7 *) + + | OP_ADD_INT_LIT8 (* 0xd8 *) + | OP_RSUB_INT_LIT8 (* 0xd9 *) + | OP_MUL_INT_LIT8 (* 0xda *) + | OP_DIV_INT_LIT8 (* 0xdb *) + | OP_REM_INT_LIT8 (* 0xdc *) + | OP_AND_INT_LIT8 (* 0xdd *) + | OP_OR_INT_LIT8 (* 0xde *) + | OP_XOR_INT_LIT8 (* 0xdf *) + | OP_SHL_INT_LIT8 (* 0xe0 *) + | OP_SHR_INT_LIT8 (* 0xe1 *) + | OP_USHR_INT_LIT8 (* 0xe2 *) + | OP_UNUSED_E3 (* 0xe3 *) + | OP_UNUSED_E4 (* 0xe4 *) + | OP_UNUSED_E5 (* 0xe5 *) + | OP_UNUSED_E6 (* 0xe6 *) + | OP_UNUSED_E7 (* 0xe7 *) + | OP_UNUSED_E8 (* 0xe8 *) + | OP_UNUSED_E9 (* 0xe9 *) + | OP_UNUSED_EA (* 0xea *) + | OP_UNUSED_EB (* 0xeb *) + + (* + * The "breakpoint" instruction is special, in that it should never + * be seen by anything but the debug interpreter. During debugging + * it takes the place of an arbitrary opcode, which means operations + * like "tell me the opcode width so I can find the next instruction" + * aren't possible. (This is correctable, but probably not useful.) + *) + + | OP_BREAKPOINT (* 0xec *) + + (* optimizer output -- these are never generated by "dx" *) + + | OP_THROW_VERIFICATION_ERROR (* 0xed *) + | OP_EXECUTE_INLINE (* 0xee *) + | OP_EXECUTE_INLINE_RANGE (* 0xef *) + + | OP_INVOKE_DIRECT_EMPTY (* 0xf0 *) + | OP_UNUSED_F1 (* 0xf1 *) (* OP_INVOKE_DIRECT_EMPTY_RANGE? *) + | OP_IGET_QUICK (* 0xf2 *) + | OP_IGET_WIDE_QUICK (* 0xf3 *) + | OP_IGET_OBJECT_QUICK (* 0xf4 *) + | OP_IPUT_QUICK (* 0xf5 *) + | OP_IPUT_WIDE_QUICK (* 0xf6 *) + | OP_IPUT_OBJECT_QUICK (* 0xf7 *) + + | OP_INVOKE_VIRTUAL_QUICK (* 0xf8 *) + | OP_INVOKE_VIRTUAL_QUICK_RANGE (* 0xf9 *) + | OP_INVOKE_SUPER_QUICK (* 0xfa *) + | OP_INVOKE_SUPER_QUICK_RANGE (* 0xfb *) + | OP_UNUSED_FC (* 0xfc *) (* OP_INVOKE_DIRECT_QUICK? *) + | OP_UNUSED_FD (* 0xfd *) (* OP_INVOKE_DIRECT_QUICK_RANGE? *) + | OP_UNUSED_FE (* 0xfe *) (* OP_INVOKE_INTERFACE_QUICK? *) + | OP_UNUSED_FF (* 0xff *) (* OP_INVOKE_INTERFACE_QUICK_RANGE *) + +(* range : int -> int -> int list -> int list *) +let rec range (x: int) (y: int) res : int list = + if y < x then res else + let new_res = y :: res in + if x = y then new_res else range x (y-1) new_res + +(* rm_last : 'a list -> 'a list *) +let rm_last (l: 'a list) : 'a list = + List.rev (List.tl (List.rev l)) + +(* hx_to_op_and_size : int -> opcode * int *) +let hx_to_op_and_size (hx: int) : opcode * int = + match hx with + | 0x00 -> OP_NOP, 2 + + | 0x01 -> OP_MOVE, 2 + | 0x02 -> OP_MOVE_FROM16, 4 + | 0x03 -> OP_MOVE_16, 6 + | 0x04 -> OP_MOVE_WIDE, 2 + | 0x05 -> OP_MOVE_WIDE_FROM16, 4 + | 0x06 -> OP_MOVE_WIDE_16, 6 + | 0x07 -> OP_MOVE_OBJECT, 2 + | 0x08 -> OP_MOVE_OBJECT_FROM16, 4 + | 0x09 -> OP_MOVE_OBJECT_16, 6 + + | 0x0a -> OP_MOVE_RESULT, 2 + | 0x0b -> OP_MOVE_RESULT_WIDE, 2 + | 0x0c -> OP_MOVE_RESULT_OBJECT, 2 + | 0x0d -> OP_MOVE_EXCEPTION, 2 + + | 0x0e -> OP_RETURN_VOID, 2 + | 0x0f -> OP_RETURN, 2 + | 0x10 -> OP_RETURN_WIDE, 2 + | 0x11 -> OP_RETURN_OBJECT, 2 + + | 0x12 -> OP_CONST_4, 2 + | 0x13 -> OP_CONST_16, 4 + | 0x14 -> OP_CONST, 6 + | 0x15 -> OP_CONST_HIGH16, 4 + | 0x16 -> OP_CONST_WIDE_16, 4 + | 0x17 -> OP_CONST_WIDE_32, 6 + | 0x18 -> OP_CONST_WIDE, 10 + | 0x19 -> OP_CONST_WIDE_HIGH16, 4 + | 0x1a -> OP_CONST_STRING, 4 + | 0x1b -> OP_CONST_STRING_JUMBO, 6 + | 0x1c -> OP_CONST_CLASS, 4 + + | 0x1d -> OP_MONITOR_ENTER, 2 + | 0x1e -> OP_MONITOR_EXIT, 2 + + | 0x1f -> OP_CHECK_CAST, 4 + | 0x20 -> OP_INSTANCE_OF, 4 + + | 0x21 -> OP_ARRAY_LENGTH, 2 + + | 0x22 -> OP_NEW_INSTANCE, 4 + | 0x23 -> OP_NEW_ARRAY, 4 + + | 0x24 -> OP_FILLED_NEW_ARRAY, 6 + | 0x25 -> OP_FILLED_NEW_ARRAY_RANGE, 6 + | 0x26 -> OP_FILL_ARRAY_DATA, 6 + + | 0x27 -> OP_THROW, 2 + | 0x28 -> OP_GOTO, 2 + | 0x29 -> OP_GOTO_16, 4 + | 0x2a -> OP_GOTO_32, 6 + | 0x2b -> OP_PACKED_SWITCH, 6 + | 0x2c -> OP_SPARSE_SWITCH, 6 + + | 0x2d -> OP_CMPL_FLOAT, 4 + | 0x2e -> OP_CMPG_FLOAT, 4 + | 0x2f -> OP_CMPL_DOUBLE, 4 + | 0x30 -> OP_CMPG_DOUBLE, 4 + | 0x31 -> OP_CMP_LONG, 4 + + | 0x32 -> OP_IF_EQ, 4 + | 0x33 -> OP_IF_NE, 4 + | 0x34 -> OP_IF_LT, 4 + | 0x35 -> OP_IF_GE, 4 + | 0x36 -> OP_IF_GT, 4 + | 0x37 -> OP_IF_LE, 4 + | 0x38 -> OP_IF_EQZ, 4 + | 0x39 -> OP_IF_NEZ, 4 + | 0x3a -> OP_IF_LTZ, 4 + | 0x3b -> OP_IF_GEZ, 4 + | 0x3c -> OP_IF_GTZ, 4 + | 0x3d -> OP_IF_LEZ, 4 + + | 0x44 -> OP_AGET, 4 + | 0x45 -> OP_AGET_WIDE, 4 + | 0x46 -> OP_AGET_OBJECT, 4 + | 0x47 -> OP_AGET_BOOLEAN, 4 + | 0x48 -> OP_AGET_BYTE, 4 + | 0x49 -> OP_AGET_CHAR, 4 + | 0x4a -> OP_AGET_SHORT, 4 + | 0x4b -> OP_APUT, 4 + | 0x4c -> OP_APUT_WIDE, 4 + | 0x4d -> OP_APUT_OBJECT, 4 + | 0x4e -> OP_APUT_BOOLEAN, 4 + | 0x4f -> OP_APUT_BYTE, 4 + | 0x50 -> OP_APUT_CHAR, 4 + | 0x51 -> OP_APUT_SHORT, 4 + + | 0x52 -> OP_IGET, 4 + | 0x53 -> OP_IGET_WIDE, 4 + | 0x54 -> OP_IGET_OBJECT, 4 + | 0x55 -> OP_IGET_BOOLEAN, 4 + | 0x56 -> OP_IGET_BYTE, 4 + | 0x57 -> OP_IGET_CHAR, 4 + | 0x58 -> OP_IGET_SHORT, 4 + | 0x59 -> OP_IPUT, 4 + | 0x5a -> OP_IPUT_WIDE, 4 + | 0x5b -> OP_IPUT_OBJECT, 4 + | 0x5c -> OP_IPUT_BOOLEAN, 4 + | 0x5d -> OP_IPUT_BYTE, 4 + | 0x5e -> OP_IPUT_CHAR, 4 + | 0x5f -> OP_IPUT_SHORT, 4 + + | 0x60 -> OP_SGET, 4 + | 0x61 -> OP_SGET_WIDE, 4 + | 0x62 -> OP_SGET_OBJECT, 4 + | 0x63 -> OP_SGET_BOOLEAN, 4 + | 0x64 -> OP_SGET_BYTE, 4 + | 0x65 -> OP_SGET_CHAR, 4 + | 0x66 -> OP_SGET_SHORT, 4 + | 0x67 -> OP_SPUT, 4 + | 0x68 -> OP_SPUT_WIDE, 4 + | 0x69 -> OP_SPUT_OBJECT, 4 + | 0x6a -> OP_SPUT_BOOLEAN, 4 + | 0x6b -> OP_SPUT_BYTE, 4 + | 0x6c -> OP_SPUT_CHAR, 4 + | 0x6d -> OP_SPUT_SHORT, 4 + + | 0x6e -> OP_INVOKE_VIRTUAL, 6 + | 0x6f -> OP_INVOKE_SUPER, 6 + | 0x70 -> OP_INVOKE_DIRECT, 6 + | 0x71 -> OP_INVOKE_STATIC, 6 + | 0x72 -> OP_INVOKE_INTERFACE, 6 + + | 0x74 -> OP_INVOKE_VIRTUAL_RANGE, 6 + | 0x75 -> OP_INVOKE_SUPER_RANGE, 6 + | 0x76 -> OP_INVOKE_DIRECT_RANGE, 6 + | 0x77 -> OP_INVOKE_STATIC_RANGE, 6 + | 0x78 -> OP_INVOKE_INTERFACE_RANGE, 6 + + | 0x7b -> OP_NEG_INT, 2 + | 0x7c -> OP_NOT_INT, 2 + | 0x7d -> OP_NEG_LONG, 2 + | 0x7e -> OP_NOT_LONG, 2 + | 0x7f -> OP_NEG_FLOAT, 2 + | 0x80 -> OP_NEG_DOUBLE, 2 + | 0x81 -> OP_INT_TO_LONG, 2 + | 0x82 -> OP_INT_TO_FLOAT, 2 + | 0x83 -> OP_INT_TO_DOUBLE, 2 + | 0x84 -> OP_LONG_TO_INT, 2 + | 0x85 -> OP_LONG_TO_FLOAT, 2 + | 0x86 -> OP_LONG_TO_DOUBLE, 2 + | 0x87 -> OP_FLOAT_TO_INT, 2 + | 0x88 -> OP_FLOAT_TO_LONG, 2 + | 0x89 -> OP_FLOAT_TO_DOUBLE, 2 + | 0x8a -> OP_DOUBLE_TO_INT, 2 + | 0x8b -> OP_DOUBLE_TO_LONG, 2 + | 0x8c -> OP_DOUBLE_TO_FLOAT, 2 + | 0x8d -> OP_INT_TO_BYTE, 2 + | 0x8e -> OP_INT_TO_CHAR, 2 + | 0x8f -> OP_INT_TO_SHORT, 2 + + | 0x90 -> OP_ADD_INT, 4 + | 0x91 -> OP_SUB_INT, 4 + | 0x92 -> OP_MUL_INT, 4 + | 0x93 -> OP_DIV_INT, 4 + | 0x94 -> OP_REM_INT, 4 + | 0x95 -> OP_AND_INT, 4 + | 0x96 -> OP_OR_INT, 4 + | 0x97 -> OP_XOR_INT, 4 + | 0x98 -> OP_SHL_INT, 4 + | 0x99 -> OP_SHR_INT, 4 + | 0x9a -> OP_USHR_INT, 4 + + | 0x9b -> OP_ADD_LONG, 4 + | 0x9c -> OP_SUB_LONG, 4 + | 0x9d -> OP_MUL_LONG, 4 + | 0x9e -> OP_DIV_LONG, 4 + | 0x9f -> OP_REM_LONG, 4 + | 0xa0 -> OP_AND_LONG, 4 + | 0xa1 -> OP_OR_LONG, 4 + | 0xa2 -> OP_XOR_LONG, 4 + | 0xa3 -> OP_SHL_LONG, 4 + | 0xa4 -> OP_SHR_LONG, 4 + | 0xa5 -> OP_USHR_LONG, 4 + + | 0xa6 -> OP_ADD_FLOAT, 4 + | 0xa7 -> OP_SUB_FLOAT, 4 + | 0xa8 -> OP_MUL_FLOAT, 4 + | 0xa9 -> OP_DIV_FLOAT, 4 + | 0xaa -> OP_REM_FLOAT, 4 + | 0xab -> OP_ADD_DOUBLE, 4 + | 0xac -> OP_SUB_DOUBLE, 4 + | 0xad -> OP_MUL_DOUBLE, 4 + | 0xae -> OP_DIV_DOUBLE, 4 + | 0xaf -> OP_REM_DOUBLE, 4 + + | 0xb0 -> OP_ADD_INT_2ADDR, 2 + | 0xb1 -> OP_SUB_INT_2ADDR, 2 + | 0xb2 -> OP_MUL_INT_2ADDR, 2 + | 0xb3 -> OP_DIV_INT_2ADDR, 2 + | 0xb4 -> OP_REM_INT_2ADDR, 2 + | 0xb5 -> OP_AND_INT_2ADDR, 2 + | 0xb6 -> OP_OR_INT_2ADDR, 2 + | 0xb7 -> OP_XOR_INT_2ADDR, 2 + | 0xb8 -> OP_SHL_INT_2ADDR, 2 + | 0xb9 -> OP_SHR_INT_2ADDR, 2 + | 0xba -> OP_USHR_INT_2ADDR, 2 + + | 0xbb -> OP_ADD_LONG_2ADDR, 2 + | 0xbc -> OP_SUB_LONG_2ADDR, 2 + | 0xbd -> OP_MUL_LONG_2ADDR, 2 + | 0xbe -> OP_DIV_LONG_2ADDR, 2 + | 0xbf -> OP_REM_LONG_2ADDR, 2 + | 0xc0 -> OP_AND_LONG_2ADDR, 2 + | 0xc1 -> OP_OR_LONG_2ADDR, 2 + | 0xc2 -> OP_XOR_LONG_2ADDR, 2 + | 0xc3 -> OP_SHL_LONG_2ADDR, 2 + | 0xc4 -> OP_SHR_LONG_2ADDR, 2 + | 0xc5 -> OP_USHR_LONG_2ADDR, 2 + + | 0xc6 -> OP_ADD_FLOAT_2ADDR, 2 + | 0xc7 -> OP_SUB_FLOAT_2ADDR, 2 + | 0xc8 -> OP_MUL_FLOAT_2ADDR, 2 + | 0xc9 -> OP_DIV_FLOAT_2ADDR, 2 + | 0xca -> OP_REM_FLOAT_2ADDR, 2 + | 0xcb -> OP_ADD_DOUBLE_2ADDR, 2 + | 0xcc -> OP_SUB_DOUBLE_2ADDR, 2 + | 0xcd -> OP_MUL_DOUBLE_2ADDR, 2 + | 0xce -> OP_DIV_DOUBLE_2ADDR, 2 + | 0xcf -> OP_REM_DOUBLE_2ADDR, 2 + + | 0xd0 -> OP_ADD_INT_LIT16, 4 + | 0xd1 -> OP_RSUB_INT, 4 + | 0xd2 -> OP_MUL_INT_LIT16, 4 + | 0xd3 -> OP_DIV_INT_LIT16, 4 + | 0xd4 -> OP_REM_INT_LIT16, 4 + | 0xd5 -> OP_AND_INT_LIT16, 4 + | 0xd6 -> OP_OR_INT_LIT16, 4 + | 0xd7 -> OP_XOR_INT_LIT16, 4 + + | 0xd8 -> OP_ADD_INT_LIT8, 4 + | 0xd9 -> OP_RSUB_INT_LIT8, 4 + | 0xda -> OP_MUL_INT_LIT8, 4 + | 0xdb -> OP_DIV_INT_LIT8, 4 + | 0xdc -> OP_REM_INT_LIT8, 4 + | 0xdd -> OP_AND_INT_LIT8, 4 + | 0xde -> OP_OR_INT_LIT8, 4 + | 0xdf -> OP_XOR_INT_LIT8, 4 + | 0xe0 -> OP_SHL_INT_LIT8, 4 + | 0xe1 -> OP_SHR_INT_LIT8, 4 + | 0xe2 -> OP_USHR_INT_LIT8, 4 + + | _ -> OP_NOP, 0 + +(* hx_to_op : int -> opcode *) +let hx_to_op (hx: int) : opcode = + fst (hx_to_op_and_size hx) + +(***********************************************************************) +(* Utilities *) +(***********************************************************************) + +let to_i32 = Int32.of_int +let of_i32 = Int32.to_int + +let to_i64 = Int64.of_int +let of_i64 = Int64.to_int + +let to_con c = OPR_CONST c +let to_reg r = OPR_REGISTER r +let to_idx i = OPR_INDEX i +let to_off f = OPR_OFFSET f + +(* of_reg : operand -> int *) +let of_reg = function OPR_REGISTER r -> r + +(* instr_to_string : instr -> string *) +let rec instr_to_string (op, opr) = + let buf = Buffer.create 80 in + Buffer.add_string buf (op_to_string op); + Buffer.add_char buf ' '; + let per_opr opr' = + Buffer.add_string buf (opr_to_string opr'); + Buffer.add_char buf ' ' + in + List.iter per_opr opr; + Buffer.contents buf + +(* opr_to_string : operand -> string *) +and opr_to_string = function + | OPR_CONST cs -> Int64.to_string cs + | OPR_REGISTER rg -> "v"^(string_of_int rg) + | OPR_INDEX ix -> Printf.sprintf "@%X" ix (* dexdump format *) + | OPR_OFFSET off -> Printf.sprintf "0x%08X" (of_i32 off) + +(* op_to_string : opcode -> string *) +and op_to_string = function + | OP_NOP -> "nop" + + | OP_MOVE -> "move" + | OP_MOVE_FROM16 -> "move/from16" + | OP_MOVE_16 -> "move/16" + | OP_MOVE_WIDE -> "move-wide" + | OP_MOVE_WIDE_FROM16 -> "move-wide/from16" + | OP_MOVE_WIDE_16 -> "move-wide/16" + | OP_MOVE_OBJECT -> "move-object" + | OP_MOVE_OBJECT_FROM16 -> "move-object/from16" + | OP_MOVE_OBJECT_16 -> "move-object/16" + + | OP_MOVE_RESULT -> "move-result" + | OP_MOVE_RESULT_WIDE -> "move-result-wide" + | OP_MOVE_RESULT_OBJECT -> "move-result-object" + | OP_MOVE_EXCEPTION -> "move-exception" + + | OP_RETURN_VOID -> "return-void" + | OP_RETURN -> "return" + | OP_RETURN_WIDE -> "return-wide" + | OP_RETURN_OBJECT -> "return-object" + + | OP_CONST_4 -> "const/4" + | OP_CONST_16 -> "const/16" + | OP_CONST -> "const" + | OP_CONST_HIGH16 -> "const/high16" + | OP_CONST_WIDE_16 -> "const-wide/16" + | OP_CONST_WIDE_32 -> "const-wide/32" + | OP_CONST_WIDE -> "const-wide" + | OP_CONST_WIDE_HIGH16 -> "const-wide/high16" + | OP_CONST_STRING -> "const-string" + | OP_CONST_STRING_JUMBO -> "const-string/jumbo" + | OP_CONST_CLASS -> "const-class" + + | OP_MONITOR_ENTER -> "monitor-enter" + | OP_MONITOR_EXIT -> "monitor-exit" + + | OP_CHECK_CAST -> "check-cast" + | OP_INSTANCE_OF -> "instance-of" + + | OP_ARRAY_LENGTH -> "array-length" + + | OP_NEW_INSTANCE -> "new-instance" + | OP_NEW_ARRAY -> "new-array" + + | OP_FILLED_NEW_ARRAY -> "filled-new-array" + | OP_FILLED_NEW_ARRAY_RANGE -> "filled-new-array/range" + | OP_FILL_ARRAY_DATA -> "fill-array-data" + + | OP_THROW -> "throw" + | OP_GOTO -> "goto" + | OP_GOTO_16 -> "goto/16" + | OP_GOTO_32 -> "goto/32" + | OP_PACKED_SWITCH -> "packed-switch" + | OP_SPARSE_SWITCH -> "sparse-switch" + + | OP_CMPL_FLOAT -> "cmpl-float" + | OP_CMPG_FLOAT -> "cmpg-float" + | OP_CMPL_DOUBLE -> "cmpl-double" + | OP_CMPG_DOUBLE -> "cmpg-double" + | OP_CMP_LONG -> "cmp-long" + + | OP_IF_EQ -> "if-eq" + | OP_IF_NE -> "if-ne" + | OP_IF_LT -> "if-lt" + | OP_IF_GE -> "if-ge" + | OP_IF_GT -> "if-gt" + | OP_IF_LE -> "if-le" + | OP_IF_EQZ -> "if-eqz" + | OP_IF_NEZ -> "if-nez" + | OP_IF_LTZ -> "if-ltz" + | OP_IF_GEZ -> "if-gez" + | OP_IF_GTZ -> "if-gtz" + | OP_IF_LEZ -> "if-lez" + + | OP_AGET -> "aget" + | OP_AGET_WIDE -> "aget-wide" + | OP_AGET_OBJECT -> "aget-object" + | OP_AGET_BOOLEAN -> "aget-boolean" + | OP_AGET_BYTE -> "aget-byte" + | OP_AGET_CHAR -> "aget-char" + | OP_AGET_SHORT -> "aget-short" + | OP_APUT -> "aput" + | OP_APUT_WIDE -> "aput-wide" + | OP_APUT_OBJECT -> "aput-object" + | OP_APUT_BOOLEAN -> "aput-boolean" + | OP_APUT_BYTE -> "aput-byte" + | OP_APUT_CHAR -> "aput-char" + | OP_APUT_SHORT -> "aput-short" + + | OP_IGET -> "iget" + | OP_IGET_WIDE -> "iget-wide" + | OP_IGET_OBJECT -> "iget-object" + | OP_IGET_BOOLEAN -> "iget-boolean" + | OP_IGET_BYTE -> "iget-byte" + | OP_IGET_CHAR -> "iget-char" + | OP_IGET_SHORT -> "iget-short" + | OP_IPUT -> "iput" + | OP_IPUT_WIDE -> "iput-wide" + | OP_IPUT_OBJECT -> "iput-object" + | OP_IPUT_BOOLEAN -> "iput-boolean" + | OP_IPUT_BYTE -> "iput-byte" + | OP_IPUT_CHAR -> "iput-char" + | OP_IPUT_SHORT -> "iput-short" + + | OP_SGET -> "sget" + | OP_SGET_WIDE -> "sget-wide" + | OP_SGET_OBJECT -> "sget-object" + | OP_SGET_BOOLEAN -> "sget-boolean" + | OP_SGET_BYTE -> "sget-byte" + | OP_SGET_CHAR -> "sget-char" + | OP_SGET_SHORT -> "sget-short" + | OP_SPUT -> "sput" + | OP_SPUT_WIDE -> "sput-wide" + | OP_SPUT_OBJECT -> "sput-object" + | OP_SPUT_BOOLEAN -> "sput-boolean" + | OP_SPUT_BYTE -> "sput-byte" + | OP_SPUT_CHAR -> "sput-char" + | OP_SPUT_SHORT -> "sput-short" + + | OP_INVOKE_VIRTUAL -> "invoke-virtual" + | OP_INVOKE_SUPER -> "invoke-super" + | OP_INVOKE_DIRECT -> "invoke-direct" + | OP_INVOKE_STATIC -> "invoke-static" + | OP_INVOKE_INTERFACE -> "invoke-interface" + + | OP_INVOKE_VIRTUAL_RANGE -> "invoke-virtual/range" + | OP_INVOKE_SUPER_RANGE -> "invoke-super/range" + | OP_INVOKE_DIRECT_RANGE -> "invoke-direct/range" + | OP_INVOKE_STATIC_RANGE -> "invoke-static/range" + | OP_INVOKE_INTERFACE_RANGE -> "invoke-interface/range" + + | OP_NEG_INT -> "neg-int" + | OP_NOT_INT -> "not-int" + | OP_NEG_LONG -> "neg-long" + | OP_NOT_LONG -> "not-long" + | OP_NEG_FLOAT -> "neg-float" + | OP_NEG_DOUBLE -> "neg-double" + | OP_INT_TO_LONG -> "int-to-long" + | OP_INT_TO_FLOAT -> "int-to-float" + | OP_INT_TO_DOUBLE -> "int-to-double" + | OP_LONG_TO_INT -> "long-to-int" + | OP_LONG_TO_FLOAT -> "long-to-float" + | OP_LONG_TO_DOUBLE -> "long-to-double" + | OP_FLOAT_TO_INT -> "float-to-int" + | OP_FLOAT_TO_LONG -> "float-to-long" + | OP_FLOAT_TO_DOUBLE -> "float-to-double" + | OP_DOUBLE_TO_INT -> "double-to-int" + | OP_DOUBLE_TO_LONG -> "double-to-long" + | OP_DOUBLE_TO_FLOAT -> "double-to-float" + | OP_INT_TO_BYTE -> "int-to-byte" + | OP_INT_TO_CHAR -> "int-to-char" + | OP_INT_TO_SHORT -> "int-to-short" + + | OP_ADD_INT -> "add-int" + | OP_SUB_INT -> "sub-int" + | OP_MUL_INT -> "mul-int" + | OP_DIV_INT -> "div-int" + | OP_REM_INT -> "rem-int" + | OP_AND_INT -> "and-int" + | OP_OR_INT -> "or-int" + | OP_XOR_INT -> "xor-int" + | OP_SHL_INT -> "shl-int" + | OP_SHR_INT -> "shr-int" + | OP_USHR_INT -> "ushr-int" + + | OP_ADD_LONG -> "add-long" + | OP_SUB_LONG -> "sub-long" + | OP_MUL_LONG -> "mul-long" + | OP_DIV_LONG -> "div-long" + | OP_REM_LONG -> "rem-long" + | OP_AND_LONG -> "and-long" + | OP_OR_LONG -> "or-long" + | OP_XOR_LONG -> "xor-long" + | OP_SHL_LONG -> "shl-long" + | OP_SHR_LONG -> "shr-long" + | OP_USHR_LONG -> "ushr-long" + + | OP_ADD_FLOAT -> "add-float" + | OP_SUB_FLOAT -> "sub-float" + | OP_MUL_FLOAT -> "mul-float" + | OP_DIV_FLOAT -> "div-float" + | OP_REM_FLOAT -> "rem-float" + | OP_ADD_DOUBLE -> "add-double" + | OP_SUB_DOUBLE -> "sub-double" + | OP_MUL_DOUBLE -> "mul-double" + | OP_DIV_DOUBLE -> "div-double" + | OP_REM_DOUBLE -> "rem-double" + + | OP_ADD_INT_2ADDR -> "add-int/2addr" + | OP_SUB_INT_2ADDR -> "sub-int/2addr" + | OP_MUL_INT_2ADDR -> "mul-int/2addr" + | OP_DIV_INT_2ADDR -> "div-int/2addr" + | OP_REM_INT_2ADDR -> "rem-int/2addr" + | OP_AND_INT_2ADDR -> "and-int/2addr" + | OP_OR_INT_2ADDR -> "or-int/2addr" + | OP_XOR_INT_2ADDR -> "xor-int/2addr" + | OP_SHL_INT_2ADDR -> "shl-int/2addr" + | OP_SHR_INT_2ADDR -> "shr-int/2addr" + | OP_USHR_INT_2ADDR -> "ushr-int/2addr" + + | OP_ADD_LONG_2ADDR -> "add-long/2addr" + | OP_SUB_LONG_2ADDR -> "sub-long/2addr" + | OP_MUL_LONG_2ADDR -> "mul-long/2addr" + | OP_DIV_LONG_2ADDR -> "div-long/2addr" + | OP_REM_LONG_2ADDR -> "rem-long/2addr" + | OP_AND_LONG_2ADDR -> "and-long/2addr" + | OP_OR_LONG_2ADDR -> "or-long/2addr" + | OP_XOR_LONG_2ADDR -> "xor-long/2addr" + | OP_SHL_LONG_2ADDR -> "shl-long/2addr" + | OP_SHR_LONG_2ADDR -> "shr-long/2addr" + | OP_USHR_LONG_2ADDR -> "ushr-long/2addr" + + | OP_ADD_FLOAT_2ADDR -> "add-float/2addr" + | OP_SUB_FLOAT_2ADDR -> "sub-float/2addr" + | OP_MUL_FLOAT_2ADDR -> "mul-float/2addr" + | OP_DIV_FLOAT_2ADDR -> "div-float/2addr" + | OP_REM_FLOAT_2ADDR -> "rem-float/2addr" + | OP_ADD_DOUBLE_2ADDR -> "add-double/2addr" + | OP_SUB_DOUBLE_2ADDR -> "sub-double/2addr" + | OP_MUL_DOUBLE_2ADDR -> "mul-double/2addr" + | OP_DIV_DOUBLE_2ADDR -> "div-double/2addr" + | OP_REM_DOUBLE_2ADDR -> "rem-double/2addr" + + | OP_ADD_INT_LIT16 -> "add-int/lit16" + | OP_RSUB_INT -> "rsub-int" + | OP_MUL_INT_LIT16 -> "mul-int/lit16" + | OP_DIV_INT_LIT16 -> "div-int/lit16" + | OP_REM_INT_LIT16 -> "rem-int/lit16" + | OP_AND_INT_LIT16 -> "and-int/lit16" + | OP_OR_INT_LIT16 -> "or-int/lit16" + | OP_XOR_INT_LIT16 -> "xor-int/lit16" + + | OP_ADD_INT_LIT8 -> "add-int/lit8" + | OP_RSUB_INT_LIT8 -> "rsub-int/lit8" + | OP_MUL_INT_LIT8 -> "mul-int/lit8" + | OP_DIV_INT_LIT8 -> "div-int/lit8" + | OP_REM_INT_LIT8 -> "rem-int/lit8" + | OP_AND_INT_LIT8 -> "and-int/lit8" + | OP_OR_INT_LIT8 -> "or-int/lit8" + | OP_XOR_INT_LIT8 -> "xor-int/lit8" + | OP_SHL_INT_LIT8 -> "shl-int/lit8" + | OP_SHR_INT_LIT8 -> "shr-int/lit8" + | OP_USHR_INT_LIT8 -> "ushr-int/lit8" + | _ -> "unused" + +(* op_to_hx_and_size : opcode -> int * int *) +let op_to_hx_and_size (op: opcode) : int * int = + match op with + | OP_NOP -> 0x00, 2 + + | OP_MOVE -> 0x01, 2 + | OP_MOVE_FROM16 -> 0x02, 4 + | OP_MOVE_16 -> 0x03, 6 + | OP_MOVE_WIDE -> 0x04, 2 + | OP_MOVE_WIDE_FROM16 -> 0x05, 4 + | OP_MOVE_WIDE_16 -> 0x06, 6 + | OP_MOVE_OBJECT -> 0x07, 2 + | OP_MOVE_OBJECT_FROM16 -> 0x08, 4 + | OP_MOVE_OBJECT_16 -> 0x09, 6 + + | OP_MOVE_RESULT -> 0x0a, 2 + | OP_MOVE_RESULT_WIDE -> 0x0b, 2 + | OP_MOVE_RESULT_OBJECT -> 0x0c, 2 + | OP_MOVE_EXCEPTION -> 0x0d, 2 + + | OP_RETURN_VOID -> 0x0e, 2 + | OP_RETURN -> 0x0f, 2 + | OP_RETURN_WIDE -> 0x10, 2 + | OP_RETURN_OBJECT -> 0x11, 2 + + | OP_CONST_4 -> 0x12, 2 + | OP_CONST_16 -> 0x13, 4 + | OP_CONST -> 0x14, 6 + | OP_CONST_HIGH16 -> 0x15, 4 + | OP_CONST_WIDE_16 -> 0x16, 4 + | OP_CONST_WIDE_32 -> 0x17, 6 + | OP_CONST_WIDE -> 0x18, 10 + | OP_CONST_WIDE_HIGH16 -> 0x19, 4 + | OP_CONST_STRING -> 0x1a, 4 + | OP_CONST_STRING_JUMBO -> 0x1b, 6 + | OP_CONST_CLASS -> 0x1c, 4 + + | OP_MONITOR_ENTER -> 0x1d, 2 + | OP_MONITOR_EXIT -> 0x1e, 2 + + | OP_CHECK_CAST -> 0x1f, 4 + | OP_INSTANCE_OF -> 0x20, 4 + + | OP_ARRAY_LENGTH -> 0x21, 2 + + | OP_NEW_INSTANCE -> 0x22, 4 + | OP_NEW_ARRAY -> 0x23, 4 + + | OP_FILLED_NEW_ARRAY -> 0x24, 6 + | OP_FILLED_NEW_ARRAY_RANGE -> 0x25, 6 + | OP_FILL_ARRAY_DATA -> 0x26, 6 + + | OP_THROW -> 0x27, 2 + | OP_GOTO -> 0x28, 2 + | OP_GOTO_16 -> 0x29, 4 + | OP_GOTO_32 -> 0x2a, 6 + | OP_PACKED_SWITCH -> 0x2b, 6 + | OP_SPARSE_SWITCH -> 0x2c, 6 + + | OP_CMPL_FLOAT -> 0x2d, 4 + | OP_CMPG_FLOAT -> 0x2e, 4 + | OP_CMPL_DOUBLE -> 0x2f, 4 + | OP_CMPG_DOUBLE -> 0x30, 4 + | OP_CMP_LONG -> 0x31, 4 + + | OP_IF_EQ -> 0x32, 4 + | OP_IF_NE -> 0x33, 4 + | OP_IF_LT -> 0x34, 4 + | OP_IF_GE -> 0x35, 4 + | OP_IF_GT -> 0x36, 4 + | OP_IF_LE -> 0x37, 4 + | OP_IF_EQZ -> 0x38, 4 + | OP_IF_NEZ -> 0x39, 4 + | OP_IF_LTZ -> 0x3a, 4 + | OP_IF_GEZ -> 0x3b, 4 + | OP_IF_GTZ -> 0x3c, 4 + | OP_IF_LEZ -> 0x3d, 4 + + | OP_AGET -> 0x44, 4 + | OP_AGET_WIDE -> 0x45, 4 + | OP_AGET_OBJECT -> 0x46, 4 + | OP_AGET_BOOLEAN -> 0x47, 4 + | OP_AGET_BYTE -> 0x48, 4 + | OP_AGET_CHAR -> 0x49, 4 + | OP_AGET_SHORT -> 0x4a, 4 + | OP_APUT -> 0x4b, 4 + | OP_APUT_WIDE -> 0x4c, 4 + | OP_APUT_OBJECT -> 0x4d, 4 + | OP_APUT_BOOLEAN -> 0x4e, 4 + | OP_APUT_BYTE -> 0x4f, 4 + | OP_APUT_CHAR -> 0x50, 4 + | OP_APUT_SHORT -> 0x51, 4 + + | OP_IGET -> 0x52, 4 + | OP_IGET_WIDE -> 0x53, 4 + | OP_IGET_OBJECT -> 0x54, 4 + | OP_IGET_BOOLEAN -> 0x55, 4 + | OP_IGET_BYTE -> 0x56, 4 + | OP_IGET_CHAR -> 0x57, 4 + | OP_IGET_SHORT -> 0x58, 4 + | OP_IPUT -> 0x59, 4 + | OP_IPUT_WIDE -> 0x5a, 4 + | OP_IPUT_OBJECT -> 0x5b, 4 + | OP_IPUT_BOOLEAN -> 0x5c, 4 + | OP_IPUT_BYTE -> 0x5d, 4 + | OP_IPUT_CHAR -> 0x5e, 4 + | OP_IPUT_SHORT -> 0x5f, 4 + + | OP_SGET -> 0x60, 4 + | OP_SGET_WIDE -> 0x61, 4 + | OP_SGET_OBJECT -> 0x62, 4 + | OP_SGET_BOOLEAN -> 0x63, 4 + | OP_SGET_BYTE -> 0x64, 4 + | OP_SGET_CHAR -> 0x65, 4 + | OP_SGET_SHORT -> 0x66, 4 + | OP_SPUT -> 0x67, 4 + | OP_SPUT_WIDE -> 0x68, 4 + | OP_SPUT_OBJECT -> 0x69, 4 + | OP_SPUT_BOOLEAN -> 0x6a, 4 + | OP_SPUT_BYTE -> 0x6b, 4 + | OP_SPUT_CHAR -> 0x6c, 4 + | OP_SPUT_SHORT -> 0x6d, 4 + + | OP_INVOKE_VIRTUAL -> 0x6e, 6 + | OP_INVOKE_SUPER -> 0x6f, 6 + | OP_INVOKE_DIRECT -> 0x70, 6 + | OP_INVOKE_STATIC -> 0x71, 6 + | OP_INVOKE_INTERFACE -> 0x72, 6 + + | OP_INVOKE_VIRTUAL_RANGE -> 0x74, 6 + | OP_INVOKE_SUPER_RANGE -> 0x75, 6 + | OP_INVOKE_DIRECT_RANGE -> 0x76, 6 + | OP_INVOKE_STATIC_RANGE -> 0x77, 6 + | OP_INVOKE_INTERFACE_RANGE -> 0x78, 6 + + | OP_NEG_INT -> 0x7b, 2 + | OP_NOT_INT -> 0x7c, 2 + | OP_NEG_LONG -> 0x7d, 2 + | OP_NOT_LONG -> 0x7e, 2 + | OP_NEG_FLOAT -> 0x7f, 2 + | OP_NEG_DOUBLE -> 0x80, 2 + | OP_INT_TO_LONG -> 0x81, 2 + | OP_INT_TO_FLOAT -> 0x82, 2 + | OP_INT_TO_DOUBLE -> 0x83, 2 + | OP_LONG_TO_INT -> 0x84, 2 + | OP_LONG_TO_FLOAT -> 0x85, 2 + | OP_LONG_TO_DOUBLE -> 0x86, 2 + | OP_FLOAT_TO_INT -> 0x87, 2 + | OP_FLOAT_TO_LONG -> 0x88, 2 + | OP_FLOAT_TO_DOUBLE -> 0x89, 2 + | OP_DOUBLE_TO_INT -> 0x8a, 2 + | OP_DOUBLE_TO_LONG -> 0x8b, 2 + | OP_DOUBLE_TO_FLOAT -> 0x8c, 2 + | OP_INT_TO_BYTE -> 0x8d, 2 + | OP_INT_TO_CHAR -> 0x8e, 2 + | OP_INT_TO_SHORT -> 0x8f, 2 + + | OP_ADD_INT -> 0x90, 4 + | OP_SUB_INT -> 0x91, 4 + | OP_MUL_INT -> 0x92, 4 + | OP_DIV_INT -> 0x93, 4 + | OP_REM_INT -> 0x94, 4 + | OP_AND_INT -> 0x95, 4 + | OP_OR_INT -> 0x96, 4 + | OP_XOR_INT -> 0x97, 4 + | OP_SHL_INT -> 0x98, 4 + | OP_SHR_INT -> 0x99, 4 + | OP_USHR_INT -> 0x9a, 4 + + | OP_ADD_LONG -> 0x9b, 4 + | OP_SUB_LONG -> 0x9c, 4 + | OP_MUL_LONG -> 0x9d, 4 + | OP_DIV_LONG -> 0x9e, 4 + | OP_REM_LONG -> 0x9f, 4 + | OP_AND_LONG -> 0xa0, 4 + | OP_OR_LONG -> 0xa1, 4 + | OP_XOR_LONG -> 0xa2, 4 + | OP_SHL_LONG -> 0xa3, 4 + | OP_SHR_LONG -> 0xa4, 4 + | OP_USHR_LONG -> 0xa5, 4 + + | OP_ADD_FLOAT -> 0xa6, 4 + | OP_SUB_FLOAT -> 0xa7, 4 + | OP_MUL_FLOAT -> 0xa8, 4 + | OP_DIV_FLOAT -> 0xa9, 4 + | OP_REM_FLOAT -> 0xaa, 4 + | OP_ADD_DOUBLE -> 0xab, 4 + | OP_SUB_DOUBLE -> 0xac, 4 + | OP_MUL_DOUBLE -> 0xad, 4 + | OP_DIV_DOUBLE -> 0xae, 4 + | OP_REM_DOUBLE -> 0xaf, 4 + + | OP_ADD_INT_2ADDR -> 0xb0, 2 + | OP_SUB_INT_2ADDR -> 0xb1, 2 + | OP_MUL_INT_2ADDR -> 0xb2, 2 + | OP_DIV_INT_2ADDR -> 0xb3, 2 + | OP_REM_INT_2ADDR -> 0xb4, 2 + | OP_AND_INT_2ADDR -> 0xb5, 2 + | OP_OR_INT_2ADDR -> 0xb6, 2 + | OP_XOR_INT_2ADDR -> 0xb7, 2 + | OP_SHL_INT_2ADDR -> 0xb8, 2 + | OP_SHR_INT_2ADDR -> 0xb9, 2 + | OP_USHR_INT_2ADDR -> 0xba, 2 + + | OP_ADD_LONG_2ADDR -> 0xbb, 2 + | OP_SUB_LONG_2ADDR -> 0xbc, 2 + | OP_MUL_LONG_2ADDR -> 0xbd, 2 + | OP_DIV_LONG_2ADDR -> 0xbe, 2 + | OP_REM_LONG_2ADDR -> 0xbf, 2 + | OP_AND_LONG_2ADDR -> 0xc0, 2 + | OP_OR_LONG_2ADDR -> 0xc1, 2 + | OP_XOR_LONG_2ADDR -> 0xc2, 2 + | OP_SHL_LONG_2ADDR -> 0xc3, 2 + | OP_SHR_LONG_2ADDR -> 0xc4, 2 + | OP_USHR_LONG_2ADDR -> 0xc5, 2 + + | OP_ADD_FLOAT_2ADDR -> 0xc6, 2 + | OP_SUB_FLOAT_2ADDR -> 0xc7, 2 + | OP_MUL_FLOAT_2ADDR -> 0xc8, 2 + | OP_DIV_FLOAT_2ADDR -> 0xc9, 2 + | OP_REM_FLOAT_2ADDR -> 0xca, 2 + | OP_ADD_DOUBLE_2ADDR -> 0xcb, 2 + | OP_SUB_DOUBLE_2ADDR -> 0xcc, 2 + | OP_MUL_DOUBLE_2ADDR -> 0xcd, 2 + | OP_DIV_DOUBLE_2ADDR -> 0xce, 2 + | OP_REM_DOUBLE_2ADDR -> 0xcf, 2 + + | OP_ADD_INT_LIT16 -> 0xd0, 4 + | OP_RSUB_INT -> 0xd1, 4 + | OP_MUL_INT_LIT16 -> 0xd2, 4 + | OP_DIV_INT_LIT16 -> 0xd3, 4 + | OP_REM_INT_LIT16 -> 0xd4, 4 + | OP_AND_INT_LIT16 -> 0xd5, 4 + | OP_OR_INT_LIT16 -> 0xd6, 4 + | OP_XOR_INT_LIT16 -> 0xd7, 4 + + | OP_ADD_INT_LIT8 -> 0xd8, 4 + | OP_RSUB_INT_LIT8 -> 0xd9, 4 + | OP_MUL_INT_LIT8 -> 0xda, 4 + | OP_DIV_INT_LIT8 -> 0xdb, 4 + | OP_REM_INT_LIT8 -> 0xdc, 4 + | OP_AND_INT_LIT8 -> 0xdd, 4 + | OP_OR_INT_LIT8 -> 0xde, 4 + | OP_XOR_INT_LIT8 -> 0xdf, 4 + | OP_SHL_INT_LIT8 -> 0xe0, 4 + | OP_SHR_INT_LIT8 -> 0xe1, 4 + | OP_USHR_INT_LIT8 -> 0xe2, 4 + | _ -> 0x00, 0 (* the rest of the opcodes? *) + +(* op_to_hx: opcode -> int *) +let op_to_hx (op: opcode) : int = + fst (op_to_hx_and_size op) + +(* low_reg : opcode -> int *) +let low_reg (op: opcode) : int = + let hx = op_to_hx op + and moves = [0x01; 0x04; 0x07] + and b = 2.0 in + let pow = + (* MOVE *) + if List.mem hx moves then b ** 4.0 + else if List.mem hx (List.map ((+) 1) moves) then b ** 8.0 + else if List.mem hx (List.map ((+) 2) moves) then b ** 16.0 + (* MOVE_RESULT and RETURN *) + else if List.mem hx (range 0x0a 0x0d (range 0x0f 0x11 [])) then b ** 8.0 + (* CONST *) + else if 0x12 = hx then b ** 4.0 + else if 0x13 <= hx && hx <= 0x1c then b ** 8.0 + (* OBJ *) + else if 0x25 = hx then b ** 16.0 + else if List.mem hx [0x20; 0x21; 0x23; 0x24] then b ** 4.0 + else if 0x1d <= hx && hx <= 0x27 then b ** 8.0 + (* SWITCH, cmp, and if-test *) + else if 0x32 <= hx && hx <= 0x37 then b ** 4.0 + else if 0x2b <= hx && hx <= 0x3d then b ** 8.0 + (* arrayop *) + else if 0x44 <= hx && hx <= 0x51 then b ** 8.0 + (* instanceop *) + else if 0x52 <= hx && hx <= 0x5f then b ** 4.0 + (* staticop *) + else if 0x60 <= hx && hx <= 0x6d then b ** 8.0 + (* invoke *) + else if 0x6e <= hx && hx <= 0x72 then b ** 4.0 + (* invoke/range *) + else if 0x74 <= hx && hx <= 0x78 then b ** 16.0 + (* unop *) + else if 0x7b <= hx && hx <= 0x8f then b ** 4.0 + (* binop *) + else if 0x90 <= hx && hx <= 0xaf then b ** 8.0 + (* binop/(2addr|lit16) *) + else if 0xb0 <= hx && hx <= 0xd7 then b ** 4.0 + (* binop/lit8 *) + else if 0xd8 <= hx && hx <= 0xe2 then b ** 8.0 + else 0.0 + in + int_of_float pow + +type link_sort = + | STRING_IDS + | TYPE_IDS + | FIELD_IDS + | METHOD_IDS + | OFFSET + | NOT_LINK + +(* access_link : opcode -> link_sort *) +let access_link (op: opcode) : link_sort = + let hx = op_to_hx op in + if List.mem hx [0x1a; 0x1b] then STRING_IDS + else if List.mem hx [0x1c; 0x1f; 0x20] + || 0x22 <= hx && hx <= 0x25 then TYPE_IDS + else if 0x52 <= hx && hx <= 0x6d then FIELD_IDS + else if 0x6e <= hx && hx <= 0x78 && hx <> 0x73 then METHOD_IDS + else if 0x26 <= hx && hx <= 0x2c && hx <> 0x27 + || 0x32 <= hx && hx <= 0x3d then OFFSET + else NOT_LINK + +(* get_argv : instr -> operand list *) +let get_argv (ins: instr) : operand list = + let op, opr = ins in + match access_link op with + | METHOD_IDS -> + ( + let argv = rm_last opr + and hx = op_to_hx op in + if 0x6e <= hx && hx <= 0x72 then argv (* normal *) + else if 0x74 <= hx && hx <= 0x78 then (* range *) + let (OPR_REGISTER fst_arg)::(OPR_REGISTER last_arg)::[] = argv in + List.map (fun i -> OPR_REGISTER i) (range fst_arg last_arg []) + else [] + ) + | _ -> [] + +type reg_sort = + | R_OBJ + | R_WIDE + | R_WIDE_L + | R_NORMAL + +(* get_reg_sorts : instr -> (int * reg_sort) list *) +let get_reg_sorts (ins: instr) : (int * reg_sort) list = + let wrap_normal opr = + let r = of_reg opr in (r, R_NORMAL) + and wrap_wide opr = + let r = of_reg opr in [(r, R_WIDE); (r+1, R_WIDE_L)] + in + let op, opr = ins in + let hx = op_to_hx op in + match op, opr with + | _, OPR_REGISTER d :: OPR_REGISTER s :: [] + when 0x01 <= hx && hx <= 0x09 -> (* MOVE *) + ( + if 0x01 <= hx && hx <= 0x03 then [(d, R_NORMAL); (s, R_NORMAL)] + else if 0x07 <= hx && hx <= 0x09 then [(d, R_OBJ); (s, R_OBJ)] + else [(d, R_WIDE); (d+1, R_WIDE_L); (s, R_WIDE); (s+1, R_WIDE_L)] + ) + + | _, OPR_REGISTER r :: [] + when 0x0a <= hx && hx <= 0x11 -> (* MOVE_RESULT and RETURN *) + ( + if List.mem hx [0x0c; 0x0d; 0x11] then [(r, R_OBJ)] + else if List.mem hx [0x0b; 0x10] then [(r, R_WIDE); (r+1, R_WIDE_L)] + else [(r, R_NORMAL)] + ) + + | _, OPR_REGISTER r :: _ + when 0x12 <= hx && hx <= 0x1c -> (* CONST *) + ( + if 0x12 <= hx && hx <= 0x15 then [(r, R_NORMAL)] + else if 0x1a <= hx && hx <= 0x1c then [(r, R_OBJ)] + else [(r, R_WIDE); (r+1, R_WIDE_L)] + ) + + | OP_MONITOR_ENTER, OPR_REGISTER r :: [] + | OP_MONITOR_EXIT, OPR_REGISTER r :: [] + | OP_CHECK_CAST, OPR_REGISTER r :: _ + | OP_NEW_INSTANCE, OPR_REGISTER r :: _ + | OP_FILL_ARRAY_DATA, OPR_REGISTER r :: _ + | OP_THROW, OPR_REGISTER r :: [] -> [(r, R_OBJ)] + + | OP_INSTANCE_OF, OPR_REGISTER d :: OPR_REGISTER o :: _ + | OP_ARRAY_LENGTH, OPR_REGISTER d :: OPR_REGISTER o :: [] -> + [(d, R_NORMAL); (o, R_OBJ)] + + | OP_NEW_ARRAY, OPR_REGISTER d :: OPR_REGISTER s :: _ -> + [(d, R_OBJ); (s, R_NORMAL)] + +(* reference types are acceptable... + | OP_FILLED_NEW_ARRAY, _ + | OP_FILLED_NEW_ARRAY_RANGE, _ + | OP_PACKED_SWITCH, _ + | OP_SPARSE_SWITCH, _ -> + List.map (fun opr -> wrap_normal opr) (rm_last opr) +*) + + | _, OPR_REGISTER d :: OPR_REGISTER s1 :: OPR_REGISTER s2 :: [] + when 0x2d <= hx && hx <= 0x31 -> (* CMP *) + ( + if 0x2d <= hx && hx <= 0x2e then List.map wrap_normal opr + else List.flatten (List.map wrap_wide opr) + ) + +(* reference types are acceptable... + | _, OPR_REGISTER s1 :: OPR_REGISTER s2 :: _ + when 0x32 <= hx && hx <= 0x37 -> (* IF-test *) + [(s1, R_NORMAL); (s2, R_NORMAL)] + | _, OPR_REGISTER t :: _ + when 0x38 <= hx && hx <= 0x3d -> (* IF-testz *) + [(t, R_NORMAL)] +*) + + | _, OPR_REGISTER d :: OPR_REGISTER a :: OPR_REGISTER i :: [] + when 0x44 <= hx && hx <= 0x51 -> (* arrayop *) + ( + let common = [(a, R_OBJ); (i, R_NORMAL)] in + if List.mem hx [0x45; 0x4c] then (d, R_WIDE) :: (d+1, R_WIDE_L) :: common + else if List.mem hx [0x46; 0x4d] then (d, R_OBJ) :: common + else (d, R_NORMAL) :: common + ) + + | _, OPR_REGISTER d :: OPR_REGISTER o :: _ + when 0x52 <= hx && hx <= 0x5f -> (* instanceop *) + ( + let common = [(o, R_OBJ)] in + if List.mem hx [0x53; 0x5a] then (d, R_WIDE) :: (d+1, R_WIDE_L) :: common + else if List.mem hx [0x54; 0x5b] then (d, R_OBJ) :: common + else (d, R_NORMAL) :: common + ) + + | _, OPR_REGISTER d :: _ + when 0x60 <= hx && hx <= 0x6d -> (* staticop *) + ( + if List.mem hx [0x61; 0x68] then [(d, R_WIDE); (d+1, R_WIDE_L)] + else if List.mem hx [0x62; 0x69] then [(d, R_OBJ)] + else [(d, R_NORMAL)] + ) + +(* can't figure out sorts of registers from an instr perspective + | _, _ when 0x63 <= hx && hx <= 0x78 -> (* invoke *) +*) + + | _, OPR_REGISTER d :: OPR_REGISTER s :: [] + when 0x7b <= hx && hx <= 0x8f -> (* unop *) + ( + let d_sorts = + if List.mem hx [0x7d; 0x7e; 0x80; 0x81; 0x83; 0x86; 0x88; 0x89; 0x8b] + then [(d, R_WIDE); (d+1, R_WIDE_L)] else [(d, R_NORMAL)] + and s_sorts = + if List.mem hx [0x7d; 0x7e; 0x80; 0x84; 0x85; 0x86; 0x8a; 0x8b; 0x8c] + then [(s, R_WIDE); (s+1, R_WIDE_L)] else [(s, R_NORMAL)] + in + d_sorts @ s_sorts + ) + + | _, _ when 0x90 <= hx && hx <= 0xcf -> (* binop(/2addr) *) + ( + if List.mem hx (range 0x9b 0xa5 (range 0xab 0xaf [])) + || List.mem hx (range 0xbb 0xc5 (range 0xcb 0xcf [])) then + List.flatten (List.map wrap_wide opr) + else List.map wrap_normal opr + ) + + | _, _ when 0xd0 <= hx && hx <= 0xe2 -> (* binop/lit(16|8) *) + List.map wrap_normal (rm_last opr) + + | _, _ -> [] + +(***********************************************************************) +(* Parsing *) +(***********************************************************************) + +(* make_instr : opcode -> int list -> instr *) +let make_instr (op: opcode) (args: int list) : instr = + let to_sign4 x = if (x land 0x8) <> 0 then x - 0x10 else x + and to_sign8 x = if (x land 0x80) <> 0 then x - 0x100 else x + and to_sign16 x = if (x land 0x8000) <> 0 then x - 0x10000 else x + in + let opr4 i = (i land 0x0f), (i lsr 4) + and opr16u l h = (h lsl 8) lor l in + let opr16 l h = to_sign16 (opr16u l h) + and opr32 l h = (h lsl 16) lor l + and opr32u l h = Int64.logor (Int64.shift_left (to_i64 h) 16) (to_i64 l) + in + let f_10x () = [] + and f_12x () = + let ba::[] = args in + let a,b = opr4 ba in [OPR_REGISTER a; OPR_REGISTER b] + and f_11n () = + let ba::[] = args in + let a,b = opr4 ba in [OPR_REGISTER a; OPR_CONST (to_i64 (to_sign4 b))] + and f_11x () = + let aa::[] = args in [OPR_REGISTER aa] + and f_10t () = + let aa::[] = args in [OPR_OFFSET (to_i32 (to_sign8 aa))] + and f_20t () = + let _::al::ah::[] = args in + let a = opr16 al ah in [OPR_OFFSET (to_i32 a)] + and f_22x () = + let aa::bl::bh::[] = args in + let b = opr16 bl bh in [OPR_REGISTER aa; OPR_REGISTER b] + and f_21t () = + let aa::bl::bh::[] = args in + let b = opr16 bl bh in [OPR_REGISTER aa; OPR_OFFSET (to_i32 b)] + and f_21s () = + let aa::bl::bh::[] = args in + let b = opr16 bl bh in [OPR_REGISTER aa; OPR_CONST (to_i64 b)] + and f_21h () = + let aa::bl::bh::[] = args in + let b = opr16 bl bh in [OPR_REGISTER aa; OPR_CONST (to_i64 b)] + and f_21c () = + let aa::bl::bh::[] = args in + let b = opr16u bl bh in [OPR_REGISTER aa; OPR_INDEX b] + and f_23x () = + let aa::bb::cc::[] = args in + [OPR_REGISTER aa; OPR_REGISTER bb; OPR_REGISTER cc] + and f_22b () = + let aa::bb::cc::[] = args in + [OPR_REGISTER aa; OPR_REGISTER bb; OPR_CONST (to_i64 cc)] + and f_22t () = + let ba::cl::ch::[] = args in + let a,b = opr4 ba and c = opr16 cl ch in + [OPR_REGISTER a; OPR_REGISTER b; OPR_OFFSET (to_i32 c)] + and f_22s () = + let ba::cl::ch::[] = args in + let a,b = opr4 ba and c = opr16 cl ch in + [OPR_REGISTER a; OPR_REGISTER b; OPR_CONST (to_i64 c)] + and f_22c () = + let ba::cl::ch::[] = args in + let a,b = opr4 ba and c = opr16u cl ch in + [OPR_REGISTER a; OPR_REGISTER b; OPR_INDEX c] + and f_30t () = + let _::all::alh::ahl::ahh::[] = args in + let al = opr16u all alh and ah = opr16u ahl ahh in + let a = opr32 al ah in [OPR_OFFSET (to_i32 a)] + and f_32x () = + let _::al::ah::bl::bh::[] = args in + let a = opr16 al ah and b = opr16 bl bh in + [OPR_REGISTER a; OPR_REGISTER b] + and f_31i () = + let aa::bll::blh::bhl::bhh::[] = args in + let bl = opr16u bll blh and bh = opr16u bhl bhh in + let b = opr32u bl bh in [OPR_REGISTER aa; OPR_CONST b] + and f_31t () = + let aa::bll::blh::bhl::bhh::[] = args in + let bl = opr16u bll blh and bh = opr16u bhl bhh in + let b = opr32 bl bh in [OPR_REGISTER aa; OPR_OFFSET (to_i32 b)] + and f_31c () = + let aa::bll::blh::bhl::bhh::[] = args in + let bl = opr16u bll blh and bh = opr16u bhl bhh in + let b = opr32 bl bh in [OPR_REGISTER aa; OPR_INDEX b] + and f_35c () = + let ba::cl::ch::ed::gf::[] = args in + let a,b = opr4 ba and c = opr16u cl ch + and d,e = opr4 ed and f,g = opr4 gf in + let iC = OPR_INDEX c and vD = OPR_REGISTER d + and vE = OPR_REGISTER e and vF = OPR_REGISTER f + and vG = OPR_REGISTER g and vA = OPR_REGISTER a in + ( + match b with + | 0 -> [iC] + | 1 -> [vD; iC] + | 2 -> [vD; vE; iC] + | 3 -> [vD; vE; vF; iC] + | 4 -> [vD; vE; vF; vG; iC] + | 5 -> [vD; vE; vF; vG; vA; iC] + | _ -> [] + ) + and f_3rc () = + let aa::bl::bh::cl::ch::[] = args in + let b = opr16u bl bh and c = opr16 cl ch in + let n = c + aa - 1 in [OPR_REGISTER c; OPR_REGISTER n; OPR_INDEX b] + and f_51l () = + let aa::b1l::b1h::b2l::b2h::b3l::b3h::b4l::b4h::[] = args in + let b1 = opr16u b1l b1h and b2 = opr16u b2l b2h + and b3 = opr16u b3l b3h and b4 = opr16u b4l b4h in + let b12 = opr32u b1 b2 and b34 = opr32u b3 b4 in + let b = Int64.logor (Int64.shift_left b34 32) b12 in + [OPR_REGISTER aa; OPR_CONST b] + in + op, match op with + | OP_NOP + | OP_RETURN_VOID -> f_10x () + + | OP_MOVE + | OP_MOVE_WIDE + | OP_MOVE_OBJECT + + | OP_ARRAY_LENGTH + + | OP_NEG_INT + | OP_NOT_INT + | OP_NEG_LONG + | OP_NOT_LONG + | OP_NEG_FLOAT + | OP_NEG_DOUBLE + | OP_INT_TO_LONG + | OP_INT_TO_FLOAT + | OP_INT_TO_DOUBLE + | OP_LONG_TO_INT + | OP_LONG_TO_FLOAT + | OP_LONG_TO_DOUBLE + | OP_FLOAT_TO_INT + | OP_FLOAT_TO_LONG + | OP_FLOAT_TO_DOUBLE + | OP_DOUBLE_TO_INT + | OP_DOUBLE_TO_LONG + | OP_DOUBLE_TO_FLOAT + | OP_INT_TO_BYTE + | OP_INT_TO_CHAR + | OP_INT_TO_SHORT + + | OP_ADD_INT_2ADDR + | OP_SUB_INT_2ADDR + | OP_MUL_INT_2ADDR + | OP_DIV_INT_2ADDR + | OP_REM_INT_2ADDR + | OP_AND_INT_2ADDR + | OP_OR_INT_2ADDR + | OP_XOR_INT_2ADDR + | OP_SHL_INT_2ADDR + | OP_SHR_INT_2ADDR + | OP_USHR_INT_2ADDR + + | OP_ADD_LONG_2ADDR + | OP_SUB_LONG_2ADDR + | OP_MUL_LONG_2ADDR + | OP_DIV_LONG_2ADDR + | OP_REM_LONG_2ADDR + | OP_AND_LONG_2ADDR + | OP_OR_LONG_2ADDR + | OP_XOR_LONG_2ADDR + | OP_SHL_LONG_2ADDR + | OP_SHR_LONG_2ADDR + | OP_USHR_LONG_2ADDR + + | OP_ADD_FLOAT_2ADDR + | OP_SUB_FLOAT_2ADDR + | OP_MUL_FLOAT_2ADDR + | OP_DIV_FLOAT_2ADDR + | OP_REM_FLOAT_2ADDR + | OP_ADD_DOUBLE_2ADDR + | OP_SUB_DOUBLE_2ADDR + | OP_MUL_DOUBLE_2ADDR + | OP_DIV_DOUBLE_2ADDR + | OP_REM_DOUBLE_2ADDR -> f_12x () + + | OP_CONST_4 -> f_11n () + + | OP_MOVE_RESULT + | OP_MOVE_RESULT_WIDE + | OP_MOVE_RESULT_OBJECT + | OP_MOVE_EXCEPTION + + | OP_RETURN + | OP_RETURN_WIDE + | OP_RETURN_OBJECT + + | OP_MONITOR_ENTER + | OP_MONITOR_EXIT + + | OP_THROW -> f_11x () + + | OP_MOVE_FROM16 + | OP_MOVE_WIDE_FROM16 + | OP_MOVE_OBJECT_FROM16 -> f_22x () + + | OP_IF_EQZ + | OP_IF_NEZ + | OP_IF_LTZ + | OP_IF_GEZ + | OP_IF_GTZ + | OP_IF_LEZ -> f_21t () + + | OP_CONST_16 + | OP_CONST_WIDE_16 + | OP_CONST_HIGH16 -> f_21s () + + | OP_CONST_WIDE_HIGH16 -> f_21h () + + | OP_CONST_STRING + | OP_CONST_CLASS + + | OP_CHECK_CAST + + | OP_NEW_INSTANCE + + | OP_SGET + | OP_SGET_WIDE + | OP_SGET_OBJECT + | OP_SGET_BOOLEAN + | OP_SGET_BYTE + | OP_SGET_CHAR + | OP_SGET_SHORT + | OP_SPUT + | OP_SPUT_WIDE + | OP_SPUT_OBJECT + | OP_SPUT_BOOLEAN + | OP_SPUT_BYTE + | OP_SPUT_CHAR + | OP_SPUT_SHORT -> f_21c () + + | OP_CMPL_FLOAT + | OP_CMPG_FLOAT + | OP_CMPL_DOUBLE + | OP_CMPG_DOUBLE + | OP_CMP_LONG + + | OP_AGET + | OP_AGET_WIDE + | OP_AGET_OBJECT + | OP_AGET_BOOLEAN + | OP_AGET_BYTE + | OP_AGET_CHAR + | OP_AGET_SHORT + | OP_APUT + | OP_APUT_WIDE + | OP_APUT_OBJECT + | OP_APUT_BOOLEAN + | OP_APUT_BYTE + | OP_APUT_CHAR + | OP_APUT_SHORT + + | OP_ADD_INT + | OP_SUB_INT + | OP_MUL_INT + | OP_DIV_INT + | OP_REM_INT + | OP_AND_INT + | OP_OR_INT + | OP_XOR_INT + | OP_SHL_INT + | OP_SHR_INT + | OP_USHR_INT + + | OP_ADD_LONG + | OP_SUB_LONG + | OP_MUL_LONG + | OP_DIV_LONG + | OP_REM_LONG + | OP_AND_LONG + | OP_OR_LONG + | OP_XOR_LONG + | OP_SHL_LONG + | OP_SHR_LONG + | OP_USHR_LONG + + | OP_ADD_FLOAT + | OP_SUB_FLOAT + | OP_MUL_FLOAT + | OP_DIV_FLOAT + | OP_REM_FLOAT + | OP_ADD_DOUBLE + | OP_SUB_DOUBLE + | OP_MUL_DOUBLE + | OP_DIV_DOUBLE + | OP_REM_DOUBLE -> f_23x () + + | OP_ADD_INT_LIT8 + | OP_RSUB_INT_LIT8 + | OP_MUL_INT_LIT8 + | OP_DIV_INT_LIT8 + | OP_REM_INT_LIT8 + | OP_AND_INT_LIT8 + | OP_OR_INT_LIT8 + | OP_XOR_INT_LIT8 + | OP_SHL_INT_LIT8 + | OP_SHR_INT_LIT8 + | OP_USHR_INT_LIT8 -> f_22b () + + | OP_IF_EQ + | OP_IF_NE + | OP_IF_LT + | OP_IF_GE + | OP_IF_GT + | OP_IF_LE -> f_22t () + + | OP_ADD_INT_LIT16 + | OP_RSUB_INT + | OP_MUL_INT_LIT16 + | OP_DIV_INT_LIT16 + | OP_REM_INT_LIT16 + | OP_AND_INT_LIT16 + | OP_OR_INT_LIT16 + | OP_XOR_INT_LIT16 -> f_22s () + + | OP_INSTANCE_OF + + | OP_NEW_ARRAY + + | OP_IGET + | OP_IGET_WIDE + | OP_IGET_OBJECT + | OP_IGET_BOOLEAN + | OP_IGET_BYTE + | OP_IGET_CHAR + | OP_IGET_SHORT + | OP_IPUT + | OP_IPUT_WIDE + | OP_IPUT_OBJECT + | OP_IPUT_BOOLEAN + | OP_IPUT_BYTE + | OP_IPUT_CHAR + | OP_IPUT_SHORT -> f_22c () + + | OP_MOVE_16 + | OP_MOVE_WIDE_16 + | OP_MOVE_OBJECT_16 -> f_32x () + + | OP_CONST + | OP_CONST_WIDE_32 -> f_31i () + + | OP_FILL_ARRAY_DATA + + | OP_PACKED_SWITCH + | OP_SPARSE_SWITCH -> f_31t () + + | OP_CONST_STRING_JUMBO -> f_31c () + + | OP_FILLED_NEW_ARRAY + + | OP_INVOKE_VIRTUAL + | OP_INVOKE_SUPER + | OP_INVOKE_DIRECT + | OP_INVOKE_STATIC + | OP_INVOKE_INTERFACE -> f_35c () + + | OP_FILLED_NEW_ARRAY_RANGE + + | OP_INVOKE_VIRTUAL_RANGE + | OP_INVOKE_SUPER_RANGE + | OP_INVOKE_DIRECT_RANGE + | OP_INVOKE_STATIC_RANGE + | OP_INVOKE_INTERFACE_RANGE -> f_3rc () + + | OP_GOTO -> f_10t () + | OP_GOTO_16 -> f_20t () + | OP_GOTO_32 -> f_30t () + + | OP_CONST_WIDE -> f_51l () + + + diff --git a/plugins/dalvik/dalvik_java.ml b/plugins/dalvik/dalvik_java.ml new file mode 100644 index 0000000000..6e744f86a3 --- /dev/null +++ b/plugins/dalvik/dalvik_java.ml @@ -0,0 +1,40 @@ +open Bap_core_theory +open KB.Syntax + +(* TODO: Move into the separate plugin? *) +(* TODO: Enrich more with the standard types (and builtin methods/classes?) *) +module Java = struct + type reg_name + type regpair_name + type value + + (* register pairs - it's wrong FIXME *) + let regpair_name : regpair_name Theory.Bitv.t Theory.Value.sort = + Theory.Bitv.define 4 + + (* registers are indices in a stack frame *) + let reg_name : reg_name Theory.Bitv.t Theory.Value.sort = + Theory.Bitv.define 4 + + (* we can define our own type hierarchy for Java, + but let's start with just 32 bit integers for all + primitive and reference types, it will hit us when + we will start dealing with doubles and longs. + *) + let value : value Theory.Bitv.t Theory.Value.sort = + Theory.Bitv.define 32 + + (* Note the importance of the frames here! *) + (* but frame is still a mapping from 4 bit offsets to 32 bit values. *) + let frame = Theory.Mem.define reg_name value + + let current_frame = Theory.Var.define frame "current_frame" + + (* Heap operations *) + let heap_type = Theory.Mem.define value value + let brk = Theory.Var.define value "brk" + let heap = Theory.Var.define heap_type "mem" + +end + + diff --git a/plugins/dalvik/dalvik_lifter.ml b/plugins/dalvik/dalvik_lifter.ml new file mode 100644 index 0000000000..fff18f0d69 --- /dev/null +++ b/plugins/dalvik/dalvik_lifter.ml @@ -0,0 +1,716 @@ +open Bap_core_theory +open KB.Syntax +open Dalvik_disasm +open Dalvik_java + +(* Instruction reference is based on https://source.android.com/devices/tech/dalvik/dalvik-bytecode + *) + +let package = "dalvik" + +(* modular arithmetics for 4 bit values *) +module M4 = Bitvec.Make(struct let modulus = Bitvec.modulus 4 end) +module M8 = Bitvec.Make(struct let modulus = Bitvec.modulus 8 end) +module M16 = Bitvec.Make(struct let modulus = Bitvec.modulus 16 end) +module M32 = Bitvec.M32 +module M64 = Bitvec.M64 + +module Dalvik(Core : Theory.Core) = struct + open Core + open Java + + module Types + (* This one should load the type length by the index *) + let get_length _i = 64 + end + + let pass = perform Theory.Effect.Sort.bot + let skip = perform Theory.Effect.Sort.bot + + let frame = var current_frame + + let set_reg x v = + set current_frame (store frame x v) + + let get_reg x = load frame x + + (* FIXME: its wrong, just a stub for now *) + (* Returns the result of the preceding invoke-* instructions *) + let get_result s = + match s with + | 8 -> load frame 0 + | 16 -> load frame 0 + | _ -> failwith "WRONG result width" + + let allocate_object dst len = + seq (set_reg dst (var brk)) (set brk (add (var brk) (int value len))) + + let new_array dst len _typ : unit Theory.Effect.t KB.t = + unlabeled >>= fun lbl -> + blk lbl (allocated_object (int reg_name dst) len) skip + + let get_memory addr pos = + let stride = M64.int (Theory.Bitv.size value / 8) in + let off = add addr (mul pos (int value stride)) in + get heap (load (var heap) off) + + let set_memory addr pos data = + let stride = M64.int (Theory.Bitv.size value / 8) in + let off = add addr (mul pos (int value stride)) in + set heap (store (var heap) off data) + + let set_slot dst pos data = + set_memory (get_reg dst) pos data + + let get_slot src pos = + get_memory (get_reg src) pos data + + let data_block = function + | [] -> pass + | xs -> List.reduce_exn xs ~f:seq + + let block xs = + unlabeled >>= fun lbl -> + blk lbl (data_block xs) skip + + let array_get dst src pos = + let value = get_slot src pos in + (* FIXME: set the destination right! *) + dst := value + + let mov x y = set_reg x (get_reg y) + let const x y = set_reg x y + let cmp d x y = + (* Boils to Theory.Bitv *) + let open Core.Basic in + let x = int reg_name (M4.int x) + and y = int reg_name (M4.int x) + and d = int reg_name (M4.int z) in + let res = sub x y in + if is_zero res then set_reg d 0 else + if sgt x y then set_reg d 1 else set_reg d -1 + + let mov_rr x y = + let x = int reg_name x + and y = int reg_name y in + mov x y + + let mov_rr4 x y = + mov_rr (M4.int x) (M4.int y) + + let mov_rr816 x y = + mov_rr (M8.int x) (M16.int y) + + let mov_rr16 x y = + mov_rr (M16.int x) (M16.int y) + + let movw_rr x y = + let x = int regpair_name x + and y = int regpair_name y in + mov x y + + let movw_rr4 x y = + movw_rr (M4.int x) (M4.int y) + + let movw_rr816 x y = + movw_rr (M8.int x) (M16.int y) + + let movw_rr16 x y = + movw_rr (M16.int x) (M16.int y) + + let mov_r x = + let x = int reg_name (M4.int x) in + mov x (get_result 8) + + let movw_r x = + let x = int regpair_name (M8.int x) in + mov x (get_result 16) + + let mov_rc x y = + let x = int reg_name (M4.int x) in + const x y + + let mov_rc16 x c = + let x = int reg_name (M8.int x) in + const x y + + let movw_rc x c = + let x = int regpair_name x in + const x y + + let movw_rc8 x c = + movw_rc (M8.int x) c + + let movw_rc32 x c = + movw_rc (M32.int x) c + + let move eff = + KB.Object.create Theory.Program.cls >>= fun lbl -> + blk lbl eff skip + + let apply f x y = + let x = int reg_name (M4.int x) + and y = int reg_name (M4.int y) in + mov x (f y) + + let apply2 f dest x y = + let x = int reg_name (M8.int x) + and y = int reg_name (M8.int y) + and dest = int reg_name (M8.int dest) in + mov dest (f x y) + + let apply2c f dest x c = + let x = int reg_name (M8.int x) + and c = Bitv.define c + and dest = int reg_name (M8.int dest) in + mov dest (f x c) + + let fapply f x y = + let open Fbasic in + let x = float reg_name (M4.int x) + and y = float reg_name (M4.int y) in + mov x (f y) + + let fapply2 f dest x y = + let open Fbasic in + let x = float reg_name (M4.int x) + and y = float reg_name (M4.int y) + and dest = float reg_name (M8.int dest) in + mov dest (f x y) + + (* TODO: Merge these two functions together *) + let filled_new_array dst len data = + Theory.Var.fresh value >>= fun i -> + let dst = int reg_name dst in + let data = int value data in + block [ + set i (int value Bitvec.zero); + allocate_object dst len; + repeat (ult (var i) (int value len)) @@ + data_block [ + set_slot dst (var i) data; + set i (add (var i) (int value Bitvec.one)) + ] + ] + + let fill_memory_data addr data len = + Theory.Var.fresh value >>= fun i -> + let data = int value data in + block [ + set i (int value Bitvec.zero); + repeat (ult (var i) (int value len)) @@ + data_block [ + set_memory addr (var i) data; + set i (add (var i) (int value Bitvec.one)) + ] + ] + + let fill_array_data dst data len = + let addr = int reg_name dst in + fill_memory_data addr data len + + let array_put dst src pos = + (* FIXME: Load the data first? *) + let dst = int reg_name dst in + set_slot dst pos src + + let array_put_static src idx len = + (* How to find the destination address? *) + let addr = 0 in + let src = int reg_name src in + fill_memory_data addr src len + + let nop = + KB.return @@ + Theory.Effect.empty Theory.Effect.Sort.top + + let jmp = + KB.return @@ + Theory.Effect.jmp + + let run + : insn -> unit Theory.Effect.t KB.t = + function + | (OP_NOP,[]) -> nop + | (OP_MOVE, [OPR_REGISTER x; OPR_REGISTER y]) -> + move (mov_rr4 x y) + | (OP_MOVE_FROM16, [OPR_REGISTER x; OPR_REGISTER y]) -> + move (mov_rr816 x y) + | (OP_MOVE_16, [OPR_REGISTER x; OPR_REGISTER y]) -> + move (mov_rr16 x y) + | (OP_MOVE_WIDE, [OPR_REGISTER x; OPR_REGISTER y]) -> + move (movw_rr4 x y) + | (OP_MOVE_WIDE_FROM16, [OPR_REGISTER x; OPR_REGISTER y]) -> + move (movw_rr816 x y) + | (OP_MOVE_WIDE16, [OPR_REGISTER x; OPR_REGISTER y]) -> + move (movw_rr16 x y) + (* Object registers essentially do the same, but with objects *) + | (OP_MOVE_OBJECT, [OPR_REGISTER x; OPR_REGISTER y]) -> + move (mov_rr4 x y) + | (OP_MOVE_OBJECT_FROM16, [OPR_REGISTER x; OPR_REGISTER y]) -> + move (mov_rr816 x y) + | (OP_MOVE_OBJECT_16, [OPR_REGISTER x; OPR_REGISTER y]) -> + move (mov_rr16 x y) + (* Result accessing just takes the value from the frame *) + | (OP_MOVE_RESULT, [OPR_REGISTER x]) -> + move (mov_r x) + | (OP_MOVE_RESULT_WIDE, [OPR_REGISTER x]) -> + move (movw_r x) + | (OP_MOVE_RESULT_OBJECT, [OPR_REGISTER x]) -> + move (mov_r x) + | (OP_MOVE_EXCEPTION, [OPR_REGISTER x]) -> + (* FIXME: Not sure what to do here, unsupported for now *) + failwith "UNSUPPORTED" + | (OP_RETURN_VOID) + | (OP_RETURN) + | (OP_RETURN_WIDE) + | (OP_RETURN_OBJECT) -> + (* FIXME: Not sure what to do here, unsupported for now *) + failwith "UNSUPPORTED" + | (OP_CONST_4, [OPR_REGISTER x; OPR_CONST c]) -> + move (mov_rc x c) + | (OP_CONST_16, [OPR_REGISTER x; OPR_CONST c]) -> + move (mov_rc16 x c) + | (OP_CONST, [OPR_REGISTER x; OPR_CONST c]) -> + move (mov_rc32 x c) + | (OP_CONST_HIGH16, [OPR_REGISTER x; OPR_CONST c]) -> + move (mov_rc16 x c) + | (OP_CONST_WIDE_16, [OPR_REGISTER x; OPR_CONST c]) -> + move (movw_rc16 x c) + | (OP_CONST_WIDE_32, [OPR_REGISTER x; OPR_CONST c]) -> + move (movw_rc32 x c) + | (OP_CONST_WIDE, [OPR_REGISTER x; OPR_CONST c]) -> + (* FIXME: *) + failwith "UNIMPLEMENTED" + | (OP_CONST_WIDE_HIGH16, [OPR_REGISTER x; OPR_CONST c]) -> + move (movw_rc16 x c) + (* Index based operations load the indexes from the corresponding DEX section *) + | (OP_CONST_STRING, [OPR_REGISTER x; OPR_INDEX i]) -> + move (movi x i) + (* FIXME: What's the difference? *) + | (OP_CONST_STRING_JUMBO, [OPR_REGISTER x; OPR_INDEX i]) -> + move (movi x i) + | (OP_CONST_CLASS, [OPR_REGISTER x; OPR_INDEX i]) -> + move (movi x i) + | (OP_MONITOR_ENTER, [OPR_REGISTER _]) -> + nop + | (OP_MONITOR_EXIT, [OPR_REGISTER _]) -> + nop + | (OP_CHECK_CAST, [OPR_REGISTER x; OPR_INDEX i]) -> + (* FIXME: What to do here? *) + nop + | (OP_INSTANCE_OF, [OPR_REGISTER x; OPR_REGISTER y; OPR_INDEX i]) -> + (* FIXME: What to do here? *) + nop + | (OP_ARRAY_LENGTH, [OPR_REGISTER x; OPR_REGISTER y]) -> + (* FIXME: What to do here? *) + nop + | (OP_NEW_INSTANCE, [OPR_REGISTER x; OPR_INDEX i]) -> + (* FIXME: What to do here? *) + nop + | (OP_NEW_ARRAY, [OPR_REGISTER dst; OPR_REGISTER size; OPR_INDEX i]) -> + let len = int reg_name size in + new_array dst len i + (* This one, not sure what to do - is this a variable-length array? *) + | (OP_FILLED_NEW_ARRAY, [OPR_REGISTER dst; OPR_INDEX i; OPR_CONST data]) -> + let len = Types.get_length i in + filled_new_array (M4.int dst) (M32.int64 len) (M32.int64 data) + | (OP_FILLED_NEW_ARRAY_RANGE, [OPR_REGISTER dst; OPR_INDEX i; OPR_REGISTER y]) -> + filled_new_array (M8.int dst) (M32.int64 len) + | (OP_FILL_ARRAY_DATA, [OPR_REGISTER dst; OPR_OFFSET data]) -> + fill_array_data (M8.int dst) (M32.int64 data) + | (OP_THROW, _) -> nop + | (OP_GOTO, [OPR_OFFSET o]) -> + jmp o + | (OP_GOTO_16, [OPR_OFFSET o]) -> + jmp o + | (OP_GOTO_32, [OPR_OFFSET o]) -> + jmp o + (* TODO: Switch tables *) + | (OP_PACKED_SWITCH, [OPR_REGISTER x; OPR_OFFSET o]) -> + failwith "UNIMPLEMENTED" + | (OP_SPARSE_SWITCH, [OPR_REGISTER x; OPR_OFFSET o]) -> + failwith "UNIMPLEMENTED" + | (OP_CMPL_FLOAT, [OPR_REGISTER dest; OPR_REGISTER x; OPR_REGISTER y]) -> + failwith "NO FLOATS YET" + | (OP_CMPG_FLOAT, [OPR_REGISTER dest; OPR_REGISTER x; OPR_REGISTER y]) -> + failwith "NO FLOATS yET" + | (OP_CMPL_DOUBLE, [OPR_REGISTER dest; OPR_REGISTER x; OPR_REGISTER y]) -> + failwith "NO FLOATS YET" + | (OP_CMPG_DOUBLE, [OPR_REGISTER dest; OPR_REGISTER x; OPR_REGISTER y]) -> + failwith "NO FLOATS YET" + | (OP_CMP_LONG, [OPR_REGISTER dest; OPR_REGISTER x; OPR_REGISTER y]) -> + cmp dest x y + | (OP_IF_EQ, [OPR_REGISTER x; OPR_REGISTER y; OPR_OFFSET o]) -> + if eq x y then jmp o + | (OP_IF_NE, [OPR_REGISTER x; OPR_REGISTEr y; OPR_OFFSET o]) -> + if neq x y then jmp o + | (OP_IF_LT, [OPR_REGISTER x; OPR_REGISTER y; OPR_OFFSET o]) -> + if slt x y then jmp o + | (OP_IF_GE, [OPR_REGISTER x; OPR_REGISTER y; OPR_OFFSET o]) -> + if sgt x y || eq x y then jmp o + | (OP_IF_GT, [OPR_REGISTER x; OPR_REGISTER y; OPR_OFFSET o]) -> + if sgt x y then jmp o + | (OP_IF_LE, [OPR_REGISTER x; OPR_REGISTER y; OPR_OFFSET o]) -> + if slt x y || eq x y then jmp o + | (OP_IF_EQZ, [OPR_REGISTER x; OPR_OFFSET o]) -> + if eq x zero then jmp o + | (OP_IF_NEZ, [OPR_REGISTER x; OPR_OFFSET o]) -> + if neq x zero then jmp o + | (OP_IF_LTZ, [OPR_REGISTER x; OPR_OFFSET o]) -> + if slt x zero then jmp o + | (OP_IF_GEZ, [OPR_REGISTER x; OPR_OFFSET o]) -> + if sgt x zero || eq x zero then jmp o + | (OP_IF_GTZ, [OPR_REGISTER x; OPR_OFFSET o]) -> + if sgt x zero then jmp o + | (OP_IF_LEZ, [OPR_REGISTER x; OPR_OFFSET o]) -> + if sle x zero || eq x zero then jmp o + + | (OP_AGET, [OPR_REGISTER dst; OPR_REGISTER a; OPR_INDEX i]) -> + array_get (M8.int dst) (M8.int a) (M8.int i) + | (OP_AGET_WIDE, [OPR_REGISTER dst; OPR_REGISTER a; OPR_INDEX i]) -> + (* reads array element into register pair *) + array_getw (M8.int dst) (M8.int a) (M8.int i) + | (OP_AGET_OBJECT, [OPR_REGISTER dst; OPR_REGISTER a; OPR_INDEX i]) -> + (* FIXME: figure out the size of the object first? *) + array_get (M8.int dst) (M8.int a) (M8.int i) + | (OP_AGET_BOOLEAN, [OPR_REGISTER dst; OPR_REGISTER a; OPR_INDEX i]) -> + (* FIXME: what is the ACTUAL semantics of these instructions? *) + array_get (M8.int dst) (M8.int a) (M8.int i) + | (OP_AGET_BYTE, [OPR_REGISTER dst; OPR_REGISTER a; OPR_INDEX i]) -> + array_get (M8.int dst) (M8.int a) (M8.int i) + | (OP_AGET_CHAR, [OPR_REGISTER dst; OPR_REGISTER a; OPR_INDEX i]) -> + array_get (M8.dst) (M8.int a) (M8.int i) + | (OP_AGET_SHORT, [OPR_REGISTER dst; OPR_REGISTER a; OPR_INDEX i]) -> + array_get (M8.int dst) (M8.int a) (M8.int i) + | (OP_APUT, [OPR_REGISTER src; OPR_REGISTER a; OPR_INDEX i]) -> + array_put (M8.int a) (M8.int src) (M8.int i) + | (OP_APUT_WIDE, [OPR_REGISTER src; OPR_REGISTER a; OPR_INDEX i]) -> + (* puts an array element from the register pair *) + array_put (M8.int a) (M8.int src) (M8.int i) + | (OP_APUT_OBJECT, [OPR_REGISTER src; OPR_REGISTER a; OPR_INDEX i]) -> + (* FIXME: figure out the size of the object first? *) + array_put (M8.int a) (M8.int src) (M8.int i) + | (OP_APUT_BOOLEAN, [OPR_REGISTER src; OPR_REGISTER a; OPR_INDEX i]) -> + array_put (M8.int a) (M8.int src) (M8.int i) + | (OP_APUT_BYTE, [OPR_REGISTER src; OPR_REGISTER a; OPR_INDEX i]) -> + array_put (M8.int a) (M8.int src) (M8.int i) + | (OP_APUT_CHAR, [OPR_REGISTER src; OPR_REGISTER a; OPR_INDEX i]) -> + array_put (M8.int a) (M8.int src) (M8.int i) + | (OP_APUT_SHORT, [OPR_REGISTER src; OPR_REGISTER a; OPR_INDEX i]) -> + array_put (M8.int a) (M8.int src) (M8.int i) + + (* Semantically these are identical to the array operations *) + | (OP_IGET, [OPR_REGISTER dst; OPR_REGISTER o; OPR_INDEX i]) -> + array_get (M4.int dst) (M4.int o) (M16.int i) + | (OP_IGET_WIDE, [OPR_REGISTER dst; OPR_REGISTER o; OPR_INDEX i]) -> + array_getw (M4.int dst) (M4.int o) (M16.int i) + | (OP_IGET_OBJECT, [OPR_REGISTER dst; OPR_REGISTER o; OPR_INDEX i]) -> + (* FIXME: figure out the size of the object first? *) + array_get (M4.int dst) (M4.int o) (M16.int i) + (* FIXME: what is the ACTUAL semantics of these instructions? *) + | (OP_IGET_BOOLEAN, [OPR_REGISTER dst; OPR_REGISTER o; OPR_INDEX i]) -> + array_get (M4.int dst) (M4.int o) (M16.int i) + | (OP_IGET_BYTE, [OPR_REGISTER dst; OPR_REGISTER o; OPR_INDEX i]) -> + array_get (M4.int dst) (M4.int o) (M16.int i) + | (OP_IGET_CHAR, [OPR_REGISTER dst; OPR_REGISTER o; OPR_INDEX i]) -> + array_get (M4.int dst) (M4.int o) (M16.int i) + | (OP_IGET_SHORT, [OPR_REGISTER dst; OPR_REGISTER o; OPR_INDEX i]) -> + array_get (M4.int dst) (M4.int o) (M16.int i) + | (OP_IPUT, [OPR_REGISTER src; OPR_REGISTER o; OPR_INDEX i]) -> + array_put (M4.int o) (M4.int src) (M16.int i) + | (OP_IPUT_WIDE, [OPR_REGISTER src; OPR_REGISTER o; OPR_INDEX i]) -> + array_putw (M4.int o) (M4.int src) (M16.int i) + | (OP_IPUT_OBJECT, [OPR_REGISTER src; OPR_REGISTER o; OPR_INDEX i]) -> + (* FIXME: figure out the size of the object first? *) + array_put (M4.int o) (M4.int src) (M16.int i) + | (OP_IPUT_BOOLEAN, [OPR_REGISTER src; OPR_REGISTER o; OPR_INDEX i]) -> + array_put (M4.int o) (M4.int src) (M16.int i) + | (OP_IPUT_BYTE, [OPR_REGISTER src; OPR_REGISTER o; OPR_INDEX i]) -> + array_put (M4.int o) (M4.int src) (M16.int i) + | (OP_IPUT_CHAR, [OPR_REGISTER src; OPR_REGISTER o; OPR_INDEX i]) -> + array_put (M4.int o) (M4.int src) (M16.int i) + | (OP_IPUT_SHORT, [OPR_REGISTER src; OPR_REGISTER o; OPR_INDEX i]) -> + array_put (M4.int o) (M4.int src) (M16.int i) + + | (OP_SGET, [OPR_REGISTER dst; OPR_INDEX i]) -> + array_get_static (M4.int dst) (M16.int i) + | (OP_SGET_WIDE, [OPR_REGISTER dst; OPR_INDEX i]) -> + array_get_static (M4.int dst) (M16.int i) + | (OP_SGET_OBJECT, [OPR_REGISTER dst; OPR_INDEX i]) -> + array_get_static (M4.int dst) (M16.int i) + | (OP_SGET_BOOLEAN, [OPR_REGISTER dst; OPR_INDEX i]) -> + array_get_static (M4.int dst) (M16.int i) 1 + | (OP_SGET_BYTE, [OPR_REGISTER x; OPR_INDEX i]) -> + array_get_static (M4.int dst) (M16.int i) 1 + | (OP_SGET_CHAR, [OPR_REGISTER dst; OPR_INDEX i]) -> + array_get_static (M4.int dst) (M16.int i) 1 + | (OP_SGET_SHORT, [OPR_REGISTER dst; OPR_INDEX i]) -> + array_get_static (M4.int dst) (M16.int i) 2 + | (OP_SPUT, [OPR_REGISTER src; OPR_INDEX i]) -> + array_put_static (M4.int src) (M16.int i) + | (OP_SPUT_WIDE, [OPR_REGISTER src; OPR_INDEX i]) -> + array_put_static (M4.int src) (M16.int i) + | (OP_SPUT_OBJECT, [OPR_REGISTER src; OPR_INDEX i]) -> + array_put_static (M4.int src) (M16.int i) + | (OP_SPUT_BOOLEAN, [OPR_REGISTER x; OPR_INDEX i]) -> + array_put_static (M4.int src) (M16.int i) 1 + | (OP_SPUT_BYTE, [OPR_REGISTER src; OPR_INDEX i]) -> + array_put_static (M4.int src) (M16.int i) 1 + | (OP_SPUT_CHAR, [OPR_REGISTER src; OPR_INDEX i]) -> + array_put_static (M4.int src) (M16.int i) 1 + | (OP_SPUT_SHORT, [OPR_REGISTER x; OPR_INDEX i]) -> + array_put_static (M4.int src) (M16.int i) 2 + + (* regs here is just a list of registers, amount is variadic *) + | (OP_INVOKE_VIRTUAL, [OPR_CONST c; OPR_INDEX i; regs]) -> + (* Call jump here? *) + | (OP_INVOKE_SUPER, [OPR_CONST c; OPR_INDEX i; regs]) -> + | (OP_INVOKE_DIRECT, [OPR_CONST c; OPR_INDEX i; regs]) -> + | (OP_INVOKE_STATIC, [OPR_CONST c; OPR_INDEX i; regs]) -> + | (OP_INVOKE_INTERFACE, [OPR_CONST c; OPR_INDEX i; regs]) -> + + | (OP_INVOKE_VIRTUAL_RANGE, [OPR_CONST c; OPR_INDEX i; OPR_REGISTER x]) -> + | (OP_INVOKE_SUPER_RANGE, [OPR_CONST c; OPR_INDEX i; OPR_REGISTER x]) -> + | (OP_INVOKE_DIRECT_RANGE, [OPR_CONST c; OPR_INDEX i; OPR_REGISTER x]) -> + | (OP_INVOKE_STATIC_RANGE, [OPR_CONST c; OPR_INDEX i; OPR_REGISTER x]) -> + | (OP_INVOKE_INTERFACE_RANGE, [OPR_CONST c; OPR_INDEX i; OPR_REGISTER x]) -> + + | (OP_NEG_INT, [OPR_REGISTER x; OPR_REGISTER y]) -> + apply neg x y + | (OP_NOT_INT, [OPR_REGISTER x; OPR_REGISTER y]) -> + apply not x y + | (OP_NEG_LONG, [OPR_REGISTER x; OPR_REGISTER y]) -> + apply neg x y + | (OP_NOT_LONG, [OPR_REGISTER x; OPR_REGISTER y]) -> + apply not x y + | (OP_NEG_FLOAT, [OPR_REGISTER x; OPR_REGISTER y]) -> + failwith "floats are unimplemented" + | (OP_NEG_DOUBLE, [OPR_REGISTER x; OPR_REGISTER y]) -> + failwith "floats are unimplemented" + | (OP_INT_TO_LONG, [OPR_REGISTER x; OPR_REGISTER y]) + | (OP_INT_TO_FLOAT, [OPR_REGISTER x; OPR_REGISTER y]) + | (OP_INT_TO_DOUBLE, [OPR_REGISTER x; OPR_REGISTER y]) + | (OP_LONG_TO_INT, [OPR_REGISTER x; OPR_REGISTER y]) + | (OP_LONG_TO_FLOAT, [OPR_REGISTER x; OPR_REGISTER y]) + | (OP_LONG_TO_DOUBLE, [OPR_REGISTER x; OPR_REGISTER y]) + | (OP_FLOAT_TO_INT, [OPR_REGISTER x; OPR_REGISTER y]) + | (OP_FLOAT_TO_LONG, [OPR_REGISTER x; OPR_REGISTER y]) + | (OP_FLOAT_TO_DOUBLE, [OPR_REGISTER x; OPR_REGISTER y]) + | (OP_DOUBLE_TO_INT, [OPR_REGISTER x; OPR_REGISTER y]) + | (OP_DOUBLE_TO_LONG, [OPR_REGISTER x; OPR_REGISTER y]) + | (OP_DOUBLE_TO_FLOAT, [OPR_REGISTER x; OPR_REGISTER y]) + | (OP_INT_TO_BYTE, [OPR_REGISTER x; OPR_REGISTER y]) + | (OP_INT_TO_CHAR, [OPR_REGISTER x; OPR_REGISTER y]) + | (OP_INT_TO_SHORT, [OPR_REGISTER x; OPR_REGISTER y]) -> + failwith "unimplemented" + + | (OP_ADD_INT, [OPR_REGISTER dest; OPR_REGISTER x; OPR_REGISTER y]) -> + apply2 add dest x y + | (OP_SUB_INT, [OPR_REGISTER dest; OPR_REGISTER x; OPR_REGISTER y]) -> + apply2 sub dest x y + | (OP_MUL_INT, [OPR_REGISTER dest; OPR_REGISTER x; OPR_REGISTER y]) -> + apply2 mul dest x y + | (OP_DIV_INT, [OPR_REGISTER dest; OPR_REGISTER x; OPR_REGISTER y]) -> + apply2 div dest x y + | (OP_REM_INT, [OPR_REGISTER dest; OPR_REGISTER x; OPR_REGISTER y]) -> + (* is this "remnant"? *) + apply2 modulo dest x y + | (OP_AND_INT, [OPR_REGISTER dest; OPR_REGISTER x; OPR_REGISTER y]) -> + apply2 logand dest x y + | (OP_OR_INT, [OPR_REGISTER dest; OPR_REGISTER x; OPR_REGISTER y]) -> + apply2 logor dest x y + | (OP_XOR_INT, [OPR_REGISTER dest; OPR_REGISTER x; OPR_REGISTER y]) -> + apply2 logxor dest x y + | (OP_SHL_INT, [OPR_REGISTER dest; OPR_REGISTER x; OPR_REGISTER y]) -> + apply2 (shiftl true) dest x y + | (OP_SHR_INT, [OPR_REGISTER dest; OPR_REGISTER x; OPR_REGISTER y]) -> + apply2 (shiftr true) dest x y + | (OP_USHR_INT, [OPR_REGISTER dest; OPR_REGISTER x; OPR_REGISTER y]) -> + apply2 (shiftr true) dest x y + + | (OP_ADD_LONG, [OPR_REGISTER dest; OPR_REGISTER x; OPR_REGISTER y]) -> + apply2 add dest x y + | (OP_SUB_LONG, [OPR_REGISTER dest; OPR_REGISTER x; OPR_REGISTER y]) -> + apply2 sub dest x y + | (OP_MUL_LONG, [OPR_REGISTER dest; OPR_REGISTER x; OPR_REGISTER y]) -> + apply2 mul dest x y + | (OP_DIV_LONG, [OPR_REGISTER dest; OPR_REGISTER x; OPR_REGISTER y]) -> + apply2 div dest x y + | (OP_REM_LONG, [OPR_REGISTER dest; OPR_REGISTER x; OPR_REGISTER y]) -> + (* is this "remnant"? *) + apply2 modulo dest x y + | (OP_AND_LONG, [OPR_REGISTER dest; OPR_REGISTER x; OPR_REGISTER y]) -> + apply2 logand dest x y + | (OP_OR_LONG, [OPR_REGISTER dest; OPR_REGISTER x; OPR_REGISTER y]) -> + apply2 logor dest x y + | (OP_XOR_LONG, [OPR_REGISTER dest; OPR_REGISTER x; OPR_REGISTER y]) -> + apply2 logxor dest x y + | (OP_SHL_LONG, [OPR_REGISTER dest; OPR_REGISTER x; OPR_REGISTER y]) -> + apply2 (shiftl true) dest x y + | (OP_SHR_LONG, [OPR_REGISTER dest; OPR_REGISTER x; OPR_REGISTER y]) -> + apply2 (shiftr true) dest x y + | (OP_USHR_LONG, [OPR_REGISTER dest; OPR_REGISTER x; OPR_REGISTER y]) -> + apply2 (shiftr true) dest x y + + | (OP_ADD_FLOAT, [OPR_REGISTER dest; OPR_REGISTER x; OPR_REGISTER y]) -> + fapply2 fadd dest x y + | (OP_SUB_FLOAT, [OPR_REGISTER dest; OPR_REGISTER x; OPR_REGISTER y]) -> + fapply2 fsub dest x y + | (OP_MUL_FLOAT, [OPR_REGISTER dest; OPR_REGISTER x; OPR_REGISTER y]) -> + fapply2 fmul dest x y + | (OP_DIV_FLOAT, [OPR_REGISTER dest; OPR_REGISTER x; OPR_REGISTER y]) -> + fapply2 fdiv dest x y + | (OP_REM_FLOAT, [OPR_REGISTER dest; OPR_REGISTER x; OPR_REGISTER y]) -> + fapply2 fmodulo dest x y + | (OP_ADD_DOUBLE, [OPR_REGISTER dest; OPR_REGISTER x; OPR_REGISTER y]) -> + fapply2 fadd dest x y + | (OP_SUB_DOUBLE, [OPR_REGISTER dest; OPR_REGISTER x; OPR_REGISTER y]) -> + fapply2 fsub dest x y + | (OP_MUL_DOUBLE, [OPR_REGISTER dest; OPR_REGISTER x; OPR_REGISTER y]) -> + fapply2 fmul dest x y + | (OP_DIV_DOUBLE, [OPR_REGISTER dest; OPR_REGISTER x; OPR_REGISTER y]) -> + fapply2 fdiv dest x y + | (OP_REM_DOUBLE, [OPR_REGISTER dest; OPR_REGISTER x; OPR_REGISTER y]) -> + fapply2 fmodulo dest x y + + | (OP_ADD_INT_2ADDR, [OPR_REGISTER x; OPR_REGISTER y]) -> + apply2 add x x y + | (OP_SUB_INT_2ADDR, [OPR_REGISTER x; OPR_REGISTER y]) -> + apply2 sub x x y + | (OP_MUL_INT_2ADDR, [OPR_REGISTER x; OPR_REGISTER y]) -> + apply2 mul x x y + | (OP_DIV_INT_2ADDR, [OPR_REGISTER x; OPR_REGISTER y]) -> + apply2 div x x y + | (OP_REM_INT_2ADDR, [OPR_REGISTER x; OPR_REGISTER y]) -> + apply2 modulo x x y + | (OP_AND_INT_2ADDR, [OPR_REGISTER x; OPR_REGISTER y]) -> + apply2 logand x x y + | (OP_OR_INT_2ADDR, [OPR_REGISTER x; OPR_REGISTER y]) -> + apply2 logor x x y + | (OP_XOR_INT_2ADDR, [OPR_REGISTER x; OPR_REGISTER y]) -> + apply2 logxor x x y + | (OP_SHL_INT_2ADDR, [OPR_REGISTER x; OPR_REGISTER y]) -> + apply2 (shiftl true) x x y + | (OP_SHR_INT_2ADDR, [OPR_REGISTER x; OPR_REGISTER y]) -> + apply2 (shiftr true) x x y + | (OP_USHR_INT_2ADDR, [OPR_REGISTER x; OPR_REGISTER y]) -> + apply2 (shiftr true) x x y + | (OP_ADD_LONG_2ADDR, [OPR_REGISTER x; OPR_REGISTER y]) -> + apply2 add x x y + | (OP_SUB_LONG_2ADDR, [OPR_REGISTER x; OPR_REGISTER y]) -> + apply2 sub x x y + | (OP_MUL_LONG_2ADDR, [OPR_REGISTER x; OPR_REGISTER y]) -> + apply2 mul x x y + | (OP_DIV_LONG_2ADDR, [OPR_REGISTER x; OPR_REGISTER y]) -> + apply2 div x x y + | (OP_REM_LONG_2ADDR, [OPR_REGISTER x; OPR_REGISTER y]) -> + apply2 modulo x x y + | (OP_AND_LONG_2ADDR, [OPR_REGISTER x; OPR_REGISTER y]) -> + apply2 logand x x y + | (OP_OR_LONG_2ADDR, [OPR_REGISTER x; OPR_REGISTER y]) -> + apply2 logor x x y + | (OP_XOR_LONG_2ADDR, [OPR_REGISTER x; OPR_REGISTER y]) -> + apply2 logxor x x y + | (OP_SHL_LONG_2ADDR, [OPR_REGISTER x; OPR_REGISTER y]) -> + apply2 (shiftl true) x x y + | (OP_SHR_LONG_2ADDR, [OPR_REGISTER x; OPR_REGISTER y]) -> + apply2 (shiftr true) x x y + | (OP_USHR_LONG_2ADDR, [OPR_REGISTER x; OPR_REGISTER y]) -> + apply2 (shiftr true) x x y + + | (OP_ADD_FLOAT_2ADDR, [OPR_REGISTER x; OPR_REGISTER y]) -> + fapply2 fadd x x y + | (OP_SUB_FLOAT_2ADDR, [OPR_REGISTER x; OPR_REGISTER y]) -> + fapply2 fsub x x y + | (OP_MUL_FLOAT_2ADDR, [OPR_REGISTER x; OPR_REGISTER y]) -> + fapply2 fmul x x y + | (OP_DIV_FLOAT_2ADDR, [OPR_REGISTER x; OPR_REGISTER y]) -> + fapply2 fdiv x x y + | (OP_REM_FLOAT_2ADDR, [OPR_REGISTER x; OPR_REGISTER y]) -> + fapply2 fmodulo x x y + | (OP_ADD_DOUBLE_2ADDR, [OPR_REGISTER x; OPR_REGISTER y]) -> + fapply2 fadd x x y + | (OP_SUB_DOUBLE_2ADDR, [OPR_REGISTER x; OPR_REGISTER y]) -> + fapply2 fsub x x y + | (OP_MUL_DOUBLE_2ADDR, [OPR_REGISTER x; OPR_REGISTER y]) -> + fapply2 fmul x x y + | (OP_DIV_DOUBLE_2ADDR, [OPR_REGISTER x; OPR_REGISTER y]) -> + fapply2 fdiv x x y + | (OP_REM_DOUBLE_2ADDR, [OPR_REGISTER x; OPR_REGISTER y]) -> + fapply2 fmodulo x x y + + | (OP_ADD_INT_LIT16, [OPR_REGISTER dest; OPR_REGISTER x; OPR_CONST c]) -> + apply2c add dest x c + | (OP_RSUB_INT, [OPR_REGISTER dest; OPR_REGISTER x; OPR_CONST c]) -> + apply2c sub dest x c (* FIXME: reverse it! *) + | (OP_MUL_INT_LIT16, [OPR_REGISTER dest; OPR_REGISTER x; OPR_CONST c]) -> + apply2c mul dest x c + | (OP_DIV_INT_LIT16, [OPR_REGISTER dest; OPR_REGISTER x; OPR_CONST c]) -> + apply2c mul dest x c + | (OP_REM_INT_LIT16, [OPR_REGISTER dest; OPR_REGISTER x; OPR_CONST c]) -> + apply2c modulo dest x c + | (OP_AND_INT_LIT16, [OPR_REGISTER dest; OPR_REGISTER x; OPR_CONST c]) -> + apply2c modulo dest x c + | (OP_OR_INT_LIT16, [OPR_REGISTER dest; OPR_REGISTER x; OPR_CONST c]) -> + apply2c logor dest x c + | (OP_XOR_INT_LIT16, [OPR_REGISTER dest; OPR_REGISTER x; OPR_CONST c]) -> + apply2c logxor dest x c + | (OP_ADD_INT_LIT8, [OPR_REGISTER dest; OPR_REGISTER x; OPR_CONST c]) -> + apply2c add dest x c + | (OP_RSUB_INT_LIT8, [OPR_REGISTER dest; OPR_REGISTER x; OPR_CONST c]) -> + apply2c sub dest x c (* FIXME: reverse it! *) + | (OP_MUL_INT_LIT8, [OPR_REGISTER dest; OPR_REGISTER x; OPR_CONST c]) -> + apply2c mul dest x c + | (OP_DIV_INT_LIT8, [OPR_REGISTER dest; OPR_REGISTER x; OPR_CONST c]) -> + apply2c div dest x c + | (OP_REM_INT_LIT8, [OPR_REGISTER dest; OPR_REGISTER x; OPR_CONST c]) -> + apply2c modulo dest x c + | (OP_AND_INT_LIT8, [OPR_REGISTER dest; OPR_REGISTER x; OPR_CONST c]) -> + apply2c logand dest x c + | (OP_OR_INT_LIT8, [OPR_REGISTER dest; OPR_REGISTER x; OPR_CONST c]) -> + apply2c logor dest x c + | (OP_XOR_INT_LIT8, [OPR_REGISTER dest; OPR_REGISTER x; OPR_CONST c]) -> + apply2c logxor dest x c + | (OP_SHL_INT_LIT8, [OPR_REGISTER dest; OPR_REGISTER x; OPR_CONST c]) -> + apply2c (shiftl true) dest x c + | (OP_SHR_INT_LIT8, [OPR_REGISTER dest; OPR_REGISTER x; OPR_CONST c]) -> + apply2c (shiftr true) dest x c + | (OP_USHR_INT_LIT8, [OPR_REGISTER dest; OPR_REGISTER x; OPR_CONST c]) -> + apply2c (shiftr true) dest x c + (* optimizer output -- these are never generated by "dx" *) + + | (OP_THROW_VERIFICATION_ERROR, _) + | (OP_EXECUTE_INLINE, _) + | (OP_EXECUTE_INLINE_RANGE, _) + | (OP_INVOKE_DIRECT_EMPTY, _) + | (OP_IGET_QUICK, _) + | (OP_IGET_WIDE_QUICK, _) + | (OP_IGET_OBJECT_QUICK, _) + | (OP_IPUT_QUICK, _) + | (OP_IPUT_WIDE_QUICK, _) + | (OP_IPUT_OBJECT_QUICK, _) + + | (OP_INVOKE_VIRTUAL_QUICK, _) + | (OP_INVOKE_VIRTUAL_QUICK_RANGE, _) + | (OP_INVOKE_SUPER_QUICK, _) + | (OP_INVOKE_SUPER_QUICK_RANGE, _) -> + nop + + | _ -> failwith "not ready" + +end + +module Lifter = Dalvik(Theory.Manager) + +let lift opcode = + KB.Object.create Theory.Program.cls >>= fun insn -> + Lifter.run opcode >>= fun sema -> + KB.provide Theory.Program.Semantics.slot insn sema >>| fun () -> + insn + +let test opcode = + match KB.run Theory.Program.cls (lift opcode) KB.empty with + | Error _ -> failwith "Oops, we've got a conflict!" + | Ok (code,_) -> + Format.printf "%a@\n" KB.Value.pp code diff --git a/plugins/dalvik/dalvik_parse.ml b/plugins/dalvik/dalvik_parse.ml new file mode 100644 index 0000000000..ef412801f5 --- /dev/null +++ b/plugins/dalvik/dalvik_parse.ml @@ -0,0 +1,897 @@ +(* + * Copyright (c) 2010-2014, + * Jinseong Jeon + * Kris Micinski + * Jeff Foster + * All rights reserved. + * + * Based on the src/parse.ml from https://github.com/plum-umd/redexer + *) + +(***********************************************************************) +(* Parse *) +(* reference: dalvik/libdex/Leb128.c, *) +(* dalvik/dx/rop/cst/CstUtf8.java *) +(***********************************************************************) + +module DA = DynArray + +module Instr = Dalvik_disasm +module Dex = Dalvik_dex +module Java = Dalvik_java + +module IM = Map.Make(Int32) +module IS = Set.Make(Int32) + +(* TODO: use BAP logging instead? *) + +(***********************************************************************) +(* Utilities/Reading *) +(***********************************************************************) + +let to_i32 x = Int32.of_int x +let to_i64 x = Int64.of_int x + +(* reading 8 bit *) +let read8 (chan: in_channel) : int = + int_of_char (input_char chan) + +(* reading 8 bit - signed *) +let read8s (chan: in_channel) : int = + let x = read8 chan in + if (x land 0x80) <> 0 then x - 0x100 else x + +(* reading 16 bit *) +let read16 (chan: in_channel) : int = + let l = read8 chan in + let h = read8 chan in + (h lsl 8) lor l + +(* reading 32 bit *) +let read32 (chan: in_channel) : int = + let l = read16 chan in + let h = read16 chan in + Int32.to_int (to_i32 ((h lsl 16) lor l)) + +(* reading 32 bit - unsign *) +let read32u (chan: in_channel) : int64 = + let l = to_i64 (read16 chan) in + let h = to_i64 (read16 chan) in + Int64.logor (Int64.shift_left h 16) l + +(* reading 64 bit *) +let read64 (chan: in_channel) : int64 = + let l = read32u chan in + let h = read32u chan in + Int64.logor (Int64.shift_left h 32) l + +let until (sz: int) : int list = + let rec gen i j : int list = + if i = j then [i] else i :: (gen (i+1) j) + in + gen 0 sz + +(* reading unsigned LEB128 *) +let read_uleb128 (chan: in_channel) : int = + let extender (res, cur) i = + if cur > 0x7f then + let cur' = read8 chan in + let res' = res lor ((cur' land 0x7f) lsl (i * 7)) in + (res', cur') + else (res, cur) + in + fst (List.fold_left extender (0, 0x80) (until 4)) + +(* reading unsigned LEB128 - 1 *) +let read_uleb128p1 (chan: in_channel) : int = + (read_uleb128 chan) - 1 + +(* reading signed LEB128 *) +let read_sleb128 (chan: in_channel) : int = + let extender (res, cur, pos) i = + if cur > 0x7f then + let cur' = read8 chan in + let res' = res lor ((cur' land 0x7f) lsl (i * 7)) in + (res', cur', i * 7) + else (res, cur, pos) + in + let ret, _, pos = List.fold_left extender (0, 0x80, 0) (until 4) in + if ((0x1 lsl (pos + 6)) land ret) <> 0 then + ret - (0x1 lsl (pos + 7)) + else ret + +(* reading variable-length number *) +let read_VLN (chan: in_channel) (sz: int) (signed: bool) : int64 = + let extender (res, last) i = + let cur = read8 chan in + Int64.logor res (Int64.shift_left (to_i64 cur) (i * 8)), cur + in + let ret, b = List.fold_left extender (Int64.zero, 0) (until (sz - 1)) in + if signed && (b land 0x80) <> 0 && sz < 8 then + let fill_0xff res i = + Int64.logor res (Int64.shift_left (Int64.of_int (0xff)) ((i+sz)*8)) + in + List.fold_left fill_0xff ret (until (8 - sz - 1)) + else ret + +(* reading variable-length floating-point number *) +let read_fVLN (chan: in_channel) (sz: int) : int64 = + let extender res i = + let cur = read8 chan in + Int64.logor res (Int64.shift_left (to_i64 cur) (i * 8)) + in + let ret = List.fold_left extender Int64.zero (until (sz - 1)) in + Int64.shift_left ret ((8 - sz) * 8) + +(* reading 16 bit index *) +let read_idx16 (chan: in_channel) : Dex.link = + Dex.to_idx (read16 chan) + +(* reading 32 bit index *) +let read_idx32 (chan: in_channel) : Dex.link = + let i = read32u chan in + if Int64.compare i (Int64.of_string "0xffffffff") = 0 + then Dex.no_idx + else Dex.to_idx (Int64.to_int i) + +(* reading offset *) +let read_off (chan: in_channel) : Dex.link = + let i = read32u chan in + Dex.to_off (Int64.to_int i) + +(* byte alignment *) +let alignment (chan: in_channel) (n: int) : int = + let pos = pos_in chan in + let r = pos mod n in + if r <> 0 then List.iter (fun _ -> ignore (read8 chan)) (until (r - 1)); + r + +(***********************************************************************) +(* Validation *) +(***********************************************************************) + +(* validating channel position *) +let is_validate (chan: in_channel) (s: Dex.section) (msg: string) : unit = + let cur = pos_in chan in + match s.Dex.offset with + | Dex.Off off -> + if cur <> (Int32.to_int off) && s.Dex.size > 0 then raise (Dex.Wrong_dex msg) + | _ -> raise (Dex.Wrong_dex "is_validate") + +(* validating type strings *) +let validate_type (dx: Dex.dex) : unit = + let iter_type (id: Dex.link) : unit = + let str = Dex.get_str dx id in + if not (Java.is_type_descr str) then + Log.w ("not type descryption: "^str) + and iter_proto (it: Dex.proto_id_item) : unit = + let str = Dex.get_str dx it.Dex.shorty in + if not (Java.is_shorty_descr str) then + Log.w ("not shorty descryption: "^str) + in + DA.iter iter_type dx.Dex.d_type_ids; + DA.iter iter_proto dx.Dex.d_proto_ids + +(***********************************************************************) +(* Parsing *) +(***********************************************************************) + +(* return int type offset if it is non-zero *) +let get_off (offset: Dex.link) : int32 * int option = + match offset with + | Dex.Off off -> + let ioff = Int32.to_int off in + if ioff = Dex.no_offset then off, None else off, Some ioff + | _ -> raise (Dex.Wrong_dex "get_off") + +(* making list of any item, e.g. all link structures *) +let rec parse_list ch (job: in_channel -> 'a) (length: int) : 'a list = + if length = 0 then [] else + let elt = job ch in elt :: (parse_list ch job (length-1)) + +(* reading encoded value *) +let rec read_encoded_value (chan: in_channel) : Dex.encoded_value = + let byte = read8 chan in + let v_arg = byte lsr 5 and v_ty = byte land 0x1f in + match v_ty with + | 0x00 -> + let v = read8s chan in Dex.VALUE_BYTE (to_i64 v) + | 0x02 -> + let v = read_VLN chan (v_arg+1) true in Dex.VALUE_SHORT v + | 0x03 -> + let v = read_VLN chan (v_arg+1) false in Dex.VALUE_CHAR v + | 0x04 -> + let v = read_VLN chan (v_arg+1) true in Dex.VALUE_INT v + | 0x06 -> + let v = read_VLN chan (v_arg+1) true in Dex.VALUE_LONG v + | 0x10 -> + let v = read_fVLN chan (v_arg+1) in + Dex.VALUE_FLOAT (Int64.shift_right_logical v 32) + | 0x11 -> + let v = read_fVLN chan (v_arg+1) in Dex.VALUE_DOUBLE v + | 0x17 | 0x18 | 0x19 | 0x1a | 0x1b -> + ( + let id = Int64.to_int (read_VLN chan (v_arg+1) false) in + match v_ty with + | 0x17 -> Dex.VALUE_STRING id + | 0x18 -> Dex.VALUE_TYPE id + | 0x19 -> Dex.VALUE_FIELD id + | 0x1a -> Dex.VALUE_METHOD id + | 0x1b -> Dex.VALUE_ENUM id + | _ -> raise (Dex.Wrong_match "read_encoded_value: 0x17 ~ 0x1b") + ) + | 0x1c -> + let sz = read_uleb128 chan in + Dex.VALUE_ARRAY (parse_list chan read_encoded_value sz) + | 0x1d -> Dex.VALUE_ANNOTATION (parse_encoded_ann chan) + | 0x1e -> Dex.VALUE_NULL + | 0x1f -> Dex.VALUE_BOOLEAN (v_arg > 0) + | _ -> raise (Dex.Wrong_dex ("illegal ev type :"^(string_of_int v_ty))) + +and parse_encoded_ann (ch: in_channel) : Dex.encoded_annotation = + let id = read_uleb128 ch + and sz = read_uleb128 ch in + let parse_elts ch' = + let nm = read_uleb128 ch' + and v = read_encoded_value ch' in + { + Dex.name_idx = Dex.to_idx nm; + Dex.value = v; + } + in + let elts = parse_list ch parse_elts sz in + { + Dex.an_type_idx = Dex.to_idx id; + Dex.elements = elts; + } + +(* from length of string to unicode encoded string *) +let build_string ch len : UTF8.t = + let buf = UTF8.Buf.create len in + let add_buf i = + UTF8.Buf.add_char buf (UChar.uchar_of_int i) + in + for i = 0 to (len - 1) do + let v0 = read8 ch in + let v4 = v0 lsr 4 in + if 0x00 <= v4 && v4 <= 0x07 then (* single-byte encoding *) + add_buf v0 + else if 0x0c <= v4 && v4 <= 0x0d then (* two-byte encoding *) + ( + let v1 = read8 ch in + if v1 land 0xc0 <> 0x80 then raise (Dex.Wrong_dex "bad MUTF-8, 2bytes-v1"); + let v = ((v0 land 0x1f) lsl 6) lor (v1 land 0x3f) in + if v <> 0 && v < 0x80 then raise (Dex.Wrong_dex "bad MUTF-8, 2bytes-v"); + add_buf v + ) + else if v4 = 0x0e then (* three-byte encoding *) + ( + let v1 = read8 ch in + if v1 land 0xc0 <> 0x80 then raise (Dex.Wrong_dex "bad MUTF-8, 3bytes-v1"); + let v2 = read8 ch in + if v2 land 0xc0 <> 0x80 then raise (Dex.Wrong_dex "bad MUTF-8, 3bytes-v2"); + let v = ((v0 land 0x0f) lsl 12) lor + (((v1 land 0x3f) lsl 6) lor (v2 land 0x3f)) in + if v < 0x800 then raise (Dex.Wrong_dex "bad MUTF-8, 3bytes-v"); + add_buf v + ) + else raise (Dex.Wrong_dex "bad MUTF-8, only 1-3 bytes") + done; + UTF8.Buf.contents buf + +(* parse : in_channel -> Dex.dex *) +let rec parse (ch: in_channel) : Dex.dex = + let header = parse_header ch + in + is_validate ch header.Dex.h_string_ids "string_ids"; + let string_ids = parse_list ch read_off header.Dex.h_string_ids.Dex.size + in + is_validate ch header.Dex.h_type_ids "type_ids"; + let type_ids = parse_list ch read_idx32 header.Dex.h_type_ids.Dex.size + in + is_validate ch header.Dex.h_proto_ids "proto_ids"; + let proto_ids = + let get_proto = fun ch -> + let sh = read_idx32 ch + and rt = read_idx32 ch + and pa = read_off ch in + { Dex.shorty = sh; Dex.return_type = rt; Dex.parameter_off = pa; } in + parse_list ch get_proto header.Dex.h_proto_ids.Dex.size + in + is_validate ch header.Dex.h_field_ids "field_ids"; + let field_ids = + let get_field = fun ch -> + let cid = read_idx16 ch + and fid = read_idx16 ch + and nid = read_idx32 ch in + { Dex.f_class_id = cid; Dex.f_type_id = fid; Dex.f_name_id = nid; } in + parse_list ch get_field header.Dex.h_field_ids.Dex.size + in + is_validate ch header.Dex.h_method_ids "method_ids"; + let method_ids = + let get_method = fun ch -> + let cid = read_idx16 ch + and pid = read_idx16 ch + and nid = read_idx32 ch in + { Dex.m_class_id = cid; Dex.m_proto_id = pid; Dex.m_name_id = nid; } in + parse_list ch get_method header.Dex.h_method_ids.Dex.size + in + is_validate ch header.Dex.h_class_defs "class_defs"; + let class_defs = + let get_class = fun ch -> + let cid = read_idx32 ch + and acs = read32 ch + and sup = read_idx32 ch + and ifo = read_off ch + and src = read_idx32 ch + and ann = read_off ch + and cdo = read_off ch + and stt = read_off ch in + { + Dex.c_class_id = cid; + Dex.c_access_flag = acs; + Dex.superclass = sup; + Dex.interfaces = ifo; + Dex.source_file = src; + Dex.annotations = ann; + Dex.class_data = cdo; + Dex.static_values = stt; + } in + parse_list ch get_class header.Dex.h_class_defs.Dex.size + in + let data = parse_data ch string_ids proto_ids class_defs in + let map = parse_map ch header.Dex.map_off data in + let dex = { + Dex.header = header; + Dex.d_string_ids = DA.of_list string_ids; + Dex.d_type_ids = DA.of_list type_ids; + Dex.d_proto_ids = DA.of_list proto_ids; + Dex.d_field_ids = DA.of_list field_ids; + Dex.d_method_ids = DA.of_list method_ids; + Dex.d_class_defs = DA.of_list class_defs; + Dex.d_data = map; + } in + validate_type dex; + dex + +(* parsing header *) +and parse_header (ch: in_channel) : Dex.dex_header = + let mg = build_string ch 8 + and ck = read32u ch + and si = parse_list ch (fun ch' -> char_of_int (read8 ch')) 20 + and fs = read32 ch + and hs = read32 ch + and ed = match read32 ch with + | 0x12345678 -> Dex.LITTLE + | 0x78563412 -> Dex.BIG + | _ -> raise (Dex.Wrong_dex "endian_tag") + and lk = parse_section ch + and mo = read_off ch + and st = parse_section ch + and ty = parse_section ch + and pt = parse_section ch + and fd = parse_section ch + and md = parse_section ch + and cl = parse_section ch + and dt = parse_section ch + in + { + Dex.magic = mg; + Dex.checksum = ck; + Dex.signature = si; + Dex.file_size = fs; + Dex.header_size = hs; + Dex.endian_tag = ed; + Dex.link = lk; + Dex.map_off = mo; + Dex.h_string_ids = st; + Dex.h_type_ids = ty; + Dex.h_proto_ids = pt; + Dex.h_field_ids = fd; + Dex.h_method_ids = md; + Dex.h_class_defs = cl; + Dex.h_data = dt; + } + +and parse_section (ch: in_channel) : Dex.section = + let size = read32 ch + and off = read_off ch in + { Dex.size = size; Dex.offset = off; } + +(* parsing data pool *) +and parse_data ch string_ids proto_ids class_defs : Dex.data_item IM.t = + let str_map = parse_string ch string_ids IM.empty in + let prt_map = parse_proto ch proto_ids str_map in + let cls_map = parse_class ch class_defs prt_map in cls_map + +and parse_string ch string_ids map : Dex.data_item IM.t = + let parse_str_item acc l : Dex.data_item IM.t = + match get_off l with + | off, Some ioff -> + ( + seek_in ch ioff; + let sz = read_uleb128 ch in + let str = build_string ch sz in + ignore (read8 ch); (* null termination *) + IM.add off (Dex.STRING_DATA str) acc + ) + | _, _ -> acc + in + List.fold_left parse_str_item map string_ids + +and parse_type_list ch offset map : Dex.data_item IM.t = + match get_off offset with + | off, Some ioff -> + ( + seek_in ch ioff; + let sz = read32 ch in + let type_idx = parse_list ch read_idx16 sz in + ignore (alignment ch 4); + IM.add off (Dex.TYPE_LIST type_idx) map + ) + | _, _ -> map + +and parse_proto ch proto_ids map : Dex.data_item IM.t = + let each_proto acc (p: Dex.proto_id_item) : Dex.data_item IM.t = + parse_type_list ch p.Dex.parameter_off acc + in + List.fold_left each_proto map proto_ids + +(* parsing class *) +and parse_class ch class_defs map : Dex.data_item IM.t = + let each_cdefs acc (def: Dex.class_def_item) : Dex.data_item IM.t = + let itf_map = parse_type_list ch def.Dex.interfaces acc in + let ann_map = parse_annotations ch def.Dex.annotations itf_map in + let cld_map = parse_class_data ch def.Dex.class_data ann_map in + let stt_map = parse_static_val ch def.Dex.static_values cld_map in + stt_map + in + List.fold_left each_cdefs map class_defs + +and parse_annotations ch offset map : Dex.data_item IM.t = + match get_off offset with + | off, Some ioff -> + ( + seek_in ch ioff; + let cls = read_off ch + and fsz = read32 ch + and msz = read32 ch + and psz = read32 ch + and parse_anno_off ch' : Dex.anno_off = + let idx = read_idx32 ch' + and aff = read_off ch' in + { + Dex.target = idx; + Dex.annotation_off = aff; + } + in + let fds = parse_list ch parse_anno_off fsz + and mds = parse_list ch parse_anno_off msz + and pam = parse_list ch parse_anno_off psz in + let ann = + { + Dex.class_anno_off = cls; + Dex.fields = fds; + Dex.methods = mds; + Dex.parameters = pam; + } in + let ann_map = IM.add off (Dex.ANNO_DIR ann) map in + let cls_map = parse_anno_set ch cls ann_map in + let f_set = fun acc f -> parse_anno_set ch f.Dex.annotation_off acc + and f_ref = fun acc f -> parse_anno_ref ch f.Dex.annotation_off acc in + let fds_map = List.fold_left f_set cls_map fds in + let mds_map = List.fold_left f_set fds_map mds in + List.fold_left f_ref mds_map pam + ) + | _, _ -> map + +and parse_anno_ref ch offset map : Dex.data_item IM.t = + match get_off offset with + | off, Some ioff -> + ( + seek_in ch ioff; + let sz = read32 ch in + let lst = parse_list ch read_off sz in + let ref_map = IM.add off (Dex.ANNO_SET_REF lst) map in + List.fold_left (fun acc l -> parse_anno_set ch l acc) ref_map lst + ) + | _, _ -> map + +and parse_anno_set ch offset map : Dex.data_item IM.t = + match get_off offset with + | off, Some ioff -> + ( + seek_in ch ioff; + let sz = read32 ch in + let lst = parse_list ch read_off sz in + let set_map = IM.add off (Dex.ANNO_SET lst) map in + List.fold_left (fun acc l -> parse_anno_item ch l acc) set_map lst + ) + | _, _ -> map + +and parse_anno_item ch offset map : Dex.data_item IM.t = + match get_off offset with + | off, Some ioff -> + ( + seek_in ch ioff; + let ub = read8 ch in + let vis = match ub with + | 0x00 -> Dex.VISIBILITY_BUILD + | 0x01 -> Dex.VISIBILITY_RUNTIME + | 0x02 -> Dex.VISIBILITY_SYSTEM + | _ -> raise (Dex.Wrong_dex ("visibility: "^(string_of_int ub))) + in + let ann = parse_encoded_ann ch in + let elt = + { + Dex.visible = vis; + Dex.annotation = ann; + } + in IM.add off (Dex.ANNOTATION elt) map + ) + | _, _ -> map + +and parse_class_data ch offset map : Dex.data_item IM.t = + match get_off offset with + | off, Some ioff -> + ( + seek_in ch ioff; + let s_fd_sz = read_uleb128 ch + and i_fd_sz = read_uleb128 ch + and d_md_sz = read_uleb128 ch + and v_md_sz = read_uleb128 ch in + let prev = ref 0 in + let parse_encoded_field ch' : Dex.encoded_field = + let id_dif = read_uleb128 ch' + and ac_flg = read_uleb128 ch' in + prev := (!prev + id_dif); + { + Dex.field_idx = Dex.to_idx !prev; + Dex.f_access_flag = ac_flg; + } + and parse_encoded_method ch' : Dex.encoded_method = + let id_dif = read_uleb128 ch' + and ac_flg = read_uleb128 ch' + and cd_off = Dex.to_off (read_uleb128 ch') in + prev := (!prev + id_dif); + { + Dex.method_idx = Dex.to_idx !prev; + Dex.m_access_flag = ac_flg; + Dex.code_off = cd_off; + } in + prev := 0; + let s_fd = parse_list ch parse_encoded_field s_fd_sz in + prev := 0; + let i_fd = parse_list ch parse_encoded_field i_fd_sz in + prev := 0; + let d_md = parse_list ch parse_encoded_method d_md_sz in + prev := 0; + let v_md = parse_list ch parse_encoded_method v_md_sz in + let c_data = + { + Dex.static_fields = s_fd; + Dex.instance_fields = i_fd; + Dex.direct_methods = d_md; + Dex.virtual_methods = v_md; + } in + let cmap = IM.add off (Dex.CLASS_DATA c_data) map in + let md_folder acc (emd: Dex.encoded_method) : Dex.data_item IM.t = + parse_code ch emd.Dex.code_off acc + in + let map' = List.fold_left md_folder cmap d_md in + List.fold_left md_folder map' v_md + ) + | _, _ -> map + +and parse_code ch offset map : Dex.data_item IM.t = + match get_off offset with + | off, Some ioff -> + ( + seek_in ch ioff; + let reg_sz = read16 ch + and ins_sz = read16 ch + and out_sz = read16 ch + and tri_sz = read16 ch + and d_info = read_off ch + and ist_sz = 2 * (read32 ch) in + let prv_ins = pos_in ch + and insns, ins_map = parse_instr ch ist_sz map in + seek_in ch (prv_ins + ist_sz); + let tries, handles = + if tri_sz <> 0 then + ( + if (ist_sz / 2) mod 2 <> 0 then ignore (read16 ch); (* padding *) + let base = Dex.of_off (DA.get insns 0) in + let tri = parse_list ch (parse_try ins_map base) tri_sz + and hdl_sz = read_uleb128 ch in + let hdl = parse_list ch (parse_handle base) hdl_sz in + convert_handler_off tri, hdl + ) else [], [] in + let c_item = + { + Dex.registers_size = reg_sz; + Dex.ins_size = ins_sz; + Dex.outs_size = out_sz; + Dex.tries_size = tri_sz; + Dex.debug_info_off = d_info; + Dex.insns_size = ist_sz; + Dex.insns = insns; + Dex.tries = tries; + Dex.c_handlers = handles; + } in + ignore (alignment ch 4); + let code_map = IM.add off (Dex.CODE_ITEM c_item) ins_map in + parse_debug_info ch d_info code_map + ) + | _, _ -> map + +and parse_try map base ch : Dex.try_item = + let sta = base + 2 * (Int64.to_int (read32u ch)) + and cnt = 2 * (read16 ch) + and off = read16 ch in + let find_valid_addr (found, addr) offset = + if found then (found, addr) else + try + let addr' = sta + cnt - offset in + ignore (IM.find (Int32.of_int addr') map); + (true, addr') + with Not_found -> (false, addr) + in + let _, edr = List.fold_left find_valid_addr (false, sta + cnt) [2;4;6;10] in + { + Dex.start_addr = Dex.to_off sta; + Dex.end_addr = Dex.to_off edr; + Dex.handler_off = Dex.to_off off; + } + +and convert_handler_off tries : Dex.try_item list = + let collect_off acc (ti: Dex.try_item) = + IS.add (Dex.get_off ti.Dex.handler_off) acc + in + let offs = List.fold_left collect_off IS.empty tries in + let hash = Hashtbl.create 9 in + let make_hash i off = Hashtbl.add hash (Dex.Off off) i in + DA.iteri make_hash (DA.of_list (IS.elements offs)); + let convert_off ti : Dex.try_item = { ti with + Dex.handler_off = Dex.to_idx (Hashtbl.find hash ti.Dex.handler_off) + } in + List.map convert_off tries + +and parse_handle base ch : Dex.encoded_catch_handler = + let parse_ty_addr ch' = + let id = read_uleb128 ch' + and ad = base + 2 * (read_uleb128 ch') in + { + Dex.ch_type_idx = Dex.to_idx id; + Dex.addr = Dex.to_off ad; + } + and sz = read_sleb128 ch in + let handlers = parse_list ch parse_ty_addr (abs sz) in + let ch_all = if sz <= 0 then base + 2 * (read_uleb128 ch) else 0 in + { + Dex.e_handlers = handlers; + Dex.catch_all_addr = Dex.to_off ch_all; + } + +and parse_instr ch sz map : Dex.link DA.t * Dex.data_item IM.t = + let off_lst = ref ([]: int list) + and itm_lst = ref ([]: Dex.data_item list) + and max_off = ref ((pos_in ch) + sz) + in + while pos_in ch <= (!max_off - 2) do + let off = pos_in ch in + let hx = read8 ch in + let op, ins_sz = Instr.hx_to_op_and_size hx in + if 0 = ins_sz then + raise (Dex.Wrong_dex ("undefined op: "^(string_of_int hx))) + else + ( + let args = parse_list ch read8 (ins_sz - 1) in + let p_ins = Instr.make_instr op args in + let itm = Dex.INSTRUCTION + ( + let rec to_abs_addr base = function + | [] -> [] + | [Instr.OPR_OFFSET i32] -> + let abs = base + ((Int32.to_int i32) * 2) + in [Instr.OPR_OFFSET (to_i32 abs)] + | hd::tl -> hd::(to_abs_addr base tl) + in + match op with + | Instr.OP_GOTO | Instr.OP_GOTO_16 | Instr.OP_GOTO_32 + | Instr.OP_IF_EQ | Instr.OP_IF_NE | Instr.OP_IF_LT + | Instr.OP_IF_GE | Instr.OP_IF_GT | Instr.OP_IF_LE + | Instr.OP_IF_EQZ | Instr.OP_IF_NEZ | Instr.OP_IF_LTZ + | Instr.OP_IF_GEZ | Instr.OP_IF_GTZ | Instr.OP_IF_LEZ -> + op, to_abs_addr off (snd p_ins) + | Instr.OP_FILL_ARRAY_DATA -> + ( + let cur_off = pos_in ch in + let n_args = to_abs_addr off (snd p_ins) in + ( + match n_args with + | _::Instr.OPR_OFFSET i32::[] -> + ( + let off' = Int32.to_int i32 in + seek_in ch off'; + let ident = read16 ch in + if ident <> 0x0300 then + raise (Dex.Wrong_dex ("fill-array-data: ident")) + else + ( + let ar_wd = read16 ch + and ar_sz = read32 ch in + let reader ch = match ar_wd with + | 1 -> Instr.OPR_CONST (to_i64 (read8 ch)) + | 2 -> Instr.OPR_CONST (to_i64 (read16 ch)) + | 4 -> Instr.OPR_CONST (read32u ch) + | 8 -> Instr.OPR_CONST (read64 ch) + | _ -> raise (Dex.NOT_YET (string_of_int ar_wd)) + in + let ar_data = parse_list ch reader ar_sz in + let item = Dex.FILL_ARRAY + { + Dex.ad_width = ar_wd; + Dex.ad_size = ar_sz; + Dex.ad_data = ar_data; + } in + off_lst := off' :: !off_lst; + itm_lst := item :: !itm_lst; + max_off := min !max_off off' + ) + ) + | _ -> raise (Dex.Wrong_dex ("fill-array-data: args")) + ); + seek_in ch cur_off; + op, n_args + ) + | Instr.OP_PACKED_SWITCH + | Instr.OP_SPARSE_SWITCH -> + ( + let cur_off = pos_in ch in + let n_args = to_abs_addr off (snd p_ins) in + ( + match n_args with + | _::Instr.OPR_OFFSET i32::[] -> + ( + let off' = Int32.to_int i32 in + seek_in ch off'; + let ident = read16 ch in + if ident <> 0x0100 && ident <> 0x0200 then + raise (Dex.Wrong_dex ("switch: ident")) + else + ( + let sw_sz = read16 ch in + let sw_k = + if 0x0100 = ident then [read32 ch] + else parse_list ch read32 sw_sz + in + let make_off ch = + let offset = read32 ch in + let addr = off + (offset * 2) in Dex.to_off addr + in + let sw_tg = parse_list ch make_off sw_sz in + let item = Dex.SWITCH + { + Dex.sw_base = Dex.to_off off; + Dex.sw_size = sw_sz; + Dex.sw_keys = sw_k; + Dex.sw_targets = sw_tg; + } in + off_lst := off' :: !off_lst; + itm_lst := item :: !itm_lst; + max_off := min !max_off off' + ) + ) + | _ -> raise (Dex.Wrong_dex ("switch: args")) + ); + seek_in ch cur_off; + op, n_args + ) + | _ -> p_ins + ) + in + off_lst := off :: !off_lst; + itm_lst := itm :: !itm_lst + ) + done; + let inserter acc off itm : Dex.data_item IM.t = + IM.add (Int32.of_int off) itm acc + and f off1 off2 = Pervasives.compare off1 off2 + in + DA.of_list (List.map Dex.to_off (L.stable_sort f !off_lst)), + List.fold_left2 inserter map !off_lst !itm_lst + +and parse_debug_info ch offset map : Dex.data_item IM.t = + match get_off offset with + | off, Some ioff -> + ( + seek_in ch ioff; + let line = read_uleb128 ch + and p_sz = read_uleb128 ch + and parse_name ch' = Dex.to_idx (read_uleb128p1 ch') in + let parm = parse_list ch parse_name p_sz + and prv = ref 1 + and dbgm = ref ([]: (Dex.state_machine_instr * Instr.operand list) list) in + while !prv <> 0x00 do + prv := read8 ch; + match !prv with + | 0x00 -> + dbgm := (Dex.DBG_END_SEQUENCE, []) :: !dbgm + | 0x01 -> + let addr_diff = to_i64 (read_uleb128 ch) in + dbgm := (Dex.DBG_ADVANCE_PC, [Instr.OPR_CONST addr_diff]) :: !dbgm + | 0x02 -> + let line_diff = to_i64 (read_sleb128 ch) in + dbgm := (Dex.DBG_ADVANCE_LINE, [Instr.OPR_CONST line_diff]) :: !dbgm + | 0x03 -> + let v = Instr.OPR_REGISTER (read_uleb128 ch) + and n = Instr.OPR_INDEX (read_uleb128p1 ch) + and t = Instr.OPR_INDEX (read_uleb128p1 ch) in + dbgm := (Dex.DBG_START_LOCAL, [v;n;t]) :: !dbgm + | 0x04 -> + let v = Instr.OPR_REGISTER (read_uleb128 ch) + and n = Instr.OPR_INDEX (read_uleb128p1 ch) + and t = Instr.OPR_INDEX (read_uleb128p1 ch) + and s = Instr.OPR_INDEX (read_uleb128p1 ch) in + dbgm := (Dex.DBG_START_LOCAL_EXTENDED, [v;n;t;s]) :: !dbgm + | 0x05 -> + let v = Instr.OPR_REGISTER (read_uleb128 ch) in + dbgm := (Dex.DBG_END_LOCAL, [v]) :: !dbgm + | 0x06 -> + let v = Instr.OPR_REGISTER (read_uleb128 ch) in + dbgm := (Dex.DBG_RESTART_LOCAL, [v]) :: !dbgm + | 0x07 -> + dbgm := (Dex.DBG_SET_PROLOGUE_END, []) :: !dbgm + | 0x08 -> + dbgm := (Dex.DBG_SET_EPILOGUE_BEGIN, []) :: !dbgm + | 0x09 -> + let n = Instr.OPR_INDEX (read_uleb128p1 ch) in + dbgm := (Dex.DBG_SET_FILE, [n]) :: !dbgm + | _ -> + let op = Instr.OPR_CONST (to_i64 !prv) in + dbgm := (Dex.DBG_SPECIAL, [op]) :: !dbgm + done; + let d_info = + { + Dex.line_start = line; + Dex.parameter_name = parm; + Dex.state_machine = List.rev !dbgm; + } in + IM.add off (Dex.DEBUG_INFO d_info) map + ) + | _, _ -> map + +and parse_static_val ch offset map : Dex.data_item IM.t = + match get_off offset with + | off, Some ioff -> + ( + seek_in ch ioff; + let size = read_uleb128 ch in + let values = parse_list ch read_encoded_value size in + IM.add off (Dex.STATIC_VALUE values) map + ) + | _, _ -> map + +and parse_map ch offset map : Dex.data_item IM.t = + match get_off offset with + | off, Some ioff -> + ( + let parse_map_item ch' : Dex.map_item = + let ty = read16 ch' in + ignore (read16 ch'); + let sz = Int64.to_int (read32u ch') + and os = read_off ch' in + { + Dex.type_of_item = Dex.to_type_code ty; + Dex.mi_size = sz; + Dex.mi_offset = os; + } + in + seek_in ch ioff; + let size = Int64.to_int (read32u ch) in + let mlst = parse_list ch parse_map_item size in + IM.add off (Dex.MAP_LIST mlst) map + ) + | _, _ -> map +