-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdiffObjdump.py
More file actions
164 lines (143 loc) · 7.43 KB
/
diffObjdump.py
File metadata and controls
164 lines (143 loc) · 7.43 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
import capstone
class FunctionFeatures:
    """Instruction statistics collected for a single function of a dumped object.

    The ``dumpedobj`` handed to the constructor must provide
    ``get_function_instructions(name)`` (the function's instruction sequence)
    and ``get_function_name_by_address(address=...)`` (a function name, or
    ``None``). Every instruction is read through its ``groups`` and
    ``operands`` attributes, which are compared against capstone x86 constants.
    """

    name: str
    num_of_instructions: int
    num_of_memory_access: int
    num_of_possible_branches: int
    call_instructions: dict  # function_name -> number_of_calls

    def __init__(self, name, dumpedobj):
        """Fetch the instructions of function ``name`` and compute its counters."""
        self.name = name
        self.__instructions_list = dumpedobj.get_function_instructions(name)
        self.num_of_instructions = len(self.__instructions_list)
        self.num_of_memory_access = 0
        self.num_of_possible_branches = 0
        self.call_instructions = {}
        self.__compute_instructions(dumpedobj)

    def __is_branch_inst(self, instruction_groups):
        """Return True when the groups contain a jump, call, relative-branch or ret group."""
        branch_groups = (
            capstone.x86.X86_GRP_JUMP,
            capstone.x86.X86_GRP_CALL,
            capstone.x86.X86_GRP_BRANCH_RELATIVE,
            capstone.x86.X86_GRP_RET,
        )
        return any(group in instruction_groups for group in branch_groups)

    def __get_immediate_operator_values(self, operands):
        """Return the ``value.imm`` of every operand whose type is X86_OP_IMM."""
        return [
            operand.value.imm
            for operand in operands
            if operand.type == capstone.x86.X86_OP_IMM
        ]

    def __compute_memory_access(self, operands):
        """Add one to ``num_of_memory_access`` per operand whose type is X86_OP_MEM."""
        memory_operands = [
            operand for operand in operands
            if operand.type == capstone.x86.X86_OP_MEM
        ]
        self.num_of_memory_access += len(memory_operands)

    def __compute_instructions(self, dumpedobj):
        """Walk the instruction list once and fill in every counter.

        NOTE(review): the copy of this file that was reviewed had lost its
        indentation; the scan for call targets is assumed to belong under the
        branch check -- confirm against the original layout.
        """
        for instruction in self.__instructions_list:
            if self.__is_branch_inst(instruction.groups):
                self.num_of_possible_branches += 1
                targets = self.__get_immediate_operator_values(instruction.operands)
                for target in targets:
                    callee = dumpedobj.get_function_name_by_address(address=target)
                    # Unresolved addresses and references to the function
                    # itself are not recorded.
                    if callee is None or callee == self.name:
                        continue
                    previous_count = self.call_instructions.get(callee, 0)
                    self.call_instructions[callee] = previous_count + 1
            # Memory operands are counted for every instruction.
            self.__compute_memory_access(instruction.operands)

    def __str__(self):
        """Return a multi-line summary of the counters, for debugging purposes."""
        pieces = [
            "<%s>" % (self.name),
            "\n\tFunc calls: " + str(self.call_instructions.keys()),
            "\n\tMemory access: " + str(self.num_of_memory_access),
            "\n\tPossible branches: " + str(self.num_of_possible_branches),
            "\n\tInstructions qtt " + str(self.num_of_instructions),
        ]
        return "".join(pieces)
class DiffObjdump:
    """Print a textual report of how the functions of two dumps differ.

    Each dump must expose a ``name`` attribute (used to label the report) and
    ``get_function_names()``; the dump objects themselves are handed to
    ``FunctionFeatures`` to compute the per-function statistics.
    """

    def __init__(self, dump1, dump2):
        """Store the two dump objects that ``diff_report`` will compare."""
        self.__dump1 = dump1
        self.__dump2 = dump2

    def diff_report(self):
        """Print a report for every function found in either dump.

        Functions present in both dumps get a comparison report (printed only
        when something differs); functions present in a single dump get a
        report listing their statistics and naming the dump that has them.
        """
        d1_func_names = set(self.__dump1.get_function_names())
        d2_func_names = set(self.__dump2.get_function_names())

        # Sorted so the report order is stable between runs (a set has no
        # defined order). Function names are assumed to be strings.
        for func_name in sorted(d1_func_names):
            func1_features = FunctionFeatures(func_name, self.__dump1)
            if func_name in d2_func_names:
                func2_features = FunctionFeatures(func_name, self.__dump2)
                self.__report_comparison_between_features(func1_features, func2_features)
            else:
                self.__report_function_in_only_one_scope(func1_features, self.__dump1.name)

        # Whatever is left exists only in the second dump. The features must
        # be built from the dump object itself, and the report must name the
        # second dump.
        for func_name in sorted(d2_func_names - d1_func_names):
            func_features = FunctionFeatures(func_name, self.__dump2)
            self.__report_function_in_only_one_scope(func_features, self.__dump2.name)

    def __report_function_in_only_one_scope(self, func_feature, function_scope_name):
        """Print the statistics of a function that exists in only one dump.

        ``func_feature`` supplies ``name``, the three counters and
        ``call_instructions``; ``function_scope_name`` is printed as the
        place where the function is present.
        """
        print("Function <{func_name}> report:".format(func_name=func_feature.name))
        print("\tIt is only present in {scope}".format(scope=function_scope_name))
        print("\t{property} instructions.".format(property=func_feature.num_of_instructions))
        print("\t{property} memory access.".format(property=func_feature.num_of_memory_access))
        print("\t{property} possible branches.".format(property=func_feature.num_of_possible_branches))
        print("\tHaving the following function calls:")
        for func_call in func_feature.call_instructions:
            print("\t\t", func_call)

    def __report_comparison_between_features(self, func_feature1, func_feature2):
        """Print the differences between the features of one function in both dumps.

        ``func_feature1`` belongs to the first dump and ``func_feature2`` to
        the second. Nothing is printed when the counters and the called
        functions are identical. Neither argument is modified.
        """
        obj1_name = self.__dump1.name
        obj2_name = self.__dump2.name
        report_information = []

        # (text used in the message, attribute holding the counter)
        compared_counters = (
            ("instructions", "num_of_instructions"),
            ("memory access", "num_of_memory_access"),
            ("possible branches", "num_of_possible_branches"),
        )
        for label, attribute in compared_counters:
            value1 = getattr(func_feature1, attribute)
            value2 = getattr(func_feature2, attribute)
            if value1 != value2:
                report_information.append(
                    "\tIn {obj1} it has {property1} {label}, whereas in {obj2} it has {property2}.".
                    format(obj1=obj1_name, property1=value1, label=label,
                           obj2=obj2_name, property2=value2)
                )

        calls1 = func_feature1.call_instructions
        calls2 = func_feature2.call_instructions
        for call_name, call_qtt in calls1.items():
            if call_name not in calls2:
                report_information.append(
                    "\tFunction {call_name} is only called in {obj1}, but not in {obj2}.".
                    format(obj1=obj1_name, call_name=call_name, obj2=obj2_name)
                )
                continue
            call_qtt2 = calls2[call_name]
            if call_qtt != call_qtt2:
                # Report the per-callee call counts of each dump.
                report_information.append(
                    "\tIn {obj1} it {call_name} has {property1} call(s), whereas in {obj2} it has {property2}.".
                    format(obj1=obj1_name, property1=call_qtt,
                           obj2=obj2_name, property2=call_qtt2,
                           call_name=call_name)
                )

        # Callees seen only in the second dump, whatever their count.
        for call_name in sorted(set(calls2) - set(calls1)):
            report_information.append(
                "\tFunction {call_name} is only called in {obj1}, but not in {obj2}.".
                format(obj1=obj2_name, call_name=call_name, obj2=obj1_name)
            )

        if report_information:
            print("Function <{func_name}> report:".format(func_name=func_feature1.name))
            for line in report_information:
                print(line)