Skip to content

Commit

Permalink
Merge pull request #6 from CySHell/Non-Rtti_constructors
Browse files Browse the repository at this point in the history
Non rtti constructors
  • Loading branch information
CySHell authored Dec 18, 2022
2 parents 5ea1876 + 41976a9 commit 6cff61a
Show file tree
Hide file tree
Showing 26 changed files with 233 additions and 116 deletions.
181 changes: 127 additions & 54 deletions ClassDataStructureDetection/Constructors/DetectConstructor.py
Original file line number Diff line number Diff line change
@@ -1,33 +1,92 @@
import binaryninja as bn
from typing import List
from typing import *
from ... import Config
from ...RttiInfomation.VirtualTableInference import VirtualFunctionTable
import pysnooper


def GetVirtualTableAssignmentInstruction(func: bn.function.Function):
current_candidate_instr = None
for instr in func.hlil.instructions:
# <HighLevelILOperation.HLIL_ASSIGN: 17>
if instr.operation == 17:
# Check if Arg1 is being assigned to.
func_params = func.hlil.source_function.parameter_vars.vars
if func_params and instr.vars:
if func_params[0] == instr.vars[0]:
# <HighLevelILOperation.HLIL_CONST_PTR: 27>, <HighLevelILOperation.HLIL_CONST: 26>
if instr.operands[1].operation == 27 or instr.operands[1].operation == 26:
# <HighLevelILOperation.HLIL_DEREF: 23>
# <HighLevelILOperation.HLIL_DEREF_FIELD: 24>
# De-referencing the pointer, meaning if this
# pointer is to a struct, this is de-referencing offset 0x0.
if instr.operands[0].operation == 23 or instr.operands[0].operation == 24:
if type(instr.operands[0].operands[0]) == bn.highlevelil.HighLevelILVar:
current_candidate_instr = instr
from ...RttiInformation.VirtualTableInference import VirtualFunctionTable
from ...Common import Utils

global_constructor_destructor_list: List = list()


def _GetAssignmentOffset(destination) -> Optional[int]:
    """
    Work out which offset of the de-referenced pointer an assignment destination writes to.
    :param destination: the HLIL_DEREF / HLIL_DEREF_FIELD expression on the left side of an assignment
    :return 0 for a bare variable (*arg1), CONST for *(arg1 + CONST), None for any other shape
    """
    pointer_expr = destination.operands[0]
    # Exact type comparison (not isinstance) so that subclasses are not silently accepted.
    if type(pointer_expr) is bn.highlevelil.HighLevelILVar:
        # Directly de-referencing the pointer, meaning if this pointer is to a
        # struct, this is de-referencing offset 0x0.
        # NOTE(review): a HLIL_DEREF_FIELD through a bare variable is also recorded at offset 0,
        # whatever the field's own offset is - confirm this is intended.
        return 0
    if type(pointer_expr) is bn.highlevelil.HighLevelILAdd:
        # Referencing an offset within the pointer.
        # example: <HLIL_ADD: arg1 + 0x1a000>
        #          [<HLIL_VAR: arg1>, <HLIL_CONST: 0x1a000>]
        addend = pointer_expr.operands[1]
        if addend.operation.name == "HLIL_CONST":
            return addend.value.value
    return None


def GetAllAssignmentInstructions(func: bn.function.Function) -> Dict:
    """
    Search the given function for all constants / constant pointers assigned to an offset into the
    pointer given in Arg1 of the function (the "This" pointer).

    Only assignments whose destination is a HLIL_DEREF or HLIL_DEREF_FIELD are considered.
    HLIL_ARRAY_INDEX destinations (arg1[N]) are not collected. Destinations whose shape is not
    recognised are written to the log file and skipped.

    Any exception raised while walking the HLIL is printed and whatever was collected so far
    is returned.

    :param func: the function whose HLIL is scanned
    :return {offset_into_Arg1: [assigned constant values, in instruction order]}
    """
    candidate_instructions: Dict = dict()

    try:
        # The parameter list does not change between instructions, so fetch it once.
        func_params = func.hlil.source_function.parameter_vars.vars
        for instr in func.hlil.instructions:
            if instr.operation.name != "HLIL_ASSIGN":
                continue
            # Check if Arg1 is being assigned to.
            if not (func_params and instr.vars):
                continue
            if not (func_params[0] == instr.vars[0]):
                continue

            destination = instr.operands[0]
            source = instr.operands[1]
            # A pointer or a constant is being assigned into an offset within Arg1
            # Example: <HLIL_ASSIGN: *(arg1 + 0x1a000) = &data_140645958>
            #          <HLIL_DEREF: *(arg1 + 0x1a000)> <HLIL_CONST_PTR: &data_140645958>
            if source.operation.name not in ("HLIL_CONST_PTR", "HLIL_CONST"):
                continue
            if destination.operation.name not in ("HLIL_DEREF", "HLIL_DEREF_FIELD"):
                continue

            offset_into_class = _GetAssignmentOffset(destination)
            if offset_into_class is None:
                Utils.LogToFile(f"GetAllAssignmentInstructions: UNKNOWN assignment type at HLIL "
                                f"Address {hex(instr.address)} ! please report this. "
                                f"\nInstruction: {instr}")
                continue

            # Always file the value under the offset that was actually computed.
            candidate_instructions.setdefault(offset_into_class, []).append(source.value.value)
    except Exception as e:
        # Best effort: report the failure and return what has been gathered.
        print(f"GetAllAssignmentInstructions {hex(func.start)}, Exception: {e}")

    return candidate_instructions


def GetThisClassVirtualTableAssignmentInstruction(func: bn.function.Function) -> Optional[int]:
    """
    Return the value of the last constant assigned to offset 0 of Arg1 in the given function.
    :param func: the function to inspect
    :return the last value recorded for offset 0 by GetAllAssignmentInstructions, or None if
            nothing was assigned to offset 0
    """
    candidate_instructions = GetAllAssignmentInstructions(func)

    # We are only interested in the last assignment of a vfTable in a function, since the ones before it are
    # base classes.
    offset_zero_assignments = candidate_instructions.get(0)
    if offset_zero_assignments:
        return offset_zero_assignments[-1]
    return None


def GetPotentialConstructors(bv: bn.binaryview, vfTable_addr: int) -> \
Expand All @@ -45,14 +104,14 @@ def GetPotentialConstructors(bv: bn.binaryview, vfTable_addr: int) -> \
return potential_constructors


def DetectConstructorForVTable(bv: bn.binaryview, vfTable_addr: int) -> list[bn.function.Function]:
    """
    Collect every candidate function for the given vfTable that passes VerifyConstructor.

    Each accepted function is announced on stdout and its start address is appended to
    global_constructor_destructor_list.

    :param bv: the binary view being analysed
    :param vfTable_addr: address of the virtual function table
    :return the accepted functions, in the order GetPotentialConstructors produced them
    """
    verified_constructors: List[bn.function.Function] = list()
    for potential_constructor in GetPotentialConstructors(bv, vfTable_addr):
        if not VerifyConstructor(bv, potential_constructor):
            continue
        verified_constructors.append(potential_constructor)
        print(f'Found constructor - {potential_constructor.name}')
        global_constructor_destructor_list.append(potential_constructor.start)
    return verified_constructors


def IsDestructor(bv: bn.binaryview, potential_destructor: bn.function.Function) -> bool:
Expand All @@ -67,36 +126,50 @@ def IsDestructor(bv: bn.binaryview, potential_destructor: bn.function.Function)
return False


def VerifyConstructor(bv: bn.binaryview, potential_constructor: bn.function.Function, found_constructors: int) -> bool:
def DefineConstructor(bv: bn.binaryview, potential_constructors: list[bn.function.Function],
                      vtable_addr: int, class_name=None) -> bool:
    """
    Annotate every given function as a constructor / destructor of a class.

    Depending on Config.CONSTRUCTOR_FUNCTION_HANDLING each function either gets a comment (0)
    or is renamed (1). Any other setting is treated as invalid.

    :param bv: the binary view being analysed
    :param potential_constructors: functions to annotate
    :param vtable_addr: address of the class vfTable; also used to look up the class name
    :param class_name: optional class name; when empty, the name of the data variable at
                       vtable_addr is used
    :return True when all functions were handled, False when no class name could be determined
            or the configured handling mode is invalid
    """
    if not class_name:
        # The data variable may be missing altogether, so guard before reading its name.
        vtable_data_var = bv.get_data_var_at(vtable_addr)
        class_name = vtable_data_var.name if vtable_data_var is not None else None

    if not class_name:
        print(f"DefineConstructor: Cannot get class name for vtable at {hex(vtable_addr)}")
        return False

    # Remove the _vfTable tag from the name
    class_name = class_name.removesuffix("_vfTable")

    # Since several constructors with the same name (but different signature) may exist, we
    # will attach a postfix index to each of the names.
    for constructor_index, constructor in enumerate(potential_constructors):
        func_type = "Destructor" if IsDestructor(bv, constructor) else "Constructor"
        if Config.CONSTRUCTOR_FUNCTION_HANDLING == 0:
            AddComment(bv, constructor.start, vtable_addr,
                       class_name, func_type)
        elif Config.CONSTRUCTOR_FUNCTION_HANDLING == 1:
            ChangeFuncName(bv, constructor.start, constructor_index,
                           class_name, func_type)
        else:
            # invalid choice
            return False
    return True


def VerifyConstructor(bv: bn.binaryview, potential_constructor: bn.function.Function) -> bool:
# The heuristics used here will locate both the constructors and destructors.
# It is not easy to automatically distinguish between the two.
func_type = "Constructor"

try:
if instr := GetVirtualTableAssignmentInstruction(potential_constructor):
pointer: int = instr.operands[1].operands[0]
if pointer := GetThisClassVirtualTableAssignmentInstruction(potential_constructor):
data_refs = list(bv.get_data_refs_from(pointer))
if data_refs:
if len(data_refs) != 1:
# print(f'Error, too many data refs for {pointer}')
pass
return False
else:
# Check if this is a function pointer
if bv.get_function_at(data_refs[0]):
class_name: str = bv.get_data_var_at(pointer).name
if class_name.endswith("_vfTable"):
# Remove the _vfTable tag from the name
class_name = class_name[:-8]
if IsDestructor(bv, potential_constructor):
func_type = "Destructor"
if Config.CONSTRUCTOR_FUNCTION_HANDLING == 0:
AddComment(bv, potential_constructor.start, pointer,
class_name, func_type)
elif Config.CONSTRUCTOR_FUNCTION_HANDLING == 1:
ChangeFuncName(bv, potential_constructor.start, found_constructors,
class_name, func_type)
else:
# invalid choice
return False
if bv.get_function_at(data_refs[0]) is not None:
return True
else:
# print(f'Error in instruction {instr}')
Expand Down
33 changes: 25 additions & 8 deletions ClassObjectRepresentation/CppClass.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,30 @@
from dataclasses import dataclass, field

# {ClassName: ClassyClass}
global_classes: dict = dict()


def GenerateClassNameFromVtableAddr(vTable_addr: int) -> str:
    """
    Build a placeholder class name from a vfTable address.
    :param vTable_addr: address of the virtual function table
    :return "class_<hex address>_vfTable"
    """
    hex_address = hex(vTable_addr)
    return "class_" + hex_address + "_vfTable"


@dataclass
class ClassyClass:
    """
    Description of a single recovered C++ class.

    Creating an instance registers it in global_classes under its name, replacing any
    entry already stored under that name.
    """

    def __init__(self, name: str, vfTable_addr: int, constructors: list[int] = None,
                 inherited_classes: list = None, vfTable_functions: list[int] = None, size: int = 0,
                 namespace="", fields=None):
        """
        :param name: class name, also the key used in global_classes
        :param vfTable_addr: address of the class virtual function table
        :param constructors: constructor addresses; a fresh list is used when omitted or empty
        :param inherited_classes: list[ClassyClass]; a fresh list is used when omitted or empty
        :param vfTable_functions: function addresses in the vfTable; a fresh list is used when
                                  omitted or empty
        :param size: class size
        :param namespace: namespace of the class
        :param fields: {offset: field description}; a fresh dict is used when omitted or empty
        """
        self.name: str = name
        self.vfTable_addr: int = vfTable_addr
        self.constructors: list[int] = constructors if constructors else list()
        self.namespace: str = namespace
        self.size: int = size
        # fields - {offset: (<binaryninja.types>, <Field name>, Optional(Address in executable pointed to))}
        self.fields: dict = fields if fields else dict()
        # list[ClassyClass]
        self.inherited_classes: list = inherited_classes if inherited_classes else list()
        # vfTable_functions - A list of all function addresses in the table
        self.vfTable_functions: list[int] = vfTable_functions if vfTable_functions else list()
        # Legacy attribute, always starts empty. NOTE(review): presumably superseded by
        # vfTable_functions - confirm no caller still reads it before removing.
        self.vfTable: list[int] = list()

        global_classes.update({self.name: self})
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from ..Common import Utils
from .. import Config
from .ClassMemoryLayout import ClassStructCreation
import pysnooper


###############################################################################################
# GLOBAL STRUCTS
Expand Down Expand Up @@ -106,13 +106,13 @@ def DebugPrintCol(self, Col: CompleteObjectLocator, current_address):
def DeduceClassHierarchies(self):
ClassHierarchyDeduction.DefineClassHierarchy(self.bv)

def DefineRTTI(self) -> bool:
def DetectAndDefineAllInformation(self) -> bool:
for sect in self.bv.sections.values():
if IsSectionCompatibleToRTTI(sect):
current_address = sect.start
while current_address < sect.end - self.rtti_complete_object_locator_size:
if Col := self.GetCompleteObjectLocator(current_address):
Utils.LogToFile(f'DefineRTTI: Defined {Col.__repr__()} \n')
Utils.LogToFile(f'Defined {Col.__repr__()} \n')
print(f"Defined Class: {Utils.DemangleName(Col.mangled_class_name)}")
if Config.ENABLE_DEBUG_LOGGING:
self.DebugPrintCol(Col, current_address)
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import networkx as nx
from networkx import DiGraph
from ...RttiInfomation import ClassContext
from ...RttiInformation import ClassContext
from ...Common import Utils
from ... import Config
import binaryninja as bn
Expand Down Expand Up @@ -76,7 +76,8 @@ def RenameFunction(bv: bn.binaryview, vtable_function: int, lca: int, function_i
else:
func.set_comment_at(func.start, f'{class_name}_method{function_index}')
return True
except:
except Exception as e:
print(f"Unable to rename function {hex(vtable_function)}, got Exception: \n{e}")
return False


Expand Down Expand Up @@ -136,17 +137,17 @@ def CreateBcdHierarchyRecursively(base_class_array: List[int],
def WriteGraphToFile(graph: DiGraph, gexf=True, graphml=False):
    """
    Store the given graph on disk under Config.GRAPH_FILE_FULL_PATH.
    :param graph: the graph to store
    :param gexf: when true, write 'RttiInformation.gexf'
    :param graphml: when true, write 'RttiInformation.graphml'
    """
    if gexf:
        # To read the following stored graph: read_gexf(Config.GRAPH_FILE_FULL_PATH + 'RttiInformation.gexf')
        nx.write_gexf(graph, Config.GRAPH_FILE_FULL_PATH + 'RttiInformation.gexf')

    if graphml:
        # Write the graph in graphml form in order to be able to upload it to other databases (such as neo4j)
        # In neo4j:
        #           CALL apoc.import.graphml('RttiInformation.graphml', {storeNodeIds: true})
        #           MATCH (n)
        #           CALL apoc.create.addLabels([id(n)], [n.id])
        #           yield node
        #           return node
        nx.write_graphml(graph, Config.GRAPH_FILE_FULL_PATH + 'RttiInformation.graphml')


def CreateHierarchyGraph() -> nx.DiGraph:
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
Loading

0 comments on commit 6cff61a

Please sign in to comment.