Skip to content

Commit bfd3064

Browse files
committed
Add RecognizeConstantData to StringRecognizer API
Adds a new recognize_constant_data method to the StringRecognizer API, enabling plugins to decode constant data expressions (HLIL_CONST_DATA) recovered by the outline resolver. The recognizer receives the full instruction, giving access to the data buffer and builtin type via the constant_data accessor. Also fixes a pre-existing ctypes mismatch in get_constant_data_and_builtin where BNBuiltinType (uint8_t) was incorrectly passed as c_int.
1 parent 424dc72 commit bfd3064

5 files changed

Lines changed: 95 additions & 3 deletions

File tree

binaryninjaapi.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22342,6 +22342,19 @@ namespace BinaryNinja {
2234222342
virtual std::optional<DerivedString> RecognizeImport(
2234322343
const HighLevelILInstruction& instr, Type* type, int64_t val);
2234422344

22345+
/*! Can be overridden to recognize strings for constant data expressions (HLIL_CONST_DATA).
22346+
These expressions are generated by the outline resolver when it recovers constant data
22347+
streams from scattered stores. The instruction provides access to the data buffer and
22348+
builtin type via its \c constant_data accessor.
22349+
22350+
If a string is found, return a \c DerivedString with the string information.
22351+
22352+
\param instr High level expression containing the constant data
22353+
\return Optional \c DerivedString for any string that is found
22354+
*/
22355+
virtual std::optional<DerivedString> RecognizeConstantData(
22356+
const HighLevelILInstruction& instr);
22357+
2234522358
/*! Registers the string recognizer.
2234622359

2234722360
\param recognizer The string recognizer to register.
@@ -22361,6 +22374,8 @@ namespace BinaryNinja {
2236122374
int64_t val, uint64_t offset, BNDerivedString* result);
2236222375
static bool RecognizeImportCallback(
2236322376
void* ctxt, BNHighLevelILFunction* hlil, size_t expr, BNType* type, int64_t val, BNDerivedString* result);
22377+
static bool RecognizeConstantDataCallback(
22378+
void* ctxt, BNHighLevelILFunction* hlil, size_t expr, BNDerivedString* result);
2236422379
};
2236522380

2236622381
class CoreStringRecognizer : public StringRecognizer
@@ -22376,6 +22391,8 @@ namespace BinaryNinja {
2237622391
const HighLevelILInstruction& instr, Type* type, int64_t val, uint64_t offset) override;
2237722392
std::optional<DerivedString> RecognizeImport(
2237822393
const HighLevelILInstruction& instr, Type* type, int64_t val) override;
22394+
std::optional<DerivedString> RecognizeConstantData(
22395+
const HighLevelILInstruction& instr) override;
2237922396
};
2238022397
} // namespace BinaryNinja
2238122398

binaryninjacore.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,14 +37,14 @@
3737
// Current ABI version for linking to the core. This is incremented any time
3838
// there are changes to the API that affect linking, including new functions,
3939
// new types, or modifications to existing functions or types.
40-
#define BN_CURRENT_CORE_ABI_VERSION 165
40+
#define BN_CURRENT_CORE_ABI_VERSION 166
4141

4242
// Minimum ABI version that is supported for loading of plugins. Plugins that
4343
// are linked to an ABI version less than this will not be able to load and
4444
// will require rebuilding. The minimum version is increased when there are
4545
// incompatible changes that break binary compatibility, such as changes to
4646
// existing types or functions.
47-
#define BN_MINIMUM_CORE_ABI_VERSION 165
47+
#define BN_MINIMUM_CORE_ABI_VERSION 166
4848

4949
#ifdef __GNUC__
5050
#ifdef BINARYNINJACORE_LIBRARY
@@ -4022,6 +4022,8 @@ extern "C"
40224022
uint64_t offset, BNDerivedString* result);
40234023
bool (*recognizeImport)(
40244024
void* ctxt, BNHighLevelILFunction* hlil, size_t expr, BNType* type, int64_t val, BNDerivedString* result);
4025+
bool (*recognizeConstantData)(
4026+
void* ctxt, BNHighLevelILFunction* hlil, size_t expr, BNDerivedString* result);
40254027
} BNCustomStringRecognizer;
40264028

40274029
typedef struct BNCustomStringTypeInfo
@@ -9079,6 +9081,8 @@ extern "C"
90799081
BNHighLevelILFunction* il, size_t exprIndex, BNType* type, int64_t val, uint64_t offset, BNDerivedString* out);
90809082
BINARYNINJACOREAPI bool BNStringRecognizerRecognizeImport(BNStringRecognizer* recognizer, BNHighLevelILFunction* il,
90819083
size_t exprIndex, BNType* type, int64_t val, BNDerivedString* out);
9084+
BINARYNINJACOREAPI bool BNStringRecognizerRecognizeConstantData(BNStringRecognizer* recognizer,
9085+
BNHighLevelILFunction* il, size_t exprIndex, BNDerivedString* out);
90829086

90839087
// PossibleValueSet operations
90849088
BINARYNINJACOREAPI void BNFreePossibleValueSet(BNPossibleValueSet* object);

python/function.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1938,7 +1938,7 @@ def get_constant_data(self, state: RegisterValueType, value: int, size: int = 0)
19381938
def get_constant_data_and_builtin(
19391939
self, state: RegisterValueType, value: int, size: int = 0
19401940
) -> Tuple[databuffer.DataBuffer, BuiltinType]:
1941-
builtin = ctypes.c_int()
1941+
builtin = ctypes.c_ubyte()
19421942
db = databuffer.DataBuffer(
19431943
handle=core.BNGetConstantData(self.handle, state, value, size, ctypes.byref(builtin)))
19441944
return db, BuiltinType(builtin.value)

python/stringrecognizer.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,9 @@ def register(self):
195195
self._cb.recognizeExternPointer = self._cb.recognizeExternPointer.__class__(self._recognize_extern_pointer)
196196
if self.recognize_import.__func__ != StringRecognizer.recognize_import:
197197
self._cb.recognizeImport = self._cb.recognizeImport.__class__(self._recognize_import)
198+
if self.recognize_constant_data.__func__ != StringRecognizer.recognize_constant_data:
199+
self._cb.recognizeConstantData = self._cb.recognizeConstantData.__class__(
200+
self._recognize_constant_data)
198201
self.handle = core.BNRegisterStringRecognizer(self.__class__.recognizer_name, self._cb)
199202
self.__class__._registered_recognizers.append(self)
200203

@@ -263,6 +266,19 @@ def _recognize_import(self, ctxt, hlil, expr, type, val, result):
263266
log_error_for_exception("Unhandled Python exception in StringRecognizer._recognize_import")
264267
return False
265268

269+
def _recognize_constant_data(self, ctxt, hlil, expr, result):
	"""
	Native callback trampoline for constant data expressions.

	Wraps the raw HLIL function handle, looks up the expression at index ``expr`` and forwards it
	to :py:meth:`recognize_constant_data`. When that returns a string, it is converted to its core
	structure and stored in ``result[0]``.

	:param ctxt: Callback context supplied by the caller (not used here)
	:param hlil: Raw high level IL function handle
	:param expr: Index of the expression to inspect
	:param result: Output slot that receives the core derived string structure
	:return: ``True`` if a string was written to ``result``, ``False`` if nothing was recognized or an exception was raised
	"""
	try:
		# Take a new reference on the handle before wrapping it in the Python object.
		func = highlevelil.HighLevelILFunction(handle=core.BNNewHighLevelILFunctionReference(hlil))
		derived = self.recognize_constant_data(func.get_expr(highlevelil.ExpressionIndex(expr)))
		if derived is not None:
			result[0] = derived._to_core_struct(True)
			return True
		return False
	except Exception:
		# Exceptions are logged and reported as "not recognized" rather than raised to the caller.
		log_error_for_exception("Unhandled Python exception in StringRecognizer._recognize_constant_data")
		return False
281+
266282
@property
267283
def name(self) -> str:
268284
if hasattr(self, 'handle'):
@@ -347,6 +363,22 @@ def recognize_import(
347363
"""
348364
return None
349365

366+
def recognize_constant_data(
	self, instr: 'highlevelil.HighLevelILInstruction'
) -> Optional['binaryview.DerivedString']:
	"""
	Override this hook to recognize strings in constant data expressions (HLIL_CONST_DATA).

	The outline resolver generates these expressions when it recovers constant data streams
	from scattered stores. The data buffer and builtin type are available through the
	instruction's ``constant_data`` accessor.

	Return a :py:class:`~binaryninja.binaryview.DerivedString` describing the string when one is
	found. This default implementation recognizes nothing and always returns ``None``.

	:param instr: High level expression containing the constant data
	:return: Optional :py:class:`~binaryninja.binaryview.DerivedString` for any string that is found.
	"""
	return None
381+
350382

351383
_recognizer_cache = {}
352384

@@ -402,3 +434,11 @@ def recognize_import(
402434
if not core.BNStringRecognizerRecognizeImport(self.handle, instr.function.handle, instr.expr_index, type.handle, val, string):
403435
return None
404436
return binaryview.DerivedString._from_core_struct(string, True)
437+
438+
def recognize_constant_data(
	self, instr: 'highlevelil.HighLevelILInstruction'
) -> Optional['binaryview.DerivedString']:
	"""
	Ask the recognizer behind ``self.handle`` to recognize a string in a constant data expression.

	:param instr: High level expression containing the constant data
	:return: The recognized :py:class:`~binaryninja.binaryview.DerivedString`, or ``None`` if the call reports no string
	"""
	derived = core.BNDerivedString()
	recognized = core.BNStringRecognizerRecognizeConstantData(
		self.handle, instr.function.handle, instr.expr_index, derived)
	if recognized:
		return binaryview.DerivedString._from_core_struct(derived, True)
	return None

stringrecognizer.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,12 @@ std::optional<DerivedString> StringRecognizer::RecognizeImport(const HighLevelIL
136136
}
137137

138138

139+
// Default implementation of the constant data hook: recognizes nothing. Subclasses override
// this to return a DerivedString for HLIL_CONST_DATA expressions they understand.
std::optional<DerivedString> StringRecognizer::RecognizeConstantData(const HighLevelILInstruction& /*instr*/)
{
	return {};
}
143+
144+
139145
void StringRecognizer::Register(StringRecognizer* recognizer)
140146
{
141147
BNCustomStringRecognizer callbacks;
@@ -145,6 +151,7 @@ void StringRecognizer::Register(StringRecognizer* recognizer)
145151
callbacks.recognizeConstantPointer = RecognizeConstantPointerCallback;
146152
callbacks.recognizeExternPointer = RecognizeExternPointerCallback;
147153
callbacks.recognizeImport = RecognizeImportCallback;
154+
callbacks.recognizeConstantData = RecognizeConstantDataCallback;
148155

149156
recognizer->AddRefForRegistration();
150157
recognizer->m_object = BNRegisterStringRecognizer(recognizer->m_nameForRegister.c_str(), &callbacks);
@@ -220,6 +227,20 @@ bool StringRecognizer::RecognizeImportCallback(
220227
}
221228

222229

230+
bool StringRecognizer::RecognizeConstantDataCallback(
231+
void* ctxt, BNHighLevelILFunction* hlil, size_t expr, BNDerivedString* result)
232+
{
233+
StringRecognizer* recognizer = (StringRecognizer*)ctxt;
234+
Ref<HighLevelILFunction> hlilObj = new HighLevelILFunction(BNNewHighLevelILFunctionReference(hlil));
235+
HighLevelILInstruction instr = hlilObj->GetExpr(expr);
236+
auto str = recognizer->RecognizeConstantData(instr);
237+
if (!str.has_value())
238+
return false;
239+
*result = str->ToAPIObject(true);
240+
return true;
241+
}
242+
243+
223244
Ref<StringRecognizer> StringRecognizer::GetByName(const std::string& name)
224245
{
225246
BNStringRecognizer* recognizer = BNGetStringRecognizerByName(name.c_str());
@@ -298,3 +319,13 @@ std::optional<DerivedString> CoreStringRecognizer::RecognizeImport(
298319
return std::nullopt;
299320
return DerivedString::FromAPIObject(&str, true);
300321
}
322+
323+
324+
std::optional<DerivedString> CoreStringRecognizer::RecognizeConstantData(
325+
const HighLevelILInstruction& instr)
326+
{
327+
BNDerivedString str;
328+
if (!BNStringRecognizerRecognizeConstantData(m_object, instr.function->GetObject(), instr.exprIndex, &str))
329+
return std::nullopt;
330+
return DerivedString::FromAPIObject(&str, true);
331+
}

0 commit comments

Comments
 (0)