Skip to content

Commit b8b213a

Browse files
xusheng6 and claude authored
Fix big-endian register parsing for GDB RSP adapter (#1003)
* Fix big-endian register parsing for PowerPC and other BE targets

  When connecting to big-endian targets like PowerPC via QEMU, register values were being incorrectly parsed using little-endian byte order. This caused wrong PC values and prevented proper memory reads.

  Changes:
  - Parse <endian> element from target description XML when available
  - Add IsBigEndianArchitecture() fallback for known BE architectures (PowerPC, SPARC, M68K, S/390)
  - Add m_isBigEndian member variable to track target endianness
  - Add parseBigEndianHexToUint512() for reading BE register values
  - Add uint512ToBigEndianHex() for writing BE register values
  - Update ReadAllRegisters() and WriteRegister() to use correct endianness based on target

  Fixes #1000 (partial - the crash fix was in PR #1001, this addresses the endianness issue that remained after the crash was fixed)

  Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

* Fix parseBigEndianHexToUint512 buffer placement and add forward declarations

  Right-justify bytes in the 64-byte buffer before calling intx::be::load, which expects MSB at buffer[0] for a full 512-bit value. Without this, a 4-byte register like PC=0x4082e4c0 was placed at buffer[0..3] and interpreted as the top 32 bits of a 512-bit number.

  Also add forward declarations for IsBigEndianArchitecture to fix compilation errors (function defined after first use in LoadRegisterInfo).

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 9c3d024 commit b8b213a

6 files changed

Lines changed: 232 additions & 9 deletions

File tree

core/adapters/corelliumadapter.cpp

Lines changed: 74 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ using namespace BinaryNinja;
4949
using namespace std;
5050
using namespace BinaryNinjaDebugger;
5151

52+
static bool IsBigEndianArchitecture(const std::string& arch);
53+
5254
CorelliumAdapter::CorelliumAdapter(BinaryView* data, bool redirectGDBServer): DebugAdapter(data)
5355
{
5456
m_isTargetRunning = false;
@@ -93,17 +95,19 @@ bool CorelliumAdapter::LoadRegisterInfo()
9395

9496
std::string architecture{};
9597
std::string os_abi{};
98+
std::string endian{};
9699
size_t lastRegIndex = -1;
97100
for (auto node = doc.first_child().child("architecture"); node; node = node.next_sibling())
98101
{
99102
using namespace std::literals::string_literals;
100103

101104
if ( node.name() == "architecture"s )
102105
architecture = node.child_value();
103-
if ( node.name() == "osabi"s )
106+
else if ( node.name() == "osabi"s )
104107
os_abi = node.child_value();
105-
106-
if ( node.name() == "feature"s )
108+
else if ( node.name() == "endian"s )
109+
endian = node.child_value();
110+
else if ( node.name() == "feature"s )
107111
{
108112
for (auto reg_child = node.child("reg"); reg_child; reg_child = reg_child.next_sibling())
109113
{
@@ -134,6 +138,9 @@ bool CorelliumAdapter::LoadRegisterInfo()
134138
if (architecture.empty())
135139
throw std::runtime_error("failed to find architecture");
136140

141+
// Store the original architecture for endianness detection before stripping the prefix
142+
std::string fullArchitecture = architecture;
143+
137144
if (architecture.find(':') != std::string::npos)
138145
{
139146
architecture.erase(0, architecture.find(':') + 1);
@@ -143,6 +150,12 @@ bool CorelliumAdapter::LoadRegisterInfo()
143150
}
144151
m_remoteArch = architecture;
145152

153+
// Determine endianness: prefer explicit <endian> element, fall back to architecture-based detection
154+
if (!endian.empty())
155+
m_isBigEndian = (endian == "big");
156+
else
157+
m_isBigEndian = IsBigEndianArchitecture(fullArchitecture);
158+
146159
std::unordered_map<std::uint32_t, std::string> id_name{};
147160
std::unordered_map<std::uint32_t, std::uint32_t> id_width{};
148161

@@ -438,6 +451,22 @@ bool CorelliumAdapter::BreakpointExists(uint64_t address) const
438451
DebugBreakpoint(address)) != this->m_debugBreakpoints.end();
439452
}
440453

454+
/// Guess whether a target is big-endian from its architecture name.
///
/// The name is matched by substring, so both full names such as "powerpc:common" and short
/// names such as "ppc" are recognised.
///
/// @param arch  Architecture name to classify.
/// @return true for PowerPC, SPARC, Motorola 68k and IBM S/390 names; false for everything
///         else, including names that explicitly spell out a little-endian variant of one of
///         those families (e.g. "powerpc64le", "ppc64le", "sparcel").
static bool IsBigEndianArchitecture(const std::string& arch) {
	const auto contains = [&arch](const char* needle) { return arch.find(needle) != std::string::npos; };

	// Names that spell out a little-endian variant would otherwise be caught by the family
	// checks below. Exact spellings are matched instead of a bare "le" suffix so that
	// big-endian names such as "powerpc:vle" are not affected.
	static const char* const littleEndianNames[] = {
		"powerpc64le", "powerpcle", "ppc64le", "ppc64el", "ppcle", "sparcel"};
	for (const char* name : littleEndianNames)
	{
		if (contains(name))
			return false;
	}

	// PowerPC architectures (powerpc, ppc, common from "powerpc:common")
	if (contains("powerpc") || contains("ppc") || arch == "common")
		return true;
	// SPARC
	if (contains("sparc"))
		return true;
	// Motorola 68k ("68k" also covers "m68k")
	if (contains("68k"))
		return true;
	// IBM S/390
	if (contains("s390"))
		return true;
	return false;
}
469+
441470
static intx::uint512 parseLittleEndianHexToUint512(const std::string& hex) {
442471
if (hex.size() % 2 != 0)
443472
return {};
@@ -456,6 +485,27 @@ static intx::uint512 parseLittleEndianHexToUint512(const std::string& hex) {
456485
return intx::le::load<intx::uint512>(buffer);
457486
}
458487

488+
/// Parse a big-endian hex string (most significant byte first) into a 512-bit integer.
///
/// @param hex  Hex digits, two per byte, most significant byte first.
/// @return The parsed value, or zero when `hex` has an odd number of digits. When `hex`
///         encodes more than 64 bytes, only the 64 least-significant (trailing) bytes are
///         kept, so the result is the input truncated to its low-order 512 bits.
static intx::uint512 parseBigEndianHexToUint512(const std::string& hex) {
	if (hex.size() % 2 != 0)
		return {};

	uint8_t buffer[64] = {};  // Zero-initialized, so unused high-order bytes read as 0

	const size_t byteCount = hex.size() / 2;
	const size_t limit = std::min(byteCount, sizeof(buffer));

	// For big-endian: the hex string has MSB first. intx::be::load expects MSB at buffer[0]
	// for a full 512-bit value, so we must right-justify the bytes in the buffer.
	const size_t bufferOffset = sizeof(buffer) - limit;
	// If the input is wider than the buffer, skip its leading (most significant) bytes so that
	// truncation drops high-order bytes rather than low-order ones.
	const size_t hexOffset = byteCount - limit;
	for (size_t i = 0; i < limit; ++i)
	{
		const size_t pos = (hexOffset + i) * 2;
		// Two digits plus terminator on the stack; avoids a std::string allocation per byte.
		const char byteStr[3] = {hex[pos], hex[pos + 1], '\0'};
		buffer[bufferOffset + i] = static_cast<uint8_t>(strtoul(byteStr, nullptr, 16));
	}

	return intx::be::load<intx::uint512>(buffer);
}
508+
459509
std::unordered_map<std::string, DebugRegister> CorelliumAdapter::ReadAllRegisters()
460510
{
461511
if (m_regCache.has_value())
@@ -485,7 +535,8 @@ std::unordered_map<std::string, DebugRegister> CorelliumAdapter::ReadAllRegister
485535
const auto number_of_chars = 2 * ( register_info.m_bitSize / 8 );
486536
const auto value_string = register_info_reply_string.substr(0, number_of_chars);
487537
if (!value_string.empty()) {
488-
intx::uint512 value = parseLittleEndianHexToUint512(value_string);
538+
intx::uint512 value = m_isBigEndian ? parseBigEndianHexToUint512(value_string)
539+
: parseLittleEndianHexToUint512(value_string);
489540
all_regs[register_name] = DebugRegister(register_name, value, register_info.m_bitSize, register_info.m_regNum);
490541
}
491542
register_info_reply_string.erase(0, number_of_chars);
@@ -521,6 +572,23 @@ static std::string uint512ToLittleEndianHex(const intx::uint512& value, size_t w
521572
return result;
522573
}
523574

575+
/// Render the low-order `width` bytes of `value` as upper-case hex, most significant byte first.
///
/// @param value  Value to serialise.
/// @param width  Number of bytes to emit; anything above 64 is clamped to 64.
/// @return A string of `2 * min(width, 64)` hex digits.
static std::string uint512ToBigEndianHex(const intx::uint512& value, size_t width) {
	constexpr size_t kMaxBytes = 64;  // 512 bits

	// Full big-endian image of the value: most significant byte at index 0.
	uint8_t bytes[kMaxBytes] = {};
	intx::be::store(bytes, value);

	const size_t emitCount = (width < kMaxBytes) ? width : kMaxBytes;

	// The requested bytes are the trailing ones, so start emitCount bytes before the end.
	std::string hex;
	const uint8_t* const end = bytes + kMaxBytes;
	for (const uint8_t* cursor = end - emitCount; cursor != end; ++cursor)
		hex += fmt::format("{:02X}", *cursor);

	return hex;
}
591+
524592
bool CorelliumAdapter::WriteRegister(const std::string& reg, intx::uint512 value)
525593
{
526594
if (m_isTargetRunning)
@@ -529,7 +597,8 @@ bool CorelliumAdapter::WriteRegister(const std::string& reg, intx::uint512 value
529597
if (!this->m_registerInfo.contains(reg))
530598
return false;
531599

532-
const auto newRegString = uint512ToLittleEndianHex(value, this->m_registerInfo[reg].m_bitSize / 8);
600+
const auto newRegString = m_isBigEndian ? uint512ToBigEndianHex(value, this->m_registerInfo[reg].m_bitSize / 8)
601+
: uint512ToLittleEndianHex(value, this->m_registerInfo[reg].m_bitSize / 8);
533602
const auto reply = this->m_rspConnector->TransmitAndReceive(RspData("P{:02X}={}",
534603
this->m_registerInfo[reg].m_regNum, newRegString));
535604
if (reply.m_data[0])

core/adapters/corelliumadapter.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,10 @@ namespace BinaryNinjaDebugger
7676
// support the case -- so we do not really lose a lot anyways.
7777
std::string m_remoteArch;
7878

79+
// Whether the target uses big-endian byte order. Determined from target XML <endian> element
80+
// or inferred from architecture name.
81+
bool m_isBigEndian = false;
82+
7983
void InvalidateCache();
8084

8185
virtual DebugStopReason SignalToStopReason(std::unordered_map<std::string, std::uint64_t>& map);

core/adapters/esrevenadapter.cpp

Lines changed: 73 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ using namespace BinaryNinja;
5151
using namespace std;
5252
using namespace BinaryNinjaDebugger;
5353

54+
static bool IsBigEndianArchitecture(const std::string& arch);
55+
5456
EsrevenAdapter::EsrevenAdapter(BinaryView* data, bool redirectGDBServer): DebugAdapter(data)
5557
{
5658
m_isTargetRunning = false;
@@ -95,6 +97,7 @@ bool EsrevenAdapter::LoadRegisterInfo()
9597

9698
std::string architecture{};
9799
std::string os_abi{};
100+
std::string endian{};
98101
size_t lastRegIndex = -1;
99102

100103
auto processFeatures = [&](const pugi::xml_node& node) {
@@ -134,6 +137,8 @@ bool EsrevenAdapter::LoadRegisterInfo()
134137
architecture = node.child_value();
135138
else if ( node.name() == "osabi"s )
136139
os_abi = node.child_value();
140+
else if ( node.name() == "endian"s )
141+
endian = node.child_value();
137142
else if ( node.name() == "feature"s )
138143
processFeatures(node);
139144
else if (node.name() == "xi:include"s )
@@ -157,6 +162,9 @@ bool EsrevenAdapter::LoadRegisterInfo()
157162
if (architecture.empty())
158163
throw std::runtime_error("failed to find architecture");
159164

165+
// Store the original architecture for endianness detection before stripping the prefix
166+
std::string fullArchitecture = architecture;
167+
160168
if (architecture.find(':') != std::string::npos)
161169
{
162170
architecture.erase(0, architecture.find(':') + 1);
@@ -166,6 +174,12 @@ bool EsrevenAdapter::LoadRegisterInfo()
166174
}
167175
m_remoteArch = architecture;
168176

177+
// Determine endianness: prefer explicit <endian> element, fall back to architecture-based detection
178+
if (!endian.empty())
179+
m_isBigEndian = (endian == "big");
180+
else
181+
m_isBigEndian = IsBigEndianArchitecture(fullArchitecture);
182+
169183
std::unordered_map<std::uint32_t, std::string> id_name{};
170184
std::unordered_map<std::uint32_t, std::uint32_t> id_width{};
171185

@@ -473,6 +487,22 @@ bool EsrevenAdapter::BreakpointExists(uint64_t address) const
473487
DebugBreakpoint(address)) != this->m_debugBreakpoints.end();
474488
}
475489

490+
/// Guess whether a target is big-endian from its architecture name.
///
/// The name is matched by substring, so both full names such as "powerpc:common" and short
/// names such as "ppc" are recognised.
///
/// @param arch  Architecture name to classify.
/// @return true for PowerPC, SPARC, Motorola 68k and IBM S/390 names; false for everything
///         else, including names that explicitly spell out a little-endian variant of one of
///         those families (e.g. "powerpc64le", "ppc64le", "sparcel").
static bool IsBigEndianArchitecture(const std::string& arch) {
	const auto contains = [&arch](const char* needle) { return arch.find(needle) != std::string::npos; };

	// Names that spell out a little-endian variant would otherwise be caught by the family
	// checks below. Exact spellings are matched instead of a bare "le" suffix so that
	// big-endian names such as "powerpc:vle" are not affected.
	static const char* const littleEndianNames[] = {
		"powerpc64le", "powerpcle", "ppc64le", "ppc64el", "ppcle", "sparcel"};
	for (const char* name : littleEndianNames)
	{
		if (contains(name))
			return false;
	}

	// PowerPC architectures (powerpc, ppc, common from "powerpc:common")
	if (contains("powerpc") || contains("ppc") || arch == "common")
		return true;
	// SPARC
	if (contains("sparc"))
		return true;
	// Motorola 68k ("68k" also covers "m68k")
	if (contains("68k"))
		return true;
	// IBM S/390
	if (contains("s390"))
		return true;
	return false;
}
505+
476506
static intx::uint512 parseLittleEndianHexToUint512(const std::string& hex) {
477507
if (hex.size() % 2 != 0)
478508
return {};
@@ -491,6 +521,27 @@ static intx::uint512 parseLittleEndianHexToUint512(const std::string& hex) {
491521
return intx::le::load<intx::uint512>(buffer);
492522
}
493523

524+
/// Parse a big-endian hex string (most significant byte first) into a 512-bit integer.
///
/// @param hex  Hex digits, two per byte, most significant byte first.
/// @return The parsed value, or zero when `hex` has an odd number of digits. When `hex`
///         encodes more than 64 bytes, only the 64 least-significant (trailing) bytes are
///         kept, so the result is the input truncated to its low-order 512 bits.
static intx::uint512 parseBigEndianHexToUint512(const std::string& hex) {
	if (hex.size() % 2 != 0)
		return {};

	uint8_t buffer[64] = {};  // Zero-initialized, so unused high-order bytes read as 0

	const size_t byteCount = hex.size() / 2;
	const size_t limit = std::min(byteCount, sizeof(buffer));

	// For big-endian: the hex string has MSB first. intx::be::load expects MSB at buffer[0]
	// for a full 512-bit value, so we must right-justify the bytes in the buffer.
	const size_t bufferOffset = sizeof(buffer) - limit;
	// If the input is wider than the buffer, skip its leading (most significant) bytes so that
	// truncation drops high-order bytes rather than low-order ones.
	const size_t hexOffset = byteCount - limit;
	for (size_t i = 0; i < limit; ++i)
	{
		const size_t pos = (hexOffset + i) * 2;
		// Two digits plus terminator on the stack; avoids a std::string allocation per byte.
		const char byteStr[3] = {hex[pos], hex[pos + 1], '\0'};
		buffer[bufferOffset + i] = static_cast<uint8_t>(strtoul(byteStr, nullptr, 16));
	}

	return intx::be::load<intx::uint512>(buffer);
}
544+
494545
std::unordered_map<std::string, DebugRegister> EsrevenAdapter::ReadAllRegisters()
495546
{
496547
if (m_isTargetRunning || !m_rspConnector)
@@ -523,7 +574,8 @@ std::unordered_map<std::string, DebugRegister> EsrevenAdapter::ReadAllRegisters(
523574
const auto number_of_chars = 2 * ( register_info.m_bitSize / 8 );
524575
const auto value_string = register_info_reply_string.substr(0, number_of_chars);
525576
if (!value_string.empty()) {
526-
intx::uint512 value = parseLittleEndianHexToUint512(value_string);
577+
intx::uint512 value = m_isBigEndian ? parseBigEndianHexToUint512(value_string)
578+
: parseLittleEndianHexToUint512(value_string);
527579
all_regs[register_name] = DebugRegister(register_name, value, register_info.m_bitSize, register_info.m_regNum);
528580
}
529581
register_info_reply_string.erase(0, number_of_chars);
@@ -559,6 +611,24 @@ static std::string uint512ToLittleEndianHex(const intx::uint512& value, size_t w
559611
return result;
560612
}
561613

614+
/// Render the low-order `width` bytes of `value` as upper-case hex, most significant byte first.
///
/// @param value  Value to serialise.
/// @param width  Number of bytes to emit; anything above 64 is clamped to 64.
/// @return A string of `2 * min(width, 64)` hex digits.
static std::string uint512ToBigEndianHex(const intx::uint512& value, size_t width) {
	constexpr size_t kMaxBytes = 64;  // 512 bits

	// Full big-endian image of the value: most significant byte at index 0, so a narrower
	// value occupies the trailing bytes.
	uint8_t bytes[kMaxBytes] = {};
	intx::be::store(bytes, value);

	const size_t emitCount = (width < kMaxBytes) ? width : kMaxBytes;

	// Emit only the trailing emitCount bytes.
	std::string hex;
	const uint8_t* const end = bytes + kMaxBytes;
	for (const uint8_t* cursor = end - emitCount; cursor != end; ++cursor)
		hex += fmt::format("{:02X}", *cursor);

	return hex;
}
631+
562632
bool EsrevenAdapter::WriteRegister(const std::string& reg, intx::uint512 value)
563633
{
564634
if (m_isTargetRunning || !m_rspConnector)
@@ -567,7 +637,8 @@ bool EsrevenAdapter::WriteRegister(const std::string& reg, intx::uint512 value)
567637
if (!this->m_registerInfo.contains(reg))
568638
return false;
569639

570-
const auto newRegString = uint512ToLittleEndianHex(value, this->m_registerInfo[reg].m_bitSize / 8);
640+
const auto newRegString = m_isBigEndian ? uint512ToBigEndianHex(value, this->m_registerInfo[reg].m_bitSize / 8)
641+
: uint512ToLittleEndianHex(value, this->m_registerInfo[reg].m_bitSize / 8);
571642
const auto reply = this->m_rspConnector->TransmitAndReceive(RspData("P{:02X}={}",
572643
this->m_registerInfo[reg].m_regNum, newRegString));
573644

core/adapters/esrevenadapter.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,10 @@ namespace BinaryNinjaDebugger
7979
// support the case -- so we do not really lose a lot anyways.
8080
std::string m_remoteArch;
8181

82+
// Whether the target uses big-endian byte order. Determined from target XML <endian> element
83+
// or inferred from architecture name.
84+
bool m_isBigEndian = false;
85+
8286
bool m_canReverseContinue = false;
8387
bool m_canReverseStep = false;
8488

0 commit comments

Comments
 (0)