
Commit c22aebe

Refactor JIT FP walk to per-iteration unwind and fix JIT region detection
Restructure the eBPF JIT frame pointer walk: instead of running a separate loop before the main CFP walk, advance the native FP chain by one frame per main-loop iteration. This keeps the native unwind state in lockstep with the Ruby VM stack and supports both YJIT (one JIT frame, exits after the first iteration) and ZJIT (one JIT frame per iseq, 1:1 with CFPs).

Fix JIT region detection in SynchronizeMappings to scan all mappings (including non-executable ---p reservations) for the prctl-labeled JIT region, and only then register LPM prefixes for executable pages. This ensures jit_start/jit_end cover the full reserved address range even when the committed r-xp pages do not carry the label.

Also fix IsAnonymous() to recognize [anon:...]-labeled mappings, and remove debug log spam from parseMappings.
1 parent 59c794c commit c22aebe

4 files changed

Lines changed: 88 additions & 75 deletions
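
To make the two-pass mapping scan described in the commit message concrete, here is a minimal, self-contained Go sketch of the idea. The mapping struct, helper, and address values are hypothetical stand-ins, not the profiler's process.Mapping type or real addresses; the actual implementation is in the ruby.go diff below.

package main

import (
    "fmt"
    "strings"
)

// mapping is a simplified stand-in for the profiler's process.Mapping.
type mapping struct {
    Vaddr, Length uint64
    Perms         string // e.g. "---p", "r-xp"
    Path          string
}

// jitBounds mirrors the first pass: take the union of ALL mappings whose path
// carries the prctl label, regardless of permissions, so jit_start/jit_end
// cover the whole reserved range rather than just the committed r-xp pages.
func jitBounds(maps []mapping) (start, end uint64, found bool) {
    for _, m := range maps {
        if !strings.Contains(m.Path, "jit_reserve_addr_space") {
            continue
        }
        if !found || m.Vaddr < start {
            start = m.Vaddr
        }
        if !found || m.Vaddr+m.Length > end {
            end = m.Vaddr + m.Length
        }
        found = true
    }
    return start, end, found
}

func main() {
    // Hypothetical layout: one committed r-xp page at the front of a large
    // ---p reservation. Only the executable page would get LPM prefixes in
    // the second pass; the bounds still span the whole reservation.
    maps := []mapping{
        {Vaddr: 0x7f0000000000, Length: 0x10000, Perms: "r-xp",
            Path: "[anon:Ruby:rb_jit_reserve_addr_space]"},
        {Vaddr: 0x7f0000010000, Length: 0x3ff0000, Perms: "---p",
            Path: "[anon:Ruby:rb_jit_reserve_addr_space]"},
    }
    start, end, ok := jitBounds(maps)
    fmt.Printf("jit_start=%#x jit_end=%#x found=%v\n", start, end, ok)
}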


interpreter/ruby/ruby.go

Lines changed: 48 additions & 22 deletions
@@ -1167,38 +1167,56 @@ func hasJitFramePointers(pr process.Process) bool {
 
 func (r *rubyInstance) SynchronizeMappings(ebpf interpreter.EbpfHandler,
     _ reporter.ExecutableReporter, pr process.Process, mappings []process.Mapping) error {
-    var jitMapping *process.Mapping
-
     pid := pr.PID()
-    jitFound := false
     r.mappingGeneration++
 
     log.Debugf("Synchronizing ruby mappings")
 
+    // First pass: detect JIT bounds from ALL mappings (including non-executable).
+    // Ruby reserves a large address range for JIT code via mmap and labels it with
+    // prctl(PR_SET_VMA), giving it a path like "[anon:Ruby:rb_jit_reserve_addr_space]".
+    // The reserved region is typically ---p (non-executable). Ruby then mprotects individual
+    // pages to r-xp as JIT code is compiled. We need the full reserved region bounds for
+    // jit_start/jit_end so the eBPF program can recognize any PC within the JIT range,
+    // even as new pages are made executable.
+    var jitStart, jitEnd uint64
+    jitFound := false
+    for idx := range mappings {
+        m := &mappings[idx]
+        if strings.Contains(m.Path.String(), "jit_reserve_addr_space") {
+            if !jitFound || m.Vaddr < jitStart {
+                jitStart = m.Vaddr
+            }
+            if !jitFound || m.Vaddr+m.Length > jitEnd {
+                jitEnd = m.Vaddr + m.Length
+            }
+            jitFound = true
+        }
+    }
+
+    // Second pass: register LPM prefixes for executable anonymous/JIT mappings.
+    // This only covers r-xp pages so the eBPF unwinder is invoked for JIT code that
+    // has actually been committed. If no labeled JIT region was found above, fall back
+    // to heuristic detection from executable anonymous mappings.
+    var heuristicJitMapping *process.Mapping
     for idx := range mappings {
         m := &mappings[idx]
         if !m.IsExecutable() {
             continue
         }
 
-        // Check for prctl-labeled JIT region first.
-        // On Linux with CONFIG_ANON_VMA_NAME, Ruby labels its JIT memory via prctl(PR_SET_VMA)
-        // which gives it a path like "[anon:Ruby:rb_yjit_reserve_addr_space]".
-        // This is NOT considered anonymous by IsAnonymous() since the path is non-null,
-        // so we must check for it explicitly before the IsAnonymous() filter.
-        if strings.Contains(m.Path.String(), "jit_reserve_addr_space") {
-            jitMapping = m
-            jitFound = true
-        } else if !m.IsAnonymous() {
+        isJitLabeled := strings.Contains(m.Path.String(), "jit_reserve_addr_space")
+        if !isJitLabeled && !m.IsAnonymous() {
             continue
         }
 
-        // Use the first executable anon region we find if it isn't labeled
-        // If we find more, prefer ones earlier in memory or larger in size
-        if !jitFound && (jitMapping == nil || m.Vaddr < jitMapping.Vaddr || m.Length > jitMapping.Length) {
-            // Don't set jitFound here as it is a heuristic, we aren't sure
-            // could be on a system without linux config flag to allow prctl to label memoy
-            jitMapping = m
+        // Heuristic fallback: if no prctl label was found, use the first/smallest-addr
+        // executable anonymous mapping as the JIT region.
+        if !jitFound && !isJitLabeled {
+            if heuristicJitMapping == nil || m.Vaddr < heuristicJitMapping.Vaddr ||
+                m.Length > heuristicJitMapping.Length {
+                heuristicJitMapping = m
+            }
         }
 
         if _, exists := r.mappings[*m]; exists {
@@ -1211,7 +1229,6 @@ func (r *rubyInstance) SynchronizeMappings(ebpf interpreter.EbpfHandler,
         mappingGeneration := r.mappingGeneration
         r.mappings[*m] = &mappingGeneration
 
-        // Just assume all anonymous and executable mappings are Ruby for now
         log.Debugf("Enabling Ruby interpreter for %#x/%#x", m.Vaddr, m.Length)
 
         prefixes, err := lpm.CalculatePrefixList(m.Vaddr, m.Vaddr+m.Length)
@@ -1230,9 +1247,18 @@ func (r *rubyInstance) SynchronizeMappings(ebpf interpreter.EbpfHandler,
             r.prefixes[prefix] = &mappingGeneration
         }
     }
-    if jitMapping != nil && (r.procInfo.Jit_start != jitMapping.Vaddr || r.procInfo.Jit_end != jitMapping.Vaddr+jitMapping.Length) {
-        r.procInfo.Jit_start = jitMapping.Vaddr
-        r.procInfo.Jit_end = jitMapping.Vaddr + jitMapping.Length
+
+    // Determine final JIT bounds: prefer labeled region, fall back to heuristic.
+    if !jitFound && heuristicJitMapping != nil {
+        jitStart = heuristicJitMapping.Vaddr
+        jitEnd = heuristicJitMapping.Vaddr + heuristicJitMapping.Length
+        jitFound = true
+    }
+
+    // Update proc info if JIT bounds changed.
+    if jitStart != 0 && (r.procInfo.Jit_start != jitStart || r.procInfo.Jit_end != jitEnd) {
+        r.procInfo.Jit_start = jitStart
+        r.procInfo.Jit_end = jitEnd
 
         // Detect whether the JIT is emitting frame pointers.
        // On arm64, YJIT always emits frame pointers unconditionally.

process/process.go

Lines changed: 0 additions & 1 deletion
@@ -265,7 +265,6 @@ func parseMappings(mapsFile io.Reader) ([]Mapping, uint32, error) {
             // This is an anonymous mapping, keep it
         } else if strings.HasPrefix(fields[5], "[anon:") {
             // This is an anonymous mapping, that has been named with prctl, keep the name
-            log.Debugf("Got named mapping: %s", fields[5])
             path = libpf.Intern(trimMappingPath(fields[5]))
         } else {
             // Ignore other mappings that are invalid, non-existent or are special pseudo-files

process/types.go

Lines changed: 1 addition & 1 deletion
@@ -45,7 +45,7 @@ func (m *Mapping) IsExecutable() bool {
 }
 
 func (m *Mapping) IsAnonymous() bool {
-    return m.Path == libpf.NullString || m.IsMemFD()
+    return m.Path == libpf.NullString || m.IsMemFD() || strings.HasPrefix(m.Path.String(), "[anon:")
 }
 
 func (m *Mapping) IsMemFD() bool {
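
As a quick illustration of what the IsAnonymous() change means in practice, here is a small sketch using plain strings instead of the libpf interned-string type; the memfd prefix check is a simplified stand-in for m.IsMemFD(), and the paths are made up for the example.

package main

import (
    "fmt"
    "strings"
)

// isAnonymous mirrors the updated predicate on a plain string path: empty
// paths, memfd-backed mappings, and prctl-named "[anon:...]" mappings all
// count as anonymous now.
func isAnonymous(path string) bool {
    return path == "" || // no backing file at all
        strings.HasPrefix(path, "/memfd:") || // stand-in for m.IsMemFD()
        strings.HasPrefix(path, "[anon:") // prctl(PR_SET_VMA)-named mapping
}

func main() {
    for _, p := range []string{
        "",                                      // plain anonymous mapping
        "[anon:Ruby:rb_jit_reserve_addr_space]", // prctl-labeled JIT reservation
        "/usr/lib/libruby.so",                   // file-backed, not anonymous
    } {
        fmt.Printf("%q -> %v\n", p, isAnonymous(p))
    }
}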

support/ebpf/ruby_tracer.ebpf.c

Lines changed: 39 additions & 51 deletions
@@ -20,17 +20,13 @@ struct ruby_procs_t {
 // NOTE the maximum size stack is this times 33
 #define FRAMES_PER_WALK_RUBY_STACK 32
 
-// The maximum number of JIT frames to unwind via frame pointers.
-// YJIT creates one native frame per JIT entry (not per Ruby method),
-// so in practice there is typically only 1 (occasionally 2 for nested entries).
-#define MAX_JIT_FP_FRAMES 4
 // When resolving a CME, we need to traverse environment pointers until we
 // find IMEMO_MENT. Since we can't do a while loop, we have to bound this
 // the max encountered in experimentation on a production rails app is 6.
 // This increases insn for the kernel verifier all code in the ep check "loop"
 // is M*N for instruction checks, so be extra sensitive about additions there.
 // If we get ERR_RUBY_READ_CME_MAX_EP regularly, we may need to raise it.
-#define MAX_EP_CHECKS 6
+#define MAX_EP_CHECKS 6
 
 // Constants related to reading a method entry
 // https://github.com/ruby/ruby/blob/523857bfcb0f0cdfd1ed7faa09b9c59a0266e7e2/method.h#L118
@@ -407,55 +403,27 @@ static EBPF_INLINE ErrorCode walk_ruby_stack(
     record->rubyUnwindState.cfunc_saved_frame = 0;
   }
 
-  // If the CPU PC is in the JIT region, walk the native frame pointer chain through JIT frames.
-  // This follows the same pattern as the V8 unwinder (v8_tracer.ebpf.c): push each JIT frame,
-  // then use unwinder_unwind_frame_pointer() to advance PC/SP/FP to the caller.
-  // YJIT creates one native FP frame per JIT entry, not per Ruby method, so there are
-  // typically only 1-2 frames to walk.
+  // Detect if the CPU PC is in the JIT region.
+  // When frame pointers are available, we keep the native unwind state in sync with
+  // the Ruby VM stack by advancing the FP chain by one frame per loop iteration.
+  // This handles both YJIT (1 JIT frame, exits after first iteration) and ZJIT
+  // (1 JIT frame per iseq, 1:1 with CFPs, stays in sync throughout the walk).
   //
-  // If frame_pointers_enabled is false (e.g. x86_64 without --yjit-perf), we push a single
-  // dummy JIT frame and skip FP walking -- the stack will be truncated at the Ruby VM frames
-  // but won't produce garbage from following an invalid FP chain.
-  if (
-    rubyinfo->jit_start > 0 && record->state.pc > rubyinfo->jit_start &&
-    record->state.pc < rubyinfo->jit_end) {
-    if (rubyinfo->frame_pointers_enabled) {
-      // Walk the native FP chain through JIT frames, pushing each as a JIT frame
-      // so it can potentially be symbolized via perf maps later.
-      UNROLL for (int j = 0; j < MAX_JIT_FP_FRAMES; j++)
-      {
-        ErrorCode jit_error =
-          push_ruby(&record->state, trace, RUBY_FRAME_TYPE_JIT, (u64)record->state.pc, 0, 0);
-        if (jit_error) {
-          return jit_error;
-        }
+  // When frame pointers are not available, we push a single dummy JIT frame and
+  // set jit_detected to suppress native unwinding.
+  bool in_jit = rubyinfo->jit_start > 0 && record->state.pc > rubyinfo->jit_start &&
+                record->state.pc < rubyinfo->jit_end;
 
-        if (!unwinder_unwind_frame_pointer(&record->state)) {
-          // FP chain broken, cannot continue
-          *next_unwinder = PROG_UNWIND_STOP;
-          return ERR_OK;
-        }
-
-        // Check if we've left the JIT region
-        if (record->state.pc < rubyinfo->jit_start || record->state.pc >= rubyinfo->jit_end) {
-          break;
-        }
-      }
-      // After walking JIT frames, PC should be in rb_vm_exec or other native code.
-      // We must resolve the mapping for the new PC so that text_section_id/offset/bias
-      // are up to date. Without this, the native unwinder would try to use stale mapping
-      // info from the JIT region and fail with ERR_NATIVE_NO_PID_PAGE_MAPPING.
-      ErrorCode map_err = get_next_unwinder_after_native_frame(record, next_unwinder);
-      if (map_err) {
-        return map_err;
+  if (in_jit) {
+    if (rubyinfo->frame_pointers_enabled) {
+      // Push a leaf JIT frame with the raw machine PC for perf-map symbolization.
+      ErrorCode jit_error =
+        push_ruby(&record->state, trace, RUBY_FRAME_TYPE_JIT, (u64)record->state.pc, 0, 0);
+      if (jit_error) {
+        return jit_error;
       }
-      // The resolved unwinder should be PROG_UNWIND_RUBY (since PC is in rb_vm_exec
-      // which is in interpreter_offsets) or PROG_UNWIND_NATIVE. Either way, we continue
-      // with the Ruby VM stack walk below and the mapping state is now correct for when
-      // we eventually hand off to the native unwinder.
     } else {
       // No frame pointers available: push a single dummy JIT frame.
-      // We cannot walk the FP chain so we will not be able to resume native unwinding.
      // Mark jit_detected so that cfuncs are pushed inline and end-of-stack uses
      // PROG_UNWIND_STOP instead of PROG_UNWIND_NATIVE.
      record->rubyUnwindState.jit_detected = true;
@@ -464,19 +432,39 @@ static EBPF_INLINE ErrorCode walk_ruby_stack(
      if (jit_error) {
        return jit_error;
      }
+      in_jit = false;
    }
  }
 
  UNROLL for (u32 i = 0; i < FRAMES_PER_WALK_RUBY_STACK; ++i)
  {
+    // Keep the native unwind state in sync: if the native PC is still in the JIT
+    // region, advance it by one frame pointer to match the Ruby VM stack pop.
+    // For YJIT this exits JIT on the first iteration. For ZJIT this pops one JIT
+    // native frame per CFP, keeping the two stacks in lockstep.
+    if (in_jit) {
+      if (!unwinder_unwind_frame_pointer(&record->state)) {
+        *next_unwinder = PROG_UNWIND_STOP;
+        return ERR_OK;
+      }
+      if (record->state.pc < rubyinfo->jit_start || record->state.pc >= rubyinfo->jit_end) {
+        // Exited the JIT region. Resolve the mapping for the post-JIT PC so that
+        // text_section_id/offset/bias are correct for native unwinding later.
+        in_jit = false;
+        ErrorCode map_err = get_next_unwinder_after_native_frame(record, next_unwinder);
+        if (map_err) {
+          return map_err;
+        }
+      }
+    }
+
    error = read_ruby_frame(record, rubyinfo, stack_ptr, next_unwinder);
    if (error != ERR_OK)
      return error;
 
    if (last_stack_frame <= stack_ptr) {
      // We have processed all frames in the Ruby VM and can stop here.
-      // If we walked through JIT frames via FP, the state is clean and native unwinding
-      // can continue. If JIT was detected without FP, the PC is still in the JIT region
+      // If JIT was detected without FP, the PC is still in the JIT region
      // and native unwinding would fail, so we stop.
      *next_unwinder = record->rubyUnwindState.jit_detected ? PROG_UNWIND_STOP : PROG_UNWIND_NATIVE;
      goto save_state;
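
As a mental model of the per-iteration unwind, here is an illustrative Go simulation; it is not the eBPF code, and the frame counts are hypothetical. The walker pops one native FP frame per CFP iteration, so a YJIT-style single JIT entry frame leaves the JIT region on the first iteration, while a ZJIT-style one-frame-per-iseq chain stays in lockstep with the CFPs until the walk ends.

package main

import "fmt"

// walk is a toy model: each iteration reads one Ruby CFP; while the native PC
// is still "in JIT", it also advances the native FP chain by exactly one frame.
func walk(cfps []string, nativeJitFrames int) {
    inJit := nativeJitFrames > 0
    for i, cfp := range cfps {
        if inJit {
            nativeJitFrames-- // one unwinder_unwind_frame_pointer() step
            if nativeJitFrames == 0 {
                inJit = false // PC left the JIT region; mapping would be re-resolved here
            }
        }
        fmt.Printf("iter %d: CFP %-4s native JIT frames left: %d\n", i, cfp, nativeJitFrames)
    }
}

func main() {
    // YJIT-like: one native JIT entry frame, gone after the first iteration.
    walk([]string{"foo", "bar", "baz"}, 1)
    // ZJIT-like: one native JIT frame per compiled iseq, 1:1 with the CFPs.
    walk([]string{"foo", "bar", "baz"}, 3)
}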
