Skip to content

Commit 59c794c

Browse files
committed
Walk YJIT JIT frames via frame pointers for full stack unwinding
Replace the jit_detected flag approach with V8-style frame pointer unwinding through Ruby JIT frames. When YJIT emits frame pointers (always on arm64, with --yjit-perf on x86_64), the Ruby eBPF unwinder walks the native FP chain through JIT frames, pushes each as a RUBY_FRAME_TYPE_JIT frame, then resolves the post-JIT mapping so native unwinding can continue below the Ruby VM stack. When frame pointers are not available, the original behavior is preserved: a single dummy JIT frame is pushed, cfuncs are pushed inline, and native unwinding is stopped at the end of the Ruby stack. Also fixes parseMappings discarding prctl-labeled [anon:...] mappings, which prevented the YJIT JIT region from being visible to interpreter handlers.
1 parent 5f6e044 commit 59c794c

5 files changed

Lines changed: 118 additions & 19 deletions

File tree

interpreter/ruby/ruby.go

Lines changed: 43 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,12 @@
44
package ruby // import "go.opentelemetry.io/ebpf-profiler/interpreter/ruby"
55

66
import (
7+
"debug/elf"
78
"encoding/binary"
89
"errors"
910
"fmt"
1011
"math/bits"
12+
"os"
1113
"regexp"
1214
"runtime"
1315
"strconv"
@@ -1141,6 +1143,28 @@ func profileFrameFullLabel(classPath, label, baseLabel, methodName libpf.String,
11411143
return libpf.Intern(profileLabel)
11421144
}
11431145

1146+
// hasJitFramePointers reports whether YJIT is assumed to be emitting frame pointers for the process pr.
1147+
// On arm64 (elf.EM_AARCH64) it returns true without further checks: YJIT always emits frame pointers there.
1148+
// On x86_64, frame pointers are only emitted when --yjit-perf or --yjit-perf=fp is used.
1149+
// When --yjit-perf is active, YJIT also creates /tmp/perf-PID.map, so the existence of that file is the
1150+
// detection signal on x86_64; any os.Stat error (not just "not found") yields false.
1151+
func hasJitFramePointers(pr process.Process) bool {
1152+
machine := pr.GetMachineData().Machine
1153+
if machine == elf.EM_AARCH64 {
1154+
// YJIT on arm64 always emits frame pointers (unconditionally in the backend).
1155+
return true
1156+
}
1157+
1158+
// Every non-arm64 machine takes this path (x86_64 in practice): check for the perf map file which indicates --yjit-perf was used.
1159+
// The --yjit-perf flag enables both frame pointers and the perf map. NOTE(review): the path is built from pr.PID() and stat'ed in the profiler's own filesystem view; confirm this matches the file a containerized target writes.
1160+
perfMapPath := fmt.Sprintf("/tmp/perf-%d.map", pr.PID())
1161+
if _, err := os.Stat(perfMapPath); err == nil {
1162+
return true
1163+
}
1164+
1165+
return false
1166+
}
1167+
11441168
func (r *rubyInstance) SynchronizeMappings(ebpf interpreter.EbpfHandler,
11451169
_ reporter.ExecutableReporter, pr process.Process, mappings []process.Mapping) error {
11461170
var jitMapping *process.Mapping
@@ -1153,15 +1177,22 @@ func (r *rubyInstance) SynchronizeMappings(ebpf interpreter.EbpfHandler,
11531177

11541178
for idx := range mappings {
11551179
m := &mappings[idx]
1156-
if !m.IsExecutable() || !m.IsAnonymous() {
1180+
if !m.IsExecutable() {
11571181
continue
11581182
}
1159-
// If prctl is allowed, ruby should label the memory region
1160-
// always prefer that
1183+
1184+
// Check for prctl-labeled JIT region first.
1185+
// On Linux with CONFIG_ANON_VMA_NAME, Ruby labels its JIT memory via prctl(PR_SET_VMA)
1186+
// which gives it a path like "[anon:Ruby:rb_yjit_reserve_addr_space]".
1187+
// This is NOT considered anonymous by IsAnonymous() since the path is non-null,
1188+
// so we must check for it explicitly before the IsAnonymous() filter.
11611189
if strings.Contains(m.Path.String(), "jit_reserve_addr_space") {
11621190
jitMapping = m
11631191
jitFound = true
1192+
} else if !m.IsAnonymous() {
1193+
continue
11641194
}
1195+
11651196
// Use the first executable anon region we find if it isn't labeled
11661197
// If we find more, prefer ones earlier in memory or larger in size
11671198
if !jitFound && (jitMapping == nil || m.Vaddr < jitMapping.Vaddr || m.Length > jitMapping.Length) {
@@ -1202,10 +1233,18 @@ func (r *rubyInstance) SynchronizeMappings(ebpf interpreter.EbpfHandler,
12021233
if jitMapping != nil && (r.procInfo.Jit_start != jitMapping.Vaddr || r.procInfo.Jit_end != jitMapping.Vaddr+jitMapping.Length) {
12031234
r.procInfo.Jit_start = jitMapping.Vaddr
12041235
r.procInfo.Jit_end = jitMapping.Vaddr + jitMapping.Length
1236+
1237+
// Detect whether the JIT is emitting frame pointers.
1238+
// On arm64, YJIT always emits frame pointers unconditionally.
1239+
// On x86_64, frame pointers are only emitted with --yjit-perf or --yjit-perf=fp,
1240+
// which also creates a /tmp/perf-PID.map file as a side effect.
1241+
r.procInfo.Frame_pointers_enabled = hasJitFramePointers(pr)
1242+
12051243
if err := ebpf.UpdateProcData(libpf.Ruby, pr.PID(), unsafe.Pointer(r.procInfo)); err != nil {
12061244
return err
12071245
}
1208-
log.Debugf("Added jit mapping %08x ruby proc info, %08x", r.procInfo.Jit_start, r.procInfo.Jit_end)
1246+
log.Debugf("Added jit mapping %08x-%08x ruby proc info (frame_pointers=%v)",
1247+
r.procInfo.Jit_start, r.procInfo.Jit_end, r.procInfo.Frame_pointers_enabled)
12091248
}
12101249
// Remove prefixes not seen
12111250
for prefix, generationPtr := range r.prefixes {

process/process.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,10 @@ func parseMappings(mapsFile io.Reader) ([]Mapping, uint32, error) {
263263
inode = vdsoInode
264264
} else if fields[5] == "" {
265265
// This is an anonymous mapping, keep it
266+
} else if strings.HasPrefix(fields[5], "[anon:") {
267+
// This is an anonymous mapping that has been named with prctl; keep the name.
268+
log.Debugf("Got named mapping: %s", fields[5])
269+
path = libpf.Intern(trimMappingPath(fields[5]))
266270
} else {
267271
// Ignore other mappings that are invalid, non-existent or are special pseudo-files
268272
continue

support/ebpf/ruby_tracer.ebpf.c

Lines changed: 63 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,18 @@ struct ruby_procs_t {
1919
// option is to adjust this number downwards.
2020
// NOTE: the maximum stack size is this times 33
2121
#define FRAMES_PER_WALK_RUBY_STACK 32
22+
23+
// The maximum number of JIT frames to unwind via frame pointers.
24+
// YJIT creates one native frame per JIT entry (not per Ruby method),
25+
// so in practice there is typically only 1 (occasionally 2 for nested entries).
26+
#define MAX_JIT_FP_FRAMES 4
2227
// When resolving a CME, we need to traverse environment pointers until we
2328
// find IMEMO_MENT. Since we can't do a while loop, we have to bound this
2429
// the max encountered in experimentation on a production rails app is 6.
2530
// Raising this increases the instruction count seen by the kernel verifier: all code in the ep check "loop"
2631
// is M*N for instruction checks, so be extra sensitive about additions there.
2732
// If we get ERR_RUBY_READ_CME_MAX_EP regularly, we may need to raise it.
28-
#define MAX_EP_CHECKS 6
33+
#define MAX_EP_CHECKS 6
2934

3035
// Constants related to reading a method entry
3136
// https://github.com/ruby/ruby/blob/523857bfcb0f0cdfd1ed7faa09b9c59a0266e7e2/method.h#L118
@@ -222,8 +227,9 @@ static EBPF_INLINE ErrorCode read_ruby_frame(
222227
// frames will almost certainly be incorrect for Ruby versions < 2.6.
223228
frame_type = RUBY_FRAME_TYPE_CME_CFUNC;
224229
} else if (record->rubyUnwindState.jit_detected) {
225-
// If we detected a jit frame and are now in a cfunc, push the c frame
226-
// as we can no longer unwind native anymore
230+
// JIT is active but frame pointers are not available, so we cannot unwind
231+
// through JIT frames to get back to native code. Push the cfunc inline
232+
// instead of handing off to the native unwinder.
227233
frame_type = RUBY_FRAME_TYPE_CME_CFUNC;
228234
} else {
229235
// We save this cfp in the "Record" entry, and when we start the unwinder
@@ -401,19 +407,62 @@ static EBPF_INLINE ErrorCode walk_ruby_stack(
401407
record->rubyUnwindState.cfunc_saved_frame = 0;
402408
}
403409

410+
// If the CPU PC is in the JIT region, walk the native frame pointer chain through JIT frames.
411+
// This follows the same pattern as the V8 unwinder (v8_tracer.ebpf.c): push each JIT frame,
412+
// then use unwinder_unwind_frame_pointer() to advance PC/SP/FP to the caller.
413+
// YJIT creates one native FP frame per JIT entry, not per Ruby method, so there are
414+
// typically only 1-2 frames to walk.
415+
//
416+
// If frame_pointers_enabled is false (e.g. x86_64 without --yjit-perf), we push a single
417+
// dummy JIT frame and skip FP walking -- the stack will be truncated at the Ruby VM frames
418+
// but won't produce garbage from following an invalid FP chain.
404419
if (
405420
rubyinfo->jit_start > 0 && record->state.pc > rubyinfo->jit_start &&
406421
record->state.pc < rubyinfo->jit_end) {
407-
record->rubyUnwindState.jit_detected = true;
422+
if (rubyinfo->frame_pointers_enabled) {
423+
// Walk the native FP chain through JIT frames, pushing each as a JIT frame
424+
// so it can potentially be symbolized via perf maps later.
425+
UNROLL for (int j = 0; j < MAX_JIT_FP_FRAMES; j++)
426+
{
427+
ErrorCode jit_error =
428+
push_ruby(&record->state, trace, RUBY_FRAME_TYPE_JIT, (u64)record->state.pc, 0, 0);
429+
if (jit_error) {
430+
return jit_error;
431+
}
432+
433+
if (!unwinder_unwind_frame_pointer(&record->state)) {
434+
// FP chain broken, cannot continue
435+
*next_unwinder = PROG_UNWIND_STOP;
436+
return ERR_OK;
437+
}
408438

409-
// If the first frame is a jit PC, the leaf ruby frame should be the jit "owner"
410-
// the cpu PC is also pushed as the address,
411-
// as in theory this can be used to symbolize the JIT frame later
412-
if (trace->num_frames == 0) {
413-
ErrorCode error =
439+
// Check if we've left the JIT region
440+
if (record->state.pc < rubyinfo->jit_start || record->state.pc >= rubyinfo->jit_end) {
441+
break;
442+
}
443+
}
444+
// After walking JIT frames, PC should be in rb_vm_exec or other native code.
445+
// We must resolve the mapping for the new PC so that text_section_id/offset/bias
446+
// are up to date. Without this, the native unwinder would try to use stale mapping
447+
// info from the JIT region and fail with ERR_NATIVE_NO_PID_PAGE_MAPPING.
448+
ErrorCode map_err = get_next_unwinder_after_native_frame(record, next_unwinder);
449+
if (map_err) {
450+
return map_err;
451+
}
452+
// The resolved unwinder should be PROG_UNWIND_RUBY (since PC is in rb_vm_exec
453+
// which is in interpreter_offsets) or PROG_UNWIND_NATIVE. Either way, we continue
454+
// with the Ruby VM stack walk below and the mapping state is now correct for when
455+
// we eventually hand off to the native unwinder.
456+
} else {
457+
// No frame pointers available: push a single dummy JIT frame.
458+
// We cannot walk the FP chain so we will not be able to resume native unwinding.
459+
// Mark jit_detected so that cfuncs are pushed inline and end-of-stack uses
460+
// PROG_UNWIND_STOP instead of PROG_UNWIND_NATIVE.
461+
record->rubyUnwindState.jit_detected = true;
462+
ErrorCode jit_error =
414463
push_ruby(&record->state, trace, RUBY_FRAME_TYPE_JIT, (u64)record->state.pc, 0, 0);
415-
if (error) {
416-
return error;
464+
if (jit_error) {
465+
return jit_error;
417466
}
418467
}
419468
}
@@ -426,8 +475,9 @@ static EBPF_INLINE ErrorCode walk_ruby_stack(
426475

427476
if (last_stack_frame <= stack_ptr) {
428477
// We have processed all frames in the Ruby VM and can stop here.
429-
// if this process has been JIT'd, the PC is invalid and we cannot resume native unwinding so
430-
// we are done
478+
// If we walked through JIT frames via FP, the state is clean and native unwinding
479+
// can continue. If JIT was detected without FP, the PC is still in the JIT region
480+
// and native unwinding would fail, so we stop.
431481
*next_unwinder = record->rubyUnwindState.jit_detected ? PROG_UNWIND_STOP : PROG_UNWIND_NATIVE;
432482
goto save_state;
433483
} else {

support/ebpf/types.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -463,6 +463,10 @@ typedef struct RubyProcInfo {
463463
// JIT regions, for detecting if a native PC was JIT
464464
u64 jit_start, jit_end;
465465

466+
// Whether the JIT is emitting frame pointers (e.g. --yjit-perf on x86_64, always on arm64).
467+
// When true, we walk the native FP chain through JIT frames instead of stopping.
468+
bool frame_pointers_enabled;
469+
466470
// Offsets and sizes of Ruby internal structs
467471

468472
// rb_execution_context_struct offsets:
@@ -689,7 +693,8 @@ typedef struct RubyUnwindState {
689693
void *last_stack_frame;
690694
// Frame for last cfunc before we switched to native unwinder
691695
u64 cfunc_saved_frame;
692-
// Detect if JIT code ran in the process (at any time)
696+
// Set when JIT code is detected in the current trace and frame pointers are not available.
697+
// Used to suppress native unwinding and push cfuncs inline.
693698
bool jit_detected;
694699
} RubyUnwindState;
695700

support/types.go

Lines changed: 2 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)