Skip to content

Commit c434fbd

Browse files
committed
JIT: add register value caching across all backends
Add register value tracking (jit_regs) to all JIT backends (x86-64, aarch64, armv6m, riscv32). This enables optimization of redundant loads when a register already contains the needed value. Signed-off-by: Paul Guyot <pguyot@kallisys.net>
1 parent e2dda4e commit c434fbd

14 files changed

Lines changed: 2606 additions & 981 deletions

libs/jit/src/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ include(BuildErlang)
2525
set(ERLANG_MODULES
2626
jit
2727
jit_precompile
28+
jit_regs
2829
jit_stream_binary
2930
jit_stream_flash
3031
jit_stream_mmap

libs/jit/src/jit_aarch64.erl

Lines changed: 348 additions & 155 deletions
Large diffs are not rendered by default.

libs/jit/src/jit_armv6m.erl

Lines changed: 461 additions & 178 deletions
Large diffs are not rendered by default.

libs/jit/src/jit_regs.erl

Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
%
2+
% This file is part of AtomVM.
3+
%
4+
% Copyright 2025-2026 Paul Guyot <pguyot@kallisys.net>
5+
%
6+
% Licensed under the Apache License, Version 2.0 (the "License");
7+
% you may not use this file except in compliance with the License.
8+
% You may obtain a copy of the License at
9+
%
10+
% http://www.apache.org/licenses/LICENSE-2.0
11+
%
12+
% Unless required by applicable law or agreed to in writing, software
13+
% distributed under the License is distributed on an "AS IS" BASIS,
14+
% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
% See the License for the specific language governing permissions and
16+
% limitations under the License.
17+
%
18+
% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
19+
%
20+
21+
%% @doc Track CPU register contents for the JIT backend.
22+
%%
23+
%% This module maintains knowledge about what each CPU register holds
24+
%% (VM x/y register values, immediates, pointers), enabling the backend to
25+
%% skip redundant loads.
26+
%%
27+
%% Tracking must be invalidated at:
28+
%% - Labels (any offset can be a branch target)
29+
%% - Function calls (ABI clobbers caller-saved registers)
30+
%% - Branches (the target has unknown incoming register state)
31+
%%
32+
%% The tracked information includes:
33+
%% - `contents`: maps cpu_reg -> what the register holds
%% - `stack`: values recorded as pushed to the C stack, most recent first
34+
35+
-module(jit_regs).
36+
37+
-export([
38+
new/0,
39+
get_contents/2,
40+
set_contents/3,
41+
invalidate_reg/2,
42+
invalidate_all/1,
43+
invalidate_volatile/2,
44+
invalidate_vm_loc/2,
45+
find_reg_with_contents/2,
46+
merge/2,
47+
stack_push/2,
48+
stack_pop/1,
49+
stack_clear/1,
50+
stack_contents/1,
51+
value_to_contents/2,
52+
vm_dest_to_contents/2,
53+
regs_to_mask/2
54+
]).
55+
56+
-export_type([regs/0, contents/0]).
57+
58+
%% A VM register location: an x or y register identified by its index.
-type vm_loc() ::
59+
{x_reg, non_neg_integer()}
60+
| {y_reg, non_neg_integer()}.
61+
62+
%% What a CPU register holds
63+
-type contents() ::
64+
%% Register holds the value of a VM x/y register
vm_loc()
65+
%% NOTE(review): presumably a pointer derived from the term stored in the
%% given VM location -- confirm against the backends that set it
| {ptr, vm_loc()}
66+
%% Register holds a known immediate value
67+
| {imm, integer()}
68+
%% Register holds the address of CP
69+
| cp
70+
%% Register holds the module index
71+
| module_index
72+
%% Unknown / clobbered
73+
| unknown.
74+
75+
%% Tracking state.
%% `contents` maps a CPU register atom to what it holds; a register with no
%% entry is reported as `unknown` by get_contents/2.
%% `stack` lists the values recorded by stack_push/2, most recent push first.
-record(regs, {
76+
contents = #{} :: #{atom() => contents()},
77+
stack = [] :: [atom() | contents()]
78+
}).
79+
80+
%% Opaque to callers: use the exported functions instead of matching on
%% the record.
-opaque regs() :: #regs{}.
81+
82+
%% @doc Return a fresh tracking state: no CPU register has known contents
%% and no stack entry is recorded.
-spec new() -> regs().
new() ->
    #regs{contents = #{}, stack = []}.
86+
87+
%% @doc Look up what is recorded for CPU register `Reg'.
%% Returns `unknown' when nothing is recorded for that register.
-spec get_contents(regs(), atom()) -> contents().
get_contents(#regs{contents = Tracked}, Reg) ->
    case Tracked of
        #{Reg := Held} -> Held;
        #{} -> unknown
    end.
91+
92+
%% @doc Remember that CPU register `Reg' now holds `Contents', replacing
%% whatever was recorded for it before.
-spec set_contents(regs(), atom(), contents()) -> regs().
set_contents(#regs{contents = Tracked} = State, Reg, Contents) ->
    Updated = Tracked#{Reg => Contents},
    State#regs{contents = Updated}.
96+
97+
%% @doc Forget what CPU register `Reg' holds (e.g. because it was clobbered).
%% Other registers and the stack tracking are left untouched.
-spec invalidate_reg(regs(), atom()) -> regs().
invalidate_reg(#regs{contents = Tracked} = State, Reg) ->
    Remaining = maps:remove(Reg, Tracked),
    State#regs{contents = Remaining}.
101+
102+
%% @doc Drop every piece of tracked information: all register contents and
%% the recorded stack (e.g. at a label or an unknown branch target).
-spec invalidate_all(regs()) -> regs().
invalidate_all(Regs) ->
    NoStack = Regs#regs{stack = []},
    NoStack#regs{contents = #{}}.
106+
107+
%% @doc Invalidate the registers that do not survive a C function call.
%% `PreservedRegs' lists the CPU registers whose tracking must be kept
%% (the original notes cite the x86-64 case, where rdi, rsi and rdx are
%% restored by the JIT after the call); every other entry is dropped.
%% The stack tracking is not modified.
-spec invalidate_volatile(regs(), [atom()]) -> regs().
invalidate_volatile(#regs{contents = Tracked} = State, PreservedRegs) ->
    IsPreserved = fun(Reg, _Held) -> lists:member(Reg, PreservedRegs) end,
    State#regs{contents = maps:filter(IsPreserved, Tracked)}.
116+
117+
%% @doc Invalidate all CPU registers that reference a given VM location.
%% Call this when a VM register is written to, so that any CPU register
%% that was caching its old value is invalidated.
%%
%% Both direct entries (`VmLoc') and pointer entries (`{ptr, VmLoc}') are
%% dropped: once the location is overwritten, neither describes its new
%% value any more.
%% NOTE(review): this assumes `{ptr, VmLoc}' denotes a pointer derived from
%% the value stored in `VmLoc' -- confirm against the backends.
%% The stack tracking is not modified.
-spec invalidate_vm_loc(regs(), vm_loc()) -> regs().
invalidate_vm_loc(#regs{contents = C} = Regs, VmLoc) ->
    StalePtr = {ptr, VmLoc},
    StillValid = fun(_Reg, Val) -> Val =/= VmLoc andalso Val =/= StalePtr end,
    Regs#regs{contents = maps:filter(StillValid, C)}.
124+
125+
%% @doc Find a CPU register that holds the given contents.
%% Returns `{ok, Reg}' or `none'.
%%
%% Searching for `unknown' always yields `none': an entry stored as
%% `unknown' means the register's value is not tracked, so it must never be
%% reported as a cache hit. When several registers hold `Contents', which
%% one is returned depends on the map iteration order.
-spec find_reg_with_contents(regs(), contents()) -> {ok, atom()} | none.
find_reg_with_contents(#regs{}, unknown) ->
    none;
find_reg_with_contents(#regs{contents = C}, Contents) ->
    find_in_map(maps:iterator(C), Contents).

%% Walk the map iterator until an entry whose value equals Contents is found.
find_in_map(Iterator, Contents) ->
    case maps:next(Iterator) of
        %% Contents is already bound, so this clause only matches equal values.
        {Reg, Contents, _Next} -> {ok, Reg};
        {_Reg, _Other, Next} -> find_in_map(Next, Contents);
        none -> none
    end.
137+
138+
%% @doc Combine two tracking states at a control flow merge point.
%% A register entry survives only if both states record exactly the same
%% contents for it. The resulting state has an empty stack.
-spec merge(regs(), regs()) -> regs().
merge(#regs{contents = Left}, #regs{contents = Right}) ->
    AgreesWithRight = fun(Reg, Held) ->
        Held =:= maps:get(Reg, Right, undefined)
    end,
    Common = maps:filter(AgreesWithRight, Left),
    #regs{contents = Common, stack = []}.
148+
149+
%% @doc Note that `Value' was pushed to the C stack.
%% The new entry becomes the top of the tracked stack.
-spec stack_push(regs(), atom() | contents()) -> regs().
stack_push(#regs{stack = Below} = State, Value) ->
    Grown = [Value | Below],
    State#regs{stack = Grown}.
153+
154+
%% @doc Note that the top of the C stack was popped.
%% Returns the entry that was on top together with the updated state.
%% When no push is tracked, returns `unknown' and the state unchanged.
-spec stack_pop(regs()) -> {atom() | contents(), regs()}.
stack_pop(#regs{stack = Stack} = State) ->
    case Stack of
        [] -> {unknown, State};
        [Top | Below] -> {Top, State#regs{stack = Below}}
    end.
160+
161+
%% @doc Forget every tracked C stack entry; register contents are kept.
-spec stack_clear(regs()) -> regs().
stack_clear(Regs) ->
    Emptied = [],
    Regs#regs{stack = Emptied}.
165+
166+
%% @doc Return the tracked C stack entries, most recent push first.
-spec stack_contents(regs()) -> [atom() | contents()].
stack_contents(#regs{} = State) ->
    State#regs.stack.
169+
170+
%% @doc Translate a backend value into the contents descriptor used for
%% tracking.
%% `MaxReg' is the maximum number of x registers (typically ?MAX_REG from
%% jit.hrl); it is the index used for `{x_reg, extra}'.
%% Integers become `{imm, Integer}', `cp' and x/y register references are
%% kept, and anything else (including `{ptr, _}') maps to `unknown'.
-spec value_to_contents(term(), non_neg_integer()) -> contents().
value_to_contents(Value, MaxReg) ->
    case Value of
        cp -> cp;
        {x_reg, extra} -> {x_reg, MaxReg};
        {x_reg, Index} when is_integer(Index) -> Value;
        {y_reg, _Index} -> Value;
        _ when is_integer(Value) -> {imm, Value};
        _ -> unknown
    end.
180+
181+
%% @doc Translate a VM destination register into the contents descriptor
%% used for tracking.
%% `MaxReg' is the maximum number of x registers (typically ?MAX_REG from
%% jit.hrl). An x register is accepted only when its index is below
%% `MaxReg'; `{x_reg, extra}' is represented as `{x_reg, MaxReg}'.
%% Any other destination maps to `unknown'.
-spec vm_dest_to_contents(term(), non_neg_integer()) -> contents().
vm_dest_to_contents(Dest, MaxReg) ->
    case Dest of
        {x_reg, extra} -> {x_reg, MaxReg};
        {x_reg, X} when is_integer(X), X < MaxReg -> Dest;
        {y_reg, _Y} -> Dest;
        _ -> unknown
    end.
188+
189+
%% @doc Build a bitmask from a list of register atoms.
%% The entries `imm', `jit_state' and `stack' are not registers and are
%% skipped. For every other entry the result of `RegBitFn(Reg)' is OR-ed
%% into the mask as-is, so `RegBitFn' must return the register's mask bits.
%% An empty list yields 0.
-spec regs_to_mask([atom()], fun((atom()) -> non_neg_integer())) -> non_neg_integer().
regs_to_mask(Regs, RegBitFn) ->
    lists:foldl(
        fun
            (imm, Mask) -> Mask;
            (jit_state, Mask) -> Mask;
            (stack, Mask) -> Mask;
            (Reg, Mask) -> Mask bor RegBitFn(Reg)
        end,
        0,
        Regs
    ).

0 commit comments

Comments
 (0)