Skip to content

Commit 6d771f3

Browse files
committed
Merge pull request #2179 from pguyot/w10/add-register-value-caching
JIT: add register value caching across all backends These changes are made under both the "Apache 2.0" and the "GNU Lesser General Public License 2.1 or later" license terms (dual license). SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
2 parents 0ce9854 + c434fbd commit 6d771f3

14 files changed

Lines changed: 2606 additions & 981 deletions

libs/jit/src/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ include(BuildErlang)
2525
set(ERLANG_MODULES
2626
jit
2727
jit_precompile
28+
jit_regs
2829
jit_stream_binary
2930
jit_stream_flash
3031
jit_stream_mmap

libs/jit/src/jit_aarch64.erl

Lines changed: 348 additions & 155 deletions
Large diffs are not rendered by default.

libs/jit/src/jit_armv6m.erl

Lines changed: 461 additions & 178 deletions
Large diffs are not rendered by default.

libs/jit/src/jit_regs.erl

Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
%
2+
% This file is part of AtomVM.
3+
%
4+
% Copyright 2025-2026 Paul Guyot <pguyot@kallisys.net>
5+
%
6+
% Licensed under the Apache License, Version 2.0 (the "License");
7+
% you may not use this file except in compliance with the License.
8+
% You may obtain a copy of the License at
9+
%
10+
% http://www.apache.org/licenses/LICENSE-2.0
11+
%
12+
% Unless required by applicable law or agreed to in writing, software
13+
% distributed under the License is distributed on an "AS IS" BASIS,
14+
% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
% See the License for the specific language governing permissions and
16+
% limitations under the License.
17+
%
18+
% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
19+
%
20+
21+
%% @doc Track CPU register contents for the JIT backend.
22+
%%
23+
%% This module maintains knowledge about what each CPU register holds
24+
%% (VM x/y register values, immediates, pointers), enabling the backend to
25+
%% skip redundant loads.
26+
%%
27+
%% Tracking must be invalidated at:
28+
%% - Labels (any offset can be a branch target)
29+
%% - Function calls (ABI clobbers caller-saved registers)
30+
%% - Branches (the target has unknown incoming register state)
31+
%%
32+
%% The tracked information includes:
33+
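%% - `contents`: maps each CPU register to what it is currently known to hold
%% - `stack`: descriptors of the values pushed to the C stack, most recent first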
34+
35+
-module(jit_regs).
36+
37+
-export([
38+
new/0,
39+
get_contents/2,
40+
set_contents/3,
41+
invalidate_reg/2,
42+
invalidate_all/1,
43+
invalidate_volatile/2,
44+
invalidate_vm_loc/2,
45+
find_reg_with_contents/2,
46+
merge/2,
47+
stack_push/2,
48+
stack_pop/1,
49+
stack_clear/1,
50+
stack_contents/1,
51+
value_to_contents/2,
52+
vm_dest_to_contents/2,
53+
regs_to_mask/2
54+
]).
55+
56+
-export_type([regs/0, contents/0]).
57+
58+
%% A VM x or y register, identified by its index.
-type vm_loc() ::
    {x_reg, non_neg_integer()}
    | {y_reg, non_neg_integer()}.

%% Descriptor of what a CPU register is known to hold.
-type contents() ::
    %% Register holds the value of a VM register
    vm_loc()
    %% NOTE(review): presumably a pointer derived from the term stored in that
    %% VM location -- confirm against the backends that set it.
    | {ptr, vm_loc()}
    %% Register holds a known immediate value
    | {imm, integer()}
    %% Register holds the address of CP
    | cp
    %% Register holds the module index
    | module_index
    %% Unknown / clobbered
    | unknown.
74+
75+
%% Tracking state threaded through code generation.
-record(regs, {
    %% CPU register -> descriptor of what it holds; registers without an
    %% entry are reported as `unknown` by get_contents/2.
    contents = #{} :: #{atom() => contents()},
    %% Values recorded by stack_push/2, most recently pushed first.
    stack = [] :: [atom() | contents()]
}).

%% Opaque so that callers only go through the functions of this module.
-opaque regs() :: #regs{}.
81+
82+
%% @doc Return a fresh tracking state: no register has known contents and
%% the tracked C stack is empty.
-spec new() -> regs().
new() ->
    #regs{contents = #{}, stack = []}.
86+
87+
%% @doc Look up what a CPU register is currently known to hold.
%% Registers that are not tracked are reported as `unknown`.
-spec get_contents(regs(), atom()) -> contents().
get_contents(#regs{contents = Tracked}, Reg) ->
    case Tracked of
        #{Reg := Held} -> Held;
        #{} -> unknown
    end.
91+
92+
%% @doc Remember that CPU register `Reg` now holds `Contents`, replacing
%% whatever was previously recorded for that register.
-spec set_contents(regs(), atom(), contents()) -> regs().
set_contents(Regs = #regs{contents = Tracked}, Reg, Contents) ->
    Updated = Tracked#{Reg => Contents},
    Regs#regs{contents = Updated}.
96+
97+
%% @doc Forget what a single CPU register holds (e.g. because it was
%% clobbered). Forgetting an untracked register leaves the state unchanged.
-spec invalidate_reg(regs(), atom()) -> regs().
invalidate_reg(Regs = #regs{contents = Tracked}, Reg) ->
    Remaining = maps:remove(Reg, Tracked),
    Regs#regs{contents = Remaining}.
101+
102+
%% @doc Drop every piece of tracked information (e.g. at a label or an
%% unknown branch target): both the register contents and the recorded
%% C stack are reset to empty.
-spec invalidate_all(regs()) -> regs().
invalidate_all(Regs) ->
    Regs#regs{stack = [], contents = #{}}.
106+
107+
%% @doc Invalidate registers that are volatile across a C function call.
%% Tracking is kept only for the registers listed in `PreservedRegs`; every
%% other register is forgotten. The backend decides which registers survive
%% the call (for instance the ones it saves and restores itself around it).
%% The recorded C stack is left untouched.
-spec invalidate_volatile(regs(), [atom()]) -> regs().
invalidate_volatile(Regs = #regs{contents = Tracked}, PreservedRegs) ->
    IsPreserved = fun(Reg, _Held) -> lists:member(Reg, PreservedRegs) end,
    Regs#regs{contents = maps:filter(IsPreserved, Tracked)}.
116+
117+
%% @doc Invalidate all CPU registers that reference a given VM location.
%% Call this when a VM register is written to, so that any CPU register
%% that was caching its old value is invalidated.
%%
%% Both direct copies (`VmLoc`) and derived entries (`{ptr, VmLoc}`) are
%% dropped: once the VM location is overwritten, neither describes the new
%% value. Dropping too much only costs a reload, keeping a stale entry
%% would make the backend reuse a wrong register.
-spec invalidate_vm_loc(regs(), vm_loc()) -> regs().
invalidate_vm_loc(#regs{contents = C} = Regs, VmLoc) ->
    C1 = maps:filter(
        fun
            (_Reg, Val) when Val =:= VmLoc -> false;
            (_Reg, {ptr, Val}) when Val =:= VmLoc -> false;
            (_Reg, _Val) -> true
        end,
        C
    ),
    Regs#regs{contents = C1}.
124+
125+
%% @doc Find a CPU register that holds the given contents.
%% Returns `{ok, Reg}` or `none`. When several registers hold the same
%% contents, which one is returned is unspecified (map iteration order).
%%
%% `unknown` never matches: it means "nothing is known about this register",
%% so two registers recorded as `unknown` do not hold the same value. Without
%% this guard, a register explicitly stored as `unknown` through
%% set_contents/3 could be returned as a cache hit for any untracked value.
-spec find_reg_with_contents(regs(), contents()) -> {ok, atom()} | none.
find_reg_with_contents(#regs{}, unknown) ->
    none;
find_reg_with_contents(#regs{contents = C}, Contents) ->
    find_in_map(maps:iterator(C), Contents).
130+
131+
%% Walk a map iterator and stop at the first register whose recorded
%% contents are exactly equal to `Contents`; `none` when the iterator is
%% exhausted without a match.
find_in_map(Iterator, Contents) ->
    case maps:next(Iterator) of
        none ->
            none;
        {Reg, Held, _Rest} when Held =:= Contents ->
            {ok, Reg};
        {_Reg, _Held, Rest} ->
            find_in_map(Rest, Contents)
    end.
137+
138+
%% @doc Merge two register tracking states (for control flow merge points).
%% A register stays tracked only when both states record exactly the same
%% contents for it. The merged state always starts with an empty C stack.
-spec merge(regs(), regs()) -> regs().
merge(#regs{contents = Left}, #regs{contents = Right}) ->
    AgreesInBoth = fun(Reg, Held) ->
        maps:get(Reg, Right, undefined) =:= Held
    end,
    Common = maps:filter(AgreesInBoth, Left),
    #regs{contents = Common, stack = []}.
148+
149+
%% @doc Record that `Value` was pushed to the C stack. The new entry
%% becomes the top of the tracked stack.
-spec stack_push(regs(), atom() | contents()) -> regs().
stack_push(Regs = #regs{stack = Pushed}, Value) ->
    NewStack = [Value | Pushed],
    Regs#regs{stack = NewStack}.
153+
154+
%% @doc Record a pop from the C stack.
%% Returns the most recently pushed entry together with the updated state.
%% When nothing is tracked, the popped value is reported as `unknown` and
%% the state is returned unchanged.
-spec stack_pop(regs()) -> {atom() | contents(), regs()}.
stack_pop(Regs = #regs{stack = []}) ->
    {unknown, Regs};
stack_pop(Regs = #regs{stack = [Popped | Below]}) ->
    {Popped, Regs#regs{stack = Below}}.
160+
161+
%% @doc Forget everything recorded about the C stack. Register contents
%% are kept as they are.
-spec stack_clear(regs()) -> regs().
stack_clear(Regs) ->
    Emptied = [],
    Regs#regs{stack = Emptied}.
165+
166+
%% @doc Return the tracked C stack entries, most recently pushed first.
-spec stack_contents(regs()) -> [atom() | contents()].
stack_contents(#regs{stack = Pushed}) ->
    Pushed.
169+
170+
%% @doc Convert a backend value to a contents descriptor for tracking.
%% MaxReg is the maximum number of x registers (typically ?MAX_REG from jit.hrl);
%% it is only used to give `{x_reg, extra}` the index `MaxReg`.
%% Values that cannot be tracked (including `{ptr, _}`) map to `unknown`.
-spec value_to_contents(term(), non_neg_integer()) -> contents().
value_to_contents({x_reg, extra}, MaxReg) -> {x_reg, MaxReg};
value_to_contents({x_reg, N} = Loc, _MaxReg) when is_integer(N) -> Loc;
value_to_contents({y_reg, _N} = Loc, _MaxReg) -> Loc;
value_to_contents(cp, _MaxReg) -> cp;
value_to_contents(Imm, _MaxReg) when is_integer(Imm) -> {imm, Imm};
value_to_contents(_Untracked, _MaxReg) -> unknown.
180+
181+
%% @doc Convert a VM destination register to a contents descriptor for tracking.
%% MaxReg is the maximum number of x registers (typically ?MAX_REG from jit.hrl).
%% An x register is tracked only when its index is below `MaxReg`;
%% `{x_reg, extra}` is tracked as `{x_reg, MaxReg}`. Any other destination
%% maps to `unknown`.
-spec vm_dest_to_contents(term(), non_neg_integer()) -> contents().
vm_dest_to_contents({x_reg, extra}, MaxReg) -> {x_reg, MaxReg};
vm_dest_to_contents({x_reg, X} = Dest, MaxReg) when is_integer(X), X < MaxReg -> Dest;
vm_dest_to_contents({y_reg, _Y} = Dest, _MaxReg) -> Dest;
vm_dest_to_contents(_Other, _MaxReg) -> unknown.
188+
189+
%% @doc Convert a list of register atoms to a bitmask.
%% Each register contributes `RegBitFn(Reg)`, and the contributions are
%% combined with `bor`. The non-register entries `imm`, `jit_state` and
%% `stack` are skipped without calling `RegBitFn`. An empty list gives 0.
-spec regs_to_mask([atom()], fun((atom()) -> non_neg_integer())) -> non_neg_integer().
regs_to_mask(Regs, RegBitFn) ->
    lists:foldl(
        fun
            (imm, Mask) -> Mask;
            (jit_state, Mask) -> Mask;
            (stack, Mask) -> Mask;
            (Reg, Mask) -> Mask bor RegBitFn(Reg)
        end,
        0,
        Regs
    ).

0 commit comments

Comments
 (0)