Skip to content

Commit 24c4a6d

Browse files
authored
Simplified bytecode op list (#400)
Remove dedicated immediate ops in favor of a reserved register. This experimentally improves GPU interpreter performance (not yet published).
1 parent 14e93c3 commit 24c4a6d

7 files changed

Lines changed: 180 additions & 85 deletions

File tree

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,12 @@
44
- Bumped many dependencies
55
- Add `Image::build` function to build an image from a `Vec<T>` and
66
`ImageSizeLike`, returning an error if the data size is incorrect.
7+
- Revamping `fidget-bytecode`
8+
- Reserve register `u8::MAX` to represent an inline immediate
9+
- Remove separate opcodes, e.g. `SubRegReg`, `SubRegImm`, and `SubImmReg`
10+
now all generate `BytecodeOp::Sub` (using the reserved register as needed
11+
for immediates).
12+
- This also removes the `enum RegOpDiscriminants` from `fidget_core`
713

814
# 0.4.2
915
- Change `depth` member in `GeometryPixel` from `u32` to `f32` ([#381](https://github.com/mkeeter/fidget/pull/381))

Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

demos/web-editor/crate/Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

fidget-bytecode/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,5 +14,7 @@ rust-version.workspace = true
1414
fidget-core.workspace = true
1515
workspace-hack.workspace = true
1616

17+
serde.workspace = true
1718
strum.workspace = true
19+
thiserror.workspace = true
1820
zerocopy.workspace = true

fidget-bytecode/src/lib.rs

Lines changed: 166 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,7 @@
2020
//! user-defined semantics, as long as the immediate is not a reserved
2121
//! value.
2222
//!
23-
//! ## Register-only operations
24-
//!
25-
//! Register-only operations (i.e. opcodes without an immediate `f32` or `u32`)
26-
//! are packed into a single `u32` as follows:
23+
//! Operations are packed into the first `u32` as follows:
2724
//!
2825
//! | Byte | Value |
2926
//! |------|---------------------------------------------|
@@ -32,49 +29,127 @@
3229
//! | 2 | first input register |
3330
//! | 3 | second input register |
3431
//!
35-
//! Depending on the opcode, the input register bytes may not be used.
36-
//!
37-
//! The second word is always `0xFF000000`
38-
//!
39-
//! ## Operations with an `f32` immediate
40-
//!
41-
//! Operations with an `f32` immediate are packed into two `u32` words.
42-
//! The first word is similar to before:
43-
//!
44-
//! | Byte | Value |
45-
//! |------|---------------------------------------------|
46-
//! | 0 | opcode |
47-
//! | 1 | output register |
48-
//! | 2 | first input register |
49-
//! | 3 | not used |
50-
//!
51-
//! The second word is the `f32` reinterpreted as a `u32`.
32+
//! The opcode byte is generated automatically from [`BytecodeOp`] tags.
5233
//!
53-
//! ## Operations with an `u32` immediate
54-
//!
55-
//! Operations with a `u32` immediate (e.g.
56-
//! [`Load`](RegOp::Load)) are also packed into two `u32`
57-
//! words. The first word is what you'd expect:
58-
//!
59-
//! | Byte | Value |
60-
//! |------|---------------------------------------------|
61-
//! | 0 | opcode |
62-
//! | 1 | input or output register |
63-
//! | 2 | not used |
64-
//! | 3 | not used |
65-
//!
66-
//! The second word is the `u32` immediate.
34+
//! Depending on the opcode, the input register bytes may not be used.
6735
//!
68-
//! ## Opcode values
36+
//! An input register byte of `0xFF` indicates that the second word should be
37+
//! used as an immediate value; the `u32` should be bitcast to an `f32`.
6938
//!
70-
//! Opcode values are generated automatically from [`BytecodeOp`]
71-
//! values, which are one-to-one with [`RegOp`] variants.
39+
//! [`Load`](RegOp::Load) and [`Store`](RegOp::Store) are implemented with
40+
//! [`BytecodeOp::Mem`]: a load sets the input register byte to `0xFF`, while a
41+
//! store sets the output register byte to `0xFF`. In both cases, the second
42+
//! word is a `u32` immediate (not bitcast to `f32`) giving the memory slot.
43+
7244
#![warn(missing_docs)]
7345

7446
use fidget_core::{compiler::RegOp, vm::VmData};
7547
use zerocopy::IntoBytes;
7648

77-
pub use fidget_core::compiler::RegOpDiscriminants as BytecodeOp;
49+
/// Error type for bytecode builder
50+
#[derive(thiserror::Error, Debug, PartialEq)]
51+
#[error("register 255 is reserved")]
52+
pub struct ReservedRegister;
53+
54+
/// Operations in the bytecode tape
55+
#[derive(
56+
Copy,
57+
Clone,
58+
Debug,
59+
PartialEq,
60+
serde::Serialize,
61+
serde::Deserialize,
62+
strum::EnumIter,
63+
strum::EnumCount,
64+
strum::IntoStaticStr,
65+
strum::FromRepr,
66+
)]
67+
#[expect(missing_docs)]
68+
#[repr(u8)]
69+
pub enum BytecodeOp {
70+
Output,
71+
Input,
72+
Copy,
73+
Neg,
74+
Abs,
75+
Recip,
76+
Sqrt,
77+
Square,
78+
Floor,
79+
Ceil,
80+
Round,
81+
Not,
82+
Sin,
83+
Cos,
84+
Tan,
85+
Asin,
86+
Acos,
87+
Atan,
88+
Exp,
89+
Ln,
90+
Add,
91+
Sub,
92+
Mul,
93+
Div,
94+
Atan2,
95+
Compare,
96+
Mod,
97+
Min,
98+
Max,
99+
And,
100+
Or,
101+
Mem,
102+
}
103+
104+
impl From<RegOp> for BytecodeOp {
105+
fn from(op: RegOp) -> Self {
106+
match op {
107+
RegOp::Input(..) => BytecodeOp::Input,
108+
RegOp::Output(..) => BytecodeOp::Output,
109+
RegOp::NegReg(..) => BytecodeOp::Neg,
110+
RegOp::AbsReg(..) => BytecodeOp::Abs,
111+
RegOp::RecipReg(..) => BytecodeOp::Recip,
112+
RegOp::SqrtReg(..) => BytecodeOp::Sqrt,
113+
RegOp::SquareReg(..) => BytecodeOp::Square,
114+
RegOp::FloorReg(..) => BytecodeOp::Floor,
115+
RegOp::CeilReg(..) => BytecodeOp::Ceil,
116+
RegOp::RoundReg(..) => BytecodeOp::Round,
117+
RegOp::SinReg(..) => BytecodeOp::Sin,
118+
RegOp::CosReg(..) => BytecodeOp::Cos,
119+
RegOp::TanReg(..) => BytecodeOp::Tan,
120+
RegOp::AsinReg(..) => BytecodeOp::Asin,
121+
RegOp::AcosReg(..) => BytecodeOp::Acos,
122+
RegOp::AtanReg(..) => BytecodeOp::Atan,
123+
RegOp::ExpReg(..) => BytecodeOp::Exp,
124+
RegOp::LnReg(..) => BytecodeOp::Ln,
125+
RegOp::NotReg(..) => BytecodeOp::Not,
126+
RegOp::Load(..) | RegOp::Store(..) => BytecodeOp::Mem,
127+
RegOp::CopyImm(..) | RegOp::CopyReg(..) => BytecodeOp::Copy,
128+
129+
RegOp::AddRegReg(..) | RegOp::AddRegImm(..) => BytecodeOp::Add,
130+
RegOp::MulRegReg(..) | RegOp::MulRegImm(..) => BytecodeOp::Mul,
131+
RegOp::DivRegReg(..)
132+
| RegOp::DivRegImm(..)
133+
| RegOp::DivImmReg(..) => BytecodeOp::Div,
134+
RegOp::SubRegReg(..)
135+
| RegOp::SubRegImm(..)
136+
| RegOp::SubImmReg(..) => BytecodeOp::Sub,
137+
RegOp::AtanRegReg(..)
138+
| RegOp::AtanRegImm(..)
139+
| RegOp::AtanImmReg(..) => BytecodeOp::Atan2,
140+
RegOp::MinRegReg(..) | RegOp::MinRegImm(..) => BytecodeOp::Min,
141+
RegOp::MaxRegReg(..) | RegOp::MaxRegImm(..) => BytecodeOp::Max,
142+
RegOp::CompareRegReg(..)
143+
| RegOp::CompareRegImm(..)
144+
| RegOp::CompareImmReg(..) => BytecodeOp::Compare,
145+
RegOp::ModRegReg(..)
146+
| RegOp::ModRegImm(..)
147+
| RegOp::ModImmReg(..) => BytecodeOp::Mod,
148+
RegOp::AndRegReg(..) | RegOp::AndRegImm(..) => BytecodeOp::And,
149+
RegOp::OrRegReg(..) | RegOp::OrRegImm(..) => BytecodeOp::Or,
150+
}
151+
}
152+
}
78153

79154
/// Serialized bytecode for external evaluation
80155
pub struct Bytecode {
@@ -96,6 +171,8 @@ impl Bytecode {
96171
}
97172

98173
/// Maximum register index used by the tape
174+
///
175+
/// This does not include the virtual register `0xFF` used for immediates
99176
pub fn reg_count(&self) -> u8 {
100177
self.reg_count
101178
}
@@ -111,33 +188,49 @@ impl Bytecode {
111188
}
112189

113190
/// Builds a new bytecode object from VM data
114-
pub fn new<const N: usize>(t: &VmData<N>) -> Self {
191+
///
192+
/// Returns an error if the reserved register (255) is in use
193+
pub fn new<const N: usize>(
194+
t: &VmData<N>,
195+
) -> Result<Self, ReservedRegister> {
115196
// The initial opcode is `OP_JUMP 0x0000_0000`
116197
let mut data = vec![u32::MAX, 0u32];
117198
let mut reg_count = 0u8;
118199
let mut mem_count = 0u32;
119200
for op in t.iter_asm() {
120-
let r = BytecodeOp::from(op);
121-
let mut word = [r as u8, 0xFF, 0xFF, 0xFF];
201+
let mut word = [0xFF; 4];
122202
let mut imm = None;
123203
let mut store_reg = |i, r| {
124-
reg_count = reg_count.max(r); // update the max reg
125-
word[i] = r;
204+
if r == u8::MAX {
205+
Err(ReservedRegister)
206+
} else {
207+
reg_count = reg_count.max(r); // update the max reg
208+
word[i] = r;
209+
Ok(())
210+
}
126211
};
127212
match op {
128213
RegOp::Input(reg, slot) | RegOp::Output(reg, slot) => {
129-
store_reg(1, reg);
214+
store_reg(1, reg)?;
130215
imm = Some(slot);
131216
}
132217

133-
RegOp::Load(reg, slot) | RegOp::Store(reg, slot) => {
134-
store_reg(1, reg);
218+
RegOp::Load(reg, slot) => {
219+
store_reg(1, reg)?;
220+
store_reg(2, u8::MAX)?;
221+
mem_count = mem_count.max(slot);
222+
imm = Some(slot);
223+
}
224+
RegOp::Store(reg, slot) => {
225+
store_reg(1, u8::MAX)?;
226+
store_reg(2, reg)?;
135227
mem_count = mem_count.max(slot);
136228
imm = Some(slot);
137229
}
138230

139231
RegOp::CopyImm(out, imm_f32) => {
140-
store_reg(1, out);
232+
store_reg(1, out)?;
233+
word[2] = u8::MAX;
141234
imm = Some(imm_f32.to_bits());
142235
}
143236
RegOp::NegReg(out, reg)
@@ -158,28 +251,35 @@ impl Bytecode {
158251
| RegOp::ExpReg(out, reg)
159252
| RegOp::LnReg(out, reg)
160253
| RegOp::NotReg(out, reg) => {
161-
store_reg(1, out);
162-
store_reg(2, reg);
254+
store_reg(1, out)?;
255+
store_reg(2, reg)?;
163256
}
164257

165258
RegOp::AddRegImm(out, reg, imm_f32)
166259
| RegOp::MulRegImm(out, reg, imm_f32)
167260
| RegOp::DivRegImm(out, reg, imm_f32)
168-
| RegOp::DivImmReg(out, reg, imm_f32)
169-
| RegOp::SubImmReg(out, reg, imm_f32)
170261
| RegOp::SubRegImm(out, reg, imm_f32)
171262
| RegOp::AtanRegImm(out, reg, imm_f32)
172-
| RegOp::AtanImmReg(out, reg, imm_f32)
173263
| RegOp::MinRegImm(out, reg, imm_f32)
174264
| RegOp::MaxRegImm(out, reg, imm_f32)
175265
| RegOp::CompareRegImm(out, reg, imm_f32)
176-
| RegOp::CompareImmReg(out, reg, imm_f32)
177266
| RegOp::ModRegImm(out, reg, imm_f32)
178-
| RegOp::ModImmReg(out, reg, imm_f32)
179267
| RegOp::AndRegImm(out, reg, imm_f32)
180268
| RegOp::OrRegImm(out, reg, imm_f32) => {
181-
store_reg(1, out);
182-
store_reg(2, reg);
269+
store_reg(1, out)?;
270+
store_reg(2, reg)?;
271+
word[3] = u8::MAX;
272+
imm = Some(imm_f32.to_bits());
273+
}
274+
275+
RegOp::DivImmReg(out, reg, imm_f32)
276+
| RegOp::SubImmReg(out, reg, imm_f32)
277+
| RegOp::AtanImmReg(out, reg, imm_f32)
278+
| RegOp::CompareImmReg(out, reg, imm_f32)
279+
| RegOp::ModImmReg(out, reg, imm_f32) => {
280+
store_reg(1, out)?;
281+
store_reg(3, reg)?;
282+
word[2] = u8::MAX;
183283
imm = Some(imm_f32.to_bits());
184284
}
185285

@@ -194,22 +294,23 @@ impl Bytecode {
194294
| RegOp::ModRegReg(out, lhs, rhs)
195295
| RegOp::AndRegReg(out, lhs, rhs)
196296
| RegOp::OrRegReg(out, lhs, rhs) => {
197-
store_reg(1, out);
198-
store_reg(2, lhs);
199-
store_reg(3, rhs);
297+
store_reg(1, out)?;
298+
store_reg(2, lhs)?;
299+
store_reg(3, rhs)?;
200300
}
201-
}
301+
};
302+
word[0] = BytecodeOp::from(op) as u8;
202303
data.push(u32::from_le_bytes(word));
203304
data.push(imm.unwrap_or(0xFF000000));
204305
}
205306
// Add the final `OP_JUMP 0xFFFF_FFFF`
206307
data.extend([u32::MAX, u32::MAX]);
207308

208-
Bytecode {
309+
Ok(Bytecode {
209310
data,
210311
mem_count,
211312
reg_count,
212-
}
313+
})
213314
}
214315
}
215316

@@ -236,7 +337,7 @@ mod test {
236337
let c = ctx.constant(1.0);
237338
let out = ctx.add(x, c).unwrap();
238339
let data = VmData::<255>::new(&ctx, &[out]).unwrap();
239-
let bc = Bytecode::new(&data);
340+
let bc = Bytecode::new(&data).unwrap();
240341
let mut iter = bc.data.iter();
241342
let mut next = || *iter.next().unwrap();
242343
assert_eq!(next(), 0xFFFFFFFF); // start marker
@@ -246,10 +347,7 @@ mod test {
246347
[BytecodeOp::Input as u8, 0, 0xFF, 0xFF]
247348
);
248349
assert_eq!(next(), 0); // input slot 0
249-
assert_eq!(
250-
next().to_le_bytes(),
251-
[BytecodeOp::AddRegImm as u8, 0, 0, 0xFF]
252-
);
350+
assert_eq!(next().to_le_bytes(), [BytecodeOp::Add as u8, 0, 0, 0xFF]);
253351
assert_eq!(f32::from_bits(next()), 1.0);
254352
assert_eq!(
255353
next().to_le_bytes(),

fidget-core/src/compiler/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ mod op;
1414

1515
mod lru;
1616
pub(crate) use lru::Lru;
17-
pub use op::{RegOp, RegOpDiscriminants, SsaOp};
17+
pub use op::{RegOp, SsaOp};
1818

1919
mod reg_tape;
2020
mod ssa_tape;

0 commit comments

Comments
 (0)