Skip to content

Commit e29d333

Browse files
st0012amomchilov
andauthored
ZJIT: Implement concatstrings insn (ruby#14154)
Co-authored-by: Alexander Momchilov <alexander.momchilov@shopify.com>
1 parent 4f34edd commit e29d333

4 files changed

Lines changed: 161 additions & 38 deletions

File tree

test/ruby/test_zjit.rb

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1506,6 +1506,22 @@ def test(val) = val.class
15061506
}, call_threshold: 2
15071507
end
15081508

1509+
def test_string_concat
1510+
assert_compiles '"123"', %q{
1511+
def test = "#{1}#{2}#{3}"
1512+
1513+
test
1514+
}, insns: [:concatstrings]
1515+
end
1516+
1517+
def test_string_concat_empty
1518+
assert_compiles '""', %q{
1519+
def test = "#{}"
1520+
1521+
test
1522+
}, insns: [:concatstrings]
1523+
end
1524+
15091525
private
15101526

15111527
# Assert that every method call in `test_script` can be compiled by ZJIT

zjit/src/backend/lir.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2233,6 +2233,12 @@ impl Assembler {
22332233
out
22342234
}
22352235

2236+
pub fn sub_into(&mut self, left: Opnd, right: Opnd) -> Opnd {
2237+
let out = self.sub(left, right);
2238+
self.mov(left, out);
2239+
out
2240+
}
2241+
22362242
#[must_use]
22372243
pub fn mul(&mut self, left: Opnd, right: Opnd) -> Opnd {
22382244
let out = self.new_vreg(Opnd::match_num_bits(&[left, right]));

zjit/src/codegen.rs

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -330,6 +330,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio
330330
Insn::NewRange { low, high, flag, state } => gen_new_range(asm, opnd!(low), opnd!(high), *flag, &function.frame_state(*state)),
331331
Insn::ArrayDup { val, state } => gen_array_dup(asm, opnd!(val), &function.frame_state(*state)),
332332
Insn::StringCopy { val, chilled, state } => gen_string_copy(asm, opnd!(val), *chilled, &function.frame_state(*state)),
333+
Insn::StringConcat { strings, state } => gen_string_concat(jit, asm, opnds!(strings), &function.frame_state(*state))?,
333334
Insn::Param { idx } => unreachable!("block.insns should not have Insn::Param({idx})"),
334335
Insn::Snapshot { .. } => return Some(()), // we don't need to do anything for this instruction at the moment
335336
Insn::Jump(branch) => return gen_jump(jit, asm, branch),
@@ -1456,6 +1457,56 @@ pub fn gen_stub_exit(cb: &mut CodeBlock) -> Option<CodePtr> {
14561457
})
14571458
}
14581459

1460+
fn gen_string_concat(jit: &mut JITState, asm: &mut Assembler, strings: Vec<Opnd>, state: &FrameState) -> Option<Opnd> {
1461+
let n = strings.len();
1462+
1463+
// concatstrings shouldn't have 0 strings
1464+
// If it happens we abort the compilation for now
1465+
if n == 0 {
1466+
return None;
1467+
}
1468+
1469+
gen_prepare_non_leaf_call(jit, asm, state)?;
1470+
1471+
// Calculate the compile-time NATIVE_STACK_PTR offset from NATIVE_BASE_PTR
1472+
// At this point, frame_setup(&[], jit.c_stack_slots) has been called,
1473+
// which allocated aligned_stack_bytes(jit.c_stack_slots) on the stack
1474+
let frame_size = aligned_stack_bytes(jit.c_stack_slots);
1475+
let allocation_size = aligned_stack_bytes(n);
1476+
1477+
asm_comment!(asm, "allocate {} bytes on C stack for {} strings", allocation_size, n);
1478+
asm.sub_into(NATIVE_STACK_PTR, allocation_size.into());
1479+
1480+
// Calculate the total offset from NATIVE_BASE_PTR to our buffer
1481+
let total_offset_from_base = (frame_size + allocation_size) as i32;
1482+
1483+
for (idx, &string_opnd) in strings.iter().enumerate() {
1484+
let slot_offset = -total_offset_from_base + (idx as i32 * SIZEOF_VALUE_I32);
1485+
asm.mov(
1486+
Opnd::mem(VALUE_BITS, NATIVE_BASE_PTR, slot_offset),
1487+
string_opnd
1488+
);
1489+
}
1490+
1491+
let first_string_ptr = asm.lea(Opnd::mem(64, NATIVE_BASE_PTR, -total_offset_from_base));
1492+
1493+
let result = asm_ccall!(asm, rb_str_concat_literals, n.into(), first_string_ptr);
1494+
1495+
asm_comment!(asm, "restore C stack pointer");
1496+
asm.add_into(NATIVE_STACK_PTR, allocation_size.into());
1497+
1498+
Some(result)
1499+
}
1500+
1501+
/// Given the number of spill slots needed for a function, return the number of bytes
1502+
/// the function needs to allocate on the stack for the stack frame.
1503+
fn aligned_stack_bytes(num_slots: usize) -> usize {
1504+
// Both x86_64 and arm64 require the stack to be aligned to 16 bytes.
1505+
// Since SIZEOF_VALUE is 8 bytes, we need to round up the size to the nearest even number.
1506+
let num_slots = num_slots + (num_slots % 2);
1507+
num_slots * SIZEOF_VALUE
1508+
}
1509+
14591510
impl Assembler {
14601511
/// Make a C call while marking the start and end positions of it
14611512
fn ccall_with_branch(&mut self, fptr: *const u8, opnds: Vec<Opnd>, branch: &Rc<Branch>) -> Opnd {

zjit/src/hir.rs

Lines changed: 88 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -445,6 +445,7 @@ pub enum Insn {
445445

446446
StringCopy { val: InsnId, chilled: bool, state: InsnId },
447447
StringIntern { val: InsnId },
448+
StringConcat { strings: Vec<InsnId>, state: InsnId },
448449

449450
/// Put special object (VMCORE, CBASE, etc.) based on value_type
450451
PutSpecialObject { value_type: SpecialObjectType },
@@ -675,6 +676,16 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> {
675676
Insn::ArrayDup { val, .. } => { write!(f, "ArrayDup {val}") }
676677
Insn::HashDup { val, .. } => { write!(f, "HashDup {val}") }
677678
Insn::StringCopy { val, .. } => { write!(f, "StringCopy {val}") }
679+
Insn::StringConcat { strings, .. } => {
680+
write!(f, "StringConcat")?;
681+
let mut prefix = " ";
682+
for string in strings {
683+
write!(f, "{prefix}{string}")?;
684+
prefix = ", ";
685+
}
686+
687+
Ok(())
688+
}
678689
Insn::Test { val } => { write!(f, "Test {val}") }
679690
Insn::IsNil { val } => { write!(f, "IsNil {val}") }
680691
Insn::Jump(target) => { write!(f, "Jump {target}") }
@@ -1135,6 +1146,7 @@ impl Function {
11351146
&Throw { throw_state, val } => Throw { throw_state, val: find!(val) },
11361147
&StringCopy { val, chilled, state } => StringCopy { val: find!(val), chilled, state },
11371148
&StringIntern { val } => StringIntern { val: find!(val) },
1149+
&StringConcat { ref strings, state } => StringConcat { strings: find_vec!(strings), state: find!(state) },
11381150
&Test { val } => Test { val: find!(val) },
11391151
&IsNil { val } => IsNil { val: find!(val) },
11401152
&Jump(ref target) => Jump(find_branch_edge!(target)),
@@ -1258,6 +1270,7 @@ impl Function {
12581270
Insn::IsNil { .. } => types::CBool,
12591271
Insn::StringCopy { .. } => types::StringExact,
12601272
Insn::StringIntern { .. } => types::StringExact,
1273+
Insn::StringConcat { .. } => types::StringExact,
12611274
Insn::NewArray { .. } => types::ArrayExact,
12621275
Insn::ArrayDup { .. } => types::ArrayExact,
12631276
Insn::NewHash { .. } => types::HashExact,
@@ -1887,6 +1900,10 @@ impl Function {
18871900
worklist.push_back(high);
18881901
worklist.push_back(state);
18891902
}
1903+
&Insn::StringConcat { ref strings, state, .. } => {
1904+
worklist.extend(strings);
1905+
worklist.push_back(state);
1906+
}
18901907
| &Insn::StringIntern { val }
18911908
| &Insn::Return { val }
18921909
| &Insn::Throw { val, .. }
@@ -2469,6 +2486,16 @@ impl FrameState {
24692486
self.stack.pop().ok_or_else(|| ParseError::StackUnderflow(self.clone()))
24702487
}
24712488

2489+
fn stack_pop_n(&mut self, count: usize) -> Result<Vec<InsnId>, ParseError> {
2490+
// Check if we have enough values on the stack
2491+
let stack_len = self.stack.len();
2492+
if stack_len < count {
2493+
return Err(ParseError::StackUnderflow(self.clone()));
2494+
}
2495+
2496+
Ok(self.stack.split_off(stack_len - count))
2497+
}
2498+
24722499
/// Get a stack-top operand
24732500
fn stack_top(&self) -> Result<InsnId, ParseError> {
24742501
self.stack.last().ok_or_else(|| ParseError::StackUnderflow(self.clone())).copied()
@@ -2789,24 +2816,23 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result<Function, ParseError> {
27892816
let insn_id = fun.push_insn(block, Insn::StringIntern { val });
27902817
state.stack_push(insn_id);
27912818
}
2819+
YARVINSN_concatstrings => {
2820+
let count = get_arg(pc, 0).as_u32();
2821+
let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state });
2822+
let strings = state.stack_pop_n(count as usize)?;
2823+
let insn_id = fun.push_insn(block, Insn::StringConcat { strings, state: exit_id });
2824+
state.stack_push(insn_id);
2825+
}
27922826
YARVINSN_newarray => {
27932827
let count = get_arg(pc, 0).as_usize();
27942828
let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state });
2795-
let mut elements = vec![];
2796-
for _ in 0..count {
2797-
elements.push(state.stack_pop()?);
2798-
}
2799-
elements.reverse();
2829+
let elements = state.stack_pop_n(count)?;
28002830
state.stack_push(fun.push_insn(block, Insn::NewArray { elements, state: exit_id }));
28012831
}
28022832
YARVINSN_opt_newarray_send => {
28032833
let count = get_arg(pc, 0).as_usize();
28042834
let method = get_arg(pc, 1).as_u32();
2805-
let mut elements = vec![];
2806-
for _ in 0..count {
2807-
elements.push(state.stack_pop()?);
2808-
}
2809-
elements.reverse();
2835+
let elements = state.stack_pop_n(count)?;
28102836
let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state });
28112837
let (bop, insn) = match method {
28122838
VM_OPT_NEWARRAY_SEND_MAX => (BOP_MAX, Insn::ArrayMax { elements, state: exit_id }),
@@ -2871,13 +2897,10 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result<Function, ParseError> {
28712897
}
28722898
YARVINSN_pushtoarray => {
28732899
let count = get_arg(pc, 0).as_usize();
2874-
let mut vals = vec![];
2875-
for _ in 0..count {
2876-
vals.push(state.stack_pop()?);
2877-
}
2900+
let vals = state.stack_pop_n(count)?;
28782901
let array = state.stack_pop()?;
28792902
let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state });
2880-
for val in vals.into_iter().rev() {
2903+
for val in vals.into_iter() {
28812904
fun.push_insn(block, Insn::ArrayPush { array, val, state: exit_id });
28822905
}
28832906
state.stack_push(array);
@@ -3079,12 +3102,7 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result<Function, ParseError> {
30793102
}
30803103
let argc = unsafe { vm_ci_argc((*cd).ci) };
30813104

3082-
let mut args = vec![];
3083-
for _ in 0..argc {
3084-
args.push(state.stack_pop()?);
3085-
}
3086-
args.reverse();
3087-
3105+
let args = state.stack_pop_n(argc as usize)?;
30883106
let recv = state.stack_pop()?;
30893107
let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state });
30903108
let send = fun.push_insn(block, Insn::SendWithoutBlock { self_val: recv, cd, args, state: exit_id });
@@ -3160,12 +3178,7 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result<Function, ParseError> {
31603178
}
31613179
let argc = unsafe { vm_ci_argc((*cd).ci) };
31623180

3163-
let mut args = vec![];
3164-
for _ in 0..argc {
3165-
args.push(state.stack_pop()?);
3166-
}
3167-
args.reverse();
3168-
3181+
let args = state.stack_pop_n(argc as usize)?;
31693182
let recv = state.stack_pop()?;
31703183
let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state });
31713184
let send = fun.push_insn(block, Insn::SendWithoutBlock { self_val: recv, cd, args, state: exit_id });
@@ -3183,12 +3196,7 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result<Function, ParseError> {
31833196
}
31843197
let argc = unsafe { vm_ci_argc((*cd).ci) };
31853198

3186-
let mut args = vec![];
3187-
for _ in 0..argc {
3188-
args.push(state.stack_pop()?);
3189-
}
3190-
args.reverse();
3191-
3199+
let args = state.stack_pop_n(argc as usize)?;
31923200
let recv = state.stack_pop()?;
31933201
let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state });
31943202
let send = fun.push_insn(block, Insn::Send { self_val: recv, cd, blockiseq, args, state: exit_id });
@@ -5224,7 +5232,47 @@ mod tests {
52245232
v3:Fixnum[1] = Const Value(1)
52255233
v5:BasicObject = ObjToString v3
52265234
v7:String = AnyToString v3, str: v5
5227-
SideExit UnknownOpcode(concatstrings)
5235+
v9:StringExact = StringConcat v2, v7
5236+
Return v9
5237+
"#]]);
5238+
}
5239+
5240+
#[test]
5241+
fn test_string_concat() {
5242+
eval(r##"
5243+
def test = "#{1}#{2}#{3}"
5244+
"##);
5245+
assert_method_hir_with_opcode("test", YARVINSN_concatstrings, expect![[r#"
5246+
fn test@<compiled>:2:
5247+
bb0(v0:BasicObject):
5248+
v2:Fixnum[1] = Const Value(1)
5249+
v4:BasicObject = ObjToString v2
5250+
v6:String = AnyToString v2, str: v4
5251+
v7:Fixnum[2] = Const Value(2)
5252+
v9:BasicObject = ObjToString v7
5253+
v11:String = AnyToString v7, str: v9
5254+
v12:Fixnum[3] = Const Value(3)
5255+
v14:BasicObject = ObjToString v12
5256+
v16:String = AnyToString v12, str: v14
5257+
v18:StringExact = StringConcat v6, v11, v16
5258+
Return v18
5259+
"#]]);
5260+
}
5261+
5262+
#[test]
5263+
fn test_string_concat_empty() {
5264+
eval(r##"
5265+
def test = "#{}"
5266+
"##);
5267+
assert_method_hir_with_opcode("test", YARVINSN_concatstrings, expect![[r#"
5268+
fn test@<compiled>:2:
5269+
bb0(v0:BasicObject):
5270+
v2:StringExact[VALUE(0x1000)] = Const Value(VALUE(0x1000))
5271+
v3:NilClass = Const Value(nil)
5272+
v5:BasicObject = ObjToString v3
5273+
v7:String = AnyToString v3, str: v5
5274+
v9:StringExact = StringConcat v2, v7
5275+
Return v9
52285276
"#]]);
52295277
}
52305278

@@ -7172,7 +7220,8 @@ mod opt_tests {
71727220
v2:StringExact[VALUE(0x1000)] = Const Value(VALUE(0x1000))
71737221
v3:StringExact[VALUE(0x1008)] = Const Value(VALUE(0x1008))
71747222
v5:StringExact = StringCopy v3
7175-
SideExit UnknownOpcode(concatstrings)
7223+
v11:StringExact = StringConcat v2, v5
7224+
Return v11
71767225
"#]]);
71777226
}
71787227

@@ -7186,9 +7235,10 @@ mod opt_tests {
71867235
bb0(v0:BasicObject):
71877236
v2:StringExact[VALUE(0x1000)] = Const Value(VALUE(0x1000))
71887237
v3:Fixnum[1] = Const Value(1)
7189-
v10:BasicObject = SendWithoutBlock v3, :to_s
7190-
v7:String = AnyToString v3, str: v10
7191-
SideExit UnknownOpcode(concatstrings)
7238+
v11:BasicObject = SendWithoutBlock v3, :to_s
7239+
v7:String = AnyToString v3, str: v11
7240+
v9:StringExact = StringConcat v2, v7
7241+
Return v9
71927242
"#]]);
71937243
}
71947244

0 commit comments

Comments
 (0)