Skip to content

Commit 48fe3bb

Browse files
authored
Winch: Add abs SIMD instructions for x86 using AVX (#10202)
* Winch: Add abs SIMD instructions for x86 using AVX
* Add _simd_load.wast to unsupported if no AVX
1 parent ac76c09 commit 48fe3bb

File tree

14 files changed

+436
-15
lines changed

14 files changed

+436
-15
lines changed

crates/wast-util/src/lib.rs

+1-1
Original file line number | Diff line number | Diff line change
@@ -442,7 +442,6 @@ impl WastTest {
442442
"spec_testsuite/simd_i32x4_extmul_i16x8.wast",
443443
"spec_testsuite/simd_i32x4_trunc_sat_f32x4.wast",
444444
"spec_testsuite/simd_i32x4_trunc_sat_f64x2.wast",
445-
"spec_testsuite/simd_i64x2_arith2.wast",
446445
"spec_testsuite/simd_i64x2_extmul_i32x4.wast",
447446
"spec_testsuite/simd_i8x16_arith2.wast",
448447
"spec_testsuite/simd_lane.wast",
@@ -474,6 +473,7 @@ impl WastTest {
474473
"spec_testsuite/simd_f64x2_cmp.wast",
475474
"spec_testsuite/simd_i16x8_cmp.wast",
476475
"spec_testsuite/simd_i32x4_cmp.wast",
476+
"spec_testsuite/simd_i64x2_arith2.wast",
477477
"spec_testsuite/simd_i64x2_cmp.wast",
478478
"spec_testsuite/simd_i8x16_cmp.wast",
479479
"spec_testsuite/simd_int_to_int_extend.wast",
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,36 @@
1+
;;! target = "x86_64"
2+
;;! test = "winch"
3+
;;! flags = [ "-Ccranelift-has-avx" ]
4+
5+
(module
6+
(func (result v128)
7+
(f32x4.abs (v128.const f32x4 0 1 2 3))
8+
)
9+
)
10+
;; wasm[0]::function[0]:
11+
;; pushq %rbp
12+
;; movq %rsp, %rbp
13+
;; movq 8(%rdi), %r11
14+
;; movq 0x10(%r11), %r11
15+
;; addq $0x10, %r11
16+
;; cmpq %rsp, %r11
17+
;; ja 0x49
18+
;; 1c: movq %rdi, %r14
19+
;; subq $0x10, %rsp
20+
;; movq %rdi, 8(%rsp)
21+
;; movq %rsi, (%rsp)
22+
;; movdqu 0x1c(%rip), %xmm0
23+
;; vpcmpeqd %xmm15, %xmm15, %xmm15
24+
;; vpsrld $1, %xmm15, %xmm15
25+
;; vandps %xmm0, %xmm15, %xmm0
26+
;; addq $0x10, %rsp
27+
;; popq %rbp
28+
;; retq
29+
;; 49: ud2
30+
;; 4b: addb %al, (%rax)
31+
;; 4d: addb %al, (%rax)
32+
;; 4f: addb %al, (%rax)
33+
;; 51: addb %al, (%rax)
34+
;; 53: addb %al, (%rax)
35+
;; 55: addb %al, 0x3f(%rax)
36+
;; 5b: addb %al, (%rax)
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,39 @@
1+
;;! target = "x86_64"
2+
;;! test = "winch"
3+
;;! flags = [ "-Ccranelift-has-avx" ]
4+
5+
(module
6+
(func (result v128)
7+
(f64x2.abs (v128.const f64x2 0 1))
8+
)
9+
)
10+
;; wasm[0]::function[0]:
11+
;; pushq %rbp
12+
;; movq %rsp, %rbp
13+
;; movq 8(%rdi), %r11
14+
;; movq 0x10(%r11), %r11
15+
;; addq $0x10, %r11
16+
;; cmpq %rsp, %r11
17+
;; ja 0x49
18+
;; 1c: movq %rdi, %r14
19+
;; subq $0x10, %rsp
20+
;; movq %rdi, 8(%rsp)
21+
;; movq %rsi, (%rsp)
22+
;; movdqu 0x1c(%rip), %xmm0
23+
;; vpcmpeqq %xmm15, %xmm15, %xmm15
24+
;; vpsrlq $1, %xmm15, %xmm15
25+
;; vandpd %xmm0, %xmm15, %xmm0
26+
;; addq $0x10, %rsp
27+
;; popq %rbp
28+
;; retq
29+
;; 49: ud2
30+
;; 4b: addb %al, (%rax)
31+
;; 4d: addb %al, (%rax)
32+
;; 4f: addb %al, (%rax)
33+
;; 51: addb %al, (%rax)
34+
;; 53: addb %al, (%rax)
35+
;; 55: addb %al, (%rax)
36+
;; 57: addb %al, (%rax)
37+
;; 59: addb %al, (%rax)
38+
;; 5b: addb %al, (%rax)
39+
;; 5d: addb %dh, %al
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,40 @@
1+
;;! target = "x86_64"
2+
;;! test = "winch"
3+
;;! flags = [ "-Ccranelift-has-avx" ]
4+
5+
(module
6+
(func (result v128)
7+
(i16x8.abs (v128.const i16x8 0 1 2 3 4 5 6 7))
8+
)
9+
)
10+
;; wasm[0]::function[0]:
11+
;; pushq %rbp
12+
;; movq %rsp, %rbp
13+
;; movq 8(%rdi), %r11
14+
;; movq 0x10(%r11), %r11
15+
;; addq $0x10, %r11
16+
;; cmpq %rsp, %r11
17+
;; ja 0x3f
18+
;; 1c: movq %rdi, %r14
19+
;; subq $0x10, %rsp
20+
;; movq %rdi, 8(%rsp)
21+
;; movq %rsi, (%rsp)
22+
;; movdqu 0x1c(%rip), %xmm0
23+
;; vpabsw %xmm0, %xmm0
24+
;; addq $0x10, %rsp
25+
;; popq %rbp
26+
;; retq
27+
;; 3f: ud2
28+
;; 41: addb %al, (%rax)
29+
;; 43: addb %al, (%rax)
30+
;; 45: addb %al, (%rax)
31+
;; 47: addb %al, (%rax)
32+
;; 49: addb %al, (%rax)
33+
;; 4b: addb %al, (%rax)
34+
;; 4d: addb %al, (%rax)
35+
;; 4f: addb %al, (%rax)
36+
;; 51: addb %al, (%rcx)
37+
;; 53: addb %al, (%rdx)
38+
;; 55: addb %al, (%rbx)
39+
;; 57: addb %al, (%rax, %rax)
40+
;; 5a: addl $0x7000600, %eax
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,42 @@
1+
;;! target = "x86_64"
2+
;;! test = "winch"
3+
;;! flags = [ "-Ccranelift-has-avx" ]
4+
5+
(module
6+
(func (result v128)
7+
(i32x4.abs (v128.const i32x4 0 1 2 3))
8+
)
9+
)
10+
;; wasm[0]::function[0]:
11+
;; pushq %rbp
12+
;; movq %rsp, %rbp
13+
;; movq 8(%rdi), %r11
14+
;; movq 0x10(%r11), %r11
15+
;; addq $0x10, %r11
16+
;; cmpq %rsp, %r11
17+
;; ja 0x3f
18+
;; 1c: movq %rdi, %r14
19+
;; subq $0x10, %rsp
20+
;; movq %rdi, 8(%rsp)
21+
;; movq %rsi, (%rsp)
22+
;; movdqu 0x1c(%rip), %xmm0
23+
;; vpabsd %xmm0, %xmm0
24+
;; addq $0x10, %rsp
25+
;; popq %rbp
26+
;; retq
27+
;; 3f: ud2
28+
;; 41: addb %al, (%rax)
29+
;; 43: addb %al, (%rax)
30+
;; 45: addb %al, (%rax)
31+
;; 47: addb %al, (%rax)
32+
;; 49: addb %al, (%rax)
33+
;; 4b: addb %al, (%rax)
34+
;; 4d: addb %al, (%rax)
35+
;; 4f: addb %al, (%rax)
36+
;; 51: addb %al, (%rax)
37+
;; 53: addb %al, (%rcx)
38+
;; 55: addb %al, (%rax)
39+
;; 57: addb %al, (%rdx)
40+
;; 59: addb %al, (%rax)
41+
;; 5b: addb %al, (%rbx)
42+
;; 5d: addb %al, (%rax)
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,38 @@
1+
;;! target = "x86_64"
2+
;;! test = "winch"
3+
;;! flags = [ "-Ccranelift-has-avx" ]
4+
5+
(module
6+
(func (result v128)
7+
(i64x2.abs (v128.const i64x2 0 1))
8+
)
9+
)
10+
;; wasm[0]::function[0]:
11+
;; pushq %rbp
12+
;; movq %rsp, %rbp
13+
;; movq 8(%rdi), %r11
14+
;; movq 0x10(%r11), %r11
15+
;; addq $0x10, %r11
16+
;; cmpq %rsp, %r11
17+
;; ja 0x4e
18+
;; 1c: movq %rdi, %r14
19+
;; subq $0x10, %rsp
20+
;; movq %rdi, 8(%rsp)
21+
;; movq %rsi, (%rsp)
22+
;; movdqu 0x1c(%rip), %xmm0
23+
;; vpsrad $0x1f, %xmm0, %xmm15
24+
;; vpshufd $0xf5, %xmm15, %xmm15
25+
;; vpxor %xmm0, %xmm15, %xmm0
26+
;; vpsubq %xmm15, %xmm0, %xmm0
27+
;; addq $0x10, %rsp
28+
;; popq %rbp
29+
;; retq
30+
;; 4e: ud2
31+
;; 50: addb %al, (%rax)
32+
;; 52: addb %al, (%rax)
33+
;; 54: addb %al, (%rax)
34+
;; 56: addb %al, (%rax)
35+
;; 58: addl %eax, (%rax)
36+
;; 5a: addb %al, (%rax)
37+
;; 5c: addb %al, (%rax)
38+
;; 5e: addb %al, (%rax)
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,39 @@
1+
;;! target = "x86_64"
2+
;;! test = "winch"
3+
;;! flags = [ "-Ccranelift-has-avx" ]
4+
5+
(module
6+
(func (result v128)
7+
(i8x16.abs (v128.const i8x16 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15))
8+
)
9+
)
10+
;; wasm[0]::function[0]:
11+
;; pushq %rbp
12+
;; movq %rsp, %rbp
13+
;; movq 8(%rdi), %r11
14+
;; movq 0x10(%r11), %r11
15+
;; addq $0x10, %r11
16+
;; cmpq %rsp, %r11
17+
;; ja 0x3f
18+
;; 1c: movq %rdi, %r14
19+
;; subq $0x10, %rsp
20+
;; movq %rdi, 8(%rsp)
21+
;; movq %rsi, (%rsp)
22+
;; movdqu 0x1c(%rip), %xmm0
23+
;; vpabsb %xmm0, %xmm0
24+
;; addq $0x10, %rsp
25+
;; popq %rbp
26+
;; retq
27+
;; 3f: ud2
28+
;; 41: addb %al, (%rax)
29+
;; 43: addb %al, (%rax)
30+
;; 45: addb %al, (%rax)
31+
;; 47: addb %al, (%rax)
32+
;; 49: addb %al, (%rax)
33+
;; 4b: addb %al, (%rax)
34+
;; 4d: addb %al, (%rax)
35+
;; 4f: addb %al, (%rax)
36+
;; 51: addl %eax, (%rdx)
37+
;; 53: addl 0x9080706(, %rax), %eax
38+
;; 5a: orb (%rbx), %cl
39+
;; 5c: orb $0xd, %al

tests/misc_testsuite/winch/_simd_load.wast

+7-7
Original file line number | Diff line number | Diff line change
@@ -86,13 +86,13 @@
8686
;; )
8787
;; (assert_return (invoke "as-f32x4.mul-operand") (v128.const f32x4 256 2 3.6 -2))
8888

89-
;; (module (memory 1)
90-
;; (data (offset (i32.const 0)) "\ff\ff\ff\ff\ff\ff\ff\ff\ff\ff\ff\ff\ff\ff\ff\ff") ;; 1111 ...
91-
;; (func (export "as-f32x4.abs-operand") (result v128)
92-
;; (f32x4.abs (v128.load (i32.const 0)))
93-
;; )
94-
;; )
95-
;; (assert_return (invoke "as-f32x4.abs-operand") (v128.const i32x4 0x7fffffff 0x7fffffff 0x7fffffff 0x7fffffff)) ;; 1111 -> 0111
89+
(module (memory 1)
90+
(data (offset (i32.const 0)) "\ff\ff\ff\ff\ff\ff\ff\ff\ff\ff\ff\ff\ff\ff\ff\ff") ;; 1111 ...
91+
(func (export "as-f32x4.abs-operand") (result v128)
92+
(f32x4.abs (v128.load (i32.const 0)))
93+
)
94+
)
95+
(assert_return (invoke "as-f32x4.abs-operand") (v128.const i32x4 0x7fffffff 0x7fffffff 0x7fffffff 0x7fffffff)) ;; 1111 -> 0111
9696

9797
;; (module (memory 1)
9898
;; (data (offset (i32.const 0)) "\AA\AA\AA\AA\AA\AA\AA\AA\AA\AA\AA\AA\AA\AA\AA\AA")

tests/misc_testsuite/winch/_simd_splat.wast

+3-3
Original file line number | Diff line number | Diff line change
@@ -273,8 +273,8 @@
273273
(f64x2.eq (f64x2.splat (local.get 0)) (f64x2.splat (local.get 1))))
274274

275275
;; Floating-point sign bit operations
276-
;; (func (export "as-f32x4_abs-operand") (param f32) (result v128)
277-
;; (f32x4.abs (f32x4.splat (local.get 0))))
276+
(func (export "as-f32x4_abs-operand") (param f32) (result v128)
277+
(f32x4.abs (f32x4.splat (local.get 0))))
278278

279279
;; Floating-point min
280280
;; (func (export "as-f32x4_min-operands") (param f32 f32) (result v128)
@@ -336,7 +336,7 @@
336336
(assert_return (invoke "as-i32x4_eq-operands2" (i64.const 1) (i64.const 2)) (v128.const i64x2 0xffffffff00000000 0xffffffff00000000))
337337
(assert_return (invoke "as-f64x2_eq-operands" (f64.const +0.0) (f64.const -0.0)) (v128.const i64x2 -1 -1))
338338

339-
;; (assert_return (invoke "as-f32x4_abs-operand" (f32.const -1.125)) (v128.const f32x4 1.125 1.125 1.125 1.125))
339+
(assert_return (invoke "as-f32x4_abs-operand" (f32.const -1.125)) (v128.const f32x4 1.125 1.125 1.125 1.125))
340340
;; (assert_return (invoke "as-f32x4_min-operands" (f32.const 0.25) (f32.const 1e-38)) (v128.const f32x4 1e-38 1e-38 1e-38 1e-38))
341341
;; (assert_return (invoke "as-f32x4_div-operands" (f32.const 1.0) (f32.const 8.0)) (v128.const f32x4 0.125 0.125 0.125 0.125))
342342

winch/codegen/src/isa/aarch64/masm.rs

+7-2
Original file line number | Diff line number | Diff line change
@@ -16,8 +16,9 @@ use crate::{
1616
CalleeKind, DivKind, Extend, ExtendKind, ExtractLaneKind, FloatCmpKind, HandleOverflowKind,
1717
Imm as I, IntCmpKind, LoadKind, MacroAssembler as Masm, MulWideKind, OperandSize, RegImm,
1818
RemKind, ReplaceLaneKind, RmwOp, RoundingMode, SPOffset, ShiftKind, SplatKind, StackSlot,
19-
StoreKind, TrapCode, TruncKind, V128ConvertKind, V128ExtendKind, V128NarrowKind,
20-
VectorCompareKind, VectorEqualityKind, Zero, TRUSTED_FLAGS, UNTRUSTED_FLAGS,
19+
StoreKind, TrapCode, TruncKind, V128AbsKind, V128ConvertKind, V128ExtendKind,
20+
V128NarrowKind, VectorCompareKind, VectorEqualityKind, Zero, TRUSTED_FLAGS,
21+
UNTRUSTED_FLAGS,
2122
},
2223
stack::TypedReg,
2324
};
@@ -1158,6 +1159,10 @@ impl Masm for MacroAssembler {
11581159
Err(anyhow!(CodeGenError::unimplemented_masm_instruction()))
11591160
}
11601161

1162+
fn v128_abs(&mut self, _src: Reg, _dst: WritableReg, _kind: V128AbsKind) -> Result<()> {
1163+
bail!(CodeGenError::unimplemented_masm_instruction())
1164+
}
1165+
11611166
fn v128_neg(&mut self, _op: WritableReg, _size: OperandSize) -> Result<()> {
11621167
Err(anyhow!(CodeGenError::unimplemented_masm_instruction()))
11631168
}

winch/codegen/src/isa/x64/asm.rs

+52
Original file line number | Diff line number | Diff line change
@@ -2096,6 +2096,7 @@ impl Assembler {
20962096
pub fn xmm_vpsrl_rr(&mut self, src: Reg, dst: WritableReg, imm: u32, size: OperandSize) {
20972097
let op = match size {
20982098
OperandSize::S32 => AvxOpcode::Vpsrld,
2099+
OperandSize::S64 => AvxOpcode::Vpsrlq,
20992100
_ => unimplemented!(),
21002101
};
21012102

@@ -2111,6 +2112,7 @@ impl Assembler {
21112112
pub fn xmm_vpsub_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
21122113
let op = match size {
21132114
OperandSize::S32 => AvxOpcode::Vpsubd,
2115+
OperandSize::S64 => AvxOpcode::Vpsubq,
21142116
_ => unimplemented!(),
21152117
};
21162118

@@ -2442,6 +2444,56 @@ impl Assembler {
24422444
dst: dst.map(Into::into),
24432445
});
24442446
}
2447+
2448+
/// Compute the absolute value of each lane in vector `src` and put the
2449+
/// results in `dst`; only 8-, 16- and 32-bit lanes are supported.
2450+
pub fn xmm_vpabs_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
2451+
let op = match size {
2452+
OperandSize::S8 => AvxOpcode::Vpabsb,
2453+
OperandSize::S16 => AvxOpcode::Vpabsw,
2454+
OperandSize::S32 => AvxOpcode::Vpabsd,
2455+
_ => unimplemented!(),
2456+
};
2457+
2458+
self.emit(Inst::XmmUnaryRmRVex {
2459+
op,
2460+
src: src.into(),
2461+
dst: dst.to_reg().into(),
2462+
});
2463+
}
2464+
2465+
/// Arithmetic (sign-preserving) right shift of each lane in vector `src` by
2466+
/// `imm`, with the result written to `dst`; only 32-bit lanes are supported.
2467+
pub fn xmm_vpsra_rri(&mut self, src: Reg, dst: WritableReg, imm: u32, size: OperandSize) {
2468+
let op = match size {
2469+
OperandSize::S32 => AvxOpcode::Vpsrad,
2470+
_ => unimplemented!(),
2471+
};
2472+
2473+
self.emit(Inst::XmmRmiRVex {
2474+
op,
2475+
src1: src.into(),
2476+
src2: XmmMemImm::unwrap_new(RegMemImm::imm(imm)),
2477+
dst: dst.to_reg().into(),
2478+
});
2479+
}
2480+
2481+
/// Perform a bitwise `and` of the float vectors in `src1` and `src2` and
2482+
/// put the result in `dst`; supports 32- and 64-bit lanes.
2483+
pub fn xmm_vandp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2484+
let op = match size {
2485+
OperandSize::S32 => AvxOpcode::Vandps,
2486+
OperandSize::S64 => AvxOpcode::Vandpd,
2487+
_ => unimplemented!(),
2488+
};
2489+
2490+
self.emit(Inst::XmmRmiRVex {
2491+
op,
2492+
src1: src1.into(),
2493+
src2: src2.into(),
2494+
dst: dst.to_reg().into(),
2495+
});
2496+
}
24452497
}
24462498

24472499
/// Captures the region in a MachBuffer where an add-with-immediate instruction would be emitted,

0 commit comments

Comments
 (0)