Skip to content

Commit ddee2d8

Browse files
authored
[X86][APX] Combine (X86Sub 0, AND(X, Y)) to (X86And X, Y) for CLOAD/CSTORE (#136429)
https://godbolt.org/z/TsWochrbf
1 parent 0e3e0bf commit ddee2d8

File tree

2 files changed

+59
-9
lines changed

2 files changed

+59
-9
lines changed

Diff for: llvm/lib/Target/X86/X86ISelLowering.cpp

+23 -9
Original file line number | Diff line number | Diff line change
@@ -57808,27 +57808,41 @@ static SDValue combineSubSetcc(SDNode *N, SelectionDAG &DAG) {
5780857808
}
5780957809

5781057810
static SDValue combineX86CloadCstore(SDNode *N, SelectionDAG &DAG) {
57811-
// res, flags2 = sub 0, (setcc cc, flag)
57812-
// cload/cstore ..., cond_ne, flag2
57813-
// ->
57814-
// cload/cstore cc, flag
5781557811
if (N->getConstantOperandVal(3) != X86::COND_NE)
5781657812
return SDValue();
5781757813

5781857814
SDValue Sub = N->getOperand(4);
5781957815
if (Sub.getOpcode() != X86ISD::SUB)
5782057816
return SDValue();
5782157817

57822-
SDValue SetCC = Sub.getOperand(1);
57818+
SDValue Op1 = Sub.getOperand(1);
5782357819

57824-
if (!X86::isZeroNode(Sub.getOperand(0)) || SetCC.getOpcode() != X86ISD::SETCC)
57820+
if (!X86::isZeroNode(Sub.getOperand(0)))
5782557821
return SDValue();
5782657822

57823+
SDLoc DL(N);
5782757824
SmallVector<SDValue, 5> Ops(N->op_values());
57828-
Ops[3] = SetCC.getOperand(0);
57829-
Ops[4] = SetCC.getOperand(1);
57825+
if (Op1.getOpcode() == X86ISD::SETCC) {
57826+
// res, flags2 = sub 0, (setcc cc, flag)
57827+
// cload/cstore ..., cond_ne, flag2
57828+
// ->
57829+
// cload/cstore cc, flag
57830+
Ops[3] = Op1.getOperand(0);
57831+
Ops[4] = Op1.getOperand(1);
57832+
} else if (Op1.getOpcode() == ISD::AND && Sub.getValue(0).use_empty()) {
57833+
// res, flags2 = sub 0, (and X, Y)
57834+
// cload/cstore ..., cond_ne, flag2
57835+
// ->
57836+
// res, flags2 = and X, Y
57837+
// cload/cstore ..., cond_ne, flag2
57838+
Ops[4] = DAG.getNode(X86ISD::AND, DL, Sub->getVTList(), Op1.getOperand(0),
57839+
Op1.getOperand(1))
57840+
.getValue(1);
57841+
} else {
57842+
return SDValue();
57843+
}
5783057844

57831-
return DAG.getMemIntrinsicNode(N->getOpcode(), SDLoc(N), N->getVTList(), Ops,
57845+
return DAG.getMemIntrinsicNode(N->getOpcode(), DL, N->getVTList(), Ops,
5783257846
cast<MemSDNode>(N)->getMemoryVT(),
5783357847
cast<MemSDNode>(N)->getMemOperand());
5783457848
}

Diff for: llvm/test/CodeGen/X86/apx/cf.ll

+36
Original file line number | Diff line number | Diff line change
@@ -158,3 +158,39 @@ entry:
158158
tail call void @llvm.masked.store.v1i16.p0(<1 x i16> %5, ptr %p, i32 2, <1 x i1> %1)
159159
ret void
160160
}
161+
162+
define void @load_zext(i1 %cond, ptr %b, ptr %p) {
163+
; CHECK-LABEL: load_zext:
164+
; CHECK: # %bb.0: # %entry
165+
; CHECK-NEXT: andb $1, %dil
166+
; CHECK-NEXT: cfcmovnew (%rsi), %ax
167+
; CHECK-NEXT: movzwl %ax, %eax
168+
; CHECK-NEXT: cfcmovnel %eax, (%rdx)
169+
; CHECK-NEXT: retq
170+
entry:
171+
%0 = bitcast i1 %cond to <1 x i1>
172+
%1 = call <1 x i16> @llvm.masked.load.v1i16.p0(ptr %b, i32 2, <1 x i1> %0, <1 x i16> poison)
173+
%2 = bitcast <1 x i16> %1 to i16
174+
%zext = zext i16 %2 to i32
175+
%3 = bitcast i32 %zext to <1 x i32>
176+
call void @llvm.masked.store.v1i32.p0(<1 x i32> %3, ptr %p, i32 4, <1 x i1> %0)
177+
ret void
178+
}
179+
180+
define void @load_sext(i1 %cond, ptr %b, ptr %p) {
181+
; CHECK-LABEL: load_sext:
182+
; CHECK: # %bb.0: # %entry
183+
; CHECK-NEXT: andb $1, %dil
184+
; CHECK-NEXT: cfcmovnel (%rsi), %eax
185+
; CHECK-NEXT: cltq
186+
; CHECK-NEXT: cfcmovneq %rax, (%rdx)
187+
; CHECK-NEXT: retq
188+
entry:
189+
%0 = bitcast i1 %cond to <1 x i1>
190+
%1 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr %b, i32 2, <1 x i1> %0, <1 x i32> poison)
191+
%2 = bitcast <1 x i32> %1 to i32
192+
%zext = sext i32 %2 to i64
193+
%3 = bitcast i64 %zext to <1 x i64>
194+
call void @llvm.masked.store.v1i64.p0(<1 x i64> %3, ptr %p, i32 4, <1 x i1> %0)
195+
ret void
196+
}

0 commit comments

Comments
 (0)