@@ -3815,91 +3815,25 @@ define dso_local i64 @idx_scalar_dec(ptr %a, ptr %b, i64 %ii, i64 %n) {
38153815; NO-EVL-NEXT: [[CMP_NOT9:%.*]] = icmp eq i64 [[N:%.*]], 0
38163816; NO-EVL-NEXT: br i1 [[CMP_NOT9]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
38173817; NO-EVL: for.body.preheader:
3818- ; NO-EVL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 8
3819- ; NO-EVL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
3820- ; NO-EVL: vector.ph:
3821- ; NO-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 8
3822- ; NO-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
3823- ; NO-EVL-NEXT: [[IND_END:%.*]] = sub i64 [[N]], [[N_VEC]]
3824- ; NO-EVL-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[N]], i64 0
3825- ; NO-EVL-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
3826- ; NO-EVL-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[DOTSPLAT]], <i64 0, i64 -1, i64 -2, i64 -3>
3827- ; NO-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
3828- ; NO-EVL: vector.body:
3829- ; NO-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3830- ; NO-EVL-NEXT: [[CSA_MASK_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[CSA_MASK_SEL8:%.*]], [[VECTOR_BODY]] ]
3831- ; NO-EVL-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
3832- ; NO-EVL-NEXT: [[CSA_DATA_PHI:%.*]] = phi <4 x i64> [ poison, [[VECTOR_PH]] ], [ [[CSA_DATA_SEL9:%.*]], [[VECTOR_BODY]] ]
3833- ; NO-EVL-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], <i64 -4, i64 -4, i64 -4, i64 -4>
3834- ; NO-EVL-NEXT: [[TMP0:%.*]] = add <4 x i64> [[VEC_IND]], <i64 -1, i64 -1, i64 -1, i64 -1>
3835- ; NO-EVL-NEXT: [[TMP1:%.*]] = add <4 x i64> [[STEP_ADD]], <i64 -1, i64 -1, i64 -1, i64 -1>
3836- ; NO-EVL-NEXT: [[TMP2:%.*]] = extractelement <4 x i64> [[TMP0]], i32 0
3837- ; NO-EVL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]]
3838- ; NO-EVL-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP1]], i32 0
3839- ; NO-EVL-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
3840- ; NO-EVL-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
3841- ; NO-EVL-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 -3
3842- ; NO-EVL-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 -4
3843- ; NO-EVL-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 -3
3844- ; NO-EVL-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP7]], align 8
3845- ; NO-EVL-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
3846- ; NO-EVL-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i64>, ptr [[TMP9]], align 8
3847- ; NO-EVL-NEXT: [[REVERSE3:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD2]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
3848- ; NO-EVL-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP2]]
3849- ; NO-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP4]]
3850- ; NO-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 0
3851- ; NO-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 -3
3852- ; NO-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 -4
3853- ; NO-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP14]], i32 -3
3854- ; NO-EVL-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP13]], align 8
3855- ; NO-EVL-NEXT: [[REVERSE5:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD4]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
3856- ; NO-EVL-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i64>, ptr [[TMP15]], align 8
3857- ; NO-EVL-NEXT: [[REVERSE7:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD6]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
3858- ; NO-EVL-NEXT: [[TMP16:%.*]] = icmp sgt <4 x i64> [[REVERSE]], [[REVERSE5]]
3859- ; NO-EVL-NEXT: [[TMP17:%.*]] = icmp sgt <4 x i64> [[REVERSE3]], [[REVERSE7]]
3860- ; NO-EVL-NEXT: [[TMP18:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP16]])
3861- ; NO-EVL-NEXT: [[TMP19:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP17]])
3862- ; NO-EVL-NEXT: [[CSA_MASK_SEL:%.*]] = select i1 [[TMP18]], <4 x i1> [[TMP16]], <4 x i1> [[CSA_MASK_PHI]]
3863- ; NO-EVL-NEXT: [[CSA_MASK_SEL8]] = select i1 [[TMP19]], <4 x i1> [[TMP17]], <4 x i1> [[CSA_MASK_SEL]]
3864- ; NO-EVL-NEXT: [[CSA_DATA_SEL:%.*]] = select i1 [[TMP18]], <4 x i64> [[VEC_IND]], <4 x i64> [[CSA_DATA_PHI]]
3865- ; NO-EVL-NEXT: [[CSA_DATA_SEL9]] = select i1 [[TMP19]], <4 x i64> [[STEP_ADD]], <4 x i64> [[CSA_DATA_SEL]]
3866- ; NO-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
3867- ; NO-EVL-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], <i64 -4, i64 -4, i64 -4, i64 -4>
3868- ; NO-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
3869- ; NO-EVL-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
3870- ; NO-EVL: middle.block:
3871- ; NO-EVL-NEXT: [[TMP21:%.*]] = select <4 x i1> [[CSA_MASK_SEL8]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> zeroinitializer
3872- ; NO-EVL-NEXT: [[TMP22:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP21]])
3873- ; NO-EVL-NEXT: [[TMP23:%.*]] = extractelement <4 x i1> [[CSA_MASK_SEL8]], i64 0
3874- ; NO-EVL-NEXT: [[TMP24:%.*]] = icmp eq i32 [[TMP22]], 0
3875- ; NO-EVL-NEXT: [[TMP25:%.*]] = and i1 [[TMP23]], [[TMP24]]
3876- ; NO-EVL-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i32 0, i32 -1
3877- ; NO-EVL-NEXT: [[CSA_EXTRACT:%.*]] = extractelement <4 x i64> [[CSA_DATA_SEL9]], i32 [[TMP26]]
3878- ; NO-EVL-NEXT: [[TMP27:%.*]] = icmp sge i32 [[TMP26]], 0
3879- ; NO-EVL-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], i64 [[CSA_EXTRACT]], i64 [[II:%.*]]
3880- ; NO-EVL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
3881- ; NO-EVL-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
3882- ; NO-EVL: scalar.ph:
3883- ; NO-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[N]], [[FOR_BODY_PREHEADER]] ]
38843818; NO-EVL-NEXT: br label [[FOR_BODY:%.*]]
38853819; NO-EVL: for.cond.cleanup.loopexit:
3886- ; NO-EVL-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND:%.*]], [[FOR_BODY]] ], [ [[TMP28]], [[MIDDLE_BLOCK]] ]
3820+ ; NO-EVL-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND:%.*]], [[FOR_BODY]] ]
38873821; NO-EVL-NEXT: br label [[FOR_COND_CLEANUP]]
38883822; NO-EVL: for.cond.cleanup:
3889- ; NO-EVL-NEXT: [[IDX_0_LCSSA:%.*]] = phi i64 [ [[II]], [[ENTRY:%.*]] ], [ [[COND_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
3823+ ; NO-EVL-NEXT: [[IDX_0_LCSSA:%.*]] = phi i64 [ [[II:%.* ]], [[ENTRY:%.*]] ], [ [[COND_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT:%.* ]] ]
38903824; NO-EVL-NEXT: ret i64 [[IDX_0_LCSSA]]
38913825; NO-EVL: for.body:
3892- ; NO-EVL-NEXT: [[I_011:%.*]] = phi i64 [ [[SUB:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL ]], [[SCALAR_PH ]] ]
3893- ; NO-EVL-NEXT: [[IDX_010:%.*]] = phi i64 [ [[COND]], [[FOR_BODY]] ], [ [[II]], [[SCALAR_PH ]] ]
3826+ ; NO-EVL-NEXT: [[I_011:%.*]] = phi i64 [ [[SUB:%.*]], [[FOR_BODY]] ], [ [[N ]], [[FOR_BODY_PREHEADER ]] ]
3827+ ; NO-EVL-NEXT: [[IDX_010:%.*]] = phi i64 [ [[COND]], [[FOR_BODY]] ], [ [[II]], [[FOR_BODY_PREHEADER ]] ]
38943828; NO-EVL-NEXT: [[SUB]] = add i64 [[I_011]], -1
3895- ; NO-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[SUB]]
3896- ; NO-EVL-NEXT: [[TMP29 :%.*]] = load i64, ptr [[ARRAYIDX]], align 8
3897- ; NO-EVL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[SUB]]
3898- ; NO-EVL-NEXT: [[TMP30 :%.*]] = load i64, ptr [[ARRAYIDX2]], align 8
3899- ; NO-EVL-NEXT: [[CMP3:%.*]] = icmp sgt i64 [[TMP29 ]], [[TMP30 ]]
3829+ ; NO-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.* ]], i64 [[SUB]]
3830+ ; NO-EVL-NEXT: [[TMP0 :%.*]] = load i64, ptr [[ARRAYIDX]], align 8
3831+ ; NO-EVL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[B:%.* ]], i64 [[SUB]]
3832+ ; NO-EVL-NEXT: [[TMP1 :%.*]] = load i64, ptr [[ARRAYIDX2]], align 8
3833+ ; NO-EVL-NEXT: [[CMP3:%.*]] = icmp sgt i64 [[TMP0 ]], [[TMP1 ]]
39003834; NO-EVL-NEXT: [[COND]] = select i1 [[CMP3]], i64 [[I_011]], i64 [[IDX_010]]
39013835; NO-EVL-NEXT: [[CMP_NOT:%.*]] = icmp eq i64 [[SUB]], 0
3902- ; NO-EVL-NEXT: br i1 [[CMP_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
3836+ ; NO-EVL-NEXT: br i1 [[CMP_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]]
39033837;
39043838; DATA-LABEL: @idx_scalar_dec(
39053839; DATA-NEXT: entry:
@@ -4037,7 +3971,7 @@ define i32 @simple_csa_int_select_neg_cond(i32 %N, ptr %data) {
40373971; NO-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
40383972; NO-EVL-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
40393973; NO-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
4040- ; NO-EVL-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18 :![0-9]+]]
3974+ ; NO-EVL-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16 :![0-9]+]]
40413975; NO-EVL: middle.block:
40423976; NO-EVL-NEXT: [[CSA_STEP:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
40433977; NO-EVL-NEXT: [[TMP20:%.*]] = select <vscale x 4 x i1> [[CSA_MASK_SEL]], <vscale x 4 x i32> [[CSA_STEP]], <vscale x 4 x i32> zeroinitializer
@@ -4070,7 +4004,7 @@ define i32 @simple_csa_int_select_neg_cond(i32 %N, ptr %data) {
40704004; NO-EVL-NEXT: [[SPEC_SELECT]] = select i1 [[CMP1_NOT]], i32 [[T_010]], i32 [[TMP28]]
40714005; NO-EVL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
40724006; NO-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
4073- ; NO-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP19 :![0-9]+]]
4007+ ; NO-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP17 :![0-9]+]]
40744008;
40754009; DATA-LABEL: @simple_csa_int_select_neg_cond(
40764010; DATA-NEXT: entry:
@@ -4207,7 +4141,7 @@ define ptr @simple_csa_ptr_select(i32 %N, ptr %data) {
42074141; NO-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
42084142; NO-EVL-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
42094143; NO-EVL-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
4210- ; NO-EVL-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20 :![0-9]+]]
4144+ ; NO-EVL-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18 :![0-9]+]]
42114145; NO-EVL: middle.block:
42124146; NO-EVL-NEXT: [[CSA_STEP:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
42134147; NO-EVL-NEXT: [[TMP19:%.*]] = select <vscale x 2 x i1> [[CSA_MASK_SEL]], <vscale x 2 x i32> [[CSA_STEP]], <vscale x 2 x i32> zeroinitializer
@@ -4241,7 +4175,7 @@ define ptr @simple_csa_ptr_select(i32 %N, ptr %data) {
42414175; NO-EVL-NEXT: [[SPEC_SELECT]] = select i1 [[CMP1]], ptr [[TMP27]], ptr [[T_010]]
42424176; NO-EVL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
42434177; NO-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
4244- ; NO-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP21 :![0-9]+]]
4178+ ; NO-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP19 :![0-9]+]]
42454179;
42464180; DATA-LABEL: @simple_csa_ptr_select(
42474181; DATA-NEXT: entry:
0 commit comments