static Value *simplifyX86immShift(const IntrinsicInst &II,
                                  InstCombiner::BuilderTy &Builder) {
  bool LogicalShift = false;
  bool ShiftLeft = false;
  bool IsImm = false;

  switch (II.getIntrinsicID()) {
  default:
    llvm_unreachable("Unexpected intrinsic!");
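  // PSRA - arithmetic right shifts (immediate and vector-amount forms).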
  case Intrinsic::x86_sse2_psrai_d:
  case Intrinsic::x86_sse2_psrai_w:
  case Intrinsic::x86_avx2_psrai_d:
  case Intrinsic::x86_avx2_psrai_w:
  case Intrinsic::x86_avx512_psrai_q_128:
  case Intrinsic::x86_avx512_psrai_q_256:
  case Intrinsic::x86_avx512_psrai_d_512:
  case Intrinsic::x86_avx512_psrai_q_512:
  case Intrinsic::x86_avx512_psrai_w_512:
    IsImm = true;
    [[fallthrough]];
  case Intrinsic::x86_sse2_psra_d:
  case Intrinsic::x86_sse2_psra_w:
  case Intrinsic::x86_avx2_psra_d:
  case Intrinsic::x86_avx2_psra_w:
  case Intrinsic::x86_avx512_psra_q_128:
  case Intrinsic::x86_avx512_psra_q_256:
  case Intrinsic::x86_avx512_psra_d_512:
  case Intrinsic::x86_avx512_psra_q_512:
  case Intrinsic::x86_avx512_psra_w_512:
    LogicalShift = false;
    ShiftLeft = false;
    break;
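  // PSRL - logical right shifts.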
  case Intrinsic::x86_sse2_psrli_d:
  case Intrinsic::x86_sse2_psrli_q:
  case Intrinsic::x86_sse2_psrli_w:
  case Intrinsic::x86_avx2_psrli_d:
  case Intrinsic::x86_avx2_psrli_q:
  case Intrinsic::x86_avx2_psrli_w:
  case Intrinsic::x86_avx512_psrli_d_512:
  case Intrinsic::x86_avx512_psrli_q_512:
  case Intrinsic::x86_avx512_psrli_w_512:
    IsImm = true;
    [[fallthrough]];
  case Intrinsic::x86_sse2_psrl_d:
  case Intrinsic::x86_sse2_psrl_q:
  case Intrinsic::x86_sse2_psrl_w:
  case Intrinsic::x86_avx2_psrl_d:
  case Intrinsic::x86_avx2_psrl_q:
  case Intrinsic::x86_avx2_psrl_w:
  case Intrinsic::x86_avx512_psrl_d_512:
  case Intrinsic::x86_avx512_psrl_q_512:
  case Intrinsic::x86_avx512_psrl_w_512:
    LogicalShift = true;
    ShiftLeft = false;
    break;
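  // PSLL - logical left shifts.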
  case Intrinsic::x86_sse2_pslli_d:
  case Intrinsic::x86_sse2_pslli_q:
  case Intrinsic::x86_sse2_pslli_w:
  case Intrinsic::x86_avx2_pslli_d:
  case Intrinsic::x86_avx2_pslli_q:
  case Intrinsic::x86_avx2_pslli_w:
  case Intrinsic::x86_avx512_pslli_d_512:
  case Intrinsic::x86_avx512_pslli_q_512:
  case Intrinsic::x86_avx512_pslli_w_512:
    IsImm = true;
    [[fallthrough]];
  case Intrinsic::x86_sse2_psll_d:
  case Intrinsic::x86_sse2_psll_q:
  case Intrinsic::x86_sse2_psll_w:
  case Intrinsic::x86_avx2_psll_d:
  case Intrinsic::x86_avx2_psll_q:
  case Intrinsic::x86_avx2_psll_w:
  case Intrinsic::x86_avx512_psll_d_512:
  case Intrinsic::x86_avx512_psll_q_512:
  case Intrinsic::x86_avx512_psll_w_512:
    LogicalShift = true;
    ShiftLeft = true;
    break;
  }
  assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");

  Value *Vec = II.getArgOperand(0);
  Value *Amt = II.getArgOperand(1);
  auto *VT = cast<FixedVectorType>(Vec->getType());
  Type *SVT = VT->getElementType();
  Type *AmtVT = Amt->getType();
  unsigned VWidth = VT->getNumElements();
  unsigned BitWidth = SVT->getPrimitiveSizeInBits();
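  // If the shift amount is guaranteed to be in range, we can replace it with
  // a generic IR shift; a guaranteed out-of-range amount folds to zero for
  // logical shifts and clamps to BitWidth - 1 for arithmetic shifts.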
  // ... (IsImm case: known bits prove the immediate is less than BitWidth)
  Amt = Builder.CreateZExtOrTrunc(Amt, SVT);
  Amt = Builder.CreateVectorSplat(VWidth, Amt);
  return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
                                    : Builder.CreateLShr(Vec, Amt))
                       : Builder.CreateAShr(Vec, Amt));
  // ... (IsImm case: the immediate is known to be out of range; logical
  // shifts have already folded to zero, arithmetic shifts clamp)
  Amt = ConstantInt::get(SVT, BitWidth - 1);
  return Builder.CreateAShr(Vec, Builder.CreateVectorSplat(VWidth, Amt));
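  // Shift-by-scalar: the amount lives in the low 64 bits of a 128-bit vector
  // operand, so all of those bits must be proven in range.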
229 "Unexpected shift-by-scalar type");
234 Amt, DemandedLower,
II.getDataLayout());
236 Amt, DemandedUpper,
II.getDataLayout());
  // ... (element 0 is in range and the other low elements are zero: splat
  // element 0 of Amt across the vector)
  SmallVector<int, 16> ZeroSplat(VWidth, 0);
  Amt = Builder.CreateShuffleVector(Amt, ZeroSplat);
  return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
                                    : Builder.CreateLShr(Vec, Amt))
                       : Builder.CreateAShr(Vec, Amt));
256 "Unexpected shift-by-scalar type");
  APInt Count(64, 0);
  for (unsigned i = 0, NumSubElts = 64 / BitWidth; i != NumSubElts; ++i) {
    unsigned SubEltIdx = (NumSubElts - 1) - i;
    auto *SubElt = cast<ConstantInt>(CDV->getElementAsConstant(SubEltIdx));
    Count <<= BitWidth;
    Count |= SubElt->getValue().zextOrTrunc(64);
  }
  // ... (a zero count returns Vec unchanged; out-of-range counts fold to zero
  // for logical shifts or clamp to BitWidth - 1 for arithmetic shifts)
  auto ShiftAmt = ConstantInt::get(SVT, Count.zextOrTrunc(BitWidth));
  auto ShiftVec = Builder.CreateVectorSplat(VWidth, ShiftAmt);

  if (ShiftLeft)
    return Builder.CreateShl(Vec, ShiftVec);

  if (LogicalShift)
    return Builder.CreateLShr(Vec, ShiftVec);

  return Builder.CreateAShr(Vec, ShiftVec);
}
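// ... (elided)

std::optional<Instruction *>
X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
  // ...
  // Helper that demands only the lowest DemandedWidth elements of Op,
  // letting the remaining elements be simplified away.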
  auto SimplifyDemandedVectorEltsLow = [&IC](Value *Op, unsigned Width,
                                             unsigned DemandedWidth) {
    APInt UndefElts(Width, 0);
    APInt DemandedElts = APInt::getLowBitsSet(Width, DemandedWidth);
    return IC.SimplifyDemandedVectorElts(Op, DemandedElts, UndefElts);
  };

  switch (II.getIntrinsicID()) {
  default:
    break;
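  // BEXTR/BEXTRI extract Length bits starting at bit Shift of the source.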
  case Intrinsic::x86_bmi_bextr_32:
  case Intrinsic::x86_bmi_bextr_64:
  case Intrinsic::x86_tbm_bextri_u32:
  case Intrinsic::x86_tbm_bextri_u64:
    // If the control operand is a constant we can try some simplifications.
    if (auto *C = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
      uint64_t Shift = C->getZExtValue();
      uint64_t Length = (Shift >> 8) & 0xff;
      Shift &= 0xff;
      unsigned BitWidth = II.getType()->getIntegerBitWidth();
      // ... (a zero length or out-of-range shift folds to zero)
      // If the source is also a constant, constant fold completely.
      if (auto *InC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
        uint64_t Result = InC->getZExtValue() >> Shift;
        Result &= maskTrailingOnes<uint64_t>(Length);
        return IC.replaceInstUsesWith(II,
                                      ConstantInt::get(II.getType(), Result));
      }
    }
    break;
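  // BZHI zeroes all bits of the source from bit Index upward.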
  case Intrinsic::x86_bmi_bzhi_32:
  case Intrinsic::x86_bmi_bzhi_64:
    if (auto *C = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
      uint64_t Index = C->getZExtValue() & 0xff;
      unsigned BitWidth = II.getType()->getIntegerBitWidth();
      // ... (Index >= BitWidth returns the source; Index == 0 returns zero)
      if (auto *InC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
        uint64_t Result = InC->getZExtValue();
        Result &= maskTrailingOnes<uint64_t>(Index);
        return IC.replaceInstUsesWith(II,
                                      ConstantInt::get(II.getType(), Result));
      }
    }
    break;
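  // PEXT gathers the source bits selected by the mask into the low bits.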
  case Intrinsic::x86_bmi_pext_32:
  case Intrinsic::x86_bmi_pext_64:
    if (auto *MaskC = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
      if (MaskC->isNullValue())
        return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), 0));
      if (MaskC->isAllOnesValue())
        return IC.replaceInstUsesWith(II, II.getArgOperand(0));

      unsigned MaskIdx, MaskLen;
      if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) {
        // A contiguous mask is just a masked logical shift right.
        Value *ShiftAmt = ConstantInt::get(II.getType(), MaskIdx);
        // ... (and the source with the mask, then lshr by ShiftAmt)
      }

      // Constant fold bit-by-bit when the source is also a constant.
      if (auto *SrcC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
        uint64_t Src = SrcC->getZExtValue();
        uint64_t Mask = MaskC->getZExtValue();
        uint64_t Result = 0;
        // ... (for each set mask bit BitToTest, lowest first:)
        if (BitToTest & Src)
          Result |= BitToSet;
        // ...
        return IC.replaceInstUsesWith(II,
                                      ConstantInt::get(II.getType(), Result));
      }
    }
    break;
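  // PDEP is the inverse: scatter the low source bits to the mask positions.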
  case Intrinsic::x86_bmi_pdep_32:
  case Intrinsic::x86_bmi_pdep_64:
    if (auto *MaskC = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
      if (MaskC->isNullValue())
        return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), 0));
      if (MaskC->isAllOnesValue())
        return IC.replaceInstUsesWith(II, II.getArgOperand(0));

      unsigned MaskIdx, MaskLen;
      if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) {
        // A contiguous mask is just a shift left followed by a mask.
        Value *ShiftAmt = ConstantInt::get(II.getType(), MaskIdx);
        // ... (shl the source by ShiftAmt, then and with the mask)
      }

      // Constant fold bit-by-bit when the source is also a constant.
      if (auto *SrcC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
        uint64_t Src = SrcC->getZExtValue();
        uint64_t Mask = MaskC->getZExtValue();
        uint64_t Result = 0;
        // ... (for each set mask bit BitToSet, testing source bit BitToTest:)
        if (BitToTest & Src)
          Result |= BitToSet;
        // ...
        return IC.replaceInstUsesWith(II,
                                      ConstantInt::get(II.getType(), Result));
      }
    }
    break;
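  // The scalar conversions only read element 0 of their input vectors, so
  // demand just the low element.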
  case Intrinsic::x86_sse_cvtss2si:
  case Intrinsic::x86_sse_cvtss2si64:
  case Intrinsic::x86_sse_cvttss2si:
  case Intrinsic::x86_sse_cvttss2si64:
  case Intrinsic::x86_sse2_cvtsd2si:
  case Intrinsic::x86_sse2_cvtsd2si64:
  case Intrinsic::x86_sse2_cvttsd2si:
  case Intrinsic::x86_sse2_cvttsd2si64:
  case Intrinsic::x86_avx512_vcvtss2si32:
  case Intrinsic::x86_avx512_vcvtss2si64:
  case Intrinsic::x86_avx512_vcvtss2usi32:
  case Intrinsic::x86_avx512_vcvtss2usi64:
  case Intrinsic::x86_avx512_vcvtsd2si32:
  case Intrinsic::x86_avx512_vcvtsd2si64:
  case Intrinsic::x86_avx512_vcvtsd2usi32:
  case Intrinsic::x86_avx512_vcvtsd2usi64:
  case Intrinsic::x86_avx512_cvttss2si:
  case Intrinsic::x86_avx512_cvttss2si64:
  case Intrinsic::x86_avx512_cvttss2usi:
  case Intrinsic::x86_avx512_cvttss2usi64:
  case Intrinsic::x86_avx512_cvttsd2si:
  case Intrinsic::x86_avx512_cvttsd2si64:
  case Intrinsic::x86_avx512_cvttsd2usi:
  case Intrinsic::x86_avx512_cvttsd2usi64: {
    Value *Arg = II.getArgOperand(0);
    unsigned VWidth = cast<FixedVectorType>(Arg->getType())->getNumElements();
    if (Value *V = SimplifyDemandedVectorEltsLow(Arg, VWidth, 1)) {
      return IC.replaceOperand(II, 0, V);
    }
    break;
  }
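  // MOVMSK copies the vector elements' sign bits to the low bits of a scalar.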
  case Intrinsic::x86_mmx_pmovmskb:
  case Intrinsic::x86_sse_movmsk_ps:
  case Intrinsic::x86_sse2_movmsk_pd:
  case Intrinsic::x86_sse2_pmovmskb_128:
  case Intrinsic::x86_avx_movmsk_pd_256:
  case Intrinsic::x86_avx_movmsk_ps_256:
  case Intrinsic::x86_avx2_pmovmskb:
    if (Value *V = simplifyX86movmsk(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
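  // COMI/UCOMI compare only the low scalar element of each operand.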
  case Intrinsic::x86_sse_comieq_ss:
  case Intrinsic::x86_sse_comige_ss:
  case Intrinsic::x86_sse_comigt_ss:
  case Intrinsic::x86_sse_comile_ss:
  case Intrinsic::x86_sse_comilt_ss:
  case Intrinsic::x86_sse_comineq_ss:
  case Intrinsic::x86_sse_ucomieq_ss:
  case Intrinsic::x86_sse_ucomige_ss:
  case Intrinsic::x86_sse_ucomigt_ss:
  case Intrinsic::x86_sse_ucomile_ss:
  case Intrinsic::x86_sse_ucomilt_ss:
  case Intrinsic::x86_sse_ucomineq_ss:
  case Intrinsic::x86_sse2_comieq_sd:
  case Intrinsic::x86_sse2_comige_sd:
  case Intrinsic::x86_sse2_comigt_sd:
  case Intrinsic::x86_sse2_comile_sd:
  case Intrinsic::x86_sse2_comilt_sd:
  case Intrinsic::x86_sse2_comineq_sd:
  case Intrinsic::x86_sse2_ucomieq_sd:
  case Intrinsic::x86_sse2_ucomige_sd:
  case Intrinsic::x86_sse2_ucomigt_sd:
  case Intrinsic::x86_sse2_ucomile_sd:
  case Intrinsic::x86_sse2_ucomilt_sd:
  case Intrinsic::x86_sse2_ucomineq_sd:
  case Intrinsic::x86_avx512_vcomi_ss:
  case Intrinsic::x86_avx512_vcomi_sd:
  case Intrinsic::x86_avx512_mask_cmp_ss:
  case Intrinsic::x86_avx512_mask_cmp_sd: {
    bool MadeChange = false;
    Value *Arg0 = II.getArgOperand(0);
    Value *Arg1 = II.getArgOperand(1);
    unsigned VWidth = cast<FixedVectorType>(Arg0->getType())->getNumElements();
    if (Value *V = SimplifyDemandedVectorEltsLow(Arg0, VWidth, 1)) {
      IC.replaceOperand(II, 0, V);
      MadeChange = true;
    }
    if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, 1)) {
      IC.replaceOperand(II, 1, V);
      MadeChange = true;
    }
    if (MadeChange)
      return &II;
    break;
  }
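  // If the rounding mode is CUR_DIRECTION (4), these lower to ordinary IR
  // floating-point operations.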
  case Intrinsic::x86_avx512_add_ps_512:
  case Intrinsic::x86_avx512_div_ps_512:
  case Intrinsic::x86_avx512_mul_ps_512:
  case Intrinsic::x86_avx512_sub_ps_512:
  case Intrinsic::x86_avx512_add_pd_512:
  case Intrinsic::x86_avx512_div_pd_512:
  case Intrinsic::x86_avx512_mul_pd_512:
  case Intrinsic::x86_avx512_sub_pd_512:
    if (auto *R = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
      if (R->getValue() == 4) {
        Value *Arg0 = II.getArgOperand(0);
        Value *Arg1 = II.getArgOperand(1);

        Value *V;
        switch (II.getIntrinsicID()) {
        default:
          llvm_unreachable("Case stmts out of sync!");
        case Intrinsic::x86_avx512_add_ps_512:
        case Intrinsic::x86_avx512_add_pd_512:
          V = IC.Builder.CreateFAdd(Arg0, Arg1);
          break;
        case Intrinsic::x86_avx512_sub_ps_512:
        case Intrinsic::x86_avx512_sub_pd_512:
          V = IC.Builder.CreateFSub(Arg0, Arg1);
          break;
        case Intrinsic::x86_avx512_mul_ps_512:
        case Intrinsic::x86_avx512_mul_pd_512:
          V = IC.Builder.CreateFMul(Arg0, Arg1);
          break;
        case Intrinsic::x86_avx512_div_ps_512:
        case Intrinsic::x86_avx512_div_pd_512:
          V = IC.Builder.CreateFDiv(Arg0, Arg1);
          break;
        }
        return IC.replaceInstUsesWith(II, V);
      }
    }
    break;
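  // Same idea for the masked scalar rounding ops: lower to scalar IR FP ops,
  // then re-apply the mask with a select on mask bit 0.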
  case Intrinsic::x86_avx512_mask_add_ss_round:
  case Intrinsic::x86_avx512_mask_div_ss_round:
  case Intrinsic::x86_avx512_mask_mul_ss_round:
  case Intrinsic::x86_avx512_mask_sub_ss_round:
  case Intrinsic::x86_avx512_mask_add_sd_round:
  case Intrinsic::x86_avx512_mask_div_sd_round:
  case Intrinsic::x86_avx512_mask_mul_sd_round:
  case Intrinsic::x86_avx512_mask_sub_sd_round:
    if (auto *R = dyn_cast<ConstantInt>(II.getArgOperand(4))) {
      if (R->getValue() == 4) {
        // Extract the lowest elements as scalars.
        Value *Arg0 = II.getArgOperand(0);
        Value *Arg1 = II.getArgOperand(1);
        Value *LHS = IC.Builder.CreateExtractElement(Arg0, (uint64_t)0);
        Value *RHS = IC.Builder.CreateExtractElement(Arg1, (uint64_t)0);

        Value *V;
        switch (II.getIntrinsicID()) {
        default:
          llvm_unreachable("Case stmts out of sync!");
        case Intrinsic::x86_avx512_mask_add_ss_round:
        case Intrinsic::x86_avx512_mask_add_sd_round:
          V = IC.Builder.CreateFAdd(LHS, RHS);
          break;
        case Intrinsic::x86_avx512_mask_sub_ss_round:
        case Intrinsic::x86_avx512_mask_sub_sd_round:
          V = IC.Builder.CreateFSub(LHS, RHS);
          break;
        case Intrinsic::x86_avx512_mask_mul_ss_round:
        case Intrinsic::x86_avx512_mask_mul_sd_round:
          V = IC.Builder.CreateFMul(LHS, RHS);
          break;
        case Intrinsic::x86_avx512_mask_div_ss_round:
        case Intrinsic::x86_avx512_mask_div_sd_round:
          V = IC.Builder.CreateFDiv(LHS, RHS);
          break;
        }

        // Handle the masking aspect of the intrinsic.
        Value *Mask = II.getArgOperand(3);
        auto *C = dyn_cast<ConstantInt>(Mask);
        // We don't need a select if we know the mask bit is a 1.
        if (!C || !C->getValue()[0]) {
          // ... (cast Mask to an i1 vector, extract bit 0, and select between
          // V and the passthru element of operand 2)
        }
        // ... (insert V into element 0 of Arg0 and replace all uses)
      }
    }
    break;
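  // Constant shift amounts fold to generic IR shifts; see simplifyX86immShift
  // above.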
  case Intrinsic::x86_sse2_psrai_d:
  case Intrinsic::x86_sse2_psrai_w:
  case Intrinsic::x86_avx2_psrai_d:
  case Intrinsic::x86_avx2_psrai_w:
  case Intrinsic::x86_avx512_psrai_q_128:
  case Intrinsic::x86_avx512_psrai_q_256:
  case Intrinsic::x86_avx512_psrai_d_512:
  case Intrinsic::x86_avx512_psrai_q_512:
  case Intrinsic::x86_avx512_psrai_w_512:
  case Intrinsic::x86_sse2_psrli_d:
  case Intrinsic::x86_sse2_psrli_q:
  case Intrinsic::x86_sse2_psrli_w:
  case Intrinsic::x86_avx2_psrli_d:
  case Intrinsic::x86_avx2_psrli_q:
  case Intrinsic::x86_avx2_psrli_w:
  case Intrinsic::x86_avx512_psrli_d_512:
  case Intrinsic::x86_avx512_psrli_q_512:
  case Intrinsic::x86_avx512_psrli_w_512:
  case Intrinsic::x86_sse2_pslli_d:
  case Intrinsic::x86_sse2_pslli_q:
  case Intrinsic::x86_sse2_pslli_w:
  case Intrinsic::x86_avx2_pslli_d:
  case Intrinsic::x86_avx2_pslli_q:
  case Intrinsic::x86_avx2_pslli_w:
  case Intrinsic::x86_avx512_pslli_d_512:
  case Intrinsic::x86_avx512_pslli_q_512:
  case Intrinsic::x86_avx512_pslli_w_512:
    if (Value *V = simplifyX86immShift(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;

  case Intrinsic::x86_sse2_psra_d:
  case Intrinsic::x86_sse2_psra_w:
  case Intrinsic::x86_avx2_psra_d:
  case Intrinsic::x86_avx2_psra_w:
  case Intrinsic::x86_avx512_psra_q_128:
  case Intrinsic::x86_avx512_psra_q_256:
  case Intrinsic::x86_avx512_psra_d_512:
  case Intrinsic::x86_avx512_psra_q_512:
  case Intrinsic::x86_avx512_psra_w_512:
  case Intrinsic::x86_sse2_psrl_d:
  case Intrinsic::x86_sse2_psrl_q:
  case Intrinsic::x86_sse2_psrl_w:
  case Intrinsic::x86_avx2_psrl_d:
  case Intrinsic::x86_avx2_psrl_q:
  case Intrinsic::x86_avx2_psrl_w:
  case Intrinsic::x86_avx512_psrl_d_512:
  case Intrinsic::x86_avx512_psrl_q_512:
  case Intrinsic::x86_avx512_psrl_w_512:
  case Intrinsic::x86_sse2_psll_d:
  case Intrinsic::x86_sse2_psll_q:
  case Intrinsic::x86_sse2_psll_w:
  case Intrinsic::x86_avx2_psll_d:
  case Intrinsic::x86_avx2_psll_q:
  case Intrinsic::x86_avx2_psll_w:
  case Intrinsic::x86_avx512_psll_d_512:
  case Intrinsic::x86_avx512_psll_q_512:
  case Intrinsic::x86_avx512_psll_w_512: {
    if (Value *V = simplifyX86immShift(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }

    // SSE2/AVX2 use only the first 64 bits of the 128-bit shift-amount
    // operand, so demand only the low half of its elements.
    Value *Arg1 = II.getArgOperand(1);
    assert(Arg1->getType()->getPrimitiveSizeInBits() % 128 == 0 &&
           "Unexpected packed shift size");
    unsigned VWidth = cast<FixedVectorType>(Arg1->getType())->getNumElements();
    if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, VWidth / 2)) {
      return IC.replaceOperand(II, 1, V);
    }
    break;
  }
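  // Per-element (variable) shifts: constant amounts fold to generic IR shifts.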
  case Intrinsic::x86_avx2_psllv_d:
  case Intrinsic::x86_avx2_psllv_d_256:
  case Intrinsic::x86_avx2_psllv_q:
  case Intrinsic::x86_avx2_psllv_q_256:
  case Intrinsic::x86_avx512_psllv_d_512:
  case Intrinsic::x86_avx512_psllv_q_512:
  case Intrinsic::x86_avx512_psllv_w_128:
  case Intrinsic::x86_avx512_psllv_w_256:
  case Intrinsic::x86_avx512_psllv_w_512:
  case Intrinsic::x86_avx2_psrav_d:
  case Intrinsic::x86_avx2_psrav_d_256:
  case Intrinsic::x86_avx512_psrav_q_128:
  case Intrinsic::x86_avx512_psrav_q_256:
  case Intrinsic::x86_avx512_psrav_d_512:
  case Intrinsic::x86_avx512_psrav_q_512:
  case Intrinsic::x86_avx512_psrav_w_128:
  case Intrinsic::x86_avx512_psrav_w_256:
  case Intrinsic::x86_avx512_psrav_w_512:
  case Intrinsic::x86_avx2_psrlv_d:
  case Intrinsic::x86_avx2_psrlv_d_256:
  case Intrinsic::x86_avx2_psrlv_q:
  case Intrinsic::x86_avx2_psrlv_q_256:
  case Intrinsic::x86_avx512_psrlv_d_512:
  case Intrinsic::x86_avx512_psrlv_q_512:
  case Intrinsic::x86_avx512_psrlv_w_128:
  case Intrinsic::x86_avx512_psrlv_w_256:
  case Intrinsic::x86_avx512_psrlv_w_512:
    if (Value *V = simplifyX86varShift(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
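  // PACKSS/PACKUS: constant-fold the saturating packs.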
  case Intrinsic::x86_sse2_packssdw_128:
  case Intrinsic::x86_sse2_packsswb_128:
  case Intrinsic::x86_avx2_packssdw:
  case Intrinsic::x86_avx2_packsswb:
  case Intrinsic::x86_avx512_packssdw_512:
  case Intrinsic::x86_avx512_packsswb_512:
    if (Value *V = simplifyX86pack(II, IC.Builder, true)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;

  case Intrinsic::x86_sse2_packuswb_128:
  case Intrinsic::x86_sse41_packusdw:
  case Intrinsic::x86_avx2_packusdw:
  case Intrinsic::x86_avx2_packuswb:
  case Intrinsic::x86_avx512_packusdw_512:
  case Intrinsic::x86_avx512_packuswb_512:
    if (Value *V = simplifyX86pack(II, IC.Builder, false)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
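  // PMULH/PMULHU/PMULHRS and PMADDWD/PMADDUBSW folds.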
  case Intrinsic::x86_sse2_pmulh_w:
  case Intrinsic::x86_avx2_pmulh_w:
  case Intrinsic::x86_avx512_pmulh_w_512:
    if (Value *V = simplifyX86pmulh(II, IC.Builder, /*IsSigned=*/true,
                                    /*IsRounding=*/false)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;

  case Intrinsic::x86_sse2_pmulhu_w:
  case Intrinsic::x86_avx2_pmulhu_w:
  case Intrinsic::x86_avx512_pmulhu_w_512:
    // ... (as above, with IsSigned = false)
    break;

  case Intrinsic::x86_ssse3_pmul_hr_sw_128:
  case Intrinsic::x86_avx2_pmul_hr_sw:
  case Intrinsic::x86_avx512_pmul_hr_sw_512:
    // ... (as above, with IsRounding = true)
    break;

  case Intrinsic::x86_sse2_pmadd_wd:
  case Intrinsic::x86_avx2_pmadd_wd:
  case Intrinsic::x86_avx512_pmaddw_d_512:
    if (Value *V = simplifyX86pmadd(II, IC.Builder, /*IsPMADDWD=*/true)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;

  case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
  case Intrinsic::x86_avx2_pmadd_ub_sw:
  case Intrinsic::x86_avx512_pmaddubs_w_512:
    // ... (as above, with IsPMADDWD = false)
    break;
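  // PCLMULQDQ reads only the even or odd quadwords of each operand, selected
  // by bits 0 and 4 of the immediate.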
  case Intrinsic::x86_pclmulqdq:
  case Intrinsic::x86_pclmulqdq_256:
  case Intrinsic::x86_pclmulqdq_512: {
    if (auto *C = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
      unsigned Imm = C->getZExtValue();

      bool MadeChange = false;
      Value *Arg0 = II.getArgOperand(0);
      Value *Arg1 = II.getArgOperand(1);
      unsigned VWidth =
          cast<FixedVectorType>(Arg0->getType())->getNumElements();

      APInt UndefElts1(VWidth, 0);
      APInt DemandedElts1 =
          APInt::getSplat(VWidth, APInt(2, (Imm & 0x01) ? 2 : 1));
      if (Value *V =
              IC.SimplifyDemandedVectorElts(Arg0, DemandedElts1, UndefElts1)) {
        IC.replaceOperand(II, 0, V);
        MadeChange = true;
      }

      APInt UndefElts2(VWidth, 0);
      APInt DemandedElts2 =
          APInt::getSplat(VWidth, APInt(2, (Imm & 0x10) ? 2 : 1));
      // ... (same simplification for Arg1; return &II if anything changed)
    }
    break;
  }
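  // INSERTPS/EXTRQ/INSERTQ folds when the control fields are constant.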
  case Intrinsic::x86_sse41_insertps:
    // ... (fold via simplifyX86insertps when the control byte is constant)
    break;

  case Intrinsic::x86_sse4a_extrq: {
    Value *Op0 = II.getArgOperand(0);
    Value *Op1 = II.getArgOperand(1);
    unsigned VWidth0 = cast<FixedVectorType>(Op0->getType())->getNumElements();
    unsigned VWidth1 = cast<FixedVectorType>(Op1->getType())->getNumElements();
    assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
           Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
           VWidth1 == 16 && "Unexpected operand sizes");

    // ... (try to fold to a constant, shuffle, or EXTRQI call when the
    // length/index fields of Op1 are constant)

    // EXTRQ only uses the lowest 64 bits of both operands.
    bool MadeChange = false;
    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
      IC.replaceOperand(II, 0, V);
      MadeChange = true;
    }
    if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 2)) {
      IC.replaceOperand(II, 1, V);
      MadeChange = true;
    }
    if (MadeChange)
      return &II;
    break;
  }
  case Intrinsic::x86_sse4a_extrqi: {
    // EXTRQI: extract Length bits starting from Index; the upper 64 bits of
    // the result are undefined.
    Value *Op0 = II.getArgOperand(0);
    unsigned VWidth = cast<FixedVectorType>(Op0->getType())->getNumElements();
    assert(Op0->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
           "Unexpected operand size");
    // ... (fold to a constant or shuffle when Length/Index are constant)
    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
      return IC.replaceOperand(II, 0, V);
    }
    break;
  }
  case Intrinsic::x86_sse4a_insertq: {
    Value *Op0 = II.getArgOperand(0);
    Value *Op1 = II.getArgOperand(1);
    unsigned VWidth = cast<FixedVectorType>(Op0->getType())->getNumElements();
    assert(Op0->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
           "Unexpected operand size");

    // ... (CI11 = constant element 1 of Op1, if any)
    const APInt &V11 = CI11->getValue();
    // ... (decode the length and index fields from V11 and try to fold to a
    // constant, shuffle, or INSERTQI call)

    // INSERTQ only uses the lowest 64 bits of the first operand.
    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
      return IC.replaceOperand(II, 0, V);
    }
    break;
  }
  case Intrinsic::x86_sse4a_insertqi: {
    // INSERTQI: insert the lowest Length bits of the second source into the
    // first source starting at bit Index. The upper 64 bits are undefined.
    Value *Op0 = II.getArgOperand(0);
    Value *Op1 = II.getArgOperand(1);
    unsigned VWidth0 = cast<FixedVectorType>(Op0->getType())->getNumElements();
    unsigned VWidth1 = cast<FixedVectorType>(Op1->getType())->getNumElements();
    assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
           Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
           VWidth1 == 2 && "Unexpected operand sizes");

    auto *CILength = dyn_cast<ConstantInt>(II.getArgOperand(2));
    auto *CIIndex = dyn_cast<ConstantInt>(II.getArgOperand(3));
    if (CILength && CIIndex) {
      APInt Len = CILength->getValue().zextOrTrunc(6);
      APInt Idx = CIIndex->getValue().zextOrTrunc(6);
      // ... (try to fold to a constant or shuffle via the decoded Len/Idx)
    }

    // INSERTQI only uses the lowest 64 bits of the first two operands.
    bool MadeChange = false;
    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
      IC.replaceOperand(II, 0, V);
      MadeChange = true;
    }
    if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 1)) {
      IC.replaceOperand(II, 1, V);
      MadeChange = true;
    }
    if (MadeChange)
      return &II;
    break;
  }
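  // BLENDV selects per element on the sign bit of the mask; with a constant
  // or sign-extended-bool mask it becomes a plain IR select.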
  case Intrinsic::x86_sse41_pblendvb:
  case Intrinsic::x86_sse41_blendvps:
  case Intrinsic::x86_sse41_blendvpd:
  case Intrinsic::x86_avx_blendv_ps_256:
  case Intrinsic::x86_avx_blendv_pd_256:
  case Intrinsic::x86_avx2_pblendvb: {
    Value *Op0 = II.getArgOperand(0);
    Value *Op1 = II.getArgOperand(1);
    Value *Mask = II.getArgOperand(2);

    // ... (trivial folds: blend(A, A, M) -> A; an all-zero mask selects Op0;
    // a constant mask becomes a select on each lane's sign bit)

    auto *MaskTy = cast<FixedVectorType>(Mask->getType());
    auto *OpTy = cast<FixedVectorType>(II.getType());
    if (MaskTy->getScalarSizeInBits() == OpTy->getScalarSizeInBits()) {
      // ... (mask and operand lanes line up)
    }

    // Peek through a one-use shuffle of the mask.
    Value *MaskSrc = nullptr;
    ArrayRef<int> ShuffleMask;
    if (match(Mask, m_OneUse(m_Shuffle(m_Value(MaskSrc), m_Undef(),
                                       m_Mask(ShuffleMask))))) {
      // Bail if the shuffle was irregular or contains undefs.
      int NumElts = cast<FixedVectorType>(MaskSrc->getType())->getNumElements();
      if (NumElts < (int)ShuffleMask.size() || !isPowerOf2_32(NumElts) ||
          any_of(ShuffleMask,
                 [NumElts](int M) { return M < 0 || M >= NumElts; }))
        break;
      // ... (continue the analysis on the shuffle's source mask)
    }

    // Convert to a vector select if the mask is a sign-extended bool vector.
    // ... (BoolVec = the <N x i1> value feeding a sext into Mask)
    unsigned NumMaskElts = MaskTy->getNumElements();
    unsigned NumOperandElts = OpTy->getNumElements();

    // If we peeked through a shuffle, re-apply it to the bool vector.
    if (MaskSrc) {
      unsigned NumMaskSrcElts =
          cast<FixedVectorType>(MaskSrc->getType())->getNumElements();
      NumMaskElts = (ShuffleMask.size() * NumMaskElts) / NumMaskSrcElts;
      // Multiple mask bits mapping to one operand element - bail out.
      if (NumMaskElts > NumOperandElts)
        break;
      // ... (scale ShuffleMask and apply it to BoolVec)
    }
    assert(MaskTy->getPrimitiveSizeInBits() ==
               OpTy->getPrimitiveSizeInBits() &&
           "Not expecting mask and operands with different sizes");

    if (NumMaskElts == NumOperandElts) {
      return SelectInst::Create(BoolVec, Op1, Op0);
    }

    // A narrower mask means each mask bit covers several operand elements:
    // bitcast the operands, select, and cast the result back.
    if (NumMaskElts < NumOperandElts) {
      // ...
    }
    break;
  }
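  // PSHUFB/VPERMILVAR/VPERMV: constant control masks fold to shufflevector.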
  case Intrinsic::x86_ssse3_pshuf_b_128:
  case Intrinsic::x86_avx2_pshuf_b:
  case Intrinsic::x86_avx512_pshuf_b_512: {
    if (Value *V = simplifyX86pshufb(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
  }

  case Intrinsic::x86_avx_vpermilvar_ps:
  case Intrinsic::x86_avx_vpermilvar_ps_256:
  case Intrinsic::x86_avx512_vpermilvar_ps_512: {
    if (Value *V = simplifyX86vpermilvar(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
  }

  case Intrinsic::x86_avx_vpermilvar_pd:
  case Intrinsic::x86_avx_vpermilvar_pd_256:
  case Intrinsic::x86_avx512_vpermilvar_pd_512: {
    if (Value *V = simplifyX86vpermilvar(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
  }

  case Intrinsic::x86_avx2_permd:
  case Intrinsic::x86_avx2_permps:
  case Intrinsic::x86_avx512_permvar_df_256:
  case Intrinsic::x86_avx512_permvar_df_512:
  case Intrinsic::x86_avx512_permvar_di_256:
  case Intrinsic::x86_avx512_permvar_di_512:
  case Intrinsic::x86_avx512_permvar_hi_128:
  case Intrinsic::x86_avx512_permvar_hi_256:
  case Intrinsic::x86_avx512_permvar_hi_512:
  case Intrinsic::x86_avx512_permvar_qi_128:
  case Intrinsic::x86_avx512_permvar_qi_256:
  case Intrinsic::x86_avx512_permvar_qi_512:
  case Intrinsic::x86_avx512_permvar_sf_512:
  case Intrinsic::x86_avx512_permvar_si_512:
    if (Value *V = simplifyX86vpermv(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
  case Intrinsic::x86_avx512_vpermi2var_d_128:
  case Intrinsic::x86_avx512_vpermi2var_d_256:
  case Intrinsic::x86_avx512_vpermi2var_d_512:
  case Intrinsic::x86_avx512_vpermi2var_hi_128:
  case Intrinsic::x86_avx512_vpermi2var_hi_256:
  case Intrinsic::x86_avx512_vpermi2var_hi_512:
  case Intrinsic::x86_avx512_vpermi2var_pd_128:
  case Intrinsic::x86_avx512_vpermi2var_pd_256:
  case Intrinsic::x86_avx512_vpermi2var_pd_512:
  case Intrinsic::x86_avx512_vpermi2var_ps_128:
  case Intrinsic::x86_avx512_vpermi2var_ps_256:
  case Intrinsic::x86_avx512_vpermi2var_ps_512:
  case Intrinsic::x86_avx512_vpermi2var_q_128:
  case Intrinsic::x86_avx512_vpermi2var_q_256:
  case Intrinsic::x86_avx512_vpermi2var_q_512:
  case Intrinsic::x86_avx512_vpermi2var_qi_128:
  case Intrinsic::x86_avx512_vpermi2var_qi_256:
  case Intrinsic::x86_avx512_vpermi2var_qi_512:
    // ... (fold constant two-source selection masks to a shufflevector)
    break;
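  // Masked loads/stores simplify when the mask is constant (to plain memory
  // ops or generic masked intrinsics).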
  case Intrinsic::x86_avx_maskload_ps:
  case Intrinsic::x86_avx_maskload_pd:
  case Intrinsic::x86_avx_maskload_ps_256:
  case Intrinsic::x86_avx_maskload_pd_256:
  case Intrinsic::x86_avx2_maskload_d:
  case Intrinsic::x86_avx2_maskload_q:
  case Intrinsic::x86_avx2_maskload_d_256:
  case Intrinsic::x86_avx2_maskload_q_256:
    if (Instruction *I = simplifyX86MaskedLoad(II, IC)) {
      return I;
    }
    break;

  case Intrinsic::x86_sse2_maskmov_dqu:
  case Intrinsic::x86_avx_maskstore_ps:
  case Intrinsic::x86_avx_maskstore_pd:
  case Intrinsic::x86_avx_maskstore_ps_256:
  case Intrinsic::x86_avx_maskstore_pd_256:
  case Intrinsic::x86_avx2_maskstore_d:
  case Intrinsic::x86_avx2_maskstore_q:
  case Intrinsic::x86_avx2_maskstore_d_256:
  case Intrinsic::x86_avx2_maskstore_q_256:
    if (simplifyX86MaskedStore(II, IC)) {
      return nullptr;
    }
    break;
  case Intrinsic::x86_addcarry_32:
  case Intrinsic::x86_addcarry_64:
    if (Value *V = simplifyX86addcarry(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
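  // PTERNLOG with a constant control byte folds to plain logic ops.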
  case Intrinsic::x86_avx512_pternlog_d_128:
  case Intrinsic::x86_avx512_pternlog_d_256:
  case Intrinsic::x86_avx512_pternlog_d_512:
  case Intrinsic::x86_avx512_pternlog_q_128:
  case Intrinsic::x86_avx512_pternlog_q_256:
  case Intrinsic::x86_avx512_pternlog_q_512:
    if (Value *V = simplifyTernarylogic(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
  }
  return std::nullopt;
}
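// ... (elided)

// simplifyDemandedVectorEltsIntrinsic - drop vector lanes of the intrinsic's
// operands that are provably not needed for the demanded result elements.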
std::optional<Value *> X86TTIImpl::simplifyDemandedVectorEltsIntrinsic(
    InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
    APInt &UndefElts2, APInt &UndefElts3,
    std::function<void(Instruction *, unsigned, APInt, APInt &)>
        simplifyAndSetOp) const {
  unsigned VWidth = cast<FixedVectorType>(II.getType())->getNumElements();

  switch (II.getIntrinsicID()) {
  default:
    break;
  case Intrinsic::x86_xop_vfrcz_ss:
  case Intrinsic::x86_xop_vfrcz_sd:
    // These zero the upper elements rather than passing them through, so an
    // unused low element means the whole result is zero.
    if (!DemandedElts[0]) {
      IC.addToWorklist(&II);
      return ConstantAggregateZero::get(II.getType());
    }
    // Only the lower element of the input is used.
    DemandedElts = 1;
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
    // Only the lower element can be undefined; the rest are zero.
    UndefElts = UndefElts[0];
    break;
  // Unary scalar-as-vector operations that work column-wise.
  case Intrinsic::x86_sse_rcp_ss:
  case Intrinsic::x86_sse_rsqrt_ss:
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);

    // If the lowest element isn't used, pass operand 0 through.
    if (!DemandedElts[0]) {
      IC.addToWorklist(&II);
      return II.getArgOperand(0);
    }
    break;
  // Binary scalar-as-vector operations that work column-wise. The high
  // elements come from operand 0.
  case Intrinsic::x86_sse_min_ss:
  case Intrinsic::x86_sse_max_ss:
  case Intrinsic::x86_sse_cmp_ss:
  case Intrinsic::x86_sse2_min_sd:
  case Intrinsic::x86_sse2_max_sd:
  case Intrinsic::x86_sse2_cmp_sd: {
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);

    // If the lowest element isn't used, pass operand 0 through.
    if (!DemandedElts[0]) {
      IC.addToWorklist(&II);
      return II.getArgOperand(0);
    }

    // Only the lowest element of operand 1 is demanded.
    DemandedElts = 1;
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);

    // The low element is undefined only if both low inputs are undefined.
    // Consider undef & 0: the result is known zero, not undef.
    if (!UndefElts2[0])
      UndefElts.clearBit(0);
    break;
  }
  case Intrinsic::x86_sse41_round_ss:
  case Intrinsic::x86_sse41_round_sd: {
    // The low result element comes from operand 1; the rest from operand 0.
    APInt DemandedElts2 = DemandedElts;
    DemandedElts2.clearBit(0);
    simplifyAndSetOp(&II, 0, DemandedElts2, UndefElts);

    // If the lowest element isn't used, pass operand 0 through.
    if (!DemandedElts[0]) {
      IC.addToWorklist(&II);
      return II.getArgOperand(0);
    }

    // Only the lowest element of operand 1 is demanded.
    DemandedElts = 1;
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);

    UndefElts.clearBit(0);
    UndefElts |= UndefElts2[0];
    break;
  }
  case Intrinsic::x86_avx512_mask_add_ss_round:
  case Intrinsic::x86_avx512_mask_div_ss_round:
  case Intrinsic::x86_avx512_mask_mul_ss_round:
  case Intrinsic::x86_avx512_mask_sub_ss_round:
  case Intrinsic::x86_avx512_mask_max_ss_round:
  case Intrinsic::x86_avx512_mask_min_ss_round:
  case Intrinsic::x86_avx512_mask_add_sd_round:
  case Intrinsic::x86_avx512_mask_div_sd_round:
  case Intrinsic::x86_avx512_mask_mul_sd_round:
  case Intrinsic::x86_avx512_mask_sub_sd_round:
  case Intrinsic::x86_avx512_mask_max_sd_round:
  case Intrinsic::x86_avx512_mask_min_sd_round:
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);

    // If the lowest element isn't used, pass operand 0 through.
    if (!DemandedElts[0]) {
      IC.addToWorklist(&II);
      return II.getArgOperand(0);
    }

    // Only the lowest element of operands 1 and 2 is demanded.
    DemandedElts = 1;
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
    simplifyAndSetOp(&II, 2, DemandedElts, UndefElts3);

    // The low element stays defined unless all three low inputs are undef.
    if (!UndefElts2[0] || !UndefElts3[0])
      UndefElts.clearBit(0);
    break;
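  // ADDSUB subtracts in even lanes and adds in odd lanes; if only one parity
  // is demanded it degenerates to a plain fsub or fadd.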
  case Intrinsic::x86_sse3_addsub_pd:
  case Intrinsic::x86_sse3_addsub_ps:
  case Intrinsic::x86_avx_addsub_pd_256:
  case Intrinsic::x86_avx_addsub_ps_256: {
    APInt SubMask = APInt::getSplat(VWidth, APInt(2, 0x1));
    APInt AddMask = APInt::getSplat(VWidth, APInt(2, 0x2));
    bool IsSubOnly = DemandedElts.isSubsetOf(SubMask);
    bool IsAddOnly = DemandedElts.isSubsetOf(AddMask);
    if (IsSubOnly || IsAddOnly) {
      assert((IsSubOnly ^ IsAddOnly) && "Can't be both add-only and sub-only");
      IRBuilderBase::InsertPointGuard Guard(IC.Builder);
      IC.Builder.SetInsertPoint(&II);
      Value *Arg0 = II.getArgOperand(0), *Arg1 = II.getArgOperand(1);
      return IC.Builder.CreateBinOp(
          IsSubOnly ? Instruction::FSub : Instruction::FAdd, Arg0, Arg1);
    }

    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
    UndefElts &= UndefElts2;
    break;
  }
  case Intrinsic::x86_avx2_psllv_d:
  case Intrinsic::x86_avx2_psllv_d_256:
  case Intrinsic::x86_avx2_psllv_q:
  case Intrinsic::x86_avx2_psllv_q_256:
  case Intrinsic::x86_avx2_psrlv_d:
  case Intrinsic::x86_avx2_psrlv_d_256:
  case Intrinsic::x86_avx2_psrlv_q:
  case Intrinsic::x86_avx2_psrlv_q_256:
  case Intrinsic::x86_avx2_psrav_d:
  case Intrinsic::x86_avx2_psrav_d_256: {
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
    UndefElts &= UndefElts2;
    break;
  }
  case Intrinsic::x86_sse2_pmulh_w:
  case Intrinsic::x86_avx2_pmulh_w:
  case Intrinsic::x86_avx512_pmulh_w_512:
  case Intrinsic::x86_sse2_pmulhu_w:
  case Intrinsic::x86_avx2_pmulhu_w:
  case Intrinsic::x86_avx512_pmulhu_w_512:
  case Intrinsic::x86_ssse3_pmul_hr_sw_128:
  case Intrinsic::x86_avx2_pmul_hr_sw:
  case Intrinsic::x86_avx512_pmul_hr_sw_512: {
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
    // NOTE: mulh(undef, undef) != undef.
    break;
  }
  case Intrinsic::x86_sse2_packssdw_128:
  case Intrinsic::x86_sse2_packsswb_128:
  case Intrinsic::x86_sse2_packuswb_128:
  case Intrinsic::x86_sse41_packusdw:
  case Intrinsic::x86_avx2_packssdw:
  case Intrinsic::x86_avx2_packsswb:
  case Intrinsic::x86_avx2_packusdw:
  case Intrinsic::x86_avx2_packuswb:
  case Intrinsic::x86_avx512_packssdw_512:
  case Intrinsic::x86_avx512_packsswb_512:
  case Intrinsic::x86_avx512_packusdw_512:
  case Intrinsic::x86_avx512_packuswb_512: {
    auto *Ty0 = II.getArgOperand(0)->getType();
    unsigned InnerVWidth = cast<FixedVectorType>(Ty0)->getNumElements();
    assert(VWidth == (InnerVWidth * 2) && "Unexpected input size");

    unsigned NumLanes = Ty0->getPrimitiveSizeInBits() / 128;
    unsigned VWidthPerLane = VWidth / NumLanes;
    unsigned InnerVWidthPerLane = InnerVWidth / NumLanes;

    // Per lane, the pack writes the elements of the first operand followed
    // by the elements of the second, e.g.
    //   v8i16 PACK(v4i32 X, v4i32 Y) = (X[0..3], Y[0..3]).
    for (int OpNum = 0; OpNum != 2; ++OpNum) {
      APInt OpDemandedElts(InnerVWidth, 0);
      for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
        unsigned LaneIdx = Lane * VWidthPerLane;
        for (unsigned Elt = 0; Elt != InnerVWidthPerLane; ++Elt) {
          unsigned Idx = LaneIdx + Elt + InnerVWidthPerLane * OpNum;
          if (DemandedElts[Idx])
            OpDemandedElts.setBit((Lane * InnerVWidthPerLane) + Elt);
        }
      }

      // Demand elements from the operand.
      APInt OpUndefElts(InnerVWidth, 0);
      simplifyAndSetOp(&II, OpNum, OpDemandedElts, OpUndefElts);

      // Pack the operand's UNDEF elements, one lane at a time.
      OpUndefElts = OpUndefElts.zext(VWidth);
      for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
        APInt LaneElts = OpUndefElts.lshr(InnerVWidthPerLane * Lane);
        LaneElts = LaneElts.getLoBits(InnerVWidthPerLane);
        LaneElts <<= InnerVWidthPerLane * (2 * Lane + OpNum);
        UndefElts |= LaneElts;
      }
    }
    break;
  }
  // PMADDWD/PMADDUBSW - demand both input elements that map to each output
  // element.
  case Intrinsic::x86_sse2_pmadd_wd:
  case Intrinsic::x86_avx2_pmadd_wd:
  case Intrinsic::x86_avx512_pmaddw_d_512:
  case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
  case Intrinsic::x86_avx2_pmadd_ub_sw:
  case Intrinsic::x86_avx512_pmaddubs_w_512: {
    auto *ArgTy = II.getArgOperand(0)->getType();
    unsigned InnerVWidth = cast<FixedVectorType>(ArgTy)->getNumElements();
    assert((VWidth * 2) == InnerVWidth && "Unexpected input size");
    APInt OpDemandedElts = APIntOps::ScaleBitMask(DemandedElts, InnerVWidth);
    APInt Op0UndefElts(InnerVWidth, 0);
    APInt Op1UndefElts(InnerVWidth, 0);
    simplifyAndSetOp(&II, 0, OpDemandedElts, Op0UndefElts);
    simplifyAndSetOp(&II, 1, OpDemandedElts, Op1UndefElts);
    // NOTE: madd(undef, undef) != undef.
    break;
  }
  // PSHUFB
  case Intrinsic::x86_ssse3_pshuf_b_128:
  case Intrinsic::x86_avx2_pshuf_b:
  case Intrinsic::x86_avx512_pshuf_b_512:
  // PERMILVAR
  case Intrinsic::x86_avx_vpermilvar_ps:
  case Intrinsic::x86_avx_vpermilvar_ps_256:
  case Intrinsic::x86_avx512_vpermilvar_ps_512:
  case Intrinsic::x86_avx_vpermilvar_pd:
  case Intrinsic::x86_avx_vpermilvar_pd_256:
  case Intrinsic::x86_avx512_vpermilvar_pd_512:
  // PERMV
  case Intrinsic::x86_avx2_permd:
  case Intrinsic::x86_avx2_permps: {
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts);
    break;
  }
  // SSE4A instructions leave the upper 64 bits of the 128-bit result in an
  // undefined state.
  case Intrinsic::x86_sse4a_extrq:
  case Intrinsic::x86_sse4a_extrqi:
  case Intrinsic::x86_sse4a_insertq:
  case Intrinsic::x86_sse4a_insertqi:
    UndefElts.setHighBits(VWidth / 2);
    break;
  }
  return std::nullopt;
}