1 //===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines the interfaces that X86 uses to lower LLVM code into a
13 //===----------------------------------------------------------------------===//
15 #include "X86ISelLowering.h"
16 #include "Utils/X86ShuffleDecode.h"
17 #include "X86CallingConv.h"
18 #include "X86FrameLowering.h"
19 #include "X86InstrBuilder.h"
20 #include "X86MachineFunctionInfo.h"
21 #include "X86TargetMachine.h"
22 #include "X86TargetObjectFile.h"
23 #include "llvm/ADT/SmallBitVector.h"
24 #include "llvm/ADT/SmallSet.h"
25 #include "llvm/ADT/Statistic.h"
26 #include "llvm/ADT/StringExtras.h"
27 #include "llvm/ADT/StringSwitch.h"
28 #include "llvm/CodeGen/IntrinsicLowering.h"
29 #include "llvm/CodeGen/MachineFrameInfo.h"
30 #include "llvm/CodeGen/MachineFunction.h"
31 #include "llvm/CodeGen/MachineInstrBuilder.h"
32 #include "llvm/CodeGen/MachineJumpTableInfo.h"
33 #include "llvm/CodeGen/MachineModuleInfo.h"
34 #include "llvm/CodeGen/MachineRegisterInfo.h"
35 #include "llvm/CodeGen/WinEHFuncInfo.h"
36 #include "llvm/IR/CallSite.h"
37 #include "llvm/IR/CallingConv.h"
38 #include "llvm/IR/Constants.h"
39 #include "llvm/IR/DerivedTypes.h"
40 #include "llvm/IR/Function.h"
41 #include "llvm/IR/GlobalAlias.h"
42 #include "llvm/IR/GlobalVariable.h"
43 #include "llvm/IR/Instructions.h"
44 #include "llvm/IR/Intrinsics.h"
45 #include "llvm/MC/MCAsmInfo.h"
46 #include "llvm/MC/MCContext.h"
47 #include "llvm/MC/MCExpr.h"
48 #include "llvm/MC/MCSymbol.h"
49 #include "llvm/Support/CommandLine.h"
50 #include "llvm/Support/Debug.h"
51 #include "llvm/Support/ErrorHandling.h"
52 #include "llvm/Support/MathExtras.h"
53 #include "llvm/Target/TargetOptions.h"
54 #include "X86IntrinsicsInfo.h"
60 #define DEBUG_TYPE "x86-isel"
62 STATISTIC(NumTailCalls, "Number of tail calls");
// Boolean command-line option "x86-experimental-vector-widening-legalization".
// It is initialised to false; per its help text, setting it enables an
// experimental vector type legalization that widens rather than promotes.
64 static cl::opt<bool> ExperimentalVectorWideningLegalization(
65 "x86-experimental-vector-widening-legalization", cl::init(false),
66 cl::desc("Enable an experimental vector type legalization through widening "
67 "rather than promotion."),
70 X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
71 const X86Subtarget &STI)
72 : TargetLowering(TM), Subtarget(&STI) {
73 X86ScalarSSEf64 = Subtarget->hasSSE2();
74 X86ScalarSSEf32 = Subtarget->hasSSE1();
75 MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize());
77 // Set up the TargetLowering object.
78 static const MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 };
80 // X86 is weird. It always uses i8 for shift amounts and setcc results.
81 setBooleanContents(ZeroOrOneBooleanContent);
82 // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
83 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
85 // For 64-bit, since we have so many registers, use the ILP scheduler.
86 // For 32-bit, use the register pressure specific scheduling.
87 // For Atom, always use ILP scheduling.
88 if (Subtarget->isAtom())
89 setSchedulingPreference(Sched::ILP);
90 else if (Subtarget->is64Bit())
91 setSchedulingPreference(Sched::ILP);
93 setSchedulingPreference(Sched::RegPressure);
94 const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
95 setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
97 // Bypass expensive divides on Atom when compiling with O2.
98 if (TM.getOptLevel() >= CodeGenOpt::Default) {
99 if (Subtarget->hasSlowDivide32())
100 addBypassSlowDiv(32, 8);
101 if (Subtarget->hasSlowDivide64() && Subtarget->is64Bit())
102 addBypassSlowDiv(64, 16);
105 if (Subtarget->isTargetKnownWindowsMSVC()) {
106 // Setup Windows compiler runtime calls.
107 setLibcallName(RTLIB::SDIV_I64, "_alldiv");
108 setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
109 setLibcallName(RTLIB::SREM_I64, "_allrem");
110 setLibcallName(RTLIB::UREM_I64, "_aullrem");
111 setLibcallName(RTLIB::MUL_I64, "_allmul");
112 setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
113 setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
114 setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
115 setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
116 setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
119 if (Subtarget->isTargetDarwin()) {
120 // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
121 setUseUnderscoreSetJmp(false);
122 setUseUnderscoreLongJmp(false);
123 } else if (Subtarget->isTargetWindowsGNU()) {
124 // MS runtime is weird: it exports _setjmp, but longjmp!
125 setUseUnderscoreSetJmp(true);
126 setUseUnderscoreLongJmp(false);
128 setUseUnderscoreSetJmp(true);
129 setUseUnderscoreLongJmp(true);
132 // Set up the register classes.
133 addRegisterClass(MVT::i8, &X86::GR8RegClass);
134 addRegisterClass(MVT::i16, &X86::GR16RegClass);
135 addRegisterClass(MVT::i32, &X86::GR32RegClass);
136 if (Subtarget->is64Bit())
137 addRegisterClass(MVT::i64, &X86::GR64RegClass);
139 for (MVT VT : MVT::integer_valuetypes())
140 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
142 // We don't accept any truncstore of integer registers.
143 setTruncStoreAction(MVT::i64, MVT::i32, Expand);
144 setTruncStoreAction(MVT::i64, MVT::i16, Expand);
145 setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
146 setTruncStoreAction(MVT::i32, MVT::i16, Expand);
147 setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
148 setTruncStoreAction(MVT::i16, MVT::i8, Expand);
150 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
152 // SETOEQ and SETUNE require checking two conditions.
153 setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
154 setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
155 setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
156 setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
157 setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
158 setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);
160 // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
162 setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
163 setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
164 setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);
166 if (Subtarget->is64Bit()) {
167 if (!Subtarget->useSoftFloat() && Subtarget->hasAVX512())
168 // f32/f64 are legal, f80 is custom.
169 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
171 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
172 setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
173 } else if (!Subtarget->useSoftFloat()) {
174 // We have an algorithm for SSE2->double, and we turn this into a
175 // 64-bit FILD followed by conditional FADD for other targets.
176 setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
177 // We have an algorithm for SSE2, and we turn this into a 64-bit
178 // FILD or VCVTUSI2SS/SD for other targets.
179 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
182 // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
184 setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
185 setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
187 if (!Subtarget->useSoftFloat()) {
188 // SSE has no i16 to fp conversion, only i32
189 if (X86ScalarSSEf32) {
190 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
191 // f32 and f64 cases are Legal, f80 case is not
192 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
194 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom);
195 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
198 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
199 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Promote);
202 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
203 // are Legal, f80 is custom lowered.
204 setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
205 setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
207 // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINT's, as X86 doesn't have
209 setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
210 setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);
212 if (X86ScalarSSEf32) {
213 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
214 // f32 and f64 cases are Legal, f80 case is not
215 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
217 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
218 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
221 // Handle FP_TO_UINT by promoting the destination to a larger signed
223 setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote);
224 setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
225 setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);
227 if (Subtarget->is64Bit()) {
228 if (!Subtarget->useSoftFloat() && Subtarget->hasAVX512()) {
229 // FP_TO_UINT-i32/i64 is legal for f32/f64, but custom for f80.
230 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
231 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
233 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
234 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
236 } else if (!Subtarget->useSoftFloat()) {
237 // Since AVX is a superset of SSE3, only check for SSE here.
238 if (Subtarget->hasSSE1() && !Subtarget->hasSSE3())
239 // Expand FP_TO_UINT into a select.
240 // FIXME: We would like to use a Custom expander here eventually to do
241 // the optimal thing for SSE vs. the default expansion in the legalizer.
242 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
244 // With AVX512 we can use vcvts[ds]2usi for f32/f64->i32, f80 is custom.
245 // With SSE3 we can use fisttpll to convert to a signed i64; without
246 // SSE, we're stuck with a fistpll.
247 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
249 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
252 // TODO: when we have SSE, these could be more efficient, by using movd/movq.
253 if (!X86ScalarSSEf64) {
254 setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
255 setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
256 if (Subtarget->is64Bit()) {
257 setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
258 // Without SSE, i64->f64 goes through memory.
259 setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
263 // Scalar integer divide and remainder are lowered to use operations that
264 // produce two results, to match the available instructions. This exposes
265 // the two-result form to trivial CSE, which is able to combine x/y and x%y
266 // into a single instruction.
268 // Scalar integer multiply-high is also lowered to use two-result
269 // operations, to match the available instructions. However, plain multiply
270 // (low) operations are left as Legal, as there are single-result
271 // instructions for this in x86. Using the two-result multiply instructions
272 // when both high and low results are needed must be arranged by dagcombine.
273 for (unsigned i = 0; i != array_lengthof(IntVTs); ++i) {
275 setOperationAction(ISD::MULHS, VT, Expand);
276 setOperationAction(ISD::MULHU, VT, Expand);
277 setOperationAction(ISD::SDIV, VT, Expand);
278 setOperationAction(ISD::UDIV, VT, Expand);
279 setOperationAction(ISD::SREM, VT, Expand);
280 setOperationAction(ISD::UREM, VT, Expand);
282 // Add/Sub overflow ops with MVT::Glues are lowered to EFLAGS dependences.
283 setOperationAction(ISD::ADDC, VT, Custom);
284 setOperationAction(ISD::ADDE, VT, Custom);
285 setOperationAction(ISD::SUBC, VT, Custom);
286 setOperationAction(ISD::SUBE, VT, Custom);
289 setOperationAction(ISD::BR_JT , MVT::Other, Expand);
290 setOperationAction(ISD::BRCOND , MVT::Other, Custom);
291 setOperationAction(ISD::BR_CC , MVT::f32, Expand);
292 setOperationAction(ISD::BR_CC , MVT::f64, Expand);
293 setOperationAction(ISD::BR_CC , MVT::f80, Expand);
294 setOperationAction(ISD::BR_CC , MVT::i8, Expand);
295 setOperationAction(ISD::BR_CC , MVT::i16, Expand);
296 setOperationAction(ISD::BR_CC , MVT::i32, Expand);
297 setOperationAction(ISD::BR_CC , MVT::i64, Expand);
298 setOperationAction(ISD::SELECT_CC , MVT::f32, Expand);
299 setOperationAction(ISD::SELECT_CC , MVT::f64, Expand);
300 setOperationAction(ISD::SELECT_CC , MVT::f80, Expand);
301 setOperationAction(ISD::SELECT_CC , MVT::i8, Expand);
302 setOperationAction(ISD::SELECT_CC , MVT::i16, Expand);
303 setOperationAction(ISD::SELECT_CC , MVT::i32, Expand);
304 setOperationAction(ISD::SELECT_CC , MVT::i64, Expand);
305 if (Subtarget->is64Bit())
306 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
307 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
308 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
309 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
310 setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
312 if (Subtarget->is32Bit() && Subtarget->isTargetKnownWindowsMSVC()) {
313 // On 32 bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
314 // is. We should promote the value to 64-bits to solve this.
315 // This is what the CRT headers do - `fmodf` is an inline header
316 // function casting to f64 and calling `fmod`.
317 setOperationAction(ISD::FREM , MVT::f32 , Promote);
319 setOperationAction(ISD::FREM , MVT::f32 , Expand);
322 setOperationAction(ISD::FREM , MVT::f64 , Expand);
323 setOperationAction(ISD::FREM , MVT::f80 , Expand);
324 setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
326 // Promote the i8 variants and force them on up to i32 which has a shorter
328 setOperationAction(ISD::CTTZ , MVT::i8 , Promote);
329 AddPromotedToType (ISD::CTTZ , MVT::i8 , MVT::i32);
330 setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i8 , Promote);
331 AddPromotedToType (ISD::CTTZ_ZERO_UNDEF , MVT::i8 , MVT::i32);
332 if (Subtarget->hasBMI()) {
333 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Expand);
334 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Expand);
335 if (Subtarget->is64Bit())
336 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
338 setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
339 setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
340 if (Subtarget->is64Bit())
341 setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
344 if (Subtarget->hasLZCNT()) {
345 // When promoting the i8 variants, force them to i32 for a shorter
347 setOperationAction(ISD::CTLZ , MVT::i8 , Promote);
348 AddPromotedToType (ISD::CTLZ , MVT::i8 , MVT::i32);
349 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Promote);
350 AddPromotedToType (ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
351 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Expand);
352 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Expand);
353 if (Subtarget->is64Bit())
354 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
356 setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
357 setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
358 setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
359 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Custom);
360 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Custom);
361 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Custom);
362 if (Subtarget->is64Bit()) {
363 setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
364 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
368 // Special handling for half-precision floating point conversions.
369 // If we don't have F16C support, then lower half float conversions
370 // into library calls.
371 if (Subtarget->useSoftFloat() || !Subtarget->hasF16C()) {
372 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
373 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
376 // There's never any support for operations beyond MVT::f32.
377 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
378 setOperationAction(ISD::FP16_TO_FP, MVT::f80, Expand);
379 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
380 setOperationAction(ISD::FP_TO_FP16, MVT::f80, Expand);
382 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
383 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
384 setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand);
385 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
386 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
387 setTruncStoreAction(MVT::f80, MVT::f16, Expand);
389 if (Subtarget->hasPOPCNT()) {
390 setOperationAction(ISD::CTPOP , MVT::i8 , Promote);
392 setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
393 setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
394 setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
395 if (Subtarget->is64Bit())
396 setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
399 setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
401 if (!Subtarget->hasMOVBE())
402 setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
404 // These should be promoted to a larger select which is supported.
405 setOperationAction(ISD::SELECT , MVT::i1 , Promote);
406 // X86 wants to expand cmov itself.
407 setOperationAction(ISD::SELECT , MVT::i8 , Custom);
408 setOperationAction(ISD::SELECT , MVT::i16 , Custom);
409 setOperationAction(ISD::SELECT , MVT::i32 , Custom);
410 setOperationAction(ISD::SELECT , MVT::f32 , Custom);
411 setOperationAction(ISD::SELECT , MVT::f64 , Custom);
412 setOperationAction(ISD::SELECT , MVT::f80 , Custom);
413 setOperationAction(ISD::SETCC , MVT::i8 , Custom);
414 setOperationAction(ISD::SETCC , MVT::i16 , Custom);
415 setOperationAction(ISD::SETCC , MVT::i32 , Custom);
416 setOperationAction(ISD::SETCC , MVT::f32 , Custom);
417 setOperationAction(ISD::SETCC , MVT::f64 , Custom);
418 setOperationAction(ISD::SETCC , MVT::f80 , Custom);
419 if (Subtarget->is64Bit()) {
420 setOperationAction(ISD::SELECT , MVT::i64 , Custom);
421 setOperationAction(ISD::SETCC , MVT::i64 , Custom);
423 setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
424 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
425 // SjLj exception handling but a light-weight setjmp/longjmp replacement to
426 // support continuation, user-level threading, etc. As a result, no
427 // other SjLj exception interfaces are implemented and please don't build
428 // your own exception handling based on them.
429 // LLVM/Clang supports zero-cost DWARF exception handling.
430 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
431 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
434 setOperationAction(ISD::ConstantPool , MVT::i32 , Custom);
435 setOperationAction(ISD::JumpTable , MVT::i32 , Custom);
436 setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom);
437 setOperationAction(ISD::GlobalTLSAddress, MVT::i32 , Custom);
438 if (Subtarget->is64Bit())
439 setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
440 setOperationAction(ISD::ExternalSymbol , MVT::i32 , Custom);
441 setOperationAction(ISD::BlockAddress , MVT::i32 , Custom);
442 if (Subtarget->is64Bit()) {
443 setOperationAction(ISD::ConstantPool , MVT::i64 , Custom);
444 setOperationAction(ISD::JumpTable , MVT::i64 , Custom);
445 setOperationAction(ISD::GlobalAddress , MVT::i64 , Custom);
446 setOperationAction(ISD::ExternalSymbol, MVT::i64 , Custom);
447 setOperationAction(ISD::BlockAddress , MVT::i64 , Custom);
449 // 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
450 setOperationAction(ISD::SHL_PARTS , MVT::i32 , Custom);
451 setOperationAction(ISD::SRA_PARTS , MVT::i32 , Custom);
452 setOperationAction(ISD::SRL_PARTS , MVT::i32 , Custom);
453 if (Subtarget->is64Bit()) {
454 setOperationAction(ISD::SHL_PARTS , MVT::i64 , Custom);
455 setOperationAction(ISD::SRA_PARTS , MVT::i64 , Custom);
456 setOperationAction(ISD::SRL_PARTS , MVT::i64 , Custom);
459 if (Subtarget->hasSSE1())
460 setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
462 setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
464 // Expand certain atomics
465 for (unsigned i = 0; i != array_lengthof(IntVTs); ++i) {
467 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
468 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
469 setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
472 if (Subtarget->hasCmpxchg16b()) {
473 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
476 // FIXME - use subtarget debug flags
477 if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetELF() &&
478 !Subtarget->isTargetCygMing() && !Subtarget->isTargetWin64()) {
479 setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
482 if (Subtarget->isTarget64BitLP64()) {
483 setExceptionPointerRegister(X86::RAX);
484 setExceptionSelectorRegister(X86::RDX);
486 setExceptionPointerRegister(X86::EAX);
487 setExceptionSelectorRegister(X86::EDX);
489 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
490 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
492 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
493 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
495 setOperationAction(ISD::TRAP, MVT::Other, Legal);
496 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
498 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
499 setOperationAction(ISD::VASTART , MVT::Other, Custom);
500 setOperationAction(ISD::VAEND , MVT::Other, Expand);
501 if (Subtarget->is64Bit()) {
502 setOperationAction(ISD::VAARG , MVT::Other, Custom);
503 setOperationAction(ISD::VACOPY , MVT::Other, Custom);
505 // TargetInfo::CharPtrBuiltinVaList
506 setOperationAction(ISD::VAARG , MVT::Other, Expand);
507 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
510 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
511 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
513 setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
515 // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
516 setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
517 setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom);
519 if (!Subtarget->useSoftFloat() && X86ScalarSSEf64) {
520 // f32 and f64 use SSE.
521 // Set up the FP register classes.
522 addRegisterClass(MVT::f32, &X86::FR32RegClass);
523 addRegisterClass(MVT::f64, &X86::FR64RegClass);
525 // Use ANDPD to simulate FABS.
526 setOperationAction(ISD::FABS , MVT::f64, Custom);
527 setOperationAction(ISD::FABS , MVT::f32, Custom);
529 // Use XORP to simulate FNEG.
530 setOperationAction(ISD::FNEG , MVT::f64, Custom);
531 setOperationAction(ISD::FNEG , MVT::f32, Custom);
533 // Use ANDPD and ORPD to simulate FCOPYSIGN.
534 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
535 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
537 // Lower this to FGETSIGNx86 plus an AND.
538 setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
539 setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
541 // We don't support sin/cos/fmod
542 setOperationAction(ISD::FSIN , MVT::f64, Expand);
543 setOperationAction(ISD::FCOS , MVT::f64, Expand);
544 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
545 setOperationAction(ISD::FSIN , MVT::f32, Expand);
546 setOperationAction(ISD::FCOS , MVT::f32, Expand);
547 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
549 // Expand FP immediates into loads from the stack, except for the special
551 addLegalFPImmediate(APFloat(+0.0)); // xorpd
552 addLegalFPImmediate(APFloat(+0.0f)); // xorps
553 } else if (!Subtarget->useSoftFloat() && X86ScalarSSEf32) {
554 // Use SSE for f32, x87 for f64.
555 // Set up the FP register classes.
556 addRegisterClass(MVT::f32, &X86::FR32RegClass);
557 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
559 // Use ANDPS to simulate FABS.
560 setOperationAction(ISD::FABS , MVT::f32, Custom);
562 // Use XORP to simulate FNEG.
563 setOperationAction(ISD::FNEG , MVT::f32, Custom);
565 setOperationAction(ISD::UNDEF, MVT::f64, Expand);
567 // Use ANDPS and ORPS to simulate FCOPYSIGN.
568 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
569 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
571 // We don't support sin/cos/fmod
572 setOperationAction(ISD::FSIN , MVT::f32, Expand);
573 setOperationAction(ISD::FCOS , MVT::f32, Expand);
574 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
576 // Special cases we handle for FP constants.
577 addLegalFPImmediate(APFloat(+0.0f)); // xorps
578 addLegalFPImmediate(APFloat(+0.0)); // FLD0
579 addLegalFPImmediate(APFloat(+1.0)); // FLD1
580 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
581 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
583 if (!TM.Options.UnsafeFPMath) {
584 setOperationAction(ISD::FSIN , MVT::f64, Expand);
585 setOperationAction(ISD::FCOS , MVT::f64, Expand);
586 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
588 } else if (!Subtarget->useSoftFloat()) {
589 // f32 and f64 in x87.
590 // Set up the FP register classes.
591 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
592 addRegisterClass(MVT::f32, &X86::RFP32RegClass);
594 setOperationAction(ISD::UNDEF, MVT::f64, Expand);
595 setOperationAction(ISD::UNDEF, MVT::f32, Expand);
596 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
597 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
599 if (!TM.Options.UnsafeFPMath) {
600 setOperationAction(ISD::FSIN , MVT::f64, Expand);
601 setOperationAction(ISD::FSIN , MVT::f32, Expand);
602 setOperationAction(ISD::FCOS , MVT::f64, Expand);
603 setOperationAction(ISD::FCOS , MVT::f32, Expand);
604 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
605 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
607 addLegalFPImmediate(APFloat(+0.0)); // FLD0
608 addLegalFPImmediate(APFloat(+1.0)); // FLD1
609 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
610 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
611 addLegalFPImmediate(APFloat(+0.0f)); // FLD0
612 addLegalFPImmediate(APFloat(+1.0f)); // FLD1
613 addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
614 addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
617 // We don't support FMA.
618 setOperationAction(ISD::FMA, MVT::f64, Expand);
619 setOperationAction(ISD::FMA, MVT::f32, Expand);
621 // Long double always uses X87.
622 if (!Subtarget->useSoftFloat()) {
623 addRegisterClass(MVT::f80, &X86::RFP80RegClass);
624 setOperationAction(ISD::UNDEF, MVT::f80, Expand);
625 setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
627 APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended);
628 addLegalFPImmediate(TmpFlt); // FLD0
630 addLegalFPImmediate(TmpFlt); // FLD0/FCHS
633 APFloat TmpFlt2(+1.0);
634 TmpFlt2.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven,
636 addLegalFPImmediate(TmpFlt2); // FLD1
637 TmpFlt2.changeSign();
638 addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
641 if (!TM.Options.UnsafeFPMath) {
642 setOperationAction(ISD::FSIN , MVT::f80, Expand);
643 setOperationAction(ISD::FCOS , MVT::f80, Expand);
644 setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
647 setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
648 setOperationAction(ISD::FCEIL, MVT::f80, Expand);
649 setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
650 setOperationAction(ISD::FRINT, MVT::f80, Expand);
651 setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
652 setOperationAction(ISD::FMA, MVT::f80, Expand);
655 // Always use a library call for pow.
656 setOperationAction(ISD::FPOW , MVT::f32 , Expand);
657 setOperationAction(ISD::FPOW , MVT::f64 , Expand);
658 setOperationAction(ISD::FPOW , MVT::f80 , Expand);
660 setOperationAction(ISD::FLOG, MVT::f80, Expand);
661 setOperationAction(ISD::FLOG2, MVT::f80, Expand);
662 setOperationAction(ISD::FLOG10, MVT::f80, Expand);
663 setOperationAction(ISD::FEXP, MVT::f80, Expand);
664 setOperationAction(ISD::FEXP2, MVT::f80, Expand);
665 setOperationAction(ISD::FMINNUM, MVT::f80, Expand);
666 setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);
668 // First set operation action for all vector types to either promote
669 // (for widening) or expand (for scalarization). Then we will selectively
670 // turn on ones that can be effectively codegen'd.
671 for (MVT VT : MVT::vector_valuetypes()) {
672 setOperationAction(ISD::ADD , VT, Expand);
673 setOperationAction(ISD::SUB , VT, Expand);
674 setOperationAction(ISD::FADD, VT, Expand);
675 setOperationAction(ISD::FNEG, VT, Expand);
676 setOperationAction(ISD::FSUB, VT, Expand);
677 setOperationAction(ISD::MUL , VT, Expand);
678 setOperationAction(ISD::FMUL, VT, Expand);
679 setOperationAction(ISD::SDIV, VT, Expand);
680 setOperationAction(ISD::UDIV, VT, Expand);
681 setOperationAction(ISD::FDIV, VT, Expand);
682 setOperationAction(ISD::SREM, VT, Expand);
683 setOperationAction(ISD::UREM, VT, Expand);
684 setOperationAction(ISD::LOAD, VT, Expand);
685 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
686 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand);
687 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
688 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand);
689 setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
690 setOperationAction(ISD::FABS, VT, Expand);
691 setOperationAction(ISD::FSIN, VT, Expand);
692 setOperationAction(ISD::FSINCOS, VT, Expand);
693 setOperationAction(ISD::FCOS, VT, Expand);
694 setOperationAction(ISD::FSINCOS, VT, Expand);
695 setOperationAction(ISD::FREM, VT, Expand);
696 setOperationAction(ISD::FMA, VT, Expand);
697 setOperationAction(ISD::FPOWI, VT, Expand);
698 setOperationAction(ISD::FSQRT, VT, Expand);
699 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
700 setOperationAction(ISD::FFLOOR, VT, Expand);
701 setOperationAction(ISD::FCEIL, VT, Expand);
702 setOperationAction(ISD::FTRUNC, VT, Expand);
703 setOperationAction(ISD::FRINT, VT, Expand);
704 setOperationAction(ISD::FNEARBYINT, VT, Expand);
705 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
706 setOperationAction(ISD::MULHS, VT, Expand);
707 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
708 setOperationAction(ISD::MULHU, VT, Expand);
709 setOperationAction(ISD::SDIVREM, VT, Expand);
710 setOperationAction(ISD::UDIVREM, VT, Expand);
711 setOperationAction(ISD::FPOW, VT, Expand);
712 setOperationAction(ISD::CTPOP, VT, Expand);
713 setOperationAction(ISD::CTTZ, VT, Expand);
714 setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
715 setOperationAction(ISD::CTLZ, VT, Expand);
716 setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
717 setOperationAction(ISD::SHL, VT, Expand);
718 setOperationAction(ISD::SRA, VT, Expand);
719 setOperationAction(ISD::SRL, VT, Expand);
720 setOperationAction(ISD::ROTL, VT, Expand);
721 setOperationAction(ISD::ROTR, VT, Expand);
722 setOperationAction(ISD::BSWAP, VT, Expand);
723 setOperationAction(ISD::SETCC, VT, Expand);
724 setOperationAction(ISD::FLOG, VT, Expand);
725 setOperationAction(ISD::FLOG2, VT, Expand);
726 setOperationAction(ISD::FLOG10, VT, Expand);
727 setOperationAction(ISD::FEXP, VT, Expand);
728 setOperationAction(ISD::FEXP2, VT, Expand);
729 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
730 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
731 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
732 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
733 setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand);
734 setOperationAction(ISD::TRUNCATE, VT, Expand);
735 setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
736 setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
737 setOperationAction(ISD::ANY_EXTEND, VT, Expand);
738 setOperationAction(ISD::VSELECT, VT, Expand);
739 setOperationAction(ISD::SELECT_CC, VT, Expand);
740 for (MVT InnerVT : MVT::vector_valuetypes()) {
741 setTruncStoreAction(InnerVT, VT, Expand);
743 setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand);
744 setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand);
746 // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like
747 // types, we have to deal with them whether we ask for Expansion or not.
748 // Setting Expand causes its own optimisation problems though, so leave
750 if (VT.getVectorElementType() == MVT::i1)
751 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
753 // EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are
754 // split/scalarized right now.
755 if (VT.getVectorElementType() == MVT::f16)
756 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
760 // FIXME: In order to prevent SSE instructions being expanded to MMX ones
761 // with -msoft-float, disable use of MMX as well.
762 if (!Subtarget->useSoftFloat() && Subtarget->hasMMX()) {
763 addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
764 // No operations on x86mmx supported, everything uses intrinsics.
767 // MMX-sized vectors (other than x86mmx) are expected to be expanded
768 // into smaller operations.
769 for (MVT MMXTy : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v1i64}) {
770 setOperationAction(ISD::MULHS, MMXTy, Expand);
771 setOperationAction(ISD::AND, MMXTy, Expand);
772 setOperationAction(ISD::OR, MMXTy, Expand);
773 setOperationAction(ISD::XOR, MMXTy, Expand);
774 setOperationAction(ISD::SCALAR_TO_VECTOR, MMXTy, Expand);
775 setOperationAction(ISD::SELECT, MMXTy, Expand);
776 setOperationAction(ISD::BITCAST, MMXTy, Expand);
778 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v1i64, Expand);
780 if (!Subtarget->useSoftFloat() && Subtarget->hasSSE1()) {
781 addRegisterClass(MVT::v4f32, &X86::VR128RegClass);
783 setOperationAction(ISD::FADD, MVT::v4f32, Legal);
784 setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
785 setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
786 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
787 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
788 setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
789 setOperationAction(ISD::FABS, MVT::v4f32, Custom);
790 setOperationAction(ISD::LOAD, MVT::v4f32, Legal);
791 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
792 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
793 setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
794 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
795 setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
796 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
799 if (!Subtarget->useSoftFloat() && Subtarget->hasSSE2()) {
800 addRegisterClass(MVT::v2f64, &X86::VR128RegClass);
802 // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
803 // registers cannot be used even for integer operations.
804 addRegisterClass(MVT::v16i8, &X86::VR128RegClass);
805 addRegisterClass(MVT::v8i16, &X86::VR128RegClass);
806 addRegisterClass(MVT::v4i32, &X86::VR128RegClass);
807 addRegisterClass(MVT::v2i64, &X86::VR128RegClass);
809 setOperationAction(ISD::ADD, MVT::v16i8, Legal);
810 setOperationAction(ISD::ADD, MVT::v8i16, Legal);
811 setOperationAction(ISD::ADD, MVT::v4i32, Legal);
812 setOperationAction(ISD::ADD, MVT::v2i64, Legal);
813 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
814 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
815 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
816 setOperationAction(ISD::UMUL_LOHI, MVT::v4i32, Custom);
817 setOperationAction(ISD::SMUL_LOHI, MVT::v4i32, Custom);
818 setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
819 setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
820 setOperationAction(ISD::SUB, MVT::v16i8, Legal);
821 setOperationAction(ISD::SUB, MVT::v8i16, Legal);
822 setOperationAction(ISD::SUB, MVT::v4i32, Legal);
823 setOperationAction(ISD::SUB, MVT::v2i64, Legal);
824 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
825 setOperationAction(ISD::FADD, MVT::v2f64, Legal);
826 setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
827 setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
828 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
829 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
830 setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
831 setOperationAction(ISD::FABS, MVT::v2f64, Custom);
833 setOperationAction(ISD::SMAX, MVT::v8i16, Legal);
834 setOperationAction(ISD::UMAX, MVT::v16i8, Legal);
835 setOperationAction(ISD::SMIN, MVT::v8i16, Legal);
836 setOperationAction(ISD::UMIN, MVT::v16i8, Legal);
838 setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
839 setOperationAction(ISD::SETCC, MVT::v16i8, Custom);
840 setOperationAction(ISD::SETCC, MVT::v8i16, Custom);
841 setOperationAction(ISD::SETCC, MVT::v4i32, Custom);
843 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
844 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
845 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
846 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
847 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
849 setOperationAction(ISD::CTPOP, MVT::v16i8, Custom);
850 setOperationAction(ISD::CTPOP, MVT::v8i16, Custom);
851 setOperationAction(ISD::CTPOP, MVT::v4i32, Custom);
852 setOperationAction(ISD::CTPOP, MVT::v2i64, Custom);
854 setOperationAction(ISD::CTTZ, MVT::v16i8, Custom);
855 setOperationAction(ISD::CTTZ, MVT::v8i16, Custom);
856 setOperationAction(ISD::CTTZ, MVT::v4i32, Custom);
857 // ISD::CTTZ v2i64 - scalarization is faster.
858 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i8, Custom);
859 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i16, Custom);
860 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom);
861 // ISD::CTTZ_ZERO_UNDEF v2i64 - scalarization is faster.
863 // Custom lower build_vector, vector_shuffle, vselect, and extract_vector_elt.
864 for (int i = MVT::v16i8; i != MVT::v2i64; ++i) {
865 MVT VT = (MVT::SimpleValueType)i;
866 // Do not attempt to custom lower non-power-of-2 vectors
867 if (!isPowerOf2_32(VT.getVectorNumElements()))
869 // Do not attempt to custom lower non-128-bit vectors
870 if (!VT.is128BitVector())
872 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
873 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
874 setOperationAction(ISD::VSELECT, VT, Custom);
875 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
878 // We support custom legalizing of sext and anyext loads for specific
879 // memory vector types which we can load as a scalar (or sequence of
880 // scalars) and extend in-register to a legal 128-bit vector type. For sext
881 // loads these must work with a single scalar load.
882 for (MVT VT : MVT::integer_vector_valuetypes()) {
883 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i8, Custom);
884 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Custom);
885 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v8i8, Custom);
886 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Custom);
887 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Custom);
888 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Custom);
889 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i8, Custom);
890 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Custom);
891 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8i8, Custom);
894 setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
895 setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
896 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
897 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
898 setOperationAction(ISD::VSELECT, MVT::v2f64, Custom);
899 setOperationAction(ISD::VSELECT, MVT::v2i64, Custom);
900 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
901 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
903 if (Subtarget->is64Bit()) {
904 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom);
905 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
908 // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
909 for (int i = MVT::v16i8; i != MVT::v2i64; ++i) {
910 MVT VT = (MVT::SimpleValueType)i;
912 // Do not attempt to promote non-128-bit vectors
913 if (!VT.is128BitVector())
916 setOperationAction(ISD::AND, VT, Promote);
917 AddPromotedToType (ISD::AND, VT, MVT::v2i64);
918 setOperationAction(ISD::OR, VT, Promote);
919 AddPromotedToType (ISD::OR, VT, MVT::v2i64);
920 setOperationAction(ISD::XOR, VT, Promote);
921 AddPromotedToType (ISD::XOR, VT, MVT::v2i64);
922 setOperationAction(ISD::LOAD, VT, Promote);
923 AddPromotedToType (ISD::LOAD, VT, MVT::v2i64);
924 setOperationAction(ISD::SELECT, VT, Promote);
925 AddPromotedToType (ISD::SELECT, VT, MVT::v2i64);
928 // Custom lower v2i64 and v2f64 selects.
929 setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
930 setOperationAction(ISD::LOAD, MVT::v2i64, Legal);
931 setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
932 setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
934 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
935 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
937 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
939 setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
940 setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
941 // As there is no 64-bit GPR available, we need to build a special custom
942 // sequence to convert from v2i32 to v2f32.
943 if (!Subtarget->is64Bit())
944 setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
946 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
947 setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
949 for (MVT VT : MVT::fp_vector_valuetypes())
950 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2f32, Legal);
952 setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
953 setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
954 setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
957 if (!Subtarget->useSoftFloat() && Subtarget->hasSSE41()) {
958 for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
959 setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
960 setOperationAction(ISD::FCEIL, RoundedTy, Legal);
961 setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
962 setOperationAction(ISD::FRINT, RoundedTy, Legal);
963 setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
966 setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
967 setOperationAction(ISD::SMAX, MVT::v4i32, Legal);
968 setOperationAction(ISD::UMAX, MVT::v8i16, Legal);
969 setOperationAction(ISD::UMAX, MVT::v4i32, Legal);
970 setOperationAction(ISD::SMIN, MVT::v16i8, Legal);
971 setOperationAction(ISD::SMIN, MVT::v4i32, Legal);
972 setOperationAction(ISD::UMIN, MVT::v8i16, Legal);
973 setOperationAction(ISD::UMIN, MVT::v4i32, Legal);
975 // FIXME: Do we need to handle scalar-to-vector here?
976 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
978 // We directly match byte blends in the backend as they match the VSELECT
980 setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
982 // SSE41 brings specific instructions for doing vector sign extend even in
983 // cases where we don't have SRA.
984 for (MVT VT : MVT::integer_vector_valuetypes()) {
985 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom);
986 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom);
987 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Custom);
990 // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
991 setLoadExtAction(ISD::SEXTLOAD, MVT::v8i16, MVT::v8i8, Legal);
992 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8, Legal);
993 setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i8, Legal);
994 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i16, Legal);
995 setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i16, Legal);
996 setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i32, Legal);
998 setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i16, MVT::v8i8, Legal);
999 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i8, Legal);
1000 setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i8, Legal);
1001 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i16, Legal);
1002 setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i16, Legal);
1003 setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i32, Legal);
1005 // i8 and i16 vectors are custom because the source register and
1006 // source memory operand types are not the same width. f32 vectors are
1007 // custom since the immediate controlling the insert encodes additional
1009 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
1010 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
1011 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
1012 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
1014 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Custom);
1015 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Custom);
1016 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
1017 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
1019 // FIXME: these should be Legal, but that's only for the case where
1020 // the index is constant. For now custom expand to deal with that.
1021 if (Subtarget->is64Bit()) {
1022 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom);
1023 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
1027 if (Subtarget->hasSSE2()) {
1028 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom);
1029 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
1030 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
1032 setOperationAction(ISD::SRL, MVT::v8i16, Custom);
1033 setOperationAction(ISD::SRL, MVT::v16i8, Custom);
1035 setOperationAction(ISD::SHL, MVT::v8i16, Custom);
1036 setOperationAction(ISD::SHL, MVT::v16i8, Custom);
1038 setOperationAction(ISD::SRA, MVT::v8i16, Custom);
1039 setOperationAction(ISD::SRA, MVT::v16i8, Custom);
1041 // In the customized shift lowering, the legal cases in AVX2 will be
1043 setOperationAction(ISD::SRL, MVT::v2i64, Custom);
1044 setOperationAction(ISD::SRL, MVT::v4i32, Custom);
1046 setOperationAction(ISD::SHL, MVT::v2i64, Custom);
1047 setOperationAction(ISD::SHL, MVT::v4i32, Custom);
1049 setOperationAction(ISD::SRA, MVT::v2i64, Custom);
1050 setOperationAction(ISD::SRA, MVT::v4i32, Custom);
1053 if (!Subtarget->useSoftFloat() && Subtarget->hasFp256()) {
1054 addRegisterClass(MVT::v32i8, &X86::VR256RegClass);
1055 addRegisterClass(MVT::v16i16, &X86::VR256RegClass);
1056 addRegisterClass(MVT::v8i32, &X86::VR256RegClass);
1057 addRegisterClass(MVT::v8f32, &X86::VR256RegClass);
1058 addRegisterClass(MVT::v4i64, &X86::VR256RegClass);
1059 addRegisterClass(MVT::v4f64, &X86::VR256RegClass);
1061 setOperationAction(ISD::LOAD, MVT::v8f32, Legal);
1062 setOperationAction(ISD::LOAD, MVT::v4f64, Legal);
1063 setOperationAction(ISD::LOAD, MVT::v4i64, Legal);
1065 setOperationAction(ISD::FADD, MVT::v8f32, Legal);
1066 setOperationAction(ISD::FSUB, MVT::v8f32, Legal);
1067 setOperationAction(ISD::FMUL, MVT::v8f32, Legal);
1068 setOperationAction(ISD::FDIV, MVT::v8f32, Legal);
1069 setOperationAction(ISD::FSQRT, MVT::v8f32, Legal);
1070 setOperationAction(ISD::FFLOOR, MVT::v8f32, Legal);
1071 setOperationAction(ISD::FCEIL, MVT::v8f32, Legal);
1072 setOperationAction(ISD::FTRUNC, MVT::v8f32, Legal);
1073 setOperationAction(ISD::FRINT, MVT::v8f32, Legal);
1074 setOperationAction(ISD::FNEARBYINT, MVT::v8f32, Legal);
1075 setOperationAction(ISD::FNEG, MVT::v8f32, Custom);
1076 setOperationAction(ISD::FABS, MVT::v8f32, Custom);
1078 setOperationAction(ISD::FADD, MVT::v4f64, Legal);
1079 setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
1080 setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
1081 setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
1082 setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
1083 setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
1084 setOperationAction(ISD::FCEIL, MVT::v4f64, Legal);
1085 setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
1086 setOperationAction(ISD::FRINT, MVT::v4f64, Legal);
1087 setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Legal);
1088 setOperationAction(ISD::FNEG, MVT::v4f64, Custom);
1089 setOperationAction(ISD::FABS, MVT::v4f64, Custom);
1091 // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
1092 // even though v8i16 is a legal type.
1093 setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Promote);
1094 setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Promote);
1095 setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
1097 setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Promote);
1098 setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
1099 setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
1101 setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Custom);
1102 setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
1104 for (MVT VT : MVT::fp_vector_valuetypes())
1105 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4f32, Legal);
1107 setOperationAction(ISD::SRL, MVT::v16i16, Custom);
1108 setOperationAction(ISD::SRL, MVT::v32i8, Custom);
1110 setOperationAction(ISD::SHL, MVT::v16i16, Custom);
1111 setOperationAction(ISD::SHL, MVT::v32i8, Custom);
1113 setOperationAction(ISD::SRA, MVT::v16i16, Custom);
1114 setOperationAction(ISD::SRA, MVT::v32i8, Custom);
1116 setOperationAction(ISD::SETCC, MVT::v32i8, Custom);
1117 setOperationAction(ISD::SETCC, MVT::v16i16, Custom);
1118 setOperationAction(ISD::SETCC, MVT::v8i32, Custom);
1119 setOperationAction(ISD::SETCC, MVT::v4i64, Custom);
1121 setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
1122 setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
1123 setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
1125 setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
1126 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
1127 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
1128 setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom);
1129 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
1130 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i16, Custom);
1131 setOperationAction(ISD::ANY_EXTEND, MVT::v4i64, Custom);
1132 setOperationAction(ISD::ANY_EXTEND, MVT::v8i32, Custom);
1133 setOperationAction(ISD::ANY_EXTEND, MVT::v16i16, Custom);
1134 setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
1135 setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
1136 setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
1138 setOperationAction(ISD::CTPOP, MVT::v32i8, Custom);
1139 setOperationAction(ISD::CTPOP, MVT::v16i16, Custom);
1140 setOperationAction(ISD::CTPOP, MVT::v8i32, Custom);
1141 setOperationAction(ISD::CTPOP, MVT::v4i64, Custom);
1143 setOperationAction(ISD::CTTZ, MVT::v32i8, Custom);
1144 setOperationAction(ISD::CTTZ, MVT::v16i16, Custom);
1145 setOperationAction(ISD::CTTZ, MVT::v8i32, Custom);
1146 setOperationAction(ISD::CTTZ, MVT::v4i64, Custom);
1147 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v32i8, Custom);
1148 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i16, Custom);
1149 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i32, Custom);
1150 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i64, Custom);
1152 if (Subtarget->hasFMA() || Subtarget->hasFMA4() || Subtarget->hasAVX512()) {
1153 setOperationAction(ISD::FMA, MVT::v8f32, Legal);
1154 setOperationAction(ISD::FMA, MVT::v4f64, Legal);
1155 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
1156 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
1157 setOperationAction(ISD::FMA, MVT::f32, Legal);
1158 setOperationAction(ISD::FMA, MVT::f64, Legal);
1161 if (Subtarget->hasInt256()) {
1162 setOperationAction(ISD::ADD, MVT::v4i64, Legal);
1163 setOperationAction(ISD::ADD, MVT::v8i32, Legal);
1164 setOperationAction(ISD::ADD, MVT::v16i16, Legal);
1165 setOperationAction(ISD::ADD, MVT::v32i8, Legal);
1167 setOperationAction(ISD::SUB, MVT::v4i64, Legal);
1168 setOperationAction(ISD::SUB, MVT::v8i32, Legal);
1169 setOperationAction(ISD::SUB, MVT::v16i16, Legal);
1170 setOperationAction(ISD::SUB, MVT::v32i8, Legal);
1172 setOperationAction(ISD::MUL, MVT::v4i64, Custom);
1173 setOperationAction(ISD::MUL, MVT::v8i32, Legal);
1174 setOperationAction(ISD::MUL, MVT::v16i16, Legal);
1175 setOperationAction(ISD::MUL, MVT::v32i8, Custom);
1177 setOperationAction(ISD::UMUL_LOHI, MVT::v8i32, Custom);
1178 setOperationAction(ISD::SMUL_LOHI, MVT::v8i32, Custom);
1179 setOperationAction(ISD::MULHU, MVT::v16i16, Legal);
1180 setOperationAction(ISD::MULHS, MVT::v16i16, Legal);
1182 setOperationAction(ISD::SMAX, MVT::v32i8, Legal);
1183 setOperationAction(ISD::SMAX, MVT::v16i16, Legal);
1184 setOperationAction(ISD::SMAX, MVT::v8i32, Legal);
1185 setOperationAction(ISD::UMAX, MVT::v32i8, Legal);
1186 setOperationAction(ISD::UMAX, MVT::v16i16, Legal);
1187 setOperationAction(ISD::UMAX, MVT::v8i32, Legal);
1188 setOperationAction(ISD::SMIN, MVT::v32i8, Legal);
1189 setOperationAction(ISD::SMIN, MVT::v16i16, Legal);
1190 setOperationAction(ISD::SMIN, MVT::v8i32, Legal);
1191 setOperationAction(ISD::UMIN, MVT::v32i8, Legal);
1192 setOperationAction(ISD::UMIN, MVT::v16i16, Legal);
1193 setOperationAction(ISD::UMIN, MVT::v8i32, Legal);
1195 // The custom lowering for UINT_TO_FP for v8i32 becomes interesting
1196 // when we have a 256bit-wide blend with immediate.
1197 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
1199 // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
1200 setLoadExtAction(ISD::SEXTLOAD, MVT::v16i16, MVT::v16i8, Legal);
1201 setLoadExtAction(ISD::SEXTLOAD, MVT::v8i32, MVT::v8i8, Legal);
1202 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64, MVT::v4i8, Legal);
1203 setLoadExtAction(ISD::SEXTLOAD, MVT::v8i32, MVT::v8i16, Legal);
1204 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64, MVT::v4i16, Legal);
1205 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64, MVT::v4i32, Legal);
1207 setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i16, MVT::v16i8, Legal);
1208 setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i32, MVT::v8i8, Legal);
1209 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64, MVT::v4i8, Legal);
1210 setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i32, MVT::v8i16, Legal);
1211 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64, MVT::v4i16, Legal);
1212 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64, MVT::v4i32, Legal);
1214 setOperationAction(ISD::ADD, MVT::v4i64, Custom);
1215 setOperationAction(ISD::ADD, MVT::v8i32, Custom);
1216 setOperationAction(ISD::ADD, MVT::v16i16, Custom);
1217 setOperationAction(ISD::ADD, MVT::v32i8, Custom);
1219 setOperationAction(ISD::SUB, MVT::v4i64, Custom);
1220 setOperationAction(ISD::SUB, MVT::v8i32, Custom);
1221 setOperationAction(ISD::SUB, MVT::v16i16, Custom);
1222 setOperationAction(ISD::SUB, MVT::v32i8, Custom);
1224 setOperationAction(ISD::MUL, MVT::v4i64, Custom);
1225 setOperationAction(ISD::MUL, MVT::v8i32, Custom);
1226 setOperationAction(ISD::MUL, MVT::v16i16, Custom);
1227 setOperationAction(ISD::MUL, MVT::v32i8, Custom);
1229 setOperationAction(ISD::SMAX, MVT::v32i8, Custom);
1230 setOperationAction(ISD::SMAX, MVT::v16i16, Custom);
1231 setOperationAction(ISD::SMAX, MVT::v8i32, Custom);
1232 setOperationAction(ISD::UMAX, MVT::v32i8, Custom);
1233 setOperationAction(ISD::UMAX, MVT::v16i16, Custom);
1234 setOperationAction(ISD::UMAX, MVT::v8i32, Custom);
1235 setOperationAction(ISD::SMIN, MVT::v32i8, Custom);
1236 setOperationAction(ISD::SMIN, MVT::v16i16, Custom);
1237 setOperationAction(ISD::SMIN, MVT::v8i32, Custom);
1238 setOperationAction(ISD::UMIN, MVT::v32i8, Custom);
1239 setOperationAction(ISD::UMIN, MVT::v16i16, Custom);
1240 setOperationAction(ISD::UMIN, MVT::v8i32, Custom);
1243 // In the customized shift lowering, the legal cases in AVX2 will be
1245 setOperationAction(ISD::SRL, MVT::v4i64, Custom);
1246 setOperationAction(ISD::SRL, MVT::v8i32, Custom);
1248 setOperationAction(ISD::SHL, MVT::v4i64, Custom);
1249 setOperationAction(ISD::SHL, MVT::v8i32, Custom);
1251 setOperationAction(ISD::SRA, MVT::v4i64, Custom);
1252 setOperationAction(ISD::SRA, MVT::v8i32, Custom);
1254 // Custom lower several nodes for 256-bit types.
1255 for (MVT VT : MVT::vector_valuetypes()) {
1256 if (VT.getScalarSizeInBits() >= 32) {
1257 setOperationAction(ISD::MLOAD, VT, Legal);
1258 setOperationAction(ISD::MSTORE, VT, Legal);
1260 // Extract subvector is special because the value type
1261 // (result) is 128-bit but the source is 256-bit wide.
1262 if (VT.is128BitVector()) {
1263 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1265 // Do not attempt to custom lower other non-256-bit vectors
1266 if (!VT.is256BitVector())
1269 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1270 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1271 setOperationAction(ISD::VSELECT, VT, Custom);
1272 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1273 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1274 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1275 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1276 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1279 if (Subtarget->hasInt256())
1280 setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
1282 // Promote v32i8, v16i16, v8i32 load, select, and, or, xor to v4i64.
1283 for (int i = MVT::v32i8; i != MVT::v4i64; ++i) {
1284 MVT VT = (MVT::SimpleValueType)i;
1286 // Do not attempt to promote non-256-bit vectors
1287 if (!VT.is256BitVector())
1290 setOperationAction(ISD::AND, VT, Promote);
1291 AddPromotedToType (ISD::AND, VT, MVT::v4i64);
1292 setOperationAction(ISD::OR, VT, Promote);
1293 AddPromotedToType (ISD::OR, VT, MVT::v4i64);
1294 setOperationAction(ISD::XOR, VT, Promote);
1295 AddPromotedToType (ISD::XOR, VT, MVT::v4i64);
1296 setOperationAction(ISD::LOAD, VT, Promote);
1297 AddPromotedToType (ISD::LOAD, VT, MVT::v4i64);
1298 setOperationAction(ISD::SELECT, VT, Promote);
1299 AddPromotedToType (ISD::SELECT, VT, MVT::v4i64);
1303 if (!Subtarget->useSoftFloat() && Subtarget->hasAVX512()) {
1304 addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
1305 addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
1306 addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
1307 addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
1309 addRegisterClass(MVT::i1, &X86::VK1RegClass);
1310 addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
1311 addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
1313 for (MVT VT : MVT::fp_vector_valuetypes())
1314 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Legal);
1316 setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i32, MVT::v16i8, Legal);
1317 setLoadExtAction(ISD::SEXTLOAD, MVT::v16i32, MVT::v16i8, Legal);
1318 setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i32, MVT::v16i16, Legal);
1319 setLoadExtAction(ISD::SEXTLOAD, MVT::v16i32, MVT::v16i16, Legal);
1320 setLoadExtAction(ISD::ZEXTLOAD, MVT::v32i16, MVT::v32i8, Legal);
1321 setLoadExtAction(ISD::SEXTLOAD, MVT::v32i16, MVT::v32i8, Legal);
1322 setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i64, MVT::v8i8, Legal);
1323 setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64, MVT::v8i8, Legal);
1324 setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i64, MVT::v8i16, Legal);
1325 setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64, MVT::v8i16, Legal);
1326 setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i64, MVT::v8i32, Legal);
1327 setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64, MVT::v8i32, Legal);
1329 setOperationAction(ISD::BR_CC, MVT::i1, Expand);
1330 setOperationAction(ISD::SETCC, MVT::i1, Custom);
1331 setOperationAction(ISD::XOR, MVT::i1, Legal);
1332 setOperationAction(ISD::OR, MVT::i1, Legal);
1333 setOperationAction(ISD::AND, MVT::i1, Legal);
1334 setOperationAction(ISD::SUB, MVT::i1, Custom);
1335 setOperationAction(ISD::ADD, MVT::i1, Custom);
1336 setOperationAction(ISD::MUL, MVT::i1, Custom);
1337 setOperationAction(ISD::LOAD, MVT::v16f32, Legal);
1338 setOperationAction(ISD::LOAD, MVT::v8f64, Legal);
1339 setOperationAction(ISD::LOAD, MVT::v8i64, Legal);
1340 setOperationAction(ISD::LOAD, MVT::v16i32, Legal);
1341 setOperationAction(ISD::LOAD, MVT::v16i1, Legal);
1343 setOperationAction(ISD::FADD, MVT::v16f32, Legal);
1344 setOperationAction(ISD::FSUB, MVT::v16f32, Legal);
1345 setOperationAction(ISD::FMUL, MVT::v16f32, Legal);
1346 setOperationAction(ISD::FDIV, MVT::v16f32, Legal);
1347 setOperationAction(ISD::FSQRT, MVT::v16f32, Legal);
1348 setOperationAction(ISD::FNEG, MVT::v16f32, Custom);
1350 setOperationAction(ISD::FADD, MVT::v8f64, Legal);
1351 setOperationAction(ISD::FSUB, MVT::v8f64, Legal);
1352 setOperationAction(ISD::FMUL, MVT::v8f64, Legal);
1353 setOperationAction(ISD::FDIV, MVT::v8f64, Legal);
1354 setOperationAction(ISD::FSQRT, MVT::v8f64, Legal);
1355 setOperationAction(ISD::FNEG, MVT::v8f64, Custom);
1356 setOperationAction(ISD::FMA, MVT::v8f64, Legal);
1357 setOperationAction(ISD::FMA, MVT::v16f32, Legal);
1359 setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
1360 setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
1361 setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
1362 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
1363 setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
1364 setOperationAction(ISD::SINT_TO_FP, MVT::v8i1, Custom);
1365 setOperationAction(ISD::SINT_TO_FP, MVT::v16i1, Custom);
1366 setOperationAction(ISD::SINT_TO_FP, MVT::v16i8, Promote);
1367 setOperationAction(ISD::SINT_TO_FP, MVT::v16i16, Promote);
1368 setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
1369 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
1370 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
1371 setOperationAction(ISD::UINT_TO_FP, MVT::v16i8, Custom);
1372 setOperationAction(ISD::UINT_TO_FP, MVT::v16i16, Custom);
1373 setOperationAction(ISD::FP_ROUND, MVT::v8f32, Legal);
1374 setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal);
1376 setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
1377 setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
1378 setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
1379 setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
1380 setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
1381 if (Subtarget->hasVLX()){
1382 setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
1383 setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
1384 setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
1385 setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal);
1386 setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal);
1388 setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal);
1389 setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal);
1390 setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal);
1391 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
1392 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
1394 setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
1395 setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
1396 setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
1397 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i1, Custom);
1398 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i1, Custom);
1399 if (Subtarget->hasDQI()) {
1400 setOperationAction(ISD::TRUNCATE, MVT::v2i1, Custom);
1401 setOperationAction(ISD::TRUNCATE, MVT::v4i1, Custom);
1403 setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal);
1404 setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
1405 setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal);
1406 setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal);
1407 if (Subtarget->hasVLX()) {
1408 setOperationAction(ISD::SINT_TO_FP, MVT::v4i64, Legal);
1409 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
1410 setOperationAction(ISD::UINT_TO_FP, MVT::v4i64, Legal);
1411 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
1412 setOperationAction(ISD::FP_TO_SINT, MVT::v4i64, Legal);
1413 setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
1414 setOperationAction(ISD::FP_TO_UINT, MVT::v4i64, Legal);
1415 setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
1418 if (Subtarget->hasVLX()) {
1419 setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
1420 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
1421 setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
1422 setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
1423 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
1424 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
1425 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
1426 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
1428 setOperationAction(ISD::TRUNCATE, MVT::v8i1, Custom);
1429 setOperationAction(ISD::TRUNCATE, MVT::v16i1, Custom);
1430 setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
1431 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1432 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1433 setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom);
1434 setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom);
1435 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1436 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1437 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i8, Custom);
1438 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i16, Custom);
1439 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
1440 if (Subtarget->hasDQI()) {
1441 setOperationAction(ISD::SIGN_EXTEND, MVT::v4i32, Custom);
1442 setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Custom);
1444 setOperationAction(ISD::FFLOOR, MVT::v16f32, Legal);
1445 setOperationAction(ISD::FFLOOR, MVT::v8f64, Legal);
1446 setOperationAction(ISD::FCEIL, MVT::v16f32, Legal);
1447 setOperationAction(ISD::FCEIL, MVT::v8f64, Legal);
1448 setOperationAction(ISD::FTRUNC, MVT::v16f32, Legal);
1449 setOperationAction(ISD::FTRUNC, MVT::v8f64, Legal);
1450 setOperationAction(ISD::FRINT, MVT::v16f32, Legal);
1451 setOperationAction(ISD::FRINT, MVT::v8f64, Legal);
1452 setOperationAction(ISD::FNEARBYINT, MVT::v16f32, Legal);
1453 setOperationAction(ISD::FNEARBYINT, MVT::v8f64, Legal);
1455 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f64, Custom);
1456 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom);
1457 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom);
1458 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom);
1459 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Legal);
1461 setOperationAction(ISD::SETCC, MVT::v16i1, Custom);
1462 setOperationAction(ISD::SETCC, MVT::v8i1, Custom);
1464 setOperationAction(ISD::MUL, MVT::v8i64, Custom);
1466 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i1, Custom);
1467 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i1, Custom);
1468 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i1, Custom);
1469 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i1, Custom);
1470 setOperationAction(ISD::BUILD_VECTOR, MVT::v8i1, Custom);
1471 setOperationAction(ISD::BUILD_VECTOR, MVT::v16i1, Custom);
1472 setOperationAction(ISD::SELECT, MVT::v8f64, Custom);
1473 setOperationAction(ISD::SELECT, MVT::v8i64, Custom);
1474 setOperationAction(ISD::SELECT, MVT::v16f32, Custom);
1475 setOperationAction(ISD::SELECT, MVT::v16i1, Custom);
1476 setOperationAction(ISD::SELECT, MVT::v8i1, Custom);
1478 setOperationAction(ISD::SMAX, MVT::v16i32, Legal);
1479 setOperationAction(ISD::SMAX, MVT::v8i64, Legal);
1480 setOperationAction(ISD::UMAX, MVT::v16i32, Legal);
1481 setOperationAction(ISD::UMAX, MVT::v8i64, Legal);
1482 setOperationAction(ISD::SMIN, MVT::v16i32, Legal);
1483 setOperationAction(ISD::SMIN, MVT::v8i64, Legal);
1484 setOperationAction(ISD::UMIN, MVT::v16i32, Legal);
1485 setOperationAction(ISD::UMIN, MVT::v8i64, Legal);
1487 setOperationAction(ISD::ADD, MVT::v8i64, Legal);
1488 setOperationAction(ISD::ADD, MVT::v16i32, Legal);
1490 setOperationAction(ISD::SUB, MVT::v8i64, Legal);
1491 setOperationAction(ISD::SUB, MVT::v16i32, Legal);
1493 setOperationAction(ISD::MUL, MVT::v16i32, Legal);
1495 setOperationAction(ISD::SRL, MVT::v8i64, Custom);
1496 setOperationAction(ISD::SRL, MVT::v16i32, Custom);
1498 setOperationAction(ISD::SHL, MVT::v8i64, Custom);
1499 setOperationAction(ISD::SHL, MVT::v16i32, Custom);
1501 setOperationAction(ISD::SRA, MVT::v8i64, Custom);
1502 setOperationAction(ISD::SRA, MVT::v16i32, Custom);
1504 setOperationAction(ISD::AND, MVT::v8i64, Legal);
1505 setOperationAction(ISD::OR, MVT::v8i64, Legal);
1506 setOperationAction(ISD::XOR, MVT::v8i64, Legal);
1507 setOperationAction(ISD::AND, MVT::v16i32, Legal);
1508 setOperationAction(ISD::OR, MVT::v16i32, Legal);
1509 setOperationAction(ISD::XOR, MVT::v16i32, Legal);
1511 if (Subtarget->hasCDI()) {
1512 setOperationAction(ISD::CTLZ, MVT::v8i64, Legal);
1513 setOperationAction(ISD::CTLZ, MVT::v16i32, Legal);
1514 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v8i64, Legal);
1515 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v16i32, Legal);
1517 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i64, Custom);
1518 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i32, Custom);
1520 if (Subtarget->hasVLX() && Subtarget->hasCDI()) {
1521 setOperationAction(ISD::CTLZ, MVT::v4i64, Legal);
1522 setOperationAction(ISD::CTLZ, MVT::v8i32, Legal);
1523 setOperationAction(ISD::CTLZ, MVT::v2i64, Legal);
1524 setOperationAction(ISD::CTLZ, MVT::v4i32, Legal);
1525 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v4i64, Legal);
1526 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v8i32, Legal);
1527 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v2i64, Legal);
1528 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v4i32, Legal);
1530 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i64, Custom);
1531 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i32, Custom);
1532 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom);
1533 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom);
1535 if (Subtarget->hasDQI()) {
1536 setOperationAction(ISD::MUL, MVT::v2i64, Legal);
1537 setOperationAction(ISD::MUL, MVT::v4i64, Legal);
1538 setOperationAction(ISD::MUL, MVT::v8i64, Legal);
1540 // Custom lower several nodes.
1541 for (MVT VT : MVT::vector_valuetypes()) {
1542 unsigned EltSize = VT.getVectorElementType().getSizeInBits();
1544 setOperationAction(ISD::AND, VT, Legal);
1545 setOperationAction(ISD::OR, VT, Legal);
1546 setOperationAction(ISD::XOR, VT, Legal);
1548 if (EltSize >= 32 && VT.getSizeInBits() <= 512) {
1549 setOperationAction(ISD::MGATHER, VT, Custom);
1550 setOperationAction(ISD::MSCATTER, VT, Custom);
1552 // Extract subvector is special because the value type
1553 // (result) is 256/128-bit but the source is 512-bit wide.
1554 if (VT.is128BitVector() || VT.is256BitVector()) {
1555 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1557 if (VT.getVectorElementType() == MVT::i1)
1558 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1560 // Do not attempt to custom lower other non-512-bit vectors
1561 if (!VT.is512BitVector())
1564 if (EltSize >= 32) {
1565 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1566 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1567 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1568 setOperationAction(ISD::VSELECT, VT, Legal);
1569 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1570 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1571 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1572 setOperationAction(ISD::MLOAD, VT, Legal);
1573 setOperationAction(ISD::MSTORE, VT, Legal);
1576 for (int i = MVT::v32i8; i != MVT::v8i64; ++i) {
1577 MVT VT = (MVT::SimpleValueType)i;
1579 // Do not attempt to promote non-512-bit vectors.
1580 if (!VT.is512BitVector())
1583 setOperationAction(ISD::SELECT, VT, Promote);
1584 AddPromotedToType (ISD::SELECT, VT, MVT::v8i64);
1588 if (!Subtarget->useSoftFloat() && Subtarget->hasBWI()) {
1589 addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
1590 addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
1592 addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
1593 addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
1595 setOperationAction(ISD::LOAD, MVT::v32i16, Legal);
1596 setOperationAction(ISD::LOAD, MVT::v64i8, Legal);
1597 setOperationAction(ISD::SETCC, MVT::v32i1, Custom);
1598 setOperationAction(ISD::SETCC, MVT::v64i1, Custom);
1599 setOperationAction(ISD::ADD, MVT::v32i16, Legal);
1600 setOperationAction(ISD::ADD, MVT::v64i8, Legal);
1601 setOperationAction(ISD::SUB, MVT::v32i16, Legal);
1602 setOperationAction(ISD::SUB, MVT::v64i8, Legal);
1603 setOperationAction(ISD::MUL, MVT::v32i16, Legal);
1604 setOperationAction(ISD::MULHS, MVT::v32i16, Legal);
1605 setOperationAction(ISD::MULHU, MVT::v32i16, Legal);
1606 setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Legal);
1607 setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Legal);
1608 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom);
1609 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom);
1610 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i16, Custom);
1611 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i8, Custom);
1612 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i16, Custom);
1613 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i8, Custom);
1614 setOperationAction(ISD::SELECT, MVT::v32i1, Custom);
1615 setOperationAction(ISD::SELECT, MVT::v64i1, Custom);
1616 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
1617 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
1618 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom);
1619 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
1620 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i16, Custom);
1621 setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
1622 setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
1623 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i1, Custom);
1624 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i1, Custom);
1625 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i16, Custom);
1626 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i8, Custom);
1627 setOperationAction(ISD::VSELECT, MVT::v32i16, Legal);
1628 setOperationAction(ISD::VSELECT, MVT::v64i8, Legal);
1629 setOperationAction(ISD::TRUNCATE, MVT::v32i1, Custom);
1630 setOperationAction(ISD::TRUNCATE, MVT::v64i1, Custom);
1631 setOperationAction(ISD::TRUNCATE, MVT::v32i8, Custom);
1632 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i1, Custom);
1633 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i1, Custom);
1635 setOperationAction(ISD::SMAX, MVT::v64i8, Legal);
1636 setOperationAction(ISD::SMAX, MVT::v32i16, Legal);
1637 setOperationAction(ISD::UMAX, MVT::v64i8, Legal);
1638 setOperationAction(ISD::UMAX, MVT::v32i16, Legal);
1639 setOperationAction(ISD::SMIN, MVT::v64i8, Legal);
1640 setOperationAction(ISD::SMIN, MVT::v32i16, Legal);
1641 setOperationAction(ISD::UMIN, MVT::v64i8, Legal);
1642 setOperationAction(ISD::UMIN, MVT::v32i16, Legal);
1644 setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
1645 setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
1646 if (Subtarget->hasVLX())
1647 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
1649 for (int i = MVT::v32i8; i != MVT::v8i64; ++i) {
1650 const MVT VT = (MVT::SimpleValueType)i;
1652 const unsigned EltSize = VT.getVectorElementType().getSizeInBits();
1654 // Do not attempt to promote non-512-bit vectors.
1655 if (!VT.is512BitVector())
1659 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1660 setOperationAction(ISD::VSELECT, VT, Legal);
1665 if (!Subtarget->useSoftFloat() && Subtarget->hasVLX()) {
1666 addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
1667 addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
1669 setOperationAction(ISD::SETCC, MVT::v4i1, Custom);
1670 setOperationAction(ISD::SETCC, MVT::v2i1, Custom);
1671 setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom);
1672 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom);
1673 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Custom);
1674 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom);
1675 setOperationAction(ISD::SELECT, MVT::v4i1, Custom);
1676 setOperationAction(ISD::SELECT, MVT::v2i1, Custom);
1677 setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom);
1678 setOperationAction(ISD::BUILD_VECTOR, MVT::v2i1, Custom);
1679 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i1, Custom);
1680 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i1, Custom);
1682 setOperationAction(ISD::AND, MVT::v8i32, Legal);
1683 setOperationAction(ISD::OR, MVT::v8i32, Legal);
1684 setOperationAction(ISD::XOR, MVT::v8i32, Legal);
1685 setOperationAction(ISD::AND, MVT::v4i32, Legal);
1686 setOperationAction(ISD::OR, MVT::v4i32, Legal);
1687 setOperationAction(ISD::XOR, MVT::v4i32, Legal);
1688 setOperationAction(ISD::SRA, MVT::v2i64, Custom);
1689 setOperationAction(ISD::SRA, MVT::v4i64, Custom);
1691 setOperationAction(ISD::SMAX, MVT::v2i64, Legal);
1692 setOperationAction(ISD::SMAX, MVT::v4i64, Legal);
1693 setOperationAction(ISD::UMAX, MVT::v2i64, Legal);
1694 setOperationAction(ISD::UMAX, MVT::v4i64, Legal);
1695 setOperationAction(ISD::SMIN, MVT::v2i64, Legal);
1696 setOperationAction(ISD::SMIN, MVT::v4i64, Legal);
1697 setOperationAction(ISD::UMIN, MVT::v2i64, Legal);
1698 setOperationAction(ISD::UMIN, MVT::v4i64, Legal);
1701 // We want to custom lower some of our intrinsics.
1702 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
1703 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
1704 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
1705 if (!Subtarget->is64Bit())
1706 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
1708 // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
1709 // handle type legalization for these operations here.
1711 // FIXME: We really should do custom legalization for addition and
1712 // subtraction on x86-32 once PR3203 is fixed. We really can't do much better
1713 // than generic legalization for 64-bit multiplication-with-overflow, though.
1714 for (unsigned i = 0, e = 3+Subtarget->is64Bit(); i != e; ++i) {
1715 // Add/Sub/Mul with overflow operations are custom lowered.
1717 setOperationAction(ISD::SADDO, VT, Custom);
1718 setOperationAction(ISD::UADDO, VT, Custom);
1719 setOperationAction(ISD::SSUBO, VT, Custom);
1720 setOperationAction(ISD::USUBO, VT, Custom);
1721 setOperationAction(ISD::SMULO, VT, Custom);
1722 setOperationAction(ISD::UMULO, VT, Custom);
1725 if (!Subtarget->is64Bit()) {
1726 // These libcalls are not available in 32-bit.
1727 setLibcallName(RTLIB::SHL_I128, nullptr);
1728 setLibcallName(RTLIB::SRL_I128, nullptr);
1729 setLibcallName(RTLIB::SRA_I128, nullptr);
1732 // Combine sin / cos into one node or libcall if possible.
1733 if (Subtarget->hasSinCos()) {
1734 setLibcallName(RTLIB::SINCOS_F32, "sincosf");
1735 setLibcallName(RTLIB::SINCOS_F64, "sincos");
1736 if (Subtarget->isTargetDarwin()) {
1737 // For MacOSX, we don't want the normal expansion of a libcall to sincos.
1738 // We want to issue a libcall to __sincos_stret to avoid memory traffic.
1739 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
1740 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
1744 if (Subtarget->isTargetWin64()) {
1745 setOperationAction(ISD::SDIV, MVT::i128, Custom);
1746 setOperationAction(ISD::UDIV, MVT::i128, Custom);
1747 setOperationAction(ISD::SREM, MVT::i128, Custom);
1748 setOperationAction(ISD::UREM, MVT::i128, Custom);
1749 setOperationAction(ISD::SDIVREM, MVT::i128, Custom);
1750 setOperationAction(ISD::UDIVREM, MVT::i128, Custom);
1753 // We have target-specific dag combine patterns for the following nodes:
1754 setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
1755 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
1756 setTargetDAGCombine(ISD::BITCAST);
1757 setTargetDAGCombine(ISD::VSELECT);
1758 setTargetDAGCombine(ISD::SELECT);
1759 setTargetDAGCombine(ISD::SHL);
1760 setTargetDAGCombine(ISD::SRA);
1761 setTargetDAGCombine(ISD::SRL);
1762 setTargetDAGCombine(ISD::OR);
1763 setTargetDAGCombine(ISD::AND);
1764 setTargetDAGCombine(ISD::ADD);
1765 setTargetDAGCombine(ISD::FADD);
1766 setTargetDAGCombine(ISD::FSUB);
1767 setTargetDAGCombine(ISD::FMA);
1768 setTargetDAGCombine(ISD::SUB);
1769 setTargetDAGCombine(ISD::LOAD);
1770 setTargetDAGCombine(ISD::MLOAD);
1771 setTargetDAGCombine(ISD::STORE);
1772 setTargetDAGCombine(ISD::MSTORE);
1773 setTargetDAGCombine(ISD::ZERO_EXTEND);
1774 setTargetDAGCombine(ISD::ANY_EXTEND);
1775 setTargetDAGCombine(ISD::SIGN_EXTEND);
1776 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
1777 setTargetDAGCombine(ISD::SINT_TO_FP);
1778 setTargetDAGCombine(ISD::UINT_TO_FP);
1779 setTargetDAGCombine(ISD::SETCC);
1780 setTargetDAGCombine(ISD::BUILD_VECTOR);
1781 setTargetDAGCombine(ISD::MUL);
1782 setTargetDAGCombine(ISD::XOR);
1784 computeRegisterProperties(Subtarget->getRegisterInfo());
1786 MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
1787 MaxStoresPerMemsetOptSize = 8;
1788 MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
1789 MaxStoresPerMemcpyOptSize = 4;
1790 MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
1791 MaxStoresPerMemmoveOptSize = 4;
1792 setPrefLoopAlignment(4); // 2^4 bytes.
1794 // A predictable cmov does not hurt on an in-order CPU.
1795 // FIXME: Use a CPU attribute to trigger this, not a CPU model.
1796 PredictableSelectIsExpensive = !Subtarget->isAtom();
1797 EnableExtLdPromotion = true;
1798 setPrefFunctionAlignment(4); // 2^4 bytes.
1800 verifyIntrinsicTables();
1803 // This has so far only been implemented for 64-bit MachO.
1804 bool X86TargetLowering::useLoadStackGuardNode() const {
1805 return Subtarget->isTargetMachO() && Subtarget->is64Bit();
1808 TargetLoweringBase::LegalizeTypeAction
1809 X86TargetLowering::getPreferredVectorAction(EVT VT) const {
1810 if (ExperimentalVectorWideningLegalization &&
1811 VT.getVectorNumElements() != 1 &&
1812 VT.getVectorElementType().getSimpleVT() != MVT::i1)
1813 return TypeWidenVector;
1815 return TargetLoweringBase::getPreferredVectorAction(VT);
1818 EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
1821 return Subtarget->hasAVX512() ? MVT::i1: MVT::i8;
1823 const unsigned NumElts = VT.getVectorNumElements();
1824 const EVT EltVT = VT.getVectorElementType();
1825 if (VT.is512BitVector()) {
1826 if (Subtarget->hasAVX512())
1827 if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
1828 EltVT == MVT::f32 || EltVT == MVT::f64)
1830 case 8: return MVT::v8i1;
1831 case 16: return MVT::v16i1;
1833 if (Subtarget->hasBWI())
1834 if (EltVT == MVT::i8 || EltVT == MVT::i16)
1836 case 32: return MVT::v32i1;
1837 case 64: return MVT::v64i1;
1841 if (VT.is256BitVector() || VT.is128BitVector()) {
1842 if (Subtarget->hasVLX())
1843 if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
1844 EltVT == MVT::f32 || EltVT == MVT::f64)
1846 case 2: return MVT::v2i1;
1847 case 4: return MVT::v4i1;
1848 case 8: return MVT::v8i1;
1850 if (Subtarget->hasBWI() && Subtarget->hasVLX())
1851 if (EltVT == MVT::i8 || EltVT == MVT::i16)
1853 case 8: return MVT::v8i1;
1854 case 16: return MVT::v16i1;
1855 case 32: return MVT::v32i1;
1859 return VT.changeVectorElementTypeToInteger();
1862 /// Helper for getByValTypeAlignment to determine
1863 /// the desired ByVal argument alignment.
1864 static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
1867 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1868 if (VTy->getBitWidth() == 128)
1870 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1871 unsigned EltAlign = 0;
1872 getMaxByValAlign(ATy->getElementType(), EltAlign);
1873 if (EltAlign > MaxAlign)
1874 MaxAlign = EltAlign;
1875 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1876 for (auto *EltTy : STy->elements()) {
1877 unsigned EltAlign = 0;
1878 getMaxByValAlign(EltTy, EltAlign);
1879 if (EltAlign > MaxAlign)
1880 MaxAlign = EltAlign;
1887 /// Return the desired alignment for ByVal aggregate
1888 /// function arguments in the caller parameter area. For X86, aggregates
1889 /// that contain SSE vectors are placed at 16-byte boundaries while the rest
1890 /// are at 4-byte boundaries.
1891 unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty,
1892 const DataLayout &DL) const {
1893 if (Subtarget->is64Bit()) {
1894 // Max of 8 and alignment of type.
1895 unsigned TyAlign = DL.getABITypeAlignment(Ty);
1902 if (Subtarget->hasSSE1())
1903 getMaxByValAlign(Ty, Align);
1907 /// Returns the target specific optimal type for load
1908 /// and store operations as a result of memset, memcpy, and memmove
1909 /// lowering. If DstAlign is zero that means it's safe to destination
1910 /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
1911 /// means there isn't a need to check it against alignment requirement,
1912 /// probably because the source does not need to be loaded. If 'IsMemset' is
1913 /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
1914 /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
1915 /// source is constant so it does not need to be loaded.
1916 /// It returns EVT::Other if the type should be determined using generic
1917 /// target-independent logic.
1919 X86TargetLowering::getOptimalMemOpType(uint64_t Size,
1920 unsigned DstAlign, unsigned SrcAlign,
1921 bool IsMemset, bool ZeroMemset,
1923 MachineFunction &MF) const {
1924 const Function *F = MF.getFunction();
1925 if ((!IsMemset || ZeroMemset) &&
1926 !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
1928 (!Subtarget->isUnalignedMem16Slow() ||
1929 ((DstAlign == 0 || DstAlign >= 16) &&
1930 (SrcAlign == 0 || SrcAlign >= 16)))) {
1932 // FIXME: Check if unaligned 32-byte accesses are slow.
1933 if (Subtarget->hasInt256())
1935 if (Subtarget->hasFp256())
1938 if (Subtarget->hasSSE2())
1940 if (Subtarget->hasSSE1())
1942 } else if (!MemcpyStrSrc && Size >= 8 &&
1943 !Subtarget->is64Bit() &&
1944 Subtarget->hasSSE2()) {
1945 // Do not use f64 to lower memcpy if source is string constant. It's
1946 // better to use i32 to avoid the loads.
1950 // This is a compromise. If we reach here, unaligned accesses may be slow on
1951 // this target. However, creating smaller, aligned accesses could be even
1952 // slower and would certainly be a lot more code.
1953 if (Subtarget->is64Bit() && Size >= 8)
1958 bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
1960 return X86ScalarSSEf32;
1961 else if (VT == MVT::f64)
1962 return X86ScalarSSEf64;
1967 X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
1972 switch (VT.getSizeInBits()) {
1974 // 8-byte and under are always assumed to be fast.
1978 *Fast = !Subtarget->isUnalignedMem16Slow();
1981 *Fast = !Subtarget->isUnalignedMem32Slow();
1983 // TODO: What about AVX-512 (512-bit) accesses?
1986 // Misaligned accesses of any size are always allowed.
1990 /// Return the entry encoding for a jump table in the
1991 /// current function. The returned value is a member of the
1992 /// MachineJumpTableInfo::JTEntryKind enum.
1993 unsigned X86TargetLowering::getJumpTableEncoding() const {
1994 // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
1996 if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
1997 Subtarget->isPICStyleGOT())
1998 return MachineJumpTableInfo::EK_Custom32;
2000 // Otherwise, use the normal jump table encoding heuristics.
2001 return TargetLowering::getJumpTableEncoding();
2004 bool X86TargetLowering::useSoftFloat() const {
2005 return Subtarget->useSoftFloat();
2009 X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
2010 const MachineBasicBlock *MBB,
2011 unsigned uid,MCContext &Ctx) const{
2012 assert(MBB->getParent()->getTarget().getRelocationModel() == Reloc::PIC_ &&
2013 Subtarget->isPICStyleGOT());
2014 // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
2016 return MCSymbolRefExpr::create(MBB->getSymbol(),
2017 MCSymbolRefExpr::VK_GOTOFF, Ctx);
2020 /// Returns relocation base for the given PIC jumptable.
2021 SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
2022 SelectionDAG &DAG) const {
2023 if (!Subtarget->is64Bit())
2024 // This doesn't have SDLoc associated with it, but is not really the
2025 // same as a Register.
2026 return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
2027 getPointerTy(DAG.getDataLayout()));
2031 /// This returns the relocation base for the given PIC jumptable,
2032 /// the same as getPICJumpTableRelocBase, but as an MCExpr.
2033 const MCExpr *X86TargetLowering::
2034 getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
2035 MCContext &Ctx) const {
2036 // X86-64 uses RIP relative addressing based on the jump table label.
2037 if (Subtarget->isPICStyleRIPRel())
2038 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2040 // Otherwise, the reference is relative to the PIC base.
2041 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2044 std::pair<const TargetRegisterClass *, uint8_t>
2045 X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
2047 const TargetRegisterClass *RRC = nullptr;
2049 switch (VT.SimpleTy) {
2051 return TargetLowering::findRepresentativeClass(TRI, VT);
2052 case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
2053 RRC = Subtarget->is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
2056 RRC = &X86::VR64RegClass;
2058 case MVT::f32: case MVT::f64:
2059 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
2060 case MVT::v4f32: case MVT::v2f64:
2061 case MVT::v32i8: case MVT::v8i32: case MVT::v4i64: case MVT::v8f32:
2063 RRC = &X86::VR128RegClass;
2066 return std::make_pair(RRC, Cost);
2069 bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace,
2070 unsigned &Offset) const {
2071 if (!Subtarget->isTargetLinux())
2074 if (Subtarget->is64Bit()) {
2075 // %fs:0x28, unless we're using a Kernel code model, in which case it's %gs:
2077 if (getTargetMachine().getCodeModel() == CodeModel::Kernel)
2089 /// Android provides a fixed TLS slot for the SafeStack pointer.
2090 /// See the definition of TLS_SLOT_SAFESTACK in
2091 /// https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
2092 bool X86TargetLowering::getSafeStackPointerLocation(unsigned &AddressSpace,
2093 unsigned &Offset) const {
2094 if (!Subtarget->isTargetAndroid())
2097 if (Subtarget->is64Bit()) {
2098 // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs:
2100 if (getTargetMachine().getCodeModel() == CodeModel::Kernel)
2112 bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
2113 unsigned DestAS) const {
2114 assert(SrcAS != DestAS && "Expected different address spaces!");
2116 return SrcAS < 256 && DestAS < 256;
2119 //===----------------------------------------------------------------------===//
2120 // Return Value Calling Convention Implementation
2121 //===----------------------------------------------------------------------===//
2123 #include "X86GenCallingConv.inc"
2125 bool X86TargetLowering::CanLowerReturn(
2126 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
2127 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
2128 SmallVector<CCValAssign, 16> RVLocs;
2129 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2130 return CCInfo.CheckReturn(Outs, RetCC_X86);
2133 const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
2134 static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
2139 X86TargetLowering::LowerReturn(SDValue Chain,
2140 CallingConv::ID CallConv, bool isVarArg,
2141 const SmallVectorImpl<ISD::OutputArg> &Outs,
2142 const SmallVectorImpl<SDValue> &OutVals,
2143 SDLoc dl, SelectionDAG &DAG) const {
2144 MachineFunction &MF = DAG.getMachineFunction();
2145 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2147 SmallVector<CCValAssign, 16> RVLocs;
2148 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
2149 CCInfo.AnalyzeReturn(Outs, RetCC_X86);
2152 SmallVector<SDValue, 6> RetOps;
2153 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2154 // Operand #1 = Bytes To Pop
2155 RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
2158 // Copy the result values into the output registers.
2159 for (unsigned i = 0; i != RVLocs.size(); ++i) {
2160 CCValAssign &VA = RVLocs[i];
2161 assert(VA.isRegLoc() && "Can only return in registers!");
2162 SDValue ValToCopy = OutVals[i];
2163 EVT ValVT = ValToCopy.getValueType();
2165 // Promote values to the appropriate types.
2166 if (VA.getLocInfo() == CCValAssign::SExt)
2167 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
2168 else if (VA.getLocInfo() == CCValAssign::ZExt)
2169 ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
2170 else if (VA.getLocInfo() == CCValAssign::AExt) {
2171 if (ValVT.isVector() && ValVT.getScalarType() == MVT::i1)
2172 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
2174 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
2176 else if (VA.getLocInfo() == CCValAssign::BCvt)
2177 ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
2179 assert(VA.getLocInfo() != CCValAssign::FPExt &&
2180 "Unexpected FP-extend for return value.");
2182 // If this is x86-64, and we disabled SSE, we can't return FP values,
2183 // or SSE or MMX vectors.
2184 if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
2185 VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
2186 (Subtarget->is64Bit() && !Subtarget->hasSSE1())) {
2187 report_fatal_error("SSE register return with SSE disabled");
2189 // Likewise we can't return F64 values with SSE1 only. gcc does so, but
2190 // llvm-gcc has never done it right and no one has noticed, so this
2191 // should be OK for now.
2192 if (ValVT == MVT::f64 &&
2193 (Subtarget->is64Bit() && !Subtarget->hasSSE2()))
2194 report_fatal_error("SSE2 register return with SSE2 disabled");
2196 // Returns in ST0/ST1 are handled specially: these are pushed as operands to
2197 // the RET instruction and handled by the FP Stackifier.
2198 if (VA.getLocReg() == X86::FP0 ||
2199 VA.getLocReg() == X86::FP1) {
2200 // If this is a copy from an xmm register to ST(0), use an FPExtend to
2201 // change the value to the FP stack register class.
2202 if (isScalarFPTypeInSSEReg(VA.getValVT()))
2203 ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
2204 RetOps.push_back(ValToCopy);
2205 // Don't emit a copytoreg.
2209 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
2210 // which is returned in RAX / RDX.
2211 if (Subtarget->is64Bit()) {
2212 if (ValVT == MVT::x86mmx) {
2213 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
2214 ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
2215 ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
2217 // If we don't have SSE2 available, convert to v4f32 so the generated
2218 // register is legal.
2219 if (!Subtarget->hasSSE2())
2220 ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
2225 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag);
2226 Flag = Chain.getValue(1);
2227 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2230 // All x86 ABIs require that for returning structs by value we copy
2231 // the sret argument into %rax/%eax (depending on ABI) for the return.
2232 // We saved the argument into a virtual register in the entry block,
2233 // so now we copy the value out and into %rax/%eax.
2235 // Checking Function.hasStructRetAttr() here is insufficient because the IR
2236 // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
2237 // false, then an sret argument may be implicitly inserted in the SelDAG. In
2238 // either case FuncInfo->setSRetReturnReg() will have been called.
2239 if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
2240 SDValue Val = DAG.getCopyFromReg(Chain, dl, SRetReg,
2241 getPointerTy(MF.getDataLayout()));
2244 = (Subtarget->is64Bit() && !Subtarget->isTarget64BitILP32()) ?
2245 X86::RAX : X86::EAX;
2246 Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
2247 Flag = Chain.getValue(1);
2249 // RAX/EAX now acts like a return value.
2251 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
2254 RetOps[0] = Chain; // Update chain.
2256 // Add the flag if we have it.
2258 RetOps.push_back(Flag);
2260 return DAG.getNode(X86ISD::RET_FLAG, dl, MVT::Other, RetOps);
/// Tail-call helper: examines how the single result of \p N is consumed.
/// The checks performed are:
///  - \p N produces exactly one value and that value has exactly one use;
///  - the user is either a CopyToReg whose last operand is not glue, or an
///    FP_EXTEND;
///  - every user of that node is an X86ISD::RET_FLAG with at most four
///    operands, where a fourth operand must be glue.
/// \param N     Node whose result is inspected.
/// \param Chain In/out chain. NOTE(review): TCChain is presumably written back
///              to it when all checks pass -- confirm against the full body.
2263 bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2264 if (N->getNumValues() != 1)
2266 if (!N->hasNUsesOfValue(1, 0))
// Start from the caller-provided chain; a CopyToReg user replaces it with its
// own incoming chain (operand 0) below.
2269 SDValue TCChain = Chain;
2270 SDNode *Copy = *N->use_begin();
2271 if (Copy->getOpcode() == ISD::CopyToReg) {
2272 // If the copy has a glue operand, we conservatively assume it isn't safe to
2273 // perform a tail call.
2274 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2276 TCChain = Copy->getOperand(0);
2277 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
// Walk every user of the copy/extend node; each must be a return node.
2280 bool HasRet = false;
2281 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2283 if (UI->getOpcode() != X86ISD::RET_FLAG)
2285 // If we are returning more than one value, we can definitely
2286 // not make a tail call; see PR19530.
2287 if (UI->getNumOperands() > 4)
// With exactly four operands, the last one has to be the glue operand.
2289 if (UI->getNumOperands() == 4 &&
2290 UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue)
/// Compute the type to use when a value of type \p VT is extended for an
/// argument or return value. A minimum type is chosen (the register type of
/// i8 when a 64-bit target zero-extends an i1, the register type of i32 as the
/// general case) and \p VT is widened to it only if \p VT is narrower.
/// \param Context    LLVM context used to query the register type.
/// \param VT         Type of the value being extended.
/// \param ExtendKind Kind of extension requested (e.g. ISD::ZERO_EXTEND).
/// \returns the minimum register type if \p VT has fewer bits, else \p VT.
2303 X86TargetLowering::getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
2304 ISD::NodeType ExtendKind) const {
2306 // TODO: Is this also valid on 32-bit?
2307 if (Subtarget->is64Bit() && VT == MVT::i1 && ExtendKind == ISD::ZERO_EXTEND)
2308 ReturnMVT = MVT::i8;
2310 ReturnMVT = MVT::i32;
// Never shrink: only replace VT when it is strictly narrower than MinVT.
2312 EVT MinVT = getRegisterType(Context, ReturnMVT);
2313 return VT.bitsLT(MinVT) ? MinVT : VT;
2316 /// Lower the result values of a call into the
2317 /// appropriate copies out of appropriate physical registers.
/// \param Chain    Chain the register copies are threaded through.
/// \param InFlag   Glue value linking each copy to the previous node.
/// \param CallConv Calling convention of the call.
/// \param isVarArg Whether the callee is variadic.
/// \param Ins      Descriptions of the values returned by the call.
/// \param dl       Debug location for the created nodes.
/// \param DAG      SelectionDAG in which nodes are created.
/// \param InVals   Receives one lowered value per return location.
2320 X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
2321 CallingConv::ID CallConv, bool isVarArg,
2322 const SmallVectorImpl<ISD::InputArg> &Ins,
2323 SDLoc dl, SelectionDAG &DAG,
2324 SmallVectorImpl<SDValue> &InVals) const {
2326 // Assign locations to each value returned by this call.
2327 SmallVector<CCValAssign, 16> RVLocs;
2328 bool Is64Bit = Subtarget->is64Bit();
2329 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2331 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
2333 // Copy all of the result registers out of their specified physreg.
2334 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
2335 CCValAssign &VA = RVLocs[i];
2336 EVT CopyVT = VA.getLocVT();
2338 // If this is x86-64 (or the value is marked inreg), and SSE is disabled,
// we can't return f32/f64 values: report a fatal error.
2339 if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
2340 ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
2341 report_fatal_error("SSE register return with SSE disabled");
2344 // If we prefer to use the value in xmm registers, copy it out as f80 and
2345 // use a truncate to move it from fp stack reg to xmm reg.
2346 bool RoundAfterCopy = false;
2347 if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
2348 isScalarFPTypeInSSEReg(VA.getValVT())) {
2350 RoundAfterCopy = (CopyVT != VA.getLocVT());
// The copy node is stored via its result 1 (used as the new chain); result 0
// is the copied value and result 2 is the glue for the next copy.
2353 Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
2354 CopyVT, InFlag).getValue(1);
2355 SDValue Val = Chain.getValue(0);
2358 Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
2359 // This truncation won't change the value.
2360 DAG.getIntPtrConstant(1, dl));
// An i1 (or vector of i1) that was extended in its location is truncated
// back to its value type.
2362 if (VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1)
2363 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
2365 InFlag = Chain.getValue(2);
2366 InVals.push_back(Val);
2372 //===----------------------------------------------------------------------===//
2373 // C & StdCall & Fast Calling Convention implementation
2374 //===----------------------------------------------------------------------===//
2375 // The StdCall calling convention is the standard for many Windows API
2376 // routines. It differs from the C calling convention only slightly: the
2377 // callee, not the caller, cleans up the stack. Symbols are also expected to
2378 // be decorated in a special way. It doesn't support any vector arguments.
2379 // For info on the fast calling convention, see the Fast Calling Convention
2380 // (tail call) implementation LowerX86_32FastCCCallTo.
2382 /// CallIsStructReturn - Determines whether a call uses struct return
// Classification of how (and whether) a struct-return (sret) pointer is
// passed; produced by callIsStructReturn and argsAreStructReturn.
2384 enum StructReturnType {
/// Classify a call by the flags of its first outgoing argument:
///  - no sret flag          -> NotStructReturn
///  - sret and inreg        -> RegStructReturn
///  - sret without inreg    -> StackStructReturn
/// \param Outs Outgoing arguments of the call.
2389 static StructReturnType
2390 callIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs) {
2392 return NotStructReturn;
// Only the first argument can carry the sret flag that is inspected here.
2394 const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
2395 if (!Flags.isSRet())
2396 return NotStructReturn;
2397 if (Flags.isInReg())
2398 return RegStructReturn;
2399 return StackStructReturn;
2402 /// Determines whether a function uses struct return semantics.
/// The decision is based on the flags of the first incoming argument:
///  - no sret flag          -> NotStructReturn
///  - sret and inreg        -> RegStructReturn
///  - sret without inreg    -> StackStructReturn
/// \param Ins Incoming (formal) arguments of the function.
2403 static StructReturnType
2404 argsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) {
2406 return NotStructReturn;
// Only the first argument's flags are examined.
2408 const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
2409 if (!Flags.isSRet())
2410 return NotStructReturn;
2411 if (Flags.isInReg())
2412 return RegStructReturn;
2413 return StackStructReturn;
2416 /// Make a copy of an aggregate at address specified by "Src" to address
2417 /// "Dst" with size and alignment information specified by the specific
2418 /// parameter attribute. The copy will be passed as a byval function parameter.
/// \param Src   Address of the aggregate to copy from.
/// \param Dst   Address to copy to.
/// \param Chain Chain the memcpy is attached to.
/// \param Flags Argument flags supplying the byval size and alignment.
/// \param DAG   SelectionDAG in which nodes are created.
/// \returns the node produced by DAG.getMemcpy for the copy.
2420 CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
2421 ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
// The byval size is materialized as an i32 constant.
2423 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
// The copy is non-volatile, always inlined, and never treated as a tail call;
// no pointer info is supplied for either side.
2425 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
2426 /*isVolatile*/false, /*AlwaysInline=*/true,
2427 /*isTailCall*/false,
2428 MachinePointerInfo(), MachinePointerInfo());
2431 /// Return true if the calling convention is one that
2432 /// supports tail call optimization.
2433 static bool IsTailCallConvention(CallingConv::ID CC) {
2434 return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
2435 CC == CallingConv::HiPE || CC == CallingConv::HHVM);
2438 /// \brief Return true if the calling convention is a C calling convention.
2439 static bool IsCCallConvention(CallingConv::ID CC) {
2440 return (CC == CallingConv::C || CC == CallingConv::X86_64_Win64 ||
2441 CC == CallingConv::X86_64_SysV);
/// Quick pre-check on whether call \p CI may be emitted as a tail call.
/// The checks performed are: the call is marked as a tail call, the enclosing
/// function's "disable-tail-calls" attribute is not "true", and the callee's
/// calling convention is either a tail-call convention or a C convention.
/// \param CI Call instruction being queried.
2444 bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
// The attribute is read from the function that contains the call.
2446 CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
2447 if (!CI->isTailCall() || Attr.getValueAsString() == "true")
2451 CallingConv::ID CalleeCC = CS.getCallingConv();
2452 if (!IsTailCallConvention(CalleeCC) && !IsCCallConvention(CalleeCC))
2458 /// Return true if the function is being made into
2459 /// a tailcall target by changing its ABI.
2460 static bool FuncIsMadeTailCallSafe(CallingConv::ID CC,
2461 bool GuaranteedTailCallOpt) {
2462 return GuaranteedTailCallOpt && IsTailCallConvention(CC);
/// Lower an incoming argument that was assigned a stack location.
/// For a byval argument, a fixed frame object of the byval size is created at
/// the argument's stack offset and its frame index is returned. Otherwise a
/// fixed object sized for the chosen value type is created and the value is
/// loaded from it; an i1 that was extended in memory is truncated back to its
/// value type.
/// \param Chain    Chain used for the load.
/// \param CallConv Calling convention of the current function.
/// \param Ins      Incoming argument descriptions; entry i supplies the flags.
/// \param VA       Location assignment of the argument.
/// \param MFI      Frame info in which the fixed object is created.
2466 X86TargetLowering::LowerMemArgument(SDValue Chain,
2467 CallingConv::ID CallConv,
2468 const SmallVectorImpl<ISD::InputArg> &Ins,
2469 SDLoc dl, SelectionDAG &DAG,
2470 const CCValAssign &VA,
2471 MachineFrameInfo *MFI,
2473 // Create the nodes corresponding to a load from this parameter slot.
2474 ISD::ArgFlagsTy Flags = Ins[i].Flags;
// The slot is immutable only if the function is not being made tail-call safe
// and the argument is not byval.
2475 bool AlwaysUseMutable = FuncIsMadeTailCallSafe(
2476 CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
2477 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
2480 // If value is passed by pointer we have address passed instead of the value
2482 bool ExtendedInMem = VA.isExtInLoc() &&
2483 VA.getValVT().getScalarType() == MVT::i1;
// Indirect or extended-in-memory values are accessed with the location type;
// everything else uses the value type.
2485 if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
2486 ValVT = VA.getLocVT();
2488 ValVT = VA.getValVT();
2490 // FIXME: For now, all byval parameter objects are marked mutable. This can be
2491 // changed with more analysis.
2492 // In case of tail call optimization mark all arguments mutable. Since they
2493 // could be overwritten by lowering of arguments in case of a tail call.
2494 if (Flags.isByVal()) {
2495 unsigned Bytes = Flags.getByValSize();
2496 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
2497 int FI = MFI->CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable);
2498 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
// Non-byval: create a slot of the value's size in bytes and load from it.
2500 int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
2501 VA.getLocMemOffset(), isImmutable);
2502 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2503 SDValue Val = DAG.getLoad(
2504 ValVT, dl, Chain, FIN,
2505 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), false,
2507 return ExtendedInMem ?
2508 DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val) : Val;
2512 // FIXME: Get this from tablegen.
/// Return the general-purpose registers used to pass integer arguments for
/// the given 64-bit calling convention, in order: RCX, RDX, R8, R9 for Win64
/// conventions, and RDI, RSI, RDX, RCX, R8, R9 otherwise.
/// \param CallConv  Calling convention being queried.
/// \param Subtarget Subtarget; must be 64-bit (asserted).
2513 static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
2514 const X86Subtarget *Subtarget) {
2515 assert(Subtarget->is64Bit());
2517 if (Subtarget->isCallingConvWin64(CallConv)) {
2518 static const MCPhysReg GPR64ArgRegsWin64[] = {
2519 X86::RCX, X86::RDX, X86::R8, X86::R9
2521 return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
// Non-Win64 conventions use this six-register sequence.
2524 static const MCPhysReg GPR64ArgRegs64Bit[] = {
2525 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
2527 return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
2530 // FIXME: Get this from tablegen.
/// Return the XMM registers that may carry arguments for the given 64-bit
/// calling convention. In the general case this is XMM0 through XMM7.
/// NOTE(review): for Win64 conventions, and when soft-float, noimplicitfloat
/// or a missing SSE1 rule out XMM usage, the function presumably returns an
/// empty list -- confirm against the full body.
/// \param MF        Current machine function (its IR function attributes are
///                  consulted).
/// \param CallConv  Calling convention being queried.
/// \param Subtarget Subtarget; must be 64-bit (asserted).
2531 static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
2532 CallingConv::ID CallConv,
2533 const X86Subtarget *Subtarget) {
2534 assert(Subtarget->is64Bit());
2535 if (Subtarget->isCallingConvWin64(CallConv)) {
2536 // The XMM registers which might contain var arg parameters are shadowed
2537 // in their paired GPR. So we only need to save the GPR to their home
2539 // TODO: __vectorcall will change this.
2543 const Function *Fn = MF.getFunction();
2544 bool NoImplicitFloatOps = Fn->hasFnAttribute(Attribute::NoImplicitFloat);
2545 bool isSoftFloat = Subtarget->useSoftFloat();
// Soft-float combined with noimplicitfloat is treated as a caller error.
2546 assert(!(isSoftFloat && NoImplicitFloatOps) &&
2547 "SSE register cannot be used when SSE is disabled!");
2548 if (isSoftFloat || NoImplicitFloatOps || !Subtarget->hasSSE1())
2549 // Kernel mode asks for SSE to be disabled, so there are no XMM argument
2553 static const MCPhysReg XMMArgRegs64Bit[] = {
2554 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
2555 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
2557 return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
/// Lower the incoming (formal) arguments of the current function.
/// Every argument is assigned a location with CC_X86 and one SDValue per
/// location is appended to \p InVals: register arguments are copied out of
/// their live-in physical register, memory arguments go through
/// LowerMemArgument, and indirectly passed values are loaded through the
/// resulting pointer. The function also records, in X86MachineFunctionInfo,
/// the sret pointer register, the vararg frame index / register save area,
/// the registers forwarded for musttail calls in vararg functions, the number
/// of bytes the callee pops on return and the argument stack size.
/// \param Chain    Incoming chain; extended as copies and stores are created.
/// \param CallConv Calling convention of the function being lowered.
/// \param isVarArg Whether the function is variadic.
/// \param Ins      Descriptions (flags) of the incoming arguments.
/// \param dl       Debug location for the created nodes.
/// \param DAG      SelectionDAG in which nodes are created.
/// \param InVals   Receives the lowered argument values.
2560 SDValue X86TargetLowering::LowerFormalArguments(
2561 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
2562 const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl, SelectionDAG &DAG,
2563 SmallVectorImpl<SDValue> &InVals) const {
2564 MachineFunction &MF = DAG.getMachineFunction();
2565 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2566 const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
// An externally visible "main" on Cygwin/MinGW targets always gets a frame
// pointer.
2568 const Function* Fn = MF.getFunction();
2569 if (Fn->hasExternalLinkage() &&
2570 Subtarget->isTargetCygMing() &&
2571 Fn->getName() == "main")
2572 FuncInfo->setForceFramePointer(true);
2574 MachineFrameInfo *MFI = MF.getFrameInfo();
2575 bool Is64Bit = Subtarget->is64Bit();
2576 bool IsWin64 = Subtarget->isCallingConvWin64(CallConv);
2578 assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
2579 "Var args not supported with calling convention fastcc, ghc or hipe");
2581 // Assign locations to all of the incoming arguments.
2582 SmallVector<CCValAssign, 16> ArgLocs;
2583 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
2585 // Allocate shadow area for Win64
2587 CCInfo.AllocateStack(32, 8);
2589 CCInfo.AnalyzeFormalArguments(Ins, CC_X86);
// LastVal remembers the previous value number so the loop can assert that no
// value is assigned to more than one location.
2591 unsigned LastVal = ~0U;
2593 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2594 CCValAssign &VA = ArgLocs[i];
2595 // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
2597 assert(VA.getValNo() != LastVal &&
2598 "Don't support value assigned to multiple locs yet");
2600 LastVal = VA.getValNo();
2602 if (VA.isRegLoc()) {
// Pick the register class that matches the location type.
2603 EVT RegVT = VA.getLocVT();
2604 const TargetRegisterClass *RC;
2605 if (RegVT == MVT::i32)
2606 RC = &X86::GR32RegClass;
2607 else if (Is64Bit && RegVT == MVT::i64)
2608 RC = &X86::GR64RegClass;
2609 else if (RegVT == MVT::f32)
2610 RC = &X86::FR32RegClass;
2611 else if (RegVT == MVT::f64)
2612 RC = &X86::FR64RegClass;
2613 else if (RegVT.is512BitVector())
2614 RC = &X86::VR512RegClass;
2615 else if (RegVT.is256BitVector())
2616 RC = &X86::VR256RegClass;
2617 else if (RegVT.is128BitVector())
2618 RC = &X86::VR128RegClass;
2619 else if (RegVT == MVT::x86mmx)
2620 RC = &X86::VR64RegClass;
2621 else if (RegVT == MVT::i1)
2622 RC = &X86::VK1RegClass;
2623 else if (RegVT == MVT::v8i1)
2624 RC = &X86::VK8RegClass;
2625 else if (RegVT == MVT::v16i1)
2626 RC = &X86::VK16RegClass;
2627 else if (RegVT == MVT::v32i1)
2628 RC = &X86::VK32RegClass;
2629 else if (RegVT == MVT::v64i1)
2630 RC = &X86::VK64RegClass;
2632 llvm_unreachable("Unknown argument type!");
// Mark the physical register live-in and read the argument from the
// corresponding virtual register.
2634 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
2635 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
2637 // If this is an 8 or 16-bit value, it is really passed promoted to 32
2638 // bits. Insert an assert[sz]ext to capture this, then truncate to the
2640 if (VA.getLocInfo() == CCValAssign::SExt)
2641 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
2642 DAG.getValueType(VA.getValVT()));
2643 else if (VA.getLocInfo() == CCValAssign::ZExt)
2644 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
2645 DAG.getValueType(VA.getValVT()));
2646 else if (VA.getLocInfo() == CCValAssign::BCvt)
2647 ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
2649 if (VA.isExtInLoc()) {
2650 // Handle MMX values passed in XMM regs.
2651 if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
2652 ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
2654 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
2657 assert(VA.isMemLoc());
2658 ArgValue = LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, i);
2661 // If value is passed via pointer - do a load.
2662 if (VA.getLocInfo() == CCValAssign::Indirect)
2663 ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue,
2664 MachinePointerInfo(), false, false, false, 0);
2666 InVals.push_back(ArgValue);
2669 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2670 // All x86 ABIs require that for returning structs by value we copy the
2671 // sret argument into %rax/%eax (depending on ABI) for the return. Save
2672 // the argument into a virtual register so that we can access it from the
2674 if (Ins[i].Flags.isSRet()) {
2675 unsigned Reg = FuncInfo->getSRetReturnReg();
2677 MVT PtrTy = getPointerTy(DAG.getDataLayout());
2678 Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
2679 FuncInfo->setSRetReturnReg(Reg);
// The copy hangs off the entry node and is merged into the chain with a
// TokenFactor.
2681 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[i]);
2682 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
2687 unsigned StackSize = CCInfo.getNextStackOffset();
2688 // Align stack specially for tail calls.
2689 if (FuncIsMadeTailCallSafe(CallConv,
2690 MF.getTarget().Options.GuaranteedTailCallOpt))
2691 StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
2693 // If the function takes variable number of arguments, make a frame index for
2694 // the start of the first vararg value... for expansion of llvm.va_start. We
2695 // can skip this if there are no va_start calls.
2696 if (MFI->hasVAStart() &&
2697 (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
2698 CallConv != CallingConv::X86_ThisCall))) {
2699 FuncInfo->setVarArgsFrameIndex(
2700 MFI->CreateFixedObject(1, StackSize, true));
// Determine whether this function is its own WinEH parent function.
2703 MachineModuleInfo &MMI = MF.getMMI();
2704 const Function *WinEHParent = nullptr;
2705 if (MMI.hasWinEHFuncInfo(Fn))
2706 WinEHParent = MMI.getWinEHParent(Fn);
2707 bool IsWinEHParent = WinEHParent && WinEHParent == Fn;
2709 // Figure out if XMM registers are in use.
2710 assert(!(Subtarget->useSoftFloat() &&
2711 Fn->hasFnAttribute(Attribute::NoImplicitFloat)) &&
2712 "SSE register cannot be used when SSE is disabled!");
2714 // 64-bit calling conventions support varargs and register parameters, so we
2715 // have to do extra work to spill them in the prologue.
2716 if (Is64Bit && isVarArg && MFI->hasVAStart()) {
2717 // Find the first unallocated argument registers.
2718 ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
2719 ArrayRef<MCPhysReg> ArgXMMs = get64BitArgumentXMMs(MF, CallConv, Subtarget);
2720 unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
2721 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
2722 assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
2723 "SSE register cannot be used when SSE is disabled!");
2725 // Gather all the live in physical registers.
2726 SmallVector<SDValue, 6> LiveGPRs;
2727 SmallVector<SDValue, 8> LiveXMMRegs;
// Only the argument registers not already consumed by named arguments
// (those from index NumIntRegs / NumXMMRegs onward) are gathered.
2729 for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
2730 unsigned GPR = MF.addLiveIn(Reg, &X86::GR64RegClass);
2732 DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64));
2734 if (!ArgXMMs.empty()) {
// AL is read as well; it is later passed to VASTART_SAVE_XMM_REGS.
2735 unsigned AL = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
2736 ALVal = DAG.getCopyFromReg(Chain, dl, AL, MVT::i8);
2737 for (MCPhysReg Reg : ArgXMMs.slice(NumXMMRegs)) {
2738 unsigned XMMReg = MF.addLiveIn(Reg, &X86::VR128RegClass);
2739 LiveXMMRegs.push_back(
2740 DAG.getCopyFromReg(Chain, dl, XMMReg, MVT::v4f32));
2745 // Get to the caller-allocated home save location. Add 8 to account
2746 // for the return address.
2747 int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
2748 FuncInfo->setRegSaveFrameIndex(
2749 MFI->CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
2750 // Fixup to set vararg frame on shadow area (4 x i64).
2752 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
2754 // For X86-64, if there are vararg parameters that are passed via
2755 // registers, then we must store them to their spots on the stack so
2756 // they may be loaded by dereferencing the result of va_next.
2757 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
2758 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
// The save area holds 8 bytes per GPR plus 16 bytes per XMM register and is
// 16-byte aligned.
2759 FuncInfo->setRegSaveFrameIndex(MFI->CreateStackObject(
2760 ArgGPRs.size() * 8 + ArgXMMs.size() * 16, 16, false));
2763 // Store the integer parameter registers.
2764 SmallVector<SDValue, 8> MemOps;
2765 SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
2766 getPointerTy(DAG.getDataLayout()));
2767 unsigned Offset = FuncInfo->getVarArgsGPOffset();
2768 for (SDValue Val : LiveGPRs) {
2769 SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
2770 RSFIN, DAG.getIntPtrConstant(Offset, dl));
// Each store is chained on result 1 of the value being stored.
2772 DAG.getStore(Val.getValue(1), dl, Val, FIN,
2773 MachinePointerInfo::getFixedStack(
2774 DAG.getMachineFunction(),
2775 FuncInfo->getRegSaveFrameIndex(), Offset),
2777 MemOps.push_back(Store);
2781 if (!ArgXMMs.empty() && NumXMMRegs != ArgXMMs.size()) {
2782 // Now store the XMM (fp + vector) parameter registers.
// Operands: chain, AL value, register save frame index, FP offset, then the
// live XMM register values.
2783 SmallVector<SDValue, 12> SaveXMMOps;
2784 SaveXMMOps.push_back(Chain);
2785 SaveXMMOps.push_back(ALVal);
2786 SaveXMMOps.push_back(DAG.getIntPtrConstant(
2787 FuncInfo->getRegSaveFrameIndex(), dl));
2788 SaveXMMOps.push_back(DAG.getIntPtrConstant(
2789 FuncInfo->getVarArgsFPOffset(), dl));
2790 SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(),
2792 MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
2793 MVT::Other, SaveXMMOps));
// Join all register-save operations into the chain.
2796 if (!MemOps.empty())
2797 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
// Vararg functions containing musttail calls must forward their register
// parameters; record which registers are forwarded.
2800 if (isVarArg && MFI->hasMustTailInVarArgFunc()) {
2801 // Find the largest legal vector type.
2802 MVT VecVT = MVT::Other;
2803 // FIXME: Only some x86_32 calling conventions support AVX512.
2804 if (Subtarget->hasAVX512() &&
2805 (Is64Bit || (CallConv == CallingConv::X86_VectorCall ||
2806 CallConv == CallingConv::Intel_OCL_BI)))
2807 VecVT = MVT::v16f32;
2808 else if (Subtarget->hasAVX())
2810 else if (Subtarget->hasSSE2())
2813 // We forward some GPRs and some vector types.
2814 SmallVector<MVT, 2> RegParmTypes;
2815 MVT IntVT = Is64Bit ? MVT::i64 : MVT::i32;
2816 RegParmTypes.push_back(IntVT);
2817 if (VecVT != MVT::Other)
2818 RegParmTypes.push_back(VecVT);
2820 // Compute the set of forwarded registers. The rest are scratch.
2821 SmallVectorImpl<ForwardedRegister> &Forwards =
2822 FuncInfo->getForwardedMustTailRegParms();
2823 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
2825 // Conservatively forward AL on x86_64, since it might be used for varargs.
2826 if (Is64Bit && !CCInfo.isAllocated(X86::AL)) {
2827 unsigned ALVReg = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
2828 Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
2831 // Copy all forwards from physical to virtual registers.
2832 for (ForwardedRegister &F : Forwards) {
2833 // FIXME: Can we use a less constrained schedule?
// Read the current register, then replace F.VReg with a fresh virtual
// register that receives the value.
2834 SDValue RegVal = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
2835 F.VReg = MF.getRegInfo().createVirtualRegister(getRegClassFor(F.VT));
2836 Chain = DAG.getCopyToReg(Chain, dl, F.VReg, RegVal);
2840 // Some CCs need callee pop.
2841 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
2842 MF.getTarget().Options.GuaranteedTailCallOpt)) {
2843 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
2845 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
2846 // If this is an sret function, the return should pop the hidden pointer.
2847 if (!Is64Bit && !IsTailCallConvention(CallConv) &&
2848 !Subtarget->getTargetTriple().isOSMSVCRT() &&
2849 argsAreStructReturn(Ins) == StackStructReturn)
2850 FuncInfo->setBytesToPopOnReturn(4);
2854 // RegSaveFrameIndex is X86-64 only.
// NOTE(review): 0xAAAAAAA looks like a deliberately bogus placeholder index
// -- confirm.
2855 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
2856 if (CallConv == CallingConv::X86_FastCall ||
2857 CallConv == CallingConv::X86_ThisCall)
2858 // fastcc functions can't have varargs.
2859 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
2862 FuncInfo->setArgumentStackSize(StackSize);
2864 if (IsWinEHParent) {
// Reserve an 8-byte slot, record it as the WinEH UnwindHelp frame index and
// initialize it to -2 with a volatile store.
2866 int UnwindHelpFI = MFI->CreateStackObject(8, 8, /*isSS=*/false);
2867 SDValue StackSlot = DAG.getFrameIndex(UnwindHelpFI, MVT::i64);
2868 MMI.getWinEHFuncInfo(MF.getFunction()).UnwindHelpFrameIdx = UnwindHelpFI;
2869 SDValue Neg2 = DAG.getConstant(-2, dl, MVT::i64);
2870 Chain = DAG.getStore(Chain, dl, Neg2, StackSlot,
2871 MachinePointerInfo::getFixedStack(
2872 DAG.getMachineFunction(), UnwindHelpFI),
2873 /*isVolatile=*/true,
2874 /*isNonTemporal=*/false, /*Alignment=*/0);
2876 // Functions using Win32 EH are considered to have opaque SP adjustments
2877 // to force local variables to be addressed from the frame or base
2879 MFI->setHasOpaqueSPAdjustment(true);
/// Emit the memory operation that places outgoing argument \p Arg in its
/// assigned stack location. A byval argument is copied with
/// CreateCopyOfByValArgument; any other argument is written with a store.
/// \param Chain    Chain the memory operation is attached to.
/// \param StackPtr Stack pointer value; presumably the base the location
///                 offset is added to -- confirm.
/// \param Arg      Value (or, for byval, source address) being passed.
/// \param VA       Location assignment supplying the stack offset.
/// \param Flags    Argument flags (byval size/alignment).
2887 X86TargetLowering::LowerMemOpCallTo(SDValue Chain,
2888 SDValue StackPtr, SDValue Arg,
2889 SDLoc dl, SelectionDAG &DAG,
2890 const CCValAssign &VA,
2891 ISD::ArgFlagsTy Flags) const {
// Compute the destination address from the location's stack offset.
2892 unsigned LocMemOffset = VA.getLocMemOffset();
2893 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
2894 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
2896 if (Flags.isByVal())
2897 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
2899 return DAG.getStore(
2900 Chain, dl, Arg, PtrOff,
2901 MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset),
2905 /// Emit a load of return address if tail call
2906 /// optimization is performed and it is required.
/// \param DAG        SelectionDAG in which nodes are created.
/// \param OutRetAddr [out] Set to the load of the return address.
/// \param Chain      Chain the load is attached to.
/// \returns result 1 of the load node, which callers use as the new chain.
2908 X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
2909 SDValue &OutRetAddr, SDValue Chain,
2910 bool IsTailCall, bool Is64Bit,
2911 int FPDiff, SDLoc dl) const {
2912 // Adjust the Return address stack slot.
2913 EVT VT = getPointerTy(DAG.getDataLayout());
// OutRetAddr first holds the return-address frame index and is then
// overwritten with the value loaded from it.
2914 OutRetAddr = getReturnAddressFrameIndex(DAG);
2916 // Load the "old" Return address.
2917 OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo(),
2918 false, false, false, 0);
2919 return SDValue(OutRetAddr.getNode(), 1);
2922 /// Emit a store of the return address if tail call
2923 /// optimization is performed and it is required (FPDiff!=0).
/// \param DAG          SelectionDAG in which nodes are created.
/// \param MF           Machine function whose frame info gets the new slot.
/// \param Chain        Chain the store is attached to; returned unchanged
///                     when \p FPDiff is zero.
/// \param RetAddrFrIdx Value that is stored into the new slot.
/// \param PtrVT        Pointer type used for the new frame index.
/// \param SlotSize     Size in bytes of the return address slot.
/// \param FPDiff       Stack delta; the new slot lives at FPDiff - SlotSize.
2924 static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
2925 SDValue Chain, SDValue RetAddrFrIdx,
2926 EVT PtrVT, unsigned SlotSize,
2927 int FPDiff, SDLoc dl) {
2928 // Store the return address to the appropriate stack slot.
2929 if (!FPDiff) return Chain;
2930 // Calculate the new stack slot for the return address.
2931 int NewReturnAddrFI =
2932 MF.getFrameInfo()->CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
2934 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
2935 Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
2936 MachinePointerInfo::getFixedStack(
2937 DAG.getMachineFunction(), NewReturnAddrFI),
2942 /// Returns a vector_shuffle node for an movs{s|d}, movd
2943 /// operation of specified width.
/// \param VT Vector type of the shuffle; its element count sizes the mask.
/// \param V1 First shuffle operand.
2944 static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
2946 unsigned NumElems = VT.getVectorNumElements();
2947 SmallVector<int, 8> Mask;
// Mask element 0 is index NumElems, i.e. the first element of the second
// shuffle operand.
2948 Mask.push_back(NumElems);
2949 for (unsigned i = 1; i != NumElems; ++i)
2951 return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
2955 X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
2956 SmallVectorImpl<SDValue> &InVals) const {
2957 SelectionDAG &DAG = CLI.DAG;
2959 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2960 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2961 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2962 SDValue Chain = CLI.Chain;
2963 SDValue Callee = CLI.Callee;
2964 CallingConv::ID CallConv = CLI.CallConv;
2965 bool &isTailCall = CLI.IsTailCall;
2966 bool isVarArg = CLI.IsVarArg;
2968 MachineFunction &MF = DAG.getMachineFunction();
2969 bool Is64Bit = Subtarget->is64Bit();
2970 bool IsWin64 = Subtarget->isCallingConvWin64(CallConv);
2971 StructReturnType SR = callIsStructReturn(Outs);
2972 bool IsSibcall = false;
2973 X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
2974 auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls");
2976 if (Attr.getValueAsString() == "true")
2979 if (Subtarget->isPICStyleGOT() &&
2980 !MF.getTarget().Options.GuaranteedTailCallOpt) {
2981 // If we are using a GOT, disable tail calls to external symbols with
2982 // default visibility. Tail calling such a symbol requires using a GOT
2983 // relocation, which forces early binding of the symbol. This breaks code
2984 // that require lazy function symbol resolution. Using musttail or
2985 // GuaranteedTailCallOpt will override this.
2986 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
2987 if (!G || (!G->getGlobal()->hasLocalLinkage() &&
2988 G->getGlobal()->hasDefaultVisibility()))
2992 bool IsMustTail = CLI.CS && CLI.CS->isMustTailCall();
2994 // Force this to be a tail call. The verifier rules are enough to ensure
2995 // that we can lower this successfully without moving the return address
2998 } else if (isTailCall) {
2999 // Check if it's really possible to do a tail call.
3000 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
3001 isVarArg, SR != NotStructReturn,
3002 MF.getFunction()->hasStructRetAttr(), CLI.RetTy,
3003 Outs, OutVals, Ins, DAG);
3005 // Sibcalls are automatically detected tailcalls which do not require
3007 if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall)
3014 assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
3015 "Var args not supported with calling convention fastcc, ghc or hipe");
3017 // Analyze operands of the call, assigning locations to each operand.
3018 SmallVector<CCValAssign, 16> ArgLocs;
3019 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
3021 // Allocate shadow area for Win64
3023 CCInfo.AllocateStack(32, 8);
3025 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
3027 // Get a count of how many bytes are to be pushed on the stack.
3028 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
3030 // This is a sibcall. The memory operands are available in caller's
3031 // own caller's stack.
3033 else if (MF.getTarget().Options.GuaranteedTailCallOpt &&
3034 IsTailCallConvention(CallConv))
3035 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
3038 if (isTailCall && !IsSibcall && !IsMustTail) {
3039 // Lower arguments at fp - stackoffset + fpdiff.
3040 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
3042 FPDiff = NumBytesCallerPushed - NumBytes;
3044 // Set the delta of movement of the returnaddr stackslot.
3045 // But only set if delta is greater than previous delta.
3046 if (FPDiff < X86Info->getTCReturnAddrDelta())
3047 X86Info->setTCReturnAddrDelta(FPDiff);
3050 unsigned NumBytesToPush = NumBytes;
3051 unsigned NumBytesToPop = NumBytes;
3053 // If we have an inalloca argument, all stack space has already been allocated
3054 // for us and is right at the top of the stack. We don't support multiple
3055 // arguments passed in memory when using inalloca.
3056 if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
3058 if (!ArgLocs.back().isMemLoc())
3059 report_fatal_error("cannot use inalloca attribute on a register "
3061 if (ArgLocs.back().getLocMemOffset() != 0)
3062 report_fatal_error("any parameter with the inalloca attribute must be "
3063 "the only memory argument");
3067 Chain = DAG.getCALLSEQ_START(
3068 Chain, DAG.getIntPtrConstant(NumBytesToPush, dl, true), dl);
3070 SDValue RetAddrFrIdx;
3071 // Load return address for tail calls.
3072 if (isTailCall && FPDiff)
3073 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
3074 Is64Bit, FPDiff, dl);
3076 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
3077 SmallVector<SDValue, 8> MemOpChains;
3080 // Walk the register/memloc assignments, inserting copies/loads. In the case
3081 // of tail call optimization arguments are handled later.
3082 const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3083 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3084 // Skip inalloca arguments, they have already been written.
3085 ISD::ArgFlagsTy Flags = Outs[i].Flags;
3086 if (Flags.isInAlloca())
3089 CCValAssign &VA = ArgLocs[i];
3090 EVT RegVT = VA.getLocVT();
3091 SDValue Arg = OutVals[i];
3092 bool isByVal = Flags.isByVal();
3094 // Promote the value if needed.
3095 switch (VA.getLocInfo()) {
3096 default: llvm_unreachable("Unknown loc info!");
3097 case CCValAssign::Full: break;
3098 case CCValAssign::SExt:
3099 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
3101 case CCValAssign::ZExt:
3102 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
3104 case CCValAssign::AExt:
3105 if (Arg.getValueType().isVector() &&
3106 Arg.getValueType().getScalarType() == MVT::i1)
3107 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
3108 else if (RegVT.is128BitVector()) {
3109 // Special case: passing MMX values in XMM registers.
3110 Arg = DAG.getBitcast(MVT::i64, Arg);
3111 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
3112 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
3114 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
3116 case CCValAssign::BCvt:
3117 Arg = DAG.getBitcast(RegVT, Arg);
3119 case CCValAssign::Indirect: {
3120 // Store the argument.
3121 SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
3122 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
3123 Chain = DAG.getStore(
3124 Chain, dl, Arg, SpillSlot,
3125 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
3132 if (VA.isRegLoc()) {
3133 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
3134 if (isVarArg && IsWin64) {
3135 // Win64 ABI requires argument XMM reg to be copied to the corresponding
3136 // shadow reg if callee is a varargs function.
3137 unsigned ShadowReg = 0;
3138 switch (VA.getLocReg()) {
3139 case X86::XMM0: ShadowReg = X86::RCX; break;
3140 case X86::XMM1: ShadowReg = X86::RDX; break;
3141 case X86::XMM2: ShadowReg = X86::R8; break;
3142 case X86::XMM3: ShadowReg = X86::R9; break;
3145 RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
3147 } else if (!IsSibcall && (!isTailCall || isByVal)) {
3148 assert(VA.isMemLoc());
3149 if (!StackPtr.getNode())
3150 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
3151 getPointerTy(DAG.getDataLayout()));
3152 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
3153 dl, DAG, VA, Flags));
3157 if (!MemOpChains.empty())
3158 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
3160 if (Subtarget->isPICStyleGOT()) {
3161 // ELF / PIC requires GOT in the EBX register before function calls via PLT
3164 RegsToPass.push_back(std::make_pair(
3165 unsigned(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
3166 getPointerTy(DAG.getDataLayout()))));
3168 // If we are tail calling and generating PIC/GOT style code load the
3169 // address of the callee into ECX. The value in ecx is used as target of
3170 // the tail jump. This is done to circumvent the ebx/callee-saved problem
3171 // for tail calls on PIC/GOT architectures. Normally we would just put the
3172 // address of GOT into ebx and then call target@PLT. But for tail calls
3173 // ebx would be restored (since ebx is callee saved) before jumping to the
3176 // Note: The actual moving to ECX is done further down.
3177 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
3178 if (G && !G->getGlobal()->hasLocalLinkage() &&
3179 G->getGlobal()->hasDefaultVisibility())
3180 Callee = LowerGlobalAddress(Callee, DAG);
3181 else if (isa<ExternalSymbolSDNode>(Callee))
3182 Callee = LowerExternalSymbol(Callee, DAG);
3186 if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail) {
3187 // From AMD64 ABI document:
3188 // For calls that may call functions that use varargs or stdargs
3189 // (prototype-less calls or calls to functions containing ellipsis (...) in
3190 // the declaration) %al is used as hidden argument to specify the number
3191 // of SSE registers used. The contents of %al do not need to match exactly
3192 // the number of registers, but must be an upper bound on the number of SSE
3193 // registers used and is in the range 0 - 8 inclusive.
3195 // Count the number of XMM registers allocated.
3196 static const MCPhysReg XMMArgRegs[] = {
3197 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3198 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3200 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
3201 assert((Subtarget->hasSSE1() || !NumXMMRegs)
3202 && "SSE registers cannot be used when SSE is disabled");
3204 RegsToPass.push_back(std::make_pair(unsigned(X86::AL),
3205 DAG.getConstant(NumXMMRegs, dl,
3209 if (isVarArg && IsMustTail) {
3210 const auto &Forwards = X86Info->getForwardedMustTailRegParms();
3211 for (const auto &F : Forwards) {
3212 SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
3213 RegsToPass.push_back(std::make_pair(unsigned(F.PReg), Val));
3217 // For tail calls lower the arguments to the 'real' stack slots. Sibcalls
3218 // don't need this because the eligibility check rejects calls that require
3219 // shuffling arguments passed in memory.
3220 if (!IsSibcall && isTailCall) {
3221 // Force all the incoming stack arguments to be loaded from the stack
3222 // before any new outgoing arguments are stored to the stack, because the
3223 // outgoing stack slots may alias the incoming argument stack slots, and
3224 // the alias isn't otherwise explicit. This is slightly more conservative
3225 // than necessary, because it means that each store effectively depends
3226 // on every argument instead of just those arguments it would clobber.
3227 SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
3229 SmallVector<SDValue, 8> MemOpChains2;
3232 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3233 CCValAssign &VA = ArgLocs[i];
3236 assert(VA.isMemLoc());
3237 SDValue Arg = OutVals[i];
3238 ISD::ArgFlagsTy Flags = Outs[i].Flags;
3239 // Skip inalloca arguments. They don't require any work.
3240 if (Flags.isInAlloca())
3242 // Create frame index.
3243 int32_t Offset = VA.getLocMemOffset()+FPDiff;
3244 uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
3245 FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
3246 FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3248 if (Flags.isByVal()) {
3249 // Copy relative to framepointer.
3250 SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
3251 if (!StackPtr.getNode())
3252 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
3253 getPointerTy(DAG.getDataLayout()));
3254 Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3257 MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
3261 // Store relative to framepointer.
3262 MemOpChains2.push_back(DAG.getStore(
3263 ArgChain, dl, Arg, FIN,
3264 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
3269 if (!MemOpChains2.empty())
3270 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
3272 // Store the return address to the appropriate stack slot.
3273 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
3274 getPointerTy(DAG.getDataLayout()),
3275 RegInfo->getSlotSize(), FPDiff, dl);
3278 // Build a sequence of copy-to-reg nodes chained together with token chain
3279 // and flag operands which copy the outgoing args into registers.
3281 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
3282 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
3283 RegsToPass[i].second, InFlag);
3284 InFlag = Chain.getValue(1);
3287 if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
3288 assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
3289 // In the 64-bit large code model, we have to make all calls
3290 // through a register, since the call instruction's 32-bit
3291 // pc-relative offset may not be large enough to hold the whole
3293 } else if (Callee->getOpcode() == ISD::GlobalAddress) {
3294 // If the callee is a GlobalAddress node (quite common, every direct call
3295 // is) turn it into a TargetGlobalAddress node so that legalize doesn't hack
3297 GlobalAddressSDNode* G = cast<GlobalAddressSDNode>(Callee);
3299 // We should use extra load for direct calls to dllimported functions in
3301 const GlobalValue *GV = G->getGlobal();
3302 if (!GV->hasDLLImportStorageClass()) {
3303 unsigned char OpFlags = 0;
3304 bool ExtraLoad = false;
3305 unsigned WrapperKind = ISD::DELETED_NODE;
3307 // On ELF targets, in both X86-64 and X86-32 mode, direct calls to
3308 // external symbols must go through the PLT in PIC mode. If the symbol
3309 // has hidden or protected visibility, or if it is static or local, then
3310 // we don't need to use the PLT - we can directly call it.
3311 if (Subtarget->isTargetELF() &&
3312 DAG.getTarget().getRelocationModel() == Reloc::PIC_ &&
3313 GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
3314 OpFlags = X86II::MO_PLT;
3315 } else if (Subtarget->isPICStyleStubAny() &&
3316 !GV->isStrongDefinitionForLinker() &&
3317 (!Subtarget->getTargetTriple().isMacOSX() ||