  // argument that specifies the vector type, need to handle each case.
  switch (BuiltinID) {
  default: break;
  case AArch64::BI__builtin_neon_vdups_lane_f32:
  case AArch64::BI__builtin_neon_vdupd_lane_f64:
  case AArch64::BI__builtin_neon_vdups_laneq_f32:
  case AArch64::BI__builtin_neon_vdupd_laneq_f64: {
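    // Floating-point scalar vdup_lane: the result is simply the requested
    // lane of the source vector, so a single extractelement suffices.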
    return CGF.Builder.CreateExtractElement(Ops[0], Ops[1], "vdup_lane");
  }
  case AArch64::BI__builtin_neon_vdupb_lane_i8:
  case AArch64::BI__builtin_neon_vduph_lane_i16:
  case AArch64::BI__builtin_neon_vdups_lane_i32:
  case AArch64::BI__builtin_neon_vdupd_lane_i64:
  case AArch64::BI__builtin_neon_vdupb_laneq_i8:
  case AArch64::BI__builtin_neon_vduph_laneq_i16:
  case AArch64::BI__builtin_neon_vdups_laneq_i32:
  case AArch64::BI__builtin_neon_vdupd_laneq_i64: {
    // The backend treats Neon scalar integer types as v1ix vectors, so dup
    // the requested lane from the source vector into a v1ix vector with a
    // shufflevector.
    s = "vdup_lane";
    Value *SV = llvm::ConstantVector::getSplat(1, cast<ConstantInt>(Ops[1]));
    Value *Result = CGF.Builder.CreateShuffleVector(Ops[0], Ops[0], SV, s);
    llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType());
    // Bitcast the one-element vector used for the AArch64 intrinsic to the
    // scalar type expected by the builtin.
    return CGF.Builder.CreateBitCast(Result, Ty, s);
  }
  case AArch64::BI__builtin_neon_vqdmlalh_lane_s16:
  case AArch64::BI__builtin_neon_vqdmlalh_laneq_s16:
  case AArch64::BI__builtin_neon_vqdmlals_lane_s32:
  case AArch64::BI__builtin_neon_vqdmlals_laneq_s32:
  case AArch64::BI__builtin_neon_vqdmlslh_lane_s16:
  case AArch64::BI__builtin_neon_vqdmlslh_laneq_s16:
  case AArch64::BI__builtin_neon_vqdmlsls_lane_s32:
  case AArch64::BI__builtin_neon_vqdmlsls_laneq_s32: {
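    // Scalar saturating doubling multiply-accumulate by lane: build
    // vqdmull(b, c[lane]) on one-element vectors, then saturating-add the
    // product to (or saturating-subtract it from) the accumulator a.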
    Int = Intrinsic::arm_neon_vqadds;
    if (BuiltinID == AArch64::BI__builtin_neon_vqdmlslh_lane_s16 ||
        BuiltinID == AArch64::BI__builtin_neon_vqdmlslh_laneq_s16 ||
        BuiltinID == AArch64::BI__builtin_neon_vqdmlsls_lane_s32 ||
        BuiltinID == AArch64::BI__builtin_neon_vqdmlsls_laneq_s32) {
      Int = Intrinsic::arm_neon_vqsubs;
    }
    // create vqdmull call with b * c[i]
    llvm::Type *Ty = CGF.ConvertType(E->getArg(1)->getType());
    llvm::VectorType *OpVTy = llvm::VectorType::get(Ty, 1);
    Ty = CGF.ConvertType(E->getArg(0)->getType());
    llvm::VectorType *ResVTy = llvm::VectorType::get(Ty, 1);
    Value *F = CGF.CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, ResVTy);
    Value *V = UndefValue::get(OpVTy);
    llvm::Constant *CI = ConstantInt::get(CGF.Int32Ty, 0);
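    // The intrinsics expect vector operands, so each scalar operand is
    // inserted into element 0 of a one-element vector before the call.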
    SmallVector<Value *, 2> MulOps;
    MulOps.push_back(Ops[1]);
    MulOps.push_back(Ops[2]);
    MulOps[0] = CGF.Builder.CreateInsertElement(V, MulOps[0], CI);
    MulOps[1] = CGF.Builder.CreateExtractElement(MulOps[1], Ops[3], "extract");
    MulOps[1] = CGF.Builder.CreateInsertElement(V, MulOps[1], CI);
    Value *MulRes = CGF.Builder.CreateCall2(F, MulOps[0], MulOps[1]);
    // create vqadds/vqsubs call with a +/- vqdmull result
    F = CGF.CGM.getIntrinsic(Int, ResVTy);
    SmallVector<Value *, 2> AddOps;
    AddOps.push_back(Ops[0]);
    AddOps.push_back(MulRes);
    V = UndefValue::get(ResVTy);
    AddOps[0] = CGF.Builder.CreateInsertElement(V, AddOps[0], CI);
    Value *AddRes = CGF.Builder.CreateCall2(F, AddOps[0], AddOps[1]);
    return CGF.Builder.CreateBitCast(AddRes, Ty);
  }
  case AArch64::BI__builtin_neon_vfmas_lane_f32:
  case AArch64::BI__builtin_neon_vfmas_laneq_f32:
  case AArch64::BI__builtin_neon_vfmad_lane_f64:
  case AArch64::BI__builtin_neon_vfmad_laneq_f64: {
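    // Scalar floating-point fused multiply-add by lane: extract the lane
    // and emit llvm.fma on the scalar type.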
    llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType());
    Value *F = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
    // extract lane: acc += x * v[i]
    Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
    return CGF.Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]);
  }
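  // Scalar Saturating Add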
  case AArch64::BI__builtin_neon_vqaddh_s16:
  case AArch64::BI__builtin_neon_vqadds_s32:
  case AArch64::BI__builtin_neon_vqaddd_s64:
    Int = Intrinsic::arm_neon_vqadds;
    s = "vqadds"; OverloadInt = true; break;
  case AArch64::BI__builtin_neon_vqaddb_u8:
  case AArch64::BI__builtin_neon_vqaddh_u16:
  case AArch64::BI__builtin_neon_vqadds_u32:
  case AArch64::BI__builtin_neon_vqaddd_u64:
    Int = Intrinsic::arm_neon_vqaddu;
    s = "vqaddu"; OverloadInt = true; break;
  // Scalar Saturating Sub
  case AArch64::BI__builtin_neon_vqsubb_s8:
  case AArch64::BI__builtin_neon_vqsubh_s16:
  case AArch64::BI__builtin_neon_vqsubs_s32:
  case AArch64::BI__builtin_neon_vqsubd_s64:
    Int = Intrinsic::arm_neon_vqsubs;
    s = "vqsubs"; OverloadInt = true; break;
  case AArch64::BI__builtin_neon_vqsubb_u8:
  case AArch64::BI__builtin_neon_vqsubh_u16:
  case AArch64::BI__builtin_neon_vqsubs_u32:
  case AArch64::BI__builtin_neon_vqsubd_u64:
    Int = Intrinsic::arm_neon_vqsubu;
    s = "vqsubu"; OverloadInt = true; break;
  // Scalar Shift Left
  case AArch64::BI__builtin_neon_vshld_s64:
  case AArch64::BI__builtin_neon_vcvtd_f64_u64:
    Int = Intrinsic::aarch64_neon_vcvtf64_u64;
    s = "vcvtf"; OverloadInt = false; break;
  // Scalar Floating-point Converts
  case AArch64::BI__builtin_neon_vcvtxd_f32_f64:
    Int = Intrinsic::aarch64_neon_fcvtxn;
    s = "vcvtxn"; OverloadCvtInt = true; break;
  case AArch64::BI__builtin_neon_vcvtas_s32_f32:
  case AArch64::BI__builtin_neon_vcvtad_s64_f64:
    Int = Intrinsic::aarch64_neon_fcvtas;
    s = "vcvtas"; OverloadCvtInt = true; break;
  case AArch64::BI__builtin_neon_vcvtas_u32_f32:
  case AArch64::BI__builtin_neon_vcvtad_u64_f64:
    Int = Intrinsic::aarch64_neon_fcvtau;
    s = "vcvtau"; OverloadCvtInt = true; break;
  case AArch64::BI__builtin_neon_vcvtms_s32_f32:
  case AArch64::BI__builtin_neon_vcvtmd_s64_f64:
    Int = Intrinsic::aarch64_neon_fcvtms;
    s = "vcvtms"; OverloadCvtInt = true; break;
  case AArch64::BI__builtin_neon_vcvtms_u32_f32:
  case AArch64::BI__builtin_neon_vcvtmd_u64_f64:
    Int = Intrinsic::aarch64_neon_fcvtmu;
    s = "vcvtmu"; OverloadCvtInt = true; break;
  case AArch64::BI__builtin_neon_vcvtns_s32_f32:
  case AArch64::BI__builtin_neon_vcvtnd_s64_f64:
    Int = Intrinsic::aarch64_neon_fcvtns;
    s = "vcvtns"; OverloadCvtInt = true; break;
  case AArch64::BI__builtin_neon_vcvtns_u32_f32:
  case AArch64::BI__builtin_neon_vcvtnd_u64_f64:
    Int = Intrinsic::aarch64_neon_fcvtnu;
    s = "vcvtnu"; OverloadCvtInt = true; break;
  case AArch64::BI__builtin_neon_vcvtps_s32_f32:
  case AArch64::BI__builtin_neon_vcvtpd_s64_f64:
    Int = Intrinsic::aarch64_neon_fcvtps;
    s = "vcvtps"; OverloadCvtInt = true; break;
  case AArch64::BI__builtin_neon_vcvtps_u32_f32:
  case AArch64::BI__builtin_neon_vcvtpd_u64_f64:
    Int = Intrinsic::aarch64_neon_fcvtpu;
    s = "vcvtpu"; OverloadCvtInt = true; break;
  case AArch64::BI__builtin_neon_vcvts_s32_f32:
  case AArch64::BI__builtin_neon_vcvtd_s64_f64:
    Int = Intrinsic::aarch64_neon_fcvtzs;
    s = "vcvtzs"; OverloadCvtInt = true; break;
  case AArch64::BI__builtin_neon_vcvts_u32_f32:
  case AArch64::BI__builtin_neon_vcvtd_u64_f64:
    Int = Intrinsic::aarch64_neon_fcvtzu;
    s = "vcvtzu"; OverloadCvtInt = true; break;
  // Scalar Floating-point Reciprocal Estimate
  case AArch64::BI__builtin_neon_vrecpes_f32:
  case AArch64::BI__builtin_neon_vrecped_f64: