2893
2893
// Analyze the access pattern of the vector to see if we can extract
2894
2894
// two subvectors and do the shuffle. The analysis is done by calculating
2895
2895
// the range of elements the mask accesses in both vectors.
2896
int MinRange[2] = { static_cast<int>(SrcNumElts+1),
2897
static_cast<int>(SrcNumElts+1)};
2896
int MinRange[2] = { static_cast<int>(SrcNumElts),
2897
static_cast<int>(SrcNumElts)};
2898
2898
int MaxRange[2] = {-1, -1};
2900
2900
for (unsigned i = 0; i != MaskNumElts; ++i) {
2901
2901
int Idx = Mask[i];
2916
2916
// Check if the access is smaller than the vector size and whether we can find
2917
2917
// a reasonable extract index.
2918
int RangeUse[2] = { 2, 2 }; // 0 = Unused, 1 = Extract, 2 = Can not
2918
int RangeUse[2] = { -1, -1 }; // 0 = Unused, 1 = Extract, -1 = Can not
2920
2920
int StartIdx[2]; // StartIdx to extract from
2921
for (int Input=0; Input < 2; ++Input) {
2922
if (MinRange[Input] == (int)(SrcNumElts+1) && MaxRange[Input] == -1) {
2921
for (unsigned Input = 0; Input < 2; ++Input) {
2922
if (MinRange[Input] >= (int)SrcNumElts && MaxRange[Input] < 0) {
2923
2923
RangeUse[Input] = 0; // Unused
2924
2924
StartIdx[Input] = 0;
2925
} else if (MaxRange[Input] - MinRange[Input] < (int)MaskNumElts) {
2926
// Fits within range but we should see if we can find a good
2927
// start index that is a multiple of the mask length.
2928
if (MaxRange[Input] < (int)MaskNumElts) {
2929
RangeUse[Input] = 1; // Extract from beginning of the vector
2930
StartIdx[Input] = 0;
2932
StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts;
2933
if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts &&
2934
StartIdx[Input] + MaskNumElts <= SrcNumElts)
2935
RangeUse[Input] = 1; // Extract from a multiple of the mask length.
2928
// Find a good start index that is a multiple of the mask length. Then
2929
// see if the rest of the elements are in range.
2930
StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts;
2931
if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts &&
2932
StartIdx[Input] + MaskNumElts <= SrcNumElts)
2933
RangeUse[Input] = 1; // Extract from a multiple of the mask length.
2940
2936
if (RangeUse[0] == 0 && RangeUse[1] == 0) {
2941
2937
setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used.
2944
else if (RangeUse[0] < 2 && RangeUse[1] < 2) {
2940
if (RangeUse[0] >= 0 && RangeUse[1] >= 0) {
2945
2941
// Extract appropriate subvector and generate a vector shuffle
2946
for (int Input=0; Input < 2; ++Input) {
2942
for (unsigned Input = 0; Input < 2; ++Input) {
2947
2943
SDValue &Src = Input == 0 ? Src1 : Src2;
2948
2944
if (RangeUse[Input] == 0)
2949
2945
Src = DAG.getUNDEF(VT);
4849
4845
case Intrinsic::x86_avx_vinsertf128_pd_256:
4850
4846
case Intrinsic::x86_avx_vinsertf128_ps_256:
4851
case Intrinsic::x86_avx_vinsertf128_si_256: {
4847
case Intrinsic::x86_avx_vinsertf128_si_256:
4848
case Intrinsic::x86_avx2_vinserti128: {
4852
4849
DebugLoc dl = getCurDebugLoc();
4853
4850
EVT DestVT = TLI.getValueType(I.getType());
4854
4851
EVT ElVT = TLI.getValueType(I.getArgOperand(1)->getType());