4
4
// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
6
// Eigen is free software; you can redistribute it and/or
7
// modify it under the terms of the GNU Lesser General Public
8
// License as published by the Free Software Foundation; either
9
// version 3 of the License, or (at your option) any later version.
11
// Alternatively, you can redistribute it and/or
12
// modify it under the terms of the GNU General Public License as
13
// published by the Free Software Foundation; either version 2 of
14
// the License, or (at your option) any later version.
16
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
17
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
19
// GNU General Public License for more details.
21
// You should have received a copy of the GNU Lesser General Public
22
// License and a copy of the GNU General Public License along with
23
// Eigen. If not, see <http://www.gnu.org/licenses/>.
6
// This Source Code Form is subject to the terms of the Mozilla
7
// Public License v. 2.0. If a copy of the MPL was not distributed
8
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
25
10
#ifndef EIGEN_GENERAL_MATRIX_VECTOR_H
26
11
#define EIGEN_GENERAL_MATRIX_VECTOR_H
28
15
namespace internal {
30
17
/* Optimized col-major matrix * vector product:
40
27
* |cplx |real |cplx | invalid, the caller has to do tmp = A * B; C += alpha*tmp
41
28
* |cplx |real |real | optimal case, vectorization possible via real-cplx mul
43
template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>
44
struct general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjugateLhs,RhsScalar,ConjugateRhs>
30
template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version>
31
struct general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjugateLhs,RhsScalar,ConjugateRhs,Version>
46
33
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
100
87
// How many coeffs of the result do we have to skip to be aligned.
101
88
// Here we assume data are at least aligned on the base scalar type.
102
Index alignedStart = first_aligned(res,size);
89
Index alignedStart = internal::first_aligned(res,size);
103
90
Index alignedSize = ResPacketSize>1 ? alignedStart + ((size-alignedStart) & ~ResPacketAlignedMask) : 0;
104
91
const Index peeledSize = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : alignedStart;
111
98
// we cannot assume the first element is aligned because of sub-matrices
112
const Index lhsAlignmentOffset = first_aligned(lhs,size);
99
const Index lhsAlignmentOffset = internal::first_aligned(lhs,size);
114
101
// find how many columns we have to skip to be aligned with the result (if possible)
115
102
Index skipColumns = 0;
296
283
* - alpha is always a complex (or converted to a complex)
297
284
* - no vectorization
299
template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>
300
struct general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjugateLhs,RhsScalar,ConjugateRhs>
286
template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version>
287
struct general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjugateLhs,RhsScalar,ConjugateRhs,Version>
302
289
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
351
338
// How many coeffs of the result do we have to skip to be aligned.
352
339
// Here we assume data are at least aligned on the base scalar type;
353
340
// if that's not the case then vectorization is discarded, see below.
354
Index alignedStart = first_aligned(rhs, depth);
341
Index alignedStart = internal::first_aligned(rhs, depth);
355
342
Index alignedSize = RhsPacketSize>1 ? alignedStart + ((depth-alignedStart) & ~RhsPacketAlignedMask) : 0;
356
343
const Index peeledSize = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : alignedStart;
363
350
// we cannot assume the first element is aligned because of sub-matrices
364
const Index lhsAlignmentOffset = first_aligned(lhs,depth);
351
const Index lhsAlignmentOffset = internal::first_aligned(lhs,depth);
366
353
// find how many rows we have to skip to be aligned with rhs (if possible)
367
354
Index skipRows = 0;