1
// This file is part of Eigen, a lightweight C++ template library
4
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
5
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
7
// Eigen is free software; you can redistribute it and/or
8
// modify it under the terms of the GNU Lesser General Public
9
// License as published by the Free Software Foundation; either
10
// version 3 of the License, or (at your option) any later version.
12
// Alternatively, you can redistribute it and/or
13
// modify it under the terms of the GNU General Public License as
14
// published by the Free Software Foundation; either version 2 of
15
// the License, or (at your option) any later version.
17
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
18
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
19
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
20
// GNU General Public License for more details.
22
// You should have received a copy of the GNU Lesser General Public
23
// License and a copy of the GNU General Public License along with
24
// Eigen. If not, see <http://www.gnu.org/licenses/>.
26
#ifndef EIGEN_COEFFBASED_PRODUCT_H
27
#define EIGEN_COEFFBASED_PRODUCT_H
31
/*********************************************************************************
32
* Coefficient based product implementation.
33
* It is designed for the following use cases:
36
*********************************************************************************/
38
/* Since the all the dimensions of the product are small, here we can rely
39
* on the generic Assign mechanism to evaluate the product per coeff (or packet).
41
* Note that here the inner-loops should always be unrolled.
44
template<int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
45
struct product_coeff_impl;
47
template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
48
struct product_packet_impl;
50
template<typename LhsNested, typename RhsNested, int NestingFlags>
51
struct traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> >
53
typedef MatrixXpr XprKind;
54
typedef typename remove_all<LhsNested>::type _LhsNested;
55
typedef typename remove_all<RhsNested>::type _RhsNested;
56
typedef typename scalar_product_traits<typename _LhsNested::Scalar, typename _RhsNested::Scalar>::ReturnType Scalar;
57
typedef typename promote_storage_type<typename traits<_LhsNested>::StorageKind,
58
typename traits<_RhsNested>::StorageKind>::ret StorageKind;
59
typedef typename promote_index_type<typename traits<_LhsNested>::Index,
60
typename traits<_RhsNested>::Index>::type Index;
63
LhsCoeffReadCost = _LhsNested::CoeffReadCost,
64
RhsCoeffReadCost = _RhsNested::CoeffReadCost,
65
LhsFlags = _LhsNested::Flags,
66
RhsFlags = _RhsNested::Flags,
68
RowsAtCompileTime = _LhsNested::RowsAtCompileTime,
69
ColsAtCompileTime = _RhsNested::ColsAtCompileTime,
70
InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(_LhsNested::ColsAtCompileTime, _RhsNested::RowsAtCompileTime),
72
MaxRowsAtCompileTime = _LhsNested::MaxRowsAtCompileTime,
73
MaxColsAtCompileTime = _RhsNested::MaxColsAtCompileTime,
75
LhsRowMajor = LhsFlags & RowMajorBit,
76
RhsRowMajor = RhsFlags & RowMajorBit,
78
SameType = is_same<typename _LhsNested::Scalar,typename _RhsNested::Scalar>::value,
80
CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit)
81
&& (ColsAtCompileTime == Dynamic
82
|| ( (ColsAtCompileTime % packet_traits<Scalar>::size) == 0
83
&& (RhsFlags&AlignedBit)
87
CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit)
88
&& (RowsAtCompileTime == Dynamic
89
|| ( (RowsAtCompileTime % packet_traits<Scalar>::size) == 0
90
&& (LhsFlags&AlignedBit)
94
EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
95
: (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
96
: (RhsRowMajor && !CanVectorizeLhs),
98
Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
99
| (EvalToRowMajor ? RowMajorBit : 0)
101
| (LhsFlags & RhsFlags & AlignedBit)
102
// TODO enable vectorization for mixed types
103
| (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0),
105
CoeffReadCost = InnerSize == Dynamic ? Dynamic
106
: InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
107
+ (InnerSize - 1) * NumTraits<Scalar>::AddCost,
109
/* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside
110
* of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner
111
* loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
112
* the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI.
114
CanVectorizeInner = SameType
117
&& (LhsFlags & RhsFlags & ActualPacketAccessBit)
118
&& (LhsFlags & RhsFlags & AlignedBit)
119
&& (InnerSize % packet_traits<Scalar>::size == 0)
123
} // end namespace internal
125
template<typename LhsNested, typename RhsNested, int NestingFlags>
126
class CoeffBasedProduct
127
: internal::no_assignment_operator,
128
public MatrixBase<CoeffBasedProduct<LhsNested, RhsNested, NestingFlags> >
132
typedef MatrixBase<CoeffBasedProduct> Base;
133
EIGEN_DENSE_PUBLIC_INTERFACE(CoeffBasedProduct)
134
typedef typename Base::PlainObject PlainObject;
138
typedef typename internal::traits<CoeffBasedProduct>::_LhsNested _LhsNested;
139
typedef typename internal::traits<CoeffBasedProduct>::_RhsNested _RhsNested;
142
PacketSize = internal::packet_traits<Scalar>::size,
143
InnerSize = internal::traits<CoeffBasedProduct>::InnerSize,
144
Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
145
CanVectorizeInner = internal::traits<CoeffBasedProduct>::CanVectorizeInner
148
typedef internal::product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal,
149
Unroll ? InnerSize-1 : Dynamic,
150
_LhsNested, _RhsNested, Scalar> ScalarCoeffImpl;
152
typedef CoeffBasedProduct<LhsNested,RhsNested,NestByRefBit> LazyCoeffBasedProductType;
156
inline CoeffBasedProduct(const CoeffBasedProduct& other)
157
: Base(), m_lhs(other.m_lhs), m_rhs(other.m_rhs)
160
template<typename Lhs, typename Rhs>
161
inline CoeffBasedProduct(const Lhs& lhs, const Rhs& rhs)
162
: m_lhs(lhs), m_rhs(rhs)
164
// we don't allow taking products of matrices of different real types, as that wouldn't be vectorizable.
165
// We still allow to mix T and complex<T>.
166
EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
167
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
168
eigen_assert(lhs.cols() == rhs.rows()
169
&& "invalid matrix product"
170
&& "if you wanted a coeff-wise or a dot product use the respective explicit functions");
173
EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); }
174
EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); }
176
EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
179
ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res);
183
/* Allow index-based non-packet access. It is impossible though to allow index-based packed access,
184
* which is why we don't set the LinearAccessBit.
186
EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
189
const Index row = RowsAtCompileTime == 1 ? 0 : index;
190
const Index col = RowsAtCompileTime == 1 ? index : 0;
191
ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res);
195
template<int LoadMode>
196
EIGEN_STRONG_INLINE const PacketScalar packet(Index row, Index col) const
199
internal::product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor,
200
Unroll ? InnerSize-1 : Dynamic,
201
_LhsNested, _RhsNested, PacketScalar, LoadMode>
202
::run(row, col, m_lhs, m_rhs, res);
206
// Implicit conversion to the nested type (trigger the evaluation of the product)
207
EIGEN_STRONG_INLINE operator const PlainObject& () const
209
m_result.lazyAssign(*this);
213
const _LhsNested& lhs() const { return m_lhs; }
214
const _RhsNested& rhs() const { return m_rhs; }
216
const Diagonal<const LazyCoeffBasedProductType,0> diagonal() const
217
{ return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }
219
template<int DiagonalIndex>
220
const Diagonal<const LazyCoeffBasedProductType,DiagonalIndex> diagonal() const
221
{ return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }
223
const Diagonal<const LazyCoeffBasedProductType,Dynamic> diagonal(Index index) const
224
{ return reinterpret_cast<const LazyCoeffBasedProductType&>(*this).diagonal(index); }
227
const LhsNested m_lhs;
228
const RhsNested m_rhs;
230
mutable PlainObject m_result;
235
// here we need to overload the nested rule for products
236
// such that the nested type is a const reference to a plain matrix
237
template<typename Lhs, typename Rhs, int N, typename PlainObject>
238
struct nested<CoeffBasedProduct<Lhs,Rhs,EvalBeforeNestingBit|EvalBeforeAssigningBit>, N, PlainObject>
240
typedef PlainObject const& type;
243
/***************************************************************************
244
* Normal product .coeff() implementation (with meta-unrolling)
245
***************************************************************************/
247
/**************************************
248
*** Scalar path - no vectorization ***
249
**************************************/
251
template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
252
struct product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
254
typedef typename Lhs::Index Index;
255
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
257
product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res);
258
res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col);
262
template<typename Lhs, typename Rhs, typename RetScalar>
263
struct product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
265
typedef typename Lhs::Index Index;
266
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
268
res = lhs.coeff(row, 0) * rhs.coeff(0, col);
272
template<typename Lhs, typename Rhs, typename RetScalar>
273
struct product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
275
typedef typename Lhs::Index Index;
276
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar& res)
278
eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
279
res = lhs.coeff(row, 0) * rhs.coeff(0, col);
280
for(Index i = 1; i < lhs.cols(); ++i)
281
res += lhs.coeff(row, i) * rhs.coeff(i, col);
285
/*******************************************
286
*** Scalar path with inner vectorization ***
287
*******************************************/
289
template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet>
290
struct product_coeff_vectorized_unroller
292
typedef typename Lhs::Index Index;
293
enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
294
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
296
product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
297
pres = padd(pres, pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) ));
301
template<typename Lhs, typename Rhs, typename Packet>
302
struct product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet>
304
typedef typename Lhs::Index Index;
305
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
307
pres = pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col));
311
template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
312
struct product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
314
typedef typename Lhs::PacketScalar Packet;
315
typedef typename Lhs::Index Index;
316
enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
317
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
320
product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
321
product_coeff_impl<DefaultTraversal,UnrollingIndex,Lhs,Rhs,RetScalar>::run(row, col, lhs, rhs, res);
326
template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int RhsCols = Rhs::ColsAtCompileTime>
327
struct product_coeff_vectorized_dyn_selector
329
typedef typename Lhs::Index Index;
330
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
332
res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum();
336
// NOTE the 3 following specializations are because taking .col(0) on a vector is a bit slower
337
// NOTE maybe they are now useless since we have a specialization for Block<Matrix>
338
template<typename Lhs, typename Rhs, int RhsCols>
339
struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
341
typedef typename Lhs::Index Index;
342
EIGEN_STRONG_INLINE static void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
344
res = lhs.transpose().cwiseProduct(rhs.col(col)).sum();
348
template<typename Lhs, typename Rhs, int LhsRows>
349
struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
351
typedef typename Lhs::Index Index;
352
EIGEN_STRONG_INLINE static void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
354
res = lhs.row(row).transpose().cwiseProduct(rhs).sum();
358
template<typename Lhs, typename Rhs>
359
struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
361
typedef typename Lhs::Index Index;
362
EIGEN_STRONG_INLINE static void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
364
res = lhs.transpose().cwiseProduct(rhs).sum();
368
template<typename Lhs, typename Rhs, typename RetScalar>
369
struct product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar>
371
typedef typename Lhs::Index Index;
372
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
374
product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, res);
382
template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
383
struct product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
385
typedef typename Lhs::Index Index;
386
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
388
product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
389
res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
393
template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
394
struct product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
396
typedef typename Lhs::Index Index;
397
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
399
product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
400
res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
404
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
405
struct product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
407
typedef typename Lhs::Index Index;
408
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
410
res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
414
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
415
struct product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
417
typedef typename Lhs::Index Index;
418
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
420
res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
424
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
425
struct product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
427
typedef typename Lhs::Index Index;
428
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
430
eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
431
res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
432
for(Index i = 1; i < lhs.cols(); ++i)
433
res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res);
437
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
438
struct product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
440
typedef typename Lhs::Index Index;
441
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
443
eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
444
res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
445
for(Index i = 1; i < lhs.cols(); ++i)
446
res = pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
450
} // end namespace internal
452
#endif // EIGEN_COEFFBASED_PRODUCT_H