~ubuntu-branches/ubuntu/trusty/r-cran-rcpparmadillo/trusty-proposed

« back to all changes in this revision

Viewing changes to inst/include/armadillo_bits/gemv.hpp

Committer: Package Import Robot
Author(s): Dirk Eddelbuettel
Date: 2013-08-12 19:10:20 UTC
mfrom: (1.1.6)
Revision ID: package-import@ubuntu.com-20130812191020-4i0swxrz8v6i503v

Tags: 0.3.910.0-1

New upstream release

files added:
.Rbuildignore

inst/include/RcppArmadilloExtensions/spmat.h

inst/include/armadillo_bits/fn_all.hpp

inst/include/armadillo_bits/fn_any.hpp

inst/include/armadillo_bits/fn_vectorise.hpp

inst/include/armadillo_bits/mul_gemm.hpp

inst/include/armadillo_bits/mul_gemm_mixed.hpp

inst/include/armadillo_bits/mul_gemv.hpp

inst/include/armadillo_bits/mul_herk.hpp

inst/include/armadillo_bits/mul_syrk.hpp

inst/include/armadillo_bits/op_all_bones.hpp

inst/include/armadillo_bits/op_all_meat.hpp

inst/include/armadillo_bits/op_any_bones.hpp

inst/include/armadillo_bits/op_any_meat.hpp

inst/include/armadillo_bits/op_vectorise_bones.hpp

inst/include/armadillo_bits/op_vectorise_meat.hpp

vignettes

vignettes/Makefile

vignettes/RcppArmadillo-intro.Rnw

vignettes/RcppArmadillo-intro.pdf

vignettes/RcppArmadillo-unitTests.Rnw

vignettes/RcppArmadillo-unitTests.pdf

vignettes/RcppArmadillo.bib

vignettes/elsarticle-harv.bst

vignettes/elsarticle.cls

vignettes/kalmanExample.pdf

vignettes/unitTests-results

vignettes/unitTests-results/RcppArmadillo-unitTests.html

vignettes/unitTests-results/RcppArmadillo-unitTests.txt

files removed:
.Rinstignore

inst/doc/Makefile

inst/doc/RcppArmadillo-intro.R

inst/doc/RcppArmadillo-intro.Rnw

inst/doc/RcppArmadillo-unitTests.R

inst/doc/RcppArmadillo-unitTests.Rnw

inst/doc/RcppArmadillo.bib

inst/doc/elsarticle-harv.bst

inst/doc/elsarticle.cls

inst/doc/kalmanExample.pdf

inst/doc/unitTests

inst/doc/unitTests-results

inst/doc/unitTests-results/RcppArmadillo-unitTests.html

inst/doc/unitTests-results/RcppArmadillo-unitTests.txt

inst/doc/unitTests/RcppArmadillo-unitTests.R

inst/doc/unitTests/RcppArmadillo-unitTests.Rnw

inst/include/armadillo_bits/gemm.hpp

inst/include/armadillo_bits/gemm_mixed.hpp

inst/include/armadillo_bits/gemv.hpp

files modified:
ChangeLog

DESCRIPTION

cleanup

debian/changelog

inst/NEWS.Rd

inst/doc/RcppArmadillo-intro.pdf

inst/doc/RcppArmadillo-unitTests.pdf

inst/include/RcppArmadillo.h

inst/include/RcppArmadillo/Col_meat.h

inst/include/RcppArmadillo/Col_proto.h

inst/include/RcppArmadillo/Mat_meat.h

inst/include/RcppArmadillo/Mat_proto.h

inst/include/RcppArmadillo/Row_meat.h

inst/include/RcppArmadillo/Row_proto.h

inst/include/RcppArmadilloConfig.h

inst/include/RcppArmadilloExtensions/sample.h

inst/include/RcppArmadilloForward.h

inst/include/RcppArmadilloSugar.h

inst/include/RcppArmadilloWrap.h

inst/include/armadillo

inst/include/armadillo_bits/BaseCube_bones.hpp

inst/include/armadillo_bits/BaseCube_meat.hpp

inst/include/armadillo_bits/Base_bones.hpp

inst/include/armadillo_bits/Base_meat.hpp

inst/include/armadillo_bits/Col_bones.hpp

inst/include/armadillo_bits/Col_meat.hpp

inst/include/armadillo_bits/Cube_bones.hpp

inst/include/armadillo_bits/Cube_meat.hpp

inst/include/armadillo_bits/GenCube_bones.hpp

inst/include/armadillo_bits/GenCube_meat.hpp

inst/include/armadillo_bits/Gen_bones.hpp

inst/include/armadillo_bits/Gen_meat.hpp

inst/include/armadillo_bits/GlueCube_bones.hpp

inst/include/armadillo_bits/GlueCube_meat.hpp

inst/include/armadillo_bits/Glue_bones.hpp

inst/include/armadillo_bits/Glue_meat.hpp

inst/include/armadillo_bits/Mat_bones.hpp

inst/include/armadillo_bits/Mat_meat.hpp

inst/include/armadillo_bits/OpCube_bones.hpp

inst/include/armadillo_bits/OpCube_meat.hpp

inst/include/armadillo_bits/Op_bones.hpp

inst/include/armadillo_bits/Op_meat.hpp

inst/include/armadillo_bits/Proxy.hpp

inst/include/armadillo_bits/ProxyCube.hpp

inst/include/armadillo_bits/Row_bones.hpp

inst/include/armadillo_bits/Row_meat.hpp

inst/include/armadillo_bits/SpBase_bones.hpp

inst/include/armadillo_bits/SpBase_meat.hpp

inst/include/armadillo_bits/SpMat_meat.hpp

inst/include/armadillo_bits/SpSubview_iterators_meat.hpp

inst/include/armadillo_bits/access.hpp

inst/include/armadillo_bits/arma_config.hpp

inst/include/armadillo_bits/arma_ostream_bones.hpp

inst/include/armadillo_bits/arma_ostream_meat.hpp

inst/include/armadillo_bits/arma_static_check.hpp

inst/include/armadillo_bits/arma_version.hpp

inst/include/armadillo_bits/arrayops_bones.hpp

inst/include/armadillo_bits/arrayops_meat.hpp

inst/include/armadillo_bits/atlas_bones.hpp

inst/include/armadillo_bits/atlas_wrapper.hpp

inst/include/armadillo_bits/auxlib_bones.hpp

inst/include/armadillo_bits/auxlib_meat.hpp

inst/include/armadillo_bits/blas_bones.hpp

inst/include/armadillo_bits/blas_wrapper.hpp

inst/include/armadillo_bits/cmath_wrap.hpp

inst/include/armadillo_bits/compiler_setup.hpp

inst/include/armadillo_bits/compiler_setup_post.hpp

inst/include/armadillo_bits/config.hpp

inst/include/armadillo_bits/constants.hpp

inst/include/armadillo_bits/constants_compat.hpp

inst/include/armadillo_bits/debug.hpp

inst/include/armadillo_bits/eGlueCube_bones.hpp

inst/include/armadillo_bits/eGlueCube_meat.hpp

inst/include/armadillo_bits/eGlue_bones.hpp

inst/include/armadillo_bits/eGlue_meat.hpp

inst/include/armadillo_bits/eOpCube_bones.hpp

inst/include/armadillo_bits/eOpCube_meat.hpp

inst/include/armadillo_bits/eOp_bones.hpp

inst/include/armadillo_bits/eOp_meat.hpp

inst/include/armadillo_bits/eglue_core_bones.hpp

inst/include/armadillo_bits/eglue_core_meat.hpp

inst/include/armadillo_bits/eop_aux.hpp

inst/include/armadillo_bits/eop_core_bones.hpp

inst/include/armadillo_bits/eop_core_meat.hpp

inst/include/armadillo_bits/field_bones.hpp

inst/include/armadillo_bits/field_meat.hpp

inst/include/armadillo_bits/fn_accu.hpp

inst/include/armadillo_bits/fn_as_scalar.hpp

inst/include/armadillo_bits/fn_chol.hpp

inst/include/armadillo_bits/fn_conv.hpp

inst/include/armadillo_bits/fn_conv_to.hpp

inst/include/armadillo_bits/fn_cor.hpp

inst/include/armadillo_bits/fn_cov.hpp

inst/include/armadillo_bits/fn_cross.hpp

inst/include/armadillo_bits/fn_cumsum.hpp

inst/include/armadillo_bits/fn_det.hpp

inst/include/armadillo_bits/fn_diagmat.hpp

inst/include/armadillo_bits/fn_diagvec.hpp

inst/include/armadillo_bits/fn_dot.hpp

inst/include/armadillo_bits/fn_eig.hpp

inst/include/armadillo_bits/fn_elem.hpp

inst/include/armadillo_bits/fn_eps.hpp

inst/include/armadillo_bits/fn_eye.hpp

inst/include/armadillo_bits/fn_flip.hpp

inst/include/armadillo_bits/fn_hist.hpp

inst/include/armadillo_bits/fn_histc.hpp

inst/include/armadillo_bits/fn_inv.hpp

inst/include/armadillo_bits/fn_join.hpp

inst/include/armadillo_bits/fn_kron.hpp

inst/include/armadillo_bits/fn_log_det.hpp

inst/include/armadillo_bits/fn_lu.hpp

inst/include/armadillo_bits/fn_max.hpp

inst/include/armadillo_bits/fn_mean.hpp

inst/include/armadillo_bits/fn_median.hpp

inst/include/armadillo_bits/fn_min.hpp

inst/include/armadillo_bits/fn_misc.hpp

inst/include/armadillo_bits/fn_norm.hpp

inst/include/armadillo_bits/fn_ones.hpp

inst/include/armadillo_bits/fn_pinv.hpp

inst/include/armadillo_bits/fn_princomp.hpp

inst/include/armadillo_bits/fn_prod.hpp

inst/include/armadillo_bits/fn_qr.hpp

inst/include/armadillo_bits/fn_randn.hpp

inst/include/armadillo_bits/fn_randu.hpp

inst/include/armadillo_bits/fn_rank.hpp

inst/include/armadillo_bits/fn_repmat.hpp

inst/include/armadillo_bits/fn_reshape.hpp

inst/include/armadillo_bits/fn_resize.hpp

inst/include/armadillo_bits/fn_shuffle.hpp

inst/include/armadillo_bits/fn_solve.hpp

inst/include/armadillo_bits/fn_sort.hpp

inst/include/armadillo_bits/fn_sort_index.hpp

inst/include/armadillo_bits/fn_stddev.hpp

inst/include/armadillo_bits/fn_strans.hpp

inst/include/armadillo_bits/fn_sum.hpp

inst/include/armadillo_bits/fn_svd.hpp

inst/include/armadillo_bits/fn_syl_lyap.hpp

inst/include/armadillo_bits/fn_symmat.hpp

inst/include/armadillo_bits/fn_toeplitz.hpp

inst/include/armadillo_bits/fn_trace.hpp

inst/include/armadillo_bits/fn_trans.hpp

inst/include/armadillo_bits/fn_trig.hpp

inst/include/armadillo_bits/fn_trimat.hpp

inst/include/armadillo_bits/fn_trunc_exp.hpp

inst/include/armadillo_bits/fn_trunc_log.hpp

inst/include/armadillo_bits/fn_unique.hpp

inst/include/armadillo_bits/fn_var.hpp

inst/include/armadillo_bits/fn_zeros.hpp

inst/include/armadillo_bits/format_wrap.hpp

inst/include/armadillo_bits/forward_bones.hpp

inst/include/armadillo_bits/glue_conv_bones.hpp

inst/include/armadillo_bits/glue_conv_meat.hpp

inst/include/armadillo_bits/glue_cor_bones.hpp

inst/include/armadillo_bits/glue_cor_meat.hpp

inst/include/armadillo_bits/glue_cov_bones.hpp

inst/include/armadillo_bits/glue_cov_meat.hpp

inst/include/armadillo_bits/glue_cross_bones.hpp

inst/include/armadillo_bits/glue_cross_meat.hpp

inst/include/armadillo_bits/glue_hist_bones.hpp

inst/include/armadillo_bits/glue_hist_meat.hpp

inst/include/armadillo_bits/glue_histc_bones.hpp

inst/include/armadillo_bits/glue_histc_meat.hpp

inst/include/armadillo_bits/glue_join_bones.hpp

inst/include/armadillo_bits/glue_join_meat.hpp

inst/include/armadillo_bits/glue_kron_bones.hpp

inst/include/armadillo_bits/glue_kron_meat.hpp

inst/include/armadillo_bits/glue_mixed_bones.hpp

inst/include/armadillo_bits/glue_mixed_meat.hpp

inst/include/armadillo_bits/glue_relational_bones.hpp

inst/include/armadillo_bits/glue_relational_meat.hpp

inst/include/armadillo_bits/glue_solve_bones.hpp

inst/include/armadillo_bits/glue_solve_meat.hpp

inst/include/armadillo_bits/glue_times_bones.hpp

inst/include/armadillo_bits/glue_times_meat.hpp

inst/include/armadillo_bits/glue_toeplitz_bones.hpp

inst/include/armadillo_bits/glue_toeplitz_meat.hpp

inst/include/armadillo_bits/include_atlas.hpp

inst/include/armadillo_bits/injector_bones.hpp

inst/include/armadillo_bits/injector_meat.hpp

inst/include/armadillo_bits/lapack_bones.hpp

inst/include/armadillo_bits/lapack_wrapper.hpp

inst/include/armadillo_bits/memory.hpp

inst/include/armadillo_bits/mtGlueCube_bones.hpp

inst/include/armadillo_bits/mtGlueCube_meat.hpp

inst/include/armadillo_bits/mtGlue_bones.hpp

inst/include/armadillo_bits/mtGlue_meat.hpp

inst/include/armadillo_bits/mtOpCube_bones.hpp

inst/include/armadillo_bits/mtOpCube_meat.hpp

inst/include/armadillo_bits/mtOp_bones.hpp

inst/include/armadillo_bits/mtOp_meat.hpp

inst/include/armadillo_bits/op_chol_bones.hpp

inst/include/armadillo_bits/op_chol_meat.hpp

inst/include/armadillo_bits/op_cor_bones.hpp

inst/include/armadillo_bits/op_cor_meat.hpp

inst/include/armadillo_bits/op_cov_bones.hpp

inst/include/armadillo_bits/op_cov_meat.hpp

inst/include/armadillo_bits/op_cumsum_bones.hpp

inst/include/armadillo_bits/op_cumsum_meat.hpp

inst/include/armadillo_bits/op_cx_scalar_bones.hpp

inst/include/armadillo_bits/op_cx_scalar_meat.hpp

inst/include/armadillo_bits/op_diagmat_bones.hpp

inst/include/armadillo_bits/op_diagmat_meat.hpp

inst/include/armadillo_bits/op_diagvec_bones.hpp

inst/include/armadillo_bits/op_diagvec_meat.hpp

inst/include/armadillo_bits/op_dot_bones.hpp

inst/include/armadillo_bits/op_dot_meat.hpp

inst/include/armadillo_bits/op_dotext_bones.hpp

inst/include/armadillo_bits/op_dotext_meat.hpp

inst/include/armadillo_bits/op_find_bones.hpp

inst/include/armadillo_bits/op_find_meat.hpp

inst/include/armadillo_bits/op_flip_bones.hpp

inst/include/armadillo_bits/op_flip_meat.hpp

inst/include/armadillo_bits/op_hist_bones.hpp

inst/include/armadillo_bits/op_hist_meat.hpp

inst/include/armadillo_bits/op_htrans_bones.hpp

inst/include/armadillo_bits/op_htrans_meat.hpp

inst/include/armadillo_bits/op_inv_bones.hpp

inst/include/armadillo_bits/op_inv_meat.hpp

inst/include/armadillo_bits/op_max_bones.hpp

inst/include/armadillo_bits/op_max_meat.hpp

inst/include/armadillo_bits/op_mean_bones.hpp

inst/include/armadillo_bits/op_mean_meat.hpp

inst/include/armadillo_bits/op_median_bones.hpp

inst/include/armadillo_bits/op_median_meat.hpp

inst/include/armadillo_bits/op_min_bones.hpp

inst/include/armadillo_bits/op_min_meat.hpp

inst/include/armadillo_bits/op_misc_bones.hpp

inst/include/armadillo_bits/op_misc_meat.hpp

inst/include/armadillo_bits/op_pinv_bones.hpp

inst/include/armadillo_bits/op_pinv_meat.hpp

inst/include/armadillo_bits/op_princomp_bones.hpp

inst/include/armadillo_bits/op_princomp_meat.hpp

inst/include/armadillo_bits/op_prod_bones.hpp

inst/include/armadillo_bits/op_prod_meat.hpp

inst/include/armadillo_bits/op_relational_bones.hpp

inst/include/armadillo_bits/op_relational_meat.hpp

inst/include/armadillo_bits/op_repmat_bones.hpp

inst/include/armadillo_bits/op_repmat_meat.hpp

inst/include/armadillo_bits/op_reshape_bones.hpp

inst/include/armadillo_bits/op_reshape_meat.hpp

inst/include/armadillo_bits/op_resize_bones.hpp

inst/include/armadillo_bits/op_resize_meat.hpp

inst/include/armadillo_bits/op_shuffle_bones.hpp

inst/include/armadillo_bits/op_shuffle_meat.hpp

inst/include/armadillo_bits/op_sort_bones.hpp

inst/include/armadillo_bits/op_sort_meat.hpp

inst/include/armadillo_bits/op_stddev_bones.hpp

inst/include/armadillo_bits/op_stddev_meat.hpp

inst/include/armadillo_bits/op_strans_bones.hpp

inst/include/armadillo_bits/op_strans_meat.hpp

inst/include/armadillo_bits/op_sum_bones.hpp

inst/include/armadillo_bits/op_sum_meat.hpp

inst/include/armadillo_bits/op_symmat_bones.hpp

inst/include/armadillo_bits/op_symmat_meat.hpp

inst/include/armadillo_bits/op_trimat_bones.hpp

inst/include/armadillo_bits/op_trimat_meat.hpp

inst/include/armadillo_bits/op_unique_bones.hpp

inst/include/armadillo_bits/op_unique_meat.hpp

inst/include/armadillo_bits/op_var_bones.hpp

inst/include/armadillo_bits/op_var_meat.hpp

inst/include/armadillo_bits/operator_cube_div.hpp

inst/include/armadillo_bits/operator_cube_minus.hpp

inst/include/armadillo_bits/operator_cube_plus.hpp

inst/include/armadillo_bits/operator_cube_relational.hpp

inst/include/armadillo_bits/operator_cube_schur.hpp

inst/include/armadillo_bits/operator_cube_times.hpp

inst/include/armadillo_bits/operator_div.hpp

inst/include/armadillo_bits/operator_minus.hpp

inst/include/armadillo_bits/operator_ostream.hpp

inst/include/armadillo_bits/operator_plus.hpp

inst/include/armadillo_bits/operator_relational.hpp

inst/include/armadillo_bits/operator_schur.hpp

inst/include/armadillo_bits/operator_times.hpp

inst/include/armadillo_bits/podarray_bones.hpp

inst/include/armadillo_bits/podarray_meat.hpp

inst/include/armadillo_bits/promote_type.hpp

inst/include/armadillo_bits/restrictors.hpp

inst/include/armadillo_bits/running_stat_bones.hpp

inst/include/armadillo_bits/running_stat_meat.hpp

inst/include/armadillo_bits/running_stat_vec_bones.hpp

inst/include/armadillo_bits/running_stat_vec_meat.hpp

inst/include/armadillo_bits/span.hpp

inst/include/armadillo_bits/strip.hpp

inst/include/armadillo_bits/subview_bones.hpp

inst/include/armadillo_bits/subview_cube_bones.hpp

inst/include/armadillo_bits/subview_cube_meat.hpp

inst/include/armadillo_bits/subview_elem1_bones.hpp

inst/include/armadillo_bits/subview_elem1_meat.hpp

inst/include/armadillo_bits/subview_elem2_bones.hpp

inst/include/armadillo_bits/subview_elem2_meat.hpp

inst/include/armadillo_bits/subview_field_bones.hpp

inst/include/armadillo_bits/subview_field_meat.hpp

inst/include/armadillo_bits/subview_meat.hpp

inst/include/armadillo_bits/traits.hpp

inst/include/armadillo_bits/typedef.hpp

inst/include/armadillo_bits/typedef_blas_int.hpp

inst/include/armadillo_bits/typedef_fixed.hpp

inst/include/armadillo_bits/undefine_conflicts.hpp

inst/include/armadillo_bits/unwrap.hpp

inst/include/armadillo_bits/unwrap_cube.hpp

inst/include/armadillo_bits/unwrap_spmat.hpp

inst/include/armadillo_bits/upgrade_val.hpp

inst/include/armadillo_bits/wall_clock_bones.hpp

inst/include/armadillo_bits/wall_clock_meat.hpp

inst/unitTests/runit.RcppArmadillo.R

inst/unitTests/runit.sample.R

Show diffs side-by-side

added added

removed removed

inst/include/armadillo_bits/gemv.hpp

// This Source Code Form is subject to the terms of the Mozilla Public

// License, v. 2.0. If a copy of the MPL was not distributed with this

// file, You can obtain one at http://mozilla.org/MPL/2.0/.

//! \addtogroup gemv

//! @{

//! for tiny square matrices, size <= 4x4

template<const bool do_trans_A=false, const bool use_alpha=false, const bool use_beta=false>

class gemv_emul_tinysq

{

public:

template<const uword row, const uword col>

struct pos

{

static const uword n2 = (do_trans_A == false) ? (row + col*2) : (col + row*2);

static const uword n3 = (do_trans_A == false) ? (row + col*3) : (col + row*3);

static const uword n4 = (do_trans_A == false) ? (row + col*4) : (col + row*4);

};

template<typename eT, const uword i>

arma_hot

arma_inline

static

void

assign(eT* y, const eT acc, const eT alpha, const eT beta)

{

if(use_beta == false)

{

y[i] = (use_alpha == false) ? acc : alpha*acc;

}

else

{

const eT tmp = y[i];

y[i] = beta*tmp + ( (use_alpha == false) ? acc : alpha*acc );

}

template<typename eT, typename TA>

arma_hot

inline

static

void

apply( eT* y, const TA& A, const eT* x, const eT alpha = eT(1), const eT beta = eT(0) )

{

arma_extra_debug_sigprint();

const eT* Am = A.memptr();

switch(A.n_rows)

{

case 1:

{

const eT acc = Am[0] * x[0];

assign<eT, 0>(y, acc, alpha, beta);

}

break;

case 2:

{

const eT x0 = x[0];

const eT x1 = x[1];

const eT acc0 = Am[pos<0,0>::n2]*x0 + Am[pos<0,1>::n2]*x1;

const eT acc1 = Am[pos<1,0>::n2]*x0 + Am[pos<1,1>::n2]*x1;

assign<eT, 0>(y, acc0, alpha, beta);

assign<eT, 1>(y, acc1, alpha, beta);

}

break;

case 3:

{

const eT x0 = x[0];

const eT x1 = x[1];

const eT x2 = x[2];

const eT acc0 = Am[pos<0,0>::n3]*x0 + Am[pos<0,1>::n3]*x1 + Am[pos<0,2>::n3]*x2;

const eT acc1 = Am[pos<1,0>::n3]*x0 + Am[pos<1,1>::n3]*x1 + Am[pos<1,2>::n3]*x2;

const eT acc2 = Am[pos<2,0>::n3]*x0 + Am[pos<2,1>::n3]*x1 + Am[pos<2,2>::n3]*x2;

assign<eT, 0>(y, acc0, alpha, beta);

assign<eT, 1>(y, acc1, alpha, beta);

100

assign<eT, 2>(y, acc2, alpha, beta);

101

}

102

break;

103

104

105

case 4:

106

{

107

const eT x0 = x[0];

108

const eT x1 = x[1];

109

const eT x2 = x[2];

110

const eT x3 = x[3];

111

112

const eT acc0 = Am[pos<0,0>::n4]*x0 + Am[pos<0,1>::n4]*x1 + Am[pos<0,2>::n4]*x2 + Am[pos<0,3>::n4]*x3;

113

const eT acc1 = Am[pos<1,0>::n4]*x0 + Am[pos<1,1>::n4]*x1 + Am[pos<1,2>::n4]*x2 + Am[pos<1,3>::n4]*x3;

114

const eT acc2 = Am[pos<2,0>::n4]*x0 + Am[pos<2,1>::n4]*x1 + Am[pos<2,2>::n4]*x2 + Am[pos<2,3>::n4]*x3;

115

const eT acc3 = Am[pos<3,0>::n4]*x0 + Am[pos<3,1>::n4]*x1 + Am[pos<3,2>::n4]*x2 + Am[pos<3,3>::n4]*x3;

116

117

assign<eT, 0>(y, acc0, alpha, beta);

118

assign<eT, 1>(y, acc1, alpha, beta);

119

assign<eT, 2>(y, acc2, alpha, beta);

120

assign<eT, 3>(y, acc3, alpha, beta);

121

}

122

break;

123

124

125

default:

126

;

127

}

128

}

129

130

};

131

132

133

134

class gemv_emul_large_helper

135

{

136

public:

137

138

template<typename eT, typename TA>

139

arma_hot

140

inline

141

static

142

typename arma_not_cx<eT>::result

143

dot_row_col( const TA& A, const eT* x, const uword row, const uword N )

144

{

145

eT acc1 = eT(0);

146

eT acc2 = eT(0);

147

148

uword i,j;

149

for(i=0, j=1; j < N; i+=2, j+=2)

150

{

151

const eT xi = x[i];

152

const eT xj = x[j];

153

154

acc1 += A.at(row,i) * xi;

155

acc2 += A.at(row,j) * xj;

156

}

157

158

if(i < N)

159

{

160

acc1 += A.at(row,i) * x[i];

161

}

162

163

return (acc1 + acc2);

164

}

165

166

167

168

template<typename eT, typename TA>

169

arma_hot

170

inline

171

static

172

typename arma_cx_only<eT>::result

173

dot_row_col( const TA& A, const eT* x, const uword row, const uword N )

174

{

175

typedef typename get_pod_type<eT>::result T;

176

177

T val_real = T(0);

178

T val_imag = T(0);

179

180

for(uword i=0; i<N; ++i)

181

{

182

const std::complex<T>& Ai = A.at(row,i);

183

const std::complex<T>& xi = x[i];

184

185

const T a = Ai.real();

186

const T b = Ai.imag();

187

188

const T c = xi.real();

189

const T d = xi.imag();

190

191

val_real += (a*c) - (b*d);

192

val_imag += (a*d) + (b*c);

193

}

194

195

return std::complex<T>(val_real, val_imag);

196

}

197

198

};

199

200

201

202

//! \brief

203

//! Partial emulation of ATLAS/BLAS gemv().

204

//! 'y' is assumed to have been set to the correct size (i.e. taking into account the transpose)

205

206

template<const bool do_trans_A=false, const bool use_alpha=false, const bool use_beta=false>

207

class gemv_emul_large

208

{

209

public:

210

211

template<typename eT, typename TA>

212

arma_hot

213

inline

214

static

215

void

216

apply( eT* y, const TA& A, const eT* x, const eT alpha = eT(1), const eT beta = eT(0) )

217

{

218

arma_extra_debug_sigprint();

219

220

const uword A_n_rows = A.n_rows;

221

const uword A_n_cols = A.n_cols;

222

223

if(do_trans_A == false)

224

{

225

if(A_n_rows == 1)

226

{

227

const eT acc = op_dot::direct_dot_arma(A_n_cols, A.memptr(), x);

228

229

if( (use_alpha == false) && (use_beta == false) )

230

{

231

y[0] = acc;

232

}

233

else

234

if( (use_alpha == true) && (use_beta == false) )

235

{

236

y[0] = alpha * acc;

237

}

238

else

239

if( (use_alpha == false) && (use_beta == true) )

240

{

241

y[0] = acc + beta*y[0];

242

}

243

else

244

if( (use_alpha == true) && (use_beta == true) )

245

{

246

y[0] = alpha*acc + beta*y[0];

247

}

248

}

249

else

250

for(uword row=0; row < A_n_rows; ++row)

251

{

252

const eT acc = gemv_emul_large_helper::dot_row_col(A, x, row, A_n_cols);

253

254

if( (use_alpha == false) && (use_beta == false) )

255

{

256

y[row] = acc;

257

}

258

else

259

if( (use_alpha == true) && (use_beta == false) )

260

{

261

y[row] = alpha * acc;

262

}

263

else

264

if( (use_alpha == false) && (use_beta == true) )

265

{

266

y[row] = acc + beta*y[row];

267

}

268

else

269

if( (use_alpha == true) && (use_beta == true) )

270

{

271

y[row] = alpha*acc + beta*y[row];

272

}

273

}

274

}

275

else

276

if(do_trans_A == true)

277

{

278

for(uword col=0; col < A_n_cols; ++col)

279

{

280

// col is interpreted as row when storing the results in 'y'

281

282

283

// const eT* A_coldata = A.colptr(col);

284

285

// eT acc = eT(0);

286

// for(uword row=0; row < A_n_rows; ++row)

287

// {

288

// acc += A_coldata[row] * x[row];

289

// }

290

291

const eT acc = op_dot::direct_dot_arma(A_n_rows, A.colptr(col), x);

292

293

if( (use_alpha == false) && (use_beta == false) )

294

{

295

y[col] = acc;

296

}

297

else

298

if( (use_alpha == true) && (use_beta == false) )

299

{

300

y[col] = alpha * acc;

301

}

302

else

303

if( (use_alpha == false) && (use_beta == true) )

304

{

305

y[col] = acc + beta*y[col];

306

}

307

else

308

if( (use_alpha == true) && (use_beta == true) )

309

{

310

y[col] = alpha*acc + beta*y[col];

311

}

312

313

}

314

}

315

}

316

317

};

318

319

320

321

template<const bool do_trans_A=false, const bool use_alpha=false, const bool use_beta=false>

322

class gemv_emul

323

{

324

public:

325

326

template<typename eT, typename TA>

327

arma_hot

328

inline

329

static

330

void

331

apply( eT* y, const TA& A, const eT* x, const eT alpha = eT(1), const eT beta = eT(0), const typename arma_not_cx<eT>::result* junk = 0 )

332

{

333

arma_extra_debug_sigprint();

334

arma_ignore(junk);

335

336

const uword A_n_rows = A.n_rows;

337

const uword A_n_cols = A.n_cols;

338

339

if( (A_n_rows <= 4) && (A_n_rows == A_n_cols) )

340

{

341

gemv_emul_tinysq<do_trans_A, use_alpha, use_beta>::apply(y, A, x, alpha, beta);

342

}

343

else

344

{

345

gemv_emul_large<do_trans_A, use_alpha, use_beta>::apply(y, A, x, alpha, beta);

346

}

347

}

348

349

350

351

template<typename eT>

352

arma_hot

353

inline

354

static

355

void

356

apply( eT* y, const Mat<eT>& A, const eT* x, const eT alpha = eT(1), const eT beta = eT(0), const typename arma_cx_only<eT>::result* junk = 0 )

357

{

358

arma_extra_debug_sigprint();

359

arma_ignore(junk);

360

361

Mat<eT> tmp_A;

362

363

if(do_trans_A)

364

{

365

op_htrans::apply_noalias(tmp_A, A);

366

}

367

368

const Mat<eT>& AA = (do_trans_A == false) ? A : tmp_A;

369

370

const uword AA_n_rows = AA.n_rows;

371

const uword AA_n_cols = AA.n_cols;

372

373

if( (AA_n_rows <= 4) && (AA_n_rows == AA_n_cols) )

374

{

375

gemv_emul_tinysq<false, use_alpha, use_beta>::apply(y, AA, x, alpha, beta);

376

}

377

else

378

{

379

gemv_emul_large<false, use_alpha, use_beta>::apply(y, AA, x, alpha, beta);

380

}

381

}

382

};

383

384

385

386

//! \brief

387

//! Wrapper for ATLAS/BLAS gemv function, using template arguments to control the arguments passed to gemv.

388

//! 'y' is assumed to have been set to the correct size (i.e. taking into account the transpose)

389

390

template<const bool do_trans_A=false, const bool use_alpha=false, const bool use_beta=false>

391

class gemv

392

{

393

public:

394

395

template<typename eT, typename TA>

396

inline

397

static

398

void

399

apply_blas_type( eT* y, const TA& A, const eT* x, const eT alpha = eT(1), const eT beta = eT(0) )

400

{

401

arma_extra_debug_sigprint();

402

403

//const uword threshold = (is_complex<eT>::value == true) ? 16u : 64u;

404

const uword threshold = (is_complex<eT>::value == true) ? 64u : 100u;

405

406

if(A.n_elem <= threshold)

407

{

408

gemv_emul<do_trans_A, use_alpha, use_beta>::apply(y,A,x,alpha,beta);

409

}

410

else

411

{

412

#if defined(ARMA_USE_ATLAS)

413

{

414

if(is_complex<eT>::value == false)

415

{

416

// use gemm() instead of gemv() to work around a speed issue in Atlas 3.8.4

417

418

arma_extra_debug_print("atlas::cblas_gemm()");

419

420

atlas::cblas_gemm<eT>

421

(

422

atlas::CblasColMajor,

423

(do_trans_A) ? ( is_complex<eT>::value ? CblasConjTrans : atlas::CblasTrans ) : atlas::CblasNoTrans,

424

atlas::CblasNoTrans,

425

(do_trans_A) ? A.n_cols : A.n_rows,

426

427

(do_trans_A) ? A.n_rows : A.n_cols,

428

(use_alpha) ? alpha : eT(1),

429

A.mem,

430

A.n_rows,

431

432

(do_trans_A) ? A.n_rows : A.n_cols,

433

(use_beta) ? beta : eT(0),

434

435

(do_trans_A) ? A.n_cols : A.n_rows

436

);

437

}

438

else

439

{

440

arma_extra_debug_print("atlas::cblas_gemv()");

441

442

atlas::cblas_gemv<eT>

443

(

444

atlas::CblasColMajor,

445

(do_trans_A) ? ( is_complex<eT>::value ? CblasConjTrans : atlas::CblasTrans ) : atlas::CblasNoTrans,

446

A.n_rows,

447

A.n_cols,

448

(use_alpha) ? alpha : eT(1),

449

A.mem,

450

A.n_rows,

451

452

453

(use_beta) ? beta : eT(0),

454

455

456

);

457

}

458

}

459

#elif defined(ARMA_USE_BLAS)

460

{

461

arma_extra_debug_print("blas::gemv()");

462

463

const char trans_A = (do_trans_A) ? ( is_complex<eT>::value ? 'C' : 'T' ) : 'N';

464

const blas_int m = A.n_rows;

465

const blas_int n = A.n_cols;

466

const eT local_alpha = (use_alpha) ? alpha : eT(1);

467

//const blas_int lda = A.n_rows;

468

const blas_int inc = 1;

469

const eT local_beta = (use_beta) ? beta : eT(0);

470

471

arma_extra_debug_print( arma_boost::format("blas::gemv(): trans_A = %c") % trans_A );

472

473

blas::gemv<eT>

474

(

475

&trans_A,

476

&m,

477

&n,

478

&local_alpha,

479

A.mem,

480

&m, // lda

481

482

&inc,

483

&local_beta,

484

485

&inc

486

);

487

}

488

#else

489

{

490

gemv_emul<do_trans_A, use_alpha, use_beta>::apply(y,A,x,alpha,beta);

491

}

492

#endif

493

}

494

495

}

496

497

498

499

template<typename eT, typename TA>

500

arma_inline

501

static

502

void

503

apply( eT* y, const TA& A, const eT* x, const eT alpha = eT(1), const eT beta = eT(0) )

504

{

505

gemv_emul<do_trans_A, use_alpha, use_beta>::apply(y,A,x,alpha,beta);

506

}

507

508

509

510

template<typename TA>

511

arma_inline

512

static

513

void

514

apply

515

(

516

float* y,

517

const TA& A,

518

const float* x,

519

const float alpha = float(1),

520

const float beta = float(0)

521

)

522

{

523

gemv<do_trans_A, use_alpha, use_beta>::apply_blas_type(y,A,x,alpha,beta);

524

}

525

526

527

528

template<typename TA>

529

arma_inline

530

static

531

void

532

apply

533

(

534

double* y,

535

const TA& A,

536

const double* x,

537

const double alpha = double(1),

538

const double beta = double(0)

539

)

540

{

541

gemv<do_trans_A, use_alpha, use_beta>::apply_blas_type(y,A,x,alpha,beta);

542

}

543

544

545

546

template<typename TA>

547

arma_inline

548

static

549

void

550

apply

551

(

552

std::complex<float>* y,

553

const TA& A,

554

const std::complex<float>* x,

555

const std::complex<float> alpha = std::complex<float>(1),

556

const std::complex<float> beta = std::complex<float>(0)

557

)

558

{

559

gemv<do_trans_A, use_alpha, use_beta>::apply_blas_type(y,A,x,alpha,beta);

560

}

561

562

563

564

template<typename TA>

565

arma_inline

566

static

567

void

568

apply

569

(

570

std::complex<double>* y,

571

const TA& A,

572

const std::complex<double>* x,

573

const std::complex<double> alpha = std::complex<double>(1),

574

const std::complex<double> beta = std::complex<double>(0)

575

)

576

{

577

gemv<do_trans_A, use_alpha, use_beta>::apply_blas_type(y,A,x,alpha,beta);

578

}

579

580

581

582

};

583

584

585

//! @}

Older »