[Rcpp-commits] r3772 - in pkg/RcppArmadillo: . inst inst/include/armadillo_bits man
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Tue Sep 18 15:59:50 CEST 2012
Author: edd
Date: 2012-09-18 15:59:49 +0200 (Tue, 18 Sep 2012)
New Revision: 3772
Modified:
pkg/RcppArmadillo/ChangeLog
pkg/RcppArmadillo/DESCRIPTION
pkg/RcppArmadillo/inst/NEWS.Rd
pkg/RcppArmadillo/inst/include/armadillo_bits/arma_version.hpp
pkg/RcppArmadillo/inst/include/armadillo_bits/atlas_wrapper.hpp
pkg/RcppArmadillo/inst/include/armadillo_bits/blas_wrapper.hpp
pkg/RcppArmadillo/inst/include/armadillo_bits/compiler_setup.hpp
pkg/RcppArmadillo/inst/include/armadillo_bits/config.hpp
pkg/RcppArmadillo/inst/include/armadillo_bits/fn_dot.hpp
pkg/RcppArmadillo/inst/include/armadillo_bits/gemv.hpp
pkg/RcppArmadillo/inst/include/armadillo_bits/op_dot_bones.hpp
pkg/RcppArmadillo/inst/include/armadillo_bits/op_dot_meat.hpp
pkg/RcppArmadillo/inst/include/armadillo_bits/operator_schur.hpp
pkg/RcppArmadillo/inst/include/armadillo_bits/spglue_minus_meat.hpp
pkg/RcppArmadillo/inst/include/armadillo_bits/spglue_plus_meat.hpp
pkg/RcppArmadillo/inst/include/armadillo_bits/spglue_times_meat.hpp
pkg/RcppArmadillo/inst/include/armadillo_bits/traits.hpp
pkg/RcppArmadillo/man/RcppArmadillo-package.Rd
Log:
Release 0.3.4.1 with Armadillo 3.4.1
Modified: pkg/RcppArmadillo/ChangeLog
===================================================================
--- pkg/RcppArmadillo/ChangeLog 2012-09-16 13:51:22 UTC (rev 3771)
+++ pkg/RcppArmadillo/ChangeLog 2012-09-18 13:59:49 UTC (rev 3772)
@@ -1,3 +1,9 @@
+2012-09-18 Dirk Eddelbuettel <edd at debian.org>
+
+ * DESCRIPTION: Release 0.3.4.1
+
+ * inst/include/*: Upgraded to new release 3.4.1 of Armadillo
+
2012-09-06 Dirk Eddelbuettel <edd at debian.org>
* DESCRIPTION: Release 0.3.4.0
Modified: pkg/RcppArmadillo/DESCRIPTION
===================================================================
--- pkg/RcppArmadillo/DESCRIPTION 2012-09-16 13:51:22 UTC (rev 3771)
+++ pkg/RcppArmadillo/DESCRIPTION 2012-09-18 13:59:49 UTC (rev 3772)
@@ -1,7 +1,7 @@
Package: RcppArmadillo
Type: Package
Title: Rcpp integration for Armadillo templated linear algebra library
-Version: 0.3.4.0
+Version: 0.3.4.1
Date: $Date$
Author: Romain Francois, Dirk Eddelbuettel and Doug Bates
Maintainer: Dirk Eddelbuettel <edd at debian.org>
@@ -21,7 +21,7 @@
(due to speed and/or integration capabilities), rather than another language.
.
The RcppArmadillo package includes the header files from the templated
- Armadillo library (currently version 3.4.0). Thus users do not need to
+ Armadillo library (currently version 3.4.1). Thus users do not need to
install Armadillo itself in order to use RcppArmadillo.
.
This Armadillo integration provides a nice illustration of the
Modified: pkg/RcppArmadillo/inst/NEWS.Rd
===================================================================
--- pkg/RcppArmadillo/inst/NEWS.Rd 2012-09-16 13:51:22 UTC (rev 3771)
+++ pkg/RcppArmadillo/inst/NEWS.Rd 2012-09-18 13:59:49 UTC (rev 3772)
@@ -2,6 +2,18 @@
\title{News for Package 'RcppArmadillo'}
\newcommand{\cpkg}{\href{http://CRAN.R-project.org/package=#1}{\pkg{#1}}}
+\section{Changes in RcppArmadillo version 0.3.4.1 (2012-09-18)}{
+ \itemize{
+ \item Upgraded to Armadillo release 3.4.1
+ \itemize{
+ \item workaround for a bug in the Mac OS X accelerate framework
+ \item fixes for handling empty sparse matrices
+ \item added documentation for saving & loading matrices in HDF5 format
+ \item faster dot() and cdot() for complex numbers
+ }
+ }
+}
+
\section{Changes in RcppArmadillo version 0.3.4.0 (2012-09-06)}{
\itemize{
\item Upgraded to Armadillo release 3.4.0 (Ku De Ta)
Modified: pkg/RcppArmadillo/inst/include/armadillo_bits/arma_version.hpp
===================================================================
--- pkg/RcppArmadillo/inst/include/armadillo_bits/arma_version.hpp 2012-09-16 13:51:22 UTC (rev 3771)
+++ pkg/RcppArmadillo/inst/include/armadillo_bits/arma_version.hpp 2012-09-18 13:59:49 UTC (rev 3772)
@@ -18,7 +18,7 @@
#define ARMA_VERSION_MAJOR 3
#define ARMA_VERSION_MINOR 4
-#define ARMA_VERSION_PATCH 0
+#define ARMA_VERSION_PATCH 1
#define ARMA_VERSION_NAME "Ku De Ta"
Modified: pkg/RcppArmadillo/inst/include/armadillo_bits/atlas_wrapper.hpp
===================================================================
--- pkg/RcppArmadillo/inst/include/armadillo_bits/atlas_wrapper.hpp 2012-09-16 13:51:22 UTC (rev 3771)
+++ pkg/RcppArmadillo/inst/include/armadillo_bits/atlas_wrapper.hpp 2012-09-18 13:59:49 UTC (rev 3772)
@@ -1,5 +1,5 @@
-// Copyright (C) 2008-2011 NICTA (www.nicta.com.au)
-// Copyright (C) 2008-2011 Conrad Sanderson
+// Copyright (C) 2008-2012 NICTA (www.nicta.com.au)
+// Copyright (C) 2008-2012 Conrad Sanderson
//
// This file is part of the Armadillo C++ library.
// It is provided without any warranty of fitness
@@ -22,7 +22,7 @@
inline static const eT& tmp_real(const eT& X) { return X; }
template<typename T>
- inline static const T& tmp_real(const std::complex<T>& X) { return X.real(); }
+ inline static const T tmp_real(const std::complex<T>& X) { return X.real(); }
Modified: pkg/RcppArmadillo/inst/include/armadillo_bits/blas_wrapper.hpp
===================================================================
--- pkg/RcppArmadillo/inst/include/armadillo_bits/blas_wrapper.hpp 2012-09-16 13:51:22 UTC (rev 3771)
+++ pkg/RcppArmadillo/inst/include/armadillo_bits/blas_wrapper.hpp 2012-09-18 13:59:49 UTC (rev 3772)
@@ -1,5 +1,5 @@
-// Copyright (C) 2008-2011 NICTA (www.nicta.com.au)
-// Copyright (C) 2008-2011 Conrad Sanderson
+// Copyright (C) 2008-2012 NICTA (www.nicta.com.au)
+// Copyright (C) 2008-2012 Conrad Sanderson
//
// This file is part of the Armadillo C++ library.
// It is provided without any warranty of fitness
@@ -22,46 +22,6 @@
template<typename eT>
inline
- eT
- dot(const uword n_elem, const eT* x, const eT* y)
- {
- arma_ignore(n_elem);
- arma_ignore(x);
- arma_ignore(y);
-
- return eT(0);
- }
-
-
-
- template<>
- inline
- float
- dot(const uword n_elem, const float* x, const float* y)
- {
- blas_int n = blas_int(n_elem);
- blas_int inc = blas_int(1);
-
- return arma_fortran(arma_sdot)(&n, x, &inc, y, &inc);
- }
-
-
-
- template<>
- inline
- double
- dot(const uword n_elem, const double* x, const double* y)
- {
- blas_int n = blas_int(n_elem);
- blas_int inc = blas_int(1);
-
- return arma_fortran(arma_ddot)(&n, x, &inc, y, &inc);
- }
-
-
-
- template<typename eT>
- inline
void
gemv(const char* transA, const blas_int* m, const blas_int* n, const eT* alpha, const eT* A, const blas_int* ldA, const eT* x, const blas_int* incx, const eT* beta, eT* y, const blas_int* incy)
{
@@ -128,6 +88,86 @@
}
+
+
+ template<typename eT>
+ inline
+ eT
+ dot(const uword n_elem, const eT* x, const eT* y)
+ {
+ arma_type_check((is_supported_blas_type<eT>::value == false));
+
+ if(is_float<eT>::value == true)
+ {
+ #if defined(ARMA_BLAS_SDOT_BUG)
+ {
+ if(n_elem == 0) { return eT(0); }
+
+ const char trans = 'T';
+
+ const blas_int m = blas_int(n_elem);
+ const blas_int n = 1;
+ //const blas_int lda = (n_elem > 0) ? blas_int(n_elem) : blas_int(1);
+ const blas_int inc = 1;
+
+ const eT alpha = eT(1);
+ const eT beta = eT(0);
+
+ eT result[2]; // paranoia: using two elements instead of one
+
+ //blas::gemv(&trans, &m, &n, &alpha, x, &lda, y, &inc, &beta, &result[0], &inc);
+ blas::gemv(&trans, &m, &n, &alpha, x, &m, y, &inc, &beta, &result[0], &inc);
+
+ return result[0];
+ }
+ #else
+ {
+ blas_int n = blas_int(n_elem);
+ blas_int inc = 1;
+
+ typedef float T;
+ return arma_fortran(arma_sdot)(&n, (const T*)x, &inc, (const T*)y, &inc);
+ }
+ #endif
+ }
+ else
+ if(is_double<eT>::value == true)
+ {
+ blas_int n = blas_int(n_elem);
+ blas_int inc = 1;
+
+ typedef double T;
+ return arma_fortran(arma_ddot)(&n, (const T*)x, &inc, (const T*)y, &inc);
+ }
+ else
+ if( (is_supported_complex_float<eT>::value == true) || (is_supported_complex_double<eT>::value == true) )
+ {
+ if(n_elem == 0) { return eT(0); }
+
+ // using gemv() workaround due to compatibility issues with cdotu() and zdotu()
+
+ const char trans = 'T';
+
+ const blas_int m = blas_int(n_elem);
+ const blas_int n = 1;
+ //const blas_int lda = (n_elem > 0) ? blas_int(n_elem) : blas_int(1);
+ const blas_int inc = 1;
+
+ const eT alpha = eT(1);
+ const eT beta = eT(0);
+
+ eT result[2]; // paranoia: using two elements instead of one
+
+ //blas::gemv(&trans, &m, &n, &alpha, x, &lda, y, &inc, &beta, &result[0], &inc);
+ blas::gemv(&trans, &m, &n, &alpha, x, &m, y, &inc, &beta, &result[0], &inc);
+
+ return result[0];
+ }
+ else
+ {
+ return eT(0);
+ }
+ }
}
Modified: pkg/RcppArmadillo/inst/include/armadillo_bits/compiler_setup.hpp
===================================================================
--- pkg/RcppArmadillo/inst/include/armadillo_bits/compiler_setup.hpp 2012-09-16 13:51:22 UTC (rev 3771)
+++ pkg/RcppArmadillo/inst/include/armadillo_bits/compiler_setup.hpp 2012-09-18 13:59:49 UTC (rev 3772)
@@ -125,6 +125,11 @@
#endif
+#if defined(__APPLE__)
+ #define ARMA_BLAS_SDOT_BUG
+#endif
+
+
#if defined(_MSC_VER)
#if (_MSC_VER < 1500)
Modified: pkg/RcppArmadillo/inst/include/armadillo_bits/config.hpp
===================================================================
--- pkg/RcppArmadillo/inst/include/armadillo_bits/config.hpp 2012-09-16 13:51:22 UTC (rev 3771)
+++ pkg/RcppArmadillo/inst/include/armadillo_bits/config.hpp 2012-09-18 13:59:49 UTC (rev 3772)
@@ -27,6 +27,10 @@
//// Without BLAS, matrix multiplication will still work, but might be slower.
#endif
+// #define ARMA_USE_WRAPPER
+//// Comment out the above line if you prefer to directly link with LAPACK and/or BLAS (eg. -llapack -lblas)
+//// instead of linking indirectly with LAPACK and/or BLAS via Armadillo's run-time wrapper library.
+
// #define ARMA_BLAS_CAPITALS
//// Uncomment the above line if your BLAS and LAPACK libraries have capitalised function names (eg. ACML on 64-bit Windows)
@@ -56,10 +60,18 @@
//// Uncomment the above line if you require matrices/vectors capable of holding more than 4 billion elements.
//// Your machine and compiler must have support for 64 bit integers (eg. via "long" or "long long")
+#if !defined(ARMA_USE_CXX11)
// #define ARMA_USE_CXX11
//// Uncomment the above line if you have a C++ compiler that supports the C++11 standard
//// This will enable additional features, such as use of initialiser lists
+#endif
+#if !defined(ARMA_USE_HDF5)
+// #define ARMA_USE_HDF5
+//// Uncomment the above line if you want the ability to save and load matrices stored in the HDF5 format;
+//// the hdf5.h header file must be available on your system and you will need to link with the hdf5 library (eg. -lhdf5)
+#endif
+
#if !defined(ARMA_MAT_PREALLOC)
#define ARMA_MAT_PREALLOC 16
#endif
@@ -86,11 +98,11 @@
//// Uncomment the above line if you want to see the function traces of how Armadillo evaluates expressions.
//// This is mainly useful for debugging of the library.
+
// #define ARMA_USE_BOOST
// #define ARMA_USE_BOOST_DATE
-// #define ARMA_USE_WRAPPER
-// #define ARMA_USE_HDF5
+
#if !defined(ARMA_DEFAULT_OSTREAM)
#define ARMA_DEFAULT_OSTREAM std::cout
#endif
Modified: pkg/RcppArmadillo/inst/include/armadillo_bits/fn_dot.hpp
===================================================================
--- pkg/RcppArmadillo/inst/include/armadillo_bits/fn_dot.hpp 2012-09-16 13:51:22 UTC (rev 3771)
+++ pkg/RcppArmadillo/inst/include/armadillo_bits/fn_dot.hpp 2012-09-18 13:59:49 UTC (rev 3772)
@@ -1,5 +1,6 @@
-// Copyright (C) 2008-2010 NICTA (www.nicta.com.au)
-// Copyright (C) 2008-2010 Conrad Sanderson
+// Copyright (C) 2008-2012 NICTA (www.nicta.com.au)
+// Copyright (C) 2008-2012 Conrad Sanderson
+// Copyright (C) 2012 Ryan Curtin
//
// This file is part of the Armadillo C++ library.
// It is provided without any warranty of fitness
@@ -18,11 +19,16 @@
template<typename T1, typename T2>
arma_inline
arma_warn_unused
-typename T1::elem_type
+typename
+enable_if2
+ <
+ is_arma_type<T1>::value && is_arma_type<T2>::value && is_same_type<typename T1::elem_type, typename T2::elem_type>::value,
+ typename T1::elem_type
+ >::result
dot
(
- const Base<typename T1::elem_type,T1>& A,
- const Base<typename T1::elem_type,T2>& B
+ const T1& A,
+ const T2& B
)
{
arma_extra_debug_sigprint();
@@ -35,16 +41,19 @@
template<typename T1, typename T2>
arma_inline
arma_warn_unused
-typename T1::elem_type
+typename
+enable_if2
+ <
+ is_arma_type<T1>::value && is_arma_type<T2>::value && is_same_type<typename T1::elem_type, typename T2::elem_type>::value,
+ typename T1::elem_type
+ >::result
norm_dot
(
- const Base<typename T1::elem_type,T1>& A,
- const Base<typename T1::elem_type,T2>& B,
- const typename arma_blas_type_only<typename T1::elem_type>::result* junk = 0
+ const T1& A,
+ const T2& B
)
{
arma_extra_debug_sigprint();
- arma_ignore(junk);
return op_norm_dot::apply(A,B);
}
@@ -59,57 +68,66 @@
template<typename T1, typename T2>
arma_inline
arma_warn_unused
-typename T1::elem_type
+typename
+enable_if2
+ <
+ is_arma_type<T1>::value && is_arma_type<T2>::value && is_same_type<typename T1::elem_type, typename T2::elem_type>::value && is_not_complex<typename T1::elem_type>::value,
+ typename T1::elem_type
+ >::result
cdot
(
- const Base<typename T1::elem_type,T1>& A,
- const Base<typename T1::elem_type,T2>& B,
- const typename arma_cx_only<typename T1::elem_type>::result* junk = 0
+ const T1& A,
+ const T2& B
)
{
arma_extra_debug_sigprint();
- arma_ignore(junk);
- return op_cdot::apply(A,B);
+ return op_dot::apply(A,B);
}
+
template<typename T1, typename T2>
arma_inline
arma_warn_unused
-typename T1::elem_type
+typename
+enable_if2
+ <
+ is_arma_type<T1>::value && is_arma_type<T2>::value && is_same_type<typename T1::elem_type, typename T2::elem_type>::value && is_complex<typename T1::elem_type>::value,
+ typename T1::elem_type
+ >::result
cdot
(
- const Base<typename T1::elem_type,T1>& A,
- const Base<typename T1::elem_type,T2>& B,
- const typename arma_not_cx<typename T1::elem_type>::result* junk = 0
+ const T1& A,
+ const T2& B
)
{
arma_extra_debug_sigprint();
- arma_ignore(junk);
- return op_dot::apply(A,B);
+ return op_cdot::apply(A,B);
}
-
// convert dot(htrans(x), y) to cdot(x,y)
template<typename T1, typename T2>
arma_inline
arma_warn_unused
-typename T1::elem_type
+typename
+enable_if2
+ <
+ is_arma_type<T2>::value && is_same_type<typename T1::elem_type, typename T2::elem_type>::value && is_complex<typename T1::elem_type>::value,
+ typename T1::elem_type
+ >::result
dot
(
const Op<T1, op_htrans>& A,
- const Base<typename T1::elem_type,T2>& B,
- const typename arma_cx_only<typename T1::elem_type>::result* junk = 0
+ const T2& B
)
{
arma_extra_debug_sigprint();
- arma_ignore(junk);
return cdot(A.m, B);
}
@@ -192,7 +210,7 @@
dot
(
const SpBase<typename T1::elem_type, T1>& x,
- const Base<typename T2::elem_type, T2>& y
+ const Base<typename T2::elem_type, T2>& y
)
{
// this is commutative
@@ -212,7 +230,7 @@
>::result
dot
(
- const Base<typename T1::elem_type, T1>& x,
+ const Base<typename T1::elem_type, T1>& x,
const SpBase<typename T2::elem_type, T2>& y
)
{
Modified: pkg/RcppArmadillo/inst/include/armadillo_bits/gemv.hpp
===================================================================
--- pkg/RcppArmadillo/inst/include/armadillo_bits/gemv.hpp 2012-09-16 13:51:22 UTC (rev 3771)
+++ pkg/RcppArmadillo/inst/include/armadillo_bits/gemv.hpp 2012-09-18 13:59:49 UTC (rev 3772)
@@ -1,5 +1,5 @@
-// Copyright (C) 2008-2011 NICTA (www.nicta.com.au)
-// Copyright (C) 2008-2011 Conrad Sanderson
+// Copyright (C) 2008-2012 NICTA (www.nicta.com.au)
+// Copyright (C) 2008-2012 Conrad Sanderson
//
// This file is part of the Armadillo C++ library.
// It is provided without any warranty of fitness
@@ -342,7 +342,9 @@
{
arma_extra_debug_sigprint();
- if(A.n_elem <= 64u)
+ const uword threshold = (is_complex<eT>::value == true) ? 16u : 64u;
+
+ if(A.n_elem <= threshold)
{
gemv_emul<do_trans_A, use_alpha, use_beta>::apply(y,A,x,alpha,beta);
}
Modified: pkg/RcppArmadillo/inst/include/armadillo_bits/op_dot_bones.hpp
===================================================================
--- pkg/RcppArmadillo/inst/include/armadillo_bits/op_dot_bones.hpp 2012-09-16 13:51:22 UTC (rev 3771)
+++ pkg/RcppArmadillo/inst/include/armadillo_bits/op_dot_bones.hpp 2012-09-18 13:59:49 UTC (rev 3772)
@@ -22,9 +22,16 @@
public:
template<typename eT>
- arma_hot arma_pure arma_inline static eT direct_dot_arma(const uword n_elem, const eT* const A, const eT* const B);
+ arma_hot arma_pure arma_inline static
+ typename arma_not_cx<eT>::result
+ direct_dot_arma(const uword n_elem, const eT* const A, const eT* const B);
template<typename eT>
+ arma_hot arma_pure inline static
+ typename arma_cx_only<eT>::result
+ direct_dot_arma(const uword n_elem, const eT* const A, const eT* const B);
+
+ template<typename eT>
arma_hot arma_pure inline static typename arma_float_only<eT>::result
direct_dot(const uword n_elem, const eT* const A, const eT* const B);
@@ -41,14 +48,17 @@
arma_hot arma_pure inline static eT direct_dot(const uword n_elem, const eT* const A, const eT* const B, const eT* C);
template<typename T1, typename T2>
- arma_hot arma_inline static typename T1::elem_type apply(const Base<typename T1::elem_type,T1>& X, const Base<typename T1::elem_type,T2>& Y);
+ arma_hot inline static typename T1::elem_type apply(const T1& X, const T2& Y);
template<typename T1, typename T2>
- arma_hot inline static typename T1::elem_type apply_unwrap(const Base<typename T1::elem_type,T1>& X, const Base<typename T1::elem_type,T2>& Y);
+ arma_hot inline static typename T1::elem_type apply_unwrap(const T1& X, const T2& Y);
template<typename T1, typename T2>
- arma_hot inline static typename T1::elem_type apply_proxy (const Base<typename T1::elem_type,T1>& X, const Base<typename T1::elem_type,T2>& Y);
+ arma_hot inline static typename arma_not_cx<typename T1::elem_type>::result apply_proxy(const T1& X, const T2& Y);
+ template<typename T1, typename T2>
+ arma_hot inline static typename arma_cx_only<typename T1::elem_type>::result apply_proxy(const T1& X, const T2& Y);
+
template<typename eT>
arma_hot inline static eT dot_and_copy_row(eT* out, const Mat<eT>& A, const uword row, const eT* B_mem, const uword N);
};
@@ -63,20 +73,35 @@
public:
template<typename T1, typename T2>
- arma_hot inline static typename T1::elem_type apply (const Base<typename T1::elem_type,T1>& X, const Base<typename T1::elem_type,T2>& Y);
+ arma_hot inline static typename T1::elem_type apply (const T1& X, const T2& Y);
template<typename T1, typename T2>
- arma_hot inline static typename T1::elem_type apply_unwrap(const Base<typename T1::elem_type,T1>& X, const Base<typename T1::elem_type,T2>& Y);
+ arma_hot inline static typename T1::elem_type apply_unwrap(const T1& X, const T2& Y);
};
+//! \brief
+//! complex conjugate dot product operation
+
class op_cdot
{
public:
+ template<typename eT>
+ arma_hot inline static eT direct_cdot_arma(const uword n_elem, const eT* const A, const eT* const B);
+
+ template<typename eT>
+ arma_hot inline static eT direct_cdot(const uword n_elem, const eT* const A, const eT* const B);
+
template<typename T1, typename T2>
- arma_hot arma_inline static typename T1::elem_type apply(const Base<typename T1::elem_type,T1>& X, const Base<typename T1::elem_type,T2>& Y);
+ arma_hot inline static typename T1::elem_type apply (const T1& X, const T2& Y);
+
+ template<typename T1, typename T2>
+ arma_hot inline static typename T1::elem_type apply_unwrap(const T1& X, const T2& Y);
+
+ template<typename T1, typename T2>
+ arma_hot inline static typename T1::elem_type apply_proxy (const T1& X, const T2& Y);
};
Modified: pkg/RcppArmadillo/inst/include/armadillo_bits/op_dot_meat.hpp
===================================================================
--- pkg/RcppArmadillo/inst/include/armadillo_bits/op_dot_meat.hpp 2012-09-16 13:51:22 UTC (rev 3771)
+++ pkg/RcppArmadillo/inst/include/armadillo_bits/op_dot_meat.hpp 2012-09-18 13:59:49 UTC (rev 3772)
@@ -16,13 +16,12 @@
-
-//! for two arrays, generic version
+//! for two arrays, generic version for non-complex values
template<typename eT>
arma_hot
arma_pure
arma_inline
-eT
+typename arma_not_cx<eT>::result
op_dot::direct_dot_arma(const uword n_elem, const eT* const A, const eT* const B)
{
arma_extra_debug_sigprint();
@@ -48,6 +47,41 @@
+//! for two arrays, generic version for complex values
+template<typename eT>
+arma_hot
+arma_pure
+inline
+typename arma_cx_only<eT>::result
+op_dot::direct_dot_arma(const uword n_elem, const eT* const A, const eT* const B)
+ {
+ arma_extra_debug_sigprint();
+
+ typedef typename get_pod_type<eT>::result T;
+
+ T val_real = T(0);
+ T val_imag = T(0);
+
+ for(uword i=0; i<n_elem; ++i)
+ {
+ const std::complex<T>& X = A[i];
+ const std::complex<T>& Y = B[i];
+
+ const T a = X.real();
+ const T b = X.imag();
+
+ const T c = Y.real();
+ const T d = Y.imag();
+
+ val_real += (a*c) - (b*d);
+ val_imag += (a*d) + (b*c);
+ }
+
+ return std::complex<T>(val_real, val_imag);
+ }
+
+
+
//! for two arrays, float and double version
template<typename eT>
arma_hot
@@ -58,7 +92,7 @@
{
arma_extra_debug_sigprint();
- if( n_elem <= (128/sizeof(eT)) )
+ if( n_elem <= 32u )
{
return op_dot::direct_dot_arma(n_elem, A, B);
}
@@ -94,22 +128,30 @@
typename arma_cx_only<eT>::result
op_dot::direct_dot(const uword n_elem, const eT* const A, const eT* const B)
{
- #if defined(ARMA_USE_ATLAS)
+ if( n_elem <= 16u )
{
- arma_extra_debug_print("atlas::cx_cblas_dot()");
-
- return atlas::cx_cblas_dot(n_elem, A, B);
- }
- #elif defined(ARMA_USE_BLAS)
- {
- // TODO: work out the mess with zdotu() and zdotu_sub() in BLAS
return op_dot::direct_dot_arma(n_elem, A, B);
}
- #else
+ else
{
- return op_dot::direct_dot_arma(n_elem, A, B);
+ #if defined(ARMA_USE_ATLAS)
+ {
+ arma_extra_debug_print("atlas::cx_cblas_dot()");
+
+ return atlas::cx_cblas_dot(n_elem, A, B);
+ }
+ #elif defined(ARMA_USE_BLAS)
+ {
+ arma_extra_debug_print("blas::dot()");
+
+ return blas::dot(n_elem, A, B);
+ }
+ #else
+ {
+ return op_dot::direct_dot_arma(n_elem, A, B);
+ }
+ #endif
}
- #endif
}
@@ -152,9 +194,9 @@
template<typename T1, typename T2>
arma_hot
-arma_inline
+inline
typename T1::elem_type
-op_dot::apply(const Base<typename T1::elem_type,T1>& X, const Base<typename T1::elem_type,T2>& Y)
+op_dot::apply(const T1& X, const T2& Y)
{
arma_extra_debug_sigprint();
@@ -172,16 +214,16 @@
template<typename T1, typename T2>
arma_hot
-arma_inline
+inline
typename T1::elem_type
-op_dot::apply_unwrap(const Base<typename T1::elem_type,T1>& X, const Base<typename T1::elem_type,T2>& Y)
+op_dot::apply_unwrap(const T1& X, const T2& Y)
{
arma_extra_debug_sigprint();
typedef typename T1::elem_type eT;
- const unwrap<T1> tmp1(X.get_ref());
- const unwrap<T2> tmp2(Y.get_ref());
+ const unwrap<T1> tmp1(X);
+ const unwrap<T2> tmp2(Y);
const Mat<eT>& A = tmp1.M;
const Mat<eT>& B = tmp2.M;
@@ -196,8 +238,8 @@
template<typename T1, typename T2>
arma_hot
inline
-typename T1::elem_type
-op_dot::apply_proxy(const Base<typename T1::elem_type,T1>& X, const Base<typename T1::elem_type,T2>& Y)
+typename arma_not_cx<typename T1::elem_type>::result
+op_dot::apply_proxy(const T1& X, const T2& Y)
{
arma_extra_debug_sigprint();
@@ -205,19 +247,20 @@
typedef typename Proxy<T1>::ea_type ea_type1;
typedef typename Proxy<T2>::ea_type ea_type2;
- const Proxy<T1> A(X.get_ref());
- const Proxy<T2> B(Y.get_ref());
-
const bool prefer_at_accessor = (Proxy<T1>::prefer_at_accessor) && (Proxy<T2>::prefer_at_accessor);
if(prefer_at_accessor == false)
{
- arma_debug_check( (A.get_n_elem() != B.get_n_elem()), "dot(): objects must have the same number of elements" );
-
- const uword N = A.get_n_elem();
- ea_type1 PA = A.get_ea();
- ea_type2 PB = B.get_ea();
+ const Proxy<T1> PA(X);
+ const Proxy<T2> PB(Y);
+ const uword N = PA.get_n_elem();
+
+ arma_debug_check( (N != PB.get_n_elem()), "dot(): objects must have the same number of elements" );
+
+ ea_type1 A = PA.get_ea();
+ ea_type2 B = PB.get_ea();
+
eT val1 = eT(0);
eT val2 = eT(0);
@@ -225,25 +268,81 @@
for(i=0, j=1; j<N; i+=2, j+=2)
{
- val1 += PA[i] * PB[i];
- val2 += PA[j] * PB[j];
+ val1 += A[i] * B[i];
+ val2 += A[j] * B[j];
}
if(i < N)
{
- val1 += PA[i] * PB[i];
+ val1 += A[i] * B[i];
}
return val1 + val2;
}
else
{
- return op_dot::apply_unwrap(A.Q, B.Q);
+ return op_dot::apply_unwrap(X,Y);
}
}
+template<typename T1, typename T2>
+arma_hot
+inline
+typename arma_cx_only<typename T1::elem_type>::result
+op_dot::apply_proxy(const T1& X, const T2& Y)
+ {
+ arma_extra_debug_sigprint();
+
+ typedef typename T1::elem_type eT;
+ typedef typename get_pod_type<eT>::result T;
+
+ typedef typename Proxy<T1>::ea_type ea_type1;
+ typedef typename Proxy<T2>::ea_type ea_type2;
+
+ const bool prefer_at_accessor = (Proxy<T1>::prefer_at_accessor) && (Proxy<T2>::prefer_at_accessor);
+
+ if(prefer_at_accessor == false)
+ {
+ const Proxy<T1> PA(X);
+ const Proxy<T2> PB(Y);
+
+ const uword N = PA.get_n_elem();
+
+ arma_debug_check( (N != PB.get_n_elem()), "dot(): objects must have the same number of elements" );
+
+ ea_type1 A = PA.get_ea();
+ ea_type2 B = PB.get_ea();
+
+ T val_real = T(0);
+ T val_imag = T(0);
+
+ for(uword i=0; i<N; ++i)
+ {
+ const std::complex<T> X = A[i];
+ const std::complex<T> Y = B[i];
+
+ const T a = X.real();
+ const T b = X.imag();
+
+ const T c = Y.real();
+ const T d = Y.imag();
+
+ val_real += (a*c) - (b*d);
+ val_imag += (a*d) + (b*c);
+ }
+
+ return std::complex<T>(val_real, val_imag);
+ }
+ else
+ {
+ return op_dot::apply_unwrap(X,Y);
+ }
+ }
+
+
+
template<typename eT>
arma_hot
inline
@@ -289,7 +388,7 @@
arma_hot
inline
typename T1::elem_type
-op_norm_dot::apply(const Base<typename T1::elem_type,T1>& X, const Base<typename T1::elem_type,T2>& Y)
+op_norm_dot::apply(const T1& X, const T2& Y)
{
arma_extra_debug_sigprint();
@@ -301,29 +400,30 @@
if(prefer_at_accessor == false)
{
- const Proxy<T1> A(X.get_ref());
- const Proxy<T2> B(Y.get_ref());
+ const Proxy<T1> PA(X);
+ const Proxy<T2> PB(Y);
- arma_debug_check( (A.get_n_elem() != B.get_n_elem()), "norm_dot(): objects must have the same number of elements" );
+ const uword N = PA.get_n_elem();
- const uword N = A.get_n_elem();
- ea_type1 PA = A.get_ea();
- ea_type2 PB = B.get_ea();
+ arma_debug_check( (N != PB.get_n_elem()), "norm_dot(): objects must have the same number of elements" );
+ ea_type1 A = PA.get_ea();
+ ea_type2 B = PB.get_ea();
+
eT acc1 = eT(0);
eT acc2 = eT(0);
eT acc3 = eT(0);
for(uword i=0; i<N; ++i)
{
- const eT tmpA = PA[i];
- const eT tmpB = PB[i];
+ const eT tmpA = A[i];
+ const eT tmpB = B[i];
acc1 += tmpA * tmpA;
acc2 += tmpB * tmpB;
acc3 += tmpA * tmpB;
}
-
+
return acc3 / ( std::sqrt(acc1 * acc2) );
}
else
@@ -338,14 +438,14 @@
arma_hot
inline
typename T1::elem_type
-op_norm_dot::apply_unwrap(const Base<typename T1::elem_type,T1>& X, const Base<typename T1::elem_type,T2>& Y)
+op_norm_dot::apply_unwrap(const T1& X, const T2& Y)
{
arma_extra_debug_sigprint();
typedef typename T1::elem_type eT;
- const unwrap<T1> tmp1(X.get_ref());
- const unwrap<T2> tmp2(Y.get_ref());
+ const unwrap<T1> tmp1(X);
+ const unwrap<T2> tmp2(Y);
const Mat<eT>& A = tmp1.M;
const Mat<eT>& B = tmp2.M;
@@ -382,54 +482,188 @@
+template<typename eT>
+arma_hot
+arma_pure
+inline
+eT
+op_cdot::direct_cdot_arma(const uword n_elem, const eT* const A, const eT* const B)
+ {
+ arma_extra_debug_sigprint();
+
+ typedef typename get_pod_type<eT>::result T;
+
+ T val_real = T(0);
+ T val_imag = T(0);
+
+ for(uword i=0; i<n_elem; ++i)
+ {
+ const std::complex<T>& X = A[i];
+ const std::complex<T>& Y = B[i];
+
+ const T a = X.real();
+ const T b = X.imag();
+
+ const T c = Y.real();
+ const T d = Y.imag();
+
+ val_real += (a*c) + (b*d);
+ val_imag += (a*d) - (b*c);
+ }
+
+ return std::complex<T>(val_real, val_imag);
+ }
+
+
+
+template<typename eT>
+arma_hot
+arma_pure
+inline
+eT
+op_cdot::direct_cdot(const uword n_elem, const eT* const A, const eT* const B)
+ {
+ arma_extra_debug_sigprint();
+
+ if( n_elem <= 32u )
+ {
+ return op_cdot::direct_cdot_arma(n_elem, A, B);
+ }
+ else
+ {
+ #if defined(ARMA_USE_BLAS)
+ {
+ arma_extra_debug_print("blas::gemv()");
+
+ // using gemv() workaround due to compatibility issues with cdotc() and zdotc()
+
+ const char trans = 'C';
+
+ const blas_int m = blas_int(n_elem);
+ const blas_int n = 1;
+ //const blas_int lda = (n_elem > 0) ? blas_int(n_elem) : blas_int(1);
+ const blas_int inc = 1;
+
+ const eT alpha = eT(1);
+ const eT beta = eT(0);
+
+ eT result[2]; // paranoia: using two elements instead of one
+
+ //blas::gemv(&trans, &m, &n, &alpha, A, &lda, B, &inc, &beta, &result[0], &inc);
+ blas::gemv(&trans, &m, &n, &alpha, A, &m, B, &inc, &beta, &result[0], &inc);
+
+ return result[0];
+ }
+ #elif defined(ARMA_USE_ATLAS)
+ {
+ // TODO: use dedicated atlas functions cblas_cdotc_sub() and cblas_zdotc_sub() and retune threshold
+
+ return op_cdot::direct_cdot_arma(n_elem, A, B);
+ }
+ #else
+ {
+ return op_cdot::direct_cdot_arma(n_elem, A, B);
+ }
+ #endif
+ }
+ }
+
+
+
template<typename T1, typename T2>
arma_hot
-arma_inline
+inline
typename T1::elem_type
-op_cdot::apply(const Base<typename T1::elem_type,T1>& X, const Base<typename T1::elem_type,T2>& Y)
+op_cdot::apply(const T1& X, const T2& Y)
{
arma_extra_debug_sigprint();
- typedef typename T1::elem_type eT;
+ if( (is_Mat<T1>::value == true) && (is_Mat<T2>::value == true) )
+ {
+ return op_cdot::apply_unwrap(X,Y);
+ }
+ else
+ {
+ return op_cdot::apply_proxy(X,Y);
+ }
+ }
+
+
+
+template<typename T1, typename T2>
+arma_hot
+inline
+typename T1::elem_type
+op_cdot::apply_unwrap(const T1& X, const T2& Y)
+ {
+ arma_extra_debug_sigprint();
+
+ typedef typename T1::elem_type eT;
+
+ const unwrap<T1> tmp1(X);
+ const unwrap<T2> tmp2(Y);
+
+ const Mat<eT>& A = tmp1.M;
+ const Mat<eT>& B = tmp2.M;
+
+ arma_debug_check( (A.n_elem != B.n_elem), "cdot(): objects must have the same number of elements" );
+
+ return op_cdot::direct_cdot( A.n_elem, A.mem, B.mem );
+ }
+
+
+
+template<typename T1, typename T2>
+arma_hot
+inline
+typename T1::elem_type
+op_cdot::apply_proxy(const T1& X, const T2& Y)
+ {
+ arma_extra_debug_sigprint();
+
+ typedef typename T1::elem_type eT;
+ typedef typename get_pod_type<eT>::result T;
+
typedef typename Proxy<T1>::ea_type ea_type1;
typedef typename Proxy<T2>::ea_type ea_type2;
- const Proxy<T1> A(X.get_ref());
- const Proxy<T2> B(Y.get_ref());
-
const bool prefer_at_accessor = (Proxy<T1>::prefer_at_accessor) || (Proxy<T2>::prefer_at_accessor);
if(prefer_at_accessor == false)
{
- arma_debug_check( (A.get_n_elem() != B.get_n_elem()), "cdot(): objects must have the same number of elements" );
+ const Proxy<T1> PA(X);
+ const Proxy<T2> PB(Y);
- const uword N = A.get_n_elem();
- ea_type1 PA = A.get_ea();
- ea_type2 PB = B.get_ea();
+ const uword N = PA.get_n_elem();
- eT val1 = eT(0);
- eT val2 = eT(0);
+ arma_debug_check( (N != PB.get_n_elem()), "cdot(): objects must have the same number of elements" );
- uword i,j;
- for(i=0, j=1; j<N; i+=2, j+=2)
- {
- val1 += std::conj(PA[i]) * PB[i];
- val2 += std::conj(PA[j]) * PB[j];
- }
+ ea_type1 A = PA.get_ea();
+ ea_type2 B = PB.get_ea();
- if(i < N)
+ T val_real = T(0);
+ T val_imag = T(0);
+
+ for(uword i=0; i<N; ++i)
{
- val1 += std::conj(PA[i]) * PB[i];
+ const std::complex<T> AA = A[i];
+ const std::complex<T> BB = B[i];
+
+ const T a = AA.real();
+ const T b = AA.imag();
+
+ const T c = BB.real();
+ const T d = BB.imag();
+
+ val_real += (a*c) + (b*d);
+ val_imag += (a*d) - (b*c);
}
- return val1 + val2;
+ return std::complex<T>(val_real, val_imag);
}
else
{
- const unwrap< typename Proxy<T1>::stored_type > tmp_A(A.Q);
- const unwrap< typename Proxy<T2>::stored_type > tmp_B(B.Q);
-
- return op_cdot::apply( tmp_A.M, tmp_B.M );
+ return op_cdot::apply_unwrap( X, Y );
}
}
Modified: pkg/RcppArmadillo/inst/include/armadillo_bits/operator_schur.hpp
===================================================================
--- pkg/RcppArmadillo/inst/include/armadillo_bits/operator_schur.hpp 2012-09-16 13:51:22 UTC (rev 3771)
+++ pkg/RcppArmadillo/inst/include/armadillo_bits/operator_schur.hpp 2012-09-18 13:59:49 UTC (rev 3772)
@@ -93,50 +93,53 @@
arma_debug_assert_same_size(pa.get_n_rows(), pa.get_n_cols(), pb.get_n_rows(), pb.get_n_cols(), "element-wise multiplication");
SpMat<typename T1::elem_type> result(pa.get_n_rows(), pa.get_n_cols());
-
- // Resize memory to correct size.
[TRUNCATED]
To get the complete diff run:
svnlook diff /svnroot/rcpp -r 3772
More information about the Rcpp-commits
mailing list