[Rcpp-commits] r4164 - in pkg/RcppArmadillo: . inst inst/include/armadillo_bits

Mon Dec 17 13:28:43 CET 2012

Author: edd
Date: 2012-12-17 13:28:42 +0100 (Mon, 17 Dec 2012)
New Revision: 4164

Modified:
   pkg/RcppArmadillo/ChangeLog
   pkg/RcppArmadillo/DESCRIPTION
   pkg/RcppArmadillo/inst/NEWS.Rd
   pkg/RcppArmadillo/inst/include/armadillo_bits/Mat_meat.hpp
   pkg/RcppArmadillo/inst/include/armadillo_bits/Op_bones.hpp
   pkg/RcppArmadillo/inst/include/armadillo_bits/Proxy.hpp
   pkg/RcppArmadillo/inst/include/armadillo_bits/SpMat_meat.hpp
   pkg/RcppArmadillo/inst/include/armadillo_bits/arma_version.hpp
   pkg/RcppArmadillo/inst/include/armadillo_bits/compiler_setup.hpp
   pkg/RcppArmadillo/inst/include/armadillo_bits/debug.hpp
   pkg/RcppArmadillo/inst/include/armadillo_bits/fn_dot.hpp
   pkg/RcppArmadillo/inst/include/armadillo_bits/fn_trace.hpp
   pkg/RcppArmadillo/inst/include/armadillo_bits/forward_bones.hpp
   pkg/RcppArmadillo/inst/include/armadillo_bits/op_diagvec_bones.hpp
   pkg/RcppArmadillo/inst/include/armadillo_bits/op_diagvec_meat.hpp
   pkg/RcppArmadillo/inst/include/armadillo_bits/operator_div.hpp
   pkg/RcppArmadillo/inst/include/armadillo_bits/operator_minus.hpp
   pkg/RcppArmadillo/inst/include/armadillo_bits/operator_plus.hpp
   pkg/RcppArmadillo/inst/include/armadillo_bits/operator_schur.hpp
   pkg/RcppArmadillo/inst/include/armadillo_bits/operator_times.hpp
   pkg/RcppArmadillo/inst/include/armadillo_bits/running_stat_meat.hpp
   pkg/RcppArmadillo/inst/include/armadillo_bits/subview_each_bones.hpp
Log:
New release 0.3.6.1 with Armadillo 3.6.1


Modified: pkg/RcppArmadillo/ChangeLog
===================================================================

--- pkg/RcppArmadillo/ChangeLog	2012-12-17 10:17:09 UTC (rev 4163)
+++ pkg/RcppArmadillo/ChangeLog	2012-12-17 12:28:42 UTC (rev 4164)
@@ -1,3 +1,10 @@
+2012-12-17  Dirk Eddelbuettel  <edd at debian.org>
+
+	* DESCRIPTION: Release 0.3.6.1
+
+	* inst/include/*: Upgraded to new release 3.6.1 of Armadillo
+
+
 2012-12-10  Romain Francois <romain at r-enthusiasts.com>
 
 	* include/RcppArmadillo.h: compiler error if Rcpp.h is included

Modified: pkg/RcppArmadillo/DESCRIPTION
===================================================================
--- pkg/RcppArmadillo/DESCRIPTION	2012-12-17 10:17:09 UTC (rev 4163)
+++ pkg/RcppArmadillo/DESCRIPTION	2012-12-17 12:28:42 UTC (rev 4164)
@@ -1,7 +1,7 @@
 Package: RcppArmadillo
 Type: Package
 Title: Rcpp integration for Armadillo templated linear algebra library
-Version: 0.3.6.0
+Version: 0.3.6.1
 Date: $Date$
 Author: Romain Francois, Dirk Eddelbuettel and Doug Bates
 Maintainer: Dirk Eddelbuettel <edd at debian.org>
@@ -21,7 +21,7 @@
  (due to speed and/or integration capabilities), rather than another language.
  .
  The RcppArmadillo package includes the header files from the templated
- Armadillo library (currently version 3.6.0). Thus users do not need to
+ Armadillo library (currently version 3.6.1). Thus users do not need to
  install Armadillo itself in order to use RcppArmadillo.
  .
  This Armadillo integration provides a nice illustration of the 

Modified: pkg/RcppArmadillo/inst/NEWS.Rd
===================================================================
--- pkg/RcppArmadillo/inst/NEWS.Rd	2012-12-17 10:17:09 UTC (rev 4163)
+++ pkg/RcppArmadillo/inst/NEWS.Rd	2012-12-17 12:28:42 UTC (rev 4164)
@@ -2,21 +2,17 @@
 \title{News for Package 'RcppArmadillo'}
 \newcommand{\cpkg}{\href{http://CRAN.R-project.org/package=#1}{\pkg{#1}}}
 
-
-\section{Changes in RcppArmadillo version 0.3.6.0 (2012-12-07)}{
+\section{Changes in RcppArmadillo version 0.3.6.1 (2012-12-17)}{
   \itemize{
-    \item Upgraded to Armadillo release Version 3.6.0 (Piazza del Duomo)
+    \item Upgraded to Armadillo release Version 3.6.1
     \itemize{
-      \item faster handling of compound expressions with submatrices and
-      subcubes 
-      \item added support for loading matrices as text files with
-      \code{NaN} and \code{Inf} elements 
-      \item added \code{stable_sort_index()}, which preserves the relative
-      order of elements with equivalent values
-      \item added handling of sparse matrices by \code{mean()},
-      \code{var()}, \code{norm()}, \code{abs()}, \code{square()}, \code{sqrt()}
-      \item added saving and loading of sparse matrices in arma_binary format
+      \item faster \code{trace()}
+      \item fix for handling sparse matrices by \code{dot()}
+      \item fixes for interactions between sparse and dense matrices
     }
+    \item Now throws compiler error if \code{Rcpp.h} is included before
+    \code{RcppArmadillo.h} (as the former is included automatically by the
+    latter anyway, but template logic prefers this ordering).
   }
 }
 

Modified: pkg/RcppArmadillo/inst/include/armadillo_bits/Mat_meat.hpp
===================================================================
--- pkg/RcppArmadillo/inst/include/armadillo_bits/Mat_meat.hpp	2012-12-17 10:17:09 UTC (rev 4163)
+++ pkg/RcppArmadillo/inst/include/armadillo_bits/Mat_meat.hpp	2012-12-17 12:28:42 UTC (rev 4164)
@@ -1979,20 +1979,23 @@
   , mem()
   {
   arma_extra_debug_sigprint_this(this);
-
+  
   const SpProxy<T1> p(m.get_ref());
-
+  
   access::rw(n_rows) = p.get_n_rows();
   access::rw(n_cols) = p.get_n_cols();
   access::rw(n_elem) = p.get_n_elem();
-
+  
   init_cold();
   fill(eT(0));
-
-  // Iterate over each nonzero element and set it.
-  for(typename SpProxy<T1>::const_iterator_type it = p.begin(); it != p.end(); ++it)
+  
+  typename SpProxy<T1>::const_iterator_type it     = p.begin();
+  typename SpProxy<T1>::const_iterator_type it_end = p.end();
+  
+  while(it != it_end)
     {
     at(it.row(), it.col()) = (*it);
+    ++it;
     }
   }
 
@@ -2005,18 +2008,22 @@
 Mat<eT>::operator=(const SpBase<eT, T1>& m)
   {
   arma_extra_debug_sigprint();
-
+  
   const SpProxy<T1> p(m.get_ref());
-
+  
   init_warm(p.get_n_rows(), p.get_n_cols());
-
+  
   fill(eT(0));
-
-  for(typename SpProxy<T1>::const_iterator_type it = p.begin(); it != p.end(); ++it)
+  
+  typename SpProxy<T1>::const_iterator_type it     = p.begin();
+  typename SpProxy<T1>::const_iterator_type it_end = p.end();
+  
+  while(it != it_end)
     {
     at(it.row(), it.col()) = (*it);
+    ++it;
     }
-
+  
   return *this;
   }
 
@@ -2029,16 +2036,20 @@
 Mat<eT>::operator+=(const SpBase<eT, T1>& m)
   {
   arma_extra_debug_sigprint();
-
+  
   const SpProxy<T1> p(m.get_ref());
-
+  
   arma_debug_assert_same_size(n_rows, n_cols, p.get_n_rows(), p.get_n_cols(), "addition");
-
-  for(typename SpProxy<T1>::const_iterator_type it = p.begin(); it != p.end(); ++it)
+  
+  typename SpProxy<T1>::const_iterator_type it     = p.begin();
+  typename SpProxy<T1>::const_iterator_type it_end = p.end();
+  
+  while(it != it_end)
     {
     at(it.row(), it.col()) += (*it);
+    ++it;
     }
-
+  
   return *this;
   }
 
@@ -2051,16 +2062,20 @@
 Mat<eT>::operator-=(const SpBase<eT, T1>& m)
   {
   arma_extra_debug_sigprint();
-
+  
   const SpProxy<T1> p(m.get_ref());
-
+  
   arma_debug_assert_same_size(n_rows, n_cols, p.get_n_rows(), p.get_n_cols(), "subtraction");
-
-  for(typename SpProxy<T1>::const_iterator_type it = p.begin(); it != p.end(); ++it)
+  
+  typename SpProxy<T1>::const_iterator_type it     = p.begin();
+  typename SpProxy<T1>::const_iterator_type it_end = p.end();
+  
+  while(it != it_end)
     {
     at(it.row(), it.col()) -= (*it);
+    ++it;
     }
-
+  
   return *this;
   }
 
@@ -2073,11 +2088,11 @@
 Mat<eT>::operator*=(const SpBase<eT, T1>& m)
   {
   arma_extra_debug_sigprint();
-
+  
   Mat<eT> z;
   z = (*this) * m.get_ref();
   steal_mem(z);
-
+  
   return *this;
   }
 
@@ -2090,31 +2105,32 @@
 Mat<eT>::operator%=(const SpBase<eT, T1>& m)
   {
   arma_extra_debug_sigprint();
-
+  
   const SpProxy<T1> p(m.get_ref());
-
+  
   arma_debug_assert_same_size(n_rows, n_cols, p.get_n_rows(), p.get_n_cols(), "element-wise multiplication");
-
-  typename SpProxy<T1>::const_iterator_type it = p.begin();
-
+  
+  typename SpProxy<T1>::const_iterator_type it     = p.begin();
+  typename SpProxy<T1>::const_iterator_type it_end = p.end();
+  
   // We have to zero everything that isn't being used.
   arrayops::inplace_set(memptr(), eT(0), (it.col() * n_rows) + it.row());
-
-  while(it != p.end())
+  
+  while(it != it_end)
     {
     const uword cur_loc = (it.col() * n_rows) + it.row();
-
+    
     access::rw(mem[cur_loc]) *= (*it);
-
+    
     ++it;
-
-    const uword next_loc = (it == p.end())
+    
+    const uword next_loc = (it == it_end)
       ? (p.get_n_cols() * n_rows)
       : (it.col() * n_rows) + it.row();
-
+    
     arrayops::inplace_set(memptr() + cur_loc + 1, eT(0), (next_loc - cur_loc - 1));
     }
-
+  
   return *this;
   }
 
@@ -2127,21 +2143,19 @@
 Mat<eT>::operator/=(const SpBase<eT, T1>& m)
   {
   arma_extra_debug_sigprint();
-
+  
   const SpProxy<T1> p(m.get_ref());
-
+  
   arma_debug_assert_same_size(n_rows, n_cols, p.get_n_rows(), p.get_n_cols(), "element-wise division");
-
+  
   // If you use this method, you are probably stupid or misguided, but for completeness it is implemented.
   // Unfortunately the best way to do this is loop over every element.
   for(uword c = 0; c < n_cols; ++c)
+  for(uword r = 0; r < n_rows; ++r)
     {
-    for(uword r = 0; r < n_rows; ++r)
-      {
-      at(r, c) /= p.at(r, c);
-      }
+    at(r, c) /= p.at(r, c);
     }
-
+  
   return *this;
   }
 

Modified: pkg/RcppArmadillo/inst/include/armadillo_bits/Op_bones.hpp
===================================================================
--- pkg/RcppArmadillo/inst/include/armadillo_bits/Op_bones.hpp	2012-12-17 10:17:09 UTC (rev 4163)
+++ pkg/RcppArmadillo/inst/include/armadillo_bits/Op_bones.hpp	2012-12-17 12:28:42 UTC (rev 4164)
@@ -35,7 +35,7 @@
   typedef typename get_pod_type<elem_type>::result pod_type;
   
   static const bool is_row = ( T1::is_col && (is_same_type<op_type, op_strans>::value || is_same_type<op_type, op_htrans>::value || is_same_type<op_type, op_htrans2>::value) );
-  static const bool is_col = ( T1::is_row && (is_same_type<op_type, op_strans>::value || is_same_type<op_type, op_htrans>::value || is_same_type<op_type, op_htrans2>::value) );
+  static const bool is_col = ( T1::is_row && (is_same_type<op_type, op_strans>::value || is_same_type<op_type, op_htrans>::value || is_same_type<op_type, op_htrans2>::value) ) || (is_same_type<op_type, op_diagvec>::value);
   
   inline explicit Op(const T1& in_m);
   inline          Op(const T1& in_m, const elem_type in_aux);

Modified: pkg/RcppArmadillo/inst/include/armadillo_bits/Proxy.hpp
===================================================================
--- pkg/RcppArmadillo/inst/include/armadillo_bits/Proxy.hpp	2012-12-17 10:17:09 UTC (rev 4163)
+++ pkg/RcppArmadillo/inst/include/armadillo_bits/Proxy.hpp	2012-12-17 12:28:42 UTC (rev 4164)
@@ -299,6 +299,133 @@
 
 
 template<typename T1>
+class Proxy_diagvec_mat
+  {
+  inline Proxy_diagvec_mat(const T1&) {}
+  };
+
+
+
+template<typename T1>
+class Proxy_diagvec_mat< Op<T1, op_diagvec> >
+  {
+  public:
+  
+  typedef typename T1::elem_type                   elem_type;
+  typedef typename get_pod_type<elem_type>::result pod_type;
+  typedef diagview<elem_type>                      stored_type;
+  typedef const diagview<elem_type>&               ea_type;
+  
+  static const bool prefer_at_accessor = false;
+  static const bool has_subview        = true;
+  static const bool is_fixed           = false;
+  static const bool fake_mat           = false;
+  
+  static const bool is_row = false;
+  static const bool is_col = true;
+  
+  arma_aligned const Mat<elem_type>&     R;
+  arma_aligned const diagview<elem_type> Q;
+  
+  inline explicit Proxy_diagvec_mat(const Op<T1, op_diagvec>& A)
+    : R(A.m), Q( R.diag( (A.aux_uword_b > 0) ? -sword(A.aux_uword_a) : sword(A.aux_uword_a) ) )
+    {
+    arma_extra_debug_sigprint();
+    }
+  
+  arma_inline uword get_n_rows() const { return Q.n_rows; }
+  arma_inline uword get_n_cols() const { return 1;        }
+  arma_inline uword get_n_elem() const { return Q.n_elem; }
+  
+  arma_inline elem_type operator[] (const uword i)                const { return Q[i];         }
+  arma_inline elem_type at         (const uword row, const uword) const { return Q.at(row, 0); }
+  
+  arma_inline ea_type get_ea() const { return Q; }
+  
+  template<typename eT2>
+  arma_inline bool is_alias(const Mat<eT2>& X) const { return (void_ptr(&R) == void_ptr(&X)); }
+  };
+
+
+
+template<typename T1>
+class Proxy_diagvec_expr
+  {
+  inline Proxy_diagvec_expr(const T1&) {}
+  };
+
+
+
+template<typename T1>
+class Proxy_diagvec_expr< Op<T1, op_diagvec> >
+  {
+  public:
+  
+  typedef typename T1::elem_type                   elem_type;
+  typedef typename get_pod_type<elem_type>::result pod_type;
+  typedef Mat<elem_type>                           stored_type;
+  typedef const elem_type*                         ea_type;
+  
+  static const bool prefer_at_accessor = false;
+  static const bool has_subview        = false;
+  static const bool is_fixed           = false;
+  static const bool fake_mat           = false;
+  
+  static const bool is_row = false;
+  static const bool is_col = true;
+  
+  arma_aligned const Mat<elem_type> Q;
+  
+  inline explicit Proxy_diagvec_expr(const Op<T1, op_diagvec>& A)
+    : Q(A)
+    {
+    arma_extra_debug_sigprint();
+    }
+  
+  arma_inline uword get_n_rows() const { return Q.n_rows; }
+  arma_inline uword get_n_cols() const { return 1;        }
+  arma_inline uword get_n_elem() const { return Q.n_elem; }
+  
+  arma_inline elem_type operator[] (const uword i)                const { return Q[i];         }
+  arma_inline elem_type at         (const uword row, const uword) const { return Q.at(row, 0); }
+  
+  arma_inline ea_type get_ea() const { return Q.memptr(); }
+  
+  template<typename eT2>
+  arma_inline bool is_alias(const Mat<eT2>&) const { return false; }
+  };
+
+
+
+template<typename T1, bool condition>
+struct Proxy_diagvec_redirect {};
+
+template<typename T1>
+struct Proxy_diagvec_redirect< Op<T1, op_diagvec>, true > { typedef Proxy_diagvec_mat < Op<T1, op_diagvec> > result; };
+
+template<typename T1>
+struct Proxy_diagvec_redirect< Op<T1, op_diagvec>, false> { typedef Proxy_diagvec_expr< Op<T1, op_diagvec> > result; };
+
+
+
+template<typename T1>
+class Proxy< Op<T1, op_diagvec> >
+  : public Proxy_diagvec_redirect< Op<T1, op_diagvec>, is_Mat<T1>::value >::result
+  {
+  public:
+  
+  typedef typename Proxy_diagvec_redirect< Op<T1, op_diagvec>, is_Mat<T1>::value >::result Proxy_diagvec;
+  
+  inline explicit Proxy(const Op<T1, op_diagvec>& A)
+    : Proxy_diagvec(A)
+    {
+    arma_extra_debug_sigprint();
+    }
+  };
+
+
+
+template<typename T1>
 struct Proxy_xtrans_default
   {
   typedef typename T1::elem_type eT;

Modified: pkg/RcppArmadillo/inst/include/armadillo_bits/SpMat_meat.hpp
===================================================================
--- pkg/RcppArmadillo/inst/include/armadillo_bits/SpMat_meat.hpp	2012-12-17 10:17:09 UTC (rev 4163)
+++ pkg/RcppArmadillo/inst/include/armadillo_bits/SpMat_meat.hpp	2012-12-17 12:28:42 UTC (rev 4164)
@@ -191,7 +191,6 @@
   access::rw(col_ptrs[1]) = 1;
 
   return *this;
-
   }
 
 
@@ -202,21 +201,14 @@
 SpMat<eT>::operator*=(const eT val)
   {
   arma_extra_debug_sigprint();
-
+  
   if(val == eT(0))
     {
     // Everything will be zero.
     init(n_rows, n_cols);
     return *this;
     }
-
-  // Iterate over nonzero values, which is a lot faster.
   
-  // for(uword i = 0; i < n_nonzero; ++i)
-  //   {
-  //   access::rw(values[i]) *= val;
-  //   }
-  
   arrayops::inplace_mul( access::rwp(values), val, n_nonzero );
   
   return *this;
@@ -233,12 +225,8 @@
   
   arma_debug_check( (val == eT(0)), "element-wise division: division by zero" );
   
-  // We only have to loop over nonzero values.
-  for (uword i = 0; i < n_nonzero; ++i)
-    {
-    access::rw(values[i]) /= val;
-    }
-
+  arrayops::inplace_div( access::rwp(values), val, n_nonzero );
+  
   return *this;
   }
 
@@ -537,22 +525,16 @@
   if(Proxy<T1>::prefer_at_accessor == true)
     {
     for(uword j = 0; j < x_n_cols; ++j)
+    for(uword i = 0; i < x_n_rows; ++i)
       {
-      for(uword i = 0; i < x_n_rows; ++i)
-        {
-        if(p.at(i, j) != eT(0))
-          ++n;
-        }
+      if(p.at(i, j) != eT(0)) { ++n; }
       }
     }
   else
     {
     for(uword i = 0; i < x_n_elem; ++i)
       {
-      if(p[i] != eT(0))
-        {
-        ++n;
-        }
+      if(p[i] != eT(0)) { ++n; }
       }
     }
 
@@ -561,16 +543,16 @@
   // Now the memory is resized correctly; add nonzero elements.
   n = 0;
   for(uword j = 0; j < x_n_cols; ++j)
+  for(uword i = 0; i < x_n_rows; ++i)
     {
-    for(uword i = 0; i < x_n_rows; ++i)
+    const eT val = p.at(i, j);
+    
+    if(val != eT(0))
       {
-      if(p.at(i, j) != eT(0))
-        {
-        access::rw(values[n]) = p.at(i, j);
-        access::rw(row_indices[n]) = i;
-        access::rw(col_ptrs[j + 1])++;
-        ++n;
-        }
+      access::rw(values[n])      = val;
+      access::rw(row_indices[n]) = i;
+      access::rw(col_ptrs[j + 1])++;
+      ++n;
       }
     }
 
@@ -679,6 +661,10 @@
 
 
 
+/**
+ * Don't use this function.  It's not mathematically well-defined and wastes
+ * cycles to trash all your data.  This is dumb.
+ */
 template<typename eT>
 template<typename T1>
 inline
@@ -687,20 +673,10 @@
   {
   arma_extra_debug_sigprint();
 
-  const Proxy<T1> p(x.get_ref());
+  SpMat<eT> tmp = (*this) / x.get_ref();
   
-  /**
-   * Don't use this function.  It's not mathematically well-defined and wastes
-   * cycles to trash all your data.  This is dumb.
-   */
-  arma_debug_assert_same_size(n_rows, n_cols, p.get_n_rows(), p.get_n_cols(), "element-wise division");
+  steal_mem(tmp);
   
-  for(uword j = 0; j < n_cols; j++)
-  for(uword i = 0; i < n_rows; i++)
-    {
-    at(i, j) /= p.at(i, j);
-    }
-  
   return *this;
   }
 

Modified: pkg/RcppArmadillo/inst/include/armadillo_bits/arma_version.hpp
===================================================================
--- pkg/RcppArmadillo/inst/include/armadillo_bits/arma_version.hpp	2012-12-17 10:17:09 UTC (rev 4163)
+++ pkg/RcppArmadillo/inst/include/armadillo_bits/arma_version.hpp	2012-12-17 12:28:42 UTC (rev 4164)
@@ -18,7 +18,7 @@
 
 #define ARMA_VERSION_MAJOR 3
 #define ARMA_VERSION_MINOR 6
-#define ARMA_VERSION_PATCH 0
+#define ARMA_VERSION_PATCH 1
 #define ARMA_VERSION_NAME  "Piazza del Duomo"
 
 

Modified: pkg/RcppArmadillo/inst/include/armadillo_bits/compiler_setup.hpp
===================================================================
--- pkg/RcppArmadillo/inst/include/armadillo_bits/compiler_setup.hpp	2012-12-17 10:17:09 UTC (rev 4163)
+++ pkg/RcppArmadillo/inst/include/armadillo_bits/compiler_setup.hpp	2012-12-17 12:28:42 UTC (rev 4164)
@@ -21,6 +21,7 @@
 #define arma_deprecated
 #define arma_malloc
 #define arma_inline            inline
+#define arma_noinline
 #define arma_ignore(variable)  ((void)(variable))
 
 
@@ -28,8 +29,8 @@
   #define arma_fortran2_noprefix(function) function##_
   #define arma_fortran2_prefix(function)   wrapper_##function##_
 #else
+  #define arma_fortran2_noprefix(function) function
   #define arma_fortran2_prefix(function)   wrapper_##function
-  #define arma_fortran2_noprefix(function) function
 #endif
 
 #if defined(ARMA_USE_WRAPPER)
@@ -71,7 +72,6 @@
   #define ARMA_GOOD_COMPILER
   #undef  ARMA_HAVE_STD_TR1
   
-  #undef  arma_cold
   #undef  arma_pure
   #undef  arma_const
   #undef  arma_aligned
@@ -79,8 +79,8 @@
   #undef  arma_deprecated
   #undef  arma_malloc
   #undef  arma_inline
+  #undef  arma_noinline
   
-  #define arma_cold               __attribute__((__noinline__))
   #define arma_pure               __attribute__((__pure__))
   #define arma_const              __attribute__((__const__))
   #define arma_aligned            __attribute__((__aligned__))
@@ -88,13 +88,14 @@
   #define arma_deprecated         __attribute__((__deprecated__))
   #define arma_malloc             __attribute__((__malloc__))
   #define arma_inline      inline __attribute__((__always_inline__))
+  #define arma_noinline           __attribute__((__noinline__))
   
   #if (ARMA_GCC_VERSION >= 40300)
     #undef  arma_hot
     #undef  arma_cold
     
     #define arma_hot  __attribute__((__hot__))
-    #define arma_cold __attribute__((__cold__,__noinline__))
+    #define arma_cold __attribute__((__cold__))
   #endif
   
   #if (ARMA_GCC_VERSION >= 40200)

Modified: pkg/RcppArmadillo/inst/include/armadillo_bits/debug.hpp
===================================================================
--- pkg/RcppArmadillo/inst/include/armadillo_bits/debug.hpp	2012-12-17 10:17:09 UTC (rev 4163)
+++ pkg/RcppArmadillo/inst/include/armadillo_bits/debug.hpp	2012-12-17 12:28:42 UTC (rev 4164)
@@ -93,6 +93,7 @@
 //! print a message to get_stream_err1() and/or throw a logic_error exception
 template<typename T1>
 arma_cold
+arma_noinline
 static
 void
 arma_stop(const T1& x)
@@ -121,6 +122,7 @@
 
 template<typename T1>
 arma_cold
+arma_noinline
 static
 void
 arma_stop_bad_alloc(const T1& x)
@@ -145,6 +147,7 @@
 //! print a message to get_stream_err2() and/or throw a run-time error exception
 template<typename T1>
 arma_cold
+arma_noinline
 static
 void
 arma_bad(const T1& x, const bool hurl = true)
@@ -189,6 +192,7 @@
 
 template<typename T1>
 arma_cold
+arma_noinline
 static
 void
 arma_print(const T1& x)
@@ -200,6 +204,7 @@
 
 template<typename T1, typename T2>
 arma_cold
+arma_noinline
 static
 void
 arma_print(const T1& x, const T2& y)
@@ -211,6 +216,7 @@
 
 template<typename T1, typename T2, typename T3>
 arma_cold
+arma_noinline
 static
 void
 arma_print(const T1& x, const T2& y, const T3& z)
@@ -293,6 +299,7 @@
 //! print a message to the warn stream
 template<typename T1>
 arma_cold
+arma_noinline
 static
 void
 arma_warn(const bool state, const T1& x)
@@ -306,6 +313,7 @@
 
 template<typename T1, typename T2>
 arma_cold
+arma_noinline
 static
 void
 arma_warn(const bool state, const T1& x, const T2& y)
@@ -319,6 +327,7 @@
 
 template<typename T1, typename T2, typename T3>
 arma_cold
+arma_noinline
 static
 void
 arma_warn(const bool state, const T1& x, const T2& y, const T3& z)
@@ -398,6 +407,7 @@
 // functions for generating strings indicating size errors
 
 arma_cold
+arma_noinline
 static
 std::string
 arma_incompat_size_string(const uword A_n_rows, const uword A_n_cols, const uword B_n_rows, const uword B_n_cols, const char* x)
@@ -412,6 +422,7 @@
 
 
 arma_cold
+arma_noinline
 static
 std::string
 arma_incompat_size_string(const uword A_n_rows, const uword A_n_cols, const uword A_n_slices, const uword B_n_rows, const uword B_n_cols, const uword B_n_slices, const char* x)
@@ -427,6 +438,7 @@
 
 template<typename eT>
 arma_cold
+arma_noinline
 static
 std::string
 arma_incompat_size_string(const subview_cube<eT>& Q, const Mat<eT>& A, const char* x)

Modified: pkg/RcppArmadillo/inst/include/armadillo_bits/fn_dot.hpp
===================================================================
--- pkg/RcppArmadillo/inst/include/armadillo_bits/fn_dot.hpp	2012-12-17 10:17:09 UTC (rev 4163)
+++ pkg/RcppArmadillo/inst/include/armadillo_bits/fn_dot.hpp	2012-12-17 12:28:42 UTC (rev 4164)
@@ -134,49 +134,38 @@
 
 
 
-//! dot product of two sparse objects
-template<typename T1, typename T2>
-inline
-arma_warn_unused
-typename
-enable_if2
-  <(is_arma_sparse_type<T1>::value) && (is_arma_sparse_type<T2>::value) && (is_same_type<typename T1::elem_type, typename T2::elem_type>::value),
-   typename T1::elem_type
-  >::result
-dot
-  (
-  const SpBase<typename T1::elem_type, T1>& x,
-  const SpBase<typename T2::elem_type, T2>& y
-  )
-  {
-  arma_extra_debug_sigprint();
+//
+// for sparse matrices
+// 
 
-  const SpProxy<T1> pa(x.get_ref());
-  const SpProxy<T2> pb(y.get_ref());
 
-  arma_debug_assert_same_size(pa.get_n_rows(), pa.get_n_cols(), pb.get_n_rows(), pb.get_n_cols(), "dot()");
 
-  typedef typename T1::elem_type eT;
-
-  if((&(x.get_ref()) == &(y.get_ref())) && (SpProxy<T1>::must_use_iterator == false))
+namespace priv
+  {
+  
+  template<typename T1, typename T2>
+  arma_hot
+  inline
+  typename T1::elem_type
+  dot_helper(const SpProxy<T1>& pa, const SpProxy<T2>& pb)
     {
-    // We can do it directly!
-    return op_dot::direct_dot_arma(pa.get_n_nonzero(), pa.get_values(), pa.get_values());
-    }
-  else
-    {
+    typedef typename T1::elem_type eT;
+    
     // Iterate over both objects and see when they are the same
     eT result = eT(0);
-
-    typename SpProxy<T1>::const_iterator_type a_it = pa.begin();
-    typename SpProxy<T2>::const_iterator_type b_it = pb.begin();
-
-    while((a_it != pa.end()) && (b_it != pb.end()))
+    
+    typename SpProxy<T1>::const_iterator_type a_it  = pa.begin();
+    typename SpProxy<T1>::const_iterator_type a_end = pa.end();
+    
+    typename SpProxy<T2>::const_iterator_type b_it  = pb.begin();
+    typename SpProxy<T2>::const_iterator_type b_end = pb.end();
+    
+    while((a_it != a_end) && (b_it != b_end))
       {
       if(a_it == b_it)
         {
         result += (*a_it) * (*b_it);
-
+        
         ++a_it;
         ++b_it;
         }
@@ -191,38 +180,76 @@
         ++b_it;
         }
       }
-
+    
     return result;
     }
+  
   }
 
 
 
-//! dot product of one sparse and one dense object
+//! dot product of two sparse objects
 template<typename T1, typename T2>
-arma_inline
 arma_warn_unused
+arma_hot
+inline
 typename
 enable_if2
-  <(is_arma_sparse_type<T1>::value) && (is_arma_type<T2>::value) && (is_same_type<typename T1::elem_type, typename T2::elem_type>::value),
+  <(is_arma_sparse_type<T1>::value) && (is_arma_sparse_type<T2>::value) && (is_same_type<typename T1::elem_type, typename T2::elem_type>::value),
    typename T1::elem_type
   >::result
 dot
   (
-  const SpBase<typename T1::elem_type, T1>& x,
-  const   Base<typename T2::elem_type, T2>& y
+  const T1& x,
+  const T2& y
   )
   {
-  // this is commutative
-  return dot(y, x);
+  arma_extra_debug_sigprint();
+  
+  const SpProxy<T1> pa(x);
+  const SpProxy<T2> pb(y);
+  
+  arma_debug_assert_same_size(pa.get_n_rows(), pa.get_n_cols(), pb.get_n_rows(), pb.get_n_cols(), "dot()");
+  
+  typedef typename T1::elem_type eT;
+  
+  typedef typename SpProxy<T1>::stored_type pa_Q_type;
+  typedef typename SpProxy<T2>::stored_type pb_Q_type;
+  
+  if(
+         ( (SpProxy<T1>::must_use_iterator == false) && (SpProxy<T2>::must_use_iterator == false) )
+      && ( (is_SpMat<pa_Q_type>::value     == true ) && (is_SpMat<pb_Q_type>::value     == true ) )   
+    )
+    {
+    const unwrap_spmat<pa_Q_type> tmp_a(pa.Q);
+    const unwrap_spmat<pb_Q_type> tmp_b(pb.Q);
+    
+    const SpMat<eT>& A = tmp_a.M;
+    const SpMat<eT>& B = tmp_b.M;
+    
+    if( &A == &B )
+      {
+      // We can do it directly!
+      return op_dot::direct_dot_arma(A.n_nonzero, A.values, A.values);
+      }
+    else
+      {
+      return priv::dot_helper(pa,pb);
+      }
+    }
+  else
+    {
+    return priv::dot_helper(pa,pb);
+    }
   }
 
 
 
 //! dot product of one dense and one sparse object
 template<typename T1, typename T2>
+arma_warn_unused
+arma_hot
 inline
-arma_warn_unused
 typename
 enable_if2
   <(is_arma_type<T1>::value) && (is_arma_sparse_type<T2>::value) && (is_same_type<typename T1::elem_type, typename T2::elem_type>::value),
@@ -230,32 +257,58 @@
   >::result
 dot
   (
-  const   Base<typename T1::elem_type, T1>& x,
-  const SpBase<typename T2::elem_type, T2>& y
+  const T1& x,
+  const T2& y
   )
   {
   arma_extra_debug_sigprint();
-
-  const Proxy<T1> pa(x.get_ref());
-  const SpProxy<T2> pb(y.get_ref());
-
+  
+  const   Proxy<T1> pa(x);
+  const SpProxy<T2> pb(y);
+  
   arma_debug_assert_same_size(pa.get_n_rows(), pa.get_n_cols(), pb.get_n_rows(), pb.get_n_cols(), "dot()");
-
+  
   typedef typename T1::elem_type eT;
-
+  
   eT result = eT(0);
-
-  typename SpProxy<T2>::const_iterator_type it = pb.begin();
-
+  
+  typename SpProxy<T2>::const_iterator_type it     = pb.begin();
+  typename SpProxy<T2>::const_iterator_type it_end = pb.end();
+  
   // prefer_at_accessor won't save us operations
-  while(it != pb.end())
+  while(it != it_end)
     {
     result += (*it) * pa.at(it.row(), it.col());
     ++it;
     }
-
+  
   return result;
   }
 
 
+
+//! dot product of one sparse and one dense object
+template<typename T1, typename T2>
+arma_warn_unused
+arma_hot
+inline
+typename
+enable_if2
+  <(is_arma_sparse_type<T1>::value) && (is_arma_type<T2>::value) && (is_same_type<typename T1::elem_type, typename T2::elem_type>::value),
+   typename T1::elem_type
+  >::result
+dot
+  (
+  const T1& x,
+  const T2& y
+  )
+  {
+  arma_extra_debug_sigprint();
+  
+  // this is commutative
+  return dot(y, x);
+  }
+
+
+
 //! @}

Modified: pkg/RcppArmadillo/inst/include/armadillo_bits/fn_trace.hpp
===================================================================
--- pkg/RcppArmadillo/inst/include/armadillo_bits/fn_trace.hpp	2012-12-17 10:17:09 UTC (rev 4163)
+++ pkg/RcppArmadillo/inst/include/armadillo_bits/fn_trace.hpp	2012-12-17 12:28:42 UTC (rev 4164)
@@ -1,5 +1,5 @@
-// Copyright (C) 2008-2010 NICTA (www.nicta.com.au)
-// Copyright (C) 2008-2010 Conrad Sanderson
+// Copyright (C) 2008-2012 NICTA (www.nicta.com.au)
+// Copyright (C) 2008-2012 Conrad Sanderson
 // Copyright (C) 2012 Ryan Curtin
 // 
 // This file is part of the Armadillo C++ library.
@@ -18,35 +18,46 @@
 
 //! Immediate trace (sum of diagonal elements) of a square dense matrix
 template<typename T1>
+arma_hot
+arma_warn_unused
 inline
-arma_warn_unused
-typename T1::elem_type
-trace(const Base<typename T1::elem_type,T1>& X)
+typename enable_if2<is_arma_type<T1>::value, typename T1::elem_type>::result
+trace(const T1& X)
   {
   arma_extra_debug_sigprint();
   
   typedef typename T1::elem_type eT;
   
-  const Proxy<T1> A(X.get_ref());
+  const Proxy<T1> A(X);
   
   arma_debug_check( (A.get_n_rows() != A.get_n_cols()), "trace(): matrix must be square sized" );
   
-  const uword N   = A.get_n_rows();
-        eT    val = eT(0);
+  const uword N = A.get_n_rows();
   
-  for(uword i=0; i<N; ++i)
+  eT val1 = eT(0);
+  eT val2 = eT(0);
+  
+  uword i,j;
+  for(i=0, j=1; j<N; i+=2, j+=2)
     {
-    val += A.at(i,i);
+    val1 += A.at(i,i);
+    val2 += A.at(j,j);
     }
   
-  return val;
+  if(i < N)
+    {
+    val1 += A.at(i,i);
+    }
+  
+  return val1 + val2;
   }
 
 
 
 template<typename T1>
+arma_hot
+arma_warn_unused
 inline
-arma_warn_unused
 typename T1::elem_type
 trace(const Op<T1, op_diagmat>& X)
   {
@@ -69,42 +80,55 @@
   }
 
 
+
 //! speedup for trace(A*B), where the result of A*B is a square sized matrix
 template<typename T1, typename T2>
+arma_hot
 inline
-arma_warn_unused
 typename T1::elem_type
-trace(const Glue<T1, T2, glue_times>& X)
+trace_mul_unwrap(const T1& XA, const T2& XB)
   {
   arma_extra_debug_sigprint();
   
   typedef typename T1::elem_type eT;
   
-  const unwrap<T1> tmp1(X.A);
-  const unwrap<T2> tmp2(X.B);
+  const Proxy<T1>    PA(XA);
+  const unwrap<T2> tmpB(XB);
   
-  const Mat<eT>& A = tmp1.M;
-  const Mat<eT>& B = tmp2.M;
+  const Mat<eT>& B = tmpB.M;
   
-  arma_debug_assert_mul_size(A, B, "matrix multiply");
+  arma_debug_assert_mul_size(PA.get_n_rows(), PA.get_n_cols(), B.n_rows, B.n_cols, "matrix multiply");
   
-  arma_debug_check( (A.n_rows != B.n_cols), "trace(): matrix must be square sized" );
+  arma_debug_check( (PA.get_n_rows() != B.n_cols), "trace(): matrix must be square sized" );
   
-  const uword N1  = A.n_rows;
-  const uword N2  = A.n_cols;
-        eT  val = eT(0);
+  const uword N1 = PA.get_n_rows();   // equivalent to B.n_cols, due to square size requirements
+  const uword N2 = PA.get_n_cols();   // equivalent to B.n_rows, due to matrix multiplication requirements
   
+  eT val = eT(0);
+  
   for(uword i=0; i<N1; ++i)
     {
     const eT* B_colmem = B.colptr(i);
-          eT  acc      = eT(0);
     
-    for(uword j=0; j<N2; ++j)
+    eT acc1 = eT(0);
+    eT acc2 = eT(0);
+    
+    uword j,k;
+    for(j=0, k=1; k < N2; j+=2, k+=2)
       {
-      acc += A.at(i,j) * B_colmem[j];
+      const eT tmp_j = B_colmem[j];
+      const eT tmp_k = B_colmem[k];
+      
+      acc1 += PA.at(i,j) * tmp_j;
+      acc2 += PA.at(i,k) * tmp_k;
       }
     
-    val += acc;
+    if(j < N2)
+      {
+      acc1 += PA.at(i,j) * B_colmem[j];
+      }
+    
+    val += (acc1 + acc2);
     }
   
   return val;
@@ -112,16 +136,88 @@
 
 
 
+//! speedup for trace(A*B), where the result of A*B is a square sized matrix
+template<typename T1, typename T2>
+arma_hot
+inline
+typename T1::elem_type
+trace_mul_proxy(const T1& XA, const T2& XB)
+  {
+  arma_extra_debug_sigprint();
+  
+  typedef typename T1::elem_type eT;
+  
+  const Proxy<T1> PA(XA);
+  const Proxy<T2> PB(XB);
+  
+  if(is_Mat<typename Proxy<T2>::stored_type>::value == true)
+    {
+    return trace_mul_unwrap(PA.Q, PB.Q);
+    }
+  
+  arma_debug_assert_mul_size(PA.get_n_rows(), PA.get_n_cols(), PB.get_n_rows(), PB.get_n_cols(), "matrix multiply");
+  
+  arma_debug_check( (PA.get_n_rows() != PB.get_n_cols()), "trace(): matrix must be square sized" );
+  
+  const uword N1 = PA.get_n_rows();   // equivalent to PB.get_n_cols(), due to square size requirements
+  const uword N2 = PA.get_n_cols();   // equivalent to PB.get_n_rows(), due to matrix multiplication requirements
+  
+  eT val = eT(0);
+  
+  for(uword i=0; i<N1; ++i)
+    {
+    eT acc1 = eT(0);
+    eT acc2 = eT(0);
+    
+    uword j,k;
+    for(j=0, k=1; k < N2; j+=2, k+=2)
+      {
+      const eT tmp_j = PB.at(j,i);
+      const eT tmp_k = PB.at(k,i);
+      
+      acc1 += PA.at(i,j) * tmp_j;
+      acc2 += PA.at(i,k) * tmp_k;
+      }
+    
+    if(j < N2)
+      {
+      acc1 += PA.at(i,j) * PB.at(j,i);
+      }
+    
+    val += (acc1 + acc2);
+    }
+  
+  return val;
+  }
+
+
+
+//! speedup for trace(A*B), where the result of A*B is a square sized matrix
+template<typename T1, typename T2>
+arma_hot
+arma_warn_unused
+inline
+typename T1::elem_type
+trace(const Glue<T1, T2, glue_times>& X)
+  {
+  arma_extra_debug_sigprint();
+  
+  return (is_Mat<T2>::value) ? trace_mul_unwrap(X.A, X.B) : trace_mul_proxy(X.A, X.B);
+  }
+
+
+
 //! trace of sparse object
 template<typename T1>
+arma_hot
+arma_warn_unused
 inline
-arma_warn_unused
 typename enable_if2<is_arma_sparse_type<T1>::value, typename T1::elem_type>::result
-trace(const SpBase<typename T1::elem_type, T1>& x)
+trace(const T1& x)
   {
   arma_extra_debug_sigprint();
   
-  const SpProxy<T1> p(x.get_ref());
+  const SpProxy<T1> p(x);
   
   arma_debug_check( (p.get_n_rows() != p.get_n_cols()), "trace(): matrix must be square sized" );
   
@@ -129,9 +225,10 @@
   
   eT result = eT(0);
   
-  typename SpProxy<T1>::const_iterator_type it = p.begin();
+  typename SpProxy<T1>::const_iterator_type it     = p.begin();
+  typename SpProxy<T1>::const_iterator_type it_end = p.end();
   
-  while(it != p.end())
+  while(it != it_end)
     {
     if(it.row() == it.col())
       {

Modified: pkg/RcppArmadillo/inst/include/armadillo_bits/forward_bones.hpp
===================================================================
--- pkg/RcppArmadillo/inst/include/armadillo_bits/forward_bones.hpp	2012-12-17 10:17:09 UTC (rev 4163)
+++ pkg/RcppArmadillo/inst/include/armadillo_bits/forward_bones.hpp	2012-12-17 12:28:42 UTC (rev 4164)
@@ -61,6 +61,7 @@
 class op_abs;
 class op_diagmat;
 class op_trimat;
+class op_diagvec;
[TRUNCATED]

To get the complete diff run:
    svnlook diff /svnroot/rcpp -r 4164