[Rcpp-commits] r3654 - pkg/RcppArmadillo/inst/doc

Sat Jun 30 22:16:16 CEST 2012

Author: edd
Date: 2012-06-30 22:16:16 +0200 (Sat, 30 Jun 2012)
New Revision: 3654

Added:
   pkg/RcppArmadillo/inst/doc/RcppArmadillo-intro.Rnw
   pkg/RcppArmadillo/inst/doc/RcppArmadillo.bib
   pkg/RcppArmadillo/inst/doc/kalmanExample.pdf
Modified:
   pkg/RcppArmadillo/inst/doc/Makefile
Log:
added vignette based on just-submitted paper draft


Modified: pkg/RcppArmadillo/inst/doc/Makefile
===================================================================

--- pkg/RcppArmadillo/inst/doc/Makefile	2012-06-30 16:13:49 UTC (rev 3653)
+++ pkg/RcppArmadillo/inst/doc/Makefile	2012-06-30 20:16:16 UTC (rev 3654)
@@ -1,11 +1,12 @@
 
-all: 	RcppArmadillo-unitTests.pdf
+all: 	RcppArmadillo-unitTests.pdf RcppArmadillo-intro.pdf
 
 clean:	
-	rm -f RcppArmadillo*.log RcppArmadillo*.aux RcppArmadillo*.out RcppArmadillo*.tex
+	rm -f RcppArmadillo*.log RcppArmadillo*.aux RcppArmadillo*.out RcppArmadillo*.tex *.blg *.bbl *~
+	rm -rf auto/
 
 pdfclean:
-	rm -f RcppArmadillo-unitTests.pdf
+	rm -f RcppArmadillo-unitTests.pdf RcppArmadillo-intro.pdf
 
 setvars:
 ifeq (${R_HOME},)
@@ -21,3 +22,10 @@
 	$(RSCRIPT) -e "tools::texi2dvi( 'RcppArmadillo-unitTests.tex', pdf = TRUE, clean = TRUE )"
 	rm -fr RcppArmadillo-unitTests.tex
 
+RcppArmadillo-intro.pdf:
+	R CMD Sweave RcppArmadillo-intro.Rnw
+	pdflatex RcppArmadillo-intro
+	bibtex RcppArmadillo-intro
+	pdflatex RcppArmadillo-intro
+	pdflatex RcppArmadillo-intro
+	rm RcppArmadillo-intro.aux RcppArmadillo-intro.log RcppArmadillo-intro.bbl RcppArmadillo-intro.blg RcppArmadillo-intro.tex
\ No newline at end of file

Added: pkg/RcppArmadillo/inst/doc/RcppArmadillo-intro.Rnw
===================================================================
--- pkg/RcppArmadillo/inst/doc/RcppArmadillo-intro.Rnw	                        (rev 0)
+++ pkg/RcppArmadillo/inst/doc/RcppArmadillo-intro.Rnw	2012-06-30 20:16:16 UTC (rev 3654)
@@ -0,0 +1,813 @@
+
+\documentclass[11pt]{article}
+%\VignetteIndexEntry{RcppArmadillo-introduction}
+%\VignetteKeywords{R, C++, Armadillo, linear algebra, kalman filter}
+%\VignettePackage{RcppArmadillo}
+
+\usepackage[USletter]{vmargin}
+%\setmargrb{0.75in}{0.75in}{0.75in}{0.75in}
+\setmargrb{1in}{1in}{1in}{1in}
+
+\usepackage{ae}
+\usepackage[authoryear,round,longnamesfirst]{natbib}
+\usepackage{url}                % break URLs
+\usepackage{booktabs}           % fancier tables
+\usepackage{color}              % color use
+\usepackage{listings}           % code examples
+\definecolor{darkgray}{rgb}{0.975,0.975,0.975}
+\lstset{backgroundcolor=\color{darkgray}}
+\lstset{numbers=left, numberstyle=\tiny, stepnumber=2, numbersep=5pt}
+\lstset{keywordstyle=\color{black}\bfseries\tt}
+\lstset{ %
+  %language=Octave,                % the language of the code
+  %basicstyle=\footnotesize,       % the size of the fonts that are used for the code
+  basicstyle=\small,              % the size of the fonts that are used for the code
+  numbers=left,                   % where to put the line-numbers
+  %numberstyle=\footnotesize,      % the size of the fonts that are used for the line-numbers
+  stepnumber=2,                   % the step between two line-numbers. If it's 1, each line
+                                  % will be numbered
+  numbersep=5pt,                  % how far the line-numbers are from the code
+  %backgroundcolor=\color{white},  % choose the background color. You must add \usepackage{color}
+  showspaces=false,               % show spaces adding particular underscores
+  showstringspaces=false,         % underline spaces within strings
+  showtabs=false,                 % show tabs within strings adding particular underscores
+  %frame=single,                   % adds a frame around the code
+  tabsize=2,                      % sets default tabsize to 2 spaces
+  captionpos=b,                   % sets the caption-position to bottom
+  breaklines=true,                % sets automatic line breaking
+  breakatwhitespace=false         % sets if automatic breaks should only happen at whitespace
+  %title=\lstname,                 % show the filename of files included with \lstinputlisting;
+                                  % also try caption instead of title
+  %escapeinside={\%*}{*)},         % if you want to add a comment within your code
+  %morekeywords={*,...}            % if you want to add more keywords to the set
+}
+%%% from jss.cls defs
+\makeatletter
+\newcommand\code{\bgroup\@makeother\_\@makeother\~\@makeother\$\@codex}
+\def\@codex#1{{\normalfont\ttfamily\hyphenchar\font=-1 #1}\egroup}
+\makeatother
+\newcommand{\proglang}[1]{\textsf{#1}}
+%\newcommand{\proglang}[1]{#1}   % also neutering the proglang macro
+\newcommand{\R}{\proglang{R}\ } % NB forces a space so not good before % fullstop etc.
+\newcommand{\Rns}{\proglang{R}} % without the space
+\newcommand{\Cpp}{\proglang{C++}\ }
+
+%\newcommand{\pkg}[1]{{\fontseries{b}\selectfont #1}\index{#1}}
+%\newcommand{\pkg}[1]{{\it #1}}
+\newcommand{\pkg}[1]{{#1}}  % null op for now
+
+<<echo=FALSE,print=FALSE>>=
+prettyVersion <- packageDescription("RcppArmadillo")$Version
+prettyDate <- format(Sys.Date(), "%B %e, %Y")
+@
+
+\begin{document}
+
+\title{RcppArmadillo: \\ Easily Extending R with High-Performance C++ Code}
+\author{Dirk Eddelbuettel \and Conrad Sanderson}
+\date{\pkg{RcppArmadillo} version \Sexpr{prettyVersion} as of \Sexpr{prettyDate}}
+
+\maketitle
+
+\begin{abstract}
+
+  \noindent
+  The \proglang{R} statistical environment and language has demonstrated
+  particular strengths for interactive development of statistical
+  algorithms, as well as data modelling and visualisation.  Its current
+  implementation has an interpreter at its core which may result in a
+  performance penalty in comparison to directly executing user algorithms
+  in the native machine code of the host CPU.  In contrast, the
+  \proglang{C++} language has no built-in visualisation capabilities,
+  handling of linear algebra or even basic statistical algorithms; however,
+  user programs are converted to high-performance machine code, ahead of
+  execution.
+  %
+  We present a new method of avoiding possible speed penalties in
+  \proglang{R} by using the \pkg{Rcpp} extension package in conjunction
+  with the \pkg{Armadillo} \Cpp matrix library.  In addition to the
+  inherent performance advantages of compiled code, \pkg{Armadillo}
+  provides an easy-to-use template-based meta-programming framework,
+  allowing the automatic pooling of several linear algebra operations into
+  one, which in turn can lead to further speedups.  We demonstrate that
+  with the aid of \pkg{Rcpp} and \pkg{Armadillo}, conversion of linear
+  algebra centered algorithms from \proglang{R} to \proglang{C++} becomes
+  straightforward, with the algorithms retaining the overall structure as
+  well as readability, all while maintaining a bidirectional link with the
+  host R environment.  Empirical timing comparisons of \proglang{R} and
+  \proglang{C++} implementations of a Kalman filtering algorithm indicate a
+  speedup of several orders of magnitude.
+
+\end{abstract}
+
+\section{Overview}
+
+Linear algebra is a cornerstone of statistical computing and statistical
+software systems.  Various matrix decompositions, linear program solvers, and
+eigenvalue / eigenvector computations are key to many estimation and analysis
+routines.  As generally useful procedure, these are often abstracted and
+regrouped in specific libraries for linear algebra which statistical
+programmers have provided for various programming languages and environments.
+
+One such environment (and statistical programming language) is R
+\citep{R:Main}.  It has become a tool of choice for data analysis and applied
+work in statistics \citep{Morandat_2012}.  While R has particular strengths
+at fast prototyping and easy visualisation of data, its current
+implementation has an interpreter at its core.  In comparison to running user
+algorithms in the native machine code of the host CPU, the use of an
+interpreter often results in a performance penalty for non-trivial algorithms
+that perform elaborate data manipulation \citep{Morandat_2012}.  With user
+algorithms becoming more complex and increasing in functionality, as well as
+with data sets which continue to increase in size, the issue of execution
+speed becomes more important.
+
+The \Cpp language offers a complementary set of attributes: while it has no
+built-in visualisation capabilities nor handling of linear algebra or
+statistical methods, user programs are converted to high-performance machine
+code ahead of execution.  It is also inherently flexible. One key feature is
+\textit{operator overloading} which allows the programmer to define custom
+behaviour for mathematical operators such as $+$, $-$, $*$
+\citep{Meyers:2005:EffectiveC++}.  \Cpp also provides language constructs
+known as {\it templates}, originally intended to easily allow the reuse of
+algorithms for various object types, and later extended to a programming
+construct in its own right called \textit{template meta-programming}
+\citep{Vandevoorde_2002,Abrahams_2004}
+
+Operator overloading allows mathematical operations to be extended to
+user-defined objects, such as matrices.  This in turn allows linear algebra
+expressions to be written in a more natural manner (eg.~\mbox{$X = 0.1*A +
+  0.2*B$}), rather than the far less readable traditional function call
+syntax, eg.~\mbox{$X = \mbox{\it add}(\mbox{\it multiply}(0.1,A), \mbox{\it
+    multiply}(0.2,B))$}.
+
+Template meta-programming is the process of inducing the \Cpp compiler to
+execute, at compile time, Turing-complete programs written in a somewhat
+opaque subset of the \Cpp language~\citep{Vandevoorde_2002,Abrahams_2004}.
+These meta-programs in effect generate further \Cpp code (often specialised
+for particular object types), which is finally converted into machine code.
+
+An early and influential example of exploiting both meta-programming and
+overloading of mathematical operators was provided by the Blitz++ library
+\citep{Veldhuizen:1998:Blitz}, targeted for efficient processing of arrays.
+Blitz++ employed elaborate meta-programming to avoid the generation of
+temporary array objects during the evaluation of mathematical expressions.
+However, the library's capabilities and usage were held back at the time by
+the limited availability of compilers correctly implementing all the
+necessary features and nuances of the \Cpp language.
+
+In this paper we present a new method of avoiding the speed penalty in R:
+using the Rcpp extension package \citep{Eddelbuettel+Francois:2011:Rcpp,
+  CRAN:Rcpp} in conjunction with the \pkg{Armadillo} \Cpp linear algebra
+library \citep{Sanderson:2010:Armadillo}.  In a similar manner to Blitz++,
+\pkg{Armadillo} uses operator overloading and various template
+meta-programming techniques to attain efficiency.  However, it has been
+written to target modern \Cpp compilers as well as providing a much larger
+set of linear algebra operations than Blitz++.  R programs augmented to use
+\pkg{Armadillo} retain the overall structure as well as readability, all
+while retaining a bidirectional link with the host R environment.
+
+We continue the paper as follows.  In Section~\ref{sec:arma} we provide an
+overview of the \pkg{Armadillo} \Cpp library, followed by its integration
+with the Rcpp extension package in Section~\ref{sec:rcpparma}.  In
+Section~\ref{sec:kalman} we provide an example of an R program and its
+conversion to \Cpp via the use of Rcpp and \pkg{Armadillo}.
+Section~\ref{sec:speed} provides an empirical timing comparison between the R
+and \Cpp versions.  We conclude the paper in Section~\ref{sec:conclusion}.
+
+
+\section{Armadillo}
+\label{sec:arma}
+
+The \pkg{Armadillo} \Cpp library provides vector, matrix and cube types
+(supporting integer, floating point and complex numbers) as well as a subset
+of trigonometric and statistics functions~\citep{Sanderson:2010:Armadillo}.
+In addition to elementary operations such as addition and matrix
+multiplication, various matrix factorisations and other commonly-used
+functions are provided.  The corresponding application programming interface
+(syntax) enables the programmer to write code which is both concise yet
+easy-to-read to those familiar with scripting languages such as
+\proglang{Matlab} and \proglang{R}.  Table~\ref{tab:arma} lists a few common
+Armadillo functions.
+
+Matrix multiplication and factorisations are accomplished through integration
+with the underlying operations stemming from standard numerical libraries
+such as BLAS and LAPACK~\citep{Demmel_1997}.  Similar to how environments
+such as R are implemented, these underlying libraries can be replaced in a
+transparent manner with variants that are optimised to the specific hardware
+platform and/or multi-threaded, to automatically take advantage of the
+now-common multi-core platforms~\citep{Kurzak_2010}.
+
+%% CS: Talking about cubes and fields currentlly seems like a digression...
+%%     However, cubes do add to the overall "meat" of the paper
+%%
+% Armadillo also provides cubes and fields. Cubes generalize matrices to three dimensions,
+% and (just like vectors and matrices) can contain either integer, floating-point or
+% complex types. Fields are similar to R list type as they allow the collection
+% of objects of types in a single variable.
+
+\begin{table}[!b]
+  \centering
+  \footnotesize
+  \begin{tabular}{ll}
+    \toprule
+    \textbf{Armadillo function} \phantom{XXXXXXX}    &  \textbf{Description}  \\
+    \midrule
+    \code{X(1,2) = 3} & Assign value 3 to element at location (1,2) of matrix $X$ \\ % DE shortened to fit on \textwidth
+    \code{X = A + B}    & Add matrices $A$ and $B$ \\
+    \code{X( span(1,2), span(3,4) )} & Provide read/write access to submatrix of $X$ \\
+    \code{zeros(rows [, cols [, slices]))} & Generate vector (or matrix or cube) of zeros\\
+    \code{ones(rows [, cols [, slices]))} & Generate vector (or matrix or cube) of ones\\
+    \code{eye(rows, cols)}  & Matrix diagonal set to 1, off-diagonal elements set to 0 \\
+    %\code{linspace(start, end, N=100)} & $N$ element vector with elements from start to end \\
+    \code{repmat(X, row_copies, col_copies)}  & Replicate matrix $X$ in block-like manner \\
+    \code{det(X)} & Returns the determinant of matrix $X$\\
+    %\code{dot(A, B)} & Dot-product of confirming vectors $A$ and $B$ \\
+    \code{norm(X, p)} & Compute the $p$-norm of matrix or vector $X$\\
+    \code{rank(X)} & Compute the rank of matrix $X$ \\
+    %\code{trace(X)} & Compute the trace of matrix $X$ \\
+    %\code{diagvec(X, k=0)} & Extracts the $k$-th diagnonal from matrix $X$\\
+    \code{min(X, dim=0)};~ \code{max(X, dim=0)} & Extremum value of each column~of $X$~(row if \code{dim=1}) \\
+    \code{trans(X)} ~or~ \code{X.t()} & Return transpose of $X$ \\
+    \code{R = chol(X)} & Cholesky decomposition of $X$ such that $R^{T} R = X$ \\
+    \code{inv(X)} ~or~ \code{X.i()} & Returns the inverse of square matrix $X$ \\
+    \code{pinv(X)} & Returns the pseudo-inverse of matrix $X$ \\
+    \code{lu(L, U, P, X)} & LU decomp.~with partial pivoting; also \code{lu(L, U, X)} \\
+    %\code{lu(L, U, P, X)} & LU decomposition with partial pivoting \\
+    \code{qr(Q, R, X)} & QR decomp.~into orthogonal $Q$ and right-triangular $R$\\
+    \code{X = solve(A, B)} & Solve system $AX = B$ for $X$ \\
+    \code{s = svd(X); svd(U, s, V, X)} & Singular-value decomposition of $X$ \\
+    \bottomrule
+  \end{tabular}
+\caption{Selected Armadillo functions with brief descriptions; see
+  \texttt{http://arma.sf.net/docs.html} for more complete
+  documentation. Several optional additional arguments have been omitted
+  here for brevity.}
+\label{tab:arma}
+\end{table}
+
+\pkg{Armadillo} uses a delayed evaluation approach to combine several
+operations into one and reduce (or eliminate) the need for temporary objects.
+In contrast to brute-force evaluations, delayed evaluation can provide
+considerable performance improvements as well as reduced memory usage.  The
+delayed evaluation machinery accomplished through template
+meta-programming~\citep{Vandevoorde_2002,Abrahams_2004}, where the \Cpp
+compiler is induced to reason about mathematical expressions at {\it compile
+  time}.  Where possible, the \Cpp compiler can generate machine code that is
+tailored for each expression.
+
+As an example of the possible efficiency gains, let us consider the
+expression \mbox{$X = A - B + C$}, where $A$, $B$ and $C$ are matrices.  A
+brute-force implementation would evaluate $A-B$ first and store the result in
+a temporary matrix $T$.  The next operation would be \mbox{$T + C$}, with the
+result finally stored in $X$.  The creation of the temporary matrix, and
+using two separate loops for the subtraction and addition of matrix elements
+is suboptimal from an efficiency point of view.
+
+Through the overloading of mathematical operators, \pkg{Armadillo} avoids the
+generation of the temporary matrix by first converting the expression into a
+set of lightweight \code{Glue} objects, which only store references to the
+matrices and Armadillo's representations of mathematical expressions
+(eg.~other \code{Glue} objects).  To indicate that an operation comprised of
+subtraction and addition is required, the exact type of the \code{Glue}
+objects is automatically inferred from the given expression through template
+meta-programming.  More specifically, given the expression \mbox{$X = A - B +
+  C$}, \pkg{Armadillo} automatically induces the compiler to generate an
+instance of the lightweight \code{Glue} storage object with the following
+\Cpp {\it type}:
+
+\centerline{~}
+\centerline{\code{Glue< Glue<Mat, Mat, glue\_minus>, Mat, glue\_plus>}}
+\centerline{~}
+
+\noindent
+where \code{Glue<...>} indicates that \code{Glue} is a C++ template class,
+with the items between `\code{<}' and `\code{>}' specifying template
+parameters; the outer \code{Glue<..., Mat, glue\_plus>} is the \code{Glue}
+object indicating an addition operation, storing a reference to a matrix as
+well as a reference to another \code{Glue} object; the inner \code{Glue<Mat,
+  Mat, glue\_minus>} stores references to two matrices and indicates a
+subtraction operation.  In both the inner and outer \code{Glue}, the type
+\code{Mat} specifies that a reference to matrix object is to be held.
+
+The expression evaluator in \pkg{Armadillo} is then automatically invoked
+through the ``\code{=}'' operation, which interprets (at compile time) the
+template parameters of the compound \code{Glue} object and generates \Cpp
+code equivalent to:
+
+\centerline{~}
+\centerline{\code{for(int i=0; i<N; i++) \{ X[i] = (A[i] - B[i]) + C[i]; \}}}
+\centerline{~}
+
+\noindent
+where $N$ is the number of elements in $A$, $B$ and $C$, with \code{A[i]}
+indicating the $i$-th element in $A$.  As such, apart from the lightweight
+\code{Glue} objects (for which memory is pre-allocated at compile time), no
+other temporary object is generated, and only one loop is required instead of
+two.  Given a sufficiently advanced \Cpp compiler, the lightweight
+\code{Glue} objects can be optimised away, as they are automatically
+generated by the compiler and only contain compile-time generated references;
+the resultant machine code can appear as if the \code{Glue} objects never
+existed in the first place.
+
+Note that due to the ability of the \code{Glue} object to hold references to
+other \code{Glue} objects, far longer and more complicated operations can be
+easily accommodated.  Further discussion is of template meta-programming is
+beyond the scope of this paper; for more details, the interested reader is
+referred to~\cite{Vandevoorde_2002} as well as~\cite{Abrahams_2004}.
+
+
+\section{RcppArmadillo}
+\label{sec:rcpparma}
+
+The \pkg{RcppArmadillo} package \citep{CRAN:RcppArmadillo} employs the Rcpp
+package \citep{Eddelbuettel+Francois:2011:Rcpp,CRAN:Rcpp} to provide a
+bidirectional interface between \R and \Cpp at the object level.  Using
+templates, \R objects such as vectors and matrices can be mapped directly to
+the corresponding \pkg{Armadillo} objects.
+
+Consider the simple example in Listing~\ref{code:RcppArmaEx}.  Given a
+vector, the \code{g()} function returns both the outer and inner products.
+We load the inline package \citep{CRAN:inline}, which provides
+\code{cxxfunction()} that we use to compile, link and load the \Cpp code
+which is passed as the \code{body} argument.  We declare the function
+signature to contain a single argument named `\code{vs}'.  On line six, this
+argument is used to instantiate an \pkg{Armadillo} column vector object named
+`\code{v}' (using the templated conversion function \code{as()} from Rcpp).
+In lines seven and eight, the outer and inner product of the column vector
+are calculated by appropriately multiplying the vector with its transpose.
+This shows how the \code{*} operator for multiplication has been overloaded
+to provide the appropriate operation for the types implemented by
+\pkg{Armadillo}.  The inner product creates a scalar variable, and in
+contrast to \R where each object is a vector type (even if of length one), we
+have to explicitly convert using \code{as_scalar()} to assign the value to a
+variable of type \code{double}.
+
+\lstset{language=R,label={code:RcppArmaEx},
+  caption={Example of integrating Armadillo based C++ code within R via the RcppArmadillo package.}}
+\begin{lstlisting}[float=*htp,frame=tb]
+R>
+R> library(inline)
+R>
+R> g <- cxxfunction(signature(vs="numeric"),
++                 plugin="RcppArmadillo", body='
++     arma::vec v = Rcpp::as<arma::vec>(vs);
++     arma::mat op = v * v.t();
++     double ip = arma::as_scalar(v.t() * v);
++     return Rcpp::List::create(Rcpp::Named("outer")=op,
++                               Rcpp::Named("inner")=ip);
++')
+R>
+R> g(7:11)
+$outer
+     [,1] [,2] [,3] [,4] [,5]
+[1,]   49   56   63   70   77
+[2,]   56   64   72   80   88
+[3,]   63   72   81   90   99
+[4,]   70   80   90  100  110
+[5,]   77   88   99  110  121
+
+$inner
+[1] 415
+
+R>
+\end{lstlisting}
+
+Finally, the last line creates an \R named list type containing both results.
+As a result of calling \code{cxxfunction()}, a new function is created. It
+contains a reference to the native code, compiled on the fly based on the
+\Cpp code provided to \code{cxxfunction()} and makes it available directly
+from \R under a user-assigned function name, here \code{g()}.  The listing
+also shows how the \code{Rcpp} and \code{arma} namespaces are used to
+disambiguate symbols from the different libraries; the \code{::} operator is
+already familiar to R programmers who use the NAMESPACE directive in \R in a
+similar fashion.
+
+The listing also demonstrates how the new function \code{g()} can be called
+with a suitable argument.  Here we create a vector of five elements,
+containing values ranging from 7 to 11.  The function's output, here the list
+containing both outer and inner product, is then displayed as it is not
+assigned to a variable.
+
+This simple example illustrates how \R objects can be transferred directly
+into corresponding \pkg{Armadillo} objects using the interface code provided
+by \pkg{Rcpp}.  It also shows how deployment of \pkg{RcppArmadillo} is
+straightforward, even for interactive work where functions can be compiled on
+the fly.  Similarly, usage in packages is also uncomplicated and follows the
+documentation provided with \pkg{Rcpp}~\citep{CRAN:Rcpp}.
+
+As of mid-2012, there are nineteen R packages on CRAN which deploy
+RcppArmadillo\footnote{See
+  \url{http://cran.r-project.org/web/packages/RcppArmadillo/}}, showing both
+the usefulness of RcppArmadillo and its acceptance by the R community.
+
+
+\section{Kalman Filtering Example}
+\label{sec:kalman}
+
+The Kalman filter is ubiquitous in many engineering disciplines as well as in
+statistics and econometrics~\citep{Tusell:2010:Kalman}.  Even its simplest
+linear form, the Kalman filter can provide simple estimates by recursively
+applying linear updates which are robust to noise and can cope with missing
+data.  Moreover, the estimation process is lightweight and fast, and consumes
+only minimal amounts of memory as few state variables are required.
+
+We discuss a standard example below.  The (two-dimensional) position of an
+object is estimated based on past values.  A $6 \times 1$ state vector
+includes $X$ and $Y$ coordinates determining the position, two variables for
+speed (or velocity) $V_X$ and $V_Y$ relative to the two coordinates, as well
+as two acceleration variables $A_X$ and $A_Y$.
+
+We have the positions being updated as a function of the velocity
+
+\begin{displaymath}
+   X  = X_0 + V_X dt
+   \mbox{\phantom{XX} and \phantom{XX}}
+   Y  =  Y_0 + V_Y dt
+\end{displaymath}
+
+\noindent
+and the velocity being updated as a function of the (unobserved) acceleration:
+
+\begin{displaymath}
+   V_x  =  V_{X,0} + A_X dt
+   \mbox{\phantom{XX} and \phantom{XX}}
+   V_y  = V_{Y,0} + A_Y dt
+\end{displaymath}
+
+% \begin{eqnarray*}
+%   X   & = & X_0 + V_x dt \\
+%   Y   & = & Y_0 + V_y dt \\
+%   V_x & = & V_{x,0} + A_x dt \\
+%   V_y & = & V_{y,0} + A_y dt \\
+% \end{eqnarray*}
+
+With covariance matrices $Q$ and $R$ for (Gaussian) error terms, the standard
+Kalman filter estimation involves a linear prediction step resulting in a new
+predicted state vector, and a new covariance estimate.  This leads to a
+residuals vector and a covariance matrix for residuals which are used to
+determine the (optimal) Kalman gain, which is then used to update the state
+estimate and covariance matrix.
+
+All of these steps involve only matrix multiplication and inversions, making
+the algorithm very suitable for an fast implementation in any language which
+can use matrix expressions. An example for \proglang{Matlab} is provided on
+the Mathworks website.\footnote{See
+  \url{http://www.mathworks.com/products/matlab-coder/demos.html?file=/products/demos/shipping/coder/coderdemo_kalman_filter.html}.}
+
+% and shown in Listing~\ref{code:matlabkalman}
+
+% \lstset{language=matlab,caption={Basic Kalman Filter in \proglang{Matlab}.},label={code:matlabkalman}}
+% \begin{lstlisting}[float=*htp, frame=tb]
+% %   Copyright 2010 The MathWorks, Inc.
+% function y = kalmanfilter(z)
+% %#codegen
+% dt=1;
+% % Initialize state transition matrix
+% A=[ 1 0 dt 0 0 0;...     % [x  ]
+%        0 1 0 dt 0 0;...     % [y  ]
+%        0 0 1 0 dt 0;...     % [Vx]
+%        0 0 0 1 0 dt;...     % [Vy]
+%        0 0 0 0 1 0 ;...     % [Ax]
+%        0 0 0 0 0 1 ];       % [Ay]
+% H = [ 1 0 0 0 0 0; 0 1 0 0 0 0 ];   % Init. measuremnt mat
+% Q = eye(6);
+% R = 1000 * eye(2);
+% persistent x_est p_est              % Init. state cond.
+% if isempty(x_est)
+%     x_est = zeros(6, 1);            % x_est=[x,y,Vx,Vy,Ax,Ay]'
+%     p_est = zeros(6, 6);
+% end
+% % Predicted state and covariance
+% x_prd = A * x_est;
+% p_prd = A * p_est * A' + Q;
+% % Estimation
+% S = H * p_prd' * H' + R;
+% B = H * p_prd';
+% klm_gain = (S \ B)';
+% % Estimated state and covariance
+% x_est = x_prd + klm_gain * (z - H * x_prd);
+% p_est = p_prd - klm_gain * H * p_prd;
+% % Compute the estimated measurements
+% y = H * x_est;
+% end                % of the function
+% \end{lstlisting}
+
+% %The \R code below reimplements the basic linear Kalman Filter.
+
+
+A straightforward \R implementation can be written as a close transcription
+of the \proglang{Matlab} version; we refer to this version as
+\code{FirstKalmanR} but have omitted it here for brevity.  A slightly
+improved version (where several invariant statements are moved out of the
+repeatedly-called function) is provided in Listing~\ref{code:Rkalman} on
+page~\pageref{code:Rkalman} showing the function \code{KalmanR}.  The
+estimates of the state vector and its covariance matrix are updated
+iteratively.  The \proglang{Matlab} implementation uses two variables
+declared `persistent' for this.  In \Rns, which does not have such an
+attribute for variables, we store them in the enclosing environment of the
+outer function \code{KalmanR}, which contains an inner function
+\code{kalmanfilter} that is called for each observation.
+
+\lstset{language=R,caption={A Kalman filter implemented in \Rns.},label={code:Rkalman}}
+\begin{lstlisting}[float=*htp,frame=tb]
+KalmanR <- function(pos) {
+
+    kalmanfilter <- function(z) {
+        ## predicted state and covariance
+        xprd <- A %*% xest
+        pprd <- A %*% pest %*% t(A) + Q
+
+        ## estimation
+        S <- H %*% t(pprd) %*% t(H) + R
+        B <- H %*% t(pprd)
+
+        kalmangain <- t(solve(S, B))
+
+        ## estimated state and covariance
+        ## assigned to vars in parent env
+        xest <<- xprd + kalmangain %*% (z - H %*% xprd)
+        pest <<- pprd - kalmangain %*% H %*% pprd
+
+        ## compute the estimated measurements
+        y <- H %*% xest
+    }
+
+    dt <- 1
+    A <- matrix( c( 1, 0, dt, 0, 0, 0,  # x
+                   0, 1, 0, dt, 0, 0,   # y
+                   0, 0, 1, 0, dt, 0,   # Vx
+                   0, 0, 0, 1, 0, dt,   # Vy
+                   0, 0, 0, 0, 1,  0,   # Ax
+                   0, 0, 0, 0, 0,  1),  # Ay
+                6, 6, byrow=TRUE)
+    H <- matrix( c(1, 0, 0, 0, 0, 0,
+                   0, 1, 0, 0, 0, 0),
+                2, 6, byrow=TRUE)
+    Q <- diag(6)
+    R <- 1000 * diag(2)
+    N <- nrow(pos)
+    Y <- matrix(NA, N, 2)
+
+    xest <- matrix(0, 6, 1)
+    pest <- matrix(0, 6, 6)
+
+    for (i in 1:N) {
+        Y[i,] <- kalmanfilter(t(pos[i,,drop=FALSE]))
+    }
+
+    invisible(y)
+}
+\end{lstlisting}
+
+
+\pkg{Armadillo} provides efficient vector and matrix classes to implement the
+Kalman filter.  In Listing~\ref{code:CppKalmanClass} on
+page~\pageref{code:CppKalmanClass}, we show a simple \Cpp class containing a
+basic constructor as well as one additional member function.  The constructor
+can be used to initialise all variables as we are guaranteed that the code in
+the class constructor will be executed exactly once when this class is
+instantiated.  A class also makes it easy to add `persistent' local
+variables, which is a feature we need here.  Given such a class, the
+estimation can be accessed from \R via a short and simple routine such as the
+one shown in Listing~\ref{code:InlineKalman} below.
+
+\lstset {language=C++,
+  caption={A Kalman filter class in \proglang{C++}, using matrix and vector classes from Armadillo.},
+  label={code:CppKalmanClass}
+}
+\begin{lstlisting}[float=*htp,frame=tb]
+using namespace arma;
+
+class Kalman {
+private:
+    mat A, H, Q, R, xest, pest;
+    double dt;
+
+public:
+    // constructor, sets up data structures
+    Kalman() : dt(1.0) {
+        A.eye(6,6);
+        A(0,2) = A(1,3) = A(2,4) = A(3,5) = dt;
+        H.zeros(2,6);
+        H(0,0) = H(1,1) = 1.0;
+        Q.eye(6,6);
+        R = 1000 * eye(2,2);
+        xest.zeros(6,1);
+        pest.zeros(6,6);
+    }
+
+    // sole member function: estimate model
+    mat estimate(const mat & Z) {
+       unsigned int n = Z.n_rows, k = Z.n_cols;
+       mat Y = zeros(n, k);
+       mat xprd, pprd, S, B, kalmangain;
+       colvec z, y;
+
+       for (unsigned int i = 0; i<n; i++) {
+           z = Z.row(i).t();
+           // predicted state and covariance
+           xprd = A * xest;
+           pprd = A * pest * A.t() + Q;
+           // estimation
+           S = H * pprd.t() * H.t() + R;
+           B = H * pprd.t();
+           kalmangain = (solve(S, B)).t();
+           // estimated state and covariance
+           xest = xprd + kalmangain * (z - H * xprd);
+           pest = pprd - kalmangain * H * pprd;
+           // compute the estimated measurements
+           y = H * xest;
+           Y.row(i) = y.t();
+       }
+       return Y;
+    }
+};
+\end{lstlisting}
+
+\medskip
+\noindent
+\begin{minipage}{1\textwidth}
+\lstset{language=R,label={code:InlineKalman}, caption={A Kalman filter
+    function implemented in a mixture of \proglang{R} and \proglang{C++} code,
+    using the Rcpp package to embed Armadillo based \proglang{C++} code (using the {\it Kalman} class from Listing~\ref{code:CppKalmanClass})
+    within R code.}}
+\begin{lstlisting}[frame=tb]
+R> kalmanSrc <- '
++  mat Z = as<mat>(ZS);       // passed from R
++  Kalman K;
++  mat Y = K.estimate(Z);
++  return wrap(Y);
++'
+R>
+R> KalmanCpp <- cxxfunction(signature(ZS="numeric"),
++                         body=kalmanSrc,
++                         include=kalmanClass,
++                         plugin="RcppArmadillo")
+R>
+\end{lstlisting}
+\end{minipage}
+
+The content of Listing~\ref{code:CppKalmanClass} is assigned to a variable
+\code{kalmanClass} which (on line ten) is passed to the \code{include=}
+argument. This provides the required class declaration and definition.
+The four lines of code in lines two to five, assigned to \code{kalmanSrc},
+provide the function body required by \code{cxxfunction()}.
+From both these elements and the function signature argument,
+\code{cxxfunction()} creates a very simple yet efficient
+\Cpp implementation of the Kalman filter which we can access from \Rns.
+Given a vector of observations $Z$, it estimates a vector of position
+estimates $Y$.  This is illustrated in Figure~\ref{fig:kalmanexample} which
+displays the original object trajectory (using light-coloured square
+symbols) as well as the position estimates provided by the Kalman filter (using dark-coloured circles).
+
+\begin{figure}[!tb]
+  \centerline{\includegraphics[width=0.85\columnwidth]{kalmanExample.pdf}}
+  % for eps:  \centerline{\includegraphics[width=0.85\columnwidth]{kalmanExample}}
[TRUNCATED]

To get the complete diff run:
    svnlook diff /svnroot/rcpp -r 3654