[Rcpp-commits] r3094 - in pkg/RcppEigen: R inst/unitTests man src
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Tue Jun 21 04:28:06 CEST 2011
Author: dmbates
Date: 2011-06-21 04:28:02 +0200 (Tue, 21 Jun 2011)
New Revision: 3094
Added:
pkg/RcppEigen/src/fastLm.h
Modified:
pkg/RcppEigen/R/fastLm.R
pkg/RcppEigen/inst/unitTests/runit.fastLm.R
pkg/RcppEigen/man/fastLm.Rd
pkg/RcppEigen/src/fastLm.cpp
Log:
Refactoring of the fastLm code to allow algorithms to be added easily. Adjust docs and R code accordingly.
Modified: pkg/RcppEigen/R/fastLm.R
===================================================================
--- pkg/RcppEigen/R/fastLm.R 2011-06-17 18:32:54 UTC (rev 3093)
+++ pkg/RcppEigen/R/fastLm.R 2011-06-21 02:28:02 UTC (rev 3094)
@@ -17,22 +17,21 @@
## You should have received a copy of the GNU General Public License
## along with RcppEigen. If not, see <http://www.gnu.org/licenses/>.
-fastLmPure <- function(X, y) {
+fastLmPure <- function(X, y, method = 0L) {
stopifnot(is.matrix(X), is.numeric(y), NROW(y)==nrow(X))
- .Call("fastLm", X, y, package="RcppEigen")
+ .Call("fastLm", X, y, as.integer(method[1]), PACKAGE="RcppEigen")
}
fastLm <- function(X, ...) UseMethod("fastLm")
-fastLm.default <- function(X, y, ...) {
+fastLm.default <- function(X, y, method = 0L, ...) {
X <- as.matrix(X)
y <- as.numeric(y)
- res <- fastLmPure(X, y)
- names(res$coefficients) <- colnames(X)
+ res <- fastLmPure(X, y, as.integer(method[1]))
res$call <- match.call()
class(res) <- "fastLm"
@@ -47,13 +46,14 @@
}
summary.fastLm <- function(object, ...) {
- se <- object$stderr
- tval <- coef(object)/se
+ coef <- object$coefficients
+ se <- object$se
+ tval <- coef/se
- object$coefficients <- cbind(Estimate = object$coefficients,
- StdErr = se,
- t.value = tval,
- p.value = 2*pt(-abs(tval), df=object$df))
+ object$coefficients <- cbind(Estimate = coef,
+ "Std. Error" = se,
+ "t value" = tval,
+ "Pr(>|t|)" = 2*pt(-abs(tval), df=object$df))
## cf src/stats/R/lm.R and case with no weights and an intercept
f <- object$fitted.values
@@ -77,7 +77,7 @@
printCoefmat(x$coefficients, P.values=TRUE, has.Pvalue=TRUE)
digits <- max(3, getOption("digits") - 3)
- cat("\nResidual standard error: ", formatC(x$s, digits=digits), " on ",
+ cat("\nResidual standard error: ", formatC(sqrt(x$s2), digits=digits), " on ",
formatC(x$df), " degrees of freedom\n", sep="")
cat("Multiple R-squared: ", formatC(x$r.squared, digits=digits),
",\tAdjusted R-squared: ",formatC(x$adj.r.squared, digits=digits),
@@ -85,12 +85,12 @@
invisible(x)
}
-fastLm.formula <- function(formula, data=list(), ...) {
+fastLm.formula <- function(formula, data=list(), method = 0L, ...) {
mf <- model.frame(formula=formula, data=data)
X <- model.matrix(attr(mf, "terms"), data=mf)
y <- model.response(mf)
- res <- fastLm.default(X, y, ...)
+ res <- fastLm.default(X, y, method=method, ...)
res$call <- match.call()
# I think this is redundant. The formula is available as res$call$formula
res$formula <- formula
Modified: pkg/RcppEigen/inst/unitTests/runit.fastLm.R
===================================================================
--- pkg/RcppEigen/inst/unitTests/runit.fastLm.R 2011-06-17 18:32:54 UTC (rev 3093)
+++ pkg/RcppEigen/inst/unitTests/runit.fastLm.R 2011-06-21 02:28:02 UTC (rev 3094)
@@ -24,67 +24,43 @@
test.fastLm <- function() {
data(trees, package="datasets")
- flm <- .Call("fastLm",
- cbind(1, log(trees$Girth)),
- log(trees$Volume),
- package="RcppEigen")
+ flm0 <- .Call("fastLm",
+ cbind(1, log(trees$Girth)),
+ log(trees$Volume), 0L,
+ PACKAGE="RcppEigen")
+ flm1 <- .Call("fastLm",
+ cbind(1, log(trees$Girth)),
+ log(trees$Volume), 1L,
+ PACKAGE="RcppEigen")
+ flm2 <- .Call("fastLm",
+ cbind(1, log(trees$Girth)),
+ log(trees$Volume), 2L,
+ PACKAGE="RcppEigen")
+ flm3 <- .Call("fastLm",
+ cbind(1, log(trees$Girth)),
+ log(trees$Volume), 3L,
+ PACKAGE="RcppEigen")
fit <- lm(log(Volume) ~ log(Girth), data=trees)
-
- checkEquals(as.numeric(flm$coefficients), as.numeric(coef(fit)),
- msg="fastLm.coef")
- checkEquals(as.numeric(flm$stderr), as.numeric(coef(summary(fit))[,2]),
- msg="fastLm.stderr")
+ fitCoef <- unname(coef(fit))
+ fitStdErr <- unname(coef(summary(fit))[, "Std. Error", drop = TRUE])
+ checkEquals(flm0$coefficients, fitCoef, msg="fastLm0.coef")
+ checkEquals(flm0$se, fitStdErr, msg="fastLm0.stderr")
+ checkEquals(flm1$coefficients, fitCoef, msg="fastLm1.coef")
+ checkEquals(flm1$se, fitStdErr, msg="fastLm1.stderr")
+ checkEquals(flm2$coefficients, fitCoef, msg="fastLm2.coef")
+ checkEquals(flm2$se, fitStdErr, msg="fastLm2.stderr")
+ checkEquals(flm3$coefficients, fitCoef, msg="fastLm3.coef")
+ checkEquals(flm3$se, fitStdErr, msg="fastLm3.stderr")
}
-test.fastLm.Bench <- function() {
- data(trees, package="datasets")
- flm <- .Call("fastLmBench",
- cbind(1, log(trees$Girth)),
- log(trees$Volume),
- package="RcppEigen")
- fit <- lm(log(Volume) ~ log(Girth), data=trees)
- checkEquals(as.numeric(flm$coefficients), as.numeric(coef(fit)),
- msg="fastLm.coef")
- checkEquals(as.numeric(flm$se), as.numeric(coef(summary(fit))[,2]),
- msg="fastLm.stderr")
-}
-
-test.fastLm.Chol1 <- function() {
- data(trees, package="datasets")
- flm <- .Call("fastLmChol1",
- cbind(1, log(trees$Girth)),
- log(trees$Volume),
- package="RcppEigen")
- fit <- lm(log(Volume) ~ log(Girth), data=trees)
-
- checkEquals(as.numeric(flm$coefficients), as.numeric(coef(fit)),
- msg="fastLm.coef")
- checkEquals(as.numeric(flm$se), as.numeric(coef(summary(fit))[,2]),
- msg="fastLm.stderr")
-}
-
-test.fastLm.Chol2 <- function() {
- data(trees, package="datasets")
- flm <- .Call("fastLmChol2",
- cbind(1, log(trees$Girth)),
- log(trees$Volume),
- package="RcppEigen")
- fit <- lm(log(Volume) ~ log(Girth), data=trees)
-
- checkEquals(as.numeric(flm$coefficients), as.numeric(coef(fit)),
- msg="fastLm.coef")
- checkEquals(as.numeric(flm$se), as.numeric(coef(summary(fit))[,2]),
- msg="fastLm.stderr")
-}
-
test.fastLm.formula <- function() {
data(trees, package="datasets")
flm <- fastLm(log(Volume) ~ log(Girth), data=trees)
fit <- lm(log(Volume) ~ log(Girth), data=trees)
checkEquals(flm$coefficients, coef(fit), msg="fastLm.formula.coef")
- checkEquals(as.numeric(flm$stderr), as.numeric(coef(summary(fit))[,2]),
+ checkEquals(as.numeric(flm$se), as.numeric(coef(summary(fit))[,2]),
msg="fastLm.formula.stderr")
}
Modified: pkg/RcppEigen/man/fastLm.Rd
===================================================================
--- pkg/RcppEigen/man/fastLm.Rd 2011-06-17 18:32:54 UTC (rev 3093)
+++ pkg/RcppEigen/man/fastLm.Rd 2011-06-21 02:28:02 UTC (rev 3094)
@@ -6,14 +6,14 @@
\concept{regression}
\title{Bare-bones linear model fitting function}
\description{
- \code{fastLm} estimates the linear model using the a column-pivoted QR
- decomposition from the \code{Eigen} linear algebra library.
+ \code{fastLm} estimates the linear model using one of several methods
+ implemented using the \code{Eigen} linear algebra library.
}
\usage{
-fastLmPure(X, y)
+fastLmPure(X, y, method = 0L)
fastLm(X, \dots)
-\method{fastLm}{default}(X, y, \dots)
-\method{fastLm}{formula}(formula, data = list(), \dots)
+\method{fastLm}{default}(X, y, method = 0L, \dots)
+\method{fastLm}{formula}(formula, data = list(), method = 0L, \dots)
}
\arguments{
@@ -32,6 +32,10 @@
variables are taken from \code{environment(formula)},
typically the environment from which \code{lm} is called.}
+ \item{method}{an integer scalar with value 0 for the column-pivoted QR
+ decomposition, 1 for the unpivoted QR decomposition, 2 for the LLT
+ Cholesky, 3 for the LDLT Cholesky. Default is zero.}
+
\item{\dots}{not used}
}
\details{
Modified: pkg/RcppEigen/src/fastLm.cpp
===================================================================
--- pkg/RcppEigen/src/fastLm.cpp 2011-06-17 18:32:54 UTC (rev 3093)
+++ pkg/RcppEigen/src/fastLm.cpp 2011-06-21 02:28:02 UTC (rev 3094)
@@ -20,62 +20,127 @@
// in file.path(R.home("share"), "licenses"). If not, see
// <http://www.gnu.org/licenses/>.
-#include <RcppEigen.h>
+#include "fastLm.h"
-extern "C" SEXP fastLm(SEXP Xs, SEXP ys) {
- using namespace Eigen;
- using namespace Rcpp;
+using namespace Rcpp;
+
+lm::lm(const MMatrixXd &X, const MVectorXd &y) :
+ m_n(X.rows()), m_p(X.cols()), m_coef(m_p), m_r(NA_INTEGER), m_df(m_n - m_p),
+ m_perm(m_p), m_fitted(m_n), m_unsc(m_p, m_p) {}
+
+ColPivQR::ColPivQR(const MMatrixXd &X, const MVectorXd &y) : lm(X, y) {
+ PivQRType PQR(X); // decompose the model matrix
+ PermutationType Pmat = PQR.colsPermutation();
+ m_perm = Pmat.indices();
+ m_r = PQR.rank();
+ m_df = m_n - m_r;
+ MatrixXd R = PQR.matrixQR().topLeftCorner(m_p, m_p);
+
+ if (m_r < (int)m_p) { // The rank-deficient case
+ int nsing = m_p - m_r;
+ MatrixXd Atrunc = (X * Pmat).leftCols(m_r);
+ QRType QR(Atrunc);
+ VectorXd coefTrunc = QR.solve(y);
+ m_fitted = Atrunc * coefTrunc;
+ m_coef.topRows(m_r) = coefTrunc;
+ std::fill(m_coef.data() + m_r, m_coef.data() + m_p, NA_REAL);
+ m_coef = Pmat * m_coef;
+ MatrixXd Rtrunc = R.topLeftCorner(m_r, m_r);
+ MatrixXd Rinvtrunc = Rtrunc.triangularView<Eigen::Upper>().solve(MatrixXd::Identity(m_r, m_r));
+ m_unsc.topLeftCorner(m_r, m_r) = Rtrunc.setZero().selfadjointView<Eigen::Upper>().rankUpdate(Rinvtrunc);
+ m_unsc.rightCols(nsing).fill(NA_REAL);
+ m_unsc.bottomRows(nsing).fill(NA_REAL);
+ } else { // full rank
+ MatrixXd Rinv = R.triangularView<Eigen::Upper>().solve(MatrixXd::Identity(m_p, m_p));
+ m_unsc = R.setZero().selfadjointView<Eigen::Upper>().rankUpdate(Rinv);
+ m_coef = PQR.solve(y);
+ m_fitted = X * m_coef;
+ }
+}
+
+QR::QR(const MMatrixXd &X, const MVectorXd &y) : lm(X, y) {
+ QRType QR(X);
+ m_coef = QR.solve(y);
+ m_fitted = X * m_coef;
+ MatrixXd R = QR.matrixQR().topLeftCorner(m_p, m_p);
+ MatrixXd Rinv = R.triangularView<Eigen::Upper>().solve(MatrixXd::Identity(m_p, m_p));
+ m_unsc = R.setZero().selfadjointView<Eigen::Upper>().rankUpdate(Rinv);
+ for (Index i = 0; i < m_p; ++i) m_perm[i] = i;
+}
+
+LLT::LLT(const MMatrixXd &X, const MVectorXd &y) : lm(X, y) {
+ LLTType Ch(m_unsc.setZero().selfadjointView<Eigen::Lower>().rankUpdate(X.adjoint()));
+ m_coef = Ch.solve(X.adjoint() * y);
+ m_fitted = X * m_coef;
+ m_unsc = Ch.solve(MatrixXd::Identity(m_p, m_p));
+ for (Index i = 0; i < m_p; ++i) m_perm[i] = i;
+}
+
+LDLT::LDLT(const MMatrixXd &X, const MVectorXd &y) : lm(X, y) {
+ LDLTType Ch(m_unsc.setZero().selfadjointView<Eigen::Lower>().rankUpdate(X.adjoint()));
+ m_coef = Ch.solve(X.adjoint() * y);
+ for (Index i = 0; i < m_p; ++i) m_perm[i] = i; // for now. Maybe add a boolean flag to indicate if unsc is already reordered
+// m_perm = PermutationType(Ch.transpositionsP()).indices();
+ m_fitted = X * m_coef;
+ m_unsc = Ch.solve(MatrixXd::Identity(m_p, m_p));
+}
+
+enum {ColPivQR_t = 0, QR_t, LDLT_t, LLT_t, SVD_t, SymmEigen_t};
+
+static inline lm do_lm(const MMatrixXd &X, const MVectorXd &y, int type)
+ throw (std::invalid_argument) {
+ switch(type) {
+ case ColPivQR_t:
+ return ColPivQR(X, y);
+ case QR_t:
+ return QR(X, y);
+ case LLT_t:
+ return LLT(X, y);
+ case LDLT_t:
+ return LDLT(X, y);
+ // case SVD_t:
+ // return SVD(X, y);
+ // case SymmEigen_t:
+ // return SymmEigen(X, y);
+ }
+ throw std::invalid_argument("invalid type");
+ return ColPivQR(X, y); // -Wall
+}
+
+extern "C" SEXP fastLm(SEXP Xs, SEXP ys, SEXP type) {
try {
- NumericMatrix X(Xs);
- NumericVector y(ys);
- size_t n = X.nrow(), p = X.ncol();
- if ((size_t)y.size() != n)
+ const NumericMatrix X(Xs);
+ const NumericVector y(ys);
+ Index n = X.nrow(), p = X.ncol();
+ if ((Index)y.size() != n)
throw std::invalid_argument("size mismatch");
+ const MVectorXd yy(y.begin(), n);
+ lm ans = do_lm(MMatrixXd(X.begin(), n, p), yy, as<int>(type));
- MatrixXd A = Map<MatrixXd>(X.begin(), n, p); // shares storage
- VectorXd b = Map<VectorXd>(y.begin(), n);
-
- ColPivHouseholderQR<MatrixXd> mqr(A); // decompose the model matrix
- size_t r = mqr.rank();
- ptrdiff_t df = n - r;
- VectorXd coef(p);
- VectorXd fitted(n);
- VectorXd se(p);
- MatrixXd Rinv;
- if (r < p) { // Handle the rank-deficient case
- MatrixXd Atrunc = (A * mqr.colsPermutation()).leftCols(r);
- HouseholderQR<MatrixXd> QR(Atrunc);
- VectorXd coefTrunc = QR.solve(b);
- fitted = Atrunc * coefTrunc;
- MatrixXd R = QR.matrixQR().topLeftCorner(r, r);
- Rinv = TriangularView<MatrixXd, Upper>(R).solve(MatrixXd::Identity(r, r));
- coef.topRows(r) = coefTrunc;
- std::fill(coef.data() + r, coef.data() + p,
- std::numeric_limits<double>::quiet_NaN());
- coef = mqr.colsPermutation() * coef;
- std::fill(se.data() + r, se.data() + p,
- std::numeric_limits<double>::quiet_NaN());
- } else {
- coef = mqr.solve(b);
- fitted = A * coef;
- MatrixXd R = mqr.matrixQR().topRows(p);
- Rinv = TriangularView<MatrixXd, Upper>(R).solve(MatrixXd::Identity(p, p));
- }
- VectorXd res = b - fitted;
- double s = std::sqrt(res.squaredNorm()/df);
- se.topRows(r) = Rinv.rowwise().norm() * s;
- se = mqr.colsPermutation() * se;
+ NumericVector coef(ans.coef().data(), ans.coef().data() + p);
+ // install the names, if available
+ List dimnames = X.attr("dimnames");
+ RObject colnames = dimnames[1];
+ if (!(colnames).isNULL())
+ coef.attr("names") = clone(CharacterVector(colnames));
+ VectorXd resid = yy - ans.fitted();
+ double s2 = resid.squaredNorm()/ans.df();
+ PermutationType Pmat = PermutationType(p);
+ Pmat.indices() = ans.perm();
+ VectorXd dd = Pmat * ans.unsc().diagonal();
+ ArrayXd se = (dd.array() * s2).sqrt();
+
return List::create(_["coefficients"] = coef,
- _["rank"] = r,
- _["df.residual"] = df,
- _["perm"] = mqr.colsPermutation().indices(),
- _["stderr"] = se,
- _["s"] = s,
- _["residuals"] = res,
- _["fitted.values"] = fitted,
- _["Rinv"] = Rinv
- );
+ _["se"] = se,
+ _["rank"] = ans.rank(),
+ _["df.residual"] = ans.df(),
+ _["perm"] = IntegerVector(ans.perm().data(), ans.perm().data() + p),
+ _["residuals"] = resid,
+ _["s2"] = s2,
+ _["fitted.values"] = ans.fitted(),
+ _["unsc"] = ans.unsc());
+
} catch( std::exception &ex ) {
forward_exception_to_r( ex );
} catch(...) {
@@ -84,14 +149,15 @@
return R_NilValue; // -Wall
}
+#if 0
extern "C" SEXP fastLmBench(SEXP Xs, SEXP ys) {
using namespace Eigen;
using namespace Rcpp;
try {
NumericMatrix X(Xs);
NumericVector y(ys);
- size_t n = X.nrow(), p = X.ncol();
- ptrdiff_t df = n - p;
+ Indices n = X.nrow(), p = X.ncol();
+ int df = n - p;
if ((size_t)y.size() != n)
throw std::invalid_argument("size mismatch");
@@ -164,7 +230,7 @@
MatrixXd A = Map<MatrixXd>(X.begin(), n, p); // shares storage
VectorXd b = Map<VectorXd>(y.begin(), n);
- LDLT<MatrixXd> Ch(SelfAdjointView<MatrixXd, Lower>(MatrixXd::Zero(p, p)).rankUpdate(A.adjoint()));
+ LDLT Ch(MatrixXd::Zero(p, p).selfAdjointView(Eigen::Lower).rankUpdate(A.adjoint()));
VectorXd coef = Ch.solve(A.adjoint() * b);
double s2 = (b - A*coef).squaredNorm()/df;
@@ -215,3 +281,4 @@
return R_NilValue; // -Wall
}
+#endif
Added: pkg/RcppEigen/src/fastLm.h
===================================================================
--- pkg/RcppEigen/src/fastLm.h (rev 0)
+++ pkg/RcppEigen/src/fastLm.h 2011-06-21 02:28:02 UTC (rev 3094)
@@ -0,0 +1,76 @@
+// -*- mode: C++; c-indent-level: 4; c-basic-offset: 4; tab-width: 8 -*-
+#ifndef RCPPEIGEN_FASTLM_H
+#define RCPPEIGEN_FASTLM_H
+
+#include <RcppEigen.h>
+ // Basic matrix, vector and array types
+typedef Eigen::MatrixXd MatrixXd;
+typedef Eigen::VectorXd VectorXd;
+typedef Eigen::VectorXi VectorXi;
+typedef Eigen::ArrayXd ArrayXd;
+typedef Eigen::ArrayXXd ArrayXXd;
+typedef Eigen::ArrayXi ArrayXi;
+typedef Eigen::ArrayXXi ArrayXXi;
+typedef typename MatrixXd::Index Index;
+ // Mapped matrix and vector types (share storage)
+typedef Eigen::Map<MatrixXd> MMatrixXd;
+typedef Eigen::Map<VectorXd> MVectorXd;
+ // Views
+typedef Eigen::TriangularView<MatrixXd, Eigen::Upper> UpperTri;
+typedef Eigen::TriangularView<MatrixXd, Eigen::Lower> LowerTri;
+typedef Eigen::SelfAdjointView<MatrixXd, Eigen::Upper> UpperSym;
+typedef Eigen::SelfAdjointView<MatrixXd, Eigen::Lower> LowerSym;
+ // Decomposition types
+typedef Eigen::LLT<MatrixXd> LLTType;
+typedef Eigen::LDLT<MatrixXd> LDLTType;
+typedef Eigen::ColPivHouseholderQR<MatrixXd> PivQRType;
+typedef Eigen::HouseholderQR<MatrixXd> QRType;
+typedef Eigen::JacobiSVD<MatrixXd> SVDType;
+ // Types derived from decompositions
+typedef typename PivQRType::PermutationType PermutationType;
+typedef typename PermutationType::IndicesType IndicesType;
+
+class lm {
+protected:
+ Index m_n; /**< number of rows */
+ Index m_p; /**< number of columns */
+ VectorXd m_coef; /**< coefficient vector */
+ int m_r; /**< computed rank or NA_INTEGER */
+ int m_df; /**< residual degrees of freedom */
+ IndicesType m_perm; /**< column permutation */
+ VectorXd m_fitted; /**< vector of fitted values */
+ MatrixXd m_unsc; /**< unscaled variance-covariance matrix */
+public:
+ lm(const MMatrixXd&, const MVectorXd&);
+ const VectorXd& coef() const {return m_coef;}
+ int rank() const {return m_r;}
+ int df() const {return m_df;}
+ const IndicesType& perm() const {return m_perm;}
+ const VectorXd& fitted() const {return m_fitted;}
+ const MatrixXd& unsc() const {return m_unsc;}
+};
+
+class ColPivQR : public lm {
+public:
+ ColPivQR(const MMatrixXd&, const MVectorXd&);
+};
+
+class LLT : public lm {
+public:
+ LLT(const MMatrixXd&, const MVectorXd&);
+};
+
+class LDLT : public lm {
+public:
+ LDLT(const MMatrixXd&, const MVectorXd&);
+};
+
+class QR : public lm {
+public:
+ QR(const MMatrixXd&, const MVectorXd&);
+};
+
+extern "C" SEXP fastLm(SEXP Xs, SEXP ys, SEXP types);
+
+#endif
+
More information about the Rcpp-commits
mailing list