[Rcpp-commits] r685 - in pkg: . inst inst/examples inst/examples/FastLM
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Mon Feb 15 21:31:07 CET 2010
Author: edd
Date: 2010-02-15 21:31:06 +0100 (Mon, 15 Feb 2010)
New Revision: 685
Added:
pkg/inst/examples/FastLM/
pkg/inst/examples/FastLM/fastLMviaArmadillo.r
pkg/inst/examples/FastLM/fastLMviaGSL.r
Modified:
pkg/NEWS
pkg/inst/ChangeLog
Log:
new example for 'fast lm' using, respectively, GSL and Armadillo
Modified: pkg/NEWS
===================================================================
--- pkg/NEWS 2010-02-15 15:23:43 UTC (rev 684)
+++ pkg/NEWS 2010-02-15 20:31:06 UTC (rev 685)
@@ -1,3 +1,10 @@
+0.7.8 (under development)
+
+ o New examples for 'fast lm' using compiled code:
+ - using GNU GSL and a C interface
+ - using Armadillo (http://arma.sf.net) and a C++ interface
+ Armadillo is seen as faster for lack of extra copying
+
0.7.7 2010-02-14
o new template classes Rcpp::unary_call and Rcpp::binary_call
Modified: pkg/inst/ChangeLog
===================================================================
--- pkg/inst/ChangeLog 2010-02-15 15:23:43 UTC (rev 684)
+++ pkg/inst/ChangeLog 2010-02-15 20:31:06 UTC (rev 685)
@@ -1,6 +1,12 @@
+2010-02-15 Dirk Eddelbuettel <edd at debian.org>
+
+ * inst/examples/FastLM: New example directory with two 'fast lm'
+ implementations using, respectively, GNU GSL (in C) and Armadillo
+ (in C++); Armadillo is seen as faster for lack of extra copying
+
2010-02-14 Romain Francois <romain at r-enthusiasts.com>
- * src/Rcpp/Language.h: accepting Function in unary_call and
+ * src/Rcpp/Language.h: accepting Function in unary_call and
binary_call to support STL algorithms using R functions.
2010-02-14 Dirk Eddelbuettel <edd at debian.org>
Added: pkg/inst/examples/FastLM/fastLMviaArmadillo.r
===================================================================
--- pkg/inst/examples/FastLM/fastLMviaArmadillo.r (rev 0)
+++ pkg/inst/examples/FastLM/fastLMviaArmadillo.r 2010-02-15 20:31:06 UTC (rev 685)
@@ -0,0 +1,126 @@
+#!/usr/bin/r -t
+#
+# A faster lm() replacement based on Armadillo
+#
+# This improves on the previous version using GNU GSL
+#
+# Copyright (C) 2010 Dirk Eddelbuettel and Romain Francois
+#
+# This file is part of Rcpp.
+#
+# Rcpp is free software: you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 2 of the License, or
+# (at your option) any later version.
+#
+# Rcpp is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Rcpp. If not, see <http://www.gnu.org/licenses/>.
+
+suppressMessages(library(Rcpp))
+suppressMessages(library(inline))
+
+lmViaArmadillo <- function() {
+ ## a really simple C program calling three functions from the GSL
+ src <- '
+
+ SEXP rl = R_NilValue;
+ char *exceptionMesg = NULL;
+
+ try {
+ Rcpp::NumericVector yr(Ysexp);
+ Rcpp::NumericVector Xr(Xsexp);
+ std::vector<int> dims = Xr.attr("dim") ;
+ int n = dims[0];
+ int k = dims[1];
+ // use advanced armadillo constructors:
+
+ arma::mat X(Xr.begin(), n, k, false);
+ arma::colvec y(yr.begin(), yr.size());
+
+ //std::cout << ay << std::endl;
+ //std::cout << aX << std::endl;
+
+ arma::colvec coef = solve(X, y);
+ //std::cout << coef << std::endl;
+
+ // compute std. error of the coefficients
+ arma::colvec resid = y - X*coef;
+ double rss = trans(resid)*resid;
+ double sig2 = rss/(n-k);
+ arma::mat covmat = sig2 * arma::inv(arma::trans(X)*X);
+ //arma::colvec stderrest = sqrt(arma::diagview(covmat));
+
+ RcppMatrix<double> CovMat(k, k);
+ RcppVector<double> Coef(k);
+ for (int i = 0; i < k; i++) {
+ Coef(i) = coef(i);
+ for (int j = 0; j < k; j++) {
+ CovMat(i,j) = covmat(i,j);
+ }
+ }
+ //Rcpp::NumericVector coefr(k);
+ //Rcpp::NumericVector stderrestr(k);
+ //for (int i=0; i<k; i++) {
+ // coefr[i] = coef[i];
+ // stderrestr[i] = stderrestr[i];
+ //}
+
+ RcppResultSet rs;
+ rs.add("coef", Coef);
+ rs.add("covmat", CovMat);
+
+ rl = rs.getReturnList();
+
+ } catch(std::exception& ex) {
+ exceptionMesg = copyMessageToR(ex.what());
+ } catch(...) {
+ exceptionMesg = copyMessageToR("unknown reason");
+ }
+ if (exceptionMesg != NULL)
+ Rf_error(exceptionMesg);
+ return rl;
+ '
+
+ ## turn into a function that R can call
+ ## compileargs redundant on Debian/Ubuntu as gsl headers are found anyway
+ fun <- cfunction(signature(Ysexp="numeric", Xsexp="numeric"),
+ src,
+ includes="#include <armadillo>",
+ Rcpp=TRUE,
+ cppargs="-I/usr/include",
+ libargs="-larmadillo")
+}
+
+checkLmViaArmadillo <- function(y, X) {
+ fun <- lmViaArmadillo();
+ cat("Running lm via Armadillo\n")
+ res <- fun(y, X)
+ print(cbind(res[[1]], sqrt(diag(res[[2]]))))
+ cat("Running lm()\n")
+ print(summary(lm(y ~ X - 1)))
+ invisible(NULL)
+}
+
+timeLmViaArmadillo <- function(y, X, N) {
+ fun <- lmViaArmadillo();
+ meantime <- mean(replicate(N, system.time(fun(y, X))["elapsed"]), trim=0.05)
+}
+
+
+set.seed(42)
+n <- 5000
+k <- 9
+X <- cbind( rep(1,n), matrix(rnorm(n*k), ncol=k) )
+truecoef <- 1:(k+1)
+y <- as.numeric(X %*% truecoef + rnorm(n))
+
+N <- 100
+
+#checkLmViaArmadillo(y, X)
+mt <- timeLmViaArmadillo(y, X, N)
+cat("Armadillo: Running", N, "simulations yields (trimmed) mean time", mt, "\n")
Property changes on: pkg/inst/examples/FastLM/fastLMviaArmadillo.r
___________________________________________________________________
Name: svn:executable
+ *
Added: pkg/inst/examples/FastLM/fastLMviaGSL.r
===================================================================
--- pkg/inst/examples/FastLM/fastLMviaGSL.r (rev 0)
+++ pkg/inst/examples/FastLM/fastLMviaGSL.r 2010-02-15 20:31:06 UTC (rev 685)
@@ -0,0 +1,120 @@
+#!/usr/bin/r -t
+#
+# A faster lm() replacement based on GNU GSL
+#
+# This first appeared in the 'Intro to HPC tutorials'
+# but has been wrapped in inline::cfunction() here
+#
+# Copyright (C) 2010 Dirk Eddelbuettel and Romain Francois
+#
+# This file is part of Rcpp.
+#
+# Rcpp is free software: you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 2 of the License, or
+# (at your option) any later version.
+#
+# Rcpp is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Rcpp. If not, see <http://www.gnu.org/licenses/>.
+
+suppressMessages(library(Rcpp))
+suppressMessages(library(inline))
+
+lmViaGSL <- function() {
+ ## a really simple C program calling three functions from the GSL
+ src <- '
+
+ SEXP rl = R_NilValue;
+ char *exceptionMesg = NULL;
+
+ try {
+ RcppMatrixView<double> Xr(Xsexp);
+ RcppVectorView<double> Yr(Ysexp);
+
+ int i,j,n = Xr.dim1(), k = Xr.dim2();
+ double chisq;
+
+ gsl_matrix *X = gsl_matrix_alloc (n, k);
+ gsl_vector *y = gsl_vector_alloc (n);
+ gsl_vector *c = gsl_vector_alloc (k);
+ gsl_matrix *cov = gsl_matrix_alloc (k, k);
+ for (i = 0; i < n; i++) {
+ for (j = 0; j < k; j++)
+ gsl_matrix_set (X, i, j, Xr(i,j));
+ gsl_vector_set (y, i, Yr(i));
+ }
+
+ gsl_multifit_linear_workspace *work = gsl_multifit_linear_alloc (n, k);
+ gsl_multifit_linear (X, y, c, cov, &chisq, work);
+ gsl_multifit_linear_free (work);
+
+ RcppMatrix<double> CovMat(k, k);
+ RcppVector<double> Coef(k);
+ for (i = 0; i < k; i++) {
+ for (j = 0; j < k; j++)
+ CovMat(i,j) = gsl_matrix_get(cov,i,j);
+ Coef(i) = gsl_vector_get(c,i);
+ }
+ gsl_matrix_free (X);
+ gsl_vector_free (y);
+ gsl_vector_free (c);
+ gsl_matrix_free (cov);
+
+ RcppResultSet rs;
+ rs.add("coef", Coef);
+ rs.add("covmat", CovMat);
+
+ rl = rs.getReturnList();
+
+ } catch(std::exception& ex) {
+ exceptionMesg = copyMessageToR(ex.what());
+ } catch(...) {
+ exceptionMesg = copyMessageToR("unknown reason");
+ }
+ if (exceptionMesg != NULL)
+ Rf_error(exceptionMesg);
+ return rl;
+ '
+
+ ## turn into a function that R can call
+ ## compileargs redundant on Debian/Ubuntu as gsl headers are found anyway
+ fun <- cfunction(signature(Ysexp="numeric", Xsexp="numeric"),
+ src,
+ includes="#include <gsl/gsl_multifit.h>",
+ Rcpp=TRUE,
+ cppargs="-I/usr/include",
+ libargs="-lgsl -lgslcblas")
+}
+
+checkLmViaGSL <- function(y, X) {
+ fun <- lmViaGSL();
+ cat("Running lm via GSL\n")
+ res <- fun(y, X)
+ print(cbind(res[[1]], sqrt(diag(res[[2]]))))
+ cat("Running lm()\n")
+ print(summary(lm(y ~ X - 1)))
+ invisible(NULL)
+}
+
+timeLmViaGSL <- function(y, X, N) {
+ fun <- lmViaGSL();
+ meantime <- mean(replicate(N, system.time(fun(y, X))["elapsed"]), trim=0.05)
+}
+
+set.seed(42)
+n <- 5000
+k <- 9
+X <- cbind( rep(1,n), matrix(rnorm(n*k), ncol=k) )
+truecoef <- 1:(k+1)
+y <- as.numeric(X %*% truecoef + rnorm(n))
+
+N <- 100
+
+#checkLmViaGSL(y, X)
+mt <- timeLmViaGSL(y, X, N)
+cat("GSL: Running", N, "simulations yields (trimmed) mean time", mt, "\n")
Property changes on: pkg/inst/examples/FastLM/fastLMviaGSL.r
___________________________________________________________________
Name: svn:executable
+ *
More information about the Rcpp-commits
mailing list