[Rcpp-commits] r685 - in pkg: . inst inst/examples inst/examples/FastLM

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Mon Feb 15 21:31:07 CET 2010


Author: edd
Date: 2010-02-15 21:31:06 +0100 (Mon, 15 Feb 2010)
New Revision: 685

Added:
   pkg/inst/examples/FastLM/
   pkg/inst/examples/FastLM/fastLMviaArmadillo.r
   pkg/inst/examples/FastLM/fastLMviaGSL.r
Modified:
   pkg/NEWS
   pkg/inst/ChangeLog
Log:
new example for 'fast lm' using, respectively, GSL and Armadillo


Modified: pkg/NEWS
===================================================================
--- pkg/NEWS	2010-02-15 15:23:43 UTC (rev 684)
+++ pkg/NEWS	2010-02-15 20:31:06 UTC (rev 685)
@@ -1,3 +1,10 @@
+0.7.8   (under development)
+
+    o	New examples for 'fast lm' using compiled code: 
+        - using GNU GSL and a C interface
+        - using Armadillo (http://arma.sf.net) and a C++ interface
+        Armadillo is seen as faster for lack of extra copying
+
 0.7.7	2010-02-14
 
     o	new template classes Rcpp::unary_call and Rcpp::binary_call

Modified: pkg/inst/ChangeLog
===================================================================
--- pkg/inst/ChangeLog	2010-02-15 15:23:43 UTC (rev 684)
+++ pkg/inst/ChangeLog	2010-02-15 20:31:06 UTC (rev 685)
@@ -1,6 +1,12 @@
+2010-02-15  Dirk Eddelbuettel  <edd at debian.org>
+
+	* inst/examples/FastLM: New example directory with two 'fast lm'
+	  implementations using, respectively, GNU GSL (in C) and Armadillo
+	  (in C++); Armadillo is seen as faster for lack of extra copying
+
 2010-02-14  Romain Francois <romain at r-enthusiasts.com>
 
-	* src/Rcpp/Language.h: accepting Function in unary_call and 
+	* src/Rcpp/Language.h: accepting Function in unary_call and
 	binary_call to support STL algorithms using R functions.
 
 2010-02-14  Dirk Eddelbuettel  <edd at debian.org>

Added: pkg/inst/examples/FastLM/fastLMviaArmadillo.r
===================================================================
--- pkg/inst/examples/FastLM/fastLMviaArmadillo.r	                        (rev 0)
+++ pkg/inst/examples/FastLM/fastLMviaArmadillo.r	2010-02-15 20:31:06 UTC (rev 685)
@@ -0,0 +1,126 @@
+#!/usr/bin/r -t
+#
+# A faster lm() replacement based on Armadillo
+#
+# This improves on the previous version using GNU GSL
+#
+# Copyright (C) 2010 Dirk Eddelbuettel and Romain Francois
+#
+# This file is part of Rcpp.
+#
+# Rcpp is free software: you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 2 of the License, or
+# (at your option) any later version.
+#
+# Rcpp is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Rcpp.  If not, see <http://www.gnu.org/licenses/>.
+
+suppressMessages(library(Rcpp))
+suppressMessages(library(inline))
+
+lmViaArmadillo <- function() {
+    ## a really simple C program calling three functions from the GSL
+    src <- '
+
+    SEXP rl = R_NilValue;
+    char *exceptionMesg = NULL;
+
+    try {
+        Rcpp::NumericVector yr(Ysexp);
+	Rcpp::NumericVector Xr(Xsexp);
+        std::vector<int> dims = Xr.attr("dim") ;
+        int n = dims[0];
+        int k = dims[1];
+        // use advanced armadillo constructors:
+
+        arma::mat X(Xr.begin(), n, k, false);
+        arma::colvec y(yr.begin(), yr.size());
+
+        //std::cout << ay << std::endl;
+        //std::cout << aX << std::endl;
+
+        arma::colvec coef = solve(X, y);
+        //std::cout << coef << std::endl;
+
+        // compute std. error of the coefficients
+        arma::colvec resid = y - X*coef;
+        double rss  = trans(resid)*resid;
+        double sig2 = rss/(n-k);
+        arma::mat covmat = sig2 * arma::inv(arma::trans(X)*X);
+        //arma::colvec stderrest = sqrt(arma::diagview(covmat));
+
+        RcppMatrix<double> CovMat(k, k);
+        RcppVector<double> Coef(k);
+        for (int i = 0; i < k; i++) {
+            Coef(i) = coef(i);
+            for (int j = 0; j < k; j++) {
+                CovMat(i,j) = covmat(i,j);
+            }
+        }
+        //Rcpp::NumericVector coefr(k);
+        //Rcpp::NumericVector stderrestr(k);
+        //for (int i=0; i<k; i++) {
+        //    coefr[i]      = coef[i];
+        //    stderrestr[i] = stderrestr[i];
+        //}
+
+        RcppResultSet rs;
+        rs.add("coef", Coef);
+        rs.add("covmat", CovMat);
+
+        rl = rs.getReturnList();
+
+    } catch(std::exception& ex) {
+        exceptionMesg = copyMessageToR(ex.what());
+    } catch(...) {
+        exceptionMesg = copyMessageToR("unknown reason");
+    }
+    if (exceptionMesg != NULL)
+        Rf_error(exceptionMesg);
+    return rl;
+    '
+
+    ## turn into a function that R can call
+    ## compileargs redundant on Debian/Ubuntu as gsl headers are found anyway
+    fun <- cfunction(signature(Ysexp="numeric", Xsexp="numeric"),
+                     src,
+                     includes="#include <armadillo>",
+                     Rcpp=TRUE,
+                     cppargs="-I/usr/include",
+                     libargs="-larmadillo")
+}
+
+checkLmViaArmadillo <- function(y, X) {
+    fun <- lmViaArmadillo();
+    cat("Running lm via Armadillo\n")
+    res <- fun(y, X)
+    print(cbind(res[[1]], sqrt(diag(res[[2]]))))
+    cat("Running lm()\n")
+    print(summary(lm(y ~ X - 1)))
+    invisible(NULL)
+}
+
+timeLmViaArmadillo <- function(y, X, N) {
+    fun <- lmViaArmadillo();
+    meantime <- mean(replicate(N, system.time(fun(y, X))["elapsed"]), trim=0.05)
+}
+
+
+set.seed(42)
+n <- 5000
+k <- 9
+X <- cbind( rep(1,n), matrix(rnorm(n*k), ncol=k) )
+truecoef <- 1:(k+1)
+y <- as.numeric(X %*% truecoef + rnorm(n))
+
+N <- 100
+
+#checkLmViaArmadillo(y, X)
+mt <- timeLmViaArmadillo(y, X, N)
+cat("Armadillo: Running", N, "simulations yields (trimmed) mean time", mt, "\n")


Property changes on: pkg/inst/examples/FastLM/fastLMviaArmadillo.r
___________________________________________________________________
Name: svn:executable
   + *

Added: pkg/inst/examples/FastLM/fastLMviaGSL.r
===================================================================
--- pkg/inst/examples/FastLM/fastLMviaGSL.r	                        (rev 0)
+++ pkg/inst/examples/FastLM/fastLMviaGSL.r	2010-02-15 20:31:06 UTC (rev 685)
@@ -0,0 +1,120 @@
+#!/usr/bin/r -t
+#
+# A faster lm() replacement based on GNU GSL
+#
+# This first appeared in the 'Intro to HPC tutorials'
+# but has been wrapped in inline::cfunction() here
+#
+# Copyright (C) 2010 Dirk Eddelbuettel and Romain Francois
+#
+# This file is part of Rcpp.
+#
+# Rcpp is free software: you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 2 of the License, or
+# (at your option) any later version.
+#
+# Rcpp is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Rcpp.  If not, see <http://www.gnu.org/licenses/>.
+
+suppressMessages(library(Rcpp))
+suppressMessages(library(inline))
+
+lmViaGSL <- function() {
+    ## a really simple C program calling three functions from the GSL
+    src <- '
+
+    SEXP rl = R_NilValue;
+    char *exceptionMesg = NULL;
+
+    try {
+        RcppMatrixView<double> Xr(Xsexp);
+        RcppVectorView<double> Yr(Ysexp);
+
+        int i,j,n = Xr.dim1(), k = Xr.dim2();
+        double chisq;
+
+        gsl_matrix *X = gsl_matrix_alloc (n, k);
+        gsl_vector *y = gsl_vector_alloc (n);
+        gsl_vector *c = gsl_vector_alloc (k);
+        gsl_matrix *cov = gsl_matrix_alloc (k, k);
+        for (i = 0; i < n; i++) {
+            for (j = 0; j < k; j++)
+                gsl_matrix_set (X, i, j, Xr(i,j));
+            gsl_vector_set (y, i, Yr(i));
+        }
+
+        gsl_multifit_linear_workspace *work = gsl_multifit_linear_alloc (n, k);
+        gsl_multifit_linear (X, y, c, cov, &chisq, work);
+        gsl_multifit_linear_free (work);
+
+        RcppMatrix<double> CovMat(k, k);
+        RcppVector<double> Coef(k);
+        for (i = 0; i < k; i++) {
+            for (j = 0; j < k; j++)
+                CovMat(i,j) = gsl_matrix_get(cov,i,j);
+            Coef(i) = gsl_vector_get(c,i);
+        }
+        gsl_matrix_free (X);
+        gsl_vector_free (y);
+        gsl_vector_free (c);
+        gsl_matrix_free (cov);
+
+        RcppResultSet rs;
+        rs.add("coef", Coef);
+        rs.add("covmat", CovMat);
+
+        rl = rs.getReturnList();
+
+    } catch(std::exception& ex) {
+        exceptionMesg = copyMessageToR(ex.what());
+    } catch(...) {
+        exceptionMesg = copyMessageToR("unknown reason");
+    }
+    if (exceptionMesg != NULL)
+        Rf_error(exceptionMesg);
+    return rl;
+    '
+
+    ## turn into a function that R can call
+    ## compileargs redundant on Debian/Ubuntu as gsl headers are found anyway
+    fun <- cfunction(signature(Ysexp="numeric", Xsexp="numeric"),
+                     src,
+                     includes="#include <gsl/gsl_multifit.h>",
+                     Rcpp=TRUE,
+                     cppargs="-I/usr/include",
+                     libargs="-lgsl -lgslcblas")
+}
+
+checkLmViaGSL <- function(y, X) {
+    fun <- lmViaGSL();
+    cat("Running lm via GSL\n")
+    res <- fun(y, X)
+    print(cbind(res[[1]], sqrt(diag(res[[2]]))))
+    cat("Running lm()\n")
+    print(summary(lm(y ~ X - 1)))
+    invisible(NULL)
+}
+
+timeLmViaGSL <- function(y, X, N) {
+    fun <- lmViaGSL();
+    meantime <- mean(replicate(N, system.time(fun(y, X))["elapsed"]), trim=0.05)
+}
+
+set.seed(42)
+n <- 5000
+k <- 9
+X <- cbind( rep(1,n), matrix(rnorm(n*k), ncol=k) )
+truecoef <- 1:(k+1)
+y <- as.numeric(X %*% truecoef + rnorm(n))
+
+N <- 100
+
+#checkLmViaGSL(y, X)
+mt <- timeLmViaGSL(y, X, N)
+cat("GSL: Running", N, "simulations yields (trimmed) mean time", mt, "\n")


Property changes on: pkg/inst/examples/FastLM/fastLMviaGSL.r
___________________________________________________________________
Name: svn:executable
   + *



More information about the Rcpp-commits mailing list