[Rcpp-commits] r690 - pkg/inst/examples/FastLM

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Tue Feb 16 16:38:53 CET 2010


Author: edd
Date: 2010-02-16 16:38:53 +0100 (Tue, 16 Feb 2010)
New Revision: 690

Added:
   pkg/inst/examples/FastLM/lmArmadillo.R
   pkg/inst/examples/FastLM/lmGSL.R
Modified:
   pkg/inst/examples/FastLM/benchmark.r
   pkg/inst/examples/FastLM/fastLMviaArmadillo.r
   pkg/inst/examples/FastLM/fastLMviaGSL.r
Log:
actual 'worker' functions factored out into new files
wrappers for individual or benchmark use source these
wrappers updated too


Modified: pkg/inst/examples/FastLM/benchmark.r
===================================================================
--- pkg/inst/examples/FastLM/benchmark.r	2010-02-16 15:19:16 UTC (rev 689)
+++ pkg/inst/examples/FastLM/benchmark.r	2010-02-16 15:38:53 UTC (rev 690)
@@ -2,7 +2,8 @@
 #
 # Comparison benchmark
 #
-# This improves on the previous version using GNU GSL
+# This shows how Armadillo improves on the previous version using GNU GSL,
+# and how both are doing better than lm.fit()
 #
 # Copyright (C) 2010 Dirk Eddelbuettel and Romain Francois
 #
@@ -21,101 +22,9 @@
 # You should have received a copy of the GNU General Public License
 # along with Rcpp.  If not, see <http://www.gnu.org/licenses/>.
 
-suppressMessages(library(Rcpp))
-suppressMessages(library(inline))
+source("lmArmadillo.R")
+source("lmGSL.R")
 
-lmViaArmadillo <- function() {
-    src <- '
-
-    Rcpp::NumericVector yr(Ysexp);
-    Rcpp::NumericVector Xr(Xsexp);
-    std::vector<int> dims = Xr.attr("dim") ;
-    int n = dims[0];
-    int k = dims[1];
-    // use advanced armadillo constructors:
-
-    arma::mat X(Xr.begin(), n, k, false);
-    arma::colvec y(yr.begin(), yr.size());
-
-    //std::cout << ay << std::endl;
-    //std::cout << aX << std::endl;
-
-    arma::colvec coef = solve(X, y);
-    //std::cout << coef << std::endl;
-
-    // compute std. error of the coefficients
-    arma::colvec resid = y - X*coef;
-    double rss  = trans(resid)*resid;
-    double sig2 = rss/(n-k);
-    arma::mat covmat = sig2 * arma::inv(arma::trans(X)*X);
-
-    Rcpp::NumericVector coefr(k), stderrestr(k);
-    for (int i=0; i<k; i++) {
-        coefr[i]      = coef[i];
-        stderrestr[i] = sqrt(covmat(i,i));
-    }
-
-    Rcpp::Pairlist res(Rcpp::Named( "coef", coefr), Rcpp::Named( "stderr", stderrestr));
-    return res;
-    '
-
-    ## turn into a function that R can call
-    ## compileargs redundant on Debian/Ubuntu as gsl headers are found anyway
-    fun <- cfunction(signature(Ysexp="numeric", Xsexp="numeric"),
-                     src,
-                     includes="#include <armadillo>",
-                     Rcpp=TRUE,
-                     cppargs="-I/usr/include",
-                     libargs="-larmadillo")
-}
-
-lmViaGSL <- function() {
-    src <- '
-
-    RcppVectorView<double> Yr(Ysexp);
-    RcppMatrixView<double> Xr(Xsexp);
-
-    int i,j,n = Xr.dim1(), k = Xr.dim2();
-    double chisq;
-
-    gsl_matrix *X = gsl_matrix_alloc (n, k);
-    gsl_vector *y = gsl_vector_alloc (n);
-    gsl_vector *c = gsl_vector_alloc (k);
-    gsl_matrix *cov = gsl_matrix_alloc (k, k);
-    for (i = 0; i < n; i++) {
-        for (j = 0; j < k; j++)
-            gsl_matrix_set (X, i, j, Xr(i,j));
-        gsl_vector_set (y, i, Yr(i));
-    }
-
-    gsl_multifit_linear_workspace *work = gsl_multifit_linear_alloc (n, k);
-    gsl_multifit_linear (X, y, c, cov, &chisq, work);
-    gsl_multifit_linear_free (work);
-
-    Rcpp::NumericVector coefr(k), stderrestr(k);
-    for (i = 0; i < k; i++) {
-        coefr(i) = gsl_vector_get(c,i);
-        stderrestr(i) = gsl_matrix_get(cov,i,i);
-    }
-    gsl_matrix_free (X);
-    gsl_vector_free (y);
-    gsl_vector_free (c);
-    gsl_matrix_free (cov);
-
-    Rcpp::Pairlist res(Rcpp::Named( "coef", coefr), Rcpp::Named( "stderr", stderrestr));
-    return res;
-    '
-
-    ## turn into a function that R can call
-    ## compileargs redundant on Debian/Ubuntu as gsl headers are found anyway
-    fun <- cfunction(signature(Ysexp="numeric", Xsexp="numeric"),
-                     src,
-                     includes="#include <gsl/gsl_multifit.h>",
-                     Rcpp=TRUE,
-                     cppargs="-I/usr/include",
-                     libargs="-lgsl -lgslcblas")
-}
-
 set.seed(42)
 n <- 100
 k <- 9
@@ -125,8 +34,8 @@
 
 N <- 500
 
-lmgsl <- lmViaGSL()
-lmarma <- lmViaArmadillo()
+lmgsl <- lmGSL()
+lmarma <- lmArmadillo()
 
 tlm <- mean(replicate(N, system.time( lmfit <- lm(y ~ X - 1) )["elapsed"]), trim=0.05)
 tlmfit <- mean(replicate(N, system.time(lmfitfit <- lm.fit(X, y))["elapsed"]), trim=0.05)

Modified: pkg/inst/examples/FastLM/fastLMviaArmadillo.r
===================================================================
--- pkg/inst/examples/FastLM/fastLMviaArmadillo.r	2010-02-16 15:19:16 UTC (rev 689)
+++ pkg/inst/examples/FastLM/fastLMviaArmadillo.r	2010-02-16 15:38:53 UTC (rev 690)
@@ -21,95 +21,22 @@
 # You should have received a copy of the GNU General Public License
 # along with Rcpp.  If not, see <http://www.gnu.org/licenses/>.
 
-suppressMessages(library(Rcpp))
-suppressMessages(library(inline))
+source("lmArmadillo.R")
 
-lmViaArmadillo <- function() {
-    src <- '
-
-    SEXP rl = R_NilValue;
-    char *exceptionMesg = NULL;
-
-    try {
-        Rcpp::NumericVector yr(Ysexp);
-	Rcpp::NumericVector Xr(Xsexp);
-        std::vector<int> dims = Xr.attr("dim") ;
-        int n = dims[0];
-        int k = dims[1];
-        // use advanced armadillo constructors:
-
-        arma::mat X(Xr.begin(), n, k, false);
-        arma::colvec y(yr.begin(), yr.size());
-
-        //std::cout << ay << std::endl;
-        //std::cout << aX << std::endl;
-
-        arma::colvec coef = solve(X, y);
-        //std::cout << coef << std::endl;
-
-        // compute std. error of the coefficients
-        arma::colvec resid = y - X*coef;
-        double rss  = trans(resid)*resid;
-        double sig2 = rss/(n-k);
-        arma::mat covmat = sig2 * arma::inv(arma::trans(X)*X);
-        //arma::colvec stderrest = sqrt(arma::diagview(covmat));
-
-        RcppMatrix<double> CovMat(k, k);
-        RcppVector<double> Coef(k);
-        for (int i = 0; i < k; i++) {
-            Coef(i) = coef(i);
-            for (int j = 0; j < k; j++) {
-                CovMat(i,j) = covmat(i,j);
-            }
-        }
-        //Rcpp::NumericVector coefr(k);
-        //Rcpp::NumericVector stderrestr(k);
-        //for (int i=0; i<k; i++) {
-        //    coefr[i]      = coef[i];
-        //    stderrestr[i] = stderrestr[i];
-        //}
-
-        RcppResultSet rs;
-        rs.add("coef", Coef);
-        rs.add("covmat", CovMat);
-
-        rl = rs.getReturnList();
-
-    } catch(std::exception& ex) {
-        exceptionMesg = copyMessageToR(ex.what());
-    } catch(...) {
-        exceptionMesg = copyMessageToR("unknown reason");
-    }
-    if (exceptionMesg != NULL)
-        Rf_error(exceptionMesg);
-    return rl;
-    '
-
-    ## turn into a function that R can call
-    ## compileargs redundant on Debian/Ubuntu as gsl headers are found anyway
-    fun <- cfunction(signature(Ysexp="numeric", Xsexp="numeric"),
-                     src,
-                     includes="#include <armadillo>",
-                     Rcpp=TRUE,
-                     cppargs="-I/usr/include",
-                     libargs="-larmadillo")
-}
-
-checkLmViaArmadillo <- function(y, X) {
-    fun <- lmViaArmadillo()
+checkLmArmadillo <- function(y, X) {
+    fun <- lmArmadillo()
     res <- fun(y, X)
     fit <- lm(y ~ X - 1)
     rc <- all.equal( res[[1]], as.numeric(coef(fit))) &
-          all.equal( res[[2]], matrix(as.numeric(vcov(fit)),ncol=10,byrow=FALSE))
+          all.equal( res[[2]], as.numeric(coef(summary(fit))[,2]))
     invisible(rc)
 }
 
-timeLmViaArmadillo <- function(y, X, N) {
-    fun <- lmViaArmadillo();
+timeLmArmadillo <- function(y, X, N) {
+    fun <- lmArmadillo();
     meantime <- mean(replicate(N, system.time(fun(y, X))["elapsed"]), trim=0.05)
 }
 
-
 set.seed(42)
 n <- 5000
 k <- 9
@@ -119,6 +46,6 @@
 
 N <- 100
 
-stopifnot(checkLmViaArmadillo(y, X))
-mt <- timeLmViaArmadillo(y, X, N)
+stopifnot(checkLmArmadillo(y, X))
+mt <- timeLmArmadillo(y, X, N)
 cat("Armadillo: Running", N, "simulations yields (trimmed) mean time", mt, "\n")

Modified: pkg/inst/examples/FastLM/fastLMviaGSL.r
===================================================================
--- pkg/inst/examples/FastLM/fastLMviaGSL.r	2010-02-16 15:19:16 UTC (rev 689)
+++ pkg/inst/examples/FastLM/fastLMviaGSL.r	2010-02-16 15:38:53 UTC (rev 690)
@@ -22,85 +22,19 @@
 # You should have received a copy of the GNU General Public License
 # along with Rcpp.  If not, see <http://www.gnu.org/licenses/>.
 
-suppressMessages(library(Rcpp))
-suppressMessages(library(inline))
+source("lmGSL.R")
 
-lmViaGSL <- function() {
-    src <- '
-
-    SEXP rl = R_NilValue;
-    char *exceptionMesg = NULL;
-
-    try {
-        RcppMatrixView<double> Xr(Xsexp);
-        RcppVectorView<double> Yr(Ysexp);
-
-        int i,j,n = Xr.dim1(), k = Xr.dim2();
-        double chisq;
-
-        gsl_matrix *X = gsl_matrix_alloc (n, k);
-        gsl_vector *y = gsl_vector_alloc (n);
-        gsl_vector *c = gsl_vector_alloc (k);
-        gsl_matrix *cov = gsl_matrix_alloc (k, k);
-        for (i = 0; i < n; i++) {
-            for (j = 0; j < k; j++)
-                gsl_matrix_set (X, i, j, Xr(i,j));
-            gsl_vector_set (y, i, Yr(i));
-        }
-
-        gsl_multifit_linear_workspace *work = gsl_multifit_linear_alloc (n, k);
-        gsl_multifit_linear (X, y, c, cov, &chisq, work);
-        gsl_multifit_linear_free (work);
-
-        RcppMatrix<double> CovMat(k, k);
-        RcppVector<double> Coef(k);
-        for (i = 0; i < k; i++) {
-            for (j = 0; j < k; j++)
-                CovMat(i,j) = gsl_matrix_get(cov,i,j);
-            Coef(i) = gsl_vector_get(c,i);
-        }
-        gsl_matrix_free (X);
-        gsl_vector_free (y);
-        gsl_vector_free (c);
-        gsl_matrix_free (cov);
-
-        RcppResultSet rs;
-        rs.add("coef", Coef);
-        rs.add("covmat", CovMat);
-
-        rl = rs.getReturnList();
-
-    } catch(std::exception& ex) {
-        exceptionMesg = copyMessageToR(ex.what());
-    } catch(...) {
-        exceptionMesg = copyMessageToR("unknown reason");
-    }
-    if (exceptionMesg != NULL)
-        Rf_error(exceptionMesg);
-    return rl;
-    '
-
-    ## turn into a function that R can call
-    ## compileargs redundant on Debian/Ubuntu as gsl headers are found anyway
-    fun <- cfunction(signature(Ysexp="numeric", Xsexp="numeric"),
-                     src,
-                     includes="#include <gsl/gsl_multifit.h>",
-                     Rcpp=TRUE,
-                     cppargs="-I/usr/include",
-                     libargs="-lgsl -lgslcblas")
-}
-
-checkLmViaGSL <- function(y, X) {
-    fun <- lmViaGSL()
+checkLmGSL <- function(y, X) {
+    fun <- lmGSL()
     res <- fun(y, X)
     fit <- lm(y ~ X - 1)
     rc <- all.equal( res[[1]], as.numeric(coef(fit))) &
-          all.equal( res[[2]], matrix(as.numeric(vcov(fit)),ncol=10,byrow=FALSE))
+          all.equal( res[[2]], as.numeric(coef(summary(fit))[,2]))
     invisible(rc)
 }
 
-timeLmViaGSL <- function(y, X, N) {
-    fun <- lmViaGSL();
+timeLmGSL <- function(y, X, N) {
+    fun <- lmGSL();
     meantime <- mean(replicate(N, system.time(fun(y, X))["elapsed"]), trim=0.05)
 }
 
@@ -113,6 +47,6 @@
 
 N <- 100
 
-stopifnot(checkLmViaGSL(y, X))
-mt <- timeLmViaGSL(y, X, N)
+stopifnot(checkLmGSL(y, X))
+mt <- timeLmGSL(y, X, N)
 cat("GSL: Running", N, "simulations yields (trimmed) mean time", mt, "\n")

Added: pkg/inst/examples/FastLM/lmArmadillo.R
===================================================================
--- pkg/inst/examples/FastLM/lmArmadillo.R	                        (rev 0)
+++ pkg/inst/examples/FastLM/lmArmadillo.R	2010-02-16 15:38:53 UTC (rev 690)
@@ -0,0 +1,69 @@
+#
+# lm() via Armadillo -- improving on the previous GSL solution
+#
+# Copyright (C) 2010 Dirk Eddelbuettel and Romain Francois
+#
+# This file is part of Rcpp.
+#
+# Rcpp is free software: you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 2 of the License, or
+# (at your option) any later version.
+#
+# Rcpp is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Rcpp.  If not, see <http://www.gnu.org/licenses/>.
+
+suppressMessages(require(Rcpp))
+suppressMessages(require(inline))
+
+lmArmadillo <- function() {
+    src <- '
+
+    Rcpp::NumericVector yr(Ysexp);
+    Rcpp::NumericVector Xr(Xsexp);
+    std::vector<int> dims = Xr.attr("dim") ;
+    int n = dims[0];
+    int k = dims[1];
+    // use advanced armadillo constructors:
+
+    arma::mat X(Xr.begin(), n, k, false);
+    arma::colvec y(yr.begin(), yr.size());
+
+    //std::cout << ay << std::endl;
+    //std::cout << aX << std::endl;
+
+    arma::colvec coef = solve(X, y);
+    //std::cout << coef << std::endl;
+
+    // compute std. error of the coefficients
+    arma::colvec resid = y - X*coef;
+    double rss  = trans(resid)*resid;
+    double sig2 = rss/(n-k);
+    arma::mat covmat = sig2 * arma::inv(arma::trans(X)*X);
+
+    Rcpp::NumericVector coefr(k), stderrestr(k);
+    for (int i=0; i<k; i++) {
+        coefr[i]      = coef[i];
+        stderrestr[i] = sqrt(covmat(i,i));
+    }
+
+    Rcpp::Pairlist res(Rcpp::Named( "coef", coefr),
+                       Rcpp::Named( "stderr", stderrestr));
+    return res;
+    '
+
+    ## turn into a function that R can call
+    ## compileargs redundant on Debian/Ubuntu as gsl headers are found anyway
+    fun <- cfunction(signature(Ysexp="numeric", Xsexp="numeric"),
+                     src,
+                     includes="#include <armadillo>",
+                     Rcpp=TRUE,
+                     cppargs="-I/usr/include",
+                     libargs="-larmadillo")
+}
+

Added: pkg/inst/examples/FastLM/lmGSL.R
===================================================================
--- pkg/inst/examples/FastLM/lmGSL.R	                        (rev 0)
+++ pkg/inst/examples/FastLM/lmGSL.R	2010-02-16 15:38:53 UTC (rev 690)
@@ -0,0 +1,70 @@
+#
+# lm() via GNU GSL -- building on something from the Intro to HPC tutorials
+#
+# Copyright (C) 2010 Dirk Eddelbuettel and Romain Francois
+#
+# This file is part of Rcpp.
+#
+# Rcpp is free software: you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 2 of the License, or
+# (at your option) any later version.
+#
+# Rcpp is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Rcpp.  If not, see <http://www.gnu.org/licenses/>.
+
+suppressMessages(require(Rcpp))
+suppressMessages(require(inline))
+
+lmGSL <- function() {
+
+    src <- '
+
+    RcppVectorView<double> Yr(Ysexp);
+    RcppMatrixView<double> Xr(Xsexp);
+
+    int i,j,n = Xr.dim1(), k = Xr.dim2();
+    double chisq;
+
+    gsl_matrix *X = gsl_matrix_alloc (n, k);
+    gsl_vector *y = gsl_vector_alloc (n);
+    gsl_vector *c = gsl_vector_alloc (k);
+    gsl_matrix *cov = gsl_matrix_alloc (k, k);
+    for (i = 0; i < n; i++) {
+        for (j = 0; j < k; j++)
+            gsl_matrix_set (X, i, j, Xr(i,j));
+        gsl_vector_set (y, i, Yr(i));
+    }
+
+    gsl_multifit_linear_workspace *work = gsl_multifit_linear_alloc (n, k);
+    gsl_multifit_linear (X, y, c, cov, &chisq, work);
+    gsl_multifit_linear_free (work);
+
+    Rcpp::NumericVector coefr(k), stderrestr(k);
+    for (i = 0; i < k; i++) {
+        coefr(i) = gsl_vector_get(c,i);
+        stderrestr(i) = sqrt(gsl_matrix_get(cov,i,i));
+    }
+    gsl_matrix_free (X);
+    gsl_vector_free (y);
+    gsl_vector_free (c);
+    gsl_matrix_free (cov);
+
+    Rcpp::Pairlist res(Rcpp::Named( "coef", coefr), Rcpp::Named( "stderr", stderrestr));
+    return res;
+    '
+
+    ## turn into a function that R can call
+    ## compileargs redundant on Debian/Ubuntu as gsl headers are found anyway
+    fun <- cfunction(signature(Ysexp="numeric", Xsexp="numeric"),
+                     src,
+                     includes="#include <gsl/gsl_multifit.h>",
+                     Rcpp=TRUE,
+                     cppargs="-I/usr/include",
+                     libargs="-lgsl -lgslcblas")
+}



More information about the Rcpp-commits mailing list