[Rcpp-commits] r690 - pkg/inst/examples/FastLM
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Tue Feb 16 16:38:53 CET 2010
Author: edd
Date: 2010-02-16 16:38:53 +0100 (Tue, 16 Feb 2010)
New Revision: 690
Added:
pkg/inst/examples/FastLM/lmArmadillo.R
pkg/inst/examples/FastLM/lmGSL.R
Modified:
pkg/inst/examples/FastLM/benchmark.r
pkg/inst/examples/FastLM/fastLMviaArmadillo.r
pkg/inst/examples/FastLM/fastLMviaGSL.r
Log:
actual 'worker' functions factored out into new files
wrappers for individual or benchmark use source these
wrappers updated too
Modified: pkg/inst/examples/FastLM/benchmark.r
===================================================================
--- pkg/inst/examples/FastLM/benchmark.r 2010-02-16 15:19:16 UTC (rev 689)
+++ pkg/inst/examples/FastLM/benchmark.r 2010-02-16 15:38:53 UTC (rev 690)
@@ -2,7 +2,8 @@
#
# Comparison benchmark
#
-# This improves on the previous version using GNU GSL
+# This shows how Armadillo improves on the previous version using GNU GSL,
+# and how both are doing better than lm.fit()
#
# Copyright (C) 2010 Dirk Eddelbuettel and Romain Francois
#
@@ -21,101 +22,9 @@
# You should have received a copy of the GNU General Public License
# along with Rcpp. If not, see <http://www.gnu.org/licenses/>.
-suppressMessages(library(Rcpp))
-suppressMessages(library(inline))
+source("lmArmadillo.R")
+source("lmGSL.R")
-lmViaArmadillo <- function() {
- src <- '
-
- Rcpp::NumericVector yr(Ysexp);
- Rcpp::NumericVector Xr(Xsexp);
- std::vector<int> dims = Xr.attr("dim") ;
- int n = dims[0];
- int k = dims[1];
- // use advanced armadillo constructors:
-
- arma::mat X(Xr.begin(), n, k, false);
- arma::colvec y(yr.begin(), yr.size());
-
- //std::cout << ay << std::endl;
- //std::cout << aX << std::endl;
-
- arma::colvec coef = solve(X, y);
- //std::cout << coef << std::endl;
-
- // compute std. error of the coefficients
- arma::colvec resid = y - X*coef;
- double rss = trans(resid)*resid;
- double sig2 = rss/(n-k);
- arma::mat covmat = sig2 * arma::inv(arma::trans(X)*X);
-
- Rcpp::NumericVector coefr(k), stderrestr(k);
- for (int i=0; i<k; i++) {
- coefr[i] = coef[i];
- stderrestr[i] = sqrt(covmat(i,i));
- }
-
- Rcpp::Pairlist res(Rcpp::Named( "coef", coefr), Rcpp::Named( "stderr", stderrestr));
- return res;
- '
-
- ## turn into a function that R can call
- ## compileargs redundant on Debian/Ubuntu as gsl headers are found anyway
- fun <- cfunction(signature(Ysexp="numeric", Xsexp="numeric"),
- src,
- includes="#include <armadillo>",
- Rcpp=TRUE,
- cppargs="-I/usr/include",
- libargs="-larmadillo")
-}
-
-lmViaGSL <- function() {
- src <- '
-
- RcppVectorView<double> Yr(Ysexp);
- RcppMatrixView<double> Xr(Xsexp);
-
- int i,j,n = Xr.dim1(), k = Xr.dim2();
- double chisq;
-
- gsl_matrix *X = gsl_matrix_alloc (n, k);
- gsl_vector *y = gsl_vector_alloc (n);
- gsl_vector *c = gsl_vector_alloc (k);
- gsl_matrix *cov = gsl_matrix_alloc (k, k);
- for (i = 0; i < n; i++) {
- for (j = 0; j < k; j++)
- gsl_matrix_set (X, i, j, Xr(i,j));
- gsl_vector_set (y, i, Yr(i));
- }
-
- gsl_multifit_linear_workspace *work = gsl_multifit_linear_alloc (n, k);
- gsl_multifit_linear (X, y, c, cov, &chisq, work);
- gsl_multifit_linear_free (work);
-
- Rcpp::NumericVector coefr(k), stderrestr(k);
- for (i = 0; i < k; i++) {
- coefr(i) = gsl_vector_get(c,i);
- stderrestr(i) = gsl_matrix_get(cov,i,i);
- }
- gsl_matrix_free (X);
- gsl_vector_free (y);
- gsl_vector_free (c);
- gsl_matrix_free (cov);
-
- Rcpp::Pairlist res(Rcpp::Named( "coef", coefr), Rcpp::Named( "stderr", stderrestr));
- return res;
- '
-
- ## turn into a function that R can call
- ## compileargs redundant on Debian/Ubuntu as gsl headers are found anyway
- fun <- cfunction(signature(Ysexp="numeric", Xsexp="numeric"),
- src,
- includes="#include <gsl/gsl_multifit.h>",
- Rcpp=TRUE,
- cppargs="-I/usr/include",
- libargs="-lgsl -lgslcblas")
-}
-
set.seed(42)
n <- 100
k <- 9
@@ -125,8 +34,8 @@
N <- 500
-lmgsl <- lmViaGSL()
-lmarma <- lmViaArmadillo()
+lmgsl <- lmGSL()
+lmarma <- lmArmadillo()
tlm <- mean(replicate(N, system.time( lmfit <- lm(y ~ X - 1) )["elapsed"]), trim=0.05)
tlmfit <- mean(replicate(N, system.time(lmfitfit <- lm.fit(X, y))["elapsed"]), trim=0.05)
Modified: pkg/inst/examples/FastLM/fastLMviaArmadillo.r
===================================================================
--- pkg/inst/examples/FastLM/fastLMviaArmadillo.r 2010-02-16 15:19:16 UTC (rev 689)
+++ pkg/inst/examples/FastLM/fastLMviaArmadillo.r 2010-02-16 15:38:53 UTC (rev 690)
@@ -21,95 +21,22 @@
# You should have received a copy of the GNU General Public License
# along with Rcpp. If not, see <http://www.gnu.org/licenses/>.
-suppressMessages(library(Rcpp))
-suppressMessages(library(inline))
+source("lmArmadillo.R")
-lmViaArmadillo <- function() {
- src <- '
-
- SEXP rl = R_NilValue;
- char *exceptionMesg = NULL;
-
- try {
- Rcpp::NumericVector yr(Ysexp);
- Rcpp::NumericVector Xr(Xsexp);
- std::vector<int> dims = Xr.attr("dim") ;
- int n = dims[0];
- int k = dims[1];
- // use advanced armadillo constructors:
-
- arma::mat X(Xr.begin(), n, k, false);
- arma::colvec y(yr.begin(), yr.size());
-
- //std::cout << ay << std::endl;
- //std::cout << aX << std::endl;
-
- arma::colvec coef = solve(X, y);
- //std::cout << coef << std::endl;
-
- // compute std. error of the coefficients
- arma::colvec resid = y - X*coef;
- double rss = trans(resid)*resid;
- double sig2 = rss/(n-k);
- arma::mat covmat = sig2 * arma::inv(arma::trans(X)*X);
- //arma::colvec stderrest = sqrt(arma::diagview(covmat));
-
- RcppMatrix<double> CovMat(k, k);
- RcppVector<double> Coef(k);
- for (int i = 0; i < k; i++) {
- Coef(i) = coef(i);
- for (int j = 0; j < k; j++) {
- CovMat(i,j) = covmat(i,j);
- }
- }
- //Rcpp::NumericVector coefr(k);
- //Rcpp::NumericVector stderrestr(k);
- //for (int i=0; i<k; i++) {
- // coefr[i] = coef[i];
- // stderrestr[i] = stderrestr[i];
- //}
-
- RcppResultSet rs;
- rs.add("coef", Coef);
- rs.add("covmat", CovMat);
-
- rl = rs.getReturnList();
-
- } catch(std::exception& ex) {
- exceptionMesg = copyMessageToR(ex.what());
- } catch(...) {
- exceptionMesg = copyMessageToR("unknown reason");
- }
- if (exceptionMesg != NULL)
- Rf_error(exceptionMesg);
- return rl;
- '
-
- ## turn into a function that R can call
- ## compileargs redundant on Debian/Ubuntu as gsl headers are found anyway
- fun <- cfunction(signature(Ysexp="numeric", Xsexp="numeric"),
- src,
- includes="#include <armadillo>",
- Rcpp=TRUE,
- cppargs="-I/usr/include",
- libargs="-larmadillo")
-}
-
-checkLmViaArmadillo <- function(y, X) {
- fun <- lmViaArmadillo()
+checkLmArmadillo <- function(y, X) {
+ fun <- lmArmadillo()
res <- fun(y, X)
fit <- lm(y ~ X - 1)
rc <- all.equal( res[[1]], as.numeric(coef(fit))) &
- all.equal( res[[2]], matrix(as.numeric(vcov(fit)),ncol=10,byrow=FALSE))
+ all.equal( res[[2]], as.numeric(coef(summary(fit))[,2]))
invisible(rc)
}
-timeLmViaArmadillo <- function(y, X, N) {
- fun <- lmViaArmadillo();
+timeLmArmadillo <- function(y, X, N) {
+ fun <- lmArmadillo();
meantime <- mean(replicate(N, system.time(fun(y, X))["elapsed"]), trim=0.05)
}
-
set.seed(42)
n <- 5000
k <- 9
@@ -119,6 +46,6 @@
N <- 100
-stopifnot(checkLmViaArmadillo(y, X))
-mt <- timeLmViaArmadillo(y, X, N)
+stopifnot(checkLmArmadillo(y, X))
+mt <- timeLmArmadillo(y, X, N)
cat("Armadillo: Running", N, "simulations yields (trimmed) mean time", mt, "\n")
Modified: pkg/inst/examples/FastLM/fastLMviaGSL.r
===================================================================
--- pkg/inst/examples/FastLM/fastLMviaGSL.r 2010-02-16 15:19:16 UTC (rev 689)
+++ pkg/inst/examples/FastLM/fastLMviaGSL.r 2010-02-16 15:38:53 UTC (rev 690)
@@ -22,85 +22,19 @@
# You should have received a copy of the GNU General Public License
# along with Rcpp. If not, see <http://www.gnu.org/licenses/>.
-suppressMessages(library(Rcpp))
-suppressMessages(library(inline))
+source("lmGSL.R")
-lmViaGSL <- function() {
- src <- '
-
- SEXP rl = R_NilValue;
- char *exceptionMesg = NULL;
-
- try {
- RcppMatrixView<double> Xr(Xsexp);
- RcppVectorView<double> Yr(Ysexp);
-
- int i,j,n = Xr.dim1(), k = Xr.dim2();
- double chisq;
-
- gsl_matrix *X = gsl_matrix_alloc (n, k);
- gsl_vector *y = gsl_vector_alloc (n);
- gsl_vector *c = gsl_vector_alloc (k);
- gsl_matrix *cov = gsl_matrix_alloc (k, k);
- for (i = 0; i < n; i++) {
- for (j = 0; j < k; j++)
- gsl_matrix_set (X, i, j, Xr(i,j));
- gsl_vector_set (y, i, Yr(i));
- }
-
- gsl_multifit_linear_workspace *work = gsl_multifit_linear_alloc (n, k);
- gsl_multifit_linear (X, y, c, cov, &chisq, work);
- gsl_multifit_linear_free (work);
-
- RcppMatrix<double> CovMat(k, k);
- RcppVector<double> Coef(k);
- for (i = 0; i < k; i++) {
- for (j = 0; j < k; j++)
- CovMat(i,j) = gsl_matrix_get(cov,i,j);
- Coef(i) = gsl_vector_get(c,i);
- }
- gsl_matrix_free (X);
- gsl_vector_free (y);
- gsl_vector_free (c);
- gsl_matrix_free (cov);
-
- RcppResultSet rs;
- rs.add("coef", Coef);
- rs.add("covmat", CovMat);
-
- rl = rs.getReturnList();
-
- } catch(std::exception& ex) {
- exceptionMesg = copyMessageToR(ex.what());
- } catch(...) {
- exceptionMesg = copyMessageToR("unknown reason");
- }
- if (exceptionMesg != NULL)
- Rf_error(exceptionMesg);
- return rl;
- '
-
- ## turn into a function that R can call
- ## compileargs redundant on Debian/Ubuntu as gsl headers are found anyway
- fun <- cfunction(signature(Ysexp="numeric", Xsexp="numeric"),
- src,
- includes="#include <gsl/gsl_multifit.h>",
- Rcpp=TRUE,
- cppargs="-I/usr/include",
- libargs="-lgsl -lgslcblas")
-}
-
-checkLmViaGSL <- function(y, X) {
- fun <- lmViaGSL()
+checkLmGSL <- function(y, X) {
+ fun <- lmGSL()
res <- fun(y, X)
fit <- lm(y ~ X - 1)
rc <- all.equal( res[[1]], as.numeric(coef(fit))) &
- all.equal( res[[2]], matrix(as.numeric(vcov(fit)),ncol=10,byrow=FALSE))
+ all.equal( res[[2]], as.numeric(coef(summary(fit))[,2]))
invisible(rc)
}
-timeLmViaGSL <- function(y, X, N) {
- fun <- lmViaGSL();
+timeLmGSL <- function(y, X, N) {
+ fun <- lmGSL();
meantime <- mean(replicate(N, system.time(fun(y, X))["elapsed"]), trim=0.05)
}
@@ -113,6 +47,6 @@
N <- 100
-stopifnot(checkLmViaGSL(y, X))
-mt <- timeLmViaGSL(y, X, N)
+stopifnot(checkLmGSL(y, X))
+mt <- timeLmGSL(y, X, N)
cat("GSL: Running", N, "simulations yields (trimmed) mean time", mt, "\n")
Added: pkg/inst/examples/FastLM/lmArmadillo.R
===================================================================
--- pkg/inst/examples/FastLM/lmArmadillo.R (rev 0)
+++ pkg/inst/examples/FastLM/lmArmadillo.R 2010-02-16 15:38:53 UTC (rev 690)
@@ -0,0 +1,69 @@
+#
+# lm() via Armadillo -- improving on the previous GSL solution
+#
+# Copyright (C) 2010 Dirk Eddelbuettel and Romain Francois
+#
+# This file is part of Rcpp.
+#
+# Rcpp is free software: you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 2 of the License, or
+# (at your option) any later version.
+#
+# Rcpp is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Rcpp. If not, see <http://www.gnu.org/licenses/>.
+
+suppressMessages(require(Rcpp))
+suppressMessages(require(inline))
+
+lmArmadillo <- function() {
+ src <- '
+
+ Rcpp::NumericVector yr(Ysexp);
+ Rcpp::NumericVector Xr(Xsexp);
+ std::vector<int> dims = Xr.attr("dim") ;
+ int n = dims[0];
+ int k = dims[1];
+ // use advanced armadillo constructors:
+
+ arma::mat X(Xr.begin(), n, k, false);
+ arma::colvec y(yr.begin(), yr.size());
+
+ //std::cout << ay << std::endl;
+ //std::cout << aX << std::endl;
+
+ arma::colvec coef = solve(X, y);
+ //std::cout << coef << std::endl;
+
+ // compute std. error of the coefficients
+ arma::colvec resid = y - X*coef;
+ double rss = trans(resid)*resid;
+ double sig2 = rss/(n-k);
+ arma::mat covmat = sig2 * arma::inv(arma::trans(X)*X);
+
+ Rcpp::NumericVector coefr(k), stderrestr(k);
+ for (int i=0; i<k; i++) {
+ coefr[i] = coef[i];
+ stderrestr[i] = sqrt(covmat(i,i));
+ }
+
+ Rcpp::Pairlist res(Rcpp::Named( "coef", coefr),
+ Rcpp::Named( "stderr", stderrestr));
+ return res;
+ '
+
+ ## turn into a function that R can call
+ ## compileargs redundant on Debian/Ubuntu as gsl headers are found anyway
+ fun <- cfunction(signature(Ysexp="numeric", Xsexp="numeric"),
+ src,
+ includes="#include <armadillo>",
+ Rcpp=TRUE,
+ cppargs="-I/usr/include",
+ libargs="-larmadillo")
+}
+
Added: pkg/inst/examples/FastLM/lmGSL.R
===================================================================
--- pkg/inst/examples/FastLM/lmGSL.R (rev 0)
+++ pkg/inst/examples/FastLM/lmGSL.R 2010-02-16 15:38:53 UTC (rev 690)
@@ -0,0 +1,70 @@
+#
+# lm() via GNU GSL -- building on something from the Intro to HPC tutorials
+#
+# Copyright (C) 2010 Dirk Eddelbuettel and Romain Francois
+#
+# This file is part of Rcpp.
+#
+# Rcpp is free software: you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 2 of the License, or
+# (at your option) any later version.
+#
+# Rcpp is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Rcpp. If not, see <http://www.gnu.org/licenses/>.
+
+suppressMessages(require(Rcpp))
+suppressMessages(require(inline))
+
+lmGSL <- function() {
+
+ src <- '
+
+ RcppVectorView<double> Yr(Ysexp);
+ RcppMatrixView<double> Xr(Xsexp);
+
+ int i,j,n = Xr.dim1(), k = Xr.dim2();
+ double chisq;
+
+ gsl_matrix *X = gsl_matrix_alloc (n, k);
+ gsl_vector *y = gsl_vector_alloc (n);
+ gsl_vector *c = gsl_vector_alloc (k);
+ gsl_matrix *cov = gsl_matrix_alloc (k, k);
+ for (i = 0; i < n; i++) {
+ for (j = 0; j < k; j++)
+ gsl_matrix_set (X, i, j, Xr(i,j));
+ gsl_vector_set (y, i, Yr(i));
+ }
+
+ gsl_multifit_linear_workspace *work = gsl_multifit_linear_alloc (n, k);
+ gsl_multifit_linear (X, y, c, cov, &chisq, work);
+ gsl_multifit_linear_free (work);
+
+ Rcpp::NumericVector coefr(k), stderrestr(k);
+ for (i = 0; i < k; i++) {
+ coefr(i) = gsl_vector_get(c,i);
+ stderrestr(i) = sqrt(gsl_matrix_get(cov,i,i));
+ }
+ gsl_matrix_free (X);
+ gsl_vector_free (y);
+ gsl_vector_free (c);
+ gsl_matrix_free (cov);
+
+ Rcpp::Pairlist res(Rcpp::Named( "coef", coefr), Rcpp::Named( "stderr", stderrestr));
+ return res;
+ '
+
+ ## turn into a function that R can call
+ ## compileargs redundant on Debian/Ubuntu as gsl headers are found anyway
+ fun <- cfunction(signature(Ysexp="numeric", Xsexp="numeric"),
+ src,
+ includes="#include <gsl/gsl_multifit.h>",
+ Rcpp=TRUE,
+ cppargs="-I/usr/include",
+ libargs="-lgsl -lgslcblas")
+}
More information about the Rcpp-commits
mailing list