[Rcpp-commits] r4278 - in pkg/RcppArmadillo: . inst inst/include inst/include/RcppArmadilloExtensions inst/unitTests inst/unitTests/cpp tests

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Tue Mar 12 03:07:44 CET 2013


Author: edd
Date: 2013-03-12 03:07:42 +0100 (Tue, 12 Mar 2013)
New Revision: 4278

Added:
   pkg/RcppArmadillo/inst/include/RcppArmadilloExtensions/
   pkg/RcppArmadillo/inst/include/RcppArmadilloExtensions/sample.h
   pkg/RcppArmadillo/inst/unitTests/cpp/
   pkg/RcppArmadillo/inst/unitTests/cpp/sample.cpp
   pkg/RcppArmadillo/inst/unitTests/runit.sample.R
Modified:
   pkg/RcppArmadillo/ChangeLog
   pkg/RcppArmadillo/DESCRIPTION
   pkg/RcppArmadillo/inst/NEWS.Rd
   pkg/RcppArmadillo/tests/doRUnit.R
Log:
committinh Christian Gunning's nice sample() contribution, including unit tests (which we reworked slightly)


Modified: pkg/RcppArmadillo/ChangeLog
===================================================================
--- pkg/RcppArmadillo/ChangeLog	2013-03-08 16:15:20 UTC (rev 4277)
+++ pkg/RcppArmadillo/ChangeLog	2013-03-12 02:07:42 UTC (rev 4278)
@@ -1,3 +1,15 @@
+2013-03-11  Dirk Eddelbuettel  <edd at debian.org>
+
+	* inst/unitTests/cpp/sample.cpp: Regrouping C++ tests for sample()
+	* inst/unitTests/runit.sample.R: Rewritten for sole external cpp file
+
+2013-03-10  Dirk Eddelbuettel  <edd at debian.org>
+
+	* inst/include/add-ons/sample.h: Added sample() function contributed
+	by Christian Gunning
+	* inst/unitTests/runit.sample.R: Added unit test for sample()
+	contributed by Christian Gunning
+
 2013-03-01  Dirk Eddelbuettel  <edd at debian.org>
 
 	* DESCRIPTION: Release 0.3.800.0

Modified: pkg/RcppArmadillo/DESCRIPTION
===================================================================
--- pkg/RcppArmadillo/DESCRIPTION	2013-03-08 16:15:20 UTC (rev 4277)
+++ pkg/RcppArmadillo/DESCRIPTION	2013-03-12 02:07:42 UTC (rev 4278)
@@ -1,7 +1,7 @@
 Package: RcppArmadillo
 Type: Package
 Title: Rcpp integration for Armadillo templated linear algebra library
-Version: 0.3.800.0
+Version: 0.3.800.0.1
 Date: $Date$
 Author: Romain Francois, Dirk Eddelbuettel and Doug Bates
 Maintainer: Dirk Eddelbuettel <edd at debian.org>

Modified: pkg/RcppArmadillo/inst/NEWS.Rd
===================================================================
--- pkg/RcppArmadillo/inst/NEWS.Rd	2013-03-08 16:15:20 UTC (rev 4277)
+++ pkg/RcppArmadillo/inst/NEWS.Rd	2013-03-12 02:07:42 UTC (rev 4278)
@@ -2,6 +2,13 @@
 \title{News for Package 'RcppArmadillo'}
 \newcommand{\cpkg}{\href{http://CRAN.R-project.org/package=#1}{\pkg{#1}}}
 
+\section{Changes in RcppArmadillo version 0.3.800.0.1 (2013-03-31)}{
+  \itemize{
+    \item UNRELEASED -- version and date subject to change
+    \item Added new sample() function contributed by Christian Gunning
+  }
+}
+
 \section{Changes in RcppArmadillo version 0.3.800.0 (2013-03-01)}{
   \itemize{
     \item Upgraded to Armadillo release Version 3.800.0 (Miami Beach)

Added: pkg/RcppArmadillo/inst/include/RcppArmadilloExtensions/sample.h
===================================================================
--- pkg/RcppArmadillo/inst/include/RcppArmadilloExtensions/sample.h	                        (rev 0)
+++ pkg/RcppArmadillo/inst/include/RcppArmadilloExtensions/sample.h	2013-03-12 02:07:42 UTC (rev 4278)
@@ -0,0 +1,168 @@
+// -*- mode: C++; c-indent-level: 4; c-basic-offset: 4; tab-width: 8 -*-
+//
+// sample.cpp: Rcpp/Armadillo equivalent to R's sample().  
+// This is to be used in C++ functions, and should *not* be called from R.
+// It should yield identical results to R in most cases
+// (note that Walker's alias method is not implemented).
+//
+// Copyright (C)  2012 -2013  Christian Gunning
+//
+// This file is part of RcppArmadillo.
+//
+// RcppArmadillo is free software: you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 2 of the License, or
+// (at your option) any later version.
+//
+// RcppArmadillo is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with RcppArmadillo.  If not, see <http://www.gnu.org/licenses/>.
+
+#include <RcppArmadillo.h>
+
+
+namespace Rcpp{
+
+    namespace RcppArmadillo{
+
+        //Declarations
+        void SampleReplace( IntegerVector &index, int nOrig, int size);
+        void SampleNoReplace( IntegerVector &index, int nOrig, int size);
+        void ProbSampleReplace(IntegerVector &index, int nOrig, int size, arma::vec &prob);
+        void ProbSampleNoReplace(IntegerVector &index, int nOrig, int size, arma::vec &prob);
+        void FixProb(NumericVector &prob, int size, bool replace);
+
+        template <class T> 
+        T sample(const T &x, const int size, const bool replace, NumericVector prob_ = NumericVector(0) ) {
+            // Templated sample -- should work on any Rcpp Vector
+            int ii, jj;
+            int nOrig = x.size();
+            int probsize = prob_.size();
+            // Create return object
+            T ret(size);
+            if ( size > nOrig && !replace) throw std::range_error( "Tried to sample more elements than in x without replacement" ) ;
+            // Store the sample ids here, modify in-place
+            IntegerVector index(size);
+            if (probsize == 0) { // No probabilities given
+                if (replace) {
+                    SampleReplace(index, nOrig, size);
+                } else {
+                    SampleNoReplace(index, nOrig, size);
+                }
+            } else { 
+                if (probsize != nOrig) throw std::range_error( "Number of probabilities must equal input vector length" ) ;
+                // normalize, error-check probability vector
+                // fixprob will be modified in-place
+                NumericVector fixprob = clone(prob_);
+                FixProb(fixprob, size, replace);
+                // 
+                // Copy the given probabilities into an arma vector
+                arma::vec prob(fixprob.begin(), fixprob.size());
+                if (replace) {
+                    // check for walker alias conditions here??
+                    ProbSampleReplace(index, nOrig, size, prob);
+                } else {
+                    ProbSampleNoReplace(index, nOrig, size, prob);
+                }
+            }
+            // copy the results into the return vector
+            for (ii=0; ii<size; ii++) {
+                jj = index[ii];
+                ret[ii] = x[jj];
+            }
+            return(ret);
+        }
+
+        void SampleReplace( IntegerVector &index, int nOrig, int size) {
+            int ii;
+            for (ii = 0; ii < size; ii++) {
+                index[ii] = nOrig * unif_rand();
+            }
+        }
+
+        void SampleNoReplace( IntegerVector &index, int nOrig, int size) {
+            int ii, jj;
+            IntegerVector sub(nOrig);
+            for (ii = 0; ii < nOrig; ii++) {
+                sub[ii] = ii;
+            }
+            for (ii = 0; ii < size; ii++) {
+                jj = nOrig * unif_rand();
+                index[ii] = sub[jj];
+                // replace sampled element with last, decrement
+                sub[jj] = sub[--nOrig];
+            }
+        }
+
+        void FixProb(NumericVector &prob, const int size, const bool replace) {
+            // prob is modified in-place.  
+            // Is this better than cloning it?
+            double sum = 0.0;
+            int ii, nPos = 0;
+            int nn = prob.size();
+            for (ii = 0; ii < nn; ii++) {
+                if (!R_FINITE(prob[ii])) //does this work??
+                    throw std::range_error( "NAs not allowed in probability" ) ;
+                if (prob[ii] < 0.0)
+                    throw std::range_error( "Negative probabilities not allowed" ) ;
+                if (prob[ii] > 0.0) {
+                    nPos++;
+                    sum += prob[ii];
+                }
+            }
+            if (nPos == 0 || (!replace && size > nPos)) {
+                throw std::range_error("Not enough positive probabilities");
+            }
+            prob = prob / sum;  //sugar
+        }
+
+        // Unequal probability sampling with replacement 
+        void ProbSampleReplace(IntegerVector &index, int nOrig, int size, arma::vec &prob){
+            double rU;
+            int ii, jj;
+            int nOrig_1 = nOrig - 1;
+            arma::uvec perm = arma::sort_index(prob, 1); //descending sort of index
+            prob = arma::sort(prob, 1);  // descending sort of prob
+            // cumulative probabilities 
+            prob = arma::cumsum(prob);
+            // compute the sample 
+            for (ii = 0; ii < size; ii++) {
+                rU = unif_rand();
+                for (jj = 0; jj < nOrig_1; jj++) {
+                    if (rU <= prob[jj])
+                        break;
+                }
+                index[ii] = perm[jj];
+            }
+        }
+
+        // Unequal probability sampling without replacement 
+        void ProbSampleNoReplace(IntegerVector &index, int nOrig, int size, arma::vec &prob){
+            int ii, jj, kk;
+            int nOrig_1 = nOrig - 1;
+            double rT, mass, totalmass = 1.0;
+            arma::uvec perm = arma::sort_index(prob, 1); //descending sort of index
+            prob = arma::sort(prob, 1);  // descending sort of prob
+            // compute the sample 
+            for (ii = 0; ii < size; ii++, nOrig_1--) {
+                rT = totalmass * unif_rand();
+                mass = 0;
+                for (jj = 0; jj < nOrig_1; jj++) {
+                    mass += prob[jj];
+                    if (rT <= mass)
+                        break;
+                }
+                index[ii] = perm[jj];
+                totalmass -= prob[jj];
+                for ( kk = jj; kk < nOrig_1; kk++) {
+                    prob[kk] = prob[kk+1];
+                    perm[kk] = perm[kk+1];
+                }
+            }
+        }
+    }
+}

Added: pkg/RcppArmadillo/inst/unitTests/cpp/sample.cpp
===================================================================
--- pkg/RcppArmadillo/inst/unitTests/cpp/sample.cpp	                        (rev 0)
+++ pkg/RcppArmadillo/inst/unitTests/cpp/sample.cpp	2013-03-12 02:07:42 UTC (rev 4278)
@@ -0,0 +1,68 @@
+#!/usr/bin/r -t
+#
+# Copyright (C) 2012 - 2013  Christian Gunning and Dirk Eddelbuettel
+#
+# This file is part of RcppArmadillo.
+#
+# RcppArmadillo is free software: you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 2 of the License, or
+# (at your option) any later version.
+#
+# RcppArmadillo is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with RcppArmadillo.  If not, see <http://www.gnu.org/licenses/>.
+
+// [[Rcpp::depends(RcppArmadillo)]]
+#include <RcppArmadillo.h>
+
+#include <add-ons/sample.h>
+
+using namespace Rcpp;
+
+// function defns exported to R -- instantiate templated sample
+// functions are identical except for class of sampled vector and return val
+
+// [[Rcpp::export]]
+IntegerVector csample_integer( IntegerVector x, int size, bool replace, 
+			       NumericVector prob = NumericVector::create()) {
+    RNGScope scope;
+    IntegerVector ret = RcppArmadillo::sample(x, size, replace, prob);
+    return ret;
+}
+
+// [[Rcpp::export]]
+NumericVector csample_numeric( NumericVector x, int size, bool replace, 
+			       NumericVector prob = NumericVector::create()) {
+    RNGScope scope;
+    NumericVector ret = RcppArmadillo::sample(x, size, replace, prob);
+    return ret;
+}
+
+// [[Rcpp::export]]
+ComplexVector csample_complex( ComplexVector x, int size, bool replace, 
+			       NumericVector prob = NumericVector::create()) {
+    RNGScope scope;
+    ComplexVector ret = RcppArmadillo::sample(x, size, replace, prob);
+    return ret;
+}
+ 
+// [[Rcpp::export]]
+CharacterVector csample_character( CharacterVector x, int size, bool replace, 
+				   NumericVector prob = NumericVector::create()) {
+    RNGScope scope;
+    CharacterVector ret = RcppArmadillo::sample(x, size, replace, prob);
+    return ret;
+}
+
+// [[Rcpp::export]]
+LogicalVector csample_logical( LogicalVector x, int size, bool replace, 
+			       NumericVector prob = NumericVector::create()) {
+    RNGScope scope;
+    LogicalVector ret = RcppArmadillo::sample(x, size, replace, prob);
+    return ret;
+}

Added: pkg/RcppArmadillo/inst/unitTests/runit.sample.R
===================================================================
--- pkg/RcppArmadillo/inst/unitTests/runit.sample.R	                        (rev 0)
+++ pkg/RcppArmadillo/inst/unitTests/runit.sample.R	2013-03-12 02:07:42 UTC (rev 4278)
@@ -0,0 +1,92 @@
+#!/usr/bin/r -t
+#
+# Copyright (C) 2012 - 2013  Christian Gunning
+#
+# This file is part of RcppArmadillo.
+#
+# RcppArmadillo is free software: you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 2 of the License, or
+# (at your option) any later version.
+#
+# RcppArmadillo is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with RcppArmadillo.  If not, see <http://www.gnu.org/licenses/>.
+
+.setUp <- function(){
+    suppressMessages(require(RcppArmadillo))
+    sourceCpp(file.path(pathRcppArmadilloTests, "cpp/sample.cpp"))
+}
+
+test.sample <- function() {
+    ## Seed needs to be reset to compare R to C++
+    seed <- 441
+
+    ## Input vectors to sample
+    N <- 100
+
+    ## Number of samples
+    ## works for size == N?!
+    size <- N%/%2
+
+    ## set up S3 dispatching,
+    ## simplifies lapply(tests, ...) below
+    csample <- function(x, ...) UseMethod("csample")
+    csample.numeric <- csample_numeric
+    csample.integer <- csample_integer
+    csample.complex <- csample_complex
+    csample.character <- csample_character
+    csample.logical <- csample_logical
+
+    ## all atomic vector classes except raw
+    ## for each list element, check that sampling works
+    ## with and without replacement, with and without prob
+    tests <- list()
+    tests <- within(tests, {
+        int <- 1:N
+        num <- (1:N)/10
+        cpx <- (1:N)/10 + 1i
+        char <-rep(letters, 4)[1:N]
+        bool <- rep(c(T,F), times=N/2)
+    })
+
+    ## Un-normalized probs
+    probs <- seq(from=0, to=1, length.out=N)
+    #probs <- probs/sum(probs)
+
+
+    ## Run the S3 generic function csample
+    ## and associated R function on each data type
+    ## ReplaceYN.ProbsYN
+    lapply(tests, function(dat) {
+        .class <- class(dat)
+        set.seed(seed)
+        ## R
+        r.no.no <- sample(dat, size, replace=F)
+        set.seed(seed)
+        r.yes.no <- sample(dat, size, replace=T)
+        set.seed(seed)
+        r.no.yes <- sample(dat, size, replace=F, prob=1:N)
+        set.seed(seed)
+        r.yes.yes <- sample(dat, size, replace=T, prob=1:N)
+        ## C
+        set.seed(seed)
+        c.no.no <- csample(dat, size, replace=F)
+        set.seed(seed)
+        c.yes.no <- csample(dat, size, replace=T)
+        set.seed(seed)
+        c.no.yes <- csample(dat, size, replace=F, prob=1:N)
+        set.seed(seed)
+        c.yes.yes <- csample(dat, size, replace=T, prob=1:N)
+        ## comparisons
+        checkEquals(r.no.no, c.no.no, msg=sprintf("sample.%s.no_replace.no_prob",.class))
+        checkEquals(r.yes.no, c.yes.no, msg=sprintf("sample.%s.replace.no_prob",.class))
+        ## the following don't pass
+        checkEquals(r.no.yes, c.no.yes, msg=sprintf("sample.%s.no_replace.prob",.class))
+        checkEquals(r.yes.yes, c.yes.yes, msg=sprintf("sample.%s.replace.prob",.class))
+    })
+}

Modified: pkg/RcppArmadillo/tests/doRUnit.R
===================================================================
--- pkg/RcppArmadillo/tests/doRUnit.R	2013-03-08 16:15:20 UTC (rev 4277)
+++ pkg/RcppArmadillo/tests/doRUnit.R	2013-03-12 02:07:42 UTC (rev 4278)
@@ -13,13 +13,14 @@
 ## ----> put the bulk of the code e.g. in  ../inst/unitTests/runTests.R :
 
 if(require("RUnit", quietly = TRUE)) {
+
     pkg <- "RcppArmadillo"
-
     require( pkg, character.only=TRUE)
-
     path <- system.file("unitTests", package = pkg)
-
     stopifnot(file.exists(path), file.info(path.expand(path))$isdir)
+    pathRcppArmadilloTests <<- system.file("unitTests", package = pkg)
+    stopifnot(file.exists(pathRcppArmadilloTests),
+              file.info(path.expand(pathRcppArmadilloTests))$isdir)
 
     ## without this, we get unit test failures
     Sys.setenv( R_TESTS = "" )



More information about the Rcpp-commits mailing list