[Rcpp-devel] Sugar seems slower than Rcpp.

Romain Francois romain at r-enthusiasts.com
Fri Jan 7 12:54:09 CET 2011


Le 05/01/11 14:01, Dirk Eddelbuettel a écrit :
>
> On 5 January 2011 at 10:55, Cedric Ginestet wrote:
> | Dear All,
> |
> | Here are some simulations that I have run this morning. Romain's suggestion to
> | compute xV.size() before the loop and Douglas' idea of using accumulate appear
> | to work best. However, both are substantially slower than the r-base function.
> |
> | I have also included two more versions: (i) one similar to Romain's but using
> | pre-incrementation in the loop and (ii) one using the iterator in the loop.
> | Another option may be to use the C++ boost library. I don't know if anyone on
> | this list has experience with using boost.
> |
> | See the results of the simulations below (N=1000 data sets).
> | Ced
> |
> | #####################################################################
> | ## Functions.
> | Summing1<- cxxfunction(signature(x="numeric"), '
> |       NumericVector xV(x);
> |       double out = sum(xV);
> |       return wrap(out);
> | ',plugin="Rcpp")
> | Summing2<- cxxfunction(signature(x="numeric"), '
> |       NumericVector xV(x);
> |       double out = 0.0;
> |       for(int i=0; i<xV.size(); i++) out += xV[i];
> |       return wrap(out);
> | ',plugin="Rcpp")
> | Summing3<- cxxfunction(signature(x="numeric"), '
> |       NumericVector xV(x);
> |       double out = 0.0; int N=xV.size();
> |       for(int i=0; i<N; i++) out += xV[i];
> |       return wrap(out);
> | ',plugin="Rcpp")
> | Summing4<- cxxfunction(signature(x="numeric"), '
> |       NumericVector xV(x);
> |       return wrap(std::accumulate(xV.begin(), xV.end(), double()));
> | ',plugin="Rcpp")
> | Summing5<- cxxfunction(signature(x="numeric"), '
> |       NumericVector xV(x);
> |       double out = 0.0; int N=xV.size();
> |       for(int i=0; i<N; ++i) out += xV[i];
> |       return wrap(out);
> | ',plugin="Rcpp")
> | Summing6<- cxxfunction(signature(x="numeric"), '
> |       NumericVector xV(x);
> |       double out = 0.0;
> |       for(NumericVector::iterator i=xV.begin(); i!=xV.end(); ++i) out += *i;
> |       return wrap(out);
> | ',plugin="Rcpp")
> |
> | #####################################################################
> | ## Simulation: Time Testing.
> | n<- 1000000; N<- 1000
> | time.Sum<- matrix(0,N,7);
> | for(i in 1:N){
> | x<- rnorm(n)
> | time.Sum[i,1]<- system.time(Summing1(x))[3];
> | time.Sum[i,2]<- system.time(Summing2(x))[3];
> | time.Sum[i,3]<- system.time(Summing3(x))[3];
> | time.Sum[i,4]<- system.time(Summing4(x))[3];
> | time.Sum[i,5]<- system.time(Summing5(x))[3];
> | time.Sum[i,6]<- system.time(Summing6(x))[3];
> | time.Sum[i,7]<- system.time(sum(x))[3];
> | }# i
> | time.df<- data.frame(time.Sum)
> | names(time.df)<- c
> | ("Sugar","Rcpp","Rcpp_N","Accumulate","Pre-increment","Iterator","R")
> | boxplot(time.df)
> |
> | #####################################################################
> | ## RESULTS:
> | formatC(summary(time.df),dec=3)
> |      Sugar                 Rcpp                Rcpp_N
> |  " Min.   :0.01600  " " Min.   :0.01000  " "Min.   :0.005000  "
> |  " 1st Qu.:0.01600  " " 1st Qu.:0.01000  " "1st Qu.:0.005000  "
> |  " Median :0.01600  " " Median :0.01100  " "Median :0.006000  "
> |  " Mean   :0.01631  " " Mean   :0.01060  " "Mean   :0.005668  "
> |  " 3rd Qu.:0.01600  " " 3rd Qu.:0.01100  " "3rd Qu.:0.006000  "
> |  " Max.   :0.03700  " " Max.   :0.02400  " "Max.   :0.020000  "
> |    Accumulate         Pre-increment           Iterator
> |  "Min.   :0.005000  " "Min.   :0.005000  " " Min.   :0.01000  "
> |  "1st Qu.:0.005000  " "1st Qu.:0.005000  " " 1st Qu.:0.01000  "
> |  "Median :0.006000  " "Median :0.006000  " " Median :0.01100  "
> |  "Mean   :0.005714  " "Mean   :0.005697  " " Mean   :0.01065  "
> |  "3rd Qu.:0.006000  " "3rd Qu.:0.006000  " " 3rd Qu.:0.01100  "
> |  "Max.   :0.029000  " "Max.   :0.021000  " " Max.   :0.03100  "
> |        R
> |  "Min.   :0.002000  "
> |  "1st Qu.:0.002000  "
> |  "Median :0.002000  "
> |  "Mean   :0.002211  "
> |  "3rd Qu.:0.002000  "
> |  "Max.   :0.004000  "
> | #####################################################################
> |
> | PS: Apologies to Dirk as I have not followed his advice, yet.
>
> Try this instead:
>
>      ## Summing1 to Summing6 as above
>
>      Summing1a<- cxxfunction(signature(x="numeric"), '
>            NumericVector xV(x);
>            double out = sum(noNA(xV));
>            return wrap(out);
>      ',plugin="Rcpp")
>
>      library(rbenchmark)
>      n<- 1000000
>      N<- 1000
>      x<- rnorm(n)
>
>      bm<- benchmark(Sugar     = Summing1(x),
>                      SugarNoNA = Summing1a(x),
>                      Rcpp      = Summing2(x),
>                      Rcpp_N    = Summing3(x),
>                      Accumulate= Summing4(x),
>                      PreIncrem = Summing5(x),
>                      Iterator  = Summing6(x),
>                      R         = function(x){ sum(x) },
>                      columns=c("test", "elapsed", "relative", "user.self", "sys.self"),
>                      order="relative",
>                      replications=N)
>      print(bm)
>
> which on my box gets this
>
>      edd at max:/tmp$ Rscript cedric.R
>      Loading required package: methods
>              test elapsed relative user.self sys.self
>      8          R   0.003     1.00      0.00        0
>      5 Accumulate   1.212   404.00      1.22        0
>      2  SugarNoNA   1.214   404.67      1.22        0
>      6  PreIncrem   1.214   404.67      1.21        0
>      4     Rcpp_N   1.215   405.00      1.21        0
>      7   Iterator   5.301  1767.00      5.30        0
>      3       Rcpp   5.302  1767.33      5.30        0
>      1      Sugar   7.229  2409.67      7.21        0
>      edd at max:/tmp$
>
> indicating that you have four equivalent versions neither on of which can go
> as fast as an R builtin goes (well, doh).
>
> Basic sugar, as we said before, gives a lot of convenience along with some
> safeties (exception checks, NA checks, ...).
>
> But you are not the first person, and surely not the last, to simply assume
> that it would also be as fast as carefully tuned and crafted code.
>
> But that ain't so -- the No Free Lunch theorem is still valid.
>
> Dirk

You can get a free lunch if you are friends with the cook.

I committed some code (rev 2846) that makes sum faster. This is based on 
the same thing that made operators *, +, etc. faster during Christmas.

So, with this, I get:

romain at naxos /tmp $ Rscript /tmp/sum.R
Le chargement a nécessité le package : inline
Le chargement a nécessité le package : methods
Le chargement a nécessité le package : Rcpp
         test elapsed relative user.self sys.self
1      Sugar   1.005 1.000000     1.003    0.003
3     Rcpp_N   1.005 1.000000     1.002    0.003
5  PreIncrem   1.005 1.000000     1.003    0.003
4 Accumulate   1.011 1.005970     1.007    0.003
7          R   1.648 1.639801     1.643    0.005
2       Rcpp   4.827 4.802985     4.813    0.015
6   Iterator   4.827 4.802985     4.812    0.014

BTW, Dirk this line was wrong:

R         = function(x){ sum(x) },

The expression that was benchmarked was "create the function", not "call 
it", which explains why the R version was so much faster in your 
example: it did not do anything.



Romain




require(inline)
require(Rcpp)


# Summing1 ("Sugar" in the benchmark): wraps the input in a NumericVector and
# sums it with a single sum() call on that vector, returning the result
# through wrap().
Summing1 <- cxxfunction(
  sig = signature(x = "numeric"),
  body = '
       NumericVector xV(x);
       double out = sum(xV);
       return wrap(out);
',
  plugin = "Rcpp"
)
# Summing2 ("Rcpp" in the benchmark): hand-written index loop that accumulates
# into a double. The loop condition calls xV.size() on every iteration and the
# index uses post-increment; this is the variant being compared, so it is kept
# exactly as is.
Summing2 <- cxxfunction(
  sig = signature(x = "numeric"),
  body = '
       NumericVector xV(x);
       double out = 0.0;
       for(int i=0; i<xV.size(); i++) out += xV[i];
       return wrap(out);
',
  plugin = "Rcpp"
)
# Summing3 ("Rcpp_N" in the benchmark): same index loop as Summing2, but the
# vector length is read once into N before the loop instead of calling
# xV.size() in the loop condition.
Summing3 <- cxxfunction(
  sig = signature(x = "numeric"),
  body = '
       NumericVector xV(x);
       double out = 0.0; int N=xV.size();
       for(int i=0; i<N; i++) out += xV[i];
       return wrap(out);
',
  plugin = "Rcpp"
)
# Summing4 ("Accumulate" in the benchmark): delegates the summation to
# std::accumulate over the vector's begin()/end() range, starting from a
# value-initialised double.
Summing4 <- cxxfunction(
  sig = signature(x = "numeric"),
  body = '
       NumericVector xV(x);
       return wrap(std::accumulate(xV.begin(), xV.end(), double()));
',
  plugin = "Rcpp"
)
# Summing5 ("PreIncrem" in the benchmark): identical to Summing3 (length
# hoisted into N) except that the loop index is advanced with pre-increment
# (++i) rather than post-increment.
Summing5 <- cxxfunction(
  sig = signature(x = "numeric"),
  body = '
       NumericVector xV(x);
       double out = 0.0; int N=xV.size();
       for(int i=0; i<N; ++i) out += xV[i];
       return wrap(out);
',
  plugin = "Rcpp"
)
# Summing6 ("Iterator" in the benchmark): walks the vector with a
# NumericVector::iterator, comparing against xV.end() on every iteration and
# adding the dereferenced element to the running total.
# The line break inside the for statement is part of the C++ source string and
# is kept verbatim; it is only whitespace as far as the C++ compiler goes.
Summing6 <- cxxfunction(
  sig = signature(x = "numeric"),
  body = '
       NumericVector xV(x);
       double out = 0.0;
       for(NumericVector::iterator i=xV.begin(); i!=xV.end(); ++i) out 
+= *i;
       return wrap(out);
',
  plugin = "Rcpp"
)

library(rbenchmark)
# Benchmark setup: a single vector of n standard-normal draws is generated
# once and handed to every candidate; each expression is replicated N times.
n <- 1000000
N <- 1000
x <- rnorm(n)

# Every named argument below is an expression timed by benchmark().
# `R = sum(x)` is passed as a call (not wrapped in `function(x) { ... }`) so
# that the base R summation itself is what gets timed.
bm <- benchmark(
  Sugar = Summing1(x),
  Rcpp = Summing2(x),
  Rcpp_N = Summing3(x),
  Accumulate = Summing4(x),
  PreIncrem = Summing5(x),
  Iterator = Summing6(x),
  R = sum(x),
  columns = c("test", "elapsed", "relative", "user.self", "sys.self"),
  order = "relative",
  replications = N
)
print(bm)



-- 
Romain Francois
Professional R Enthusiast
+33(0) 6 28 91 30 30
http://romainfrancois.blog.free.fr
|- http://bit.ly/fT2rZM : highlight 0.2-5
|- http://bit.ly/gpCSpH : Evolution of Rcpp code size
`- http://bit.ly/hovakS : RcppGSL initial release




More information about the Rcpp-devel mailing list