[Rcpp-commits] r4075 - in pkg/Rcpp: . inst inst/include/Rcpp/hash inst/include/Rcpp/sugar/functions

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Tue Dec 4 18:05:31 CET 2012


Author: romain
Date: 2012-12-04 18:05:31 +0100 (Tue, 04 Dec 2012)
New Revision: 4075

Modified:
   pkg/Rcpp/ChangeLog
   pkg/Rcpp/inst/NEWS.Rd
   pkg/Rcpp/inst/include/Rcpp/hash/hash.h
   pkg/Rcpp/inst/include/Rcpp/sugar/functions/duplicated.h
   pkg/Rcpp/inst/include/Rcpp/sugar/functions/match.h
   pkg/Rcpp/inst/include/Rcpp/sugar/functions/unique.h
Log:
faster duplicated based on IndexHash

Modified: pkg/Rcpp/ChangeLog
===================================================================
--- pkg/Rcpp/ChangeLog	2012-12-04 15:59:15 UTC (rev 4074)
+++ pkg/Rcpp/ChangeLog	2012-12-04 17:05:31 UTC (rev 4075)
@@ -16,6 +16,8 @@
         using new IndexHash
         * include/Rcpp/sugar/functions/unique.h: more efficient version of 
         unique and in using IndexHash
+        * include/Rcpp/sugar/functions/duplicated.h: more efficient version of 
+        duplicated using IndexHash
         * include/Rcpp/vector/Vector.h: more efficiently create Vector from 
         sugar expression that are already vectors, i.e. grab the SEXP
         

Modified: pkg/Rcpp/inst/NEWS.Rd
===================================================================
--- pkg/Rcpp/inst/NEWS.Rd	2012-12-04 15:59:15 UTC (rev 4074)
+++ pkg/Rcpp/inst/NEWS.Rd	2012-12-04 17:05:31 UTC (rev 4075)
@@ -20,6 +20,7 @@
         \item More efficient version of \code{match} based on \code{IndexHash}
         \item More efficient version of \code{unique} base on \code{IndexHash}
         \item More efficient version of \code{in} base on \code{IndexHash}
+        \item More efficient version of \code{duplicated} based on \code{IndexHash}
     }
   }
 }

Modified: pkg/Rcpp/inst/include/Rcpp/hash/hash.h
===================================================================
--- pkg/Rcpp/inst/include/Rcpp/hash/hash.h	2012-12-04 15:59:15 UTC (rev 4074)
+++ pkg/Rcpp/inst/include/Rcpp/hash/hash.h	2012-12-04 17:05:31 UTC (rev 4075)
@@ -37,14 +37,25 @@
         typedef typename traits::storage_type<RTYPE>::type STORAGE ;
         typedef Vector<RTYPE> VECTOR ;
               
-        IndexHash( SEXP table ) : m(2), k(1), src( (STORAGE*)dataptr(table) ), data(), size_(0) {
-            int n =  Rf_length(table) ;
+        IndexHash( SEXP table ) : n(Rf_length(table)), m(2), k(1), src( (STORAGE*)dataptr(table) ), data(), size_(0) {
             int desired = n*2 ;
             while( m < desired ){ m *= 2 ; k++ ; }
             data.resize( m ) ;
-            for( int i=0; i<n; i++) add_value(i) ;    
         }
         
+        inline IndexHash& fill(){
+            for( int i=0; i<n; i++) add_value(i) ;
+            return *this ;
+        }
+        
+        inline LogicalVector fill_and_get_duplicated() { 
+            LogicalVector result = no_init(n) ;
+            int* res = LOGICAL(result) ;
+            for( int i=0; i<n; i++) res[i] = ! add_value(i) ;
+            return result ;
+        }
+    
+        
         template <typename T>
         inline SEXP lookup(const T& vec) const {
             return lookup__impl(vec, vec.size() ) ;
@@ -71,8 +82,7 @@
             return res ;
         }
         
-    private:
-        int m, k ;
+        int n, m, k ;
         STORAGE* src ;
         std::vector<int> data ;
         int size_ ;
@@ -85,7 +95,7 @@
             return res ;
         }
         
-        void add_value(int i){
+        bool add_value(int i){
             STORAGE val = src[i++] ;
             int addr = get_addr(val) ;
             while (data[addr] && src[data[addr] - 1] != val) {
@@ -95,7 +105,9 @@
             if (!data[addr]){
               data[addr] = i ;
               size_++ ;
+              return true ;
             }
+            return false;
         }
         
         /* NOTE: we are returning a 1-based index ! */

Modified: pkg/Rcpp/inst/include/Rcpp/sugar/functions/duplicated.h
===================================================================
--- pkg/Rcpp/inst/include/Rcpp/sugar/functions/duplicated.h	2012-12-04 15:59:15 UTC (rev 4074)
+++ pkg/Rcpp/inst/include/Rcpp/sugar/functions/duplicated.h	2012-12-04 17:05:31 UTC (rev 4075)
@@ -23,46 +23,12 @@
 #define Rcpp__sugar__duplicated_h
           
 namespace Rcpp{
-namespace sugar{
 
-template <typename SET, typename STORAGE>
-class DuplicatedInserter {
-public:
-    DuplicatedInserter( SET& set_ ) : set(set_) {}
-    
-    inline int operator()( STORAGE value ){
-        if( set.count(value) ) return TRUE ;
-        set.insert(value);
-        return FALSE ;
-    }
-    
-private:
-    SET& set ;
-} ; 
-
-template <int RTYPE, typename TABLE_T>        
-class Duplicated {
-public:
-    typedef typename Rcpp::traits::storage_type<RTYPE>::type STORAGE ;
-    
-    Duplicated( const TABLE_T& table ): set(), result(table.size()) {
-        std::transform( table.begin(), table.end(), result.begin(), Inserter(set) ) ;
-    }
-    
-    inline operator LogicalVector() const { return result ; }
-    
-private:
-    typedef RCPP_UNORDERED_SET<STORAGE> SET ;
-    typedef DuplicatedInserter<SET,STORAGE> Inserter ;
-    SET set ; 
-    LogicalVector result ;
-}; 
-  
-} // sugar
-
 template <int RTYPE, bool NA, typename T>
 inline LogicalVector duplicated( const VectorBase<RTYPE,NA,T>& x ){
-    return sugar::Duplicated<RTYPE,T>(x.get_ref()) ;
+    Vector<RTYPE> vec(x) ;
+    sugar::IndexHash<RTYPE> hash(vec) ;
+    return hash.fill_and_get_duplicated() ;
 }
 
 

Modified: pkg/Rcpp/inst/include/Rcpp/sugar/functions/match.h
===================================================================
--- pkg/Rcpp/inst/include/Rcpp/sugar/functions/match.h	2012-12-04 15:59:15 UTC (rev 4074)
+++ pkg/Rcpp/inst/include/Rcpp/sugar/functions/match.h	2012-12-04 17:05:31 UTC (rev 4075)
@@ -27,8 +27,7 @@
 template <int RTYPE, bool NA, typename T, bool RHS_NA, typename RHS_T>
 inline IntegerVector match( const VectorBase<RTYPE,NA,T>& x, const VectorBase<RTYPE,RHS_NA,RHS_T>& table_ ){
     Vector<RTYPE> table = table_ ;
-    sugar::IndexHash<RTYPE> hash( table ) ;
-    return hash.lookup( x.get_ref() ) ;
+    return sugar::IndexHash<RTYPE>( table ).fill().lookup( x.get_ref() ) ;
 }
 
 } // Rcpp

Modified: pkg/Rcpp/inst/include/Rcpp/sugar/functions/unique.h
===================================================================
--- pkg/Rcpp/inst/include/Rcpp/sugar/functions/unique.h	2012-12-04 15:59:15 UTC (rev 4074)
+++ pkg/Rcpp/inst/include/Rcpp/sugar/functions/unique.h	2012-12-04 17:05:31 UTC (rev 4075)
@@ -47,7 +47,9 @@
     HASH hash ;
     
 public:
-    In( const TABLE_T& table) : vec(table), hash(vec){}
+    In( const TABLE_T& table) : vec(table), hash(vec){
+        hash.fill() ;
+    }
     
     template <typename T>
     LogicalVector get( const T& x) const {



More information about the Rcpp-commits mailing list