[Rcpp-commits] r4076 - in pkg/Rcpp: . inst inst/include/Rcpp/hash inst/include/Rcpp/sugar/functions

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Tue Dec 4 18:32:46 CET 2012


Author: romain
Date: 2012-12-04 18:32:46 +0100 (Tue, 04 Dec 2012)
New Revision: 4076

Modified:
   pkg/Rcpp/ChangeLog
   pkg/Rcpp/inst/NEWS.Rd
   pkg/Rcpp/inst/include/Rcpp/hash/hash.h
   pkg/Rcpp/inst/include/Rcpp/sugar/functions/self_match.h
Log:
faster self_match

Modified: pkg/Rcpp/ChangeLog
===================================================================
--- pkg/Rcpp/ChangeLog	2012-12-04 17:05:31 UTC (rev 4075)
+++ pkg/Rcpp/ChangeLog	2012-12-04 17:32:46 UTC (rev 4076)
@@ -16,8 +16,10 @@
         using new IndexHash
         * include/Rcpp/sugar/functions/unique.h: more efficient version of 
         unique and in using IndexHash
-        * include/Rcpp/sugar/functions/duplictated.h: more efficient version of 
+        * include/Rcpp/sugar/functions/duplicated.h: more efficient version of 
         duplicated and in using IndexHash
+        * include/Rcpp/sugar/functions/self_match.h: more efficient version of 
+        self_match and in using IndexHash
         * include/Rcpp/vector/Vector.h: more efficiently create Vector from 
         sugar expression that are already vectors, i.e. grab the SEXP
         

Modified: pkg/Rcpp/inst/NEWS.Rd
===================================================================
--- pkg/Rcpp/inst/NEWS.Rd	2012-12-04 17:05:31 UTC (rev 4075)
+++ pkg/Rcpp/inst/NEWS.Rd	2012-12-04 17:32:46 UTC (rev 4076)
@@ -21,6 +21,7 @@
         \item More efficient version of \code{unique} base on \code{IndexHash}
         \item More efficient version of \code{in} base on \code{IndexHash}
         \item More efficient version of \code{duplicated} base on \code{IndexHash}
+        \item More efficient version of \code{self_match} base on \code{IndexHash}
     }
   }
 }

Modified: pkg/Rcpp/inst/include/Rcpp/hash/hash.h
===================================================================
--- pkg/Rcpp/inst/include/Rcpp/hash/hash.h	2012-12-04 17:05:31 UTC (rev 4075)
+++ pkg/Rcpp/inst/include/Rcpp/hash/hash.h	2012-12-04 17:32:46 UTC (rev 4076)
@@ -54,6 +54,13 @@
             for( int i=0; i<n; i++) res[i] = ! add_value(i) ;
             return result ;
         }
+        
+        inline IntegerVector fill_and_self_match(){
+            IntegerVector result = no_init(n) ;
+            int* res = INTEGER(result) ;
+            for( int i=0; i<n; i++) res[i] = add_value_get_index(i) ;
+            return result ;    
+        }
     
         
         template <typename T>
@@ -110,6 +117,21 @@
             return false;
         }
         
+        int add_value_get_index(int i){
+            STORAGE val = src[i++] ;
+            int addr = get_addr(val) ;
+            while (data[addr] && src[data[addr] - 1] != val) {
+              addr++;
+              if (addr == m) addr = 0;
+            }
+            if (!data[addr]){
+              data[addr] = i ;
+              size_++ ;
+              return i ;
+            }
+            return data[addr] ;
+        }
+        
         /* NOTE: we are returning a 1-based index ! */
         int get_index(STORAGE value) const {
             int addr = get_addr(value) ;

Modified: pkg/Rcpp/inst/include/Rcpp/sugar/functions/self_match.h
===================================================================
--- pkg/Rcpp/inst/include/Rcpp/sugar/functions/self_match.h	2012-12-04 17:05:31 UTC (rev 4075)
+++ pkg/Rcpp/inst/include/Rcpp/sugar/functions/self_match.h	2012-12-04 17:32:46 UTC (rev 4076)
@@ -23,51 +23,11 @@
 #define Rcpp__sugar__self_match_h
           
 namespace Rcpp{
-namespace sugar{
 
-template <typename HASH, typename STORAGE>
-class SelfInserter {
-public:
-    SelfInserter( HASH& hash_ ) : hash(hash_), index(0) {}
-    
-    inline int operator()( STORAGE value ){
-        typename HASH::iterator it = hash.find( value ) ;
-        if( it == hash.end() ){
-            hash.insert( std::make_pair(value, ++index) ) ;
-            return index ; 
-        } else {
-            return it->second ;
-        }   
-    }
-    
-private:
-    HASH& hash ;
-    int index;
-} ; 
-
-template <int RTYPE, typename TABLE_T>        
-class SelfMatch {
-public:
-    typedef typename Rcpp::traits::storage_type<RTYPE>::type STORAGE ;
-    
-    SelfMatch( const TABLE_T& table ): hash(), result(table.size()) {
-        std::transform( table.begin(), table.end(), result.begin(), Inserter(hash) ) ;
-    }
-    
-    inline operator IntegerVector() const { return result ; }
-    
-private:
-    typedef RCPP_UNORDERED_MAP<STORAGE, int> HASH ;
-    typedef SelfInserter<HASH,STORAGE> Inserter ;
-    HASH hash ; 
-    IntegerVector result ;
-}; 
-    
-} // sugar
-
 template <int RTYPE, bool NA, typename T>
 inline IntegerVector self_match( const VectorBase<RTYPE,NA,T>& x ){
-    return sugar::SelfMatch<RTYPE,T>(x.get_ref()) ;
+    Vector<RTYPE> vec(x) ;
+    return sugar::IndexHash<RTYPE>(vec).fill_and_self_match() ;
 }
 
 



More information about the Rcpp-commits mailing list