[Rcpp-commits] r4075 - in pkg/Rcpp: . inst inst/include/Rcpp/hash inst/include/Rcpp/sugar/functions
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Tue Dec 4 18:05:31 CET 2012
Author: romain
Date: 2012-12-04 18:05:31 +0100 (Tue, 04 Dec 2012)
New Revision: 4075
Modified:
pkg/Rcpp/ChangeLog
pkg/Rcpp/inst/NEWS.Rd
pkg/Rcpp/inst/include/Rcpp/hash/hash.h
pkg/Rcpp/inst/include/Rcpp/sugar/functions/duplicated.h
pkg/Rcpp/inst/include/Rcpp/sugar/functions/match.h
pkg/Rcpp/inst/include/Rcpp/sugar/functions/unique.h
Log:
faster duplicated based on IndexHash
Modified: pkg/Rcpp/ChangeLog
===================================================================
--- pkg/Rcpp/ChangeLog 2012-12-04 15:59:15 UTC (rev 4074)
+++ pkg/Rcpp/ChangeLog 2012-12-04 17:05:31 UTC (rev 4075)
@@ -16,6 +16,8 @@
using new IndexHash
* include/Rcpp/sugar/functions/unique.h: more efficient version of
unique and in using IndexHash
+ * include/Rcpp/sugar/functions/duplicated.h: more efficient version of
+ duplicated and in using IndexHash
* include/Rcpp/vector/Vector.h: more efficiently create Vector from
sugar expression that are already vectors, i.e. grab the SEXP
Modified: pkg/Rcpp/inst/NEWS.Rd
===================================================================
--- pkg/Rcpp/inst/NEWS.Rd 2012-12-04 15:59:15 UTC (rev 4074)
+++ pkg/Rcpp/inst/NEWS.Rd 2012-12-04 17:05:31 UTC (rev 4075)
@@ -20,6 +20,7 @@
\item More efficient version of \code{match} based on \code{IndexHash}
\item More efficient version of \code{unique} base on \code{IndexHash}
\item More efficient version of \code{in} base on \code{IndexHash}
+ \item More efficient version of \code{duplicated} based on \code{IndexHash}
}
}
}
Modified: pkg/Rcpp/inst/include/Rcpp/hash/hash.h
===================================================================
--- pkg/Rcpp/inst/include/Rcpp/hash/hash.h 2012-12-04 15:59:15 UTC (rev 4074)
+++ pkg/Rcpp/inst/include/Rcpp/hash/hash.h 2012-12-04 17:05:31 UTC (rev 4075)
@@ -37,14 +37,25 @@
typedef typename traits::storage_type<RTYPE>::type STORAGE ;
typedef Vector<RTYPE> VECTOR ;
- IndexHash( SEXP table ) : m(2), k(1), src( (STORAGE*)dataptr(table) ), data(), size_(0) {
- int n = Rf_length(table) ;
+ IndexHash( SEXP table ) : n(Rf_length(table)), m(2), k(1), src( (STORAGE*)dataptr(table) ), data(), size_(0) {
int desired = n*2 ;
while( m < desired ){ m *= 2 ; k++ ; }
data.resize( m ) ;
- for( int i=0; i<n; i++) add_value(i) ;
}
+ inline IndexHash& fill(){
+ for( int i=0; i<n; i++) add_value(i) ;
+ return *this ;
+ }
+
+ inline LogicalVector fill_and_get_duplicated() {
+ LogicalVector result = no_init(n) ;
+ int* res = LOGICAL(result) ;
+ for( int i=0; i<n; i++) res[i] = ! add_value(i) ;
+ return result ;
+ }
+
+
template <typename T>
inline SEXP lookup(const T& vec) const {
return lookup__impl(vec, vec.size() ) ;
@@ -71,8 +82,7 @@
return res ;
}
- private:
- int m, k ;
+ int n, m, k ;
STORAGE* src ;
std::vector<int> data ;
int size_ ;
@@ -85,7 +95,7 @@
return res ;
}
- void add_value(int i){
+ bool add_value(int i){
STORAGE val = src[i++] ;
int addr = get_addr(val) ;
while (data[addr] && src[data[addr] - 1] != val) {
@@ -95,7 +105,9 @@
if (!data[addr]){
data[addr] = i ;
size_++ ;
+ return true ;
}
+ return false;
}
/* NOTE: we are returning a 1-based index ! */
Modified: pkg/Rcpp/inst/include/Rcpp/sugar/functions/duplicated.h
===================================================================
--- pkg/Rcpp/inst/include/Rcpp/sugar/functions/duplicated.h 2012-12-04 15:59:15 UTC (rev 4074)
+++ pkg/Rcpp/inst/include/Rcpp/sugar/functions/duplicated.h 2012-12-04 17:05:31 UTC (rev 4075)
@@ -23,46 +23,12 @@
#define Rcpp__sugar__duplicated_h
namespace Rcpp{
-namespace sugar{
-template <typename SET, typename STORAGE>
-class DuplicatedInserter {
-public:
- DuplicatedInserter( SET& set_ ) : set(set_) {}
-
- inline int operator()( STORAGE value ){
- if( set.count(value) ) return TRUE ;
- set.insert(value);
- return FALSE ;
- }
-
-private:
- SET& set ;
-} ;
-
-template <int RTYPE, typename TABLE_T>
-class Duplicated {
-public:
- typedef typename Rcpp::traits::storage_type<RTYPE>::type STORAGE ;
-
- Duplicated( const TABLE_T& table ): set(), result(table.size()) {
- std::transform( table.begin(), table.end(), result.begin(), Inserter(set) ) ;
- }
-
- inline operator LogicalVector() const { return result ; }
-
-private:
- typedef RCPP_UNORDERED_SET<STORAGE> SET ;
- typedef DuplicatedInserter<SET,STORAGE> Inserter ;
- SET set ;
- LogicalVector result ;
-};
-
-} // sugar
-
template <int RTYPE, bool NA, typename T>
inline LogicalVector duplicated( const VectorBase<RTYPE,NA,T>& x ){
- return sugar::Duplicated<RTYPE,T>(x.get_ref()) ;
+ Vector<RTYPE> vec(x) ;
+ sugar::IndexHash<RTYPE> hash(vec) ;
+ return hash.fill_and_get_duplicated() ;
}
Modified: pkg/Rcpp/inst/include/Rcpp/sugar/functions/match.h
===================================================================
--- pkg/Rcpp/inst/include/Rcpp/sugar/functions/match.h 2012-12-04 15:59:15 UTC (rev 4074)
+++ pkg/Rcpp/inst/include/Rcpp/sugar/functions/match.h 2012-12-04 17:05:31 UTC (rev 4075)
@@ -27,8 +27,7 @@
template <int RTYPE, bool NA, typename T, bool RHS_NA, typename RHS_T>
inline IntegerVector match( const VectorBase<RTYPE,NA,T>& x, const VectorBase<RTYPE,RHS_NA,RHS_T>& table_ ){
Vector<RTYPE> table = table_ ;
- sugar::IndexHash<RTYPE> hash( table ) ;
- return hash.lookup( x.get_ref() ) ;
+ return sugar::IndexHash<RTYPE>( table ).fill().lookup( x.get_ref() ) ;
}
} // Rcpp
Modified: pkg/Rcpp/inst/include/Rcpp/sugar/functions/unique.h
===================================================================
--- pkg/Rcpp/inst/include/Rcpp/sugar/functions/unique.h 2012-12-04 15:59:15 UTC (rev 4074)
+++ pkg/Rcpp/inst/include/Rcpp/sugar/functions/unique.h 2012-12-04 17:05:31 UTC (rev 4075)
@@ -47,7 +47,9 @@
HASH hash ;
public:
- In( const TABLE_T& table) : vec(table), hash(vec){}
+ In( const TABLE_T& table) : vec(table), hash(vec){
+ hash.fill() ;
+ }
template <typename T>
LogicalVector get( const T& x) const {
More information about the Rcpp-commits
mailing list