[Rcpp-commits] r4072 - in pkg/Rcpp: . inst inst/include/Rcpp/hash inst/include/Rcpp/sugar/functions inst/include/Rcpp/vector
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Tue Dec 4 16:16:02 CET 2012
Author: romain
Date: 2012-12-04 16:16:01 +0100 (Tue, 04 Dec 2012)
New Revision: 4072
Modified:
pkg/Rcpp/ChangeLog
pkg/Rcpp/inst/NEWS.Rd
pkg/Rcpp/inst/include/Rcpp/hash/hash.h
pkg/Rcpp/inst/include/Rcpp/sugar/functions/unique.h
pkg/Rcpp/inst/include/Rcpp/vector/Vector.h
Log:
more efficient unique, based on IndexHash
Modified: pkg/Rcpp/ChangeLog
===================================================================
--- pkg/Rcpp/ChangeLog 2012-12-04 13:55:49 UTC (rev 4071)
+++ pkg/Rcpp/ChangeLog 2012-12-04 15:16:01 UTC (rev 4072)
@@ -3,6 +3,8 @@
* include/Rcpp/hash/hash.h: new implementation of IndexHash, based on
Simon's fastmatch package
* include/Rcpp/sugar/functions/match.h: using new IndexHash
+ * include/Rcpp/vector/Vector.h: more efficiently create Vector from
+ sugar expressions that are already vectors, i.e. grab the SEXP
2012-12-03 Dirk Eddelbuettel <edd at debian.org>
Modified: pkg/Rcpp/inst/NEWS.Rd
===================================================================
--- pkg/Rcpp/inst/NEWS.Rd 2012-12-04 13:55:49 UTC (rev 4071)
+++ pkg/Rcpp/inst/NEWS.Rd 2012-12-04 15:16:01 UTC (rev 4072)
@@ -12,7 +12,14 @@
patch by Yan Zhou
\item New class \code{Rcpp::String} to facilitate working with a single
element of a character vector
+ \item utility class sugar::IndexHash inspired by Simon Urbanek's fastmatch
+ package
}
+ \item Changes in Rcpp sugar:
+ \itemize{
+ \item More efficient version of \code{match} based on \code{IndexHash}
+ \item More efficient version of \code{unique} based on \code{IndexHash}
+ }
}
}
Modified: pkg/Rcpp/inst/include/Rcpp/hash/hash.h
===================================================================
--- pkg/Rcpp/inst/include/Rcpp/hash/hash.h 2012-12-04 13:55:49 UTC (rev 4071)
+++ pkg/Rcpp/inst/include/Rcpp/hash/hash.h 2012-12-04 15:16:01 UTC (rev 4072)
@@ -37,7 +37,7 @@
typedef typename traits::storage_type<RTYPE>::type STORAGE ;
typedef Vector<RTYPE> VECTOR ;
- IndexHash( SEXP table ) : m(2), k(1), src( (STORAGE*)dataptr(table) ), data() {
+ IndexHash( SEXP table ) : m(2), k(1), src( (STORAGE*)dataptr(table) ), data(), size_(0) {
int n = Rf_length(table) ;
int desired = n*2 ;
while( m < desired ){ m *= 2 ; k++ ; }
@@ -46,23 +46,39 @@
}
template <typename T>
- inline SEXP lookup(const T& vec){
+ inline SEXP lookup(const T& vec) const {
return lookup__impl(vec, vec.size() ) ;
}
// use the pointers for actual (non sugar expression vectors)
- inline SEXP lookup(const VECTOR& vec){
+ inline SEXP lookup(const VECTOR& vec) const {
return lookup__impl(vec.begin(), vec.size() ) ;
}
+ // Tell whether `val` is one of the values recorded in the hash table.
+ // get_index() gives the 1-based position of a value that is present.
+ // NOTE(review): the miss branch of get_index() lies outside this hunk; NA_INTEGER is
+ // taken to be its "not found" result, so membership is the negation of that test.
+ bool contains(STORAGE val) const {
+ return get_index(val) != NA_INTEGER ;
+ }
+ inline int size() const {
+ return size_ ; // counter bumped each time an empty slot of `data` gets filled
+ }
+
+ // Collect the values recorded in the table, walking the slots of `data` in order
+ // until size_ of them have been gathered.
+ // A slot of `data` is 0 when empty and otherwise holds a 1-based position into `src`
+ // (get_index() reads src[data[addr] - 1]), hence the `- 1` when reading the value back.
+ inline Vector<RTYPE> keys() const{
+ Vector<RTYPE> res = no_init(size_) ;
+ for( int i=0, j=0; j<size_; i++){
+ if( data[i] ) res[j++] = src[data[i] - 1] ;
+ }
+ return res ;
+ }
+
private:
int m, k ;
STORAGE* src ;
std::vector<int> data ;
+ int size_ ;
template <typename T>
- SEXP lookup__impl(const T& vec, int n){
+ SEXP lookup__impl(const T& vec, int n) const {
SEXP res = Rf_allocVector(INTSXP, n) ;
int *v = INTEGER(res) ;
for( int i=0; i<n; i++) v[i] = get_index( vec[i] ) ;
@@ -76,12 +92,14 @@
addr++;
if (addr == m) addr = 0;
}
- if (!data[addr])
+ if (!data[addr]){
data[addr] = i ;
+ size_++ ;
+ }
}
/* NOTE: we are returning a 1-based index ! */
- int get_index(STORAGE value){
+ int get_index(STORAGE value) const {
int addr = get_addr(value) ;
while (data[addr]) {
if (src[data[addr] - 1] == value)
@@ -93,15 +111,15 @@
}
// defined below
- int get_addr(STORAGE value) ;
+ int get_addr(STORAGE value) const ;
} ;
template <>
- inline int IndexHash<INTSXP>::get_addr(int value){
+ inline int IndexHash<INTSXP>::get_addr(int value) const {
return RCPP_HASH(value) ;
}
template <>
- inline int IndexHash<REALSXP>::get_addr(double val){
+ inline int IndexHash<REALSXP>::get_addr(double val) const {
int addr;
union dint_u {
double d;
@@ -118,7 +136,7 @@
}
template <>
- inline int IndexHash<STRSXP>::get_addr(SEXP value){
+ inline int IndexHash<STRSXP>::get_addr(SEXP value) const {
intptr_t val = (intptr_t) value;
int addr;
#if (defined _LP64) || (defined __LP64__) || (defined WIN64)
Modified: pkg/Rcpp/inst/include/Rcpp/sugar/functions/unique.h
===================================================================
--- pkg/Rcpp/inst/include/Rcpp/sugar/functions/unique.h 2012-12-04 13:55:49 UTC (rev 4071)
+++ pkg/Rcpp/inst/include/Rcpp/sugar/functions/unique.h 2012-12-04 15:16:01 UTC (rev 4072)
@@ -25,71 +25,6 @@
namespace Rcpp{
namespace sugar{
-template <int RTYPE, typename T>
-class Unique {
-public:
- typedef typename Rcpp::traits::storage_type<RTYPE>::type STORAGE ;
-
- Unique( const T& vec ) : set( vec.begin(), vec.end() ) {}
-
- Vector<RTYPE> get( ) {
- return Vector<RTYPE>( set.begin(), set.end() ) ;
- }
- Vector<RTYPE> get_sorted( ) {
- return Vector<RTYPE>( set.begin(), set.end() ).sort() ;
- }
-
-private:
-
- RCPP_UNORDERED_SET<STORAGE> set ;
-
-} ;
-
-// for a character expression
-template <typename T>
-class Unique<STRSXP,T> {
-public:
- Unique( const T& vec ) : set() {
- std::string buffer ;
- int n = vec.size() ;
- for( int i=0; i<n; i++){
- buffer = vec[i] ;
- set.insert( buffer ) ;
- }
- }
-
- CharacterVector get( ) {
- return CharacterVector( set.begin(), set.end() ) ;
- }
- CharacterVector get_sorted( ) {
- return CharacterVector( set.begin(), set.end() ).sort() ;
- }
-
-private:
-
- RCPP_UNORDERED_SET<std::string> set ;
-
-} ;
-
-// for a character vector
-template <>
-class Unique<STRSXP,CharacterVector> {
-public:
- Unique( const CharacterVector& vec ) : set( vec.begin(), vec.end() ) {}
-
- CharacterVector get( ) {
- return CharacterVector( set.begin(), set.end() ) ;
- }
-
- CharacterVector get_sorted( ) {
- return CharacterVector( set.begin(), set.end() ).sort() ;
- }
-private:
-
- RCPP_UNORDERED_SET<SEXP> set ;
-
-} ;
-
template <typename SET, typename STORAGE>
class InSet {
public:
@@ -126,11 +61,13 @@
template <int RTYPE, bool NA, typename T>
inline Vector<RTYPE> unique( const VectorBase<RTYPE,NA,T>& t ){
- return sugar::Unique<RTYPE,T>( t.get_ref() ).get() ;
+ Vector<RTYPE> vec(t) ;
+ sugar::IndexHash<RTYPE> hash(vec) ;
+ return hash.keys() ;
}
template <int RTYPE, bool NA, typename T>
inline Vector<RTYPE> sort_unique( const VectorBase<RTYPE,NA,T>& t ){
- return sugar::Unique<RTYPE,T>( t.get_ref() ).get_sorted() ;
+ return unique<RTYPE,NA,T>( t ).sort() ;
}
template <int RTYPE, bool NA, typename T, bool RHS_NA, typename RHS_T>
Modified: pkg/Rcpp/inst/include/Rcpp/vector/Vector.h
===================================================================
--- pkg/Rcpp/inst/include/Rcpp/vector/Vector.h 2012-12-04 13:55:49 UTC (rev 4071)
+++ pkg/Rcpp/inst/include/Rcpp/vector/Vector.h 2012-12-04 15:16:01 UTC (rev 4072)
@@ -134,10 +134,7 @@
template <bool NA, typename VEC>
Vector( const VectorBase<RTYPE,NA,VEC>& other ) : RObject() {
- RCPP_DEBUG_4( "Vector<%d>( VectorBase<%d,%d,%s> )", RTYPE, NA, RTYPE, DEMANGLE(VEC) ) ;
- int n = other.size() ;
- RObject::setSEXP( Rf_allocVector( RTYPE, n ) ) ;
- import_expression<VEC>( other.get_ref() , n ) ;
+ import_sugar_expression( other, typename traits::same_type<Vector,VEC>::type() ) ;
}
// should really only be used for LogicalVector.
@@ -146,10 +143,25 @@
RObject::setSEXP( r_cast<RTYPE>( const_cast<sugar::SingleLogicalResult<NA,T>&>( obj ) .get_sexp() ) ) ;
}
+private:
+
+ // we are importing a real sugar expression, i.e. not a vector: allocate a result of the right length, then import into it
+ template <bool NA, typename VEC>
+ inline void import_sugar_expression( const Rcpp::VectorBase<RTYPE,NA,VEC>& other, traits::false_type ){
+ RCPP_DEBUG_4( "Vector<%d>::import_sugar_expression( VectorBase<%d,%d,%s>, false_type )", RTYPE, NA, RTYPE, DEMANGLE(VEC) ) ;
+ int n = other.size() ;
+ RObject::setSEXP( Rf_allocVector( RTYPE, n ) ) ; // fresh R vector of type RTYPE and length n becomes this object's SEXP
+ import_expression<VEC>( other.get_ref() , n ) ; // hand the underlying expression and its length to import_expression
+ }
+ // we are importing a sugar expression that actually is a vector: grab its SEXP directly, nothing is allocated here
+ template <bool NA, typename VEC>
+ inline void import_sugar_expression( const Rcpp::VectorBase<RTYPE,NA,VEC>& other, traits::true_type ){
+ RCPP_DEBUG_4( "Vector<%d>::import_sugar_expression( VectorBase<%d,%d,%s>, true_type )", RTYPE, NA, RTYPE, DEMANGLE(VEC) ) ;
+ RObject::setSEXP( other.get_ref() ) ;
+ }
-private:
-
+
// TODO: do some dispatch when VEC == Vector so that we use std::copy
template <typename T>
inline void import_expression( const T& other, int n ){
More information about the Rcpp-commits
mailing list