[Rcpp-commits] r4080 - in pkg/Rcpp/inst/include/Rcpp: hash sugar/functions
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Wed Dec 5 00:46:49 CET 2012
Author: romain
Date: 2012-12-05 00:46:49 +0100 (Wed, 05 Dec 2012)
New Revision: 4080
Added:
pkg/Rcpp/inst/include/Rcpp/hash/IndexHash.h
pkg/Rcpp/inst/include/Rcpp/hash/SelfHash.h
Modified:
pkg/Rcpp/inst/include/Rcpp/hash/hash.h
pkg/Rcpp/inst/include/Rcpp/sugar/functions/self_match.h
pkg/Rcpp/inst/include/Rcpp/sugar/functions/unique.h
Log:
fixing self_match
Added: pkg/Rcpp/inst/include/Rcpp/hash/IndexHash.h
===================================================================
--- pkg/Rcpp/inst/include/Rcpp/hash/IndexHash.h (rev 0)
+++ pkg/Rcpp/inst/include/Rcpp/hash/IndexHash.h 2012-12-04 23:46:49 UTC (rev 4080)
@@ -0,0 +1,168 @@
+// -*- mode: C++; c-indent-level: 4; c-basic-offset: 4; tab-width: 4 -*-
+//
+// IndexHash.h: Rcpp R/C++ interface class library -- hashing utility, inspired
+// from Simon's fastmatch package
+//
+// Copyright (C) 2010, 2011 Simon Urbanek
+// Copyright (C) 2012 Dirk Eddelbuettel and Romain Francois
+//
+// This file is part of Rcpp.
+//
+// Rcpp is free software: you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 2 of the License, or
+// (at your option) any later version.
+//
+// Rcpp is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Rcpp. If not, see <http://www.gnu.org/licenses/>.
+
+#ifndef RCPP__HASH__INDEX_HASH_H
+#define RCPP__HASH__INDEX_HASH_H
+
+namespace Rcpp{
+ namespace sugar{
+
+ #ifndef RCPP_HASH // keep any RCPP_HASH that is already defined
+ #define RCPP_HASH(X) (3141592653U * ((unsigned int)(X)) >> (32 - k))
+ #endif // RCPP_HASH: multiply, then shift right by (32 - k); the expansion reads a variable named k from the scope where it is used
+
+ template <int RTYPE>
+ class IndexHash { // open-addressing hash table (linear probing) mapping values of a source vector to their 1-based positions
+ public:
+ typedef typename traits::storage_type<RTYPE>::type STORAGE ;
+ typedef Vector<RTYPE> VECTOR ;
+
+ IndexHash( SEXP table ) : n(Rf_length(table)), m(2), k(1), src( (STORAGE*)dataptr(table) ), data(), size_(0) {
+ int desired = n*2 ; // aim for at least twice as many slots as source elements
+ while( m < desired ){ m *= 2 ; k++ ; } // m and k grow together, so m == 2^k throughout
+ data.resize( m ) ; // new slots hold 0, which the probing code treats as "empty"; nothing is inserted yet
+ }
+
+ inline IndexHash& fill(){ // insert every source element; returns *this so calls can be chained
+ for( int i=0; i<n; i++) add_value(i) ;
+ return *this ;
+ }
+
+ inline LogicalVector fill_and_get_duplicated() { // insert every source element and report which ones were repeats
+ LogicalVector result = no_init(n) ;
+ int* res = LOGICAL(result) ;
+ for( int i=0; i<n; i++) res[i] = ! add_value(i) ; // add_value() is false when the value was already present
+ return result ;
+ }
+
+ template <typename T>
+ inline SEXP lookup(const T& vec) const { // for each element of vec: its 1-based position in the source, or NA_INTEGER when absent
+ return lookup__impl(vec, vec.size() ) ;
+ }
+
+ // overload for actual vectors (as opposed to sugar expressions): indexes through vec.begin()
+ inline SEXP lookup(const VECTOR& vec) const {
+ return lookup__impl(vec.begin(), vec.size() ) ;
+ }
+
+ inline bool contains(STORAGE val) const { // true when get_index() finds val in the table
+ return get_index(val) != NA_INTEGER ;
+ }
+
+ inline int size() const { // number of distinct values inserted so far
+ return size_ ;
+ }
+
+ // distinct keys, in the order their slots occur in the table `data` (which need not be the source order)
+ inline Vector<RTYPE> keys() const{
+ Vector<RTYPE> res = no_init(size_) ;
+ for( int i=0, j=0; j<size_; i++){ // stops once all size_ occupied slots have been copied
+ if( data[i] ) res[j++] = src[data[i]-1] ; // data[] holds 1-based indices, hence the -1
+ }
+ return res ;
+ }
+
+ int n, m, k ; // n: source length, m: number of slots, k: log2(m) (read by RCPP_HASH)
+ STORAGE* src ; // elements of the source vector, obtained from dataptr(table)
+ std::vector<int> data ; // slot -> 1-based source index, or 0 for an empty slot
+ int size_ ; // number of occupied slots
+
+ template <typename T>
+ SEXP lookup__impl(const T& vec, int n) const { // this n (number of lookups) shadows the member n
+ SEXP res = Rf_allocVector(INTSXP, n) ;
+ int *v = INTEGER(res) ;
+ for( int i=0; i<n; i++) v[i] = get_index( vec[i] ) ;
+ return res ;
+ }
+
+ bool add_value(int i){ // insert src[i] (i is 0-based); true when the value was not in the table yet
+ RCPP_DEBUG_2( "%s::add_value(%d)", DEMANGLE(IndexHash), i )
+ STORAGE val = src[i++] ; // from here on i is the 1-based index that gets stored
+ int addr = get_addr(val) ;
+ while (data[addr] && src[data[addr] - 1] != val) { // NOTE(review): with STORAGE == double, NaN != NaN is true, so NA/NaN look like they never match an existing entry -- confirm
+ addr++; // linear probing: try the next slot ...
+ if (addr == m) addr = 0; // ... wrapping around at the end of the table
+ }
+ if (!data[addr]){ // stopped on an empty slot: the value is new
+ data[addr] = i ;
+ size_++ ;
+ return true ;
+ }
+ return false;
+ }
+
+ /* NOTE: returns the 1-based index of `value` in the source, or NA_INTEGER when it is not in the table */
+ int get_index(STORAGE value) const {
+ int addr = get_addr(value) ;
+ while (data[addr]) { // an empty slot ends the probe sequence
+ if (src[data[addr] - 1] == value)
+ return data[addr];
+ addr++;
+ if (addr == m) addr = 0;
+ }
+ return NA_INTEGER;
+ }
+
+ // starting slot for a value; only declared here, specialised per RTYPE outside the class
+ int get_addr(STORAGE value) const ;
+ } ;
+
+ template <>
+ inline int IndexHash<INTSXP>::get_addr(int value) const {
+ return RCPP_HASH(value) ; // hash of the integer value itself; RCPP_HASH reads the member k
+ }
+ template <>
+ inline int IndexHash<REALSXP>::get_addr(double val) const {
+ int addr;
+ union dint_u { // lets the double be read back as two unsigned ints
+ double d;
+ unsigned int u[2];
+ };
+ union dint_u val_u;
+ /* double is a bit tricky - we have to normalize 0.0, NA and NaN so that each of them always hashes to the same address */
+ if (val == 0.0) val = 0.0; // -0.0 == 0.0 is true, so negative zero is replaced by +0.0 here
+ if (R_IsNA(val)) val = NA_REAL;
+ else if (R_IsNaN(val)) val = R_NaN;
+ val_u.d = val;
+ addr = RCPP_HASH(val_u.u[0] + val_u.u[1]); // combine both unsigned words of the double before hashing
+ return addr ;
+ }
+
+ template <>
+ inline int IndexHash<STRSXP>::get_addr(SEXP value) const {
+ intptr_t val = (intptr_t) value; // the hash is computed from the pointer value, not from the string contents
+ int addr;
+ #if (defined _LP64) || (defined __LP64__) || (defined WIN64) // NOTE(review): MSVC predefines _WIN64 rather than WIN64 -- confirm this branch is taken on 64-bit Windows
+ addr = RCPP_HASH((val & 0xffffffff) ^ (val >> 32)); // fold the bits above bit 31 into the low 32 bits
+ #else
+ addr = RCPP_HASH(val);
+ #endif
+ return addr ;
+ }
+
+
+} // sugar
+} // Rcpp
+
+#endif
+
Added: pkg/Rcpp/inst/include/Rcpp/hash/SelfHash.h
===================================================================
--- pkg/Rcpp/inst/include/Rcpp/hash/SelfHash.h (rev 0)
+++ pkg/Rcpp/inst/include/Rcpp/hash/SelfHash.h 2012-12-04 23:46:49 UTC (rev 4080)
@@ -0,0 +1,129 @@
+// -*- mode: C++; c-indent-level: 4; c-basic-offset: 4; tab-width: 4 -*-
+//
+// SelfHash.h: Rcpp R/C++ interface class library -- hashing utility, inspired
+// from Simon's fastmatch package
+//
+// Copyright (C) 2010, 2011 Simon Urbanek
+// Copyright (C) 2012 Dirk Eddelbuettel and Romain Francois
+//
+// This file is part of Rcpp.
+//
+// Rcpp is free software: you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 2 of the License, or
+// (at your option) any later version.
+//
+// Rcpp is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Rcpp. If not, see <http://www.gnu.org/licenses/>.
+
+#ifndef RCPP__HASH__SELF_HASH_H
+#define RCPP__HASH__SELF_HASH_H
+
+namespace Rcpp{
+namespace sugar{
+
+
+ template <int RTYPE>
+ class SelfHash { // open-addressing hash table (linear probing) that numbers the distinct values of a vector in order of first appearance
+ public:
+ typedef typename traits::storage_type<RTYPE>::type STORAGE ;
+ typedef Vector<RTYPE> VECTOR ;
+
+ SelfHash( SEXP table ) : n(Rf_length(table)), m(2), k(1),
+ src( (STORAGE*)dataptr(table) ), data(), indices(), size_(0)
+ {
+ int desired = n*2 ; // aim for at least twice as many slots as source elements
+ while( m < desired ){ m *= 2 ; k++ ; } // m and k grow together, so m == 2^k throughout
+ data.resize( m ) ; // new slots hold 0, which the probing code treats as "empty"
+ indices.resize( m ) ; // one ordinal per slot, parallel to data
+ }
+
+ inline IntegerVector fill_and_self_match(){ // result[i] is the 1-based ordinal (by first appearance) of the distinct value held by element i
+ IntegerVector result = no_init(n) ;
+ int* res = INTEGER(result) ;
+ for( int i=0; i<n; i++) res[i] = add_value_get_index(i) ;
+ return result ;
+ }
+
+ inline int size() const { // number of distinct values inserted so far
+ return size_ ;
+ }
+
+ int n, m, k ; // n: source length, m: number of slots, k: log2(m) (read by RCPP_HASH)
+ STORAGE* src ; // elements of the source vector, obtained from dataptr(table)
+ std::vector<int> data ; // slot -> 1-based source index of the first occurrence, or 0 for an empty slot
+ std::vector<int> indices ; // slot -> 1-based ordinal of that key among the distinct values
+ int size_ ; // number of occupied slots; also the last ordinal handed out
+
+ int add_value_get_index(int i){ // insert src[i] (i is 0-based) if it is new; returns the ordinal of its value either way
+ STORAGE val = src[i++] ; // from here on i is the 1-based index that gets stored
+ int addr = get_addr(val) ;
+ while (data[addr] && src[data[addr] - 1] != val) { // NOTE(review): with STORAGE == double, NaN != NaN is true, so NA/NaN look like they never match an existing entry -- confirm
+ addr++; // linear probing: try the next slot ...
+ if (addr == m) addr = 0; // ... wrapping around at the end of the table
+ }
+ if (!data[addr]) { // stopped on an empty slot: the value is new
+ data[addr] = i ;
+ indices[addr] = ++size_ ; // next ordinal, starting at 1
+ }
+ return indices[addr] ;
+ }
+
+ /* NOTE: returns the 1-based source index stored for `value` (data[addr], not the ordinal in indices), or NA_INTEGER when it is not in the table */
+ int get_index(STORAGE value) const {
+ int addr = get_addr(value) ;
+ while (data[addr]) { // an empty slot ends the probe sequence
+ if (src[data[addr] - 1] == value)
+ return data[addr];
+ addr++;
+ if (addr == m) addr = 0;
+ }
+ return NA_INTEGER;
+ }
+
+ // starting slot for a value; only declared here, specialised per RTYPE outside the class
+ int get_addr(STORAGE value) const ;
+ } ;
+
+ template <>
+ inline int SelfHash<INTSXP>::get_addr(int value) const {
+ return RCPP_HASH(value) ; // RCPP_HASH is not defined in this header, so it must already be defined when this is compiled; it reads the member k
+ }
+ template <>
+ inline int SelfHash<REALSXP>::get_addr(double val) const {
+ int addr;
+ union dint_u { // lets the double be read back as two unsigned ints
+ double d;
+ unsigned int u[2];
+ };
+ union dint_u val_u;
+ /* double is a bit tricky - we have to normalize 0.0, NA and NaN so that each of them always hashes to the same address */
+ if (val == 0.0) val = 0.0; // -0.0 == 0.0 is true, so negative zero is replaced by +0.0 here
+ if (R_IsNA(val)) val = NA_REAL;
+ else if (R_IsNaN(val)) val = R_NaN;
+ val_u.d = val;
+ addr = RCPP_HASH(val_u.u[0] + val_u.u[1]); // combine both unsigned words of the double before hashing
+ return addr ;
+ }
+
+ template <>
+ inline int SelfHash<STRSXP>::get_addr(SEXP value) const {
+ intptr_t val = (intptr_t) value; // the hash is computed from the pointer value, not from the string contents
+ int addr;
+ #if (defined _LP64) || (defined __LP64__) || (defined WIN64) // NOTE(review): MSVC predefines _WIN64 rather than WIN64 -- confirm this branch is taken on 64-bit Windows
+ addr = RCPP_HASH((val & 0xffffffff) ^ (val >> 32)); // fold the bits above bit 31 into the low 32 bits
+ #else
+ addr = RCPP_HASH(val);
+ #endif
+ return addr ;
+ }
+
+} // sugar
+} // Rcpp
+
+#endif
Modified: pkg/Rcpp/inst/include/Rcpp/hash/hash.h
===================================================================
--- pkg/Rcpp/inst/include/Rcpp/hash/hash.h 2012-12-04 21:02:39 UTC (rev 4079)
+++ pkg/Rcpp/inst/include/Rcpp/hash/hash.h 2012-12-04 23:46:49 UTC (rev 4080)
@@ -1,9 +1,7 @@
// -*- mode: C++; c-indent-level: 4; c-basic-offset: 4; tab-width: 4 -*-
//
-// hash.h: Rcpp R/C++ interface class library -- hashing utility, inspired
-// from Simon's fastmatch package
+// hash.h: Rcpp R/C++ interface class library -- hashing
//
-// Copyright (C) 2010, 2011 Simon Urbanek
// Copyright (C) 2012 Dirk Eddelbuettel and Romain Francois
//
// This file is part of Rcpp.
@@ -24,166 +22,8 @@
#ifndef RCPP__HASH__HASH_H
#define RCPP__HASH__HASH_H
-namespace Rcpp{
- namespace sugar{
-
- #ifndef RCPP_HASH
- #define RCPP_HASH(X) (3141592653U * ((unsigned int)(X)) >> (32 - k))
- #endif
-
- template <int RTYPE>
- class IndexHash {
- public:
- typedef typename traits::storage_type<RTYPE>::type STORAGE ;
- typedef Vector<RTYPE> VECTOR ;
-
- IndexHash( SEXP table ) : n(Rf_length(table)), m(2), k(1), src( (STORAGE*)dataptr(table) ), data(), size_(0) {
- int desired = n*2 ;
- while( m < desired ){ m *= 2 ; k++ ; }
- data.resize( m ) ;
- }
-
- inline IndexHash& fill(){
- for( int i=0; i<n; i++) add_value(i) ;
- return *this ;
- }
-
- inline LogicalVector fill_and_get_duplicated() {
- LogicalVector result = no_init(n) ;
- int* res = LOGICAL(result) ;
- for( int i=0; i<n; i++) res[i] = ! add_value(i) ;
- return result ;
- }
-
- inline IntegerVector fill_and_self_match(){
- IntegerVector result = no_init(n) ;
- int* res = INTEGER(result) ;
- for( int i=0; i<n; i++) res[i] = add_value_get_index(i) ;
- return result ;
- }
-
-
- template <typename T>
- inline SEXP lookup(const T& vec) const {
- return lookup__impl(vec, vec.size() ) ;
- }
-
- // use the pointers for actual (non sugar expression vectors)
- inline SEXP lookup(const VECTOR& vec) const {
- return lookup__impl(vec.begin(), vec.size() ) ;
- }
-
- inline bool contains(STORAGE val) const {
- return get_index(val) != NA_INTEGER ;
- }
-
- inline int size() const {
- return size_ ;
- }
-
- inline Vector<RTYPE> keys() const{
- Vector<RTYPE> res = no_init(size_) ;
- for( int i=0, j=0; j<size_; i++){
- if( data[i] ) res[j++] = src[data[i]] ;
- }
- return res ;
- }
-
- int n, m, k ;
- STORAGE* src ;
- std::vector<int> data ;
- int size_ ;
-
- template <typename T>
- SEXP lookup__impl(const T& vec, int n) const {
- SEXP res = Rf_allocVector(INTSXP, n) ;
- int *v = INTEGER(res) ;
- for( int i=0; i<n; i++) v[i] = get_index( vec[i] ) ;
- return res ;
- }
-
- bool add_value(int i){
- STORAGE val = src[i++] ;
- int addr = get_addr(val) ;
- while (data[addr] && src[data[addr] - 1] != val) {
- addr++;
- if (addr == m) addr = 0;
- }
- if (!data[addr]){
- data[addr] = i ;
- size_++ ;
- return true ;
- }
- return false;
- }
-
- int add_value_get_index(int i){
- STORAGE val = src[i++] ;
- int addr = get_addr(val) ;
- while (data[addr] && src[data[addr] - 1] != val) {
- addr++;
- if (addr == m) addr = 0;
- }
- if (!data[addr]){
- data[addr] = i ;
- size_++ ;
- return i ;
- }
- return data[addr] ;
- }
-
- /* NOTE: we are returning a 1-based index ! */
- int get_index(STORAGE value) const {
- int addr = get_addr(value) ;
- while (data[addr]) {
- if (src[data[addr] - 1] == value)
- return data[addr];
- addr++;
- if (addr == m) addr = 0;
- }
- return NA_INTEGER;
- }
-
- // defined below
- int get_addr(STORAGE value) const ;
- } ;
-
- template <>
- inline int IndexHash<INTSXP>::get_addr(int value) const {
- return RCPP_HASH(value) ;
- }
- template <>
- inline int IndexHash<REALSXP>::get_addr(double val) const {
- int addr;
- union dint_u {
- double d;
- unsigned int u[2];
- };
- union dint_u val_u;
- /* double is a bit tricky - we nave to normalize 0.0, NA and NaN */
- if (val == 0.0) val = 0.0;
- if (R_IsNA(val)) val = NA_REAL;
- else if (R_IsNaN(val)) val = R_NaN;
- val_u.d = val;
- addr = RCPP_HASH(val_u.u[0] + val_u.u[1]);
- return addr ;
- }
-
- template <>
- inline int IndexHash<STRSXP>::get_addr(SEXP value) const {
- intptr_t val = (intptr_t) value;
- int addr;
- #if (defined _LP64) || (defined __LP64__) || (defined WIN64)
- addr = RCPP_HASH((val & 0xffffffff) ^ (val >> 32));
- #else
- addr = RCPP_HASH(val);
- #endif
- return addr ;
- }
+#include <Rcpp/hash/IndexHash.h>
+#include <Rcpp/hash/SelfHash.h>
-
-} // sugar
-} // Rcpp
-
#endif
Modified: pkg/Rcpp/inst/include/Rcpp/sugar/functions/self_match.h
===================================================================
--- pkg/Rcpp/inst/include/Rcpp/sugar/functions/self_match.h 2012-12-04 21:02:39 UTC (rev 4079)
+++ pkg/Rcpp/inst/include/Rcpp/sugar/functions/self_match.h 2012-12-04 23:46:49 UTC (rev 4080)
@@ -23,11 +23,52 @@
#define Rcpp__sugar__self_match_h
namespace Rcpp{
+namespace sugar{
+template <typename HASH, typename STORAGE>
+class SelfInserter { // functor: maps each value to a 1-based id assigned in order of first appearance, recording ids in a caller-supplied map
+public:
+ SelfInserter( HASH& hash_ ) : hash(hash_), index(0) {}
+
+ inline int operator()( STORAGE value ){ // id of value; a value not seen before gets the next id
+ typename HASH::iterator it = hash.find( value ) ;
+ if( it == hash.end() ){
+ hash.insert( std::make_pair(value, ++index) ) ; // first sighting: ids start at 1
+ return index ;
+ } else {
+ return it->second ; // seen before: reuse the stored id
+ }
+ }
+
+private:
+ HASH& hash ; // held by reference, so copies of this functor update the same map
+ int index; // last id handed out by this functor object
+} ;
+
+template <int RTYPE, typename TABLE_T>
+class SelfMatch { // computes, for every element of a table, the first-appearance id of its value using an unordered map
+public:
+ typedef typename Rcpp::traits::storage_type<RTYPE>::type STORAGE ;
+
+ SelfMatch( const TABLE_T& table ): hash(), result(table.size()) {
+ std::transform( table.begin(), table.end(), result.begin(), Inserter(hash) ) ; // all the work happens here, at construction
+ }
+
+ inline operator IntegerVector() const { return result ; } // hands back the ids computed in the constructor
+
+private:
+ typedef RCPP_UNORDERED_MAP<STORAGE, int> HASH ; // value -> id
+ typedef SelfInserter<HASH,STORAGE> Inserter ;
+ HASH hash ;
+ IntegerVector result ; // one id per element of the table
+};
+
+} // sugar
+
template <int RTYPE, bool NA, typename T>
inline IntegerVector self_match( const VectorBase<RTYPE,NA,T>& x ){
Vector<RTYPE> vec(x) ;
- return sugar::IndexHash<RTYPE>(vec).fill_and_self_match() ;
+ return sugar::SelfHash<RTYPE>(vec).fill_and_self_match() ;
}
Modified: pkg/Rcpp/inst/include/Rcpp/sugar/functions/unique.h
===================================================================
--- pkg/Rcpp/inst/include/Rcpp/sugar/functions/unique.h 2012-12-04 21:02:39 UTC (rev 4079)
+++ pkg/Rcpp/inst/include/Rcpp/sugar/functions/unique.h 2012-12-04 23:46:49 UTC (rev 4080)
@@ -65,7 +65,8 @@
inline Vector<RTYPE> unique( const VectorBase<RTYPE,NA,T>& t ){
Vector<RTYPE> vec(t) ;
sugar::IndexHash<RTYPE> hash(vec) ;
- return hash.keys() ;
+ hash.fill() ;
+ return hash.keys() ;
}
template <int RTYPE, bool NA, typename T>
inline Vector<RTYPE> sort_unique( const VectorBase<RTYPE,NA,T>& t ){
More information about the Rcpp-commits
mailing list