[Rcpp-commits] r4071 - in pkg/Rcpp: . inst/include/Rcpp/hash src
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Tue Dec 4 14:55:49 CET 2012
Author: romain
Date: 2012-12-04 14:55:49 +0100 (Tue, 04 Dec 2012)
New Revision: 4071
Removed:
pkg/Rcpp/inst/include/Rcpp/hash/hash_impl.h
pkg/Rcpp/src/fastmatch.c
Modified:
pkg/Rcpp/ChangeLog
pkg/Rcpp/inst/include/Rcpp/hash/hash.h
Log:
C++ify hash code from Simon
Modified: pkg/Rcpp/ChangeLog
===================================================================
--- pkg/Rcpp/ChangeLog 2012-12-04 03:22:05 UTC (rev 4070)
+++ pkg/Rcpp/ChangeLog 2012-12-04 13:55:49 UTC (rev 4071)
@@ -2,10 +2,8 @@
* include/Rcpp/hash/hash.h: new implementation of IndexHash, based on
Simon's fastmatch package
- * include/Rcpp/hash/hash_impl.h: low level implementation details
* include/Rcpp/sugar/functions/match.h: using new IndexHash
- * src/fastmatch.c : largely inspired from Simon's fastmatch
-
+
2012-12-03 Dirk Eddelbuettel <edd at debian.org>
* inst/include/RcppCommon.h: Applied patch kindly contributed by Yan
Modified: pkg/Rcpp/inst/include/Rcpp/hash/hash.h
===================================================================
--- pkg/Rcpp/inst/include/Rcpp/hash/hash.h 2012-12-04 03:22:05 UTC (rev 4070)
+++ pkg/Rcpp/inst/include/Rcpp/hash/hash.h 2012-12-04 13:55:49 UTC (rev 4071)
@@ -1,7 +1,9 @@
// -*- mode: C++; c-indent-level: 4; c-basic-offset: 4; tab-width: 4 -*-
//
-// hash.h: Rcpp R/C++ interface class library -- hashing
+// hash.h: Rcpp R/C++ interface class library -- hashing utility, inspired
+// by Simon's fastmatch package
//
+// Copyright (C) 2010, 2011 Simon Urbanek
// Copyright (C) 2012 Dirk Eddelbuettel and Romain Francois
//
// This file is part of Rcpp.
@@ -22,66 +24,112 @@
#ifndef RCPP__HASH__HASH_H
#define RCPP__HASH__HASH_H
-#include <Rcpp/hash/hash_impl.h>
-
namespace Rcpp{
namespace sugar{
- template <typename T> void add_hash_value( hash_t *h, int i) ;
- template <> inline void add_hash_value<int>( hash_t* h, int i ){ add_hash_int(h,i) ;}
- template <> inline void add_hash_value<double>( hash_t* h, int i ){ add_hash_real(h,i) ;}
- template <> inline void add_hash_value<SEXP>( hash_t* h, int i ){ add_hash_ptr(h,i) ;}
+
+ #ifndef RCPP_HASH
+ #define RCPP_HASH(X) (3141592653U * ((unsigned int)(X)) >> (32 - k))
+ #endif
- template <typename T> int get_hash_value( hash_t *h, T val) ;
- template <> inline int get_hash_value<int>( hash_t *h, int val){ return get_hash_int(h, val) ; }
- template <> inline int get_hash_value<double>( hash_t *h, double val){ return get_hash_real(h, val); }
- template <> inline int get_hash_value<SEXP>( hash_t *h, SEXP val){ return get_hash_ptr(h, val) ; }
-
template <int RTYPE>
class IndexHash {
public:
typedef typename traits::storage_type<RTYPE>::type STORAGE ;
typedef Vector<RTYPE> VECTOR ;
- IndexHash( SEXP table ) : h(0) {
- int n = LENGTH(table) ;
- h = new_hash( dataptr(table), n ) ;
- for( int i=0; i<n; i++){
- add_hash_value<STORAGE>( h, i) ;
- }
+ IndexHash( SEXP table ) : m(2), k(1), src( (STORAGE*)dataptr(table) ), data() {
+ int n = Rf_length(table) ;
+ int desired = n*2 ;
+ while( m < desired ){ m *= 2 ; k++ ; }
+ data.resize( m ) ;
+ for( int i=0; i<n; i++) add_value(i) ;
}
- ~IndexHash(){
- if(h) {
- free_hash(h);
- h = 0 ;
- }
+
+ template <typename T>
+ inline SEXP lookup(const T& vec){
+ return lookup__impl(vec, vec.size() ) ;
}
+ // use the pointers for actual (non sugar expression) vectors
+ inline SEXP lookup(const VECTOR& vec){
+ return lookup__impl(vec.begin(), vec.size() ) ;
+ }
+
+
+ private:
+ int m, k ;
+ STORAGE* src ;
+ std::vector<int> data ;
+
template <typename T>
- SEXP lookup(const T& vec){
- int n = vec.size() ;
+ SEXP lookup__impl(const T& vec, int n){
SEXP res = Rf_allocVector(INTSXP, n) ;
int *v = INTEGER(res) ;
- for( int i=0; i<n; i++){
- v[i] = get_hash_value<STORAGE>( h, vec[i] ) ;
- }
+ for( int i=0; i<n; i++) v[i] = get_index( vec[i] ) ;
return res ;
}
- SEXP lookup(const VECTOR& vec){
- int n = vec.size() ;
- SEXP res = Rf_allocVector(INTSXP, n) ;
- int *v = INTEGER(res) ;
- STORAGE* p_vec = vec.begin() ;
- for( int i=0; i<n; i++){
- v[i] = get_hash_value<STORAGE>( h, p_vec[i] ) ;
+ void add_value(int i){
+ STORAGE val = src[i++] ;
+ int addr = get_addr(val) ;
+ while (data[addr] && src[data[addr] - 1] != val) {
+ addr++;
+ if (addr == m) addr = 0;
}
- return res ;
+ if (!data[addr])
+ data[addr] = i ;
}
- private:
- hash_t* h ;
+ /* NOTE: we are returning a 1-based index ! */
+ int get_index(STORAGE value){
+ int addr = get_addr(value) ;
+ while (data[addr]) {
+ if (src[data[addr] - 1] == value)
+ return data[addr];
+ addr++;
+ if (addr == m) addr = 0;
+ }
+ return NA_INTEGER;
+ }
+
+ // defined below
+ int get_addr(STORAGE value) ;
} ;
+
+ template <>
+ inline int IndexHash<INTSXP>::get_addr(int value){
+ return RCPP_HASH(value) ;
+ }
+ template <>
+ inline int IndexHash<REALSXP>::get_addr(double val){
+ int addr;
+ union dint_u {
+ double d;
+ unsigned int u[2];
+ };
+ union dint_u val_u;
+ /* double is a bit tricky - we have to normalize 0.0, NA and NaN */
+ if (val == 0.0) val = 0.0;
+ if (R_IsNA(val)) val = NA_REAL;
+ else if (R_IsNaN(val)) val = R_NaN;
+ val_u.d = val;
+ addr = RCPP_HASH(val_u.u[0] + val_u.u[1]);
+ return addr ;
+ }
+ template <>
+ inline int IndexHash<STRSXP>::get_addr(SEXP value){
+ intptr_t val = (intptr_t) value;
+ int addr;
+ #if (defined _LP64) || (defined __LP64__) || (defined WIN64)
+ addr = RCPP_HASH((val & 0xffffffff) ^ (val >> 32));
+ #else
+ addr = RCPP_HASH(val);
+ #endif
+ return addr ;
+ }
+
+
} // sugar
} // Rcpp
Deleted: pkg/Rcpp/inst/include/Rcpp/hash/hash_impl.h
===================================================================
--- pkg/Rcpp/inst/include/Rcpp/hash/hash_impl.h 2012-12-04 03:22:05 UTC (rev 4070)
+++ pkg/Rcpp/inst/include/Rcpp/hash/hash_impl.h 2012-12-04 13:55:49 UTC (rev 4071)
@@ -1,51 +0,0 @@
-// -*- mode: C++; c-indent-level: 4; c-basic-offset: 4; tab-width: 4 -*-
-//
-// hash_impl.h: Rcpp R/C++ interface class library -- hashing
-//
-// Copyright (C) 2012 Dirk Eddelbuettel and Romain Francois
-//
-// This file is part of Rcpp.
-//
-// Rcpp is free software: you can redistribute it and/or modify it
-// under the terms of the GNU General Public License as published by
-// the Free Software Foundation, either version 2 of the License, or
-// (at your option) any later version.
-//
-// Rcpp is distributed in the hope that it will be useful, but
-// WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License
-// along with Rcpp. If not, see <http://www.gnu.org/licenses/>.
-
-#ifndef RCPP__HASH__HASH_IMPL_H
-#define RCPP__HASH__HASH_IMPL_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
- typedef struct hash {
- int m, k ;
- void *src;
- int ix[1];
- } hash_t;
-
- hash_t *new_hash(void *src, int len) ;
- void free_hash(hash_t *h) ;
-
- void add_hash_int(hash_t *h, int i) ;
- void add_hash_real(hash_t *h, int i) ;
- void add_hash_ptr(hash_t *h, int i) ;
-
- int get_hash_int(hash_t *h, int val) ;
- int get_hash_real(hash_t *h, double val) ;
- int get_hash_ptr(hash_t *h, void *val_ptr) ;
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
-
Deleted: pkg/Rcpp/src/fastmatch.c
===================================================================
--- pkg/Rcpp/src/fastmatch.c 2012-12-04 03:22:05 UTC (rev 4070)
+++ pkg/Rcpp/src/fastmatch.c 2012-12-04 13:55:49 UTC (rev 4071)
@@ -1,157 +0,0 @@
-/*
- * fastmatch: fast implementation of match() in R using semi-permanent hash tables
- *
- * Copyright (C) 2010, 2011 Simon Urbanek
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- */
-
-/* for speed (should not really matter in this case as most time is spent in the hashing) */
-#define USE_RINTERNALS 1
-#include <Rinternals.h>
-
-/* for malloc/free since we handle our hash table memory separately from R */
-#include <stdlib.h>
-/* for hashing for pointers we need intptr_t */
-#include <stdint.h>
-
-#include <Rcpp/hash/hash_impl.h>
-
-/* create a new hash table with the given source and length.
- we store only the index - values are picked from the source
- so you must make sure the source is still alive when used */
-hash_t *new_hash(void *src, int len) {
- hash_t *h;
- int m = 2, k = 1, desired = len * 2; /* we want a maximal load of 50% */
- while (m < desired) { m *= 2; k++; }
- h = (hash_t*) calloc(1, sizeof(hash_t) + (sizeof(int) * m));
- if (!h) Rf_error("unable to allocate %.2Mb for a hash table", (double) sizeof(int) * (double) m / (1024.0 * 1024.0));
- h->m = m;
- h->k = k;
- h->src = src;
- return h;
-}
-
-/* free the hash table (and all chained hash tables as well) */
-void free_hash(hash_t *h) {
- free(h);
-}
-
-/* pi-hash fn */
-#define HASH(X) (3141592653U * ((unsigned int)(X)) >> (32 - h->k))
-
-/* add the integer value at index i (0-based!) to the hash */
-void add_hash_int(hash_t *h, int i) {
- int *src = (int*) h->src;
- int val = src[i++], addr;
- addr = HASH(val);
- while (h->ix[addr] && src[h->ix[addr] - 1] != val) {
- addr++;
- if (addr == h->m) addr = 0;
- }
- if (!h->ix[addr])
- h->ix[addr] = i;
-}
-
-/* to avoid aliasing rules issues use a union */
-union dint_u {
- double d;
- unsigned int u[2];
-};
-
-/* add the double value at index i (0-based!) to the hash */
-void add_hash_real(hash_t *h, int i) {
- double *src = (double*) h->src;
- union dint_u val;
- int addr;
- /* double is a bit tricky - we nave to nomalize 0.0, NA and NaN */
- val.d = (src[i] == 0.0) ? 0.0 : src[i];
- if (R_IsNA(val.d)) val.d = NA_REAL;
- else if (R_IsNaN(val.d)) val.d = R_NaN;
- addr = HASH(val.u[0]+ val.u[1]);
- while (h->ix[addr] && src[h->ix[addr] - 1] != val.d) {
- addr++;
- if (addr == h->m) addr = 0;
- }
- if (!h->ix[addr])
- h->ix[addr] = i + 1;
-}
-
-/* add the pointer value at index i (0-based!) to the hash */
-void add_hash_ptr(hash_t *h, int i) {
- int addr;
- void **src = (void**) h->src;
- intptr_t val = (intptr_t) src[i++];
-#if (defined _LP64) || (defined __LP64__) || (defined WIN64)
- addr = HASH((val & 0xffffffff) ^ (val >> 32));
-#else
- addr = HASH(val);
-#endif
- while (h->ix[addr] && (intptr_t) src[h->ix[addr] - 1] != val) {
- addr++;
- if (addr == h->m) addr = 0;
- }
- if (!h->ix[addr])
- h->ix[addr] = i;
-}
-
-/* NOTE: we are returning a 1-based index ! */
-int get_hash_int(hash_t *h, int val) {
- int *src = (int*) h->src;
- int addr;
- addr = HASH(val);
- while (h->ix[addr]) {
- if (src[h->ix[addr] - 1] == val)
- return h->ix[addr];
- addr ++;
- if (addr == h->m) addr = 0;
- }
- return NA_INTEGER;
-}
-
-/* NOTE: we are returning a 1-based index ! */
-int get_hash_real(hash_t *h, double val) {
- double *src = (double*) h->src;
- int addr;
- union dint_u val_u;
- /* double is a bit tricky - we nave to normalize 0.0, NA and NaN */
- if (val == 0.0) val = 0.0;
- if (R_IsNA(val)) val = NA_REAL;
- else if (R_IsNaN(val)) val = R_NaN;
- val_u.d = val;
- addr = HASH(val_u.u[0] + val_u.u[1]);
- while (h->ix[addr]) {
- if (src[h->ix[addr] - 1] == val)
- return h->ix[addr];
- addr++;
- if (addr == h->m) addr = 0;
- }
- return NA_INTEGER;
-}
-
-/* NOTE: we are returning a 1-based index ! */
-int get_hash_ptr(hash_t *h, void *val_ptr) {
- void **src = (void **) h->src;
- intptr_t val = (intptr_t) val_ptr;
- int addr;
-#if (defined _LP64) || (defined __LP64__) || (defined WIN64)
- addr = HASH((val & 0xffffffff) ^ (val >> 32));
-#else
- addr = HASH(val);
-#endif
- while (h->ix[addr]) {
- if ((intptr_t) src[h->ix[addr] - 1] == val)
- return h->ix[addr];
- addr ++;
- if (addr == h->m) addr = 0;
- }
- return NA_INTEGER;
-}
-
More information about the Rcpp-commits
mailing list