[H5r-commits] r46 - / R src tests

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Sat Apr 23 01:56:39 CEST 2011


Author: extemporaneousb
Date: 2011-04-23 01:56:39 +0200 (Sat, 23 Apr 2011)
New Revision: 46

Added:
   tests/testReadPoints.R
Modified:
   NAMESPACE
   R/h5R.R
   src/h5_wrap.c
Log:
added some more code for writing.

Modified: NAMESPACE
===================================================================
--- NAMESPACE	2011-04-21 02:06:27 UTC (rev 45)
+++ NAMESPACE	2011-04-22 23:56:39 UTC (rev 46)
@@ -17,7 +17,7 @@
               getH5Type,
               getH5Dataset,
               getH5Attribute,
-              readDataAsVector,
+              readH5Data,
               show,
               ncol,
               nrow)

Modified: R/h5R.R
===================================================================
--- R/h5R.R	2011-04-21 02:06:27 UTC (rev 45)
+++ R/h5R.R	2011-04-22 23:56:39 UTC (rev 46)
@@ -7,6 +7,11 @@
 ## These are defined in H5Tpublic.h
 .h5Types <- c("integer", "double", "time", "character", "logical")
 
+## ##############################################################################
+##
+## Class Definitions
+##
+## ##############################################################################
 setClass("H5Obj", representation(ePtr = "externalptr"))
 setClass("H5Container", contains = "H5Obj")
 setClass("H5File", contains = "H5Container",
@@ -21,7 +26,25 @@
 setClass("H5Dataset", contains = "H5DataContainer")
 setClass("H5Attribute", contains = "H5DataContainer")
 setClass("hSlab", representation = representation(s = "integer", w = "integer"))
+setClass("H5DataFrame", contains = "data.frame", representation(h5File = "H5File", h5Datasets = "list"))
 
+## ##############################################################################
+##
+## Generics Definitions
+##
+## ##############################################################################
+setGeneric("getH5Group", function(h5Obj, groupName, ...) standardGeneric("getH5Group"))
+setGeneric("getH5Dim", function(h5Obj, ...) standardGeneric("getH5Dim"))
+setGeneric("getH5Type", function(h5Obj, ...) standardGeneric("getH5Type"))
+setGeneric("getH5Dataset", function(h5Obj, datasetName, ...) standardGeneric("getH5Dataset"))
+setGeneric("getH5Attribute", function(h5Obj, attrName, ...) standardGeneric("getH5Attribute"))
+setGeneric("createH5Group", function(h5Obj, groupName, ...) standardGeneric("createH5Group"))
+setGeneric("createH5Dataset", function(h5Obj, datasetName, ...) standardGeneric("createH5Dataset"))
+setGeneric("createH5Attribute", function(h5Obj, attrName, attrValue, ...) standardGeneric("createH5Attribute"))
+setGeneric("writeH5Data", function(h5Obj, ...) standardGeneric("writeH5Data"))
+setGeneric("readH5Data", function(h5Obj, ...) standardGeneric("readH5Data"))
+setGeneric("deleteH5Obj", function(h5Obj, h5ObjName, ...) standardGeneric("deleteH5Obj"))
+
 H5File <- function(fileName, mode = 'r') {
   new("H5File", fileName, mode)
 }
@@ -57,15 +80,6 @@
   return(! is.null(h5Dataset at .data))
 }
 
-setGeneric("getH5Group", function(h5Obj, groupName, ...) standardGeneric("getH5Group"))
-setGeneric("getH5Dim", function(h5Obj, ...) standardGeneric("getH5Dim"))
-setGeneric("getH5Type", function(h5Obj, ...) standardGeneric("getH5Type"))
-setGeneric("getH5Dataset", function(h5Obj, datasetName, ...) standardGeneric("getH5Dataset"))
-setGeneric("getH5Attribute", function(h5Obj, attrName, ...) standardGeneric("getH5Attribute"))
-setGeneric("createH5Group", function(h5Obj, groupName, ...) standardGeneric("createH5Group"))
-setGeneric("createH5Dataset", function(h5Obj, datasetName, ...) standardGeneric("createH5Dataset"))
-setGeneric("writeH5Data", function(h5Obj, ...) standardGeneric("writeH5Data"))
-
 setMethod("getH5Group", c("H5Container", "character"), function(h5Obj, groupName) {
   if (is.null(x <- .Call("h5R_get_group", .ePtr(h5Obj), groupName, PACKAGE = 'h5r')))
     stop(paste("Group:", groupName, "cannot be opened."))
@@ -151,8 +165,15 @@
 })
 
 setMethod("createH5Dataset", c("H5Container", "character"), function(h5Obj, datasetName, data,
-                                                                     dims, dType = c("integer", "double"),
-                                                                     chunkSizes = NA) {
+                                                                     dims, dType = c("integer", "double", "character"),
+                                                                     chunkSizes = NA, overwrite = TRUE) {
+  if (h5DatasetExists(h5Obj, datasetName)) {
+    if (! overwrite)
+      stop(paste("Dataset:", datasetName, "already exists."))
+    else
+      deleteH5Obj(h5Obj, datasetName)
+  }
+
   mData <- missing(data)
   mDims <- missing(dims)
   mType <- missing(dType)
@@ -199,9 +220,16 @@
   return(getH5Dataset(h5Obj, datasetName))
 })
 
-setMethod("createH5Group", c("H5Container", "character"), function(h5Obj, groupName) {
-  if (h5GroupExists(h5Obj, groupName))
-    stop(paste("Group:", groupName, "exists."))
+setMethod("createH5Group", c("H5Container", "character"), function(h5Obj, groupName,
+                                                                   overwrite = TRUE) {
+  if (h5GroupExists(h5Obj, groupName)) {
+    if (! overwrite) {
+      stop(paste("Group:", groupName, "exists."))
+    } else {
+      deleteH5Obj(h5Obj, groupName)
+    }
+  }
+  
   if (.Call("h5R_create_group", .ePtr(h5Obj), groupName, PACKAGE = 'h5r') == 0) {
     .flush(h5Obj)
     return(getH5Group(h5Obj, groupName))
@@ -210,7 +238,15 @@
   }
 })
 
+setMethod("createH5Attribute", c("H5Obj"), function(h5Obj, attrName, attrValue) {
+  stop("Not implemented yet.")
+})
 
+setMethod("deleteH5Obj", c("H5Container"), function(h5Obj, h5ObjName) {
+  .Call("h5R_delete_object", .ePtr(h5Obj), as.character(h5ObjName))
+  .flush(h5Obj)
+})
+
 ##
 ## The whole slicing infrastructure.
 ## 
@@ -408,7 +444,7 @@
 ## Note: the two reverses.
 ##
 .loadDataset <- function(h5Dataset) {
-  d <- readDataAsVector(h5Dataset)
+  d <- readH5Data(h5Dataset)
   dim(d) <- rev(dim(h5Dataset))
   
   if (! is.null(dim(h5Dataset))) aperm(d) else d
@@ -428,15 +464,22 @@
   if (! is.null(dim(h5Dataset))) aperm(d) else d
 }
 
-setGeneric("readDataAsVector", function(h5Obj, ...) {
-  standardGeneric("readDataAsVector")
-})
+readPoints <- function(h5Dataset, idxs) {
+  if (is.null(dim(idxs))) {
+    nr <- length(idxs)
+    nc <- 1
+  } else {
+    nr <- nrow(idxs)
+    nc <- ncol(idxs)
+  }
+  .Call("h5R_read_points", .ePtr(h5Dataset), as.integer(idxs), as.integer(nr), as.integer(nc))
+}
 
-setMethod("readDataAsVector", "H5Dataset", function(h5Obj) {
+setMethod("readH5Data", "H5Dataset", function(h5Obj) {
   .Call('h5R_read_dataset', .ePtr(h5Obj), PACKAGE = 'h5r')
 })
 
-setMethod("readDataAsVector", "H5Attribute", function(h5Obj) {
+setMethod("readH5Data", "H5Attribute", function(h5Obj) {
   .Call('h5R_read_attr', .ePtr(h5Obj), PACKAGE = 'h5r')
 })
 
@@ -529,11 +572,6 @@
 ## H5DataFrame interface
 ##
 ################################################################
-
-setClass("H5DataFrame", contains = "data.frame",
-         representation(h5File = "H5File",
-                        h5Datasets = "list"))
-
 H5DataFrame <- function(file, nms = NA) {
   .getCols <- function(h5) {
     nms <- names(listH5Contents(h5))

Modified: src/h5_wrap.c
===================================================================
--- src/h5_wrap.c	2011-04-21 02:06:27 UTC (rev 45)
+++ src/h5_wrap.c	2011-04-22 23:56:39 UTC (rev 46)
@@ -277,7 +277,19 @@
     return(dta);
 }
 
-hid_t _h5R_get_memtype(SEXP h5_type) {
+SEXP h5R_create_dataset(SEXP h5_obj, SEXP name, SEXP h5_type, SEXP dims, SEXP chunks) {
+    int i; 
+    
+    hsize_t* current_dims = (hsize_t*) Calloc(length(dims), hsize_t);
+    hsize_t* max_dims = (hsize_t*) Calloc(length(dims), hsize_t);
+    hsize_t* chunk_lens = (hsize_t*) Calloc(length(chunks), hsize_t);
+
+    for (i = 0; i < length(dims); i++) {
+	current_dims[i] = INTEGER(dims)[i];
+	max_dims[i]     = H5S_UNLIMITED;
+	chunk_lens[i]   = INTEGER(chunks)[i];
+    }
+    
     hid_t memtype = -1;
 
     switch (INTEGER(h5_type)[0]) {
@@ -288,32 +300,19 @@
 	memtype = H5T_NATIVE_DOUBLE;
 	break;
     case H5T_STRING:
-	// clearly we'll wait on this.
+	memtype = H5Tcopy (H5T_C_S1);
+	H5Tset_size (memtype, H5T_VARIABLE);   
+	break;
     default:
 	error("Unsupported class in %s.\n", __func__);
     }
-    return memtype;
-}
-
-
-SEXP h5R_create_dataset(SEXP h5_obj, SEXP name, SEXP h5_type, SEXP dims, SEXP chunks) {
-    int i; 
+ 
     
-    hsize_t* current_dims = (hsize_t*) Calloc(length(dims), hsize_t);
-    hsize_t* max_dims = (hsize_t*) Calloc(length(dims), hsize_t);
-    hsize_t* chunk_lens = (hsize_t*) Calloc(length(chunks), hsize_t);
-
-    for (i = 0; i < length(dims); i++) {
-	current_dims[i] = INTEGER(dims)[i];
-	max_dims[i]     = H5S_UNLIMITED;
-	chunk_lens[i]   = INTEGER(chunks)[i];
-    }
-    
     hid_t cparms = H5Pcreate(H5P_DATASET_CREATE);
     H5Pset_chunk(cparms, length(chunks), chunk_lens);
     
     hid_t dataspace = H5Screate_simple(length(dims), current_dims, max_dims);
-    hid_t dataset   = H5Dcreate(HID(h5_obj), NM(name), _h5R_get_memtype(h5_type), 
+    hid_t dataset   = H5Dcreate(HID(h5_obj), NM(name), memtype, 
 				dataspace, H5P_DEFAULT, cparms, H5P_DEFAULT);
     
     H5Pclose(cparms);
@@ -331,6 +330,7 @@
     hid_t space = -1, memspace = -1, memtype = -1;
     int i; 
     void* _data; 
+    char** tmp;
 
     int* offsets  = INTEGER(_offsets);
     int* counts   = INTEGER(_counts);
@@ -348,7 +348,9 @@
     H5Sselect_hyperslab(space, H5S_SELECT_SET, _h_offsets, NULL, _h_counts, NULL);
     memspace = H5Screate_simple(length(_counts), _h_counts, NULL);
 
-    switch (INTEGER(h5R_get_type(h5_dataset))[0]) {
+    int memtype_int = INTEGER(h5R_get_type(h5_dataset))[0];
+    
+    switch (memtype_int) {
     case H5T_INTEGER: 
 	memtype = H5T_NATIVE_INT;
 	_data = (void*) INTEGER(data);
@@ -360,14 +362,19 @@
     case H5T_STRING:
 	memtype = H5Tcopy (H5T_C_S1);
 	H5Tset_size (memtype, H5T_VARIABLE);   
+	tmp = (char**) Calloc(length(data), char*);
+	for (i = 0; i < length(data); i++)
+	    tmp[i] = CHAR(STRING_ELT(data, i));
+	_data = (void*) tmp;
 	break;
     default:
 	__ERROR__ = 1;
     }
-    
 
     if (__ERROR__ == 0) {
 	H5Dwrite(HID(h5_dataset), memtype, memspace, space, H5P_DEFAULT, _data);
+	if (memtype_int == H5T_STRING) 
+	    Free(tmp);
     }
 
     /** clean up. **/
@@ -379,7 +386,6 @@
     if (__ERROR__ == 1) {
 	error("Unsupported class in %s\n", __func__);
     }
-
     return R_NilValue;
 }
 
@@ -484,6 +490,85 @@
 
     return(r_lst);
 }
+
+SEXP h5R_read_points(SEXP h5_dataset, SEXP _points, SEXP _nr, SEXP _nc) {
+    int __ERROR__ = 0;
+    SEXP dta = R_NilValue;
+    hid_t space = -1, memspace = -1, memtype = -1;
+    void* buf = NULL; 
+    int i; 
+
+    int nr = INTEGER(_nr)[0];
+    int nc = INTEGER(_nc)[0];
+
+    /** I'm surprised I have to do this, but it seems to be necessary. **/
+    hsize_t* points = (hsize_t*) Calloc(nr*nc, hsize_t);
+    for (i = 0; i < nr*nc; i++) points[i] = INTEGER(_points)[i];
+
+    hsize_t nrp = nr;
+
+    space = _h5R_get_space(h5_dataset);
+    H5Sselect_elements(space, H5S_SELECT_SET, nr, points);
+    memspace = H5Screate_simple(nc, &nrp, NULL);
+
+    switch (INTEGER(h5R_get_type(h5_dataset))[0]) {
+    case H5T_INTEGER: 
+	PROTECT(dta = allocVector(INTSXP, length(_points)));
+	memtype = H5T_NATIVE_INT;
+	buf = INTEGER(dta);
+	break;
+    case H5T_FLOAT:
+	PROTECT(dta = allocVector(REALSXP, length(_points)));
+	memtype = H5T_NATIVE_DOUBLE;
+	buf = REAL(dta);
+	break;
+    case H5T_STRING:
+	buf = (char **) Calloc(length(_points), char*);
+	memtype = H5Tcopy (H5T_C_S1);
+	H5Tset_size (memtype, H5T_VARIABLE);   
+	break;
+    default:
+	__ERROR__ = 1;
+    }
+    
+
+    if (__ERROR__ == 0) {
+	H5Dread(HID(h5_dataset), memtype, memspace, space, H5P_DEFAULT, buf);
+
+	/** There requires a little more with strings. **/
+	/* if (H5T_STRING == INTEGER(h5R_get_type(h5_dataset))[0]) { */
+	/*     PROTECT(dta = allocVector(STRSXP, v)); */
+	/*     for (i = 0; i < v; i++) */
+	/* 	if (((char **) buf)[i]) { */
+	/* 	    SET_STRING_ELT(dta, i, mkChar(  ((char **) buf)[i] ));  */
+	/* 	} */
+	    
+	/*     H5Dvlen_reclaim (memtype, memspace, H5P_DEFAULT, buf); */
+	    
+	/*     H5Tclose(memtype); */
+	/*     Free(buf); */
+	/* } */
+    }
+
+    /** clean up. **/
+    Free(points);
+    H5Sclose(memspace);
+    H5Sclose(space);
+
+    UNPROTECT(1);
+    
+    if (__ERROR__ == 1) {
+	error("Unsupported class in %s\n", __func__);
+    }
+    
+    
+    return dta;
+}
+
+SEXP h5R_delete_object(SEXP h5_obj, SEXP name) {
+    H5Ldelete(HID(h5_obj), NM(name), H5P_DEFAULT);
+    return R_NilValue;
+}
   
 SEXP h5R_read_attr(SEXP h5_attr) {
     SEXP dta = R_NilValue;

Added: tests/testReadPoints.R
===================================================================
--- tests/testReadPoints.R	                        (rev 0)
+++ tests/testReadPoints.R	2011-04-22 23:56:39 UTC (rev 46)
@@ -0,0 +1,17 @@
+require(h5r)
+
+require(h5r)
+h5 <- H5File("test.h5", 'w')
+d <- createH5Dataset(h5, "jim", c('jime ', 'joe', 'mikey'))
+
+d <- createH5Dataset(h5, "dd", rnorm(1000000))
+
+s <- sample(1:length(d), size = 10000)
+system.time(z <- d[s])
+system.time(y <- readPoints(d, s-1))
+all(z==y)
+
+m <- createH5Dataset(h5, "mm", cbind(rnorm(1000), rnorm(1000)))
+
+d <- createH5Dataset(h5, "dusdf", as.character(1:20))
+writeH5Data(d, "jim", 1, 1)



More information about the H5r-commits mailing list