[H5r-commits] r45 - / R src tests

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Thu Apr 21 04:06:28 CEST 2011


Author: extemporaneousb
Date: 2011-04-21 04:06:27 +0200 (Thu, 21 Apr 2011)
New Revision: 45

Modified:
   DESCRIPTION
   NEWS
   R/h5R.R
   src/h5_wrap.c
   tests/testwrite.R
Log:
A number of changes to facilitate writing.



Modified: DESCRIPTION
===================================================================
--- DESCRIPTION	2011-04-19 15:50:17 UTC (rev 44)
+++ DESCRIPTION	2011-04-21 02:06:27 UTC (rev 45)
@@ -1,11 +1,11 @@
 Package: h5r
 Type: Package
 Title: Interface to HDF5 Files
-Version: 1.0
+Version: 1.1
 Date: 2010-01-28
 Author: James Bullard
 Maintainer: <jbullard at pacificbiosciences.com>
 Depends: R (>= 2.10.0), methods, base
-Description: A package for interfacing to HDF5 files.
+Description: A package for interfacing with HDF5 files.
 License: LGPL
 

Modified: NEWS
===================================================================
--- NEWS	2011-04-19 15:50:17 UTC (rev 44)
+++ NEWS	2011-04-21 02:06:27 UTC (rev 45)
@@ -1,5 +1,22 @@
         **************************************************
         *                                                *
+        *              1.1 SERIES NEWS                   *
+        *                                                *
+        **************************************************
+
+
+                CHANGES IN h5r VERSION 1.1
+
+NEW FEATURES
+    o Added native ability to read sets of 1-D slabs.
+    o Added basic writing capabilities
+
+BUG FIXES
+    o 
+
+
+        **************************************************
+        *                                                *
         *              1.0 SERIES NEWS                   *
         *                                                *
         **************************************************

Modified: R/h5R.R
===================================================================
--- R/h5R.R	2011-04-19 15:50:17 UTC (rev 44)
+++ R/h5R.R	2011-04-21 02:06:27 UTC (rev 45)
@@ -5,11 +5,14 @@
 ## ##############################################################################
 
 ## These are defined in H5Tpublic.h
-.h5Types <- c("integer", "numeric", "time", "character", "logical")
+.h5Types <- c("integer", "double", "time", "character", "logical")
 
 setClass("H5Obj", representation(ePtr = "externalptr"))
-setClass("H5File", contains = "H5Obj", representation(fileName = "character"))
-setClass("H5Group", contains = "H5Obj", representation(name = "character"))
+setClass("H5Container", contains = "H5Obj")
+setClass("H5File", contains = "H5Container",
+         representation(fileName = "character"))
+setClass("H5Group", contains = "H5Container",
+         representation(name = "character"))
 
 setClassUnion("envOrNULL", c("environment", "NULL"))
 setClass("H5DataContainer", contains = "H5Obj",
@@ -17,32 +20,8 @@
                         h5Type = "integer", .data = "envOrNULL"))
 setClass("H5Dataset", contains = "H5DataContainer")
 setClass("H5Attribute", contains = "H5DataContainer")
-
 setClass("hSlab", representation = representation(s = "integer", w = "integer"))
 
-hSlab <- function(start, width = NA, end = NA) {
-  stopifnot(length(start) == length(width) || length(start) == length(end))
-
-  if (any(is.na(end)) & any(is.na(width))) {
-    stop("Must specify either end or width.")
-  } else {
-    if(any(is.na(end))) {
-      width <- width
-    }
-    else {
-      width <- (end - start + 1)
-    }
-  }
-  obj <- new("hSlab")
-  obj at s <- as.integer(start)
-  obj at w <- as.integer(width)
-  return(obj)
-}
-
-setMethod("length", "hSlab", function(x) {
-  length(x at s)
-})
-
 H5File <- function(fileName, mode = 'r') {
   new("H5File", fileName, mode)
 }
@@ -78,54 +57,21 @@
   return(! is.null(h5Dataset at .data))
 }
 
-setGeneric("getH5Group", function(h5Obj, groupName, ...) {
-  standardGeneric("getH5Group")
-})
+setGeneric("getH5Group", function(h5Obj, groupName, ...) standardGeneric("getH5Group"))
+setGeneric("getH5Dim", function(h5Obj, ...) standardGeneric("getH5Dim"))
+setGeneric("getH5Type", function(h5Obj, ...) standardGeneric("getH5Type"))
+setGeneric("getH5Dataset", function(h5Obj, datasetName, ...) standardGeneric("getH5Dataset"))
+setGeneric("getH5Attribute", function(h5Obj, attrName, ...) standardGeneric("getH5Attribute"))
+setGeneric("createH5Group", function(h5Obj, groupName, ...) standardGeneric("createH5Group"))
+setGeneric("createH5Dataset", function(h5Obj, datasetName, ...) standardGeneric("createH5Dataset"))
+setGeneric("writeH5Data", function(h5Obj, ...) standardGeneric("writeH5Data"))
 
-setGeneric("getH5Dim", function(h5Obj, ...) {
-  standardGeneric("getH5Dim")
-})
-
-setGeneric("getH5Type", function(h5Obj, ...) {
-  standardGeneric("getH5Type")
-})
-
-setGeneric("getH5Dataset", function(h5Obj, datasetName, ...) {
-  standardGeneric("getH5Dataset")
-})
-
-createH5Dataset <- function(h5Obj, datasetName, data) {
-  if (is.null(d <- dim(data))) {
-    d <- length(data)
-  } else {
-    data <- aperm(data)
-  }
-  .Call("h5R_write_dataset", .ePtr(h5Obj), datasetName,
-        as.integer(data), as.integer(d))
-}
-
-setGeneric("getH5Attribute", function(h5Obj, attrName, ...) {
-  standardGeneric("getH5Attribute")
-})
-
-setMethod("getH5Group", c("H5Obj", "character"), function(h5Obj, groupName) {
+setMethod("getH5Group", c("H5Container", "character"), function(h5Obj, groupName) {
   if (is.null(x <- .Call("h5R_get_group", .ePtr(h5Obj), groupName, PACKAGE = 'h5r')))
     stop(paste("Group:", groupName, "cannot be opened."))
-
   .H5Group(x, groupName)
 })
 
-createH5Group <- function(h5Obj, groupName) {
-  ## XXX: Should add a check to see if group exists.
-  z <- .Call("h5R_create_group", .ePtr(h5Obj), groupName, PACKAGE = 'h5r')
-  
-  if (z == 0) {
-    return(getH5Group(h5Obj, groupName))
-  } else {
-    stop(paste("Unable to create group:", groupName))
-  }
-}
-
 setMethod("getH5Dim", "H5DataContainer", function(h5Obj) {
   .Call('h5R_get_dims', .ePtr(h5Obj), PACKAGE = 'h5r')
 })
@@ -143,7 +89,7 @@
   mode <- match.arg(mode)
   
   if (! file.exists(fileName) && mode == 'w') {
-    .Call("h5R_finalizer", .Call("h5R_create", fileName, package = "h5R"))
+    .Call("h5R_create", fileName, package = "h5R")
   }
     
   if (! file.exists(fileName)) {
@@ -175,12 +121,10 @@
   return(o)
 }
 
-setMethod("getH5Dataset", c("H5Obj", "character"), function(h5Obj, datasetName,
-                                                            inMemory = FALSE) {
+setMethod("getH5Dataset", c("H5Container", "character"), function(h5Obj, datasetName, inMemory = FALSE) {
   if (is.null(x <- .Call("h5R_get_dataset", .ePtr(h5Obj), datasetName, PACKAGE = 'h5r'))) {
     stop(paste("Dataset:", datasetName, "cannot be opened."))
   }
- 
   o <- new("H5Dataset")
   o at ePtr <- x
   return(.initH5DataContainer(o, datasetName, inMemory))
@@ -190,12 +134,86 @@
   if (is.null(x <- .Call("h5R_get_attr", .ePtr(h5Obj), attrName, PACKAGE = 'h5r'))) {
      stop(paste("Attribute:", attrName, "cannot be opened."))
   }
-  
   o <- new("H5Attribute")
   o at ePtr <- x
   return(.initH5DataContainer(o, attrName, inMemory = TRUE))
 })
 
+##
+## Writing.
+##
+.flush <- function(h5Obj) {
+  .Call("h5R_flush", .ePtr(h5Obj))
+}
+
+setMethod("writeH5Data", c("H5Dataset"), function(h5Obj, data, offsets, extents) {
+  .Call("h5R_write_slab", .ePtr(h5Obj), as.integer(offsets), as.integer(extents), data)
+})
+
+setMethod("createH5Dataset", c("H5Container", "character"), function(h5Obj, datasetName, data,
+                                                                     dims, dType = c("integer", "double"),
+                                                                     chunkSizes = NA) {
+  mData <- missing(data)
+  mDims <- missing(dims)
+  mType <- missing(dType)
+  mChnk <- missing(chunkSizes)
+
+  if (mData && (mDims || mType)) {
+    stop("Must specify either data or dimensions and type.")
+  }
+
+  if (mData && mType) {
+    storage.mode(data) <- dType
+  }
+  
+  if (! mData) { 
+    if (is.null(dim(data))) {
+      dims <- length(data)
+    } else {
+      ## note order.
+      dims <- dim(data)
+      data <- aperm(data)
+    }
+    dType <- storage.mode(data)
+  } else {
+    dType <- match.arg(dType)
+  }
+  iType <- as.integer(match(dType, .h5Types) - 1)
+
+  if (is.na(iType)) {
+    stop("Type is not resolveable.")
+  }
+  dims  <- as.integer(dims)
+  if (mChnk)
+    chunkSizes <- rep(4096, length(dims))
+
+  .Call("h5R_create_dataset", .ePtr(h5Obj), datasetName, iType, dims, as.integer(chunkSizes))
+ 
+  if (! mData) {
+    writeH5Data(getH5Dataset(h5Obj, datasetName), data,
+                as.integer(rep(0L, length(dims))),
+                as.integer(dims))
+  }
+  .flush(h5Obj)
+  
+  return(getH5Dataset(h5Obj, datasetName))
+})
+
+setMethod("createH5Group", c("H5Container", "character"), function(h5Obj, groupName) {
+  if (h5GroupExists(h5Obj, groupName))
+    stop(paste("Group:", groupName, "exists."))
+  if (.Call("h5R_create_group", .ePtr(h5Obj), groupName, PACKAGE = 'h5r') == 0) {
+    .flush(h5Obj)
+    return(getH5Group(h5Obj, groupName))
+  } else {
+    stop(paste("Unable to create group:", groupName))
+  }
+})
+
+
+##
+## The whole slicing infrastructure.
+## 
 .internalSlice <- function(x, i, j, ..., drop = TRUE) {
   if (!.hasData(x)) {
     .putData(x, .loadDataset(x))
@@ -373,8 +391,8 @@
   }
 })
 
-## This function is written to leverage the possibility of fast contiguous
-## range access.
+## This function is written to leverage the possibility of fast
+## contiguous range access.
 setMethod("[", c("H5Dataset", "hSlab", "missing", "missing"), function(x, i) {
   if (.inMemory(x))
     stop("Not implemented for inMemory datasets.")
@@ -492,6 +510,10 @@
   return(lst)
 }
 
+h5GroupExists <- function(h5Obj, name) {
+  .Call("h5R_dataset_exists", .ePtr(h5Obj), name) == 1
+}
+
 h5DatasetExists <- function(h5Obj, name) {
   .Call("h5R_dataset_exists", .ePtr(h5Obj), name) == 1
 }
@@ -638,3 +660,30 @@
   colnames(d) <- colnames(x)
   return(d)
 })
+
+
+##
+## hSlab stuff.
+##
+hSlab <- function(start, width = NA, end = NA) {
+  stopifnot(length(start) == length(width) || length(start) == length(end))
+
+  if (any(is.na(end)) & any(is.na(width))) {
+    stop("Must specify either end or width.")
+  } else {
+    if(any(is.na(end))) {
+      width <- width
+    }
+    else {
+      width <- (end - start + 1)
+    }
+  }
+  obj <- new("hSlab")
+  obj at s <- as.integer(start)
+  obj at w <- as.integer(width)
+  return(obj)
+}
+
+setMethod("length", "hSlab", function(x) {
+  length(x at s)
+})

Modified: src/h5_wrap.c
===================================================================
--- src/h5_wrap.c	2011-04-19 15:50:17 UTC (rev 44)
+++ src/h5_wrap.c	2011-04-21 02:06:27 UTC (rev 45)
@@ -19,12 +19,19 @@
     h5_holder* h = (h5_holder*) R_ExternalPtrAddr(h5_obj);
 
     if (! h) return;
-    if (h->is_file)
+    if (h->is_file == 1) {
+	H5Fflush(HID(h5_obj), H5F_SCOPE_GLOBAL);
 	H5Fclose(HID(h5_obj));
- 
+    }
+
     Free(h);
 }
 
+SEXP h5R_flush(SEXP h5_file) {
+    H5Fflush(HID(h5_file), H5F_SCOPE_GLOBAL);
+    return ScalarInteger(0);
+}
+
 SEXP _h5R_make_holder (hid_t id, int is_file) {
     if (id < 0) {
 	return R_NilValue;
@@ -56,6 +63,7 @@
 SEXP h5R_create_group(SEXP h5_obj, SEXP group_name) {
     hid_t group = H5Gcreate(HID(h5_obj), NM(group_name), H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
     H5Gclose(group);
+    
     return ScalarInteger(0);
 }
 
@@ -179,7 +187,6 @@
     return s;
 }
 
-
 SEXP _h5R_read_vlen_str(SEXP h5_obj) {
     int __ERROR__ = 0;
     int i = -1;
@@ -270,22 +277,110 @@
     return(dta);
 }
 
-SEXP h5R_write_dataset(SEXP h5_obj, SEXP name, SEXP data, SEXP dims) {
-    Rprintf("length dims: %d\n", length(dims));
-    int i;
+hid_t _h5R_get_memtype(SEXP h5_type) {
+    hid_t memtype = -1;
+
+    switch (INTEGER(h5_type)[0]) {
+    case H5T_INTEGER: 
+	memtype = H5T_NATIVE_INT;
+	break;
+    case H5T_FLOAT:
+	memtype = H5T_NATIVE_DOUBLE;
+	break;
+    case H5T_STRING:
+	// clearly we'll wait on this.
+    default:
+	error("Unsupported class in %s.\n", __func__);
+    }
+    return memtype;
+}
+
+
+SEXP h5R_create_dataset(SEXP h5_obj, SEXP name, SEXP h5_type, SEXP dims, SEXP chunks) {
+    int i; 
+    
+    hsize_t* current_dims = (hsize_t*) Calloc(length(dims), hsize_t);
+    hsize_t* max_dims = (hsize_t*) Calloc(length(dims), hsize_t);
+    hsize_t* chunk_lens = (hsize_t*) Calloc(length(chunks), hsize_t);
+
     for (i = 0; i < length(dims); i++) {
-	Rprintf("length dim[%d]=%d\n", i, INTEGER(dims)[i]);
+	current_dims[i] = INTEGER(dims)[i];
+	max_dims[i]     = H5S_UNLIMITED;
+	chunk_lens[i]   = INTEGER(chunks)[i];
     }
+    
+    hid_t cparms = H5Pcreate(H5P_DATASET_CREATE);
+    H5Pset_chunk(cparms, length(chunks), chunk_lens);
+    
+    hid_t dataspace = H5Screate_simple(length(dims), current_dims, max_dims);
+    hid_t dataset   = H5Dcreate(HID(h5_obj), NM(name), _h5R_get_memtype(h5_type), 
+				dataspace, H5P_DEFAULT, cparms, H5P_DEFAULT);
+    
+    H5Pclose(cparms);
+    H5Dclose(dataset);
+    H5Sclose(dataspace);
+    Free(max_dims);
+    Free(current_dims);
+    Free(chunk_lens);
+  
+    return ScalarInteger(0);
+}
 
-    hsize_t* cdims = (hsize_t*) Calloc(length(dims), hsize_t);
-    for (i = 0; i < length(dims); i++)
-	cdims[i] = INTEGER(dims)[i];
+SEXP h5R_write_slab(SEXP h5_dataset, SEXP _offsets, SEXP _counts, SEXP data) {
+    int __ERROR__ = 0;
+    hid_t space = -1, memspace = -1, memtype = -1;
+    int i; 
+    void* _data; 
 
-    hid_t space = H5Screate_simple(length(dims), cdims, cdims);
-    hid_t ds = H5Dcreate(HID(h5_obj), NM(name), H5T_NATIVE_INT, space, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    H5Dwrite(ds, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, INTEGER(data));
+    int* offsets  = INTEGER(_offsets);
+    int* counts   = INTEGER(_counts);
 
-    return(ScalarInteger(0));
+    /** I'm surprised I have to do this, but it seems to be necessary. **/
+    hsize_t* _h_offsets = (hsize_t*) Calloc(length(_counts), hsize_t);
+    hsize_t* _h_counts  = (hsize_t*) Calloc(length(_counts), hsize_t);
+
+    for (i = 0; i < length(_counts); i++) {
+	_h_offsets[i] = offsets[i];
+	_h_counts[i]  = counts[i];
+    }
+
+    space = _h5R_get_space(h5_dataset);
+    H5Sselect_hyperslab(space, H5S_SELECT_SET, _h_offsets, NULL, _h_counts, NULL);
+    memspace = H5Screate_simple(length(_counts), _h_counts, NULL);
+
+    switch (INTEGER(h5R_get_type(h5_dataset))[0]) {
+    case H5T_INTEGER: 
+	memtype = H5T_NATIVE_INT;
+	_data = (void*) INTEGER(data);
+	break;
+    case H5T_FLOAT:
+	memtype = H5T_NATIVE_DOUBLE;
+	_data = (void*) REAL(data);
+	break;
+    case H5T_STRING:
+	memtype = H5Tcopy (H5T_C_S1);
+	H5Tset_size (memtype, H5T_VARIABLE);   
+	break;
+    default:
+	__ERROR__ = 1;
+    }
+    
+
+    if (__ERROR__ == 0) {
+	H5Dwrite(HID(h5_dataset), memtype, memspace, space, H5P_DEFAULT, _data);
+    }
+
+    /** clean up. **/
+    Free(_h_offsets);
+    Free(_h_counts);
+    H5Sclose(memspace);
+    H5Sclose(space);
+    
+    if (__ERROR__ == 1) {
+	error("Unsupported class in %s\n", __func__);
+    }
+
+    return R_NilValue;
 }
 
 
@@ -389,7 +484,6 @@
 
     return(r_lst);
 }
-
   
 SEXP h5R_read_attr(SEXP h5_attr) {
     SEXP dta = R_NilValue;
@@ -422,8 +516,6 @@
 /**
  * File content inspection and iteration.
  */
-
-/** Inspection **/
 SEXP h5R_attribute_exists(SEXP h5_obj, SEXP name) {
     return(ScalarInteger(H5Aexists(HID(h5_obj), NM(name))));
 }
@@ -432,13 +524,13 @@
     return(ScalarInteger(H5Lexists(HID(h5_obj), NM(name), H5P_DEFAULT)));
 }
 
+
 /** Iteration **/
 typedef struct __index_and_SEXP__ {
     int  i;
     SEXP s;
 } __index_and_SEXP__;
 
-
 herr_t _h5R_count_func(hid_t loc_id, const char *name, const H5O_info_t *info,
 		       void *operator_data) {
     int* counter = ((int *) operator_data);
@@ -513,7 +605,6 @@
 
 herr_t _h5R_name_exists(hid_t loc_id, const char *name, const H5O_info_t *info,
 			void *operator_data) {
-
     const char* probe = (const char*) operator_data;
 
     if (strcmp(name, probe) == 0) {

Modified: tests/testwrite.R
===================================================================
--- tests/testwrite.R	2011-04-19 15:50:17 UTC (rev 44)
+++ tests/testwrite.R	2011-04-21 02:06:27 UTC (rev 45)
@@ -1,26 +1,19 @@
 require(h5r)
 
+
 system("rm test.h5")
 h5 <- H5File("test.h5", 'w')
+
 listH5Contents(h5)
 
 g1 <- createH5Group( h5, "grp1" )
+d <- createH5Dataset(g1, "ds3", dType = "integer", dims = 100)
+writeH5Data(d, as.integer(1:10), 0, 10)
+writeH5Data(d, as.integer(1:10), 11, 4)
 
-createH5Dataset(g1, "ds1", a <- rep(1, 100))
-all(getH5Dataset(g1, "ds1")[] == a)
+d <- createH5Dataset(g1, "ds4", dType = "integer", dims = c(10, 2))
+writeH5Data(d, as.integer(1:10), c(0, 0), c(5, 2))
+writeH5Data(d, as.integer(1:10), c(0, 0), c(10, 2))
 
-createH5Dataset(g1, "ds2", a <- cbind(rep(1, 10), rep(2, 10)))
-all(getH5Dataset(g1, "ds2")[] == a)
-
-createH5Dataset( g1, "ds4", rbind(rep(1, 10), rep(2, 10)))
-getH5Dataset(g1, "ds4")[]
-
-ds = createH5Dataset( g1, "ds4", data = NULL, dim = NULL, chunks = NULL)
-ds[1:10, 1:10] <- dta
-ds[] 
-
-
-
-createH5Attribute( h5e, "attr1", 1:4 ) 
-
-
+d <- createH5Dataset(g1, "ds5", rbind(rnorm(10), rnorm(10)))
+d[]



More information about the H5r-commits mailing list