[H5r-commits] r88 - / R inst inst/h5_files src tests

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Wed Oct 3 20:53:44 CEST 2012


Author: extemporaneousb
Date: 2012-10-03 20:53:44 +0200 (Wed, 03 Oct 2012)
New Revision: 88

Added:
   inst/h5_files/compound.h5
   tests/testCompound.R
Modified:
   DESCRIPTION
   NEWS
   R/h5R.R
   inst/CRAN_prepare_h5r.sh
   inst/h5_files/ex_1.h5
   inst/h5_files/makeH5.py
   src/h5_wrap.c
Log:

Initial support for compound data types. This support is limited in two ways: first, no writing support is available; second, the data type support is not very extensive.



Modified: DESCRIPTION
===================================================================
--- DESCRIPTION	2012-08-08 20:10:34 UTC (rev 87)
+++ DESCRIPTION	2012-10-03 18:53:44 UTC (rev 88)
@@ -1,7 +1,7 @@
 Package: h5r
 Type: Package
 Title: Interface to HDF5 Files
-Version: 1.4.1
+Version: 1.4.4
 Date: 2011-09-19
 Author: James Bullard
 Maintainer: <jbullard at pacificbiosciences.com>

Modified: NEWS
===================================================================
--- NEWS	2012-08-08 20:10:34 UTC (rev 87)
+++ NEWS	2012-10-03 18:53:44 UTC (rev 88)
@@ -4,7 +4,17 @@
         *                                                *
         **************************************************
 
+                CHANGES IN h5r VERSION 1.4.4
 
+NEW FEATURES
+
+    o Added basic support for reading compound data sets. This support
+    works reasonably well for the examples that I have tried; one fear
+    is that memory is not being properly managed with compound data
+    access. Additionally, access to named vectors does not yet exist.
+
+
+
                 CHANGES IN h5r VERSION 1.4
 
 NEW FEATURES

Modified: R/h5R.R
===================================================================
--- R/h5R.R	2012-08-08 20:10:34 UTC (rev 87)
+++ R/h5R.R	2012-10-03 18:53:44 UTC (rev 88)
@@ -506,7 +506,9 @@
   if (! all((offsets + dims - 1) <= dim(h5Dataset)))
     stop("error invalid slice specification in readSlab.")
   d <- .myCall("h5R_read_dataset", .ePtr(h5Dataset), as.integer(offsets - 1), as.integer(dims))
-  dim(d) <- rev(dims)
+  
+  if (class(d) != 'list') ## compound datasets produce lists.
+    dim(d) <- rev(dims)
 
   if (! is.null(dim(h5Dataset))) aperm(d) else d
 }

Modified: inst/CRAN_prepare_h5r.sh
===================================================================
--- inst/CRAN_prepare_h5r.sh	2012-08-08 20:10:34 UTC (rev 87)
+++ inst/CRAN_prepare_h5r.sh	2012-10-03 18:53:44 UTC (rev 88)
@@ -23,5 +23,8 @@
 # 4. build
 R CMD build $TARGET
 
+BUILT_TARGET=h5r_`cat $TARGET/DESCRIPTION | grep Version | sed 's/Version: //g'`.tar.gz
+
 # 5. check
-R CMD check --as-cran --timings --install-args="--no-lock --preclean" `ls h5r_*tar.gz`
+R CMD check --as-cran --timings --install-args="--no-lock --preclean" $BUILT_TARGET
+

Added: inst/h5_files/compound.h5
===================================================================
(Binary files differ)


Property changes on: inst/h5_files/compound.h5
___________________________________________________________________
Added: svn:mime-type
   + application/octet-stream

Modified: inst/h5_files/ex_1.h5
===================================================================
(Binary files differ)

Modified: inst/h5_files/makeH5.py
===================================================================
--- inst/h5_files/makeH5.py	2012-08-08 20:10:34 UTC (rev 87)
+++ inst/h5_files/makeH5.py	2012-10-03 18:53:44 UTC (rev 88)
@@ -59,3 +59,45 @@
 ## shrink the file for submission.
 os.system("h5repack -v -f GZIP=7 %s %s-repacked" % (FILE, FILE))
 os.system("mv %s-repacked %s" % (FILE, FILE))
+
+##
+## now make the compound file.
+##
+FILE = 'compound.h5' 
+
+if (os.path.exists(FILE)):
+    os.remove(FILE)
+
+f = h5py.File(FILE)
+
+## no strings
+r = recarray(shape = (100,), dtype = [('a',int), ('b', float)])
+for a in xrange(0, r.shape[0]):
+    r.a[a] = random.randint(10000)
+    r.b[a] = random.rand(1)[0]
+f.create_dataset('NS', data = r)
+
+## fixed len
+r = recarray(shape = (100,), dtype = [('a',int), ('b', float), ('c', 'S10')])
+for a in xrange(0, r.shape[0]):
+    r.a[a] = random.randint(10000)
+    r.b[a] = random.rand(1)[0]
+    sq = "".join([['a','c','g','t'][random.randint(4)] for x in 
+                  xrange(0, random.randint(9))]) + "|"
+    r.c[a] = sq
+f.create_dataset("S10", data = r)
+
+## vlen
+r = recarray(shape = (100,), dtype = [('a',int), ('b', float), ('c', h5py.new_vlen(str))])
+for a in xrange(0, r.shape[0]):
+    r.a[a] = random.randint(10000)
+    r.b[a] = random.rand(1)[0]
+    sq = "".join([['a','c','g','t'][random.randint(4)] for x in 
+                  xrange(0, random.randint(9))]) + "|"
+    r.c[a] = sq
+f.create_dataset("SVLEN", data = r)
+
+## close it up and shrink it. 
+f.close()
+os.system("h5repack -v -f GZIP=7 %s %s-repacked" % (FILE, FILE))
+os.system("mv %s-repacked %s" % (FILE, FILE))

Modified: src/h5_wrap.c
===================================================================
--- src/h5_wrap.c	2012-08-08 20:10:34 UTC (rev 87)
+++ src/h5_wrap.c	2012-10-03 18:53:44 UTC (rev 88)
@@ -252,7 +252,8 @@
   UNPROTECT(1); 
 
   if (_h5R_is_vlen(h5_obj)) {
-    // It doesn't like the H5S_ALL space in the vlen_reclaim.
+    // It doesn't like the H5S_ALL space in the vlen_reclaim which can
+    // be passed in.
     if (memspace == H5S_ALL) {
       hid_t space = _h5R_get_space(h5_obj);
       H5Dvlen_reclaim (memtype, space, H5P_DEFAULT, buf);
@@ -268,8 +269,109 @@
   return dta;
 }
 
+/** XXX: This is probably leaky **/
 SEXP _h5R_read_compound(SEXP h5_obj, int nelts, hid_t memspace, hid_t filespace) {
-  return R_NilValue;
+  hid_t hdty   = _h5R_get_type(h5_obj);
+  int nmembers = H5Tget_nmembers(hdty);
+  int c_size   = 0;
+  SEXP res     = R_NilValue;
+  hid_t memtype, strtype; 
+  int has_str  = -1;
+
+  for (int i = 0; i < nmembers; i++) 
+    c_size += (H5Tget_size(H5Tget_member_type(hdty, i)));
+
+  memtype = H5Tcreate (H5T_COMPOUND, c_size);
+  PROTECT(res = allocVector(VECSXP, nmembers));
+
+  /** construct the memtype so you can read in the data. **/
+  c_size = 0;
+  for (int i = 0; i < nmembers; i++) {
+    switch (H5Tget_member_class(hdty, i)) {
+    case H5T_INTEGER: 
+      H5Tinsert (memtype, H5Tget_member_name(hdty, i), c_size, H5T_NATIVE_INT);
+      SET_VECTOR_ELT(res, i, allocVector(INTSXP, nelts));
+      break;
+    case H5T_FLOAT:
+      H5Tinsert (memtype, H5Tget_member_name(hdty, i), c_size, H5T_NATIVE_DOUBLE);
+      SET_VECTOR_ELT(res, i, allocVector(REALSXP, nelts));
+      break;
+    case H5T_STRING:
+      strtype = H5Tcopy (H5T_C_S1);
+      has_str = 1;
+      if (H5Tis_variable_str(H5Tget_member_type(hdty, i))) {
+	H5Tset_size(strtype, H5T_VARIABLE);
+	H5Tinsert(memtype, H5Tget_member_name(hdty, i), c_size, strtype);
+      } else {
+	H5Tset_size(strtype, H5Tget_size(H5Tget_member_type(hdty, i)));
+	H5Tinsert(memtype, H5Tget_member_name(hdty, i), c_size, strtype);
+      }
+      SET_VECTOR_ELT(res, i, allocVector(STRSXP, nelts));
+      break;
+    default:
+      error("Unsupported class %d in %s\n", H5Tget_member_class(hdty, i), __func__);
+      break;
+    }
+    c_size += (H5Tget_size(H5Tget_member_type(hdty, i)));
+  }
+
+  /** read **/
+  void* buf = malloc(c_size*nelts);
+  H5Dread (HID(h5_obj), memtype, memspace, filespace, H5P_DEFAULT, buf);
+  
+  /** put the results into a list. **/
+  for (int i = 0; i < nelts; i++) {
+    int current_offset = 0;
+    for (int j = 0; j < nmembers; j++) {
+      switch (H5Tget_member_class(hdty, j)) {
+      case H5T_INTEGER: 
+	INTEGER(VECTOR_ELT(res, j))[i] = ((int*) (buf + i*c_size + current_offset))[0];
+	break;
+      case H5T_FLOAT:
+	REAL(VECTOR_ELT(res, j))[i] = ((double*) (buf + i*c_size + current_offset))[0];
+	break;
+      case H5T_STRING:
+	if (H5Tis_variable_str(H5Tget_member_type(hdty, j))) {
+	  SET_STRING_ELT(VECTOR_ELT(res, j), i, mkChar(((char**) (buf + i*c_size + current_offset))[0]));
+	} else {
+	  SET_STRING_ELT(VECTOR_ELT(res, j), i, mkChar(((char*) (buf + i*c_size + current_offset))));
+	}
+	break;
+      default:
+	error("Unsupported class in %s\n", __func__);
+	break;
+      }
+      current_offset += H5Tget_size(H5Tget_member_type(hdty, j));
+    }
+  }
+
+  /** names **/
+  SEXP lst_names;
+
+  PROTECT(lst_names = allocVector(VECSXP, nmembers));
+  for (int j = 0; j < nmembers; j++) {
+    SET_VECTOR_ELT(lst_names, j, mkChar(H5Tget_member_name(hdty, j)));
+  }
+  setAttrib(res, R_NamesSymbol, lst_names);
+  
+  if (has_str > 0) { 
+    /** cleanup **/
+    if (memspace == H5S_ALL) {
+      hid_t space = _h5R_get_space(h5_obj);
+      H5Dvlen_reclaim (memtype, space, H5P_DEFAULT, buf);
+      H5Sclose(space);
+    } else {
+      H5Dvlen_reclaim (memtype, memspace, H5P_DEFAULT, buf);
+    }
+    H5Tclose(strtype);
+  }
+  free(buf);
+  H5Tclose(memtype);
+  H5Tclose(hdty);
+
+  UNPROTECT(2);
+ 
+  return res;
 }
 
 SEXP h5R_read_dataset_all(SEXP h5_dataset) {

Added: tests/testCompound.R
===================================================================
--- tests/testCompound.R	                        (rev 0)
+++ tests/testCompound.R	2012-10-03 18:53:44 UTC (rev 88)
@@ -0,0 +1,24 @@
+require(h5r)
+
+source("tinyTestHarness.R")
+
+##
+## Make a new TestHarness.
+##
+TH <- TestHarness()
+
+file <- system.file("h5_files", "compound.h5", package = 'h5r')
+f <- H5File(file)
+
+dsets <- c("NS", "S10", "SVLEN") 
+
+for (dset in dsets) {
+  d <- getH5Dataset(f, dset)
+  TH(paste(dset, "read", sep = "-"), !is.null(d[]))
+  TH(paste(dset, "data.frame", sep = "-"),
+     nrow(as.data.frame(d[1:10])) == 10)
+  TH(paste(dset, "data.frame 2", sep = "-"),
+     all(as.data.frame(d[1:10]) == as.data.frame(d[10:1])[10:1,]))
+}
+
+



More information about the H5r-commits mailing list