[H5r-commits] r30 - / R src tests

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Fri Jun 4 19:58:24 CEST 2010


Author: extemporaneousb
Date: 2010-06-04 19:58:23 +0200 (Fri, 04 Jun 2010)
New Revision: 30

Modified:
   DESCRIPTION
   NAMESPACE
   R/h5R.R
   src/h5_wrap.c
   tests/.Rhistory
   tests/testall.R
Log:
Added mechanism to grab contiguous blocks, shockingly it doesn't speed things up as much as I thought.

Modified: DESCRIPTION
===================================================================
--- DESCRIPTION	2010-05-29 01:37:24 UTC (rev 29)
+++ DESCRIPTION	2010-06-04 17:58:23 UTC (rev 30)
@@ -5,7 +5,7 @@
 Date: 2010-01-28
 Author: James Bullard
 Maintainer: <jbullard at pacificbiosciences.com>
-Depends: R (>= 2.10.0), methods, base	
+Depends: R (>= 2.10.0), methods, base
 Description: A package for interfacing to HDF5 files.
 License: LGPL
 

Modified: NAMESPACE
===================================================================
--- NAMESPACE	2010-05-29 01:37:24 UTC (rev 29)
+++ NAMESPACE	2010-06-04 17:58:23 UTC (rev 30)
@@ -3,5 +3,21 @@
 ## Export everything lacking a leading .
 exportPattern("^[^\\.]")
 
-## export the classes.
-exportClasses(H5Obj, H5File, H5Group, H5Dataset, H5Attribute, H5DataContainer)
+exportClasses(H5Obj,
+              H5File,
+              H5Group,
+              H5Dataset,
+              H5Attribute,
+              H5DataContainer)
+
+importMethodsFrom(methods, show)
+
+exportMethods(getH5Group,
+              getH5Dim,
+              getH5Type,
+              getH5Dataset,
+              getH5Attribute,
+              readDataAsVector,
+              show,
+              ncol,
+              nrow)

Modified: R/h5R.R
===================================================================
--- R/h5R.R	2010-05-29 01:37:24 UTC (rev 29)
+++ R/h5R.R	2010-06-04 17:58:23 UTC (rev 30)
@@ -18,6 +18,44 @@
 setClass("H5Dataset", contains = "H5DataContainer")
 setClass("H5Attribute", contains = "H5DataContainer")
 
+setClass("hSlab", representation = representation(s = "integer", w = "integer"))
+
+hSlab <- function(start, width = NA, end = NA) {
+  stopifnot(length(start) == length(width) || length(start) == length(end))
+
+  if (any(is.na(end)) & any(is.na(width))) {
+    stop("Must specify either end or width.")
+  } else {
+    if(any(is.na(end))) {
+      width <- width
+    }
+    else {
+      width <- (end - start + 1)
+    }
+  }
+  obj <- new("hSlab")
+  obj at s <- as.integer(start)
+  obj at w <- as.integer(width)
+  return(obj)
+}
+
+setMethod("length", "hSlab", function(x) {
+  length(x at s)
+})
+setGeneric("start", function(x, ...) {
+  standardGeneric("start")
+})
+setMethod("start", "hSlab", function(x) {
+  x at s
+})
+setGeneric("width", function(x, ...) {
+  standardGeneric("width")
+})
+setMethod("width", "hSlab", function(x) {
+  x at w
+})
+
+
 H5File <- function(fileName) {
   new("H5File", fileName)
 }
@@ -298,16 +336,16 @@
 ## This function is written to leverage the possibility of fast contiguous
 ## range access. Here matrix is a two-column matrix with start, stop.
 ## Other options will be IRanges.
-## setMethod("[", c("H5Dataset", "matrix", "missing", "missing"), function(x, i) {
-##   if (.inMemory(x))
-##     stop("Not implemented for inMemory datasets.")
-
-##   nr <- nrow(i)
-##   if (! ((nr == 1 && is.null(dim(x))) || (nr == length(dim(x)))))
-##     stop("Dimension mismatch: nrow(x) == length(dim(x))")
+setMethod("[", c("H5Dataset", "hSlab", "missing", "missing"), function(x, i) {
+  if (.inMemory(x))
+    stop("Not implemented for inMemory datasets.")
   
-##   readSlab(x, i[,1], i[,2] - i[,1] + 1)
-## })
+  nr <- length(i)
+  if (! ((nr == 1 && is.null(dim(x))) || (nr == length(dim(x)))))
+    stop("Dimension mismatch: nrow(x) == length(dim(x))")
+  
+  readSlab(x, start(i), width(i))
+})
 
 ##
 ## Note: the two reverses.
@@ -368,8 +406,12 @@
 
 setMethod("dim", "H5DataContainer", function(x) if (length(x at dims) < 2) NULL else x at dims)
 setMethod("length", "H5DataContainer", function(x) if (is.null(dim(x))) x at dims else prod(x at dims))
-setMethod("nrow", "H5DataContainer", function(x) x at dims[1])
-setMethod("ncol", "H5DataContainer", function(x) x at dims[2])
+setMethod("nrow", "H5DataContainer", function(x) {
+  x at dims[1]
+})
+setMethod("ncol", "H5DataContainer", function(x) {
+  x at dims[2]
+})
 
 
 ##
@@ -384,27 +426,41 @@
   contents <- .listH5Contents(h5Obj)
   
   lst <- lapply(contents, function(a) {
-    h5Obj <- switch(as.character(a[[2]]), '0' = { getH5Group(h5Obj, a[[1]]) }, '1' = { getH5Dataset(h5Obj, a[[1]]) })
+    h5Obj <- switch(as.character(a[[2]]),
+                    '0' = { getH5Group(h5Obj, a[[1]]) },
+                    '1' = { getH5Dataset(h5Obj, a[[1]]) })
 
     if (class(h5Obj) == "H5Dataset") {
       dim <- getH5Dim(h5Obj)
     } else {
       dim <- NA
     }
-    list(name = a[[1]], type = a[[2]], attributes = .listH5Attributes(h5Obj), dim = dim)
+    list(name = a[[1]], type = a[[2]], attributes = .listH5Attributes(h5Obj),
+         dim = dim)
   })
-  class(lst) <- "H5ContentList"
   names(lst) <- sapply(lst, "[[", 1)
-  
   return(lst)
 }
 
-print.H5ContentList <- function(x, ...) {
-  ## This is a pretty way to print the thing, but
-  ## less so to compute on.
-  d <- as.data.frame(do.call(rbind, x))[, -2]
-  print(d)
+##
+## Does the name exist directly below the h5Obj.
+##
+## XXX: This function might be implemented in C more
+##      efficiently, i.e., short-circuiting when the
+##      object is found, but I have avoided using the
+##      Lightweight interface.
+h5Exists <- function(h5Obj, name) {
+  a <- .listH5Contents(h5Obj)
+  n <- sapply(a, "[[", 1)
+  s <- sapply(strsplit(n, "/"), "[[", 1)
+  any(s == name)
+
+  ## This call determines if an object exists anywhere in
+  ## the file with 'name'
+  ##
+  ## return(.Call("h5R_name_exists", .ePtr(h5Obj), name))
 }
+
   
 
 

Modified: src/h5_wrap.c
===================================================================
--- src/h5_wrap.c	2010-05-29 01:37:24 UTC (rev 29)
+++ src/h5_wrap.c	2010-06-04 17:58:23 UTC (rev 30)
@@ -354,7 +354,7 @@
 typedef struct __index_and_SEXP__ {
     int  i;
     SEXP s;
-} index_and_SEXP;
+} __index_and_SEXP__;
 
 
 herr_t _h5R_count_func(hid_t loc_id, const char *name, const H5O_info_t *info,
@@ -367,7 +367,7 @@
 
 herr_t _h5R_capture_name(hid_t loc_id, const char *name, const H5O_info_t *info,
 			 void *operator_data) {
-    index_and_SEXP* od = (index_and_SEXP*) operator_data;
+    __index_and_SEXP__* od = (__index_and_SEXP__*) operator_data;
     SET_STRING_ELT(od->s, (od->i)++, mkChar(name));
     
     return 0;
@@ -375,7 +375,7 @@
 
 herr_t _h5R_capture_name_and_type(hid_t loc_id, const char *name, const H5O_info_t *info,
 				  void *operator_data) {
-    index_and_SEXP* od = (index_and_SEXP*) operator_data;
+    __index_and_SEXP__* od = (__index_and_SEXP__*) operator_data;
     SEXP lst, str;
 
     PROTECT(lst = allocVector(VECSXP, 2));
@@ -392,7 +392,7 @@
 
 SEXP h5R_list_contents(SEXP h5_obj) {
     int counter = 0; 
-    index_and_SEXP* isxp = (index_and_SEXP*) Calloc(1, index_and_SEXP);
+    __index_and_SEXP__* isxp = (__index_and_SEXP__*) Calloc(1, __index_and_SEXP__);
     SEXP dta;
 
     H5Ovisit (HID(h5_obj), H5_INDEX_NAME, H5_ITER_NATIVE, _h5R_count_func, (void*) &counter);
@@ -415,7 +415,7 @@
 
     H5Aiterate(HID(h5_obj), H5_INDEX_NAME, H5_ITER_NATIVE, &n, (H5A_operator2_t) _h5R_count_func, (void*) &counter);
     
-    index_and_SEXP* isxp = (index_and_SEXP*) Calloc(1, index_and_SEXP);
+    __index_and_SEXP__* isxp = (__index_and_SEXP__*) Calloc(1, __index_and_SEXP__);
     PROTECT(dta = allocVector(STRSXP, counter));
     isxp->s = dta;
     isxp->i = 0;
@@ -428,3 +428,22 @@
 
     return(dta);
 }
+
+herr_t _h5R_name_exists(hid_t loc_id, const char *name, const H5O_info_t *info,
+			void *operator_data) {
+
+    const char* probe = (const char*) operator_data;
+
+    if (strcmp(name, probe) == 0) {
+	return 1; //short-circuit.
+    } else {
+	return 0; //continue
+    }
+}
+
+SEXP h5R_name_exists(SEXP h5_obj, SEXP _name) {
+    const char* name = NM(_name);
+    herr_t v = H5Ovisit (HID(h5_obj), H5_INDEX_NAME, H5_ITER_NATIVE, _h5R_name_exists, 
+			 (void*) name);
+    return ScalarLogical(v); 
+}

Modified: tests/.Rhistory
===================================================================
--- tests/.Rhistory	2010-05-29 01:37:24 UTC (rev 29)
+++ tests/.Rhistory	2010-06-04 17:58:23 UTC (rev 30)
@@ -1,67 +1,4 @@
-ds3[1,1,,drop=TRUE]
-id3[1,1,,drop=TRUE]
-id3[1,1,,drop=TRUE]
-ds3[1,1,,drop=TRUE]
-ds3[1,1,,drop=TRUE]
-n
-extras
-start
-nArgs
-nargs()
-kall
-nArgs
-Q
-?nargs
-ds3
-ds3[]
-ds3[]
-ds3[]
-ds3[]
-TH(action="print")
-TH(action="print")
-source("/home/NANOFLUIDICS/jbullard/projects/software/R/common/h5r/tests/testall.R")
-source("/home/NANOFLUIDICS/jbullard/projects/software/R/common/h5r/tests/testall.R")
-source("/home/NANOFLUIDICS/jbullard/projects/software/R/common/h5r/tests/testall.R")
-ds9[]
-x = ds9[]
-ds9
-ds9[,,,,,]
-x =ds9[,,,,,]
-x =ds9[,,,,,,,,,]
-x =ds9[,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,]
-x =ds3[,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,]
-x
-ds3[,,]
-ds3[,,,,,]
-id3[,,,,,]
-id3[1,1,]
-ds9[1,1,]
-x = ds9[1,1,]
-x = ds9[1,,]
-x = ds9[,,]
-x = ds9[,,,1:2,]
-x = ds9[,,,1:2,]
-class(quote(x))
-as.character(quote(x))
-x = ds9[,,,1:2,]
-x = ds9[,,,1:2,]
-x = ds9[,,,1:2]
-x = ds9[,1:2]
-x
-x
-dim(x)
 x = ds9[,1:2,,1]
-traceback()
-x = ds9[,1:2,,1]
-n
-names(kall)
-j
-i
-d
-kall
-kall
-Q
-x = ds9[,1:2,,1]
 x
 dim(ds9[,1:2,,1])
 x = ds9[,1:2,,1]
@@ -148,3 +85,68 @@
 listH5Contents(g)
 length(listH5Contents(g))
 TH(action='print')
+TH(action='print')
+g
+h5Exists(g, "ds_1")
+g
+h5Exists(g, "ds_2")
+h5Exists(g, "ds_1")
+g
+listH5Contents(g)
+h5Exists(g, "ds_1")
+h5Exists(g, "ds_1")
+h5Exists(g, "ds_1")
+h5Exists(g, "ds_1")
+h5Exists(g, "ds_1")
+h5Exists(g, "ds_323432")
+ls9)
+ls()
+f
+h5Exists(g, "group_1")
+listH5Contents(f)
+h5Exists(g, "ds_1")
+h5Exists(g, "group_1")
+h5r:::.listH5Contents
+h5r:::.listH5Contents(f)
+sapply(h5r:::.listH5Contents(f), "[[", 1)
+h5Exists(g, "group_1")
+h5Exists(g, "group_1")
+h5Exists(f, "group_1")
+h5Exists(f, "group_1")
+h5Exists(f, "group_1")
+h5Exists(f, "group_3")
+h5Exists(g, "group_3")
+h5Exists(g, "ds_1")
+h5Exists(g, "ds_sdf1")
+TH(action='print')
+ds7
+dim(ds7)
+c(c(1,1,1), dim(ds7))
+cbind(c(1,1,1), dim(ds7))
+hSlab(cbind(c(1,1,1), dim(ds7)))
+apply((cbind(c(1,1,1), dim(ds7))), runif)
+apply((cbind(c(1,1,1), dim(ds7))), 1, function(b) runif(1, b[1], b[2]))
+floor(apply((cbind(c(1,1,1), dim(ds7))), 1, function(b) runif(1, b[1], b[2])))
+floor(apply((cbind(c(1,1,1), dim(ds7))), 1, function(b) { a <- runif(1, b[1], b[2]); a + floor(runif(1, a, b[2])) })
+)
+floor(apply((cbind(c(1,1,1), dim(ds7))), 1, function(b) { a <- runif(1, b[1], b[2]); c(a, a + floor(runif(1, a, b[2]))) }))
+t(floor(apply((cbind(c(1,1,1), dim(ds7))), 1, function(b) { a <- runif(1, b[1], b[2]); c(a, a + floor(runif(1, a, b[2]))) })))
+m
+traceback()
+m
+m
+m
+m
+apply(m, 2, diff)
+TH(action='print')
+TH(action='print')
+m
+ds7[m[1,1]:m[2,1],
+                m[1,2]:m[2,2],
+                m[1,3]:m[2,3]]
+)
+)
+)
+TH(action='print')
+TH(action='print')
+TH(action='print')

Modified: tests/testall.R
===================================================================
--- tests/testall.R	2010-05-29 01:37:24 UTC (rev 29)
+++ tests/testall.R	2010-06-04 17:58:23 UTC (rev 30)
@@ -79,6 +79,10 @@
 TH("ds_1 dim, 4", is.null(dim(ds1[, 1])))
 TH("ds_1 dim, 5", assertError(ds1[,1:12]))
 
+## test existence.
+TH("existence, 1", h5Exists(g, "ds_1"))
+TH("existence, 2", h5Exists(g, "ds_232") == FALSE)
+
 ## string dataset
 ds2M <- getH5Dataset(g, "ds_2", inMemory = T)
 ds2 <- getH5Dataset(g, "ds_2", inMemory = F)
@@ -240,11 +244,36 @@
 TH("test 0-vs-1 based", all(ds8[1,1:5] == 1:5))
 
 
-TH("matrix grab.",
-   all(ds8[rbind(1:2,1:2)] == ds8[1:2, 1:2]) &
-   all(ds8[] == ds8[ cbind(c(1,1), dim(ds8)) ]))
+TH("hSlab grab",
+   all(ds8[hSlab(c(1,1), end = c(2,2))] == ds8[1:2, 1:2]) &
+   all(ds8[] == ds8[ hSlab(c(1,1), end = dim(ds8)) ]))
+
+TH("normal time", {
+  all(replicate(10000, {
+    m <- apply(cbind(c(1,1,1), dim(ds7)), 1, function(b) {
+      a <- runif(1, b[1], b[2])
+      floor(c(a, runif(1, a, b[2])))
+    })
+    ds7[m[1,1]:m[2,1],
+        m[1,2]:m[2,2],
+        m[1,3]:m[2,3]]
+    return(TRUE)
+  }))
+})
+
+TH("hSlab time", {
+  all(replicate(10000, {
+    m <- apply(cbind(c(1,1,1), dim(ds7)), 1, function(b) {
+      a <- runif(1, b[1], b[2])
+      floor(c(a, runif(1, a, b[2])))
+    })
+    ds7[hSlab(m[1,], end = m[2,])]
+    return(TRUE)
+  }))
+})
    
 
+
 TH(action = "print")
 TH(action = "throw")
 



More information about the H5r-commits mailing list