[H5r-commits] r30 - / R src tests
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Fri Jun 4 19:58:24 CEST 2010
Author: extemporaneousb
Date: 2010-06-04 19:58:23 +0200 (Fri, 04 Jun 2010)
New Revision: 30
Modified:
DESCRIPTION
NAMESPACE
R/h5R.R
src/h5_wrap.c
tests/.Rhistory
tests/testall.R
Log:
Added mechanism to grab contiguous blocks, shockingly it doesn't speed things up as much as I thought.
Modified: DESCRIPTION
===================================================================
--- DESCRIPTION 2010-05-29 01:37:24 UTC (rev 29)
+++ DESCRIPTION 2010-06-04 17:58:23 UTC (rev 30)
@@ -5,7 +5,7 @@
Date: 2010-01-28
Author: James Bullard
Maintainer: <jbullard at pacificbiosciences.com>
-Depends: R (>= 2.10.0), methods, base
+Depends: R (>= 2.10.0), methods, base
Description: A package for interfacing to HDF5 files.
License: LGPL
Modified: NAMESPACE
===================================================================
--- NAMESPACE 2010-05-29 01:37:24 UTC (rev 29)
+++ NAMESPACE 2010-06-04 17:58:23 UTC (rev 30)
@@ -3,5 +3,21 @@
## Export everything lacking a leading .
exportPattern("^[^\\.]")
-## export the classes.
-exportClasses(H5Obj, H5File, H5Group, H5Dataset, H5Attribute, H5DataContainer)
+exportClasses(H5Obj,
+ H5File,
+ H5Group,
+ H5Dataset,
+ H5Attribute,
+ H5DataContainer)
+
+importMethodsFrom(methods, show)
+
+exportMethods(getH5Group,
+ getH5Dim,
+ getH5Type,
+ getH5Dataset,
+ getH5Attribute,
+ readDataAsVector,
+ show,
+ ncol,
+ nrow)
Modified: R/h5R.R
===================================================================
--- R/h5R.R 2010-05-29 01:37:24 UTC (rev 29)
+++ R/h5R.R 2010-06-04 17:58:23 UTC (rev 30)
@@ -18,6 +18,44 @@
setClass("H5Dataset", contains = "H5DataContainer")
setClass("H5Attribute", contains = "H5DataContainer")
+setClass("hSlab", representation = representation(s = "integer", w = "integer"))
+
+hSlab <- function(start, width = NA, end = NA) {
+ stopifnot(length(start) == length(width) || length(start) == length(end))
+
+ if (any(is.na(end)) & any(is.na(width))) {
+ stop("Must specify either end or width.")
+ } else {
+ if(any(is.na(end))) {
+ width <- width
+ }
+ else {
+ width <- (end - start + 1)
+ }
+ }
+ obj <- new("hSlab")
+ obj at s <- as.integer(start)
+ obj at w <- as.integer(width)
+ return(obj)
+}
+
+setMethod("length", "hSlab", function(x) {
+ length(x at s)
+})
+setGeneric("start", function(x, ...) {
+ standardGeneric("start")
+})
+setMethod("start", "hSlab", function(x) {
+ x at s
+})
+setGeneric("width", function(x, ...) {
+ standardGeneric("width")
+})
+setMethod("width", "hSlab", function(x) {
+ x at w
+})
+
+
H5File <- function(fileName) {
new("H5File", fileName)
}
@@ -298,16 +336,16 @@
## This function is written to leverage the possibility of fast contiguous
## range access. Here matrix is a two-column matrix with start, stop.
## Other options will be IRanges.
-## setMethod("[", c("H5Dataset", "matrix", "missing", "missing"), function(x, i) {
-## if (.inMemory(x))
-## stop("Not implemented for inMemory datasets.")
-
-## nr <- nrow(i)
-## if (! ((nr == 1 && is.null(dim(x))) || (nr == length(dim(x)))))
-## stop("Dimension mismatch: nrow(x) == length(dim(x))")
+setMethod("[", c("H5Dataset", "hSlab", "missing", "missing"), function(x, i) {
+ if (.inMemory(x))
+ stop("Not implemented for inMemory datasets.")
-## readSlab(x, i[,1], i[,2] - i[,1] + 1)
-## })
+ nr <- length(i)
+ if (! ((nr == 1 && is.null(dim(x))) || (nr == length(dim(x)))))
+ stop("Dimension mismatch: nrow(x) == length(dim(x))")
+
+ readSlab(x, start(i), width(i))
+})
##
## Note: the two reverses.
@@ -368,8 +406,12 @@
setMethod("dim", "H5DataContainer", function(x) if (length(x at dims) < 2) NULL else x at dims)
setMethod("length", "H5DataContainer", function(x) if (is.null(dim(x))) x at dims else prod(x at dims))
-setMethod("nrow", "H5DataContainer", function(x) x at dims[1])
-setMethod("ncol", "H5DataContainer", function(x) x at dims[2])
+setMethod("nrow", "H5DataContainer", function(x) {
+ x at dims[1]
+})
+setMethod("ncol", "H5DataContainer", function(x) {
+ x at dims[2]
+})
##
@@ -384,27 +426,41 @@
contents <- .listH5Contents(h5Obj)
lst <- lapply(contents, function(a) {
- h5Obj <- switch(as.character(a[[2]]), '0' = { getH5Group(h5Obj, a[[1]]) }, '1' = { getH5Dataset(h5Obj, a[[1]]) })
+ h5Obj <- switch(as.character(a[[2]]),
+ '0' = { getH5Group(h5Obj, a[[1]]) },
+ '1' = { getH5Dataset(h5Obj, a[[1]]) })
if (class(h5Obj) == "H5Dataset") {
dim <- getH5Dim(h5Obj)
} else {
dim <- NA
}
- list(name = a[[1]], type = a[[2]], attributes = .listH5Attributes(h5Obj), dim = dim)
+ list(name = a[[1]], type = a[[2]], attributes = .listH5Attributes(h5Obj),
+ dim = dim)
})
- class(lst) <- "H5ContentList"
names(lst) <- sapply(lst, "[[", 1)
-
return(lst)
}
-print.H5ContentList <- function(x, ...) {
- ## This is a pretty way to print the thing, but
- ## less so to compute on.
- d <- as.data.frame(do.call(rbind, x))[, -2]
- print(d)
+##
+## Does the name exist directly below the h5Obj.
+##
+## XXX: This function might be implemented in C more
+## efficiently, i.e., short-circuiting when the
+## object is found, but I have avoided using the
+## Lightweight interface.
+h5Exists <- function(h5Obj, name) {
+ a <- .listH5Contents(h5Obj)
+ n <- sapply(a, "[[", 1)
+ s <- sapply(strsplit(n, "/"), "[[", 1)
+ any(s == name)
+
+ ## This call determines if an object exists anywhere in
+ ## the file with 'name'
+ ##
+ ## return(.Call("h5R_name_exists", .ePtr(h5Obj), name))
}
+
Modified: src/h5_wrap.c
===================================================================
--- src/h5_wrap.c 2010-05-29 01:37:24 UTC (rev 29)
+++ src/h5_wrap.c 2010-06-04 17:58:23 UTC (rev 30)
@@ -354,7 +354,7 @@
typedef struct __index_and_SEXP__ {
int i;
SEXP s;
-} index_and_SEXP;
+} __index_and_SEXP__;
herr_t _h5R_count_func(hid_t loc_id, const char *name, const H5O_info_t *info,
@@ -367,7 +367,7 @@
herr_t _h5R_capture_name(hid_t loc_id, const char *name, const H5O_info_t *info,
void *operator_data) {
- index_and_SEXP* od = (index_and_SEXP*) operator_data;
+ __index_and_SEXP__* od = (__index_and_SEXP__*) operator_data;
SET_STRING_ELT(od->s, (od->i)++, mkChar(name));
return 0;
@@ -375,7 +375,7 @@
herr_t _h5R_capture_name_and_type(hid_t loc_id, const char *name, const H5O_info_t *info,
void *operator_data) {
- index_and_SEXP* od = (index_and_SEXP*) operator_data;
+ __index_and_SEXP__* od = (__index_and_SEXP__*) operator_data;
SEXP lst, str;
PROTECT(lst = allocVector(VECSXP, 2));
@@ -392,7 +392,7 @@
SEXP h5R_list_contents(SEXP h5_obj) {
int counter = 0;
- index_and_SEXP* isxp = (index_and_SEXP*) Calloc(1, index_and_SEXP);
+ __index_and_SEXP__* isxp = (__index_and_SEXP__*) Calloc(1, __index_and_SEXP__);
SEXP dta;
H5Ovisit (HID(h5_obj), H5_INDEX_NAME, H5_ITER_NATIVE, _h5R_count_func, (void*) &counter);
@@ -415,7 +415,7 @@
H5Aiterate(HID(h5_obj), H5_INDEX_NAME, H5_ITER_NATIVE, &n, (H5A_operator2_t) _h5R_count_func, (void*) &counter);
- index_and_SEXP* isxp = (index_and_SEXP*) Calloc(1, index_and_SEXP);
+ __index_and_SEXP__* isxp = (__index_and_SEXP__*) Calloc(1, __index_and_SEXP__);
PROTECT(dta = allocVector(STRSXP, counter));
isxp->s = dta;
isxp->i = 0;
@@ -428,3 +428,22 @@
return(dta);
}
+
+herr_t _h5R_name_exists(hid_t loc_id, const char *name, const H5O_info_t *info,
+ void *operator_data) {
+
+ const char* probe = (const char*) operator_data;
+
+ if (strcmp(name, probe) == 0) {
+ return 1; //short-circuit.
+ } else {
+ return 0; //continue
+ }
+}
+
+SEXP h5R_name_exists(SEXP h5_obj, SEXP _name) {
+ const char* name = NM(_name);
+ herr_t v = H5Ovisit (HID(h5_obj), H5_INDEX_NAME, H5_ITER_NATIVE, _h5R_name_exists,
+ (void*) name);
+ return ScalarLogical(v);
+}
Modified: tests/.Rhistory
===================================================================
--- tests/.Rhistory 2010-05-29 01:37:24 UTC (rev 29)
+++ tests/.Rhistory 2010-06-04 17:58:23 UTC (rev 30)
@@ -1,67 +1,4 @@
-ds3[1,1,,drop=TRUE]
-id3[1,1,,drop=TRUE]
-id3[1,1,,drop=TRUE]
-ds3[1,1,,drop=TRUE]
-ds3[1,1,,drop=TRUE]
-n
-extras
-start
-nArgs
-nargs()
-kall
-nArgs
-Q
-?nargs
-ds3
-ds3[]
-ds3[]
-ds3[]
-ds3[]
-TH(action="print")
-TH(action="print")
-source("/home/NANOFLUIDICS/jbullard/projects/software/R/common/h5r/tests/testall.R")
-source("/home/NANOFLUIDICS/jbullard/projects/software/R/common/h5r/tests/testall.R")
-source("/home/NANOFLUIDICS/jbullard/projects/software/R/common/h5r/tests/testall.R")
-ds9[]
-x = ds9[]
-ds9
-ds9[,,,,,]
-x =ds9[,,,,,]
-x =ds9[,,,,,,,,,]
-x =ds9[,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,]
-x =ds3[,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,]
-x
-ds3[,,]
-ds3[,,,,,]
-id3[,,,,,]
-id3[1,1,]
-ds9[1,1,]
-x = ds9[1,1,]
-x = ds9[1,,]
-x = ds9[,,]
-x = ds9[,,,1:2,]
-x = ds9[,,,1:2,]
-class(quote(x))
-as.character(quote(x))
-x = ds9[,,,1:2,]
-x = ds9[,,,1:2,]
-x = ds9[,,,1:2]
-x = ds9[,1:2]
-x
-x
-dim(x)
x = ds9[,1:2,,1]
-traceback()
-x = ds9[,1:2,,1]
-n
-names(kall)
-j
-i
-d
-kall
-kall
-Q
-x = ds9[,1:2,,1]
x
dim(ds9[,1:2,,1])
x = ds9[,1:2,,1]
@@ -148,3 +85,68 @@
listH5Contents(g)
length(listH5Contents(g))
TH(action='print')
+TH(action='print')
+g
+h5Exists(g, "ds_1")
+g
+h5Exists(g, "ds_2")
+h5Exists(g, "ds_1")
+g
+listH5Contents(g)
+h5Exists(g, "ds_1")
+h5Exists(g, "ds_1")
+h5Exists(g, "ds_1")
+h5Exists(g, "ds_1")
+h5Exists(g, "ds_1")
+h5Exists(g, "ds_323432")
+ls9)
+ls()
+f
+h5Exists(g, "group_1")
+listH5Contents(f)
+h5Exists(g, "ds_1")
+h5Exists(g, "group_1")
+h5r:::.listH5Contents
+h5r:::.listH5Contents(f)
+sapply(h5r:::.listH5Contents(f), "[[", 1)
+h5Exists(g, "group_1")
+h5Exists(g, "group_1")
+h5Exists(f, "group_1")
+h5Exists(f, "group_1")
+h5Exists(f, "group_1")
+h5Exists(f, "group_3")
+h5Exists(g, "group_3")
+h5Exists(g, "ds_1")
+h5Exists(g, "ds_sdf1")
+TH(action='print')
+ds7
+dim(ds7)
+c(c(1,1,1), dim(ds7))
+cbind(c(1,1,1), dim(ds7))
+hSlab(cbind(c(1,1,1), dim(ds7)))
+apply((cbind(c(1,1,1), dim(ds7))), runif)
+apply((cbind(c(1,1,1), dim(ds7))), 1, function(b) runif(1, b[1], b[2]))
+floor(apply((cbind(c(1,1,1), dim(ds7))), 1, function(b) runif(1, b[1], b[2])))
+floor(apply((cbind(c(1,1,1), dim(ds7))), 1, function(b) { a <- runif(1, b[1], b[2]); a + floor(runif(1, a, b[2])) })
+)
+floor(apply((cbind(c(1,1,1), dim(ds7))), 1, function(b) { a <- runif(1, b[1], b[2]); c(a, a + floor(runif(1, a, b[2]))) }))
+t(floor(apply((cbind(c(1,1,1), dim(ds7))), 1, function(b) { a <- runif(1, b[1], b[2]); c(a, a + floor(runif(1, a, b[2]))) })))
+m
+traceback()
+m
+m
+m
+m
+apply(m, 2, diff)
+TH(action='print')
+TH(action='print')
+m
+ds7[m[1,1]:m[2,1],
+ m[1,2]:m[2,2],
+ m[1,3]:m[2,3]]
+)
+)
+)
+TH(action='print')
+TH(action='print')
+TH(action='print')
Modified: tests/testall.R
===================================================================
--- tests/testall.R 2010-05-29 01:37:24 UTC (rev 29)
+++ tests/testall.R 2010-06-04 17:58:23 UTC (rev 30)
@@ -79,6 +79,10 @@
TH("ds_1 dim, 4", is.null(dim(ds1[, 1])))
TH("ds_1 dim, 5", assertError(ds1[,1:12]))
+## test existence.
+TH("existence, 1", h5Exists(g, "ds_1"))
+TH("existence, 2", h5Exists(g, "ds_232") == FALSE)
+
## string dataset
ds2M <- getH5Dataset(g, "ds_2", inMemory = T)
ds2 <- getH5Dataset(g, "ds_2", inMemory = F)
@@ -240,11 +244,36 @@
TH("test 0-vs-1 based", all(ds8[1,1:5] == 1:5))
-TH("matrix grab.",
- all(ds8[rbind(1:2,1:2)] == ds8[1:2, 1:2]) &
- all(ds8[] == ds8[ cbind(c(1,1), dim(ds8)) ]))
+TH("hSlab grab",
+ all(ds8[hSlab(c(1,1), end = c(2,2))] == ds8[1:2, 1:2]) &
+ all(ds8[] == ds8[ hSlab(c(1,1), end = dim(ds8)) ]))
+
+TH("normal time", {
+ all(replicate(10000, {
+ m <- apply(cbind(c(1,1,1), dim(ds7)), 1, function(b) {
+ a <- runif(1, b[1], b[2])
+ floor(c(a, runif(1, a, b[2])))
+ })
+ ds7[m[1,1]:m[2,1],
+ m[1,2]:m[2,2],
+ m[1,3]:m[2,3]]
+ return(TRUE)
+ }))
+})
+
+TH("hSlab time", {
+ all(replicate(10000, {
+ m <- apply(cbind(c(1,1,1), dim(ds7)), 1, function(b) {
+ a <- runif(1, b[1], b[2])
+ floor(c(a, runif(1, a, b[2])))
+ })
+ ds7[hSlab(m[1,], end = m[2,])]
+ return(TRUE)
+ }))
+})
+
TH(action = "print")
TH(action = "throw")
More information about the H5r-commits
mailing list