[H5r-commits] r12 - R inst inst/h5_files src
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Thu Apr 22 03:28:39 CEST 2010
Author: extemporaneousb
Date: 2010-04-22 03:28:39 +0200 (Thu, 22 Apr 2010)
New Revision: 12
Added:
inst/testSlab.R
Modified:
R/h5R.R
inst/.Rhistory
inst/h5_files/ex_1.h5
inst/h5_files/makeH5.py
inst/tests.R
src/h5_wrap.c
Log:
Large commit which gets support for hyperslab selection in. This selection is limited to contiguous regions for now (and maybe forever).
Modified: R/h5R.R
===================================================================
--- R/h5R.R 2010-04-21 01:36:54 UTC (rev 11)
+++ R/h5R.R 2010-04-22 01:28:39 UTC (rev 12)
@@ -11,9 +11,11 @@
setClass("H5File", contains = "H5Obj", representation(fileName = "character"))
setClass("H5Group", contains = "H5Obj", representation(name = "character"))
+
+setClassUnion("envOrNULL", c("environment", "NULL"))
setClass("H5DataContainer", contains = "H5Obj",
representation(name = "character", dims = "integer",
- h5Type = "integer", .data = "environment"))
+ h5Type = "integer", .data = "envOrNULL"))
setClass("H5Dataset", contains = "H5DataContainer")
setClass("H5Attribute", contains = "H5DataContainer")
@@ -48,6 +50,10 @@
get(".data", h5DataContainer at .data)
}
+.inMemory <- function(h5Dataset) {
+ return(! is.null(h5Dataset at .data))
+}
+
setGeneric("getH5Group", function(h5Obj, groupName, ...) {
standardGeneric("getH5Group")
})
@@ -86,13 +92,13 @@
## call this on *class* instanteation time.
if (missing(fileName))
return(.Object)
-
+
.Object at ePtr <- .Call("h5R_open", fileName, package = "h5R")
.Object at fileName <- fileName
return(.Object)
})
-.initH5DataContainer <- function(o, name) {
+.initH5DataContainer <- function(o, name, inMemory = TRUE) {
o at name <- name
o at h5Type <- getH5Type(o)
@@ -108,19 +114,22 @@
else
o at dims <- d ## I store the length of the vector here, note that in
## dim I take this into account.
-
+
## This caches the data. At some point, we'll want to
## move away from this and just grab things from disk
## and provide a mechanism to cache.
- o at .data <- new.env(parent = emptyenv(), hash = TRUE)
-
+ if (! inMemory) {
+ o at .data <- NULL
+ } else {
+ o at .data <- new.env(parent = emptyenv(), hash = TRUE)
+ }
return(o)
}
-setMethod("getH5Dataset", c("H5Obj", "character"), function(h5Obj, datasetName) {
+setMethod("getH5Dataset", c("H5Obj", "character"), function(h5Obj, datasetName, inMemory = FALSE) {
o <- new("H5Dataset")
o at ePtr <- .Call("h5R_get_dataset", .ePtr(h5Obj), datasetName, PACKAGE = 'h5r')
- return(.initH5DataContainer(o, datasetName))
+ return(.initH5DataContainer(o, datasetName, inMemory))
})
setMethod("getH5Attribute", c("H5Obj", "character"), function(h5Obj, attrName) {
@@ -129,7 +138,8 @@
return(.initH5DataContainer(o, attrName))
})
-setMethod("[", "H5DataContainer", function(x, i, j, ..., drop = FALSE) {
+
+.internalSlice <- function(x, i, j, ..., drop = TRUE) {
if (!.hasData(x)) {
.putData(x, .loadDataset(x))
}
@@ -141,10 +151,81 @@
d[i]
}
else {
- drop(d[i, j, ..., drop = drop])
+ d[i, j, ..., drop = drop]
}
+}
+
+setMethod("[", "H5DataContainer", .internalSlice)
+setMethod("[", "H5Dataset", function(x, i, j, ..., drop = TRUE) {
+ if (.inMemory(x)) {
+ callNextMethod()
+ }
+
+ ##
+ ## Currently, This supports only a limited range of slicing.
+ ## contiguous chunks
+ ##
+ else {
+ if (is.null(dim(x))) {
+ if (! missing(j))
+ stop("incorrect number of dimensions")
+ if (! missing(i))
+ dta <- readSlab(x, min(i), max(i) - min(i) + 1)
+ else
+ dta <- readSlab(x, 1, length(x))
+ }
+ else {
+ ## need to specify the dim(x) offset, dim.
+ sel <- matrix(NA, nrow = length(dim(x)), ncol = 2)
+ if (! missing(i))
+ sel[1, ] <- range(i)
+ else
+ sel[1, ] <- c(1, dim(x)[1])
+
+ if (! missing(j))
+ sel[2, ] <- range(j)
+ else
+ sel[2, ] <- c(1, dim(x)[2])
+
+ ##
+ ## Not quite sure why this results in a strange state
+ ## of both missing and !missing(.)
+ ##
+ l <- tryCatch(list(...), simpleError = function(e) {
+ return(list())
+ })
+ if (nrow(sel) > 2) {
+ for (k in 3:nrow(sel)) {
+ if (length(l) >= k)
+ sel[k, ] <- range(l[[k]])
+ else
+ sel[k, ] <- c(1, dim(x)[k])
+ }
+ }
+
+ ext <- sel[,2] - sel[,1] + 1
+
+ if (nrow(sel) == 2) {
+ dta <- readSlab(x, sel[,1], ext)
+ dta <- matrix(dta, nrow = ext[1], ncol = ext[2], byrow = TRUE)
+ dta <- if (drop) drop(dta) else dta
+ } else {
+ dta <- readSlab(x, rev(sel[,1]), rev(ext))
+ dim(dta) <- rev(ext)
+ }
+ }
+ return(dta)
+ }
})
+
+readSlab <- function(h5Dataset, offsets, dims) {
+### stopifnot(length(offsets) == length(dims))
+### stopifnot(all((offsets + dims - 1) <=
+### (if (is.null(dim(h5Dataset))) length(h5Dataset) else dim(h5Dataset))))
+ .Call("h5R_read_slab", .ePtr(h5Dataset), as.integer(offsets - 1), as.integer(dims))
+}
+
setGeneric("readDataAsVector", function(h5Obj, ...) {
standardGeneric("readDataAsVector")
})
Modified: inst/.Rhistory
===================================================================
--- inst/.Rhistory 2010-04-21 01:36:54 UTC (rev 11)
+++ inst/.Rhistory 2010-04-22 01:28:39 UTC (rev 12)
@@ -1,150 +1,150 @@
-d = .Call("h5R_get_dataset", g at ePtr, "ds_2")
-.Call("__h5R_get_dataset_str", d)
-d = .Call("h5R_get_dataset", g at ePtr, "ds_2")
-.Call("__h5R_get_dataset_str", d)
-d = .Call("h5R_get_dataset", g at ePtr, "ds_2")
-.Call("__h5R_get_dataset_str", d)
-d = .Call("h5R_get_dataset", g at ePtr, "ds_2")
-.Call("__h5R_get_dataset_str", d)
-d = .Call("h5R_get_dataset", g at ePtr, "ds_2")
-.Call("__h5R_get_dataset_str", d)
-d = .Call("h5R_get_dataset", g at ePtr, "ds_2")
-.Call("__h5R_get_dataset_str", d)
-d = .Call("h5R_get_dataset", g at ePtr, "ds_2")
-.Call("__h5R_get_dataset_str", d)
-d = .Call("h5R_get_dataset", g at ePtr, "ds_2")
-.Call("__h5R_get_dataset_str", d)
-d = .Call("h5R_get_dataset", g at ePtr, "ds_2")
-.Call("__h5R_get_dataset_str", d)
-d = .Call("h5R_get_dataset", g at ePtr, "ds_2")
-.Call("__h5R_get_dataset_str", d)
-d = .Call("h5R_get_dataset", g at ePtr, "ds_2")
-.Call("__h5R_get_dataset_str", d)
-.Call("__h5R_get_dataset_str", d)
-.Call("__h5R_get_dataset_str", d)
-.Call("__h5R_get_dataset_str", d)
-.Call("__h5R_get_dataset_str", d)
-.Call("__h5R_get_dataset_str", d)
-d = .Call("h5R_get_dataset", g at ePtr, "ds_2")
-.Call("__h5R_get_dataset_str", d)
-.Call("__h5R_get_dataset_str", d)
-.Call("__h5R_get_dataset_str", d)
-.Call("__h5R_get_dataset_str", d)
-.Call("__h5R_get_dataset_str", d)
-.Call("__h5R_get_dataset_str", d)
-.Call("__h5R_get_dataset_str", d)
-.Call("__h5R_get_dataset_str", d)
-.Call("__h5R_get_dataset_str", d)
-d = .Call("h5R_get_dataset", g at ePtr, "ds_2")
-.Call("__h5R_get_dataset_str", d)
-.Call("__h5R_get_dataset_str", d)
-.Call("__h5R_get_dataset_str", d)
-.Call("__h5R_get_dataset_str", d)
-.Call("__h5R_get_dataset_str", d)
-replicate(100, .Call("__h5R_get_dataset_str", d))
+diLong
+readSlab(diLong, 1, 100)
+diLong[1:100]
+seq.int(10000, 10)
gc()
-x = replicate(10000, .Call("__h5R_get_dataset_str", d))
gc()
-d = .Call("h5R_get_dataset", g at ePtr, "ds_2")
-.Call("_h5R_get_dataset_str", d)
-d = .Call("h5R_get_dataset", g at ePtr, "ds_2")
-.Call("_h5R_get_dataset_str", d)
-d = .Call("h5R_get_dataset", g at ePtr, "ds_2")
-.Call("_h5R_get_dataset_str", d)
-1
-d = .Call("h5R_get_dataset", g at ePtr, "ds_2")
-.Call("_h5R_get_dataset_str", d)
-.Call("_h5R_read_dataset_str", d)
-.Call("_h5R_read_vlen_str", d)
-.Call("_h5R_read_vlen_str", d)
-.Call("_h5R_read_vlen_str", d)
-.Call("_h5R_read_vlen_str", d)
-d = .Call("h5R_get_dataset", g at ePtr, "ds_2")
-.Call("h5R_get_dims", d)
-1
-d = .Call("h5R_get_dataset", g at ePtr, "ds_2")
-.Call("h5R_get_dims", d)
-.Call("h5R_get_dims", d)
+time()
+date()
+?date
+Sys.time()
+?system.time
+proc.time()
+d3
+d2
+q()
+n
+d2
+nrow(d)
+debug("readSlab")
+n
+offsets + dims
+offsets + dims - 1
+Q
+d2
+gc()
+emptyenv()
+length(emptyenv())
+?callNextMethod
+?emptyenv
+setUnion
+setClassUnion
+?setClassUnion
+?setClassUnion
+readSlab
+ls()
d
-1
-d = .Call("h5R_get_dataset", g at ePtr, "ds_1")
-.Call("h5R_get_dims", d)
-.Call("h5R_get_dims", d)
+d2
+readSlab(d2, c(1,1), c(1000, 10))
+length(readSlab(d2, c(1,1), c(1000, 10)))
+length(readSlab(d2, c(5,5), c(1000, 10)))
+length(readSlab(d2, c(1,5), c(1000, 5)))
d
-d = .Call("h5R_get_dataset", g at ePtr, "ds_1")
-.Call("h5R_get_dims", d)
-.Call("h5R_get_dims", d)
-.Call("h5R_get_dims", d)
+d[1:10]
+d[]
d
+d[1:10]
+d2[1:10]
+d2[,] = d[,]
+c(1,2,3,4)[]
d
-d = .Call("h5R_get_dataset", g at ePtr, "ds_2")
-.Call("h5R_get_dims", d)
-.Call("h5R_get_dims", d)
-.Call("h5R_get_dims", d)
-.Call("h5R_read_dataset", d)
-.Call("h5R_read_dataset", d)
-d
-d
-d[,]
-dim(d[,])
-head(d[,])
-head(d[,])
-head(d[,])
-head(d[,])
-head(d[,])
-d2
-d2[]
-dim(d2)
-d
-d[,]
-length(d[,])
-length(d)
-length(d2)
-d2
-d2[]
-getGenerics("[")
-getGeneric("[")
-showMethods("[")
-d2
-d2[1]
-d2[1:3]
-d2[1,2]
-length(d2)
-dim(d2)
-dim(d2)
-dim(d)
-a
-q
-dim
-a
+d at .data
+d2 at .data
+d2 at .data$.data
b
-matrix(c(1,2,3,4), 2, 2)
-matrix(c(1,2,3,4), 2, 2)[1,2]
-dim(matrix(c(1,2,3,4), 2, 2)[1,2])
-dim(d)
-c(1,23,4)[1,1]
+dM
+dMM
+dM
+dM[] == dMM
+dM[] == dMM[]
+dM[] == t(dMM[])
+dim(dM[])
+dim(dMM[])
+dM
+dM[1:5, 2]
+dMM[1:5, 2]
+dM[1,2]
+dM[1:4,2]
+dMM[1:10, 2]
+dM[1:10, 2]
+dM[1:10, 2] == dMM[1:10, 2]
+dM[1:5, 2] == dMM[1:999, 2]
+dMM[1:999, 2]
+dM[1:999, 2]
+dim(d3)
+dim(d3M)
d3
+d3[1,,]
+debug(.internalSlice)
+debug(h5r:::.internalSlice)
+debug(h5r:::.internalSlice)
+h5R:::.inMemory(d3)
d3
-dim(d3)
-d3[,,]
-dim(d3[,,])
-drop
-?drop
-d[1,1]
-d3[1,1,]
+d3 at .data
+d3M at .data
+d3M at .data$.data
+d3
+d3M
+traceback()
+d3M at .data$data[,,]
+d3M at .data$data
+debug(h5r:::.internalSlice)
+traceback()
+c
+n
+dim(x)
+n
+i
d
-.Call("h5R_read_slab", d at ePtr, as.integer(0,0), as.integer(5,5))
-4
-.Call("h5R_read_slab", d at ePtr, as.integer(0,0), as.integer(5,5))
-1
-.Call("h5R_read_slab", d at ePtr, as.integer(0,0), as.integer(5,5))
-1
-.Call("h5R_read_slab", d at ePtr, as.integer(0,0), as.integer(5,5))
-1
-d3
-d3[1,,]
-dint
-.Call("h5R_read_slab", dint at ePtr, as.integer(0,0), as.integer(5,5))
-1
-dint[,]
-1
+c
+n
+x at .data$.data
+n
+class(d)
+ drop
+d[i, j, ..., drop = FALSE]
+dim(d)
+Q
+?callNextMethod
+traceback()
+a = function(a, ...) { list(...) }
+a(1)
+c
+x
+c
+n
+n
+n
+n
+...
+missing(...)
+list(...)
+class(...)
+mode(...)
+length(...)
+?missing
+substitute(...)
+length(substitute(...))
+class(substitute(...))
+Q
+?promise
+?substitue
+?substitute
+n
+n
+ext
+n
+n
+sel[,1]
+sel
+dim(x)
+l
+sel
+Q
+n
+readSlab(x, rev(sel[, 1]), rev(ext))
+Q
+n
+n
+n
+readSlab(x, rev(sel[, 1]), rev(ext))
Modified: inst/h5_files/ex_1.h5
===================================================================
(Binary files differ)
Modified: inst/h5_files/makeH5.py
===================================================================
--- inst/h5_files/makeH5.py 2010-04-21 01:36:54 UTC (rev 11)
+++ inst/h5_files/makeH5.py 2010-04-22 01:28:39 UTC (rev 12)
@@ -25,8 +25,28 @@
g.create_dataset("ds_5", data = a.reshape(21, 9))
+
+g.create_dataset("ds_6", data = random.randint(0, int(1e4), int(1e5)), dtype = "uint32")
+
+
f.close()
+##
+## some reading and testing.
+##
f = h5py.File("ex_1.h5")
-f['group_1']['ds_2'].attrs['z']
+g = f["group_1"]
+d = g["ds_6"]
+import time
+
+s = time.time()
+k = 100000
+n = 100
+
+for i in xrange(0, n):
+ b = random.randint(0, d.shape[0] - k)
+ e = b + k
+ print len(d[b:e])
+
+print "total time %d" % int(time.time() - s)
Added: inst/testSlab.R
===================================================================
--- inst/testSlab.R (rev 0)
+++ inst/testSlab.R 2010-04-22 01:28:39 UTC (rev 12)
@@ -0,0 +1,56 @@
+##
+## Used to test hyperslab reading.
+##
+require(h5r)
+
+files <- list.files("h5_files", full.names = TRUE)
+
+## ex_1
+f <- H5File(files[1])
+g <- getH5Group(f, "group_1")
+d <- getH5Dataset(g, "ds_6", inMemory = FALSE)
+d2 <- getH5Dataset(g, "ds_6", inMemory = TRUE)
+
+all(d[,] == d2[,])
+all(d[] == d2[])
+all(d[2:length(d)] == d2[2:length(d2)])
+
+timeMe <- function(d) {
+ k <- 1000
+ n <- 1000
+ system.time({
+ for (i in seq.int(1, n)) {
+ b <- runif(1, 1, nrow(d) - k)
+ length(readSlab(d, b, k))
+ }
+ })
+}
+timeMe(d)
+timeMe(d2)
+
+dM <- getH5Dataset(g, "ds_1", inMemory = FALSE)
+dMM <- getH5Dataset(g, "ds_1", inMemory = TRUE)
+
+all(dM[] == dMM[])
+all(dM[,] == dMM[,])
+all(dM[1:5, 1:5] == dMM[1:5, 1:5])
+all(dM[1:5, 2] == dMM[1:5, 2])
+
+length(dM[1:5, 2]) == length(dMM[1:5, 2])
+
+all(dim(dM[1:5, 2:3]) == dim(dMM[1:5, 2:3]))
+
+
+##
+## Three dimensional
+##
+d3M <- getH5Dataset(g, "ds_3", inMemory = TRUE)
+d3 <- getH5Dataset(g, "ds_3", inMemory = FALSE)
+
+d3M[,,]
+d3[,,]
+
+d3[1,,]
+d3[1,1,1]
+d3[1,,]
+d3[,,1]
Modified: inst/tests.R
===================================================================
--- inst/tests.R 2010-04-21 01:36:54 UTC (rev 11)
+++ inst/tests.R 2010-04-22 01:28:39 UTC (rev 12)
@@ -9,9 +9,6 @@
f <- H5File(files[1])
g <- getH5Group(f, "group_1")
-dint <- getH5Dataset(g, "ds_5")
-.Call("h5R_read_slab", dint at ePtr, as.integer(c(0,0)), as.integer(c(5,5)), as.integer(c(5,5)))
-
d <- getH5Dataset(g, "ds_1")
d[1:10, 1:10]
d[1:10,]
Modified: src/h5_wrap.c
===================================================================
--- src/h5_wrap.c 2010-04-21 01:36:54 UTC (rev 11)
+++ src/h5_wrap.c 2010-04-22 01:28:39 UTC (rev 12)
@@ -199,46 +199,55 @@
return(dta);
}
-SEXP h5R_read_slab(SEXP h5_dataset, SEXP _offsets, SEXP _counts, SEXP _orank) {
+SEXP h5R_read_slab(SEXP h5_dataset, SEXP _offsets, SEXP _counts) {
SEXP dta = R_NilValue;
- hid_t space, memspace;
- int ndims, i;
+ hid_t space, memspace, memtype;
+ void* buf;
+ int i;
- int* offsets = INTEGER(_offsets);
- int* counts = INTEGER(_counts);
- int* orank = INTEGER(_orank);
-
- hsize_t sel[] = {0, 0};
-
+ int* offsets = INTEGER(_offsets);
+ int* counts = INTEGER(_counts);
+
+ /** I'm surprised I have to do this, but it seems to be necessary. **/
+ hsize_t* _h_offsets = (hsize_t*) Calloc(length(_counts), hsize_t);
+ hsize_t* _h_counts = (hsize_t*) Calloc(length(_counts), hsize_t);
+
int v = 1;
- for (i = 0; i < length(_orank); i++) {
- v *= orank[i];
+ for (i = 0; i < length(_counts); i++) {
+ v *= counts[i];
+ _h_offsets[i] = offsets[i];
+ _h_counts[i] = counts[i];
}
switch (INTEGER(h5R_get_type(h5_dataset))[0]) {
case H5T_INTEGER:
- Rprintf("In H5T_INTEGER with v = %d\n", v);
-
- space = _h5R_get_space(h5_dataset);
- H5Sselect_hyperslab(space, H5S_SELECT_SET, (hsize_t*) offsets, NULL, NULL, (hsize_t*) counts);
-
- /* memspace = H5Screate_simple(length(_orank), (hsize_t*) orank, NULL); */
- /* H5Sselect_hyperslab(memspace, H5S_SELECT_SET, sel, NULL, (hsize_t*) counts, NULL); */
-
PROTECT(dta = allocVector(INTSXP, v));
- Rprintf("About to read dataset, v = %d\n", v);
- Rprintf("offset: %d, %d\n", offsets[0], offsets[1]);
- Rprintf("counts: %d, %d\n", counts[0], counts[1]);
-
- H5Dread(HID(h5_dataset), H5T_NATIVE_INT, H5S_ALL, space, H5P_DEFAULT, (void *) INTEGER(dta));
+ memtype = H5T_NATIVE_INT;
+ buf = INTEGER(dta);
break;
+ case H5T_FLOAT:
+ PROTECT(dta = allocVector(REALSXP, v));
+ memtype = H5T_NATIVE_DOUBLE;
+ buf = REAL(dta);
+ break;
+ case H5T_STRING:
+ error("String slab selection not yet supported.\n");
default:
error("Unsupported class in %s\n", __func__);
-
}
+
+ space = _h5R_get_space(h5_dataset);
+ H5Sselect_hyperslab(space, H5S_SELECT_SET, _h_offsets, NULL, _h_counts, NULL);
+ memspace = H5Screate_simple(length(_counts), _h_counts, NULL);
+ H5Dread(HID(h5_dataset), memtype, memspace, space,
+ H5P_DEFAULT, buf);
+ /** clean up. **/
+ Free(_h_offsets);
+ Free(_h_counts);
+ H5Sclose(memspace);
UNPROTECT(1);
-
+
return dta;
}
More information about the H5r-commits
mailing list