[H5r-commits] r29 - R inst tests
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Sat May 29 03:37:25 CEST 2010
Author: extemporaneousb
Date: 2010-05-29 03:37:24 +0200 (Sat, 29 May 2010)
New Revision: 29
Modified:
R/h5R.R
inst/makeBigH5.py
inst/performance.R
tests/testall.R
Log:
Small bug fixes and performance enhancements
Modified: R/h5R.R
===================================================================
--- R/h5R.R 2010-05-26 19:34:42 UTC (rev 28)
+++ R/h5R.R 2010-05-29 01:37:24 UTC (rev 29)
@@ -111,7 +111,8 @@
return(o)
}
-setMethod("getH5Dataset", c("H5Obj", "character"), function(h5Obj, datasetName, inMemory = TRUE) {
+setMethod("getH5Dataset", c("H5Obj", "character"), function(h5Obj, datasetName,
+ inMemory = FALSE) {
o <- new("H5Dataset")
o at ePtr <- .Call("h5R_get_dataset", .ePtr(h5Obj), datasetName, PACKAGE = 'h5r')
return(.initH5DataContainer(o, datasetName, inMemory))
@@ -294,15 +295,28 @@
}
})
+## This function is written to leverage the possibility of fast contiguous
+## range access. Here matrix is a two-column matrix with start, stop.
+## Other options will be IRanges.
+## setMethod("[", c("H5Dataset", "matrix", "missing", "missing"), function(x, i) {
+## if (.inMemory(x))
+## stop("Not implemented for inMemory datasets.")
+
+## nr <- nrow(i)
+## if (! ((nr == 1 && is.null(dim(x))) || (nr == length(dim(x)))))
+## stop("Dimension mismatch: nrow(x) == length(dim(x))")
+
+## readSlab(x, i[,1], i[,2] - i[,1] + 1)
+## })
+
##
## Note: the two reverses.
##
-.myperm <- function(d) if (!is.null(dim(d))) aperm(d) else d
-
.loadDataset <- function(h5Dataset) {
d <- readDataAsVector(h5Dataset)
dim(d) <- rev(dim(h5Dataset))
- .myperm(d)
+
+ if (! is.null(dim(h5Dataset))) aperm(d) else d
}
readSlab <- function(h5Dataset, offsets, dims) {
@@ -311,7 +325,8 @@
d <- .Call("h5R_read_slab", .ePtr(h5Dataset), as.integer(offsets - 1), as.integer(dims))
dim(d) <- rev(dims)
- .myperm(d)
+
+ if (! is.null(dim(h5Dataset))) aperm(d) else d
}
setGeneric("readDataAsVector", function(h5Obj, ...) {
Modified: inst/makeBigH5.py
===================================================================
--- inst/makeBigH5.py 2010-05-26 19:34:42 UTC (rev 28)
+++ inst/makeBigH5.py 2010-05-29 01:37:24 UTC (rev 29)
@@ -2,11 +2,14 @@
from numpy import *
import glob
-h5 = File(glob.glob("~/h5_files/big.h5"))
-x = random.randint(0, 1e5, 1e9)
+h5 = File("/home/jbullard/big.h5")
+x = random.randint(0, 1e8, 1e8)
## with chunks.
-h5.create_dataset("cdata", data = x, chunks = (10000,))
+h5.create_dataset("cdata_1e3", data = x, chunks = (1000,))
+h5.create_dataset("cdata_1e4", data = x, chunks = (10000,))
+h5.create_dataset("cdata_1e5", data = x, chunks = (100000,))
+h5.create_dataset("cdata_1e6", data = x, chunks = (1000000,))
## without
h5.create_dataset("udata", data = x)
Modified: inst/performance.R
===================================================================
--- inst/performance.R 2010-05-26 19:34:42 UTC (rev 28)
+++ inst/performance.R 2010-05-29 01:37:24 UTC (rev 29)
@@ -3,20 +3,21 @@
##
require(h5r)
-h5 <- H5File(Sys.glob("~/h5_files/big.h5"))
+h5 <- H5File(Sys.glob("~/local/big.h5"))
-cD <- getH5Dataset(h5, "cdata", inMemory = FALSE)
-uD <- getH5Dataset(h5, "udata", inMemory = FALSE)
-zD <- getH5Dataset(h5, "zdata", inMemory = FALSE)
-mD <- getH5Dataset(h5, "cdata", inMemory = TRUE)
+chunks <- c("1e3", "1e4", "1e5", "1e6")
+names(chunks) <- chunks <- c("1e3", "1e4", "1e5", "1e6")
+cDtas <- lapply(paste("cdata", chunks, sep = "_"), function(n) getH5Dataset(h5, n, inMemory = FALSE))
+names(cDtas) <- chunks
-N <- 10
+N <- 1000
K <- 1000
f <- function(d, n = N, mu = 1000) {
start <- runif(n, 1, length(d))
- end <- start + round(rexp(n, 1/mu))
-
+ end <- start + round(rexp(n, 1/mu))
+ end <- ifelse(end > length(d), start, end)
+
mapply(function(s,e) {
z <- d[s:e]
}, start, end)
@@ -24,6 +25,49 @@
return(TRUE)
}
+g <- function(d, n = N, mu = 1000) {
+ start <- runif(n, 1, length(d))
+ end <- start + round(rexp(n, 1/mu))
+ end <- ifelse(end > length(d), start, end)
+
+ mapply(function(s,e) {
+ z <- d[matrix(c(s,e), ncol = 2)]
+ }, start, end)
+
+ return(TRUE)
+}
+
+system.time(f(cDtas[[1]], n = 100000))
+system.time(g(cDtas[[1]], n = 100000))
+
+
+
+Rprof("rprof")
+levelplot(x <- do.call(rbind, lapply(cDtas, function(a) {
+ set.seed(10)
+ sapply(chunks, function(n) {
+ n <- as.numeric(n)
+ system.time(f(a, N, mu = n))[3]/(n)
+ })
+})))
+Rprof(NULL)
+summaryRprof("rprof")
+
+Rprof("rprof")
+x <- f(cDtas[[1]], n = 1000, mu = 1e4)
+Rprof(NULL)
+summaryRprof("rprof")
+
+
+
+##
+## Different Datasets.
+##
+cD <- getH5Dataset(h5, "cdata_1e4", inMemory = FALSE)
+uD <- getH5Dataset(h5, "udata", inMemory = FALSE)
+zD <- getH5Dataset(h5, "zdata", inMemory = FALSE)
+mD <- getH5Dataset(h5, "cdata", inMemory = TRUE)
+
par(mar=c(10, 5, 3, 3))
boxplot(as.data.frame(do.call(rbind, lapply(1:K, function(i) {
sapply(list("chunked" = cD, "unchunked" = uD, "zipped" = zD, "memory" = mD), function(a) {
Modified: tests/testall.R
===================================================================
--- tests/testall.R 2010-05-26 19:34:42 UTC (rev 28)
+++ tests/testall.R 2010-05-29 01:37:24 UTC (rev 29)
@@ -46,7 +46,8 @@
throw = {
errs <- ! sapply(tests, getResult)
if (any(errs)) {
- stop(simpleError(paste("Tests in error:\n", paste(paste("\t", names(tests)[errs], sep = ""), collapse = "\n"),
+ stop(simpleError(paste("Tests in error:\n", paste(paste("\t", names(tests)[errs], sep = ""),
+ collapse = "\n"),
sep = "")))
}
})
@@ -238,6 +239,12 @@
TH("dim check 4", assertError(ds8[10,1]))
TH("test 0-vs-1 based", all(ds8[1,1:5] == 1:5))
+
+TH("matrix grab.",
+ all(ds8[rbind(1:2,1:2)] == ds8[1:2, 1:2]) &
+ all(ds8[] == ds8[ cbind(c(1,1), dim(ds8)) ]))
+
+
TH(action = "print")
TH(action = "throw")
More information about the H5r-commits
mailing list