[H5r-commits] r29 - R inst tests

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Sat May 29 03:37:25 CEST 2010


Author: extemporaneousb
Date: 2010-05-29 03:37:24 +0200 (Sat, 29 May 2010)
New Revision: 29

Modified:
   R/h5R.R
   inst/makeBigH5.py
   inst/performance.R
   tests/testall.R
Log:
Small bug fixes and performance enhancements

Modified: R/h5R.R
===================================================================
--- R/h5R.R	2010-05-26 19:34:42 UTC (rev 28)
+++ R/h5R.R	2010-05-29 01:37:24 UTC (rev 29)
@@ -111,7 +111,8 @@
   return(o)
 }
 
-setMethod("getH5Dataset", c("H5Obj", "character"), function(h5Obj, datasetName, inMemory = TRUE) {
+setMethod("getH5Dataset", c("H5Obj", "character"), function(h5Obj, datasetName,
+                                                            inMemory = FALSE) {
   o <- new("H5Dataset")
   o at ePtr <- .Call("h5R_get_dataset", .ePtr(h5Obj), datasetName, PACKAGE = 'h5r')
   return(.initH5DataContainer(o, datasetName, inMemory))
@@ -294,15 +295,28 @@
   }
 })
 
+## This function is written to leverage the possibility of fast contiguous
+## range access. Here matrix is a two-column matrix with start, stop.
+## Other options will be IRanges.
+## setMethod("[", c("H5Dataset", "matrix", "missing", "missing"), function(x, i) {
+##   if (.inMemory(x))
+##     stop("Not implemented for inMemory datasets.")
+
+##   nr <- nrow(i)
+##   if (! ((nr == 1 && is.null(dim(x))) || (nr == length(dim(x)))))
+##     stop("Dimension mismatch: nrow(x) == length(dim(x))")
+  
+##   readSlab(x, i[,1], i[,2] - i[,1] + 1)
+## })
+
 ##
 ## Note: the two reverses.
 ##
-.myperm <- function(d) if (!is.null(dim(d))) aperm(d) else d
-
 .loadDataset <- function(h5Dataset) {
   d <- readDataAsVector(h5Dataset)
   dim(d) <- rev(dim(h5Dataset))
-  .myperm(d)
+  
+  if (! is.null(dim(h5Dataset))) aperm(d) else d
 }
 
 readSlab <- function(h5Dataset, offsets, dims) {
@@ -311,7 +325,8 @@
   
   d <- .Call("h5R_read_slab", .ePtr(h5Dataset), as.integer(offsets - 1), as.integer(dims))
   dim(d) <- rev(dims)
-  .myperm(d)
+
+  if (! is.null(dim(h5Dataset))) aperm(d) else d
 }
 
 setGeneric("readDataAsVector", function(h5Obj, ...) {

Modified: inst/makeBigH5.py
===================================================================
--- inst/makeBigH5.py	2010-05-26 19:34:42 UTC (rev 28)
+++ inst/makeBigH5.py	2010-05-29 01:37:24 UTC (rev 29)
@@ -2,11 +2,14 @@
 from numpy import *
 import glob
 
-h5 = File(glob.glob("~/h5_files/big.h5"))
-x  = random.randint(0, 1e5, 1e9)
+h5 = File("/home/jbullard/big.h5")
+x  = random.randint(0, 1e8, 1e8)
 
 ## with chunks.
-h5.create_dataset("cdata", data = x, chunks = (10000,))
+h5.create_dataset("cdata_1e3", data = x, chunks = (1000,))
+h5.create_dataset("cdata_1e4", data = x, chunks = (10000,))
+h5.create_dataset("cdata_1e5", data = x, chunks = (100000,))
+h5.create_dataset("cdata_1e6", data = x, chunks = (1000000,))
 
 ## without
 h5.create_dataset("udata", data = x)

Modified: inst/performance.R
===================================================================
--- inst/performance.R	2010-05-26 19:34:42 UTC (rev 28)
+++ inst/performance.R	2010-05-29 01:37:24 UTC (rev 29)
@@ -3,20 +3,21 @@
 ##
 require(h5r)
 
-h5 <- H5File(Sys.glob("~/h5_files/big.h5"))
+h5 <- H5File(Sys.glob("~/local/big.h5"))
 
-cD <- getH5Dataset(h5, "cdata", inMemory = FALSE)
-uD <- getH5Dataset(h5, "udata", inMemory = FALSE)
-zD <- getH5Dataset(h5, "zdata", inMemory = FALSE)
-mD <- getH5Dataset(h5, "cdata", inMemory = TRUE)
+chunks <- c("1e3", "1e4", "1e5", "1e6")
+names(chunks) <- chunks <- c("1e3", "1e4", "1e5", "1e6")
+cDtas <- lapply(paste("cdata", chunks, sep = "_"), function(n) getH5Dataset(h5, n, inMemory = FALSE))
+names(cDtas) <- chunks
 
-N <- 10
+N <- 1000
 K <- 1000
 
 f <- function(d, n = N, mu = 1000) {
   start <- runif(n, 1, length(d))
-  end <- start + round(rexp(n, 1/mu))
-
+  end  <- start + round(rexp(n, 1/mu))
+  end <- ifelse(end > length(d), start, end)
+  
   mapply(function(s,e) {
     z <- d[s:e]
   }, start, end)
@@ -24,6 +25,49 @@
   return(TRUE)
 }
 
+g <- function(d, n = N, mu = 1000) {
+  start <- runif(n, 1, length(d))
+  end  <- start + round(rexp(n, 1/mu))
+  end <- ifelse(end > length(d), start, end)
+  
+  mapply(function(s,e) {
+    z <- d[matrix(c(s,e), ncol = 2)]
+  }, start, end)
+
+  return(TRUE)
+}
+
+system.time(f(cDtas[[1]], n = 100000))
+system.time(g(cDtas[[1]], n = 100000))
+
+
+
+Rprof("rprof")
+levelplot(x <- do.call(rbind, lapply(cDtas, function(a) {
+  set.seed(10)
+  sapply(chunks, function(n) {
+    n <- as.numeric(n)
+    system.time(f(a, N, mu = n))[3]/(n)
+  })
+})))
+Rprof(NULL)
+summaryRprof("rprof")
+
+Rprof("rprof")
+x <- f(cDtas[[1]], n = 1000, mu = 1e4)
+Rprof(NULL)
+summaryRprof("rprof")
+
+
+
+##
+## Different Datasets.
+##
+cD <- getH5Dataset(h5, "cdata_1e4", inMemory = FALSE)
+uD <- getH5Dataset(h5, "udata", inMemory = FALSE)
+zD <- getH5Dataset(h5, "zdata", inMemory = FALSE)
+mD <- getH5Dataset(h5, "cdata", inMemory = TRUE)
+
 par(mar=c(10, 5, 3, 3))
 boxplot(as.data.frame(do.call(rbind, lapply(1:K, function(i) {
   sapply(list("chunked" = cD, "unchunked" = uD, "zipped" = zD, "memory" = mD), function(a) {

Modified: tests/testall.R
===================================================================
--- tests/testall.R	2010-05-26 19:34:42 UTC (rev 28)
+++ tests/testall.R	2010-05-29 01:37:24 UTC (rev 29)
@@ -46,7 +46,8 @@
            throw = {
              errs <- ! sapply(tests, getResult)
              if (any(errs)) {
-               stop(simpleError(paste("Tests in error:\n", paste(paste("\t", names(tests)[errs], sep = ""), collapse = "\n"),
+               stop(simpleError(paste("Tests in error:\n", paste(paste("\t", names(tests)[errs], sep = ""),
+                                                                 collapse = "\n"),
                                       sep = "")))
              }
            })
@@ -238,6 +239,12 @@
 TH("dim check 4", assertError(ds8[10,1]))
 TH("test 0-vs-1 based", all(ds8[1,1:5] == 1:5))
 
+
+TH("matrix grab.",
+   all(ds8[rbind(1:2,1:2)] == ds8[1:2, 1:2]) &
+   all(ds8[] == ds8[ cbind(c(1,1), dim(ds8)) ]))
+   
+
 TH(action = "print")
 TH(action = "throw")
 



More information about the H5r-commits mailing list