[H5r-commits] r43 - R inst/benchmark inst/h5_files src tests

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Tue Apr 19 02:18:27 CEST 2011


Author: extemporaneousb
Date: 2011-04-19 02:18:26 +0200 (Tue, 19 Apr 2011)
New Revision: 43

Added:
   inst/h5_files/link.h5
   tests/testwrite.R
Modified:
   R/h5R.R
   inst/benchmark/makeBigH5.py
   inst/benchmark/performance.R
   src/h5_wrap.c
Log:
Added the beginnings of writing code.

Modified: R/h5R.R
===================================================================
--- R/h5R.R	2011-04-13 19:41:18 UTC (rev 42)
+++ R/h5R.R	2011-04-19 00:18:26 UTC (rev 43)
@@ -43,8 +43,8 @@
   length(x at s)
 })
 
-H5File <- function(fileName) {
-  new("H5File", fileName)
+H5File <- function(fileName, mode = 'r') {
+  new("H5File", fileName, mode)
 }
 
 .ePtr <- function(obj) obj at ePtr
@@ -94,6 +94,16 @@
   standardGeneric("getH5Dataset")
 })
 
+createH5Dataset <- function(h5Obj, datasetName, data) {
+  if (is.null(d <- dim(data))) {
+    d <- length(data)
+  } else {
+    data <- aperm(data)
+  }
+  .Call("h5R_write_dataset", .ePtr(h5Obj), datasetName,
+        as.integer(data), as.integer(d))
+}
+
 setGeneric("getH5Attribute", function(h5Obj, attrName, ...) {
   standardGeneric("getH5Attribute")
 })
@@ -105,6 +115,17 @@
   .H5Group(x, groupName)
 })
 
+createH5Group <- function(h5Obj, groupName) {
+  ## XXX: Should add a check to see if group exists.
+  z <- .Call("h5R_create_group", .ePtr(h5Obj), groupName, PACKAGE = 'h5r')
+  
+  if (z == 0) {
+    return(getH5Group(h5Obj, groupName))
+  } else {
+    stop(paste("Unable to create group:", groupName))
+  }
+}
+
 setMethod("getH5Dim", "H5DataContainer", function(h5Obj) {
   .Call('h5R_get_dims', .ePtr(h5Obj), PACKAGE = 'h5r')
 })
@@ -113,16 +134,22 @@
   .Call("h5R_get_type", .ePtr(h5Obj), PACKAGE = 'h5r')
 })
 
-setMethod("initialize", c("H5File"), function(.Object, fileName) {
+setMethod("initialize", c("H5File"), function(.Object, fileName, mode = c('r', 'w')) {
   ## This is obscene. I have to do this because somehow Subclasses
   ## call this at *class* instantiation time. 
   if (missing(fileName))
     return(.Object)
 
+  mode <- match.arg(mode)
+  
+  if (! file.exists(fileName) && mode == 'w') {
+    .Call("h5R_finalizer", .Call("h5R_create", fileName, package = "h5R"))
+  }
+    
   if (! file.exists(fileName)) {
     stop(paste("Unable to open file:", fileName, "does not exist."))
   }
-  x <- .Call("h5R_open", fileName, package = "h5R")
+  x <- .Call("h5R_open", fileName, if(mode == 'r') as.integer(0) else as.integer(1), package = "h5R")
 
   if (is.null(x)) {
     stop(paste("Problem opening file:", fileName))

Modified: inst/benchmark/makeBigH5.py
===================================================================
--- inst/benchmark/makeBigH5.py	2011-04-13 19:41:18 UTC (rev 42)
+++ inst/benchmark/makeBigH5.py	2011-04-19 00:18:26 UTC (rev 43)
@@ -2,8 +2,8 @@
 from numpy import *
 import glob
 
-uh5 = File("./u_big.h5")
-zh5 = File("./z_big.h5")
+uh5 = File("/scratch/u_big.h5")
+zh5 = File("/scratch/z_big.h5")
 x   = random.randint(0, 1e8, 1e8)
 
 for s in ['1e3', '1e4', '1e5']:

Modified: inst/benchmark/performance.R
===================================================================
--- inst/benchmark/performance.R	2011-04-13 19:41:18 UTC (rev 42)
+++ inst/benchmark/performance.R	2011-04-19 00:18:26 UTC (rev 43)
@@ -3,28 +3,62 @@
 ##
 require(h5r)
 
-f <- function(d, n = 100, mu = 1000) {
-  start <- runif(n, 1, length(d))
+makeRanges <- function(d, n = 100, mu = 1000) {
+  start <- runif(n, 1, length(d) - mu)
   end   <- start + round(rexp(n, 1/mu))
-  end   <- ifelse(end > length(d), start, end)
+  end   <- ifelse(end > length(d), length(d), end)
   width <- end - start + 1
-  
-  lapply(seq.int(1, n), function(i) {
-    readSlab(d, start[i], width[i])
-    return(NULL)
+  cbind(start, width)
+}
+
+f <- function(d, ranges) {  
+  g <- lapply(seq.int(1, nrow(ranges)), function(i) {
+    readSlab(d, ranges[i,1], ranges[i,2])
   })
   return(TRUE)
 }
 
-l <- do.call(rbind, lapply(list(H5File(Sys.glob("./u_big.h5")), H5File(Sys.glob("./z_big.h5"))), function(h5) {
-  do.call(rbind, lapply(c("1e3", "1e4", "1e5"), function(s) {
+g <- function(d, ranges) {  
+  g <- read1DSlabs(d, ranges[,1], ranges[,2])
+  return(TRUE)
+}
+
+h5Files <- lapply(list(unzipped = "/scratch/u_big.h5", zipped = "/scratch/z_big.h5"), H5File)
+chunks <- c("1e3", "1e4", "1e5")
+names(chunks) <- paste("cs", chunks, sep = "-")
+
+l <- lapply(h5Files, function(h5) {
+  a <- do.call(cbind, lapply(chunks, function(s) {
     d <- getH5Dataset(h5, paste("data", s, sep = "_"))
-    replicate(100, {
-      system.time(f(d, n = 1000))[3]
-    })
+    do.call(rbind, lapply(1:20, function(i) {
+      ranges <- makeRanges(d, n = 1000, mu = 10000)
+      c(lapply = as.numeric(system.time(f(d, ranges))[3]),
+        native = as.numeric(system.time(g(d, ranges))[3]))
+    }))
   }))
-}))
+  colnames(a) <- paste(rep(names(chunks), each = 2), colnames(a), sep = "_")
+  return(a)
+})
+a <- do.call(cbind, l)
+colnames(a) <- paste(rep(names(h5Files), each = 6), colnames(a), sep = "_")
 
-write.table(l, file = "rres.dta")
+par(mar=c(12, 5, 3, 1))
+boxplot(a, las = 2, ylab = 'seconds', main = "Time for 1000 ranges of ~ 10000 long",
+        log = 'y')
 
 
+write.table(a, file = "rres.dta")
+
+
+require(Genominator)
+
+d <- data.frame( chr = 1, strand = 0, location = 1:100000000, value = rpois(100000000, 10))
+expData <- importToExpData(d, "test.db", tablename = "big_d", overwrite = T)
+
+r <- makeRanges(1:10000000, n = 100)
+r <- data.frame(r)
+r$chr <- 1
+r$strand <- 0
+r$end <- r$start+r$width
+system.time(splitByAnnotation(expData, r, what = 'value'))
+                           

Added: inst/h5_files/link.h5
===================================================================
(Binary files differ)


Property changes on: inst/h5_files/link.h5
___________________________________________________________________
Added: svn:mime-type
   + application/octet-stream

Modified: src/h5_wrap.c
===================================================================
--- src/h5_wrap.c	2011-04-13 19:41:18 UTC (rev 42)
+++ src/h5_wrap.c	2011-04-19 00:18:26 UTC (rev 43)
@@ -40,14 +40,25 @@
     return e_ptr;
 }
 
-SEXP h5R_open(SEXP filename) {
-    return _h5R_make_holder(H5Fopen(NM(filename), H5F_ACC_RDONLY, H5P_DEFAULT), 1);
+SEXP h5R_open(SEXP filename, SEXP mode) {
+    int _mode_ = (INTEGER(mode)[0] == 1) ? H5F_ACC_RDWR : H5F_ACC_RDONLY;
+    return _h5R_make_holder(H5Fopen(NM(filename), _mode_, H5P_DEFAULT), 1);
 }
 
+SEXP h5R_create(SEXP filename) {
+    return _h5R_make_holder(H5Fcreate(NM(filename), H5F_ACC_EXCL, H5P_DEFAULT, H5P_DEFAULT), 1);
+}
+
 SEXP h5R_get_group(SEXP h5_obj, SEXP group_name) {
     return _h5R_make_holder(H5Gopen(HID(h5_obj), NM(group_name), H5P_DEFAULT), 0);
 }
 
+SEXP h5R_create_group(SEXP h5_obj, SEXP group_name) {
+    hid_t group = H5Gcreate(HID(h5_obj), NM(group_name), H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+    H5Gclose(group);
+    return ScalarInteger(0);
+}
+
 SEXP h5R_get_dataset(SEXP h5_obj, SEXP dataset_name) {
     return _h5R_make_holder(H5Dopen(HID(h5_obj), NM(dataset_name), H5P_DEFAULT), 0);
 }
@@ -259,7 +270,25 @@
     return(dta);
 }
 
+SEXP h5R_write_dataset(SEXP h5_obj, SEXP name, SEXP data, SEXP dims) {
+    Rprintf("length dims: %d\n", length(dims));
+    int i;
+    for (i = 0; i < length(dims); i++) {
+	Rprintf("length dim[%d]=%d\n", i, INTEGER(dims)[i]);
+    }
 
+    hsize_t* cdims = (hsize_t*) Calloc(length(dims), hsize_t);
+    for (i = 0; i < length(dims); i++)
+	cdims[i] = INTEGER(dims)[i];
+
+    hid_t space = H5Screate_simple(length(dims), cdims, cdims);
+    hid_t ds = H5Dcreate(HID(h5_obj), NM(name), H5T_NATIVE_INT, space, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+    H5Dwrite(ds, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, INTEGER(data));
+
+    return(ScalarInteger(0));
+}
+
+
 SEXP h5R_read_slab(SEXP h5_dataset, SEXP _offsets, SEXP _counts) {
     int __ERROR__ = 0;
     SEXP dta = R_NilValue;

Added: tests/testwrite.R
===================================================================
--- tests/testwrite.R	                        (rev 0)
+++ tests/testwrite.R	2011-04-19 00:18:26 UTC (rev 43)
@@ -0,0 +1,20 @@
+require(h5r)
+
+system("rm test.h5")
+h5 <- H5File("test.h5", 'w')
+listH5Contents(h5)
+
+g1 <- createH5Group( h5, "grp1" )
+
+createH5Dataset(g1, "ds1", a <- rep(1, 100))
+all(getH5Dataset(g1, "ds1")[] == a)
+
+createH5Dataset(g1, "ds2", a <- cbind(rep(1, 10), rep(2, 10)))
+all(getH5Dataset(g1, "ds2")[] == a)
+
+createH5Dataset( g1, "ds4", rbind(rep(1, 10), rep(2, 10)))
+getH5Dataset(g1, "ds4")[]
+
+createH5Attribute( h5e, "attr1", 1:4 ) 
+
+



More information about the H5r-commits mailing list