[H5r-commits] r43 - R inst/benchmark inst/h5_files src tests
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Tue Apr 19 02:18:27 CEST 2011
Author: extemporaneousb
Date: 2011-04-19 02:18:26 +0200 (Tue, 19 Apr 2011)
New Revision: 43
Added:
inst/h5_files/link.h5
tests/testwrite.R
Modified:
R/h5R.R
inst/benchmark/makeBigH5.py
inst/benchmark/performance.R
src/h5_wrap.c
Log:
Added the beginnings of writing code.
Modified: R/h5R.R
===================================================================
--- R/h5R.R 2011-04-13 19:41:18 UTC (rev 42)
+++ R/h5R.R 2011-04-19 00:18:26 UTC (rev 43)
@@ -43,8 +43,8 @@
length(x at s)
})
-H5File <- function(fileName) {
- new("H5File", fileName)
+H5File <- function(fileName, mode = 'r') {
+ new("H5File", fileName, mode)
}
.ePtr <- function(obj) obj at ePtr
@@ -94,6 +94,16 @@
standardGeneric("getH5Dataset")
})
+createH5Dataset <- function(h5Obj, datasetName, data) {
+ if (is.null(d <- dim(data))) {
+ d <- length(data)
+ } else {
+ data <- aperm(data)
+ }
+ .Call("h5R_write_dataset", .ePtr(h5Obj), datasetName,
+ as.integer(data), as.integer(d))
+}
+
setGeneric("getH5Attribute", function(h5Obj, attrName, ...) {
standardGeneric("getH5Attribute")
})
@@ -105,6 +115,17 @@
.H5Group(x, groupName)
})
+createH5Group <- function(h5Obj, groupName) {
+ ## XXX: Should add a check to see if group exists.
+ z <- .Call("h5R_create_group", .ePtr(h5Obj), groupName, PACKAGE = 'h5r')
+
+ if (z == 0) {
+ return(getH5Group(h5Obj, groupName))
+ } else {
+ stop(paste("Unable to create group:", groupName))
+ }
+}
+
setMethod("getH5Dim", "H5DataContainer", function(h5Obj) {
.Call('h5R_get_dims', .ePtr(h5Obj), PACKAGE = 'h5r')
})
@@ -113,16 +134,22 @@
.Call("h5R_get_type", .ePtr(h5Obj), PACKAGE = 'h5r')
})
-setMethod("initialize", c("H5File"), function(.Object, fileName) {
+setMethod("initialize", c("H5File"), function(.Object, fileName, mode = c('r', 'w')) {
## This is obscene. I have to do this because somehow Subclasses
## call this at *class* instantiation time.
if (missing(fileName))
return(.Object)
+ mode <- match.arg(mode)
+
+ if (! file.exists(fileName) && mode == 'w') {
+ .Call("h5R_finalizer", .Call("h5R_create", fileName, package = "h5R"))
+ }
+
if (! file.exists(fileName)) {
stop(paste("Unable to open file:", fileName, "does not exist."))
}
- x <- .Call("h5R_open", fileName, package = "h5R")
+ x <- .Call("h5R_open", fileName, if(mode == 'r') as.integer(0) else as.integer(1), package = "h5R")
if (is.null(x)) {
stop(paste("Problem opening file:", fileName))
Modified: inst/benchmark/makeBigH5.py
===================================================================
--- inst/benchmark/makeBigH5.py 2011-04-13 19:41:18 UTC (rev 42)
+++ inst/benchmark/makeBigH5.py 2011-04-19 00:18:26 UTC (rev 43)
@@ -2,8 +2,8 @@
from numpy import *
import glob
-uh5 = File("./u_big.h5")
-zh5 = File("./z_big.h5")
+uh5 = File("/scratch/u_big.h5")
+zh5 = File("/scratch/z_big.h5")
x = random.randint(0, 1e8, 1e8)
for s in ['1e3', '1e4', '1e5']:
Modified: inst/benchmark/performance.R
===================================================================
--- inst/benchmark/performance.R 2011-04-13 19:41:18 UTC (rev 42)
+++ inst/benchmark/performance.R 2011-04-19 00:18:26 UTC (rev 43)
@@ -3,28 +3,62 @@
##
require(h5r)
-f <- function(d, n = 100, mu = 1000) {
- start <- runif(n, 1, length(d))
+makeRanges <- function(d, n = 100, mu = 1000) {
+ start <- runif(n, 1, length(d) - mu)
end <- start + round(rexp(n, 1/mu))
- end <- ifelse(end > length(d), start, end)
+ end <- ifelse(end > length(d), length(d), end)
width <- end - start + 1
-
- lapply(seq.int(1, n), function(i) {
- readSlab(d, start[i], width[i])
- return(NULL)
+ cbind(start, width)
+}
+
+f <- function(d, ranges) {
+ g <- lapply(seq.int(1, nrow(ranges)), function(i) {
+ readSlab(d, ranges[i,1], ranges[i,2])
})
return(TRUE)
}
-l <- do.call(rbind, lapply(list(H5File(Sys.glob("./u_big.h5")), H5File(Sys.glob("./z_big.h5"))), function(h5) {
- do.call(rbind, lapply(c("1e3", "1e4", "1e5"), function(s) {
+g <- function(d, ranges) {
+ g <- read1DSlabs(d, ranges[,1], ranges[,2])
+ return(TRUE)
+}
+
+h5Files <- lapply(list(unzipped = "/scratch/u_big.h5", zipped = "/scratch/z_big.h5"), H5File)
+chunks <- c("1e3", "1e4", "1e5")
+names(chunks) <- paste("cs", chunks, sep = "-")
+
+l <- lapply(h5Files, function(h5) {
+ a <- do.call(cbind, lapply(chunks, function(s) {
d <- getH5Dataset(h5, paste("data", s, sep = "_"))
- replicate(100, {
- system.time(f(d, n = 1000))[3]
- })
+ do.call(rbind, lapply(1:20, function(i) {
+ ranges <- makeRanges(d, n = 1000, mu = 10000)
+ c(lapply = as.numeric(system.time(f(d, ranges))[3]),
+ native = as.numeric(system.time(g(d, ranges))[3]))
+ }))
}))
-}))
+ colnames(a) <- paste(rep(names(chunks), each = 2), colnames(a), sep = "_")
+ return(a)
+})
+a <- do.call(cbind, l)
+colnames(a) <- paste(rep(names(h5Files), each = 6), colnames(a), sep = "_")
-write.table(l, file = "rres.dta")
+par(mar=c(12, 5, 3, 1))
+boxplot(a, las = 2, ylab = 'seconds', main = "Time for 1000 ranges of ~ 10000 long",
+ log = 'y')
+write.table(a, file = "rres.dta")
+
+
+require(Genominator)
+
+d <- data.frame( chr = 1, strand = 0, location = 1:100000000, value = rpois(100000000, 10))
+expData <- importToExpData(d, "test.db", tablename = "big_d", overwrite = T)
+
+r <- makeRanges(1:10000000, n = 100)
+r <- data.frame(r)
+r$chr <- 1
+r$strand <- 0
+r$end <- r$start+r$width
+system.time(splitByAnnotation(expData, r, what = 'value'))
+
Added: inst/h5_files/link.h5
===================================================================
(Binary files differ)
Property changes on: inst/h5_files/link.h5
___________________________________________________________________
Added: svn:mime-type
+ application/octet-stream
Modified: src/h5_wrap.c
===================================================================
--- src/h5_wrap.c 2011-04-13 19:41:18 UTC (rev 42)
+++ src/h5_wrap.c 2011-04-19 00:18:26 UTC (rev 43)
@@ -40,14 +40,25 @@
return e_ptr;
}
-SEXP h5R_open(SEXP filename) {
- return _h5R_make_holder(H5Fopen(NM(filename), H5F_ACC_RDONLY, H5P_DEFAULT), 1);
+SEXP h5R_open(SEXP filename, SEXP mode) {
+ int _mode_ = (INTEGER(mode)[0] == 1) ? H5F_ACC_RDWR : H5F_ACC_RDONLY;
+ return _h5R_make_holder(H5Fopen(NM(filename), _mode_, H5P_DEFAULT), 1);
}
+SEXP h5R_create(SEXP filename) {
+ return _h5R_make_holder(H5Fcreate(NM(filename), H5F_ACC_EXCL, H5P_DEFAULT, H5P_DEFAULT), 1);
+}
+
SEXP h5R_get_group(SEXP h5_obj, SEXP group_name) {
return _h5R_make_holder(H5Gopen(HID(h5_obj), NM(group_name), H5P_DEFAULT), 0);
}
+SEXP h5R_create_group(SEXP h5_obj, SEXP group_name) {
+ hid_t group = H5Gcreate(HID(h5_obj), NM(group_name), H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+ H5Gclose(group);
+ return ScalarInteger(0);
+}
+
SEXP h5R_get_dataset(SEXP h5_obj, SEXP dataset_name) {
return _h5R_make_holder(H5Dopen(HID(h5_obj), NM(dataset_name), H5P_DEFAULT), 0);
}
@@ -259,7 +270,25 @@
return(dta);
}
+SEXP h5R_write_dataset(SEXP h5_obj, SEXP name, SEXP data, SEXP dims) {
+ Rprintf("length dims: %d\n", length(dims));
+ int i;
+ for (i = 0; i < length(dims); i++) {
+ Rprintf("length dim[%d]=%d\n", i, INTEGER(dims)[i]);
+ }
+ hsize_t* cdims = (hsize_t*) Calloc(length(dims), hsize_t);
+ for (i = 0; i < length(dims); i++)
+ cdims[i] = INTEGER(dims)[i];
+
+ hid_t space = H5Screate_simple(length(dims), cdims, cdims);
+ hid_t ds = H5Dcreate(HID(h5_obj), NM(name), H5T_NATIVE_INT, space, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+ H5Dwrite(ds, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, INTEGER(data));
+
+ return(ScalarInteger(0));
+}
+
+
SEXP h5R_read_slab(SEXP h5_dataset, SEXP _offsets, SEXP _counts) {
int __ERROR__ = 0;
SEXP dta = R_NilValue;
Added: tests/testwrite.R
===================================================================
--- tests/testwrite.R (rev 0)
+++ tests/testwrite.R 2011-04-19 00:18:26 UTC (rev 43)
@@ -0,0 +1,20 @@
+require(h5r)
+
+system("rm test.h5")
+h5 <- H5File("test.h5", 'w')
+listH5Contents(h5)
+
+g1 <- createH5Group( h5, "grp1" )
+
+createH5Dataset(g1, "ds1", a <- rep(1, 100))
+all(getH5Dataset(g1, "ds1")[] == a)
+
+createH5Dataset(g1, "ds2", a <- cbind(rep(1, 10), rep(2, 10)))
+all(getH5Dataset(g1, "ds2")[] == a)
+
+createH5Dataset( g1, "ds4", rbind(rep(1, 10), rep(2, 10)))
+getH5Dataset(g1, "ds4")[]
+
+createH5Attribute( h5e, "attr1", 1:4 )
+
+
More information about the H5r-commits
mailing list