[H5r-commits] r49 - inst/benchmark tests
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Tue Apr 26 17:24:01 CEST 2011
Author: extemporaneousb
Date: 2011-04-26 17:24:00 +0200 (Tue, 26 Apr 2011)
New Revision: 49
Added:
inst/benchmark/ff_and_bigmem.R
Removed:
tests/testall.R
Log:
cleanup.
Added: inst/benchmark/ff_and_bigmem.R
===================================================================
--- inst/benchmark/ff_and_bigmem.R (rev 0)
+++ inst/benchmark/ff_and_bigmem.R 2011-04-26 15:24:00 UTC (rev 49)
@@ -0,0 +1,172 @@
+require(bigmemory)
+require(ff)
+require(h5r)
+require(pbutils)
+require(RColorBrewer)
+
+pdf("timing_results.pdf", width = 12, height = 8)
+
+cols <- brewer.pal(3, "Set2")
+reps <- 10
+vsize <- 4:8
+gDta <- function(s) as.integer(rpois(10^s, 1000))
+
+creation <- lapply(vsize, function(s) {
+ do.call(rbind, lapply(1:reps, function(r) {
+ d <- gDta(s)
+ system("rm -f /scratch/t.h5")
+ h5 <- H5File("/scratch/t.h5", 'w')
+ c("ff" = system.time(ff(d))[3],
+ "bigmemory" = system.time(as.big.matrix(d))[3],
+ "h5r" = system.time(createH5Dataset(h5, 'ds', d))[3])
+ }))
+})
+
+local({
+ par(mar=c(8, 6, 2, 2))
+ m <- do.call(cbind, creation)
+ boxplot(m, col = cols, las = 2, ylab = "Seconds", log = 'y',
+ main = paste("Creation times for vectors of 10^(", paste(vsize, collapse = ", "), ")", sep = ""),
+ names = gsub("\\.elapse", "", colnames(m)), border = cols, pch = 16)
+ legend("topleft", c("ff", "bigmemory", "h5r"), fill = cols)
+})
+
+insertion <- lapply(vsize, function(s) {
+ do.call(rbind, lapply(1:reps, function(r) {
+ d <- gDta(s)
+ system("rm -f /scratch/t.h5")
+ h5 <- H5File("/scratch/t.h5", 'w')
+
+ c(
+ "ff" = system.time({
+ v <- ff(vmode = "integer", dim = length(d))
+ v[] <- d
+ })[3],
+ "bigmemory" = system.time({
+ as.big.matrix(d)
+ })[3],
+ "h5r" = system.time({
+ createH5Dataset(h5, 'ds', d)
+ })[3]
+ )
+ }))
+})
+
+local({
+ par(mar=c(8, 6, 2, 2))
+ m <- do.call(cbind, insertion)
+ boxplot(m, col = cols, las = 2, ylab = "Seconds", log = 'y',
+ main = paste("Creation+Insertion times for vectors of 10^(", paste(vsize, collapse = ", "), ")", sep = ""),
+ names = gsub("\\.elapse", "", colnames(m)), border = cols, pch = 16)
+ legend("topleft", c("ff", "bigmemory", "h5r"), fill = cols)
+})
+
+
+system("rm -f /scratch/t.h5")
+h5 <- H5File("/scratch/t.h5", 'w')
+
+D <- gDta(8)
+hD <- createH5Dataset(h5, "dsq", D, chunkSizes = 2^16)
+fD <- ff(D)
+bD <- as.big.matrix(D)
+
+selSize <- 10^(2:4)
+selection <- lapply(selSize, function(s) {
+ do.call(rbind, lapply(1:reps, function(r) {
+ colSums(do.call(rbind, lapply(1:50, function(zz) {
+ start <- round(runif(1, 1, length(D) - s - 10))
+ end <- round(start + s)
+ c(
+ "ff" = system.time({
+ fD[start:end]
+ })[3],
+ "bigmemory" = system.time({
+ bD[start:end, ]
+ })[3],
+ "h5r" = system.time({
+ hD[start:end]
+ })[3]
+ )
+ })))
+ }))
+})
+
+local({
+ par(mar=c(8, 6, 2, 2))
+ m <- do.call(cbind, selection)
+ boxplot(m, col = cols, las = 2, ylab = "Seconds", log = 'y',
+ main = paste("Selection Times for 50 vectors of 10^(", paste(selSize, collapse = ", "), ")", sep = ""),
+ names = gsub("\\.elapse", "", colnames(m)), border = cols, pch = 16)
+ legend("topleft", c("ff", "bigmemory", "h5r"), fill = cols)
+})
+
+##
+## vector selection
+##
+selSize <- 1000
+nSelect <- 10^(2:5)
+
+vselection <- lapply(nSelect, function(s) {
+ do.call(rbind, lapply(1:reps, function(r) {
+ start <- runif(s, 1, length(D) - 10*selSize)
+ width <- rexp(s, 1/selSize)
+ rngs <- round(cbind(start, width)) + 1
+
+ print(range(rngs))
+ c(
+ "ff" = system.time({
+ lapply(1:nrow(rngs), function(i) fD[rngs[i,1]:(rngs[i,1]+rngs[i,2])])
+ })[3],
+ "bigmemory" = system.time({
+ lapply(1:nrow(rngs), function(i) bD[rngs[i,1]:(rngs[i,1]+rngs[i,2]),])
+ })[3],
+ "h5r" = system.time({
+ read1DSlabs(hD, rngs[,1], rngs[,2])
+ })[3]
+ )
+ }))
+})
+
+local({
+ par(mar=c(8, 6, 2, 2))
+ m <- do.call(cbind, vselection)
+ boxplot(m, col = cols, las = 2, ylab = "Seconds", log = 'y',
+ main = paste("Selection Times for sets of ranges of length (", paste(nSelect, collapse = ", "), ")", sep = ""),
+ names = gsub("\\.elapse", "", colnames(m)), border = cols, pch = 16)
+ legend("topleft", c("ff", "bigmemory", "h5r"), fill = cols)
+})
+
+##
+## random
+##
+selSize <- 10^(2:4)
+rselection <- lapply(selSize, function(s) {
+ do.call(rbind, lapply(1:reps, function(r) {
+ colSums(do.call(rbind, lapply(1:10, function(zz) {
+ points <- sample(1:length(D), size = s)
+ c(
+ "ff" = system.time({
+ fD[points]
+ })[3],
+ "bigmemory" = system.time({
+ bD[points, ]
+ })[3],
+ "h5r" = system.time({
+ hD[points]
+ })[3]
+ )
+ })))
+ }))
+})
+
+local({
+ par(mar=c(8, 6, 2, 2))
+ m <- do.call(cbind, rselection)
+ m[m==0] <- min(m[m!=0])
+ boxplot(m, col = cols, las = 2, ylab = "Seconds", log = 'y',
+ main = paste("Selection Times for 10 random point vectors of size 10^(", paste(selSize, collapse = ", "), ")", sep = ""),
+ names = gsub("\\.elapse", "", colnames(m)), border = cols, pch = 16)
+ legend("topleft", c("ff", "bigmemory", "h5r"), fill = cols)
+})
+
+dev.off()
Deleted: tests/testall.R
===================================================================
--- tests/testall.R 2011-04-26 05:22:39 UTC (rev 48)
+++ tests/testall.R 2011-04-26 15:24:00 UTC (rev 49)
@@ -1,235 +0,0 @@
-require(h5r)
-
-source("tinyTestHarness.R")
-
-##
-## Make a new TestHarness.
-##
-TH <- TestHarness()
-
-
-##
-## The tests.
-##
-file <- system.file("h5_files", "ex_1.h5", package = 'h5r')
-
-## ex_1
-f <- H5File(file)
-g <- getH5Group(f, "group_1")
-
-TH("group name", g at name == "group_1")
-
-ds1 <- getH5Dataset(g, "ds_1", inMemory = T)
-
-TH("ds_1 dim, 1", all(dim(ds1) == c(1000, 10)))
-TH("ds_1 dim, 2", all(dim(ds1[1:10, 1:10]) == c(10, 10)))
-TH("ds_1 dim, 3", all(dim(ds1[1:10, ]) == c(10, 10)))
-TH("ds_1 dim, 4", is.null(dim(ds1[, 1])))
-TH("ds_1 dim, 5", assertError(ds1[,1:12]))
-
-## test existence.
-TH("existence, 1", h5DatasetExists(g, "ds_1"))
-TH("existence, 2", h5DatasetExists(g, "ds_232") == FALSE)
-
-## string dataset
-ds2M <- getH5Dataset(g, "ds_2", inMemory = T)
-ds2 <- getH5Dataset(g, "ds_2", inMemory = F)
-
-TH("ds_2 dim, 1", all(ds2[] == ds2M[]))
-TH("ds_2 dim, 2", all(ds2[1:5] == ds2M[1:5]))
-
-## attributes
-a <- getH5Attribute(ds2, "x")
-b <- getH5Attribute(ds2, "y")
-c <- getH5Attribute(ds2, "z")
-
-TH("attribute 1", all(a[] == 1:3))
-TH("attribute 2", all(b[] == rbind(1:3, 5:7)))
-TH("attribute 3", all(c[] == ds2[]))
-
-## > 2 dimensional data.
-ds3M <- getH5Dataset(g, "ds_3", inMemory = T)
-ds3 <- getH5Dataset(g, "ds_3", inMemory = F)
-
-TH("ds_3 dim", all(dim(ds3[,,]) == dim(ds3)) && all(dim(ds3M[,,]) == dim(ds3M)))
-
-## known inconsistency between two.
-TH("In memory inconsistency (!! FIXME !!)", assertError(all(ds3M[] == ds3[])))
-
-## the 3d R object.
-id3 <- ds3M at .data$.data
-
-TH("3d consistency, slabbed", all(id3[,,] == ds3[,,]) &
- all(id3[,1,,drop=TRUE] == ds3[,1,,drop=TRUE]) &
- all(id3[1,1,,drop=TRUE] == ds3[1,1,,drop=TRUE]) &
- all(id3[1,,3,drop=TRUE] == ds3[1,,3,drop=TRUE]) &
- all(id3[1,,1:3,drop=TRUE] == ds3[1,,1:3,drop=TRUE]))
-
-TH("3d consistency, contiguity",
- all(id3[,2:1,] == ds3[,2:1,]) &
- all(id3[,1,seq(1,9,by=4)] == ds3[,1,seq(1,9,by=4)]) &
- all(id3[3:1,,] == ds3[3:1,,]))
-
-
-TH("3d consistency, memory", all(id3[,,] == ds3M[,,]) &
- all(id3[,1,,drop=TRUE] == ds3M[,1,,drop=TRUE]) &
- all(id3[1,1,,drop=TRUE] == ds3M[1,1,,drop=TRUE]) &
- all(id3[1,,3,drop=TRUE] == ds3M[1,,3,drop=TRUE]))
-
-TH("3d bounds check 1", assertError(ds3[0:10,,]))
-TH("3d bounds check 2", assertError(ds3[,,0:10]))
-TH("3d bounds check 3", assertError(ds3[1,2,1:1000]))
-
-## 2 dimensional string dataset.
-ds4M <- getH5Dataset(g, "ds_4", inMemory = T)
-ds4 <- getH5Dataset(g, "ds_4", inMemory = F)
-
-TH("ds_2 dim", all(dim(ds4[,]) == dim(ds4)) & all(dim(ds4M[,]) == dim(ds4)))
-
-TH("ds_4, memory", (function(n = 100, s = 100) {
- g1 <- gc()[,1]
- a <- replicate(n, {
- replicate(s, getH5Dataset(g, "ds_4", inMemory = FALSE)[1:2,1:2])
- })
- rm(a)
- all(g1 - gc()[,1] <= 0)
-})())
-
-## contiguity problem.
-TH("contiguity", all(ds4M[1, c(2,3,5)] == ds4[1, c(2,3,5)]))
-TH("contiguity - 1D", all(ds2M[c(1, 7, 13)] == ds2[c(1, 7, 13)]))
-
-ds5 <- getH5Dataset(g, "ds_5")
-ds5M <- ds5[]
-
-TH("ds5 contiguity",
- all(ds5[10:1, ] == ds5M[ 10:1, ]) &&
- all(ds5[10:1, 2] == ds5M[ 10:1, 2]) &&
- all(ds5[seq(1, 10, by = 3), 2] == ds5M[ seq(1, 10, by = 3), 2]))
-
-## 5-d object
-ds9M <- getH5Dataset(g, "ds_9", inMemory = T)
-ds9 <- getH5Dataset(g, "ds_9", inMemory = F)
-id9 <- ds9M[,,,,]
-
-TH("5-d 0", all(id9[] == ds9M[,,,,]) && all(id9[] == ds9[]))
-
-TH("5-d",
- all(id9[c(7, 10), c(3, 4), , , ] == ds9[ c(7, 10), c(3, 4), , , ]) &&
- all(id9[c(7, 10), c(3, 4), c(1, 5), , ] == ds9[ c(7, 10), c(3, 4), c(1, 5), , ]) &&
- all(id9[c(7, 10), c(3, 4), 1:5, , ] == ds9[ c(7, 10), c(3, 4), 1:5, , ]) &&
- all(id9[c(7, 10), c(3, 4), , , ] == ds9[ c(7, 10), c(3, 4), , , ]) &&
- all(id9[c(10, 7), 10:1, , , ] == ds9[ c(10, 7), 10:1, , , ]) &&
- all(id9[, , 1:2, 1:2, ] == ds9[ , , 1:2, 1:2, ]) &&
- all(id9[, , 2:1, 2:1, ] == ds9[ , , 2:1, 2:1, ]) &&
- all(id9[ , , , 1:2, 1:2 ] == ds9[ , , , 1:2, 1:2]) &&
- all(id9[1,1,1,1,1] == ds9[1,1,1,1,1]))
-
-
-##
-## More in-depth testing of slicing.
-##
-ds6 <- getH5Dataset(g, "ds_6", inMemory = FALSE)
-ds6M <- getH5Dataset(g, "ds_6", inMemory = TRUE)
-
-TH("ds6, slicing", all(ds6[,] == ds6M[,]) & all(ds6[2:1001] == ds6M[2:1001]))
-
-timeMe <- function(d) {
- k <- 100
- n <- 100
- system.time({
- for (i in seq.int(1, n)) {
- b <- runif(1, 1, nrow(d) - k)
- d[b:(b + k)]
- }
- })[3]
-}
-
-## These are *real* upper-bounds on timing.
-TH("slab selection, timing", (mean(replicate(10, timeMe(ds6))) < 1))
-TH("slab selection, timing -- memory", (mean(replicate(10, timeMe(ds6M))) < 1))
-
-randomSlice <- function(d) {
- dims <- dim(d)
-
- starts <- floor(runif(length(dims), rep(1, length(dims)), dims))
- ends <- starts + floor(rexp(length(dims), 1/4))
- ends <- ifelse(ends > dims, dims, ends)
-
- lst <- as.list(call("[", d))
-
- for (i in 1:length(dims)) {
- lst[[i + 2]] <- starts[i]:ends[i]
- }
- eval(as.call(lst))
-}
-
-ds7 <- getH5Dataset(g, "ds_7", inMemory = FALSE)
-ds7M <- getH5Dataset(g, "ds_7", inMemory = TRUE)
-
-TH("random slice", {
- set.seed(10)
- system.time({a <- replicate(10000, randomSlice(ds7))})
- set.seed(10)
- system.time({b <- replicate(10000, randomSlice(ds7M))})
- all.equal(a,b)
-})
-
-TH("list attributes, file", {
- length(listH5Contents(f)) == 15
-})
-
-TH("list attributes, group", {
- length(listH5Contents(g)) == 12
-})
-
-ds8 <- getH5Dataset(g, "ds_8", inMemory = FALSE)
-
-TH("dim check 1", assertError(ds8[,0:5]))
-TH("dim check 2", assertError(ds8[0,1:5]))
-TH("dim check 3", assertError(ds8[-1,1:5]))
-TH("dim check 4", assertError(ds8[10,1]))
-TH("test 0-vs-1 based", all(ds8[1,1:5] == 1:5))
-
-
-TH("hSlab grab",
- all(ds8[hSlab(c(1,1), end = c(2,2))] == ds8[1:2, 1:2]) &
- all(ds8[] == ds8[ hSlab(c(1,1), end = dim(ds8)) ]))
-
-TH("normal time", {
- all(replicate(10000, {
- m <- apply(cbind(c(1,1,1), dim(ds7)), 1, function(b) {
- a <- runif(1, b[1], b[2])
- floor(c(a, runif(1, a, b[2])))
- })
- ds7[m[1,1]:m[2,1],
- m[1,2]:m[2,2],
- m[1,3]:m[2,3]]
- return(TRUE)
- }))
-})
-
-TH("hSlab time", {
- all(replicate(10000, {
- m <- apply(cbind(c(1,1,1), dim(ds7)), 1, function(b) {
- a <- runif(1, b[1], b[2])
- floor(c(a, runif(1, a, b[2])))
- })
- ds7[hSlab(m[1,], end = m[2,])]
- return(TRUE)
- }))
-})
-
-TH("slabs equal", all(readSlab(ds6, 1, 10) == read1DSlabs(ds6, 1, 10)[[1]]))
-TH("slabs equal iteration", {
- r1 <- read1DSlabs(ds6, 1:10, rep(5, 10))
- r2 <- lapply(1:10, function(a) {
- as.integer(readSlab(ds6, a, 5))
- })
- all.equal(r1, r2)
-})
-
-
-TH(action = "print")
-TH(action = "throw")
-
More information about the H5r-commits
mailing list