[H5r-commits] r35 - in inst: . benchmark
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Fri Jul 16 20:34:58 CEST 2010
Author: extemporaneousb
Date: 2010-07-16 20:34:57 +0200 (Fri, 16 Jul 2010)
New Revision: 35
Added:
inst/benchmark/
inst/benchmark/makeBigH5.py
inst/benchmark/performance.R
inst/benchmark/performance.py
Removed:
inst/makeBigH5.py
inst/performance.R
inst/performance.py
Log:
A long-standing checkin to organize the files
Copied: inst/benchmark/makeBigH5.py (from rev 32, inst/makeBigH5.py)
===================================================================
--- inst/benchmark/makeBigH5.py (rev 0)
+++ inst/benchmark/makeBigH5.py 2010-07-16 18:34:57 UTC (rev 35)
@@ -0,0 +1,20 @@
+from h5py import *
+from numpy import *
+import glob
+
+uh5 = File("./u_big.h5")
+zh5 = File("./z_big.h5")
+x = random.randint(0, 1e8, 1e8)
+
+for s in ['1e3', '1e4', '1e5']:
+ print s
+
+ uh5.create_dataset("data_" + s, data = x, chunks = (int(double(s)),))
+ zh5.create_dataset("data_" + s, data = x, chunks = (int(double(s)),), compression = 'gzip')
+
+uh5.close()
+zh5.close()
+
+
+
+
Property changes on: inst/benchmark/makeBigH5.py
___________________________________________________________________
Added: svn:mergeinfo
+
Copied: inst/benchmark/performance.R (from rev 33, inst/performance.R)
===================================================================
--- inst/benchmark/performance.R (rev 0)
+++ inst/benchmark/performance.R 2010-07-16 18:34:57 UTC (rev 35)
@@ -0,0 +1,30 @@
+##
+## Investigate performance profiles of various access patterns.
+##
+require(h5r)
+
+f <- function(d, n = 100, mu = 1000) {
+ start <- runif(n, 1, length(d))
+ end <- start + round(rexp(n, 1/mu))
+ end <- ifelse(end > length(d), start, end)
+ width <- end - start + 1
+
+ lapply(seq.int(1, n), function(i) {
+ readSlab(d, start[i], width[i])
+ return(NULL)
+ })
+ return(TRUE)
+}
+
+l <- do.call(rbind, lapply(list(H5File(Sys.glob("./u_big.h5")), H5File(Sys.glob("./z_big.h5"))), function(h5) {
+ do.call(rbind, lapply(c("1e3", "1e4", "1e5"), function(s) {
+ d <- getH5Dataset(h5, paste("data", s, sep = "_"))
+ replicate(100, {
+ system.time(f(d, n = 1000))[3]
+ })
+ }))
+}))
+
+write.table(l, file = "rres.dta")
+
+
Property changes on: inst/benchmark/performance.R
___________________________________________________________________
Added: svn:mergeinfo
+
Copied: inst/benchmark/performance.py (from rev 34, inst/performance.py)
===================================================================
--- inst/benchmark/performance.py (rev 0)
+++ inst/benchmark/performance.py 2010-07-16 18:34:57 UTC (rev 35)
@@ -0,0 +1,34 @@
+from h5py import *
+from numpy import *
+import time
+
+def f(ds, N):
+ start = random.randint(0, len(ds), N)
+ end = start + random.exponential(1000, N) + 1
+ end[end > len(ds)] = len(ds)
+
+ for j in zip(start, end):
+ z = ds[j[0]:j[1]]
+ return True
+
+
+def myTime(K, ds, N):
+ res = [0]*K
+
+ for i in range(0, K):
+ s = time.time()
+ f(ds, N)
+ res[i] = time.time() - s
+ return res
+
+o = file('pyres.txt', 'w')
+
+for h5 in [File("./u_big.h5"), File("./z_big.h5")]:
+ for k in h5.keys():
+ ds = h5[k]
+ times = myTime(100, ds, N = 1000)
+ o.write(k + " ")
+ o.write(" ".join(map(str, times)))
+ o.write("\n")
+
+o.close()
Property changes on: inst/benchmark/performance.py
___________________________________________________________________
Added: svn:mergeinfo
+
Deleted: inst/makeBigH5.py
===================================================================
--- inst/makeBigH5.py 2010-06-15 15:32:24 UTC (rev 34)
+++ inst/makeBigH5.py 2010-07-16 18:34:57 UTC (rev 35)
@@ -1,25 +0,0 @@
-from h5py import *
-from numpy import *
-import glob
-
-h5 = File("/home/jbullard/big.h5")
-x = random.randint(0, 1e8, 1e8)
-
-## with chunks.
-h5.create_dataset("cdata_1e3", data = x, chunks = (1000,))
-h5.create_dataset("cdata_1e4", data = x, chunks = (10000,))
-h5.create_dataset("cdata_1e5", data = x, chunks = (100000,))
-h5.create_dataset("cdata_1e6", data = x, chunks = (1000000,))
-
-## without
-h5.create_dataset("udata", data = x)
-
-## zipped
-h5.create_dataset("zdata", data = x, compression = 'gzip')
-
-## performance.
-
-
-h5.close()
-
-
Deleted: inst/performance.R
===================================================================
--- inst/performance.R 2010-06-15 15:32:24 UTC (rev 34)
+++ inst/performance.R 2010-07-16 18:34:57 UTC (rev 35)
@@ -1,75 +0,0 @@
-##
-## Investigate performance profiles of various access patterns.
-##
-require(h5r)
-
-h5 <- H5File(Sys.glob("~/local/big.h5"))
-
-chunks <- c("1e3", "1e4", "1e5", "1e6")
-names(chunks) <- chunks <- c("1e3", "1e4", "1e5", "1e6")
-cDtas <- lapply(paste("cdata", chunks, sep = "_"), function(n) getH5Dataset(h5, n, inMemory = FALSE))
-names(cDtas) <- chunks
-
-f <- function(d, n = 100, mu = 1000) {
- start <- runif(n, 1, length(d))
- end <- start + round(rexp(n, 1/mu))
- end <- ifelse(end > length(d), start, end)
- width <- end - start + 1
-
- lapply(seq.int(1, n), function(i) {
- readSlab(d, start[i], width[i])
- return(NULL)
- })
- return(TRUE)
-}
-
-times <- replicate(100, {
- system.time(f(cDtas[[1]], n = 1000))[3]
-})
-
-pyTimes <- scan("pyres.txt")
-
-png("numpyVR.png")
-par(mar = c(8, 6, 5, 1), cex.lab = 2, cex.axis = 2, cex.main = 2)
-boxplot(list("Python" = pyTimes, "R" = times), ylim = c(.075, .15),
- las = 2, main = "Random 1k dataset slice")
-dev.off()
-
-
-Rprof("rprof")
-levelplot(x <- do.call(rbind, lapply(cDtas, function(a) {
- set.seed(10)
- sapply(chunks, function(n) {
- n <- as.numeric(n)
- system.time(f(a, N, mu = n))[3]/(n)
- })
-})))
-Rprof(NULL)
-summaryRprof("rprof")
-
-Rprof("rprof")
-x <- f(cDtas[[1]], n = 1000, mu = 1e4)
-Rprof(NULL)
-summaryRprof("rprof")
-
-
-
-##
-## Different Datasets.
-##
-cD <- getH5Dataset(h5, "cdata_1e4", inMemory = FALSE)
-uD <- getH5Dataset(h5, "udata", inMemory = FALSE)
-zD <- getH5Dataset(h5, "zdata", inMemory = FALSE)
-mD <- getH5Dataset(h5, "cdata", inMemory = TRUE)
-
-par(mar=c(10, 5, 3, 3))
-boxplot(as.data.frame(do.call(rbind, lapply(1:K, function(i) {
- sapply(list("chunked" = cD, "unchunked" = uD, "zipped" = zD, "memory" = mD), function(a) {
- set.seed(i)
- system.time(f(a))[3]/N
- })
-}))), las = 2)
-
-
-
-
Deleted: inst/performance.py
===================================================================
--- inst/performance.py 2010-06-15 15:32:24 UTC (rev 34)
+++ inst/performance.py 2010-07-16 18:34:57 UTC (rev 35)
@@ -1,31 +0,0 @@
-from h5py import *
-from numpy import *
-import time
-
-h5 = File("/home/NANOFLUIDICS/jbullard/local/big.h5")
-ds = h5["cdata_1e3"]
-N = 100
-
-def f(ds, N):
- start = random.randint(0, len(ds), N)
- end = start + random.exponential(1000, N) + 1
- end[end > len(ds)] = len(ds)
-
- for j in zip(start, end):
- z = ds[j[0]:j[1]]
- return True
-
-
-def myTime(K, ds, N):
- res = [0]*K
-
- for i in range(0, K):
- s = time.time()
- f(ds, N)
- res[i] = time.time() - s
- return res
-
-times = myTime(100, ds, N = 1000)
-o = file('pyres.txt', 'w')
-o.write(" ".join(map(str, times)))
-o.close()
More information about the H5r-commits mailing list