[H5r-commits] r35 - in inst: . benchmark

noreply at r-forge.r-project.org
Fri Jul 16 20:34:58 CEST 2010


Author: extemporaneousb
Date: 2010-07-16 20:34:57 +0200 (Fri, 16 Jul 2010)
New Revision: 35

Added:
   inst/benchmark/
   inst/benchmark/makeBigH5.py
   inst/benchmark/performance.R
   inst/benchmark/performance.py
Removed:
   inst/makeBigH5.py
   inst/performance.R
   inst/performance.py
Log:
A long-overdue check-in to organize the benchmark files into their own directory.

Copied: inst/benchmark/makeBigH5.py (from rev 32, inst/makeBigH5.py)
===================================================================
--- inst/benchmark/makeBigH5.py	                        (rev 0)
+++ inst/benchmark/makeBigH5.py	2010-07-16 18:34:57 UTC (rev 35)
@@ -0,0 +1,20 @@
+from h5py import *
+from numpy import *
+import glob
+
+uh5 = File("./u_big.h5")
+zh5 = File("./z_big.h5")
+x   = random.randint(0, 1e8, 1e8)
+
+for s in ['1e3', '1e4', '1e5']:
+    print s
+
+    uh5.create_dataset("data_" + s, data = x, chunks = (int(double(s)),))
+    zh5.create_dataset("data_" + s, data = x, chunks = (int(double(s)),), compression = 'gzip')
+
+uh5.close()
+zh5.close()
+
+
+
+


Property changes on: inst/benchmark/makeBigH5.py
___________________________________________________________________
Added: svn:mergeinfo
   + 
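
For context: the script above writes two benchmark inputs, u_big.h5 (chunked, uncompressed) and z_big.h5 (chunked, gzip-compressed), each holding the same 1e8 random integers under the names data_1e3, data_1e4 and data_1e5, where the suffix is the chunk size. A minimal sketch for spot-checking one of the generated files from R, using only the h5r calls that appear elsewhere in this commit (it assumes the files sit in the working directory; the object names are illustrative):

require(h5r)

## Open the uncompressed benchmark file written by makeBigH5.py.
u <- H5File("./u_big.h5")

## Fetch the dataset chunked at 1e3 and read a small slab to confirm it is intact.
d <- getH5Dataset(u, "data_1e3")
readSlab(d, 1, 10)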

Copied: inst/benchmark/performance.R (from rev 33, inst/performance.R)
===================================================================
--- inst/benchmark/performance.R	                        (rev 0)
+++ inst/benchmark/performance.R	2010-07-16 18:34:57 UTC (rev 35)
@@ -0,0 +1,30 @@
+##
+## Investigate performance profiles of various access patterns.
+##
+require(h5r)
+
+f <- function(d, n = 100, mu = 1000) {
+  start <- runif(n, 1, length(d))
+  end   <- start + round(rexp(n, 1/mu))
+  end   <- ifelse(end > length(d), start, end)
+  width <- end - start + 1
+  
+  lapply(seq.int(1, n), function(i) {
+    readSlab(d, start[i], width[i])
+    return(NULL)
+  })
+  return(TRUE)
+}
+
+l <- do.call(rbind, lapply(list(H5File(Sys.glob("./u_big.h5")), H5File(Sys.glob("./z_big.h5"))), function(h5) {
+  do.call(rbind, lapply(c("1e3", "1e4", "1e5"), function(s) {
+    d <- getH5Dataset(h5, paste("data", s, sep = "_"))
+    replicate(100, {
+      system.time(f(d, n = 1000))[3]
+    })
+  }))
+}))
+
+write.table(l, file = "rres.dta")
+
+


Property changes on: inst/benchmark/performance.R
___________________________________________________________________
Added: svn:mergeinfo
   + 
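
The benchmark above yields a 6 x 100 matrix of elapsed times: one row per file and chunk size, in the loop order u_big then z_big crossed with 1e3, 1e4, 1e5, and one column per replicate, written to rres.dta by write.table with its default header and row names. A hedged sketch of reading it back and summarising it (the row labels are added here for readability; the script itself does not write them):

## Read the timing matrix written by performance.R; header = TRUE matches
## write.table's defaults, which emit a header row plus row names.
l <- as.matrix(read.table("rres.dta", header = TRUE))

## Label rows in the order the nested lapply() produces them.
rownames(l) <- paste(rep(c("u_big", "z_big"), each = 3),
                     c("1e3", "1e4", "1e5"), sep = "_")

## Median elapsed seconds per configuration.
apply(l, 1, median)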

Copied: inst/benchmark/performance.py (from rev 34, inst/performance.py)
===================================================================
--- inst/benchmark/performance.py	                        (rev 0)
+++ inst/benchmark/performance.py	2010-07-16 18:34:57 UTC (rev 35)
@@ -0,0 +1,34 @@
+from h5py import *
+from numpy import *
+import time
+
+def f(ds, N):
+    start = random.randint(0, len(ds), N)
+    end   = start + random.exponential(1000, N) + 1
+    end[end > len(ds)] = len(ds)
+    
+    for j in zip(start, end):
+        z = ds[j[0]:j[1]]
+    return True
+
+
+def myTime(K, ds, N):
+    res = [0]*K
+
+    for i in range(0, K):
+        s = time.time()
+        f(ds, N)
+        res[i] = time.time() - s
+    return res
+
+o = file('pyres.txt', 'w')
+
+for h5 in [File("./u_big.h5"), File("./z_big.h5")]:
+    for k in h5.keys():
+        ds = h5[k]
+        times = myTime(100, ds, N = 1000)
+        o.write(k + " ")
+        o.write(" ".join(map(str, times)))
+        o.write("\n")
+
+o.close()


Property changes on: inst/benchmark/performance.py
___________________________________________________________________
Added: svn:mergeinfo
   + 
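
performance.py mirrors the R benchmark: for each dataset in u_big.h5 and z_big.h5 it records 100 timings of 1000 random slices and writes one line per dataset to pyres.txt, the dataset name followed by its 100 elapsed times. A hedged R sketch for pulling those timings back in next to the R results (purely illustrative; nothing in this commit reads pyres.txt this way):

## Each line of pyres.txt is "<dataset name> <100 elapsed times>".
flds    <- strsplit(readLines("pyres.txt"), " ")
pyTimes <- lapply(flds, function(x) as.numeric(x[-1]))

## Both files reuse the dataset names data_1e3/1e4/1e5, so the names repeat
## here in file order: u_big first, then z_big.
names(pyTimes) <- sapply(flds, "[", 1)

## Median elapsed seconds per dataset, comparable to the R medians above.
sapply(pyTimes, median)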

Deleted: inst/makeBigH5.py
===================================================================
--- inst/makeBigH5.py	2010-06-15 15:32:24 UTC (rev 34)
+++ inst/makeBigH5.py	2010-07-16 18:34:57 UTC (rev 35)
@@ -1,25 +0,0 @@
-from h5py import *
-from numpy import *
-import glob
-
-h5 = File("/home/jbullard/big.h5")
-x  = random.randint(0, 1e8, 1e8)
-
-## with chunks.
-h5.create_dataset("cdata_1e3", data = x, chunks = (1000,))
-h5.create_dataset("cdata_1e4", data = x, chunks = (10000,))
-h5.create_dataset("cdata_1e5", data = x, chunks = (100000,))
-h5.create_dataset("cdata_1e6", data = x, chunks = (1000000,))
-
-## without
-h5.create_dataset("udata", data = x)
-
-## zipped
-h5.create_dataset("zdata", data = x, compression = 'gzip')
-
-## performance.
-
-
-h5.close()
-
-

Deleted: inst/performance.R
===================================================================
--- inst/performance.R	2010-06-15 15:32:24 UTC (rev 34)
+++ inst/performance.R	2010-07-16 18:34:57 UTC (rev 35)
@@ -1,75 +0,0 @@
-##
-## Investigate performance profiles of various access patterns.
-##
-require(h5r)
-
-h5 <- H5File(Sys.glob("~/local/big.h5"))
-
-chunks <- c("1e3", "1e4", "1e5", "1e6")
-names(chunks) <- chunks <- c("1e3", "1e4", "1e5", "1e6")
-cDtas <- lapply(paste("cdata", chunks, sep = "_"), function(n) getH5Dataset(h5, n, inMemory = FALSE))
-names(cDtas) <- chunks
-
-f <- function(d, n = 100, mu = 1000) {
-  start <- runif(n, 1, length(d))
-  end   <- start + round(rexp(n, 1/mu))
-  end   <- ifelse(end > length(d), start, end)
-  width <- end - start + 1
-  
-  lapply(seq.int(1, n), function(i) {
-    readSlab(d, start[i], width[i])
-    return(NULL)
-  })
-  return(TRUE)
-}
-
-times <- replicate(100, {
-  system.time(f(cDtas[[1]], n = 1000))[3]
-})
-
-pyTimes <- scan("pyres.txt")
-
-png("numpyVR.png")
-par(mar = c(8, 6, 5, 1), cex.lab = 2, cex.axis = 2, cex.main = 2)
-boxplot(list("Python" = pyTimes, "R" = times), ylim = c(.075, .15),
-        las = 2, main = "Random 1k dataset slice")
-dev.off()
-
-
-Rprof("rprof")
-levelplot(x <- do.call(rbind, lapply(cDtas, function(a) {
-  set.seed(10)
-  sapply(chunks, function(n) {
-    n <- as.numeric(n)
-    system.time(f(a, N, mu = n))[3]/(n)
-  })
-})))
-Rprof(NULL)
-summaryRprof("rprof")
-
-Rprof("rprof")
-x <- f(cDtas[[1]], n = 1000, mu = 1e4)
-Rprof(NULL)
-summaryRprof("rprof")
-
-
-
-##
-## Different Datasets.
-##
-cD <- getH5Dataset(h5, "cdata_1e4", inMemory = FALSE)
-uD <- getH5Dataset(h5, "udata", inMemory = FALSE)
-zD <- getH5Dataset(h5, "zdata", inMemory = FALSE)
-mD <- getH5Dataset(h5, "cdata", inMemory = TRUE)
-
-par(mar=c(10, 5, 3, 3))
-boxplot(as.data.frame(do.call(rbind, lapply(1:K, function(i) {
-  sapply(list("chunked" = cD, "unchunked" = uD, "zipped" = zD, "memory" = mD), function(a) {
-    set.seed(i)
-    system.time(f(a))[3]/N
-  })
-}))), las = 2)
-
-  
-
-

Deleted: inst/performance.py
===================================================================
--- inst/performance.py	2010-06-15 15:32:24 UTC (rev 34)
+++ inst/performance.py	2010-07-16 18:34:57 UTC (rev 35)
@@ -1,31 +0,0 @@
-from h5py import *
-from numpy import *
-import time
-
-h5 = File("/home/NANOFLUIDICS/jbullard/local/big.h5")
-ds = h5["cdata_1e3"]
-N  = 100
-
-def f(ds, N):
-    start = random.randint(0, len(ds), N)
-    end   = start + random.exponential(1000, N) + 1
-    end[end > len(ds)] = len(ds)
-    
-    for j in zip(start, end):
-        z = ds[j[0]:j[1]]
-    return True
-
-
-def myTime(K, ds, N):
-    res = [0]*K
-
-    for i in range(0, K):
-        s = time.time()
-        f(ds, N)
-        res[i] = time.time() - s
-    return res
-
-times = myTime(100, ds, N = 1000)
-o     = file('pyres.txt', 'w')
-o.write(" ".join(map(str, times)))
-o.close()


