[Rprotobuf-commits] r930 - papers/jss
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Mon Dec 15 02:10:08 CET 2014
Author: edd
Date: 2014-12-15 02:10:07 +0100 (Mon, 15 Dec 2014)
New Revision: 930
Modified:
papers/jss/article.R
papers/jss/article.Rnw
Log:
one "isn't" replaced with "is not"; one sentence reworked
Modified: papers/jss/article.R
===================================================================
--- papers/jss/article.R 2014-12-04 01:45:57 UTC (rev 929)
+++ papers/jss/article.R 2014-12-15 01:10:07 UTC (rev 930)
@@ -1,7 +1,7 @@
-### R code from vignette source 'article.Rnw'
+### R code from vignette source '/home/edd/svn/rprotobuf/papers/jss/article.Rnw'
###################################################
-### code chunk number 1: article.Rnw:125-131
+### code chunk number 1: article.Rnw:130-136
###################################################
## cf http://www.jstatsoft.org/style#q12
options(prompt = "R> ",
@@ -12,7 +12,7 @@
###################################################
-### code chunk number 2: article.Rnw:313-321
+### code chunk number 2: article.Rnw:318-326
###################################################
library("RProtoBuf")
p <- new(tutorial.Person, id=1,
@@ -25,20 +25,13 @@
###################################################
-### code chunk number 3: article.Rnw:376-377
+### code chunk number 3: article.Rnw:421-422
###################################################
-ls("RProtoBuf:DescriptorPool")
-
-
-###################################################
-### code chunk number 4: article.Rnw:391-393
-###################################################
-p1 <- new(tutorial.Person)
p <- new(tutorial.Person, name = "Murray", id = 1)
###################################################
-### code chunk number 5: article.Rnw:402-405
+### code chunk number 4: article.Rnw:431-434
###################################################
p$name
p$id
@@ -46,7 +39,7 @@
###################################################
-### code chunk number 6: article.Rnw:413-416
+### code chunk number 5: article.Rnw:442-445
###################################################
p[["name"]] <- "Murray Stokely"
p[[ 2 ]] <- 3
@@ -54,25 +47,25 @@
###################################################
-### code chunk number 7: article.Rnw:429-430
+### code chunk number 6: article.Rnw:461-462
###################################################
p
###################################################
-### code chunk number 8: article.Rnw:437-438
+### code chunk number 7: article.Rnw:469-470
###################################################
writeLines(as.character(p))
###################################################
-### code chunk number 9: article.Rnw:451-452
+### code chunk number 8: article.Rnw:483-484
###################################################
serialize(p, NULL)
###################################################
-### code chunk number 10: article.Rnw:457-460
+### code chunk number 9: article.Rnw:489-492
###################################################
tf1 <- tempfile()
serialize(p, tf1)
@@ -80,92 +73,42 @@
###################################################
-### code chunk number 11: article.Rnw:465-470
+### code chunk number 10: article.Rnw:538-540
###################################################
-tf2 <- tempfile()
-con <- file(tf2, open = "wb")
-serialize(p, con)
-close(con)
-readBin(tf2, raw(0), 500)
-
-
-###################################################
-### code chunk number 12: article.Rnw:476-480
-###################################################
-p$serialize(tf1)
-con <- file(tf2, open = "wb")
-p$serialize(con)
-close(con)
-
-
-###################################################
-### code chunk number 13: article.Rnw:500-502
-###################################################
msg <- read(tutorial.Person, tf1)
writeLines(as.character(msg))
###################################################
-### code chunk number 14: article.Rnw:508-512
+### code chunk number 11: article.Rnw:660-661
###################################################
-con <- file(tf2, open = "rb")
-message <- read(tutorial.Person, con)
-close(con)
-writeLines(as.character(message))
-
-
-###################################################
-### code chunk number 15: article.Rnw:517-519
-###################################################
-payload <- readBin(tf1, raw(0), 5000)
-message <- read(tutorial.Person, payload)
-
-
-###################################################
-### code chunk number 16: article.Rnw:526-531
-###################################################
-message <- tutorial.Person$read(tf1)
-con <- file(tf2, open = "rb")
-message <- tutorial.Person$read(con)
-close(con)
-message <- tutorial.Person$read(payload)
-
-
-###################################################
-### code chunk number 17: article.Rnw:610-611
-###################################################
new(tutorial.Person)
###################################################
-### code chunk number 18: article.Rnw:675-682
+### code chunk number 12: article.Rnw:685-690
###################################################
tutorial.Person$email
+tutorial.Person$email$is_required()
+tutorial.Person$email$type()
+tutorial.Person$email$as.character()
+class(tutorial.Person$email)
-tutorial.Person$PhoneType
-tutorial.Person$PhoneNumber
-
-tutorial.Person.PhoneNumber
-
-
###################################################
-### code chunk number 19: article.Rnw:798-800
+### code chunk number 13: article.Rnw:702-709
###################################################
tutorial.Person$PhoneType
tutorial.Person$PhoneType$WORK
-
-
-###################################################
-### code chunk number 20: article.Rnw:849-852
-###################################################
+class(tutorial.Person$PhoneType)
tutorial.Person$PhoneType$value(1)
tutorial.Person$PhoneType$value(name="HOME")
tutorial.Person$PhoneType$value(number=1)
+class(tutorial.Person$PhoneType$value(1))
###################################################
-### code chunk number 21: article.Rnw:921-924
+### code chunk number 14: article.Rnw:719-722
###################################################
f <- tutorial.Person$fileDescriptor()
f
@@ -173,7 +116,7 @@
###################################################
-### code chunk number 22: article.Rnw:987-990
+### code chunk number 15: article.Rnw:785-788
###################################################
if (!exists("JSSPaper.Example1", "RProtoBuf:DescriptorPool")) {
readProtoFiles(file="int64.proto")
@@ -181,7 +124,7 @@
###################################################
-### code chunk number 23: article.Rnw:1012-1016
+### code chunk number 16: article.Rnw:810-814
###################################################
as.integer(2^31-1)
as.integer(2^31 - 1) + as.integer(1)
@@ -190,20 +133,20 @@
###################################################
-### code chunk number 24: article.Rnw:1028-1029
+### code chunk number 17: article.Rnw:826-827
###################################################
2^53 == (2^53 + 1)
###################################################
-### code chunk number 25: article.Rnw:1080-1082
+### code chunk number 18: article.Rnw:878-880
###################################################
msg <- serialize_pb(iris, NULL)
identical(iris, unserialize_pb(msg))
###################################################
-### code chunk number 26: article.Rnw:1113-1116
+### code chunk number 19: article.Rnw:908-911
###################################################
datasets <- as.data.frame(data(package="datasets")$results)
datasets$name <- sub("\\s+.*$", "", datasets$Item)
@@ -211,26 +154,8 @@
###################################################
-### code chunk number 27: article.Rnw:1126-1127
+### code chunk number 20: article.Rnw:929-972
###################################################
-m <- sum(sapply(datasets$name, function(x) can_serialize_pb(get(x))))
-
-
-###################################################
-### code chunk number 28: article.Rnw:1140-1147
-###################################################
-attr(CO2, "formula")
-msg <- serialize_pb(CO2, NULL)
-object <- unserialize_pb(msg)
-identical(CO2, object)
-identical(class(CO2), class(object))
-identical(dim(CO2), dim(object))
-attr(object, "formula")
-
-
-###################################################
-### code chunk number 29: article.Rnw:1163-1182
-###################################################
datasets$object.size <- unname(sapply(datasets$name, function(x) object.size(eval(as.name(x)))))
datasets$R.serialize.size <- unname(sapply(datasets$name, function(x) length(serialize(eval(as.name(x)), NULL))))
@@ -249,42 +174,117 @@
"gzipped serialized"=datasets$R.serialize.size.gz,
"RProtoBuf"=datasets$RProtoBuf.serialize.size,
"gzipped RProtoBuf"=datasets$RProtoBuf.serialize.size.gz,
+ "ratio.serialized" = datasets$R.serialize.size / datasets$object.size,
+ "ratio.rprotobuf" = datasets$RProtoBuf.serialize.size / datasets$object.size,
+ "ratio.serialized.gz" = datasets$R.serialize.size.gz / datasets$object.size,
+ "ratio.rprotobuf.gz" = datasets$RProtoBuf.serialize.size.gz / datasets$object.size,
+ "savings.serialized" = 1-(datasets$R.serialize.size / datasets$object.size),
+ "savings.rprotobuf" = 1-(datasets$RProtoBuf.serialize.size / datasets$object.size),
+ "savings.serialized.gz" = 1-(datasets$R.serialize.size.gz / datasets$object.size),
+ "savings.rprotobuf.gz" = 1-(datasets$RProtoBuf.serialize.size.gz / datasets$object.size),
check.names=FALSE)
+all.df<-data.frame(dataset="TOTAL", object.size=sum(datasets$object.size),
+ "serialized"=sum(datasets$R.serialize.size),
+ "gzipped serialized"=sum(datasets$R.serialize.size.gz),
+ "RProtoBuf"=sum(datasets$RProtoBuf.serialize.size),
+ "gzipped RProtoBuf"=sum(datasets$RProtoBuf.serialize.size.gz),
+ "ratio.serialized" = sum(datasets$R.serialize.size) / sum(datasets$object.size),
+ "ratio.rprotobuf" = sum(datasets$RProtoBuf.serialize.size) / sum(datasets$object.size),
+ "ratio.serialized.gz" = sum(datasets$R.serialize.size.gz) / sum(datasets$object.size),
+ "ratio.rprotobuf.gz" = sum(datasets$RProtoBuf.serialize.size.gz) / sum(datasets$object.size),
+ "savings.serialized" = 1-(sum(datasets$R.serialize.size) / sum(datasets$object.size)),
+ "savings.rprotobuf" = 1-(sum(datasets$RProtoBuf.serialize.size) / sum(datasets$object.size)),
+ "savings.serialized.gz" = 1-(sum(datasets$R.serialize.size.gz) / sum(datasets$object.size)),
+ "savings.rprotobuf.gz" = 1-(sum(datasets$RProtoBuf.serialize.size.gz) / sum(datasets$object.size)),
+ check.names=FALSE)
+clean.df<-rbind(clean.df, all.df)
+
###################################################
-### code chunk number 30: article.Rnw:1390-1395
+### code chunk number 21: SER
###################################################
-require(RProtoBuf)
+old.mar<-par("mar")
+new.mar<-old.mar
+new.mar[3]<-0
+new.mar[4]<-0
+my.cex<-1.3
+par("mar"=new.mar)
+plot(clean.df$savings.serialized, clean.df$savings.rprotobuf, pch=1, col="red", las=1, xlab="Serialization Space Savings", ylab="Protocol Buffer Space Savings", xlim=c(0,1),ylim=c(0,1),cex.lab=my.cex, cex.axis=my.cex)
+points(clean.df$savings.serialized.gz, clean.df$savings.rprotobuf.gz,pch=2, col="blue")
+# grey dotted diagonal
+abline(a=0,b=1, col="grey",lty=2,lwd=3)
+
+# find point furthest off the X axis.
+clean.df$savings.diff <- clean.df$savings.serialized - clean.df$savings.rprotobuf
+clean.df$savings.diff.gz <- clean.df$savings.serialized.gz - clean.df$savings.rprotobuf.gz
+
+# The one to label.
+tmp.df <- clean.df[which(clean.df$savings.diff == min(clean.df$savings.diff)),]
+# This minimum means most to the left of our line, so pos=2 is label to the left
+text(tmp.df$savings.serialized, tmp.df$savings.rprotobuf, labels=tmp.df$dataset, pos=2, cex=my.cex)
+
+# Some gziped version
+# text(tmp.df$savings.serialized.gz, tmp.df$savings.rprotobuf.gz, labels=tmp.df$dataset, pos=2, cex=my.cex)
+
+# Second one is also an outlier
+tmp.df <- clean.df[which(clean.df$savings.diff == sort(clean.df$savings.diff)[2]),]
+# This minimum means most to the left of our line, so pos=2 is label to the left
+text(tmp.df$savings.serialized, tmp.df$savings.rprotobuf, labels=tmp.df$dataset, pos=2, cex=my.cex)
+#text(tmp.df$savings.serialized.gz, tmp.df$savings.rprotobuf.gz, labels=tmp.df$dataset, pos=my.cex)
+
+
+tmp.df <- clean.df[which(clean.df$savings.diff == max(clean.df$savings.diff)),]
+# This minimum means most to the right of the diagonal, so pos=4 is label to the right
+# Only show the gziped one.
+#text(tmp.df$savings.serialized, tmp.df$savings.rprotobuf, labels=tmp.df$dataset, pos=4, cex=my.cex)
+text(tmp.df$savings.serialized.gz, tmp.df$savings.rprotobuf.gz, labels=tmp.df$dataset, pos=4, cex=my.cex)
+
+#outlier.dfs <- clean.df[c(which(clean.df$savings.diff == min(clean.df$savings.diff)),
+
+legend("topleft", c("Raw", "Gzip Compressed"), pch=1:2, col=c("red", "blue"), cex=my.cex)
+
+interesting.df <- clean.df[unique(c(which(clean.df$savings.diff == min(clean.df$savings.diff)),
+ which(clean.df$savings.diff == max(clean.df$savings.diff)),
+ which(clean.df$savings.diff.gz == max(clean.df$savings.diff.gz)),
+ which(clean.df$dataset == "TOTAL"))),c("dataset", "object.size", "serialized", "gzipped serialized", "RProtoBuf", "gzipped RProtoBuf", "savings.serialized", "savings.serialized.gz", "savings.rprotobuf", "savings.rprotobuf.gz")]
+# Print without .00 in xtable
+interesting.df$object.size <- as.integer(interesting.df$object.size)
+par("mar"=old.mar)
+
+
+###################################################
+### code chunk number 22: article.Rnw:1211-1215
+###################################################
require(HistogramTools)
readProtoFiles(package="HistogramTools")
hist <- HistogramTools.HistogramState$read("hist.pb")
-plot(as.histogram(hist))
+plot(as.histogram(hist), main="")
###################################################
-### code chunk number 31: article.Rnw:1463-1470 (eval = FALSE)
+### code chunk number 23: article.Rnw:1303-1310 (eval = FALSE)
###################################################
## library("RProtoBuf")
## library("httr")
##
-## req <- GET('https://public.opencpu.org/ocpu/library/MASS/data/Animals/pb')
+## req <- GET('https://demo.ocpu.io/MASS/data/Animals/pb')
## output <- unserialize_pb(req$content)
##
## identical(output, MASS::Animals)
###################################################
-### code chunk number 32: article.Rnw:1529-1545 (eval = FALSE)
+### code chunk number 24: article.Rnw:1360-1376 (eval = FALSE)
###################################################
-## library("httr")
+## library("httr")
## library("RProtoBuf")
##
## args <- list(n=42, mean=100)
## payload <- serialize_pb(args, NULL)
##
## req <- POST (
-## url = "https://public.opencpu.org/ocpu/library/stats/R/rnorm/pb",
+## url = "https://demo.ocpu.io/stats/R/rnorm/pb",
## body = payload,
## add_headers (
## "Content-Type" = "application/x-protobuf"
@@ -296,7 +296,7 @@
###################################################
-### code chunk number 33: article.Rnw:1549-1552 (eval = FALSE)
+### code chunk number 25: article.Rnw:1380-1383 (eval = FALSE)
###################################################
## fnargs <- unserialize_pb(inputmsg)
## val <- do.call(stats::rnorm, fnargs)
Modified: papers/jss/article.Rnw
===================================================================
--- papers/jss/article.Rnw 2014-12-04 01:45:57 UTC (rev 929)
+++ papers/jss/article.Rnw 2014-12-15 01:10:07 UTC (rev 930)
@@ -233,8 +233,8 @@
\label{sec:protobuf}
Protocol Buffers are a modern, language-neutral, platform-neutral,
-extensible mechanism for sharing and storing structured data. Key
-features provided by Protocol Buffers for data analysis include:
+extensible mechanism for sharing and storing structured data. Some of their
+features, particularly in the context of data analysis, are:
\begin{itemize}
\item \emph{Portable}: Enable users to send and receive data between
@@ -388,7 +388,7 @@
parsed from \code{.proto} files and added to the global
namespace.\footnote{Note that there is a significant performance
overhead with this RObjectTable implementation. Because the table
- is on the search path and isn't cacheable, lookups of symbols that
+ is on the search path and is not cacheable, lookups of symbols that
are behind it in the search path cannot be added to the global object
cache, and R must perform an expensive lookup through all of the
attached environments and the protocol buffer definitions to find common
More information about the Rprotobuf-commits
mailing list