From noreply at r-forge.r-project.org Tue Mar 11 02:12:49 2014 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Tue, 11 Mar 2014 02:12:49 +0100 (CET) Subject: [Rprotobuf-commits] r872 - in pkg: . R inst inst/unitTests src Message-ID: <20140311011249.81E371844D7@r-forge.r-project.org> Author: murray Date: 2014-03-11 02:12:48 +0100 (Tue, 11 Mar 2014) New Revision: 872 Modified: pkg/ChangeLog pkg/R/00classes.R pkg/inst/NEWS.Rd pkg/inst/unitTests/runit.enums.R pkg/src/DescriptorPoolLookup.cpp pkg/src/rprotobuf.cpp Log: * Fix a bug in the show method for EnumDescriptor types. * Import all top-level enums from imported .proto files add add a test. Modified: pkg/ChangeLog =================================================================== --- pkg/ChangeLog 2014-02-22 00:24:18 UTC (rev 871) +++ pkg/ChangeLog 2014-03-11 01:12:48 UTC (rev 872) @@ -1,3 +1,14 @@ +2014-03-10 Murray Stokely + + * src/DescriptorPoolLookup.cpp (rprotobuf): Import all top-level + enums defined in an imported .proto file. + * src/rprotobuf.cpp (rprotobuf): Add a function to get the enum + descriptor associated with a named enum. + * inst/unitTests/runit.enums.R (test.enums): Test that we import + top-level enums from .proto files correctly. + * R/00classes.R (P): Correct a bug in the show() method for + EnumDescriptor that prevented useful output in some contexts. + 2014-02-21 Murray Stokely * inst/unitTests/runit.messages.R (test.message): Add a test for Modified: pkg/R/00classes.R =================================================================== --- pkg/R/00classes.R 2014-02-22 00:24:18 UTC (rev 871) +++ pkg/R/00classes.R 2014-03-11 01:12:48 UTC (rev 872) @@ -34,7 +34,7 @@ pointer = "externalptr" , # pointer to a google::protobuf::EnumDescriptor c++ object name = "character", full_name = "character", - type = "character" + type = "character" # TODO(mstokely): enums don't really have another type, remove? 
), prototype = list( pointer = NULL, name = character(0), full_name = character(0), type = character(0) ) ) @@ -122,7 +122,11 @@ # See if it is an extension desc <- .Call("getExtensionDescriptor", type, PACKAGE="RProtoBuf") if (is.null(desc)) { - stop( sprintf( "could not find descriptor for message type '%s' ", type ) ) + # See if it is an enum + desc <- .Call("getEnumDescriptor", type, PACKAGE="RProtoBuf") + if (is.null(desc)) { + stop( sprintf( "could not find descriptor for message type '%s' ", type ) ) + } } } desc @@ -146,7 +150,8 @@ show( sprintf( "descriptor for field '%s' of type '%s' ", object@name, object@type ) ) } ) setMethod( "show", c( "EnumDescriptor" ), function(object){ - show( sprintf( "descriptor for enum '%s' of type '%s' with %d values", object@name, object@type, value_count(object) ) ) + show( sprintf( "descriptor for enum '%s' with %d values", object@name, + value_count(object) ) ) } ) setMethod( "show", c( "ServiceDescriptor" ), function(object){ show( sprintf( "service descriptor <%s>", object@name ) ) Modified: pkg/inst/NEWS.Rd =================================================================== --- pkg/inst/NEWS.Rd 2014-02-22 00:24:18 UTC (rev 871) +++ pkg/inst/NEWS.Rd 2014-03-11 01:12:48 UTC (rev 872) @@ -2,6 +2,18 @@ \title{News for Package \pkg{RProtoBuf}} \newcommand{\cpkg}{\href{http://CRAN.R-project.org/package=#1}{\pkg{#1}}} +\section{Changes in RProtoBuf version 0.4.x (2014-XX-XX)}{ + \itemize{ + \item Document and add a test for the deprecated group + functionality. + \item Add a \code{CITATION} file pointing to the JSS preprint on + arXiv.org. + \item Fix a bug in the \code{show} method for \code{EnumDescriptor} + types. + \item Import all top-level enums from imported \code{.proto} files. + } +} + \section{Changes in RProtoBuf version 0.4.0 (2014-01-14)}{ \itemize{ \item Changes to support CRAN builds for MS Windows. 
Modified: pkg/inst/unitTests/runit.enums.R =================================================================== --- pkg/inst/unitTests/runit.enums.R 2014-02-22 00:24:18 UTC (rev 871) +++ pkg/inst/unitTests/runit.enums.R 2014-03-11 01:12:48 UTC (rev 872) @@ -1,15 +1,15 @@ # Copyright 2011 Google Inc. -# +# # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. -# +# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. -# +# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. @@ -43,4 +43,14 @@ # Verify that invalid indices are returned as NULL. checkTrue(is.null(value(ProtoFormat$PhoneType, index=900))) + + # Verify that we import top-level enums from .proto files. + if (!exists("protobuf_unittest.TestAllTypes", + "RProtoBuf:DescriptorPool")) { + unittest.proto.file <- system.file("unitTests", "data", + "unittest.proto", + package="RProtoBuf") + readProtoFiles(file=unittest.proto.file) + } + checkTrue(inherits(P("protobuf_unittest.ForeignEnum"), "EnumDescriptor")) } Modified: pkg/src/DescriptorPoolLookup.cpp =================================================================== --- pkg/src/DescriptorPoolLookup.cpp 2014-02-22 00:24:18 UTC (rev 871) +++ pkg/src/DescriptorPoolLookup.cpp 2014-03-11 01:12:48 UTC (rev 872) @@ -40,6 +40,9 @@ /** * Add descriptors from a proto file to the descriptor pool. * + * Specifically, message types, extensions, and enums are added. + * Services are not added because they are not really used in RProtoBuf. 
+ * * @param files A character vector of .proto files to import. * @param dirs A character vector of directories to import from. * @throws Rcpp::exception if a file can't be loaded (uncaught). @@ -54,19 +57,25 @@ "'\n"; Rcpp_error(message.c_str()); } + // add top level messages from the file. int ntypes = file_desc->message_type_count(); for (int i = 0; i < ntypes; i++) { const GPB::Descriptor* desc = file_desc->message_type(i); add(desc->full_name()); /* should we bother recursing ? */ - /* TODO(mstokely): add top level enums and services? */ } - // add top level extensions! + // add top level extensions int nexts = file_desc->extension_count(); for (int i = 0; i < nexts; i++) { const GPB::FieldDescriptor* field_desc = file_desc->extension(i); add(field_desc->full_name()); } + // add top level enums. + int nenums = file_desc->enum_type_count(); + for (int i = 0; i < nenums; i++) { + const GPB::EnumDescriptor* enum_desc = file_desc->enum_type(i); + add(enum_desc->full_name()); + } } // source_tree.removeDirectories( dirs ) ; } Modified: pkg/src/rprotobuf.cpp =================================================================== --- pkg/src/rprotobuf.cpp 2014-02-22 00:24:18 UTC (rev 871) +++ pkg/src/rprotobuf.cpp 2014-03-11 01:12:48 UTC (rev 872) @@ -116,6 +116,40 @@ } /** + * get the descriptor associated with an enum + * + * @param type message type + * + * @return an S4 object of class EnumDescriptor, or NULL if the type + * is unknown + */ +RcppExport SEXP getEnumDescriptor(SEXP type) { +#ifdef RPB_DEBUG + Rprintf("\n type = "); + Rf_PrintValue(type); +#endif + + const char* typeName = CHAR(STRING_ELT(type, 0)); + + /* first try the generated pool */ + const GPB::DescriptorPool* pool = GPB::DescriptorPool::generated_pool(); + Rprintf("typeName = %s", typeName); + const GPB::EnumDescriptor* desc = pool->FindEnumTypeByName(typeName); + if (!desc) { + /* then try the "runtime" pool" */ + pool = DescriptorPoolLookup::pool(); + Rprintf("trying runtime pool typeName = 
%s", typeName); + desc = pool->FindEnumTypeByName(typeName); + if (!desc) { + /* unlucky */ + return R_NilValue; + } + } + + return (S4_EnumDescriptor(desc)); +} + +/** * make a new protobuf message * * @param descriptor a "Descriptor" R object From noreply at r-forge.r-project.org Tue Mar 11 04:52:01 2014 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Tue, 11 Mar 2014 04:52:01 +0100 (CET) Subject: [Rprotobuf-commits] r873 - pkg/src Message-ID: <20140311035201.1CC7A186B19@r-forge.r-project.org> Author: murray Date: 2014-03-11 04:51:59 +0100 (Tue, 11 Mar 2014) New Revision: 873 Modified: pkg/src/rprotobuf.cpp Log: Remove some extraneous debugging printfs. Modified: pkg/src/rprotobuf.cpp =================================================================== --- pkg/src/rprotobuf.cpp 2014-03-11 01:12:48 UTC (rev 872) +++ pkg/src/rprotobuf.cpp 2014-03-11 03:51:59 UTC (rev 873) @@ -133,12 +133,10 @@ /* first try the generated pool */ const GPB::DescriptorPool* pool = GPB::DescriptorPool::generated_pool(); - Rprintf("typeName = %s", typeName); const GPB::EnumDescriptor* desc = pool->FindEnumTypeByName(typeName); if (!desc) { /* then try the "runtime" pool" */ pool = DescriptorPoolLookup::pool(); - Rprintf("trying runtime pool typeName = %s", typeName); desc = pool->FindEnumTypeByName(typeName); if (!desc) { /* unlucky */ From noreply at r-forge.r-project.org Sun Mar 16 23:44:21 2014 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Sun, 16 Mar 2014 23:44:21 +0100 (CET) Subject: [Rprotobuf-commits] r874 - papers/jss Message-ID: <20140316224421.C6422186835@r-forge.r-project.org> Author: edd Date: 2014-03-16 23:44:21 +0100 (Sun, 16 Mar 2014) New Revision: 874 Modified: papers/jss/article.Rnw papers/jss/article.bib Log: first pass concerning JSS comments - cite all software: added Rserve - "replication more accessible": huh? 
- sentence style for \section, \subsection - sentence style for \caption - sentence style for column captions in tables - quote argument to data() and library() - remove comments from quoted code - adjust linebreak of first code example table - reference to 'Springer-Verlag' - title style for references Modified: papers/jss/article.Rnw =================================================================== --- papers/jss/article.Rnw 2014-03-11 03:51:59 UTC (rev 873) +++ papers/jss/article.Rnw 2014-03-16 22:44:21 UTC (rev 874) @@ -242,7 +242,7 @@ \begin{center} \includegraphics[width=\textwidth]{figures/protobuf-distributed-system-crop.pdf} \end{center} -\caption{Example protobuf usage} +\caption{Example ussage of Protocol Buffers.} \label{fig:protobuf-distributed-usecase} \end{figure} @@ -252,7 +252,8 @@ Protocol Buffer in \proglang{R} that is then serialized and sent over the network to a remote server. The server would then deserialize the message, act on the request, and respond with a new Protocol Buffer over the network. -The key difference to, say, a request to an \pkg{Rserve} instance is that +The key difference to, say, a request to an \pkg{Rserve} +\citep{Urbanek:2003:Rserve,CRAN:Rserve} instance is that the remote server may be implemented in any language, with no dependence on \proglang{R}. 
@@ -278,34 +279,36 @@ \noindent \begin{table} -\begin{tabular}{p{.40\textwidth}p{0.55\textwidth}} +\begin{tabular}{p{0.45\textwidth}p{0.5\textwidth}} \toprule -Schema : \texttt{addressbook.proto} & Example \proglang{R} Session\\ +Schema : \texttt{addressbook.proto} & Example \proglang{R} session\\ \cmidrule{1-2} \begin{minipage}{.40\textwidth} \vspace{2mm} \begin{example} package tutorial; message Person { - required string name = 1; - required int32 id = 2; - optional string email = 3; - enum PhoneType { - MOBILE = 0; HOME = 1; - WORK = 2; - } - message PhoneNumber { - required string number = 1; - optional PhoneType type = 2; - } - repeated PhoneNumber phone = 4; + required string name = 1; + required int32 id = 2; + optional string email = 3; + enum PhoneType { + MOBILE = 0; + HOME = 1; + WORK = 2; + } + message PhoneNumber { + required string number = 1; + optional PhoneType type = 2; + } + repeated PhoneNumber phone = 4; } \end{example} \vspace{2mm} \end{minipage} & \begin{minipage}{.55\textwidth} <>= -library(RProtoBuf) -p <- new(tutorial.Person,id=1,name="Dirk") +library("RProtoBuf") +p <- new(tutorial.Person, id=1, + name="Dirk") p$name p$name <- "Murray" cat(as.character(p)) @@ -335,7 +338,7 @@ on the named fields of the specified message type, as described in the next section. -\section{Basic Usage: Messages and descriptors} +\section{Basic usage: Messages and descriptors} \label{sec:rprotobuf-basic} This section describes how to use the \proglang{R} API to create and manipulate @@ -463,17 +466,17 @@ @ \texttt{serialize} can also be called in a more traditional -object oriented fashion using the dollar operator: +object oriented fashion using the dollar operator. <<>>= -# serialize to a file p$serialize(tf1) -# serialize to a binary connection con <- file(tf2, open = "wb") p$serialize(con) close(con) @ +Here, we first serialize to a file \code{tf1} before we serialize to a binary +connection to file \code{tf2}. 
\subsection{Parsing messages} @@ -504,10 +507,9 @@ writeLines(as.character(message)) @ -Finally, the payload of the message can be used: +Finally, the raw vector payload of the message can be used: <<>>= -# reading the raw vector payload of the message payload <- readBin(tf1, raw(0), 5000) message <- read(tutorial.Person, payload) @ @@ -517,17 +519,17 @@ object: <<>>= -# reading from a file message <- tutorial.Person$read(tf1) -# reading from a binary connection con <- file(tf2, open = "rb") message <- tutorial.Person$read(con) close(con) -# read from the payload message <- tutorial.Person$read(payload) @ +Here we read first from a file, then from a binary connection and lastly from +a message payload. + \section{Under the hood: S4 classes, methods, and pseudo methods} \label{sec:rprotobuf-classes} @@ -548,11 +550,8 @@ \centering \begin{tabular}{lccl} \toprule -\textbf{Class} & - \textbf{Slots} & - \textbf{Methods} & - \textbf{Dynamic Dispatch}\\ -\cmidrule{1-4} +Class & Slots & Methods & Dynamic dispatch\\ +\cmidrule{2-4} Message & 2 & 20 & yes (field names)\\ Descriptor & 2 & 16 & yes (field names, enum types, nested types)\\ FieldDescriptor & 4 & 18 & no\\ @@ -562,7 +561,7 @@ \bottomrule \end{tabular} \caption{\label{class-summary-table}Overview of class, slot, method and - dispatch relationships} + dispatch relationships.} \end{table} The \CRANpkg{Rcpp} package @@ -591,7 +590,7 @@ classes, plus \emph{dynamic dispatch} on names or types specific to a given object. This functionality is implemented with the \texttt{.DollarNames} S3 generic function defined in the \pkg{utils} -package. +package that is included with \pronglang{R} \citep{r}. \subsection{Messages} @@ -612,14 +611,15 @@ \begin{small} \begin{tabular}{lp{10cm}} \toprule -\textbf{Slot} & \textbf{Description} \\ +Slot & Description \\ \cmidrule(r){2-2} \texttt{pointer} & External pointer to the \texttt{Message} object of the \proglang{C++} protobuf library. 
Documentation for the \texttt{Message} class is available from the Protocol Buffer project page. \\ %(\url{http://code.google.com/apis/protocolbuffers/docs/reference/cpp/google.protobuf.message.html#Message}) \\ \texttt{type} & Fully qualified name of the message. For example a \texttt{Person} message has its \texttt{type} slot set to \texttt{tutorial.Person} \\[.3cm] -\textbf{Method} & \textbf{Description} \\ + +Method & Description \\ \cmidrule(r){2-2} \texttt{has} & Indicates if a message has a given field. \\ \texttt{clone} & Creates a clone of the message \\ @@ -646,7 +646,7 @@ \hline \end{tabular} \end{small} -\caption{\label{Message-methods-table}Description of slots and methods for the \texttt{Message} S4 class} +\caption{\label{Message-methods-table}Description of slots and methods for the \texttt{Message} S4 class.} \end{table} \subsection{Descriptors} @@ -668,12 +668,12 @@ \texttt{readProtoFiles}.}. <<>>= -tutorial.Person$email # field descriptor +tutorial.Person$email -tutorial.Person$PhoneType # enum descriptor +tutorial.Person$PhoneType -tutorial.Person$PhoneNumber # nested type descriptor -# same as +tutorial.Person$PhoneNumber + tutorial.Person.PhoneNumber @ @@ -685,14 +685,15 @@ \begin{small} \begin{tabular}{lp{10cm}} \toprule -\textbf{Slot} & \textbf{Description} \\ +Slot & Description \\ \cmidrule(r){2-2} \texttt{pointer} & External pointer to the \texttt{Descriptor} object of the \proglang{C++} proto library. Documentation for the \texttt{Descriptor} class is available from the Protocol Buffer project page.\\ %\url{http://code.google.com/apis/protocolbuffers/docs/reference/cpp/google.protobuf.descriptor.html#Descriptor} \\ \texttt{type} & Fully qualified path of the message type. 
\\[.3cm] % -\textbf{Method} & \textbf{Description} \\ + +Method & Description \\ \cmidrule(r){2-2} \texttt{new} & Creates a prototype of a message described by this descriptor.\\ \texttt{read} & Reads a message from a file or binary connection.\\ @@ -719,7 +720,7 @@ \bottomrule \end{tabular} \end{small} -\caption{\label{Descriptor-methods-table}Description of slots and methods for the \texttt{Descriptor} S4 class} +\caption{\label{Descriptor-methods-table}Description of slots and methods for the \texttt{Descriptor} S4 class.} \end{table} \subsection{Field descriptors} @@ -736,14 +737,15 @@ \begin{small} \begin{tabular}{lp{10cm}} \toprule -\textbf{Slot} & \textbf{Description} \\ +Slot & Description \\ \cmidrule(r){2-2} \texttt{pointer} & External pointer to the \texttt{FieldDescriptor} \proglang{C++} variable \\ \texttt{name} & Simple name of the field \\ \texttt{full\_name} & Fully qualified name of the field \\ \texttt{type} & Name of the message type where the field is declared \\[.3cm] % -\textbf{Method} & \textbf{Description} \\ + +Method & Description \\ \cmidrule(r){2-2} \texttt{as.character} & Character representation of a descriptor\\ \texttt{toString} & Character representation of a descriptor (same as \texttt{as.character}) \\ @@ -767,7 +769,7 @@ \end{tabular} \end{small} \caption{\label{fielddescriptor-methods-table}Description of slots and - methods for the \texttt{FieldDescriptor} S4 class} + methods for the \texttt{FieldDescriptor} S4 class.} \end{table} @@ -798,14 +800,15 @@ \begin{small} \begin{tabular}{lp{10cm}} \toprule -\textbf{Slot} & \textbf{Description} \\ +Slot & Description \\ \cmidrule(r){2-2} \texttt{pointer} & External pointer to the \texttt{EnumDescriptor} \proglang{C++} variable \\ \texttt{name} & Simple name of the enum \\ \texttt{full\_name} & Fully qualified name of the enum \\ \texttt{type} & Name of the message type where the enum is declared \\[.3cm] % -\textbf{Method} & \textbf{Description} \\ + +Method & Description \\ 
\cmidrule(r){2-2} \texttt{as.list} & return a named integer vector with the values of the enum and their names.\\ @@ -823,7 +826,8 @@ \bottomrule \end{tabular} \end{small} -\caption{\label{enumdescriptor-methods-table}Description of slots and methods for the \texttt{EnumDescriptor} S4 class} +\caption{\label{enumdescriptor-methods-table}Description of slots and methods + for the \texttt{EnumDescriptor} S4 class.} \end{table} \subsection{Enum value descriptors} @@ -848,13 +852,14 @@ \begin{small} \begin{tabular}{lp{10cm}} \toprule -\textbf{Slot} & \textbf{Description} \\ +Slot & Description \\ \cmidrule(r){2-2} \texttt{pointer} & External pointer to the \texttt{EnumValueDescriptor} \proglang{C++} variable \\ \texttt{name} & simple name of the enum value \\ \texttt{full\_name} & fully qualified name of the enum value \\[.3cm] % -\textbf{Method} & \textbf{Description} \\ + +Method & Description \\ \cmidrule(r){2-2} \texttt{number} & return the number of this EnumValueDescriptor. \\ \texttt{name} & Return the name of the enum value descriptor.\\ @@ -866,7 +871,7 @@ \end{tabular} \end{small} \caption{\label{EnumValueDescriptor-methods-table}Description of slots - and methods for the \texttt{EnumValueDescriptor} S4 class} + and methods for the \texttt{EnumValueDescriptor} S4 class.} \end{table} \subsection{File descriptors} @@ -877,14 +882,15 @@ \begin{small} \begin{tabular}{lp{10cm}} \toprule -\textbf{Slot} & \textbf{Description} \\ +Slot & Description \\ \cmidrule(r){2-2} \texttt{pointer} & external pointer to the \texttt{FileDescriptor} object of the \proglang{C++} proto library. 
Documentation for the \texttt{FileDescriptor} class is available from the Protocol Buffer project page: \url{http://developers.google.com/protocol-buffers/docs/reference/cpp/google.protobuf.descriptor.html#FileDescriptor} \\ \texttt{filename} & fully qualified pathname of the \texttt{.proto} file.\\ \texttt{package} & package name defined in this \texttt{.proto} file.\\[.3cm] -\textbf{Method} & \textbf{Description} \\ + +Method & Description \\ \cmidrule(r){2-2} \texttt{name} & Return the filename for this FileDescriptorProto.\\ \texttt{package} & Return the file-level package name specified in this FileDescriptorProto.\\ @@ -895,7 +901,7 @@ \bottomrule \end{tabular} \end{small} -\caption{\label{filedescriptor-methods-table}Description of slots and methods for the \texttt{FileDescriptor} S4 class} +\caption{\label{filedescriptor-methods-table}Description of slots and methods for the \texttt{FileDescriptor} S4 class.} \end{table} The class \emph{FileDescriptor} represents file descriptors in \proglang{R}. @@ -1313,7 +1319,7 @@ \begin{center} \includegraphics[width=\textwidth]{figures/histogram-mapreduce-diag1.pdf} \end{center} -\caption{Diagram of MapReduce histogram generation pattern} +\caption{Diagram of MapReduce histogram generation pattern.} \label{fig:mr-histogram-pattern1} \end{figure} @@ -1352,42 +1358,39 @@ This generates a Python module called \texttt{histogram\_pb2.py}, containing both the descriptor information as well as methods to read and manipulate the histogram message data. The following simple Python script uses this generated -module to create a histogram and write out the Protocol Buffer +module to create a histogram (to which breakpoints and binned data are +added), and writes out the Protocol Buffer representation to a file: \begin{Code} from histogram_pb2 import HistogramState; -# Create empty Histogram message hist = HistogramState() -# Add breakpoints and binned data set. 
hist.counts.extend([2, 6, 2, 4, 6]) hist.breaks.extend(range(6)) hist.name="Example Histogram Created in Python" -# Output the histogram outfile = open("/tmp/hist.pb", "wb") outfile.write(hist.SerializeToString()) outfile.close() \end{Code} The Protocol Buffer can then be read into \proglang{R} and converted to a native -\proglang{R} histogram object for plotting: +\proglang{R} histogram object for plotting. Here, the schema is read first, +then the (serialized) histogram is read into the variable \code{hist} which +is then converted a histogram object which is display as a plot. \begin{Code} -library(RProtoBuf) -library(HistogramTools) +library("RProtoBuf") +library("HistogramTools") -# Read the Histogram schema readProtoFiles(package="HistogramTools") -# Read the serialized histogram file. hist <- HistogramTools.HistogramState$read("/tmp/hist.pb") hist [1] "message of type 'HistogramTools.HistogramState' with 3 fields set" -# Convert to native R histogram object and plot plot(as.histogram(hist)) \end{Code} @@ -1405,7 +1408,7 @@ large-scale studies of distributed storage systems \citep{sciencecloud,janus}. -\section{Application: Data Interchange in web Services} +\section{Application: Data interchange in web services} \label{sec:opencpu} As described earlier, the primary application of Protocol Buffers is data @@ -1466,15 +1469,12 @@ verify that the object was transferred without loss of information. <>= -# Load packages -library(RProtoBuf) -library(httr) +library("RProtoBuf") +library("httr") -# Retrieve and parse message req <- GET('https://public.opencpu.org/ocpu/library/MASS/data/Animals/pb') output <- unserialize_pb(req$content) -# Check that no information was lost identical(output, MASS::Animals) @ @@ -1494,15 +1494,12 @@ module. 
\begin{verbatim} -# Import modules import urllib2 from rexp_pb2 import REXP -# Retrieve message req = urllib2.Request('https://public.opencpu.org/ocpu/library/MASS/data/Animals/pb') res = urllib2.urlopen(req) -# Parse rexp.proto message msg = REXP() msg.ParseFromString(res.read()) print(msg) @@ -1538,9 +1535,8 @@ are contained within a list. <>= -#requires httr >= 0.2.99 -library(httr) -library(RProtoBuf) +library("httr") #requires httr >= 0.2.99 +library("RProtoBuf") args <- list(n=42, mean=100) payload <- serialize_pb(args, NULL) @@ -1553,7 +1549,6 @@ ) ) -#This is the output of stats::rnorm(n=42, mean=100) output <- unserialize_pb(req$content) print(output) @ @@ -1619,6 +1614,7 @@ %\begin{appendices} \section*{Appendix: The rexp.proto schema descriptor} + \label{rexp.proto} Below a print of the \texttt{rexp.proto} schema (originally designed by \cite{rhipe}) Modified: papers/jss/article.bib =================================================================== --- papers/jss/article.bib 2014-03-11 03:51:59 UTC (rev 873) +++ papers/jss/article.bib 2014-03-16 22:44:21 UTC (rev 874) @@ -1,5 +1,5 @@ @article{eddelbuettel2011rcpp, - title = {Rcpp: Seamless R and C++ integration}, + title = {Rcpp: Seamless R and C++ Integration}, author = {Dirk Eddelbuettel and Romain Fran{\c{c}}ois}, journal = {Journal of Statistical Software}, volume = 40, @@ -8,14 +8,15 @@ year = 2011 } @inproceedings{dremel, -title = {Dremel: Interactive Analysis of Web-Scale Datasets}, -author = {Sergey Melnik and Andrey Gubarev and Jing Jing Long and - Geoffrey Romer and Shiva Shivakumar and Matt Tolton - and Theo Vassilakis}, -year = 2010, -URL = {http://www.vldb2010.org/accept.htm}, -booktitle = {Proc. 
of the 36th Int'l Conf on Very Large Data Bases}, -pages = {330-339} + title = {Dremel: Interactive Analysis of Web-Scale Datasets}, + author = {Sergey Melnik and Andrey Gubarev and Jing Jing Long + and Geoffrey Romer and Shiva Shivakumar and Matt + Tolton and Theo Vassilakis}, + year = 2010, + URL = {http://www.vldb2010.org/accept.htm}, + booktitle = {Proc. of the 36th Int'l Conf on Very Large Data + Bases}, + pages = {330-339} } @Manual{msgpackR, title = {msgpackR: A library to serialize or unserialize data @@ -26,11 +27,15 @@ url = {http://CRAN.R-project.org/package=msgpackR}, } @inproceedings{sciencecloud, -title = {Projecting Disk Usage Based on Historical Trends in a Cloud Environment}, -author = {Murray Stokely and Amaan Mehrabian and Christoph Albrecht and Francois Labelle and Arif Merchant}, -year = 2012, -booktitle = {ScienceCloud 2012 Proceedings of the 3rd International Workshop on Scientific Cloud Computing}, -pages = {63--70} + title = {Projecting Disk Usage Based on Historical Trends in + a Cloud Environment}, + author = {Murray Stokely and Amaan Mehrabian and Christoph + Albrecht and Francois Labelle and Arif Merchant}, + year = 2012, + booktitle = {ScienceCloud 2012 Proceedings of the 3rd + International Workshop on Scientific Cloud + Computing}, + pages = {63--70} } @inproceedings{janus, title = {Janus: Optimal Flash Provisioning for Cloud Storage @@ -58,15 +63,15 @@ pages = "1176--1211", publisher = "Bernoulli Society for Mathematical Statistics and Probability", - title = "The potential and perils of preprocessing: Building - new foundations", + title = "The Potential and Perils of Preprocessing: Building + new Foundations", url = "http://dx.doi.org/10.3150/13-BEJSP16", volume = 19, year = 2013 } @article{clinec++, - title = {C++ faq}, + title = {C++ FAQ}, author = {Marshall Cline}, journal = {Also available as http://www. parashift. com/c++-faq-lite/index. 
html}, @@ -91,7 +96,7 @@ } @Manual{jsonlite, - title = {jsonlite: A smarter JSON encoder/decoder for R}, + title = {jsonlite: A Smarter JSON Encoder/Decoder for R}, author = {Jeroen Ooms}, year = 2014, note = {R package version 0.9.4}, @@ -107,7 +112,7 @@ } @Manual{int64, - title = {int64: 64 bit integer types}, + title = {int64: 64 Bit Integer Types}, author = {Romain Fran{\c{c}}ois}, year = 2011, note = {R package version 1.1.2}, @@ -115,7 +120,7 @@ } @Manual{bit64, - title = {bit64: A S3 class for vectors of 64bit integers}, + title = {bit64: A S3 class for Vectors of 64bit Integers}, author = {Jens Oehlschl\"{a}gel}, year = 2012, note = {R package version 0.9-3}, @@ -126,7 +131,7 @@ title = {Seamless R and C++ Integration with Rcpp}, author = {Dirk Eddelbuettel}, year = 2013, - publisher = {Springer} + publisher = {Springer-Verlag} } @Manual{rhipe, @@ -146,10 +151,10 @@ } @manual{eddelbuettel2013exposing, - title = {Exposing C++ functions and classes with Rcpp - modules}, + title = {Exposing C++ Functions and Classes with Rcpp + Modules}, author = {Dirk Eddelbuettel and Romain Fran{\c{c}}ois}, - year = 2013, + year = 2014, note = {Vignette included in R package Rcpp}, url = {http://CRAN.R-project.org/package=Rcpp}, } @@ -171,7 +176,7 @@ number = 1, pages = {11--32}, year = 1991, - publisher = {Springer} + publisher = {Springer-Verlag} } @article{rubner2000earth, @@ -183,7 +188,7 @@ number = 2, pages = {99--121}, year = 2000, - publisher = {Springer} + publisher = {Springer-Verlag} } @book{kullback1997information, @@ -305,13 +310,13 @@ author = {{R Core Team}}, organization = {R Foundation for Statistical Computing}, address = {Vienna, Austria}, - year = 2013, + year = 2014, url = {http://www.R-project.org/}, } @article{dean2008mapreduce, - title = {MapReduce: simplified data processing on large - clusters}, + title = {MapReduce: Simplified Data Processing on Large + Clusters}, author = {Jeffrey Dean and Sanjay Ghemawat}, journal = {Communications of the ACM}, 
volume = 51, @@ -404,7 +409,7 @@ } @article{scott1979optimal, - title = {On optimal and data-based histograms}, + title = {On Optimal and Data-Based Histograms}, author = {David W Scott}, journal = {Biometrika}, volume = 66, @@ -415,8 +420,8 @@ } @book{scott2009multivariate, - title = {Multivariate density estimation: theory, practice, - and visualization}, + title = {Multivariate Density Estimation: Theory, Practice, + and Visualization}, author = {David W Scott}, volume = 383, year = 2009, @@ -424,7 +429,7 @@ } @Manual{httr, - title = {httr: Tools for working with URLs and HTTP}, + title = {httr: Tools for Working with URLs and HTTP}, author = {Hadley Wickham}, year = 2012, note = {R package version 0.2}, @@ -432,8 +437,8 @@ } @Manual{opencpu, - title = {OpenCPU system for embedded statistical computation - and reproducible research}, + title = {OpenCPU System for Embedded Statistical Computation + and Reproducible Research}, author = {Jeroen Ooms}, year = 2013, note = {R package version 1.2.2}, @@ -441,8 +446,8 @@ } @article{shafranovich2005common, - title = {Common format and mime type for comma-separated - values (csv) files}, + title = {Common Format and Mime Type for Comma-Separated + Values (csv) Files}, author = {Yakov Shafranovich}, year = 2005, url = {http://tools.ietf.org/html/rfc4180} @@ -452,7 +457,7 @@ title = {XML and Web Technologies for Data Sciences with R}, author = {Deborah Nolan and Duncan {Temple Lang}}, year = 2013, - publisher = {Springer} + publisher = {Springer-Verlag} } @Manual{nlme, @@ -462,3 +467,23 @@ note = {R package version 3.1-113}, url = {http://CRAN.R-project.org/package=nlme}, } + +@Manual{CRAN:Rserve, + title = {Rserve: Binary R server}, + author = {Simon Urbanek}, + year = 2013, + note = {R package version 1.7-3}, + url = {http://CRAN.R-Project.org/package=Rserve} +} + +@InProceedings{Urbanek:2003:Rserve, + author = {Simon Urbanek}, + title = {{Rserve}: A Fast Way to Provide {R} Functionality to + Applications}, + 
booktitle = {Proceedings of the 3rd International Workshop on Distributed + Statistical Computing, Vienna, Austria}, + editor = {Kurt Hornik and Friedrich Leisch and Achim Zeileis}, + year = {2003}, + url = {http://www.ci.tuwien.ac.at/Conferences/DSC-2003/Proceedings/}, + note = {{ISSN 1609-395X}} +} From noreply at r-forge.r-project.org Sun Mar 16 23:45:51 2014 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Sun, 16 Mar 2014 23:45:51 +0100 (CET) Subject: [Rprotobuf-commits] r875 - papers/jss Message-ID: <20140316224551.35E93186892@r-forge.r-project.org> Author: edd Date: 2014-03-16 23:45:50 +0100 (Sun, 16 Mar 2014) New Revision: 875 Modified: papers/jss/article.Rnw Log: one char typo Modified: papers/jss/article.Rnw =================================================================== --- papers/jss/article.Rnw 2014-03-16 22:44:21 UTC (rev 874) +++ papers/jss/article.Rnw 2014-03-16 22:45:50 UTC (rev 875) @@ -590,7 +590,7 @@ classes, plus \emph{dynamic dispatch} on names or types specific to a given object. This functionality is implemented with the \texttt{.DollarNames} S3 generic function defined in the \pkg{utils} -package that is included with \pronglang{R} \citep{r}. +package that is included with \proglang{R} \citep{r}. \subsection{Messages} From noreply at r-forge.r-project.org Mon Mar 17 03:28:08 2014 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Mon, 17 Mar 2014 03:28:08 +0100 (CET) Subject: [Rprotobuf-commits] r876 - papers/jss Message-ID: <20140317022808.29A77186F98@r-forge.r-project.org> Author: murray Date: 2014-03-17 03:28:07 +0100 (Mon, 17 Mar 2014) New Revision: 876 Modified: papers/jss/article.Rnw Log: Fix typo in Dirks last change. 
Modified: papers/jss/article.Rnw =================================================================== --- papers/jss/article.Rnw 2014-03-16 22:45:50 UTC (rev 875) +++ papers/jss/article.Rnw 2014-03-17 02:28:07 UTC (rev 876) @@ -242,7 +242,7 @@ \begin{center} \includegraphics[width=\textwidth]{figures/protobuf-distributed-system-crop.pdf} \end{center} -\caption{Example ussage of Protocol Buffers.} +\caption{Example usage of Protocol Buffers.} \label{fig:protobuf-distributed-usecase} \end{figure} From noreply at r-forge.r-project.org Sun Mar 23 23:44:03 2014 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Sun, 23 Mar 2014 23:44:03 +0100 (CET) Subject: [Rprotobuf-commits] r877 - papers/jss Message-ID: <20140323224403.B7F73186694@r-forge.r-project.org> Author: edd Date: 2014-03-23 23:44:03 +0100 (Sun, 23 Mar 2014) New Revision: 877 Modified: papers/jss/article.Rnw papers/jss/article.bib Log: two new citation (as, I think, suggested by the note) minor twiddling with floats; slightly narrower figures Modified: papers/jss/article.Rnw =================================================================== --- papers/jss/article.Rnw 2014-03-17 02:28:07 UTC (rev 876) +++ papers/jss/article.Rnw 2014-03-23 22:44:03 UTC (rev 877) @@ -16,7 +16,6 @@ % % Local helpers to make this more compatible with R Journal style. 
% -\newcommand{\CRANpkg}[1]{\pkg{#1}} \RequirePackage{fancyvrb} \RequirePackage{alltt} \DefineVerbatimEnvironment{example}{Verbatim}{} @@ -30,33 +29,32 @@ %% for pretty printing and a nice hypersummary also set: \Plainauthor{Dirk Eddelbuettel, Murray Stokely, Jeroen Ooms} %% comma-separated \Plaintitle{RProtoBuf: Efficient Cross-Language Data Serialization in R} -\Shorttitle{\CRANpkg{RProtoBuf}: Protocol Buffers in \proglang{R}} %% a short title (if necessary) +\Shorttitle{\pkg{RProtoBuf}: Protocol Buffers in \proglang{R}} %% a short title (if necessary) %% an abstract and keywords \Abstract{ -Modern data collection and analysis pipelines often involve -a sophisticated mix of applications written in general purpose and -specialized programming languages. -Many formats commonly used to import and export data between -different programs or systems, such as \texttt{CSV} or \texttt{JSON}, are -verbose, inefficient, not type-safe, or tied to a specific programming language. -Protocol Buffers are a popular -method of serializing structured data between applications---while remaining -independent of programming languages or operating systems. -They offer a unique combination of features, performance, and maturity that seems -particularly well suited for data-driven applications and numerical -computing. -The -\CRANpkg{RProtoBuf} package provides a complete interface to Protocol -Buffers from the -\proglang{R} environment for statistical computing. -This paper outlines the general class of data serialization -requirements for statistical computing, describes the implementation -of the \CRANpkg{RProtoBuf} package, and illustrates its use with -example applications in large-scale data collection pipelines and web -services. -%TODO(ms) keep it less than 150 words. -- I think this may be 154, -%depending how emacs is counting. 
+ Modern data collection and analysis pipelines often involve + a sophisticated mix of applications written in general purpose and + specialized programming languages. + Many formats commonly used to import and export data between + different programs or systems, such as \texttt{CSV} or \texttt{JSON}, are + verbose, inefficient, not type-safe, or tied to a specific programming language. + Protocol Buffers are a popular + method of serializing structured data between applications---while remaining + independent of programming languages or operating systems. + They offer a unique combination of features, performance, and maturity that seems + particularly well suited for data-driven applications and numerical + computing. + The \pkg{RProtoBuf} package provides a complete interface to Protocol + Buffers from the + \proglang{R} environment for statistical computing. + This paper outlines the general class of data serialization + requirements for statistical computing, describes the implementation + of the \pkg{RProtoBuf} package, and illustrates its use with + example applications in large-scale data collection pipelines and web + services. + %% TODO(ms) keep it less than 150 words. -- I think this may be 154, + %% depending how emacs is counting. } \Keywords{\proglang{R}, \pkg{Rcpp}, Protocol Buffers, serialization, cross-platform} \Plainkeywords{R, Rcpp, Protocol Buffers, serialization, cross-platform} %% without formatting @@ -194,29 +192,31 @@ Once the data serialization needs of an application become complex enough, developers typically benefit from the use of an \emph{interface description language}, or \emph{IDL}. IDLs like -Protocol Buffers \citep{protobuf}, Apache Thrift, and Apache Avro +Protocol Buffers \citep{protobuf}, Apache Thrift \citep{Apache:Thrift}, and Apache Avro \citep{Apache:Avro} provide a compact well-documented schema for cross-language data structures and efficient binary interchange formats. 
Since the schema is provided separately from the data, the data can be efficiently encoded to minimize storage costs when compared with simple ``schema-less'' binary interchange formats. -Many sources compare data serialization formats -and show Protocol Buffers perform favorably to the alternatives; see -\citet{Sumaray:2012:CDS:2184751.2184810} for one such comparison. +%Many sources compare data serialization formats +%and show Protocol Buffers perform favorably to the alternatives; see +%\citet{Sumaray:2012:CDS:2184751.2184810} for one such comparison. +Protocol Buffers performs well in the comparison of such formats by +\citet{Sumaray:2012:CDS:2184751.2184810}. This paper describes an \proglang{R} interface to Protocol Buffers, and is organized as follows. Section~\ref{sec:protobuf} provides a general high-level overview of Protocol Buffers as well as a basic motivation for their use. Section~\ref{sec:rprotobuf-basic} describes the interactive \proglang{R} interface -provided by the \CRANpkg{RProtoBuf} package, and introduces the two main abstractions: +provided by the \pkg{RProtoBuf} package, and introduces the two main abstractions: \emph{Messages} and \emph{Descriptors}. Section~\ref{sec:rprotobuf-classes} details the implementation details of the main S4 classes and methods. Section~\ref{sec:types} describes the challenges of type coercion between \proglang{R} and other languages. Section~\ref{sec:evaluation} introduces a general \proglang{R} language schema for serializing arbitrary \proglang{R} objects and evaluates it against the serialization capabilities built directly into \proglang{R}. Sections~\ref{sec:mapreduce} -and \ref{sec:opencpu} provide real-world use cases of \CRANpkg{RProtoBuf} +and \ref{sec:opencpu} provide real-world use cases of \pkg{RProtoBuf} in MapReduce and web service environments, respectively, before Section~\ref{sec:summary} concludes. @@ -238,9 +238,10 @@ decade. 
\end{itemize} -\begin{figure}[bp] +%\begin{figure}[bp] +\begin{figure}[h!] \begin{center} -\includegraphics[width=\textwidth]{figures/protobuf-distributed-system-crop.pdf} +\includegraphics[width=0.9\textwidth]{figures/protobuf-distributed-system-crop.pdf} \end{center} \caption{Example usage of Protocol Buffers.} \label{fig:protobuf-distributed-usecase} @@ -254,8 +255,8 @@ request, and respond with a new Protocol Buffer over the network. The key difference to, say, a request to an \pkg{Rserve} \citep{Urbanek:2003:Rserve,CRAN:Rserve} instance is that -the remote server may be implemented in any language, with no -dependence on \proglang{R}. +the remote server may be implemented in any language. +%, with no dependence on \proglang{R}. While traditional IDLs have at times been criticized for code bloat and complexity, Protocol Buffers are based on a simple list and records @@ -480,7 +481,7 @@ \subsection{Parsing messages} -The \CRANpkg{RProtoBuf} package defines the \code{read} and +The \pkg{RProtoBuf} package defines the \code{read} and \code{readASCII} functions to read messages from files, raw vectors, or arbitrary connections. \code{read} expects to read the message payload from binary files or connections and \code{readASCII} parses @@ -533,13 +534,13 @@ \section{Under the hood: S4 classes, methods, and pseudo methods} \label{sec:rprotobuf-classes} -The \CRANpkg{RProtoBuf} package uses the S4 system to store +The \pkg{RProtoBuf} package uses the S4 system to store information about descriptors and messages. Using the S4 system allows the package to dispatch methods that are not generic in the S3 sense, such as \texttt{new} and \texttt{serialize}. Table~\ref{class-summary-table} lists the six -primary Message and Descriptor classes in \CRANpkg{RProtoBuf}. Each \proglang{R} object +primary Message and Descriptor classes in \pkg{RProtoBuf}. 
Each \proglang{R} object contains an external pointer to an object managed by the \texttt{protobuf} \proglang{C++} library, and the \proglang{R} objects make calls into more than 100 \proglang{C++} functions that provide the @@ -564,19 +565,19 @@ dispatch relationships.} \end{table} -The \CRANpkg{Rcpp} package +The \pkg{Rcpp} package \citep{eddelbuettel2011rcpp,eddelbuettel2013seamless} is used to facilitate this integration of the \proglang{R} and \proglang{C++} code for these objects. Each method is wrapped individually which allows us to add user-friendly custom error handling, type coercion, and performance improvements at the cost of a more verbose implementation. -The \CRANpkg{RProtoBuf} package in many ways motivated -the development of \CRANpkg{Rcpp} Modules \citep{eddelbuettel2013exposing}, +The \pkg{RProtoBuf} package in many ways motivated +the development of \pkg{Rcpp} Modules \citep{eddelbuettel2013exposing}, which provide a more concise way of wrapping \proglang{C++} functions and classes in a single entity. -The \CRANpkg{RProtoBuf} package supports two forms for calling +The \pkg{RProtoBuf} package supports two forms for calling functions with these S4 classes: \begin{itemize} \item The functional dispatch mechanism of the the form @@ -585,7 +586,7 @@ \verb|object$method(arguments)|. \end{itemize} -Additionally, \CRANpkg{RProtoBuf} supports tab completion for all +Additionally, \pkg{RProtoBuf} supports tab completion for all classes. Completion possibilities include pseudo-method names for all classes, plus \emph{dynamic dispatch} on names or types specific to a given object. This functionality is implemented with the @@ -595,7 +596,7 @@ \subsection{Messages} The \texttt{Message} S4 class represents Protocol Buffer Messages and -is the core abstraction of \CRANpkg{RProtoBuf}. Each \texttt{Message} +is the core abstraction of \pkg{RProtoBuf}. 
Each \texttt{Message} contains a pointer to a \texttt{Descriptor} which defines the schema of the data defined in the Message, as well as a number of \texttt{FieldDescriptors} for the individual fields of the message. A @@ -659,7 +660,7 @@ used to retrieve descriptors that are contained in the descriptor, or invoke pseudo-methods. -When \CRANpkg{RProtoBuf} is first loaded it calls +When \pkg{RProtoBuf} is first loaded it calls \texttt{readProtoFiles} to read in the example \texttt{addressbook.proto} file included with the package. The \texttt{tutorial.Person} descriptor and all other descriptors defined in the loaded \texttt{.proto} files are @@ -1028,9 +1029,9 @@ @ However, most modern languages do have support for 64-bit integers, -which becomes problematic when \CRANpkg{RProtoBuf} is used to exchange data +which becomes problematic when \pkg{RProtoBuf} is used to exchange data with a system that requires this integer type. To work around this, -\CRANpkg{RProtoBuf} allows users to get and set 64-bit integer values by specifying +\pkg{RProtoBuf} allows users to get and set 64-bit integer values by specifying them as character strings. If we try to set an int64 field in \proglang{R} to double values, we lose @@ -1042,7 +1043,7 @@ length(unique(test$repeated_int64)) @ -But when the values are specified as character strings, \CRANpkg{RProtoBuf} +But when the values are specified as character strings, \pkg{RProtoBuf} will automatically coerce them into a true 64-bit integer types before storing them in the Protocol Buffer message: @@ -1055,7 +1056,7 @@ will be returned if the \code{RProtoBuf.int64AsString} option is set to \texttt{TRUE}. The character values are useful because they can accurately be used as unique identifiers and can easily be passed to \proglang{R} -packages such as \CRANpkg{int64} \citep{int64} or \CRANpkg{bit64} +packages such as \pkg{int64} \citep{int64} or \pkg{bit64} \citep{bit64} which represent 64-bit integers in \proglang{R}. 
<<>>= @@ -1074,7 +1075,7 @@ \section[Converting R data structures into Protocol Buffers]{Converting \proglang{R} data structures into Protocol Buffers} \label{sec:evaluation} -The previous sections discussed functionality in the \CRANpkg{RProtoBuf} package +The previous sections discussed functionality in the \pkg{RProtoBuf} package for creating, manipulating, parsing, and serializing Protocol Buffer messages of a defined schema. This is useful when there are pre-existing systems with defined schemas or significant software @@ -1090,12 +1091,12 @@ identical(iris, unserialize_pb(msg)) @ -In order to accomplish this, \CRANpkg{RProtoBuf} uses the same catch-all \texttt{proto} +In order to accomplish this, \pkg{RProtoBuf} uses the same catch-all \texttt{proto} schema used by \pkg{RHIPE} for exchanging \proglang{R} data with Hadoop \citep{rhipe}. This schema, which we will refer to as \texttt{rexp.proto}, is printed in %appendix \ref{rexp.proto}. the appendix. -The Protocol Buffer messages generated by \CRANpkg{RProtoBuf} and +The Protocol Buffer messages generated by \pkg{RProtoBuf} and \pkg{RHIPE} are naturally compatible between the two systems because they use the same schema. This shows the power of using a schema-based cross-platform format such as Protocol Buffers: interoperability is achieved without effort or close coordination. @@ -1201,11 +1202,11 @@ %in multiple languages instead of requiring other programs to parse the \proglang{R} %serialization format. % \citep{serialization}. One takeaway from this table is that the universal \proglang{R} object schema -included in \CRANpkg{RProtoBuf} does not in general provide +included in \pkg{RProtoBuf} does not in general provide any significant saving in file size compared to the normal serialization mechanism in \proglang{R}. % redundant: which is seen as equally compact. 
-The benefits of \CRANpkg{RProtoBuf} accrue more naturally in applications where +The benefits of \pkg{RProtoBuf} accrue more naturally in applications where multiple programming languages are involved, or when a more concise application-specific schema has been defined. The example in the next section satisfies both of these conditions. @@ -1279,7 +1280,7 @@ \end{tabular} } \caption{Serialization sizes for default serialization in \proglang{R} and - \CRANpkg{RProtoBuf} for 50 \proglang{R} data sets.} + \pkg{RProtoBuf} for 50 \proglang{R} data sets.} \label{tab:compression} \end{center} \end{table} @@ -1317,7 +1318,7 @@ \begin{figure}[h!] \begin{center} -\includegraphics[width=\textwidth]{figures/histogram-mapreduce-diag1.pdf} +\includegraphics[width=0.9\textwidth]{figures/histogram-mapreduce-diag1.pdf} \end{center} \caption{Diagram of MapReduce histogram generation pattern.} \label{fig:mr-histogram-pattern1} @@ -1331,8 +1332,8 @@ share a schema of the histogram representation to coordinate effectively. -The \CRANpkg{HistogramTools} package \citep{histogramtools} enhances -\CRANpkg{RProtoBuf} by providing a concise schema for \proglang{R} histogram objects: +The \pkg{HistogramTools} package \citep{histogramtools} enhances +\pkg{RProtoBuf} by providing a concise schema for \proglang{R} histogram objects: \begin{example} package HistogramTools; @@ -1439,7 +1440,7 @@ function calls, and arguments/return values can be posted/retrieved using several data interchange formats, such as Protocol Buffers. OpenCPU uses the \texttt{serialize\_pb} and \texttt{unserialize\_pb} functions -from the \CRANpkg{RProtoBuf} package to convert between \proglang{R} objects and protobuf +from the \pkg{RProtoBuf} package to convert between \proglang{R} objects and protobuf messages. Therefore, clients need the \texttt{rexp.proto} descriptor mentioned earlier to parse and generate protobuf messages when interacting with OpenCPU. @@ -1535,7 +1536,7 @@ are contained within a list. 
<>= -library("httr") #requires httr >= 0.2.99 +library("httr") library("RProtoBuf") args <- list(n=42, mean=100) @@ -1576,13 +1577,13 @@ performance, and maturity, that seems particularly well suited for data-driven applications and numerical computing. -The \CRANpkg{RProtoBuf} package builds on the Protocol Buffers \proglang{C++} library, +The \pkg{RProtoBuf} package builds on the Protocol Buffers \proglang{C++} library, and extends the \proglang{R} system with the ability to create, read, write, parse, and manipulate Protocol -Buffer messages. \CRANpkg{RProtoBuf} has been used extensively inside Google +Buffer messages. \pkg{RProtoBuf} has been used extensively inside Google for the past three years by statisticians, analysts, and software engineers. At the time of this writing there are over 300 active -users of \CRANpkg{RProtoBuf} using it to read data from and otherwise interact +users of \pkg{RProtoBuf} using it to read data from and otherwise interact with distributed systems written in \proglang{C++}, \proglang{Java}, \proglang{Python}, and other languages. We hope that making Protocol Buffers available to the \proglang{R} community will contribute towards better software integration @@ -1593,11 +1594,11 @@ \section*{Acknowledgments} -The first versions of \CRANpkg{RProtoBuf} were written during 2009-2010. +The first versions of \pkg{RProtoBuf} were written during 2009 - 2010. Very significant contributions, both in code and design, were made by Romain Fran\c{c}ois whose continued influence on design and code is greatly appreciated. Several features of the package reflect -the design of the \CRANpkg{rJava} package by Simon Urbanek. +the design of the \pkg{rJava} package by Simon Urbanek. The user-defined table mechanism, implemented by Duncan Temple Lang for the purpose of the \pkg{RObjectTables} package, allows for the dynamic symbol lookup. 
Kenton Varda was generous with his time in reviewing code and explaining @@ -1618,7 +1619,7 @@ \label{rexp.proto} Below a print of the \texttt{rexp.proto} schema (originally designed by \cite{rhipe}) -that is included with the \CRANpkg{RProtoBuf} package and used by \texttt{serialize\_pb} and +that is included with the \pkg{RProtoBuf} package and used by \texttt{serialize\_pb} and \texttt{unserialize\_pb}. \begin{verbatim} Modified: papers/jss/article.bib =================================================================== --- papers/jss/article.bib 2014-03-17 02:28:07 UTC (rev 876) +++ papers/jss/article.bib 2014-03-23 22:44:03 UTC (rev 877) @@ -145,8 +145,7 @@ @misc{serialization, author = {Luke Tierney}, title = {A New Serialization Mechanism for R}, - url = - {http://www.cs.uiowa.edu/~luke/R/serialize/serialize.ps}, + url = {http://www.cs.uiowa.edu/~luke/R/serialize/serialize.ps}, year = 2003, } @@ -431,8 +430,8 @@ @Manual{httr, title = {httr: Tools for Working with URLs and HTTP}, author = {Hadley Wickham}, - year = 2012, - note = {R package version 0.2}, + year = 2014, + note = {R package version 0.3}, url = {http://CRAN.R-project.org/package=httr}, } @@ -487,3 +486,20 @@ url = {http://www.ci.tuwien.ac.at/Conferences/DSC-2003/Proceedings/}, note = {{ISSN 1609-395X}} } + +@Misc{Apache:Avro, + author = {{Apache Software Foundation}}, + title = {Apache Avro}, + url = {http://avro.apache.org}, + note = {Data Serialization System, Version 1.7.6}, + year = 2014 +} + +@Misc{Apache:Thrift, + author = {{Apache Software Foundation}}, + title = {Apache Thrift}, + url = {http://thrift.apache.org}, + note = {Software Framework for Scalable Cross-Language Services, Version 0.9.1}, + year = 2013 +} + From noreply at r-forge.r-project.org Sun Mar 23 23:48:08 2014 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Sun, 23 Mar 2014 23:48:08 +0100 (CET) Subject: [Rprotobuf-commits] r878 - papers/jss Message-ID: 
<20140323224808.170C81865B2@r-forge.r-project.org> Author: edd Date: 2014-03-23 23:48:07 +0100 (Sun, 23 Mar 2014) New Revision: 878 Added: papers/jss/article.R Modified: papers/jss/Makefile papers/jss/article.Rnw Log: also 'R CMD Stangle' to create single R file with code -- is that what they ask for? Modified: papers/jss/Makefile =================================================================== --- papers/jss/Makefile 2014-03-23 22:44:03 UTC (rev 877) +++ papers/jss/Makefile 2014-03-23 22:48:07 UTC (rev 878) @@ -10,6 +10,7 @@ bibtex article pdflatex article.tex pdflatex article.tex + R CMD Stangle article.Rnw jssarchive: (cd .. && zip -r jssarchive.zip jss/) Added: papers/jss/article.R =================================================================== --- papers/jss/article.R (rev 0) +++ papers/jss/article.R 2014-03-23 22:48:07 UTC (rev 878) @@ -0,0 +1,340 @@ +### R code from vignette source 'article.Rnw' + +################################################### +### code chunk number 1: article.Rnw:125-131 +################################################### +## cf http://www.jstatsoft.org/style#q12 +options(prompt = "R> ", + continue = "+ ", + width = 70, + useFancyQuotes = FALSE, + digits = 4) + + +################################################### +### code chunk number 2: article.Rnw:313-321 +################################################### +library("RProtoBuf") +p <- new(tutorial.Person, id=1, + name="Dirk") +p$name +p$name <- "Murray" +cat(as.character(p)) +serialize(p, NULL) +class(p) + + +################################################### +### code chunk number 3: article.Rnw:376-377 +################################################### +ls("RProtoBuf:DescriptorPool") + + +################################################### +### code chunk number 4: article.Rnw:391-393 +################################################### +p1 <- new(tutorial.Person) +p <- new(tutorial.Person, name = "Murray", id = 1) + + 
+################################################### +### code chunk number 5: article.Rnw:402-405 +################################################### +p$name +p$id +p$email <- "murray at stokely.org" + + +################################################### +### code chunk number 6: article.Rnw:413-416 +################################################### +p[["name"]] <- "Murray Stokely" +p[[ 2 ]] <- 3 +p[["email"]] + + +################################################### +### code chunk number 7: article.Rnw:429-430 +################################################### +p + + +################################################### +### code chunk number 8: article.Rnw:437-438 +################################################### +writeLines(as.character(p)) + + +################################################### +### code chunk number 9: article.Rnw:451-452 +################################################### +serialize(p, NULL) + + +################################################### +### code chunk number 10: article.Rnw:457-460 +################################################### +tf1 <- tempfile() +serialize(p, tf1) +readBin(tf1, raw(0), 500) + + +################################################### +### code chunk number 11: article.Rnw:465-470 +################################################### +tf2 <- tempfile() +con <- file(tf2, open = "wb") +serialize(p, con) +close(con) +readBin(tf2, raw(0), 500) + + +################################################### +### code chunk number 12: article.Rnw:476-480 +################################################### +p$serialize(tf1) +con <- file(tf2, open = "wb") +p$serialize(con) +close(con) + + +################################################### +### code chunk number 13: article.Rnw:500-502 +################################################### +msg <- read(tutorial.Person, tf1) +writeLines(as.character(msg)) + + +################################################### +### code chunk number 14: article.Rnw:508-512 
+################################################### +con <- file(tf2, open = "rb") +message <- read(tutorial.Person, con) +close(con) +writeLines(as.character(message)) + + +################################################### +### code chunk number 15: article.Rnw:517-519 +################################################### +payload <- readBin(tf1, raw(0), 5000) +message <- read(tutorial.Person, payload) + + +################################################### +### code chunk number 16: article.Rnw:526-531 +################################################### +message <- tutorial.Person$read(tf1) +con <- file(tf2, open = "rb") +message <- tutorial.Person$read(con) +close(con) +message <- tutorial.Person$read(payload) + + +################################################### +### code chunk number 17: article.Rnw:610-611 +################################################### +new(tutorial.Person) + + +################################################### +### code chunk number 18: article.Rnw:675-682 +################################################### +tutorial.Person$email + +tutorial.Person$PhoneType + +tutorial.Person$PhoneNumber + +tutorial.Person.PhoneNumber + + +################################################### +### code chunk number 19: article.Rnw:798-800 +################################################### +tutorial.Person$PhoneType +tutorial.Person$PhoneType$WORK + + +################################################### +### code chunk number 20: article.Rnw:849-852 +################################################### +tutorial.Person$PhoneType$value(1) +tutorial.Person$PhoneType$value(name="HOME") +tutorial.Person$PhoneType$value(number=1) + + +################################################### +### code chunk number 21: article.Rnw:921-924 +################################################### +f <- tutorial.Person$fileDescriptor() +f +f$Person + + +################################################### +### code chunk number 22: article.Rnw:987-994 
+################################################### +if (!exists("protobuf_unittest.TestAllTypes", + "RProtoBuf:DescriptorPool")) { + unittest.proto.file <- system.file("unitTests", "data", + "unittest.proto", + package="RProtoBuf") + readProtoFiles(file=unittest.proto.file) +} + + +################################################### +### code chunk number 23: article.Rnw:1016-1020 +################################################### +as.integer(2^31-1) +as.integer(2^31 - 1) + as.integer(1) +2^31 +class(2^31) + + +################################################### +### code chunk number 24: article.Rnw:1031-1032 +################################################### +2^53 == (2^53 + 1) + + +################################################### +### code chunk number 25: article.Rnw:1044-1047 +################################################### +test <- new(protobuf_unittest.TestAllTypes) +test$repeated_int64 <- c(2^53, 2^53+1) +length(unique(test$repeated_int64)) + + +################################################### +### code chunk number 26: article.Rnw:1054-1055 +################################################### +test$repeated_int64 <- c("9007199254740992", "9007199254740993") + + +################################################### +### code chunk number 27: article.Rnw:1066-1072 +################################################### +options("RProtoBuf.int64AsString" = FALSE) +test$repeated_int64 +length(unique(test$repeated_int64)) +options("RProtoBuf.int64AsString" = TRUE) +test$repeated_int64 +length(unique(test$repeated_int64)) + + +################################################### +### code chunk number 28: article.Rnw:1075-1076 +################################################### +options("RProtoBuf.int64AsString" = FALSE) + + +################################################### +### code chunk number 29: article.Rnw:1093-1095 +################################################### +msg <- serialize_pb(iris, NULL) +identical(iris, unserialize_pb(msg)) + + 
+################################################### +### code chunk number 30: article.Rnw:1126-1129 +################################################### +datasets <- as.data.frame(data(package="datasets")$results) +datasets$name <- sub("\\s+.*$", "", datasets$Item) +n <- nrow(datasets) + + +################################################### +### code chunk number 31: article.Rnw:1139-1140 +################################################### +m <- sum(sapply(datasets$name, function(x) can_serialize_pb(get(x)))) + + +################################################### +### code chunk number 32: article.Rnw:1153-1160 +################################################### +attr(CO2, "formula") +msg <- serialize_pb(CO2, NULL) +object <- unserialize_pb(msg) +identical(CO2, object) +identical(class(CO2), class(object)) +identical(dim(CO2), dim(object)) +attr(object, "formula") + + +################################################### +### code chunk number 33: article.Rnw:1176-1195 +################################################### +datasets$object.size <- unname(sapply(datasets$name, function(x) object.size(eval(as.name(x))))) + +datasets$R.serialize.size <- unname(sapply(datasets$name, function(x) length(serialize(eval(as.name(x)), NULL)))) + +datasets$R.serialize.size <- unname(sapply(datasets$name, function(x) length(serialize(eval(as.name(x)), NULL)))) + +datasets$R.serialize.size.gz <- unname(sapply(datasets$name, function(x) length(memCompress(serialize(eval(as.name(x)), NULL), "gzip")))) + +datasets$RProtoBuf.serialize.size <- unname(sapply(datasets$name, function(x) length(serialize_pb(eval(as.name(x)), NULL)))) + +datasets$RProtoBuf.serialize.size.gz <- unname(sapply(datasets$name, function(x) length(memCompress(serialize_pb(eval(as.name(x)), NULL), "gzip")))) + +clean.df <- data.frame(dataset=datasets$name, + object.size=datasets$object.size, + "serialized"=datasets$R.serialize.size, + "gzipped serialized"=datasets$R.serialize.size.gz, + 
"RProtoBuf"=datasets$RProtoBuf.serialize.size, + "gzipped RProtoBuf"=datasets$RProtoBuf.serialize.size.gz, + check.names=FALSE) + + +################################################### +### code chunk number 34: article.Rnw:1403-1408 +################################################### +require(RProtoBuf) +require(HistogramTools) +readProtoFiles(package="HistogramTools") +hist <- HistogramTools.HistogramState$read("hist.pb") +plot(as.histogram(hist)) + + +################################################### +### code chunk number 35: article.Rnw:1476-1483 (eval = FALSE) +################################################### +## library("RProtoBuf") +## library("httr") +## +## req <- GET('https://public.opencpu.org/ocpu/library/MASS/data/Animals/pb') +## output <- unserialize_pb(req$content) +## +## identical(output, MASS::Animals) + + +################################################### +### code chunk number 36: article.Rnw:1542-1558 (eval = FALSE) +################################################### +## library("httr") +## library("RProtoBuf") +## +## args <- list(n=42, mean=100) +## payload <- serialize_pb(args, NULL) +## +## req <- POST ( +## url = "https://public.opencpu.org/ocpu/library/stats/R/rnorm/pb", +## body = payload, +## add_headers ( +## "Content-Type" = "application/x-protobuf" +## ) +## ) +## +## output <- unserialize_pb(req$content) +## print(output) + + +################################################### +### code chunk number 37: article.Rnw:1562-1565 (eval = FALSE) +################################################### +## fnargs <- unserialize_pb(inputmsg) +## val <- do.call(stats::rnorm, fnargs) +## outputmsg <- serialize_pb(val) + + Modified: papers/jss/article.Rnw =================================================================== --- papers/jss/article.Rnw 2014-03-23 22:44:03 UTC (rev 877) +++ papers/jss/article.Rnw 2014-03-23 22:48:07 UTC (rev 878) @@ -124,7 +124,11 @@ %% guarantees better line breaks <>= ## cf 
http://www.jstatsoft.org/style#q12 -options(prompt = "R> ", continue = "+ ", width = 70, useFancyQuotes = FALSE, digits=4) +options(prompt = "R> ", + continue = "+ ", + width = 70, + useFancyQuotes = FALSE, + digits = 4) @ \maketitle @@ -981,13 +985,13 @@ distinct values. <>= - if (!exists("protobuf_unittest.TestAllTypes", - "RProtoBuf:DescriptorPool")) { - unittest.proto.file <- system.file("unitTests", "data", - "unittest.proto", - package="RProtoBuf") - readProtoFiles(file=unittest.proto.file) - } +if (!exists("protobuf_unittest.TestAllTypes", + "RProtoBuf:DescriptorPool")) { + unittest.proto.file <- system.file("unitTests", "data", + "unittest.proto", + package="RProtoBuf") + readProtoFiles(file=unittest.proto.file) +} @ % We want a cleaner error message here. From noreply at r-forge.r-project.org Mon Mar 24 00:01:30 2014 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Mon, 24 Mar 2014 00:01:30 +0100 (CET) Subject: [Rprotobuf-commits] r879 - papers/jss Message-ID: <20140323230130.C13B21865B2@r-forge.r-project.org> Author: edd Date: 2014-03-24 00:01:28 +0100 (Mon, 24 Mar 2014) New Revision: 879 Modified: papers/jss/article.Rnw Log: s/texttt/code/ as the latter is a JSS-recommended macro Modified: papers/jss/article.Rnw =================================================================== --- papers/jss/article.Rnw 2014-03-23 22:48:07 UTC (rev 878) +++ papers/jss/article.Rnw 2014-03-23 23:01:28 UTC (rev 879) @@ -37,7 +37,7 @@ a sophisticated mix of applications written in general purpose and specialized programming languages. Many formats commonly used to import and export data between - different programs or systems, such as \texttt{CSV} or \texttt{JSON}, are + different programs or systems, such as \code{CSV} or \code{JSON}, are verbose, inefficient, not type-safe, or tied to a specific programming language. 
Protocol Buffers are a popular method of serializing structured data between applications---while remaining @@ -161,33 +161,33 @@ environment. Data analysts and researchers often use character-separated text formats such -as \texttt{CSV} \citep{shafranovich2005common} to export and import -data. However, anyone who has ever used \texttt{CSV} files will have noticed +as \code{CSV} \citep{shafranovich2005common} to export and import +data. However, anyone who has ever used \code{CSV} files will have noticed that this method has many limitations: it is restricted to tabular data, lacks type-safety, and has limited precision for numeric values. Moreover, ambiguities in the format itself frequently cause problems. For example, conventions on which character is used as separator or decimal point vary by -country. \emph{Extensible Markup Language} (\texttt{XML}) is another +country. \emph{Extensible Markup Language} (\code{XML}) is another well-established and widely-supported format with the ability to define just about any arbitrarily complex schema \citep{nolan2013xml}. However, it pays for this complexity with comparatively large and verbose messages, and added complexity at the parsing side (which is somewhat mitigated by the -availability of mature libraries and parsers). Because \texttt{XML} is +availability of mature libraries and parsers). Because \code{XML} is text-based and has no native notion of numeric types or arrays, it is usually not a very practical format to store numeric data sets as they appear in statistical applications. A more modern format is \emph{JavaScript Object Notation} -(\texttt{JSON}), which is derived from the object literals of +(\code{JSON}), which is derived from the object literals of \proglang{JavaScript}, and already widely-used on the world wide web. Several \proglang{R} packages implement functions to parse and generate -\texttt{JSON} data from \proglang{R} objects \citep{rjson,RJSONIO,jsonlite}. 
-\texttt{JSON} natively supports arrays and four primitive types: numbers, strings, +\code{JSON} data from \proglang{R} objects \citep{rjson,RJSONIO,jsonlite}. +\code{JSON} natively supports arrays and four primitive types: numbers, strings, booleans, and null. However, as it too is a text-based format, numbers are stored as human-readable decimal notation which is inefficient and leads to loss of type (double versus integer) and precision. -A number of binary formats based on \texttt{JSON} have been proposed +A number of binary formats based on \code{JSON} have been proposed that reduce the parsing cost and improve efficiency, but these formats are not widely supported. Furthermore, such formats lack a separate schema for the serialized data and thus still duplicate field names @@ -265,20 +265,20 @@ While traditional IDLs have at times been criticized for code bloat and complexity, Protocol Buffers are based on a simple list and records model that is flexible and easy to use. The schema for structured -Protocol Buffer data is defined in \texttt{.proto} files, which may +Protocol Buffer data is defined in \code{.proto} files, which may contain one or more message types. Each message type has one or more fields. A field is specified with a unique number (called a \emph{tag number}), a name, a value type, and a field rule specifying whether the field is optional, required, or repeated. The supported value types are numbers, enumerations, booleans, strings, raw bytes, or other nested message -types. The \texttt{.proto} file syntax for defining the structure of Protocol +types. The \code{.proto} file syntax for defining the structure of Protocol Buffer data is described comprehensively on Google Code\footnote{See \url{http://code.google.com/apis/protocolbuffers/docs/proto.html}.}. 
-Table~\ref{tab:proto} shows an example \texttt{.proto} file that -defines the \texttt{tutorial.Person} type\footnote{The compound name - \texttt{tutorial.Person} in R is derived from the name of the +Table~\ref{tab:proto} shows an example \code{.proto} file that +defines the \code{tutorial.Person} type\footnote{The compound name + \code{tutorial.Person} in R is derived from the name of the message (\emph{Person}) and the name of the package defined at the top of the - \texttt{.proto} file in which it is defined (\emph{tutorial}).}. The \proglang{R} code in the right + \code{.proto} file in which it is defined (\emph{tutorial}).}. The \proglang{R} code in the right column shows an example of creating a new message of this type and populating its fields. @@ -286,7 +286,7 @@ \begin{table} \begin{tabular}{p{0.45\textwidth}p{0.5\textwidth}} \toprule -Schema : \texttt{addressbook.proto} & Example \proglang{R} session\\ +Schema : \code{addressbook.proto} & Example \proglang{R} session\\ \cmidrule{1-2} \begin{minipage}{.40\textwidth} \vspace{2mm} @@ -323,8 +323,8 @@ \end{minipage} \\ \bottomrule \end{tabular} -\caption{The schema representation from a \texttt{.proto} file for the - \texttt{tutorial.Person} class (left) and simple \proglang{R} code for creating +\caption{The schema representation from a \code{.proto} file for the + \code{tutorial.Person} class (left) and simple \proglang{R} code for creating an object of this class and accessing its fields (right).} \label{tab:proto} \end{table} @@ -333,7 +333,7 @@ For added speed and efficiency, the \proglang{C++}, \proglang{Java}, and \proglang{Python} bindings to Protocol Buffers are used with a compiler that translates a Protocol -Buffer schema description file (ending in \texttt{.proto}) into +Buffer schema description file (ending in \code{.proto}) into language-specific classes that can be used to create, read, write, and manipulate Protocol Buffer messages. 
The \proglang{R} interface, in contrast, uses a reflection-based API that makes some operations slightly @@ -353,16 +353,16 @@ The two fundamental building blocks of Protocol Buffers are \emph{Messages} and \emph{Descriptors}. Messages provide a common abstract encapsulation of structured data fields of the type specified in a Message Descriptor. -Message Descriptors are defined in \texttt{.proto} files and define a +Message Descriptors are defined in \code{.proto} files and define a schema for a particular named class of messages. -\subsection[Importing message descriptors from .proto files]{Importing message descriptors from \texttt{.proto} files} +\subsection[Importing message descriptors from .proto files]{Importing message descriptors from \code{.proto} files} To create or parse a Protocol Buffer Message, one must first read in -the message type specification from a \texttt{.proto} file. The -\texttt{.proto} files are imported using the \code{readProtoFiles} +the message type specification from a \code{.proto} file. The +\code{.proto} files are imported using the \code{readProtoFiles} function, which can either import a single file, all files in a directory, -or every \texttt{.proto} file provided by a particular \proglang{R} package. +or every \code{.proto} file provided by a particular \proglang{R} package. After importing proto files, the corresponding message descriptors are available by name from the \code{RProtoBuf:DescriptorPool} environment in @@ -379,13 +379,13 @@ \subsection{Creating a message} -New messages are created with the \texttt{new} function which accepts +New messages are created with the \code{new} function which accepts a Message Descriptor and optionally a list of ``name = value'' pairs to set in the message. %The objects contained in the special environment are %descriptors for their associated message types. 
Descriptors will be %discussed in detail in another part of this document, but for the -%purpose of this section, descriptors are just used with the \texttt{new} +%purpose of this section, descriptors are just used with the \code{new} %function to create messages. <<>>= @@ -423,7 +423,7 @@ \subsection{Display messages} -Protocol Buffer messages and descriptors implement \texttt{show} +Protocol Buffer messages and descriptors implement \code{show} methods that provide basic information about the message: <<>>= @@ -431,7 +431,7 @@ @ For additional information, such as for debugging purposes, -the \texttt{as.character} method provides a more complete ASCII +the \code{as.character} method provides a more complete ASCII representation of the contents of a message. <<>>= @@ -442,7 +442,7 @@ One of the primary benefits of Protocol Buffers is the efficient binary wire-format representation. -The \texttt{serialize} method is implemented for +The \code{serialize} method is implemented for Protocol Buffer messages to serialize a message into a sequence of bytes (raw vector) that represents the message. The raw bytes can then be parsed back into the original message safely @@ -470,7 +470,7 @@ readBin(tf2, raw(0), 500) @ -\texttt{serialize} can also be called in a more traditional +\code{serialize} can also be called in a more traditional object oriented fashion using the dollar operator. <<>>= @@ -502,7 +502,7 @@ writeLines(as.character(msg)) @ -The \texttt{input} argument of \texttt{read} can also be a binary +The \code{input} argument of \code{read} can also be a binary readable \proglang{R} connection, such as a binary file connection: <<>>= @@ -520,7 +520,7 @@ @ -\texttt{read} can also be used as a pseudo-method of the descriptor +\code{read} can also be used as a pseudo-method of the descriptor object: <<>>= @@ -541,12 +541,12 @@ The \pkg{RProtoBuf} package uses the S4 system to store information about descriptors and messages. 
Using the S4 system allows the package to dispatch methods that are not -generic in the S3 sense, such as \texttt{new} and -\texttt{serialize}. +generic in the S3 sense, such as \code{new} and +\code{serialize}. Table~\ref{class-summary-table} lists the six primary Message and Descriptor classes in \pkg{RProtoBuf}. Each \proglang{R} object contains an external pointer to an object managed by the -\texttt{protobuf} \proglang{C++} library, and the \proglang{R} objects make calls into more +\code{protobuf} \proglang{C++} library, and the \proglang{R} objects make calls into more than 100 \proglang{C++} functions that provide the glue code between the \proglang{R} language classes and the underlying \proglang{C++} classes. @@ -594,17 +594,17 @@ classes. Completion possibilities include pseudo-method names for all classes, plus \emph{dynamic dispatch} on names or types specific to a given object. This functionality is implemented with the -\texttt{.DollarNames} S3 generic function defined in the \pkg{utils} +\code{.DollarNames} S3 generic function defined in the \pkg{utils} package that is included with \proglang{R} \citep{r}. \subsection{Messages} -The \texttt{Message} S4 class represents Protocol Buffer Messages and -is the core abstraction of \pkg{RProtoBuf}. Each \texttt{Message} -contains a pointer to a \texttt{Descriptor} which defines the schema +The \code{Message} S4 class represents Protocol Buffer Messages and +is the core abstraction of \pkg{RProtoBuf}. Each \code{Message} +contains a pointer to a \code{Descriptor} which defines the schema of the data defined in the Message, as well as a number of -\texttt{FieldDescriptors} for the individual fields of the message. A -complete list of the slots and methods for \texttt{Messages} +\code{FieldDescriptors} for the individual fields of the message. A +complete list of the slots and methods for \code{Messages} is available in Table~\ref{Message-methods-table}. 
<<>>= @@ -618,40 +618,40 @@ \toprule Slot & Description \\ \cmidrule(r){2-2} -\texttt{pointer} & External pointer to the \texttt{Message} object of the \proglang{C++} protobuf library. Documentation for the -\texttt{Message} class is available from the Protocol Buffer project page. \\ +\code{pointer} & External pointer to the \code{Message} object of the \proglang{C++} protobuf library. Documentation for the +\code{Message} class is available from the Protocol Buffer project page. \\ %(\url{http://code.google.com/apis/protocolbuffers/docs/reference/cpp/google.protobuf.message.html#Message}) \\ -\texttt{type} & Fully qualified name of the message. For example a \texttt{Person} message -has its \texttt{type} slot set to \texttt{tutorial.Person} \\[.3cm] +\code{type} & Fully qualified name of the message. For example a \code{Person} message +has its \code{type} slot set to \code{tutorial.Person} \\[.3cm] Method & Description \\ \cmidrule(r){2-2} -\texttt{has} & Indicates if a message has a given field. \\ -\texttt{clone} & Creates a clone of the message \\ -\texttt{isInitialized} & Indicates if a message has all its required fields set\\ -\texttt{serialize} & serialize a message to a file, binary connection, or raw vector\\ -\texttt{clear} & Clear one or several fields of a message, or the entire message\\ -\texttt{size} & The number of elements in a message field\\ -\texttt{bytesize} & The number of bytes the message would take once serialized\\[3mm] +\code{has} & Indicates if a message has a given field. 
\\ +\code{clone} & Creates a clone of the message \\ +\code{isInitialized} & Indicates if a message has all its required fields set\\ +\code{serialize} & serialize a message to a file, binary connection, or raw vector\\ +\code{clear} & Clear one or several fields of a message, or the entire message\\ +\code{size} & The number of elements in a message field\\ +\code{bytesize} & The number of bytes the message would take once serialized\\[3mm] % -\texttt{swap} & swap elements of a repeated field of a message\\ -\texttt{set} & set elements of a repeated field\\ -\texttt{fetch} & fetch elements of a repeated field\\ -\texttt{setExtension} & set an extension of a message\\ -\texttt{getExtension} & get the value of an extension of a message\\ -\texttt{add} & add elements to a repeated field \\[3mm] +\code{swap} & swap elements of a repeated field of a message\\ +\code{set} & set elements of a repeated field\\ +\code{fetch} & fetch elements of a repeated field\\ +\code{setExtension} & set an extension of a message\\ +\code{getExtension} & get the value of an extension of a message\\ +\code{add} & add elements to a repeated field \\[3mm] % -\texttt{str} & the \proglang{R} structure of the message\\ -\texttt{as.character} & character representation of a message\\ -\texttt{toString} & character representation of a message (same as \texttt{as.character}) \\ -\texttt{as.list} & converts message to a named \proglang{R} list\\ -\texttt{update} & updates several fields of a message at once\\ -\texttt{descriptor} & get the descriptor of the message type of this message\\ -\texttt{fileDescriptor} & get the file descriptor of this message's descriptor\\ +\code{str} & the \proglang{R} structure of the message\\ +\code{as.character} & character representation of a message\\ +\code{toString} & character representation of a message (same as \code{as.character}) \\ +\code{as.list} & converts message to a named \proglang{R} list\\ +\code{update} & updates several fields of a message at 
once\\ +\code{descriptor} & get the descriptor of the message type of this message\\ +\code{fileDescriptor} & get the file descriptor of this message's descriptor\\ \hline \end{tabular} \end{small} -\caption{\label{Message-methods-table}Description of slots and methods for the \texttt{Message} S4 class.} +\caption{\label{Message-methods-table}Description of slots and methods for the \code{Message} S4 class.} \end{table} \subsection{Descriptors} @@ -659,18 +659,18 @@ Descriptors describe the type of a Message. This includes what fields a message contains and what the types of those fields are. Message descriptors are represented in \proglang{R} by the \emph{Descriptor} S4 -class. The class contains the slots \texttt{pointer} and -\texttt{type}. Similarly to messages, the \verb|$| operator can be +class. The class contains the slots \code{pointer} and +\code{type}. Similarly to messages, the \verb|$| operator can be used to retrieve descriptors that are contained in the descriptor, or invoke pseudo-methods. When \pkg{RProtoBuf} is first loaded it calls -\texttt{readProtoFiles} to read in the example \texttt{addressbook.proto} file -included with the package. The \texttt{tutorial.Person} descriptor -and all other descriptors defined in the loaded \texttt{.proto} files are +\code{readProtoFiles} to read in the example \code{addressbook.proto} file +included with the package. The \code{tutorial.Person} descriptor +and all other descriptors defined in the loaded \code{.proto} files are then available on the search path\footnote{This explains why the example in Table~\ref{tab:proto} lacked an explicit call to -\texttt{readProtoFiles}.}. +\code{readProtoFiles}.}. <<>>= tutorial.Person$email @@ -692,40 +692,40 @@ \toprule Slot & Description \\ \cmidrule(r){2-2} -\texttt{pointer} & External pointer to the \texttt{Descriptor} object of the \proglang{C++} proto library. 
Documentation for the -\texttt{Descriptor} class is available from the Protocol Buffer project page.\\ +\code{pointer} & External pointer to the \code{Descriptor} object of the \proglang{C++} proto library. Documentation for the +\code{Descriptor} class is available from the Protocol Buffer project page.\\ %\url{http://code.google.com/apis/protocolbuffers/docs/reference/cpp/google.protobuf.descriptor.html#Descriptor} \\ -\texttt{type} & Fully qualified path of the message type. \\[.3cm] +\code{type} & Fully qualified path of the message type. \\[.3cm] % Method & Description \\ \cmidrule(r){2-2} -\texttt{new} & Creates a prototype of a message described by this descriptor.\\ -\texttt{read} & Reads a message from a file or binary connection.\\ -\texttt{readASCII} & Read a message in ASCII format from a file or +\code{new} & Creates a prototype of a message described by this descriptor.\\ +\code{read} & Reads a message from a file or binary connection.\\ +\code{readASCII} & Read a message in ASCII format from a file or text connection.\\ -\texttt{name} & Retrieve the name of the message type associated with +\code{name} & Retrieve the name of the message type associated with this descriptor.\\ -\texttt{as.character} & character representation of a descriptor\\ -\texttt{toString} & character representation of a descriptor (same as \texttt{as.character}) \\ -\texttt{as.list} & return a named +\code{as.character} & character representation of a descriptor\\ +\code{toString} & character representation of a descriptor (same as \code{as.character}) \\ +\code{as.list} & return a named list of the field, enum, and nested descriptors included in this descriptor.\\ -\texttt{asMessage} & return DescriptorProto message. \\ -\texttt{fileDescriptor} & Retrieve the file descriptor of this +\code{asMessage} & return DescriptorProto message. 
\\ +\code{fileDescriptor} & Retrieve the file descriptor of this descriptor.\\ -\texttt{containing\_type} & Retrieve the descriptor describing the message type containing this descriptor.\\ -\texttt{field\_count} & Return the number of fields in this descriptor.\\ -\texttt{field} & Return the descriptor for the specified field in this descriptor.\\ -\texttt{nested\_type\_count} & The number of nested types in this descriptor.\\ -\texttt{nested\_type} & Return the descriptor for the specified nested +\code{containing\_type} & Retrieve the descriptor describing the message type containing this descriptor.\\ +\code{field\_count} & Return the number of fields in this descriptor.\\ +\code{field} & Return the descriptor for the specified field in this descriptor.\\ +\code{nested\_type\_count} & The number of nested types in this descriptor.\\ +\code{nested\_type} & Return the descriptor for the specified nested type in this descriptor.\\ -\texttt{enum\_type\_count} & The number of enum types in this descriptor.\\ -\texttt{enum\_type} & Return the descriptor for the specified enum +\code{enum\_type\_count} & The number of enum types in this descriptor.\\ +\code{enum\_type} & Return the descriptor for the specified enum type in this descriptor.\\ \bottomrule \end{tabular} \end{small} -\caption{\label{Descriptor-methods-table}Description of slots and methods for the \texttt{Descriptor} S4 class.} +\caption{\label{Descriptor-methods-table}Description of slots and methods for the \code{Descriptor} S4 class.} \end{table} \subsection{Field descriptors} @@ -733,9 +733,9 @@ The class \emph{FieldDescriptor} represents field descriptors in \proglang{R}. This is a wrapper S4 class around the -\texttt{google::protobuf::FieldDescriptor} \proglang{C++} class. +\code{google::protobuf::FieldDescriptor} \proglang{C++} class. Table~\ref{fielddescriptor-methods-table} describes the methods -defined for the \texttt{FieldDescriptor} class. +defined for the \code{FieldDescriptor} class. 
\begin{table}[tbp] \centering @@ -744,37 +744,37 @@ \toprule Slot & Description \\ \cmidrule(r){2-2} -\texttt{pointer} & External pointer to the \texttt{FieldDescriptor} \proglang{C++} variable \\ -\texttt{name} & Simple name of the field \\ -\texttt{full\_name} & Fully qualified name of the field \\ -\texttt{type} & Name of the message type where the field is declared \\[.3cm] +\code{pointer} & External pointer to the \code{FieldDescriptor} \proglang{C++} variable \\ +\code{name} & Simple name of the field \\ +\code{full\_name} & Fully qualified name of the field \\ +\code{type} & Name of the message type where the field is declared \\[.3cm] % Method & Description \\ \cmidrule(r){2-2} -\texttt{as.character} & Character representation of a descriptor\\ -\texttt{toString} & Character representation of a descriptor (same as \texttt{as.character}) \\ -\texttt{asMessage} & Return FieldDescriptorProto message. \\ -\texttt{name} & Return the name of the field descriptor.\\ -\texttt{fileDescriptor} & Return the fileDescriptor where this field is defined.\\ -\texttt{containing\_type} & Return the containing descriptor of this field.\\ -\texttt{is\_extension} & Return TRUE if this field is an extension.\\ -\texttt{number} & Gets the declared tag number of the field.\\ -\texttt{type} & Gets the type of the field.\\ -\texttt{cpp\_type} & Gets the \proglang{C++} type of the field.\\ -\texttt{label} & Gets the label of a field (optional, required, or repeated).\\ -\texttt{is\_repeated} & Return TRUE if this field is repeated.\\ -\texttt{is\_required} & Return TRUE if this field is required.\\ -\texttt{is\_optional} & Return TRUE if this field is optional.\\ -\texttt{has\_default\_value} & Return TRUE if this field has a default value.\\ -\texttt{default\_value} & Return the default value.\\ -\texttt{message\_type} & Return the message type if this is a message type field.\\ -\texttt{enum\_type} & Return the enum type if this is an enum type field.\\ +\code{as.character} & 
Character representation of a descriptor\\ +\code{toString} & Character representation of a descriptor (same as \code{as.character}) \\ +\code{asMessage} & Return FieldDescriptorProto message. \\ +\code{name} & Return the name of the field descriptor.\\ +\code{fileDescriptor} & Return the fileDescriptor where this field is defined.\\ +\code{containing\_type} & Return the containing descriptor of this field.\\ +\code{is\_extension} & Return TRUE if this field is an extension.\\ +\code{number} & Gets the declared tag number of the field.\\ +\code{type} & Gets the type of the field.\\ +\code{cpp\_type} & Gets the \proglang{C++} type of the field.\\ +\code{label} & Gets the label of a field (optional, required, or repeated).\\ +\code{is\_repeated} & Return TRUE if this field is repeated.\\ +\code{is\_required} & Return TRUE if this field is required.\\ +\code{is\_optional} & Return TRUE if this field is optional.\\ +\code{has\_default\_value} & Return TRUE if this field has a default value.\\ +\code{default\_value} & Return the default value.\\ +\code{message\_type} & Return the message type if this is a message type field.\\ +\code{enum\_type} & Return the enum type if this is an enum type field.\\ \bottomrule \end{tabular} \end{small} \caption{\label{fielddescriptor-methods-table}Description of slots and - methods for the \texttt{FieldDescriptor} S4 class.} + methods for the \code{FieldDescriptor} S4 class.} \end{table} @@ -783,16 +783,16 @@ The class \emph{EnumDescriptor} represents enum descriptors in \proglang{R}. This is a wrapper S4 class around the -\texttt{google::protobuf::EnumDescriptor} \proglang{C++} class. +\code{google::protobuf::EnumDescriptor} \proglang{C++} class. Table~\ref{enumdescriptor-methods-table} describes the methods -defined for the \texttt{EnumDescriptor} class. +defined for the \code{EnumDescriptor} class. The \verb|$| operator can be used to retrieve the value of enum constants contained in the EnumDescriptor, or to invoke pseudo-methods. 
-The \texttt{EnumDescriptor} contains information about what values this type -defines, while the \texttt{EnumValueDescriptor} describes an +The \code{EnumDescriptor} contains information about what values this type +defines, while the \code{EnumValueDescriptor} describes an individual enum constant of a particular type. <<>>= @@ -807,32 +807,32 @@ \toprule Slot & Description \\ \cmidrule(r){2-2} -\texttt{pointer} & External pointer to the \texttt{EnumDescriptor} \proglang{C++} variable \\ -\texttt{name} & Simple name of the enum \\ -\texttt{full\_name} & Fully qualified name of the enum \\ -\texttt{type} & Name of the message type where the enum is declared \\[.3cm] +\code{pointer} & External pointer to the \code{EnumDescriptor} \proglang{C++} variable \\ +\code{name} & Simple name of the enum \\ +\code{full\_name} & Fully qualified name of the enum \\ +\code{type} & Name of the message type where the enum is declared \\[.3cm] % Method & Description \\ \cmidrule(r){2-2} -\texttt{as.list} & return a named +\code{as.list} & return a named integer vector with the values of the enum and their names.\\ -\texttt{as.character} & character representation of a descriptor\\ -\texttt{toString} & character -representation of a descriptor (same as \texttt{as.character}) \\ -\texttt{asMessage} & return EnumDescriptorProto message. 
\\ -\texttt{name} & Return the name of the enum descriptor.\\ -\texttt{fileDescriptor} & Return the fileDescriptor where this field is defined.\\ -\texttt{containing\_type} & Return the containing descriptor of this field.\\ -\texttt{length} & Return the number of constants in this enum.\\ -\texttt{has} & Return TRUE if this enum contains the specified named constant string.\\ -\texttt{value\_count} & Return the number of constants in this enum (same as \texttt{length}).\\ -\texttt{value} & Return the EnumValueDescriptor of an enum value of specified index, name, or number.\\ +\code{as.character} & character representation of a descriptor\\ +\code{toString} & character +representation of a descriptor (same as \code{as.character}) \\ +\code{asMessage} & return EnumDescriptorProto message. \\ +\code{name} & Return the name of the enum descriptor.\\ +\code{fileDescriptor} & Return the fileDescriptor where this field is defined.\\ +\code{containing\_type} & Return the containing descriptor of this field.\\ +\code{length} & Return the number of constants in this enum.\\ +\code{has} & Return TRUE if this enum contains the specified named constant string.\\ +\code{value\_count} & Return the number of constants in this enum (same as \code{length}).\\ +\code{value} & Return the EnumValueDescriptor of an enum value of specified index, name, or number.\\ \bottomrule \end{tabular} \end{small} \caption{\label{enumdescriptor-methods-table}Description of slots and methods - for the \texttt{EnumDescriptor} S4 class.} + for the \code{EnumDescriptor} S4 class.} \end{table} \subsection{Enum value descriptors} @@ -840,9 +840,9 @@ The class \emph{EnumValueDescriptor} represents enumeration value descriptors in \proglang{R}. This is a wrapper S4 class around the -\texttt{google::protobuf::EnumValueDescriptor} \proglang{C++} class. +\code{google::protobuf::EnumValueDescriptor} \proglang{C++} class. 
Table~\ref{EnumValueDescriptor-methods-table} describes the methods -defined for the \texttt{EnumValueDescriptor} class. +defined for the \code{EnumValueDescriptor} class. The \verb|$| operator can be used to invoke pseudo-methods. @@ -859,24 +859,24 @@ \toprule Slot & Description \\ \cmidrule(r){2-2} -\texttt{pointer} & External pointer to the \texttt{EnumValueDescriptor} \proglang{C++} variable \\ -\texttt{name} & simple name of the enum value \\ -\texttt{full\_name} & fully qualified name of the enum value \\[.3cm] +\code{pointer} & External pointer to the \code{EnumValueDescriptor} \proglang{C++} variable \\ +\code{name} & simple name of the enum value \\ +\code{full\_name} & fully qualified name of the enum value \\[.3cm] % Method & Description \\ \cmidrule(r){2-2} -\texttt{number} & return the number of this EnumValueDescriptor. \\ -\texttt{name} & Return the name of the enum value descriptor.\\ -\texttt{enum\_type} & return the EnumDescriptor type of this EnumValueDescriptor. \\ -\texttt{as.character} & character representation of a descriptor. \\ -\texttt{toString} & character representation of a descriptor (same as \texttt{as.character}). \\ -\texttt{asMessage} & return EnumValueDescriptorProto message. \\ +\code{number} & return the number of this EnumValueDescriptor. \\ +\code{name} & Return the name of the enum value descriptor.\\ +\code{enum\_type} & return the EnumDescriptor type of this EnumValueDescriptor. \\ +\code{as.character} & character representation of a descriptor. \\ +\code{toString} & character representation of a descriptor (same as \code{as.character}). \\ +\code{asMessage} & return EnumValueDescriptorProto message. 
\\ \bottomrule \end{tabular} \end{small} \caption{\label{EnumValueDescriptor-methods-table}Description of slots - and methods for the \texttt{EnumValueDescriptor} S4 class.} + and methods for the \code{EnumValueDescriptor} S4 class.} \end{table} \subsection{File descriptors} @@ -889,31 +889,31 @@ \toprule Slot & Description \\ \cmidrule(r){2-2} -\texttt{pointer} & external pointer to the \texttt{FileDescriptor} object of the \proglang{C++} proto library. Documentation for the -\texttt{FileDescriptor} class is available from the Protocol Buffer project page: +\code{pointer} & external pointer to the \code{FileDescriptor} object of the \proglang{C++} proto library. Documentation for the +\code{FileDescriptor} class is available from the Protocol Buffer project page: \url{http://developers.google.com/protocol-buffers/docs/reference/cpp/google.protobuf.descriptor.html#FileDescriptor} \\ -\texttt{filename} & fully qualified pathname of the \texttt{.proto} file.\\ -\texttt{package} & package name defined in this \texttt{.proto} file.\\[.3cm] +\code{filename} & fully qualified pathname of the \code{.proto} file.\\ +\code{package} & package name defined in this \code{.proto} file.\\[.3cm] Method & Description \\ \cmidrule(r){2-2} -\texttt{name} & Return the filename for this FileDescriptorProto.\\ -\texttt{package} & Return the file-level package name specified in this FileDescriptorProto.\\ -\texttt{as.character} & character representation of a descriptor. \\ -\texttt{toString} & character representation of a descriptor (same as \texttt{as.character}). \\ -\texttt{asMessage} & return FileDescriptorProto message. \\ -\texttt{as.list} & return named list of descriptors defined in this file descriptor.\\ +\code{name} & Return the filename for this FileDescriptorProto.\\ +\code{package} & Return the file-level package name specified in this FileDescriptorProto.\\ +\code{as.character} & character representation of a descriptor. 
\\ +\code{toString} & character representation of a descriptor (same as \code{as.character}). \\ +\code{asMessage} & return FileDescriptorProto message. \\ +\code{as.list} & return named list of descriptors defined in this file descriptor.\\ \bottomrule \end{tabular} \end{small} -\caption{\label{filedescriptor-methods-table}Description of slots and methods for the \texttt{FileDescriptor} S4 class.} +\caption{\label{filedescriptor-methods-table}Description of slots and methods for the \code{FileDescriptor} S4 class.} \end{table} The class \emph{FileDescriptor} represents file descriptors in \proglang{R}. This is a wrapper S4 class around the -\texttt{google::protobuf::FileDescriptor} \proglang{C++} class. [TRUNCATED] To get the complete diff run: svnlook diff /svnroot/rprotobuf -r 879 From noreply at r-forge.r-project.org Mon Mar 24 21:25:09 2014 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Mon, 24 Mar 2014 21:25:09 +0100 (CET) Subject: [Rprotobuf-commits] r880 - papers/jss Message-ID: <20140324202509.9B17D187151@r-forge.r-project.org> Author: murray Date: 2014-03-24 21:25:09 +0100 (Mon, 24 Mar 2014) New Revision: 880 Modified: papers/jss/article.Rnw Log: Replace sweave code with manual CodeInput/CodeOutput for now so this is buildable again with protobuf-2.5 library which doesn't like our unittest.proto. Modified: papers/jss/article.Rnw =================================================================== --- papers/jss/article.Rnw 2014-03-23 23:01:28 UTC (rev 879) +++ papers/jss/article.Rnw 2014-03-24 20:25:09 UTC (rev 880) @@ -984,7 +984,7 @@ choose another type (such as enum or integer) capable of storing three distinct values. 
-<>= +<>= if (!exists("protobuf_unittest.TestAllTypes", "RProtoBuf:DescriptorPool")) { unittest.proto.file <- system.file("unitTests", "data", @@ -1041,19 +1041,27 @@ If we try to set an int64 field in \proglang{R} to double values, we lose precision: -<<>>= -test <- new(protobuf_unittest.TestAllTypes) -test$repeated_int64 <- c(2^53, 2^53+1) -length(unique(test$repeated_int64)) -@ +% We want a cleaner error message here. +\begin{CodeChunk} +\begin{CodeInput} +R> test <- new(protobuf_unittest.TestAllTypes) +R> test$repeated_int64 <- c(2^53, 2^53+1) +R> length(unique(test$repeated_int64)) +\end{CodeInput} +\begin{CodeOutput} +[1] 1 +\end{CodeOutput} +\end{CodeChunk} But when the values are specified as character strings, \pkg{RProtoBuf} will automatically coerce them into a true 64-bit integer types before storing them in the Protocol Buffer message: -<<>>= -test$repeated_int64 <- c("9007199254740992", "9007199254740993") -@ +\begin{CodeChunk} +\begin{CodeInput} +R> test$repeated_int64 <- c("9007199254740992", "9007199254740993") +\end{CodeInput} +\end{CodeChunk} When reading the value back into \proglang{R}, numeric types are returned by default, but when the full precision is required a character value @@ -1063,14 +1071,34 @@ packages such as \pkg{int64} \citep{int64} or \pkg{bit64} \citep{bit64} which represent 64-bit integers in \proglang{R}. 
-<<>>= -options("RProtoBuf.int64AsString" = FALSE) -test$repeated_int64 -length(unique(test$repeated_int64)) -options("RProtoBuf.int64AsString" = TRUE) -test$repeated_int64 -length(unique(test$repeated_int64)) -@ +\begin{CodeChunk} +\begin{CodeInput} +R> options("RProtoBuf.int64AsString" = FALSE) +R> test$repeated_int64 +\end{CodeInput} +\begin{CodeOutput} +[1] 9.007e+15 9.007e+15 +\end{CodeOutput} +\begin{CodeInput} +R> length(unique(test$repeated_int64)) +\end{CodeInput} +\begin{CodeOutput} +[1] 1 +\end{CodeOutput} +\begin{CodeInput} +R> options("RProtoBuf.int64AsString" = TRUE) +R> test$repeated_int64 +\end{CodeInput} +\begin{CodeOutput} +[1] "9007199254740992" "9007199254740993" +\end{CodeOutput} +\begin{CodeInput} +R> length(unique(test$repeated_int64)) +\end{CodeInput} +\begin{CodeOutput} +[1] 2 +\end{CodeOutput} +\end{CodeChunk} <>= options("RProtoBuf.int64AsString" = FALSE) From noreply at r-forge.r-project.org Mon Mar 24 21:33:36 2014 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Mon, 24 Mar 2014 21:33:36 +0100 (CET) Subject: [Rprotobuf-commits] r881 - papers/jss Message-ID: <20140324203336.1EB5B187155@r-forge.r-project.org> Author: murray Date: 2014-03-24 21:33:35 +0100 (Mon, 24 Mar 2014) New Revision: 881 Added: papers/jss/int64.proto Modified: papers/jss/article.R papers/jss/article.Rnw Log: Better fix. Add a new proto with only the examples we need for this paper rather than pulling in all of the complexity of TestAllTypes or faking it with CodeInput/CodeOutput.
Modified: papers/jss/article.R =================================================================== --- papers/jss/article.R 2014-03-24 20:25:09 UTC (rev 880) +++ papers/jss/article.R 2014-03-24 20:33:35 UTC (rev 881) @@ -173,19 +173,15 @@ ################################################### -### code chunk number 22: article.Rnw:987-994 +### code chunk number 22: article.Rnw:987-990 ################################################### -if (!exists("protobuf_unittest.TestAllTypes", - "RProtoBuf:DescriptorPool")) { - unittest.proto.file <- system.file("unitTests", "data", - "unittest.proto", - package="RProtoBuf") - readProtoFiles(file=unittest.proto.file) +if (!exists("JSSPaper.Example1", "RProtoBuf:DescriptorPool")) { + readProtoFiles(file="int64.proto") } ################################################### -### code chunk number 23: article.Rnw:1016-1020 +### code chunk number 23: article.Rnw:1012-1016 ################################################### as.integer(2^31-1) as.integer(2^31 - 1) + as.integer(1) @@ -194,27 +190,27 @@ ################################################### -### code chunk number 24: article.Rnw:1031-1032 +### code chunk number 24: article.Rnw:1027-1028 ################################################### 2^53 == (2^53 + 1) ################################################### -### code chunk number 25: article.Rnw:1044-1047 +### code chunk number 25: article.Rnw:1040-1043 ################################################### -test <- new(protobuf_unittest.TestAllTypes) +test <- new(JSSPaper.Example1) test$repeated_int64 <- c(2^53, 2^53+1) length(unique(test$repeated_int64)) ################################################### -### code chunk number 26: article.Rnw:1054-1055 +### code chunk number 26: article.Rnw:1050-1051 ################################################### test$repeated_int64 <- c("9007199254740992", "9007199254740993") ################################################### -### code chunk number 27: article.Rnw:1066-1072 +### code 
chunk number 27: article.Rnw:1062-1068 ################################################### options("RProtoBuf.int64AsString" = FALSE) test$repeated_int64 @@ -225,20 +221,20 @@ ################################################### -### code chunk number 28: article.Rnw:1075-1076 +### code chunk number 28: article.Rnw:1071-1072 ################################################### options("RProtoBuf.int64AsString" = FALSE) ################################################### -### code chunk number 29: article.Rnw:1093-1095 +### code chunk number 29: article.Rnw:1089-1091 ################################################### msg <- serialize_pb(iris, NULL) identical(iris, unserialize_pb(msg)) ################################################### -### code chunk number 30: article.Rnw:1126-1129 +### code chunk number 30: article.Rnw:1122-1125 ################################################### datasets <- as.data.frame(data(package="datasets")$results) datasets$name <- sub("\\s+.*$", "", datasets$Item) @@ -246,13 +242,13 @@ ################################################### -### code chunk number 31: article.Rnw:1139-1140 +### code chunk number 31: article.Rnw:1135-1136 ################################################### m <- sum(sapply(datasets$name, function(x) can_serialize_pb(get(x)))) ################################################### -### code chunk number 32: article.Rnw:1153-1160 +### code chunk number 32: article.Rnw:1149-1156 ################################################### attr(CO2, "formula") msg <- serialize_pb(CO2, NULL) @@ -264,7 +260,7 @@ ################################################### -### code chunk number 33: article.Rnw:1176-1195 +### code chunk number 33: article.Rnw:1172-1191 ################################################### datasets$object.size <- unname(sapply(datasets$name, function(x) object.size(eval(as.name(x))))) @@ -288,7 +284,7 @@ ################################################### -### code chunk number 34: article.Rnw:1403-1408 +### 
code chunk number 34: article.Rnw:1399-1404 ################################################### require(RProtoBuf) require(HistogramTools) @@ -298,7 +294,7 @@ ################################################### -### code chunk number 35: article.Rnw:1476-1483 (eval = FALSE) +### code chunk number 35: article.Rnw:1472-1479 (eval = FALSE) ################################################### ## library("RProtoBuf") ## library("httr") @@ -310,7 +306,7 @@ ################################################### -### code chunk number 36: article.Rnw:1542-1558 (eval = FALSE) +### code chunk number 36: article.Rnw:1538-1554 (eval = FALSE) ################################################### ## library("httr") ## library("RProtoBuf") @@ -331,7 +327,7 @@ ################################################### -### code chunk number 37: article.Rnw:1562-1565 (eval = FALSE) +### code chunk number 37: article.Rnw:1558-1561 (eval = FALSE) ################################################### ## fnargs <- unserialize_pb(inputmsg) ## val <- do.call(stats::rnorm, fnargs) Modified: papers/jss/article.Rnw =================================================================== --- papers/jss/article.Rnw 2014-03-24 20:25:09 UTC (rev 880) +++ papers/jss/article.Rnw 2014-03-24 20:33:35 UTC (rev 881) @@ -984,20 +984,16 @@ choose another type (such as enum or integer) capable of storing three distinct values. -<>= -if (!exists("protobuf_unittest.TestAllTypes", - "RProtoBuf:DescriptorPool")) { - unittest.proto.file <- system.file("unitTests", "data", - "unittest.proto", - package="RProtoBuf") - readProtoFiles(file=unittest.proto.file) +<>= +if (!exists("JSSPaper.Example1", "RProtoBuf:DescriptorPool")) { + readProtoFiles(file="int64.proto") } @ % We want a cleaner error message here. 
\begin{CodeChunk} \begin{CodeInput} -R> a <- new(protobuf_unittest.TestAllTypes) +R> a <- new(JSSPaper.Example1) R> a$optional_bool <- TRUE R> a$optional_bool <- FALSE R> a$optional_bool <- NA @@ -1041,27 +1037,19 @@ If we try to set an int64 field in \proglang{R} to double values, we lose precision: -% We want a cleaner error message here. -\begin{CodeChunk} -\begin{CodeInput} -R> test <- new(protobuf_unittest.TestAllTypes) -R> test$repeated_int64 <- c(2^53, 2^53+1) -R> length(unique(test$repeated_int64)) -\end{CodeInput} -\begin{CodeOutput} -[1] 1 -\end{CodeOutput} -\end{CodeChunk} +<<>>= +test <- new(JSSPaper.Example1) +test$repeated_int64 <- c(2^53, 2^53+1) +length(unique(test$repeated_int64)) +@ But when the values are specified as character strings, \pkg{RProtoBuf} will automatically coerce them into a true 64-bit integer types before storing them in the Protocol Buffer message: -\begin{CodeChunk} -\begin{CodeInput} -R> test$repeated_int64 <- c("9007199254740992", "9007199254740993") -\end{CodeInput} -\end{CodeChunk} +<<>>= +test$repeated_int64 <- c("9007199254740992", "9007199254740993") +@ When reading the value back into \proglang{R}, numeric types are returned by default, but when the full precision is required a character value @@ -1071,34 +1059,14 @@ packages such as \pkg{int64} \citep{int64} or \pkg{bit64} \citep{bit64} which represent 64-bit integers in \proglang{R}. 
-\begin{CodeChunk} -\begin{CodeInput} -R> options("RProtoBuf.int64AsString" = FALSE) -R> test$repeated_int64 -\end{CodeInput} -\begin{CodeOutput} -[1] 9.007e+15 9.007e+15 -\end{CodeOutput} -\begin{CodeInput} -R> length(unique(test$repeated_int64)) -\end{CodeInput} -\begin{CodeOutput} -[1] 1 -\end{CodeOutput} -\begin{CodeInput} -R> options("RProtoBuf.int64AsString" = TRUE) -R> test$repeated_int64 -\end{CodeInput} -\begin{CodeOutput} -[1] "9007199254740992" "9007199254740993" -\end{CodeOutput} -\begin{CodeInput} -R> length(unique(test$repeated_int64)) -\end{CodeInput} -\begin{CodeOutput} -[1] 2 -\end{CodeOutput} -\end{CodeChunk} +<<>>= +options("RProtoBuf.int64AsString" = FALSE) +test$repeated_int64 +length(unique(test$repeated_int64)) +options("RProtoBuf.int64AsString" = TRUE) +test$repeated_int64 +length(unique(test$repeated_int64)) +@ <>= options("RProtoBuf.int64AsString" = FALSE) Added: papers/jss/int64.proto =================================================================== --- papers/jss/int64.proto (rev 0) +++ papers/jss/int64.proto 2014-03-24 20:33:35 UTC (rev 881) @@ -0,0 +1,6 @@ +package JSSPaper; + +message Example1 { + optional bool optional_bool = 1; + repeated int64 repeated_int64 = 2; +} \ No newline at end of file From noreply at r-forge.r-project.org Mon Mar 24 21:52:52 2014 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Mon, 24 Mar 2014 21:52:52 +0100 (CET) Subject: [Rprotobuf-commits] r882 - in pkg: . 
inst/unitTests/data Message-ID: <20140324205252.947F8186C1C@r-forge.r-project.org> Author: murray Date: 2014-03-24 21:52:51 +0100 (Mon, 24 Mar 2014) New Revision: 882 Modified: pkg/ChangeLog pkg/inst/unitTests/data/unittest.proto Log: Comment out a dup enum message type that causes problems with libprotobuf-2.5 Modified: pkg/ChangeLog =================================================================== --- pkg/ChangeLog 2014-03-24 20:33:35 UTC (rev 881) +++ pkg/ChangeLog 2014-03-24 20:52:51 UTC (rev 882) @@ -1,3 +1,10 @@ +2014-03-24 Murray Stokely + + * inst/unitTests/data/unittest.proto: Comment out + TestEnumWithDupValue. This causes problems with the newest + libprotobuf-2.5 which now wants an option to be specified to + enable enum aliasing like this. + 2014-03-10 Murray Stokely * src/DescriptorPoolLookup.cpp (rprotobuf): Import all top-level Modified: pkg/inst/unitTests/data/unittest.proto =================================================================== --- pkg/inst/unitTests/data/unittest.proto 2014-03-24 20:33:35 UTC (rev 881) +++ pkg/inst/unitTests/data/unittest.proto 2014-03-24 20:52:51 UTC (rev 882) @@ -392,15 +392,16 @@ optional NestedMessage optional_nested_message = 1; } - +// mstokely commented this out, it breaks with libprotobuf-2.5. +// // Test an enum that has multiple values with the same number. -enum TestEnumWithDupValue { - FOO1 = 1; - BAR1 = 2; - BAZ = 3; - FOO2 = 1; - BAR2 = 2; -} +// enum TestEnumWithDupValue { +// FOO1 = 1; +// BAR1 = 2; +// BAZ = 3; +// FOO2 = 1; +// BAR2 = 2; +//} // Test an enum with large, unordered values. 
enum TestSparseEnum { From noreply at r-forge.r-project.org Mon Mar 24 22:04:05 2014 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Mon, 24 Mar 2014 22:04:05 +0100 (CET) Subject: [Rprotobuf-commits] r883 - pkg/inst Message-ID: <20140324210405.3077B186D61@r-forge.r-project.org> Author: murray Date: 2014-03-24 22:04:04 +0100 (Mon, 24 Mar 2014) New Revision: 883 Modified: pkg/inst/NEWS.Rd Log: Document duplicate enum removal from unittest.proto. Modified: pkg/inst/NEWS.Rd =================================================================== --- pkg/inst/NEWS.Rd 2014-03-24 20:52:51 UTC (rev 882) +++ pkg/inst/NEWS.Rd 2014-03-24 21:04:04 UTC (rev 883) @@ -11,9 +11,11 @@ \item Fix a bug in the \code{show} method for \code{EnumDescriptor} types. \item Import all top-level enums from imported \code{.proto} files. + \item Removed duplicate enum value type from the unit tests that + caused problems with the most recent libprotobuf-2.5. (without option + allow_alias). } } - \section{Changes in RProtoBuf version 0.4.0 (2014-01-14)}{ \itemize{ \item Changes to support CRAN builds for MS Windows. From noreply at r-forge.r-project.org Tue Mar 25 00:33:30 2014 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Tue, 25 Mar 2014 00:33:30 +0100 (CET) Subject: [Rprotobuf-commits] r884 - pkg Message-ID: <20140324233331.09A3618716A@r-forge.r-project.org> Author: murray Date: 2014-03-25 00:33:30 +0100 (Tue, 25 Mar 2014) New Revision: 884 Modified: pkg/ChangeLog pkg/configure pkg/configure.in Log: If we can't find the protobuf headers with default CXXFLAGS and CPPFLAGS, add -I/usr/local/include to them and try again so that this is more likely to work out of the box. Modified: pkg/ChangeLog =================================================================== --- pkg/ChangeLog 2014-03-24 21:04:04 UTC (rev 883) +++ pkg/ChangeLog 2014-03-24 23:33:30 UTC (rev 884) @@ -4,7 +4,10 @@ TestEnumWithDupValue. 
This causes problems with the newest libprotobuf-2.5 which now wants an option to be specified to enable enum aliasing like this. - + * configure.in: if we can't find the libproto headers with the + default CPPFLAGS/CXXFLAGS, manually add /usr/local/include and try + again. + 2014-03-10 Murray Stokely * src/DescriptorPoolLookup.cpp (rprotobuf): Import all top-level Modified: pkg/configure =================================================================== --- pkg/configure 2014-03-24 21:04:04 UTC (rev 883) +++ pkg/configure 2014-03-24 23:33:30 UTC (rev 884) @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.69 for RProtoBuf 0.3. +# Generated by GNU Autoconf 2.69 for RProtoBuf 0.4. # # # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. @@ -577,8 +577,8 @@ # Identity of this package. PACKAGE_NAME='RProtoBuf' PACKAGE_TARNAME='rprotobuf' -PACKAGE_VERSION='0.3' -PACKAGE_STRING='RProtoBuf 0.3' +PACKAGE_VERSION='0.4' +PACKAGE_STRING='RProtoBuf 0.4' PACKAGE_BUGREPORT='' PACKAGE_URL='' @@ -1234,7 +1234,7 @@ # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures RProtoBuf 0.3 to adapt to many kinds of systems. +\`configure' configures RProtoBuf 0.4 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1295,7 +1295,7 @@ if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of RProtoBuf 0.3:";; + short | recursive ) echo "Configuration of RProtoBuf 0.4:";; esac cat <<\_ACEOF @@ -1377,7 +1377,7 @@ test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -RProtoBuf configure 0.3 +RProtoBuf configure 0.4 generated by GNU Autoconf 2.69 Copyright (C) 2012 Free Software Foundation, Inc. 
@@ -1667,7 +1667,7 @@ This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by RProtoBuf $as_me 0.3, which was +It was created by RProtoBuf $as_me 0.4, which was generated by GNU Autoconf 2.69. Invocation command line was $ $0 $@ @@ -3806,6 +3806,8 @@ ## check for header and ability to link ## first for headers Debian has in libprotobuf-dev +protobuf_common_header=google/protobuf/stubs/common.h +protobuf_common_header_cache_var=`$as_echo "ac_cv_header_$protobuf_common_header" | $as_tr_sh` { $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5 $as_echo_n "checking for grep that handles long lines and -e... " >&6; } if ${ac_cv_path_GREP+:} false; then : @@ -4065,14 +4067,37 @@ done -ac_fn_cxx_check_header_mongrel "$LINENO" "google/protobuf/stubs/common.h" "ac_cv_header_google_protobuf_stubs_common_h" "$ac_includes_default" -if test "x$ac_cv_header_google_protobuf_stubs_common_h" = xyes; then : +as_ac_Header=`$as_echo "ac_cv_header_$protobuf_common_header" | $as_tr_sh` +ac_fn_cxx_check_header_mongrel "$LINENO" "$protobuf_common_header" "$as_ac_Header" "$ac_includes_default" +if eval test \"x\$"$as_ac_Header"\" = x"yes"; then : else + +# AC_MSG_ERROR([ERROR: ProtoBuf headers required; use '-Iincludedir' in CXXFLAGS for unusual locations.]) + # If it didn't work, try adding /usr/local directly then trying again + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Protobuf headers not found with default CXXFLAGS and CPPFLAGS, manually trying /usr/local/include" >&5 +$as_echo "$as_me: WARNING: Protobuf headers not found with default CXXFLAGS and CPPFLAGS, manually trying /usr/local/include" >&2;} + CPPFLAGS="${protobuf_cxxflags} ${CPPFLAGS} -I/usr/local/include" + CXXFLAGS="${protobuf_cxxflags} ${CXXFLAGS} -I/usr/local/include -L/usr/local/lib" + # unset the cache variable for this particular header + # check, so we can check again with different 
defaults + # specified. + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Unsetting $protobuf_common_header_cache_var" >&5 +$as_echo "$as_me: WARNING: Unsetting $protobuf_common_header_cache_var" >&2;} + { eval $protobuf_common_header_cache_var=; unset $protobuf_common_header_cache_var;} + as_ac_Header=`$as_echo "ac_cv_header_$protobuf_common_header" | $as_tr_sh` +ac_fn_cxx_check_header_mongrel "$LINENO" "$protobuf_common_header" "$as_ac_Header" "$ac_includes_default" +if eval test \"x\$"$as_ac_Header"\" = x"yes"; then : + +else as_fn_error $? "ERROR: ProtoBuf headers required; use '-Iincludedir' in CXXFLAGS for unusual locations." "$LINENO" 5 fi + +fi + + ## second for headers Debian has in libprotoc-dev ac_fn_cxx_check_header_mongrel "$LINENO" "google/protobuf/compiler/code_generator.h" "ac_cv_header_google_protobuf_compiler_code_generator_h" "$ac_includes_default" if test "x$ac_cv_header_google_protobuf_compiler_code_generator_h" = xyes; then : @@ -4786,7 +4811,7 @@ # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by RProtoBuf $as_me 0.3, which was +This file was extended by RProtoBuf $as_me 0.4, which was generated by GNU Autoconf 2.69. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -4839,7 +4864,7 @@ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -RProtoBuf config.status 0.3 +RProtoBuf config.status 0.4 configured by $0, generated by GNU Autoconf 2.69, with options \\"\$ac_cs_config\\" Modified: pkg/configure.in =================================================================== --- pkg/configure.in 2014-03-24 21:04:04 UTC (rev 883) +++ pkg/configure.in 2014-03-24 23:33:30 UTC (rev 884) @@ -8,7 +8,7 @@ AC_PREREQ(2.61) # Process this file with autoconf to produce a configure script. 
-AC_INIT([RProtoBuf],[0.3]) +AC_INIT([RProtoBuf],[0.4]) m4_include([m4/m4-ax_cxx_compile_stdcxx_0x.m4]) # We are using C++ @@ -67,8 +67,22 @@ ## check for header and ability to link ## first for headers Debian has in libprotobuf-dev -AC_CHECK_HEADER(google/protobuf/stubs/common.h,, - [AC_MSG_ERROR([ERROR: ProtoBuf headers required; use '-Iincludedir' in CXXFLAGS for unusual locations.])]) +protobuf_common_header=google/protobuf/stubs/common.h +protobuf_common_header_cache_var=AS_TR_SH([ac_cv_header_$protobuf_common_header]) +AC_CHECK_HEADER([$protobuf_common_header],, + [ + # If it didn't work, try adding /usr/local directly then trying again + AC_MSG_WARN([Protobuf headers not found with default CXXFLAGS and CPPFLAGS, manually trying /usr/local/include]) + CPPFLAGS="${protobuf_cxxflags} ${CPPFLAGS} -I/usr/local/include" + CXXFLAGS="${protobuf_cxxflags} ${CXXFLAGS} -I/usr/local/include -L/usr/local/lib" + # unset the cache variable for this particular header + # check, so we can check again with different defaults + # specified. 
+ AC_MSG_WARN([Unsetting $protobuf_common_header_cache_var]) + AS_UNSET([$protobuf_common_header_cache_var]) + AC_CHECK_HEADER([$protobuf_common_header],, + [AC_MSG_ERROR([ERROR: ProtoBuf headers required; use '-Iincludedir' in CXXFLAGS for unusual locations.])]) + ]) ## second for headers Debian has in libprotoc-dev AC_CHECK_HEADER(google/protobuf/compiler/code_generator.h,, [AC_MSG_ERROR([ERROR: ProtoBuf compiler headers required; use '-Iincludedir' in CXXFLAGS for unusual locations.])]) From noreply at r-forge.r-project.org Tue Mar 25 03:42:24 2014 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Tue, 25 Mar 2014 03:42:24 +0100 (CET) Subject: [Rprotobuf-commits] r885 - papers/jss Message-ID: <20140325024224.29A50186EF4@r-forge.r-project.org> Author: murray Date: 2014-03-25 03:42:19 +0100 (Tue, 25 Mar 2014) New Revision: 885 Modified: papers/jss/article.R papers/jss/article.Rnw Log: Ditch the non-portable int64 examples with a description of what the option does. 
Modified: papers/jss/article.R =================================================================== --- papers/jss/article.R 2014-03-24 23:33:30 UTC (rev 884) +++ papers/jss/article.R 2014-03-25 02:42:19 UTC (rev 885) @@ -196,45 +196,14 @@ ################################################### -### code chunk number 25: article.Rnw:1040-1043 +### code chunk number 25: article.Rnw:1078-1080 ################################################### -test <- new(JSSPaper.Example1) -test$repeated_int64 <- c(2^53, 2^53+1) -length(unique(test$repeated_int64)) - - -################################################### -### code chunk number 26: article.Rnw:1050-1051 -################################################### -test$repeated_int64 <- c("9007199254740992", "9007199254740993") - - -################################################### -### code chunk number 27: article.Rnw:1062-1068 -################################################### -options("RProtoBuf.int64AsString" = FALSE) -test$repeated_int64 -length(unique(test$repeated_int64)) -options("RProtoBuf.int64AsString" = TRUE) -test$repeated_int64 -length(unique(test$repeated_int64)) - - -################################################### -### code chunk number 28: article.Rnw:1071-1072 -################################################### -options("RProtoBuf.int64AsString" = FALSE) - - -################################################### -### code chunk number 29: article.Rnw:1089-1091 -################################################### msg <- serialize_pb(iris, NULL) identical(iris, unserialize_pb(msg)) ################################################### -### code chunk number 30: article.Rnw:1122-1125 +### code chunk number 26: article.Rnw:1111-1114 ################################################### datasets <- as.data.frame(data(package="datasets")$results) datasets$name <- sub("\\s+.*$", "", datasets$Item) @@ -242,13 +211,13 @@ ################################################### -### code chunk number 31: 
article.Rnw:1135-1136 +### code chunk number 27: article.Rnw:1124-1125 ################################################### m <- sum(sapply(datasets$name, function(x) can_serialize_pb(get(x)))) ################################################### -### code chunk number 32: article.Rnw:1149-1156 +### code chunk number 28: article.Rnw:1138-1145 ################################################### attr(CO2, "formula") msg <- serialize_pb(CO2, NULL) @@ -260,7 +229,7 @@ ################################################### -### code chunk number 33: article.Rnw:1172-1191 +### code chunk number 29: article.Rnw:1161-1180 ################################################### datasets$object.size <- unname(sapply(datasets$name, function(x) object.size(eval(as.name(x))))) @@ -284,7 +253,7 @@ ################################################### -### code chunk number 34: article.Rnw:1399-1404 +### code chunk number 30: article.Rnw:1388-1393 ################################################### require(RProtoBuf) require(HistogramTools) @@ -294,7 +263,7 @@ ################################################### -### code chunk number 35: article.Rnw:1472-1479 (eval = FALSE) +### code chunk number 31: article.Rnw:1461-1468 (eval = FALSE) ################################################### ## library("RProtoBuf") ## library("httr") @@ -306,7 +275,7 @@ ################################################### -### code chunk number 36: article.Rnw:1538-1554 (eval = FALSE) +### code chunk number 32: article.Rnw:1527-1543 (eval = FALSE) ################################################### ## library("httr") ## library("RProtoBuf") @@ -327,7 +296,7 @@ ################################################### -### code chunk number 37: article.Rnw:1558-1561 (eval = FALSE) +### code chunk number 33: article.Rnw:1547-1550 (eval = FALSE) ################################################### ## fnargs <- unserialize_pb(inputmsg) ## val <- do.call(stats::rnorm, fnargs) Modified: papers/jss/article.Rnw 
=================================================================== --- papers/jss/article.Rnw 2014-03-24 23:33:30 UTC (rev 884) +++ papers/jss/article.Rnw 2014-03-25 02:42:19 UTC (rev 885) @@ -1034,44 +1034,33 @@ \pkg{RProtoBuf} allows users to get and set 64-bit integer values by specifying them as character strings. -If we try to set an int64 field in \proglang{R} to double values, we lose -precision: - -<<>>= -test <- new(JSSPaper.Example1) -test$repeated_int64 <- c(2^53, 2^53+1) -length(unique(test$repeated_int64)) -@ - -But when the values are specified as character strings, \pkg{RProtoBuf} -will automatically coerce them into a true 64-bit integer types -before storing them in the Protocol Buffer message: - -<<>>= -test$repeated_int64 <- c("9007199254740992", "9007199254740993") -@ - -When reading the value back into \proglang{R}, numeric types are returned by -default, but when the full precision is required a character value -will be returned if the \code{RProtoBuf.int64AsString} option is set -to \code{TRUE}. The character values are useful because they can +On 64-bit platforms, character strings representing large decimal +numbers will be coerced to int64 during assignment to 64-bit protocol +buffer types to work around the lack of native 64-bit types in \proglang{R}. The +values are stored as distinct int64 values in memory but when accessed +from \proglang{R} language code they will be coerced into numeric values. If the +full 64-bit precision is required, the \code{RProtoBuf.int64AsString} +option can be set to \code{TRUE} to return int64 values from messages as character +strings. The character values are useful because they can accurately be used as unique identifiers and can easily be passed to \proglang{R} packages such as \pkg{int64} \citep{int64} or \pkg{bit64} \citep{bit64} which represent 64-bit integers in \proglang{R}. 
-<<>>= -options("RProtoBuf.int64AsString" = FALSE) -test$repeated_int64 -length(unique(test$repeated_int64)) -options("RProtoBuf.int64AsString" = TRUE) -test$repeated_int64 -length(unique(test$repeated_int64)) -@ +% test <- new(JSSPaper.Example1) +% test$repeated_int64 <- c(2^53, 2^53+1) +% length(unique(test$repeated_int64)) -<>= -options("RProtoBuf.int64AsString" = FALSE) -@ +% test$repeated_int64 <- c("9007199254740992", "9007199254740993") +% options("RProtoBuf.int64AsString" = FALSE) +% test$repeated_int64 +% length(unique(test$repeated_int64)) +% options("RProtoBuf.int64AsString" = TRUE) +% test$repeated_int64 +% length(unique(test$repeated_int64)) + +% options("RProtoBuf.int64AsString" = FALSE) + \section[Converting R data structures into Protocol Buffers]{Converting \proglang{R} data structures into Protocol Buffers} \label{sec:evaluation} From noreply at r-forge.r-project.org Tue Mar 25 04:01:03 2014 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Tue, 25 Mar 2014 04:01:03 +0100 (CET) Subject: [Rprotobuf-commits] r886 - papers/jss Message-ID: <20140325030103.4CA6818723E@r-forge.r-project.org> Author: edd Date: 2014-03-25 04:01:01 +0100 (Tue, 25 Mar 2014) New Revision: 886 Modified: papers/jss/article.R papers/jss/article.Rnw Log: minor pass over Murray's excellent edits Modified: papers/jss/article.R =================================================================== --- papers/jss/article.R 2014-03-25 02:42:19 UTC (rev 885) +++ papers/jss/article.R 2014-03-25 03:01:01 UTC (rev 886) @@ -190,20 +190,20 @@ ################################################### -### code chunk number 24: article.Rnw:1027-1028 +### code chunk number 24: article.Rnw:1028-1029 ################################################### 2^53 == (2^53 + 1) ################################################### -### code chunk number 25: article.Rnw:1078-1080 +### code chunk number 25: article.Rnw:1080-1082 ################################################### msg 
<- serialize_pb(iris, NULL) identical(iris, unserialize_pb(msg)) ################################################### -### code chunk number 26: article.Rnw:1111-1114 +### code chunk number 26: article.Rnw:1113-1116 ################################################### datasets <- as.data.frame(data(package="datasets")$results) datasets$name <- sub("\\s+.*$", "", datasets$Item) @@ -211,13 +211,13 @@ ################################################### -### code chunk number 27: article.Rnw:1124-1125 +### code chunk number 27: article.Rnw:1126-1127 ################################################### m <- sum(sapply(datasets$name, function(x) can_serialize_pb(get(x)))) ################################################### -### code chunk number 28: article.Rnw:1138-1145 +### code chunk number 28: article.Rnw:1140-1147 ################################################### attr(CO2, "formula") msg <- serialize_pb(CO2, NULL) @@ -229,7 +229,7 @@ ################################################### -### code chunk number 29: article.Rnw:1161-1180 +### code chunk number 29: article.Rnw:1163-1182 ################################################### datasets$object.size <- unname(sapply(datasets$name, function(x) object.size(eval(as.name(x))))) @@ -253,7 +253,7 @@ ################################################### -### code chunk number 30: article.Rnw:1388-1393 +### code chunk number 30: article.Rnw:1390-1395 ################################################### require(RProtoBuf) require(HistogramTools) @@ -263,7 +263,7 @@ ################################################### -### code chunk number 31: article.Rnw:1461-1468 (eval = FALSE) +### code chunk number 31: article.Rnw:1463-1470 (eval = FALSE) ################################################### ## library("RProtoBuf") ## library("httr") @@ -275,7 +275,7 @@ ################################################### -### code chunk number 32: article.Rnw:1527-1543 (eval = FALSE) +### code chunk number 32: article.Rnw:1529-1545 (eval = 
FALSE) ################################################### ## library("httr") ## library("RProtoBuf") @@ -296,7 +296,7 @@ ################################################### -### code chunk number 33: article.Rnw:1547-1550 (eval = FALSE) +### code chunk number 33: article.Rnw:1549-1552 (eval = FALSE) ################################################### ## fnargs <- unserialize_pb(inputmsg) ## val <- do.call(stats::rnorm, fnargs) Modified: papers/jss/article.Rnw =================================================================== --- papers/jss/article.Rnw 2014-03-25 02:42:19 UTC (rev 885) +++ papers/jss/article.Rnw 2014-03-25 03:01:01 UTC (rev 886) @@ -1020,29 +1020,31 @@ \label{sec:int64} \proglang{R} also does not support the native 64-bit integer type. Numeric vectors -with values $\geq 2^{31}$ can only be stored as doubles, which have -limited precision. Thereby \proglang{R} loses the ability to distinguish some -distinct integers: +with integer values greater or equal to $2^{31}$ can only be stored as +floating-point double precision variables. This conversion incurs a loss of +precision, and \proglang{R} loses the ability to distinguish between some +distinct integer variables: <<>>= 2^53 == (2^53 + 1) @ -However, most modern languages do have support for 64-bit integers, +Most modern languages do have support for 64-bit integer values, which becomes problematic when \pkg{RProtoBuf} is used to exchange data with a system that requires this integer type. To work around this, \pkg{RProtoBuf} allows users to get and set 64-bit integer values by specifying them as character strings. On 64-bit platforms, character strings representing large decimal -numbers will be coerced to int64 during assignment to 64-bit protocol -buffer types to work around the lack of native 64-bit types in \proglang{R}. The -values are stored as distinct int64 values in memory but when accessed -from \proglang{R} language code they will be coerced into numeric values. 
If the +numbers will be coerced to \code{int64} during assignment to 64-bit Protocol +Buffer types to work around the lack of native 64-bit types in \proglang{R} itself. The +values are stored as distinct \code{int64} values in memory. But when accessed +from \proglang{R} language code, they will be coerced into numeric +(floating-point) values. If the full 64-bit precision is required, the \code{RProtoBuf.int64AsString} -option can be set to \code{TRUE} to return int64 values from messages as character -strings. The character values are useful because they can -accurately be used as unique identifiers and can easily be passed to \proglang{R} +option can be set to \code{TRUE} to return \code{int64} values from messages as character +strings. Such character values are useful because they can +accurately be used as unique identifiers, and can easily be passed to \proglang{R} packages such as \pkg{int64} \citep{int64} or \pkg{bit64} \citep{bit64} which represent 64-bit integers in \proglang{R}. From noreply at r-forge.r-project.org Tue Mar 25 04:37:52 2014 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Tue, 25 Mar 2014 04:37:52 +0100 (CET) Subject: [Rprotobuf-commits] r887 - in pkg: . inst/unitTests Message-ID: <20140325033754.0D25518727E@r-forge.r-project.org> Author: murray Date: 2014-03-25 04:37:51 +0100 (Tue, 25 Mar 2014) New Revision: 887 Modified: pkg/ChangeLog pkg/inst/unitTests/runit.int64.R Log: Add check on Rcpp:::capabilities for LONG LONG support before trying to run the 64-bit int tests. Modified: pkg/ChangeLog =================================================================== --- pkg/ChangeLog 2014-03-25 03:01:01 UTC (rev 886) +++ pkg/ChangeLog 2014-03-25 03:37:51 UTC (rev 887) @@ -1,5 +1,8 @@ 2014-03-24 Murray Stokely + * inst/unitTests/runit.int64.R (test.int64): Check + Rcpp:::capabilities() to ensure we have long long support in Rcpp + before trying to run the 64-bit integer tests. 
* inst/unitTests/data/unittest.proto: Comment out TestEnumWithDupValue. This causes problems with the newest libprotobuf-2.5 which now wants an option to be specified to Modified: pkg/inst/unitTests/runit.int64.R =================================================================== --- pkg/inst/unitTests/runit.int64.R 2014-03-25 03:01:01 UTC (rev 886) +++ pkg/inst/unitTests/runit.int64.R 2014-03-25 03:37:51 UTC (rev 887) @@ -27,6 +27,10 @@ warning("Can't test 64-bit int type on platform with sizeof(long long) < 8") return } + if (Rcpp:::capabilities()["long long"] != TRUE) { + warning("Can't test 64-bit int type without RCPP_LONG_LONG support.") + return + } a <- new(protobuf_unittest.TestAllTypes) a$repeated_int64 <- 1 From noreply at r-forge.r-project.org Tue Mar 25 04:43:17 2014 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Tue, 25 Mar 2014 04:43:17 +0100 (CET) Subject: [Rprotobuf-commits] r888 - pkg/inst/unitTests Message-ID: <20140325034317.B6FDD186AB8@r-forge.r-project.org> Author: murray Date: 2014-03-25 04:43:17 +0100 (Tue, 25 Mar 2014) New Revision: 888 Modified: pkg/inst/unitTests/runit.int64.R Log: Correct bug about comparing boolean with NA, spotted by Dirk. 
Modified: pkg/inst/unitTests/runit.int64.R =================================================================== --- pkg/inst/unitTests/runit.int64.R 2014-03-25 03:37:51 UTC (rev 887) +++ pkg/inst/unitTests/runit.int64.R 2014-03-25 03:43:17 UTC (rev 888) @@ -27,7 +27,8 @@ warning("Can't test 64-bit int type on platform with sizeof(long long) < 8") return } - if (Rcpp:::capabilities()["long long"] != TRUE) { + if (is.na(Rcpp:::capabilities()["long long"]) || + Rcpp:::capabilities()["long long"] != TRUE) { warning("Can't test 64-bit int type without RCPP_LONG_LONG support.") return } From noreply at r-forge.r-project.org Tue Mar 25 05:24:22 2014 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Tue, 25 Mar 2014 05:24:22 +0100 (CET) Subject: [Rprotobuf-commits] r889 - pkg Message-ID: <20140325042422.816B4187170@r-forge.r-project.org> Author: jeroenooms Date: 2014-03-25 05:24:17 +0100 (Tue, 25 Mar 2014) New Revision: 889 Modified: pkg/DESCRIPTION Log: bump version to test Modified: pkg/DESCRIPTION =================================================================== --- pkg/DESCRIPTION 2014-03-25 03:43:17 UTC (rev 888) +++ pkg/DESCRIPTION 2014-03-25 04:24:17 UTC (rev 889) @@ -1,5 +1,5 @@ Package: RProtoBuf -Version: 0.4.0 +Version: 0.4.1 Date: $Date$ Author: Romain Francois, Dirk Eddelbuettel, Murray Stokely and Jeroen Ooms Maintainer: Dirk Eddelbuettel From noreply at r-forge.r-project.org Tue Mar 25 05:26:26 2014 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Tue, 25 Mar 2014 05:26:26 +0100 (CET) Subject: [Rprotobuf-commits] r890 - pkg/inst Message-ID: <20140325042626.2BCA61855D5@r-forge.r-project.org> Author: murray Date: 2014-03-25 05:26:25 +0100 (Tue, 25 Mar 2014) New Revision: 890 Modified: pkg/inst/NEWS.Rd Log: Bump up version number and date assuming we can release this tomorrow as 0.4.1. 
Modified: pkg/inst/NEWS.Rd =================================================================== --- pkg/inst/NEWS.Rd 2014-03-25 04:24:17 UTC (rev 889) +++ pkg/inst/NEWS.Rd 2014-03-25 04:26:25 UTC (rev 890) @@ -2,7 +2,7 @@ \title{News for Package \pkg{RProtoBuf}} \newcommand{\cpkg}{\href{http://CRAN.R-project.org/package=#1}{\pkg{#1}}} -\section{Changes in RProtoBuf version 0.4.x (2014-XX-XX)}{ +\section{Changes in RProtoBuf version 0.4.1 (2014-03-25)}{ \itemize{ \item Document and add a test for the deprecated group functionality. From noreply at r-forge.r-project.org Tue Mar 25 18:41:38 2014 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Tue, 25 Mar 2014 18:41:38 +0100 (CET) Subject: [Rprotobuf-commits] r891 - in pkg: . inst Message-ID: <20140325174138.8D342184298@r-forge.r-project.org> Author: edd Date: 2014-03-25 18:41:37 +0100 (Tue, 25 Mar 2014) New Revision: 891 Modified: pkg/ChangeLog pkg/inst/NEWS.Rd Log: wrapping up 0.4.1 Modified: pkg/ChangeLog =================================================================== --- pkg/ChangeLog 2014-03-25 04:26:25 UTC (rev 890) +++ pkg/ChangeLog 2014-03-25 17:41:37 UTC (rev 891) @@ -1,3 +1,7 @@ +2014-03-25 Dirk Eddelbuettel + + * DESCRIPTION (Version): Release 0.4.1 + 2014-03-24 Murray Stokely * inst/unitTests/runit.int64.R (test.int64): Check @@ -29,16 +33,15 @@ The type is converted to lower case, which users often forget. * vignettes/RProtoBuf-intro.Rnw (subsection{Deprecated Feature: Protocol Buffer Groups}): document groups. - * inst/CITATION: Add a manual CITATION file pointing to the JSS - preprint on arxiv.org in addition to the normal auto-generated - citation(). + * inst/CITATION: Add a manual CITATION file pointing to the preprint + on arxiv.org in addition to the normal auto-generated citation(). 
2014-01-19 Dirk Eddelbuettel * DESCRIPTION: Finalize release 0.4.0 with initial Windows support * src/S4_classes.h: Adapt to S4 access function renaming in Rcpp - 0.10.7 and later, with special thanks to Kevin Ushey + 0.10.7 and later, with special thanks to Kevin Ushey 2014-01-13 Murray Stokely Modified: pkg/inst/NEWS.Rd =================================================================== --- pkg/inst/NEWS.Rd 2014-03-25 04:26:25 UTC (rev 890) +++ pkg/inst/NEWS.Rd 2014-03-25 17:41:37 UTC (rev 891) @@ -6,8 +6,7 @@ \itemize{ \item Document and add a test for the deprecated group functionality. - \item Add a \code{CITATION} file pointing to the JSS preprint on - arXiv.org. + \item Add a \code{CITATION} file pointing to the arXiv.org preprint. \item Fix a bug in the \code{show} method for \code{EnumDescriptor} types. \item Import all top-level enums from imported \code{.proto} files. @@ -16,6 +15,7 @@ allow_alias). } } + \section{Changes in RProtoBuf version 0.4.0 (2014-01-14)}{ \itemize{ \item Changes to support CRAN builds for MS Windows.