[Rprotobuf-commits] r516 - in pkg: . R inst/unitTests src
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Wed Aug 28 03:13:36 CEST 2013
Author: murray
Date: 2013-08-28 03:13:35 +0200 (Wed, 28 Aug 2013)
New Revision: 516
Modified:
pkg/ChangeLog
pkg/R/zzz.R
pkg/inst/unitTests/runit.int64.R
pkg/src/Rcppsupport.h
pkg/src/extractors.cpp
pkg/src/mutators.cpp
Log:
Add support for a new option("int64AsString") that controls whether
extractors for 64-bit integer fields return character strings or use
Rcpp's default wrap type which coerces to numeric, possibly losing
precision.
At any time 64-bit integer fields can now be set with strings, e.g.
a$optional_int64 <- "9007199254740993"
And now, by default, a$otional_int64 returns a numeric as now :
a$optional_int64
[1] 9.007199e+15
But we can get the full-precision string version by setting an option:
options("int64AsString"=TRUE)
a$optional_int64
[1] "9007199254740993"
Code clean up and documentation coming later. Also this should just
be done in Rcpp.
Modified: pkg/ChangeLog
===================================================================
--- pkg/ChangeLog 2013-08-27 22:33:38 UTC (rev 515)
+++ pkg/ChangeLog 2013-08-28 01:13:35 UTC (rev 516)
@@ -1,5 +1,13 @@
2013-08-27 Murray Stokely <murray at FreeBSD.org>
+ * src/extractors.cpp (rprotobuf): Add support for a new
+ option("int64AsString") that controls whether extractors for
+ 64-bit integer fields return character strings or use Rcpp's
+ default wrap type which coerces to numeric, possibly losing
+ precision.
+ * R/zzz.R (.onLoad): Initialize options("int64AsString" = FALSE).
+ * src/Rcppsupport.h (rprotobuf): Add RepeatedFieldImporter classes
+ for int64 and uint64 that return strings instead of int64s.
* src/mutators.cpp (rprotobuf): Add support for setting int64
fields as R character vectors that are converted to int64 or
uint64 C++ types with std::stringstream. This allows the user
Modified: pkg/R/zzz.R
===================================================================
--- pkg/R/zzz.R 2013-08-27 22:33:38 UTC (rev 515)
+++ pkg/R/zzz.R 2013-08-28 01:13:35 UTC (rev 516)
@@ -5,10 +5,9 @@
##.Call( "check_libprotobuf_version", minversion, PACKAGE = "RProtoBuf" )
readProtoFiles( package=pkgname, lib.loc=libname )
attachDescriptorPool( pos = length(search()) )
-
+ options("int64AsString" = FALSE)
if( exists( ".httpd.handlers.env", asNamespace( "tools" ) ) ){
e <- tools:::.httpd.handlers.env
e[["RProtoBuf"]] <- RProtoBuf.http.handler
}
}
-
Modified: pkg/inst/unitTests/runit.int64.R
===================================================================
--- pkg/inst/unitTests/runit.int64.R 2013-08-27 22:33:38 UTC (rev 515)
+++ pkg/inst/unitTests/runit.int64.R 2013-08-28 01:13:35 UTC (rev 516)
@@ -28,17 +28,25 @@
# Verify we can set character strings of large 64-bit ints
a$repeated_int64 <- c("9007199254740992", "9007199254740993")
checkEquals(length(a$repeated_int64), 2)
- # Verify we can set any garbage string to an int64.
+ # Verify we can't set any garbage string to a repeated int64.
checkException(a$repeated_int64 <-c("invalid", "invalid"))
+ a$optional_int64 <- 1
+ a$optional_int64 <- "2"
+ checkEquals(a$optional_int64, 2)
+ # Verify we can't set any garbage string to an optional int64.
+ checkException(a$optional_int64 <- "invalid")
+
a <- protobuf_unittest.TestAllTypes$readASCII(
file(system.file("unitTests", "data", "int64.ascii",
package="RProtoBuf")))
# And can read them in OK from an ASCII file.
checkEquals(length(a$repeated_int64), 2)
- # TODO(mstokely): But the accessors still silently cast to double
- # which removes uniqueness. Fix this.
- # Uncomment when RProtoBuf / Rcpp are unbroken with respect to 64-bit ints.
- # checkEquals(length(unique(a$repeated_int64)), 2)
+ # By default, when they are read as numerics, only 1 unique value
+ checkEquals(length(unique(a$repeated_int64)), 1)
+
+ options("int64AsString" = TRUE)
+ # But we can see they are different if we treat them as strings.
+ checkEquals(length(unique(a$repeated_int64)), 2)
}
Modified: pkg/src/Rcppsupport.h
===================================================================
--- pkg/src/Rcppsupport.h 2013-08-27 22:33:38 UTC (rev 515)
+++ pkg/src/Rcppsupport.h 2013-08-28 01:13:35 UTC (rev 516)
@@ -32,6 +32,52 @@
struct enum_field{} ;
struct message_field{} ;
+class Int64AsStringRepeatedFieldImporter {
+public:
+ // Probably want to convert to strings here.
+ typedef string r_import_type;
+ Int64AsStringRepeatedFieldImporter(const GPB::Reflection* ref_ ,
+ const GPB::Message& message_,
+ const GPB::FieldDescriptor* field_):
+ ref(ref_), message(message_), field(field_){}
+ inline int size() const {
+ return ref->FieldSize( message, field ) ;
+ }
+ inline string get(int i) const {
+ stringstream stream;
+ int64 val = ref->GetRepeatedInt64(message, field, i) ;
+ stream << val;
+ return stream.str();
+ }
+ private:
+ const GPB::Reflection* ref ;
+ const GPB::Message& message ;
+ const GPB::FieldDescriptor* field ;
+};
+
+class UInt64AsStringRepeatedFieldImporter {
+public:
+ // Probably want to convert to strings here.
+ typedef string r_import_type;
+ UInt64AsStringRepeatedFieldImporter(const GPB::Reflection* ref_ ,
+ const GPB::Message& message_,
+ const GPB::FieldDescriptor* field_):
+ ref(ref_), message(message_), field(field_){}
+ inline int size() const {
+ return ref->FieldSize( message, field ) ;
+ }
+ inline string get(int i) const {
+ stringstream stream;
+ uint64 val = ref->GetRepeatedUInt64(message, field, i) ;
+ stream << val;
+ return stream.str();
+ }
+ private:
+ const GPB::Reflection* ref ;
+ const GPB::Message& message ;
+ const GPB::FieldDescriptor* field ;
+};
+
template <typename T> class RepeatedFieldImporter{} ;
#undef GENERATE__FIELD__IMPORTER__DECL
Modified: pkg/src/extractors.cpp
===================================================================
--- pkg/src/extractors.cpp 2013-08-27 22:33:38 UTC (rev 515)
+++ pkg/src/extractors.cpp 2013-08-28 01:13:35 UTC (rev 516)
@@ -23,7 +23,50 @@
#include "Rcppsupport.h"
namespace rprotobuf{
-
+
+SEXP kInt64AsStringOptionName = Rf_install("int64AsString");
+
+// Rcpp::wrap silently coerces 64-bit integers to numerics
+// which drop precision for values between 2^53 - 2^64.
+// So, if an option is set, we return as a character string.
+// TODO(mstokely): Do more of this in Rcpp.
+SEXP PreciseRInt64Type(int64 val) {
+ if (Rf_asLogical(Rf_GetOption1(kInt64AsStringOptionName))) {
+ std::stringstream ss;
+ if ((ss << val).fail()) {
+ // This should not happen, its a bug in the code.
+ throwException(
+ "Error converting int64 to STRSXP, unset int64AsString option.",
+ "ConversionException");
+ }
+ // Rcpp::wrap of the string returns vector of single characters.
+ // So we must create this vector first to wrap.
+ std::vector<string> retlist;
+ retlist.push_back(ss.str());
+ return Rcpp::wrap(retlist);
+ } else {
+ return Rcpp::wrap(val);
+ }
+}
+
+SEXP PreciseRUInt64Type(uint64 val) {
+ if (Rf_asLogical(Rf_GetOption1(kInt64AsStringOptionName))) {
+ std::stringstream ss;
+ if ((ss << val).fail()) {
+ // This should not happen, its a bug in the code.
+ throwException(
+ "Error converting uint64 to STRSXP, unset int64AsString option.",
+ "ConversionException");
+ }
+ // Wrap the string version of this int64.
+ std::vector<string> retlist;
+ retlist.push_back(ss.str());
+ return Rcpp::wrap(retlist);
+ } else {
+ return Rcpp::wrap(val);
+ }
+}
+
/**
* extract a field from a message
*
@@ -71,15 +114,33 @@
HANDLE_REPEATED_FIELD(CPPTYPE_INT32, GPB::int32) ;
HANDLE_REPEATED_FIELD(CPPTYPE_UINT32, GPB::uint32) ;
-#ifdef RCPP_HAS_LONG_LONG_TYPES
- HANDLE_REPEATED_FIELD(CPPTYPE_INT64, GPB::int64) ;
- HANDLE_REPEATED_FIELD(CPPTYPE_UINT64, GPB::uint64) ;
-#endif
HANDLE_REPEATED_FIELD(CPPTYPE_DOUBLE, double) ;
HANDLE_REPEATED_FIELD(CPPTYPE_FLOAT, float) ;
HANDLE_REPEATED_FIELD(CPPTYPE_BOOL, bool) ;
HANDLE_REPEATED_FIELD(CPPTYPE_ENUM, enum_field ) ;
HANDLE_REPEATED_FIELD(CPPTYPE_MESSAGE, message_field ) ;
+#ifdef RCPP_HAS_LONG_LONG_TYPES
+ // We can't handle these the same way, because Rcpp::wrap silently
+ // casts int64s to doubles which may cause us to lose precision.
+ case CPPTYPE_INT64:
+ if (Rf_asLogical(Rf_GetOption1(kInt64AsStringOptionName))) {
+ return Rcpp::wrap(
+ Int64AsStringRepeatedFieldImporter(ref, *message,
+ fieldDesc));
+ } else {
+ return Rcpp::wrap(
+ RepeatedFieldImporter<int64>(ref, *message, fieldDesc));
+ }
+ case CPPTYPE_UINT64:
+ if (Rf_asLogical(Rf_GetOption1(kInt64AsStringOptionName))) {
+ return Rcpp::wrap(
+ UInt64AsStringRepeatedFieldImporter(ref, *message,
+ fieldDesc));
+ } else {
+ return Rcpp::wrap(
+ RepeatedFieldImporter<uint64>(ref, *message, fieldDesc));
+ }
+#endif
#undef HANDLE_REPEATED_FIELD
case CPPTYPE_STRING:
@@ -112,13 +173,17 @@
HANDLE_SINGLE_FIELD( CPPTYPE_INT32, Int32 );
HANDLE_SINGLE_FIELD( CPPTYPE_UINT32, UInt32 );
-#ifdef RCPP_HAS_LONG_LONG_TYPES
- HANDLE_SINGLE_FIELD( CPPTYPE_INT64, Int64 );
- HANDLE_SINGLE_FIELD( CPPTYPE_UINT64, UInt64 );
-#endif
HANDLE_SINGLE_FIELD( CPPTYPE_DOUBLE, Double );
HANDLE_SINGLE_FIELD( CPPTYPE_FLOAT, Float );
HANDLE_SINGLE_FIELD( CPPTYPE_BOOL, Bool );
+#ifdef RCPP_HAS_LONG_LONG_TYPES
+ // Handle these types separately since Rcpp::wrap doesn't
+ // do the right thing.
+ case CPPTYPE_INT64:
+ return PreciseRInt64Type(ref->GetInt64(*message, fieldDesc));
+ case CPPTYPE_UINT64:
+ return PreciseRUInt64Type(ref->GetUInt64(*message, fieldDesc));
+#endif
#undef HANDLE_SINGLE_FIELD
case CPPTYPE_STRING:
@@ -146,4 +211,3 @@
}
} // namespace rprotobuf
-
Modified: pkg/src/mutators.cpp
===================================================================
--- pkg/src/mutators.cpp 2013-08-27 22:33:38 UTC (rev 515)
+++ pkg/src/mutators.cpp 2013-08-28 01:13:35 UTC (rev 516)
@@ -1030,13 +1030,56 @@
HANDLE_SINGLE_FIELD( CPPTYPE_INT32, Int32, GPB::int32) ;
HANDLE_SINGLE_FIELD( CPPTYPE_UINT32, UInt32, GPB::uint32) ;
-#ifdef RCPP_HAS_LONG_LONG_TYPES
- HANDLE_SINGLE_FIELD( CPPTYPE_INT64, Int64, GPB::int64) ;
- HANDLE_SINGLE_FIELD( CPPTYPE_UINT64, UInt64, GPB::uint64) ;
-#endif
HANDLE_SINGLE_FIELD( CPPTYPE_DOUBLE, Double, double) ;
HANDLE_SINGLE_FIELD( CPPTYPE_FLOAT, Float, float) ;
HANDLE_SINGLE_FIELD( CPPTYPE_BOOL, Bool, bool) ;
+#ifdef RCPP_HAS_LONG_LONG_TYPES
+ case CPPTYPE_INT64 :
+ {
+ // TODO(mstokely) Rcpp::as<int64> of a STRSEXP
+ // should just work for strings representing
+ // int64s.
+ if (TYPEOF(value) == STRSXP) {
+ const string int64str = COPYSTRING(CHAR(
+ STRING_ELT(value, 0)));
+ std::stringstream ss(int64str);
+ GPB::int64 ret;
+ if ((ss >> ret).fail() || !(ss>>std::ws).eof()) {
+ throwException(
+ "Provided STRSXP cannot be cast to int64",
+ "CastException");
+ }
+ ref->SetInt64(message, field_desc, ret);
+ break ;
+ } else {
+ ref->SetInt64( message, field_desc, Rcpp::as<GPB::int64>(value));
+ break;
+ }
+ }
+ case CPPTYPE_UINT64 :
+ {
+ // TODO(mstokely) Rcpp::as<int64> of a STRSEXP
+ // should just work for strings representing
+ // int64s.
+ if (TYPEOF(value) == STRSXP) {
+ const string int64str = COPYSTRING(CHAR(
+ STRING_ELT(value, 0)));
+ std::stringstream ss(int64str);
+ GPB::uint64 ret;
+ if ((ss >> ret).fail() || !(ss>>std::ws).eof()) {
+ throwException(
+ "Provided STRSXP cannot be cast to uint64",
+ "CastException");
+ }
+ ref->SetUInt64(message, field_desc, ret);
+ break ;
+ } else {
+ ref->SetUInt64(message, field_desc,
+ Rcpp::as<GPB::uint64>(value));
+ break;
+ }
+ }
+#endif
#undef HANDLE_SINGLE_FIELD
default:
throwException("Unsupported type", "ConversionException");
More information about the Rprotobuf-commits
mailing list