[Rprotobuf-commits] r516 - in pkg: . R inst/unitTests src

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Wed Aug 28 03:13:36 CEST 2013


Author: murray
Date: 2013-08-28 03:13:35 +0200 (Wed, 28 Aug 2013)
New Revision: 516

Modified:
   pkg/ChangeLog
   pkg/R/zzz.R
   pkg/inst/unitTests/runit.int64.R
   pkg/src/Rcppsupport.h
   pkg/src/extractors.cpp
   pkg/src/mutators.cpp
Log:
Add support for a new option("int64AsString") that controls whether
extractors for 64-bit integer fields return character strings or use
Rcpp's default wrap type which coerces to numeric, possibly losing
precision.

At any time 64-bit integer fields can now be set with strings, e.g.

a$optional_int64 <- "9007199254740993"

And now, by default, a$otional_int64 returns a numeric as now :

a$optional_int64
[1] 9.007199e+15

But we can get the full-precision string version by setting an option:

options("int64AsString"=TRUE)
a$optional_int64
[1] "9007199254740993"

Code clean up and documentation coming later.  Also this should just
be done in Rcpp.



Modified: pkg/ChangeLog
===================================================================
--- pkg/ChangeLog	2013-08-27 22:33:38 UTC (rev 515)
+++ pkg/ChangeLog	2013-08-28 01:13:35 UTC (rev 516)
@@ -1,5 +1,13 @@
 2013-08-27  Murray Stokely  <murray at FreeBSD.org>
 
+	* src/extractors.cpp (rprotobuf): Add support for a new
+	  option("int64AsString") that controls whether extractors for
+	  64-bit integer fields return character strings or use Rcpp's
+	  default wrap type which coerces to numeric, possibly losing
+	  precision.
+	* R/zzz.R (.onLoad): Initialize options("int64AsString" = FALSE).
+	* src/Rcppsupport.h (rprotobuf): Add RepeatedFieldImporter classes
+	  for int64 and uint64 that return strings instead of int64s.
 	* src/mutators.cpp (rprotobuf): Add support for setting int64
 	  fields as R character vectors that are converted to int64 or
 	  uint64 C++ types with std::stringstream.  This allows the user

Modified: pkg/R/zzz.R
===================================================================
--- pkg/R/zzz.R	2013-08-27 22:33:38 UTC (rev 515)
+++ pkg/R/zzz.R	2013-08-28 01:13:35 UTC (rev 516)
@@ -5,10 +5,9 @@
     ##.Call( "check_libprotobuf_version", minversion, PACKAGE = "RProtoBuf" )
     readProtoFiles( package=pkgname, lib.loc=libname )
     attachDescriptorPool( pos = length(search()) )
-
+    options("int64AsString" = FALSE)
     if( exists( ".httpd.handlers.env", asNamespace( "tools" ) ) ){
         e <- tools:::.httpd.handlers.env
         e[["RProtoBuf"]] <- RProtoBuf.http.handler
     }
 }
-

Modified: pkg/inst/unitTests/runit.int64.R
===================================================================
--- pkg/inst/unitTests/runit.int64.R	2013-08-27 22:33:38 UTC (rev 515)
+++ pkg/inst/unitTests/runit.int64.R	2013-08-28 01:13:35 UTC (rev 516)
@@ -28,17 +28,25 @@
     # Verify we can set character strings of large 64-bit ints
     a$repeated_int64 <- c("9007199254740992", "9007199254740993")
     checkEquals(length(a$repeated_int64), 2)
-    # Verify we can set any garbage string to an int64.
+    # Verify we can't set any garbage string to a repeated int64.
     checkException(a$repeated_int64 <-c("invalid", "invalid"))
 
+    a$optional_int64 <- 1
+    a$optional_int64 <- "2"
+    checkEquals(a$optional_int64, 2)
+    # Verify we can't set any garbage string to an optional int64.
+    checkException(a$optional_int64 <- "invalid")
+
     a <- protobuf_unittest.TestAllTypes$readASCII(
            file(system.file("unitTests", "data", "int64.ascii",
                             package="RProtoBuf")))
     # And can read them in OK from an ASCII file.
     checkEquals(length(a$repeated_int64), 2)
 
-    # TODO(mstokely): But the accessors still silently cast to double
-    # which removes uniqueness.  Fix this.
-    # Uncomment when RProtoBuf / Rcpp are unbroken with respect to 64-bit ints.
-    # checkEquals(length(unique(a$repeated_int64)), 2)
+    # By default, when they are read as numerics, only 1 unique value
+    checkEquals(length(unique(a$repeated_int64)), 1)
+
+    options("int64AsString" = TRUE)
+    # But we can see they are different if we treat them as strings.
+    checkEquals(length(unique(a$repeated_int64)), 2)
 }

Modified: pkg/src/Rcppsupport.h
===================================================================
--- pkg/src/Rcppsupport.h	2013-08-27 22:33:38 UTC (rev 515)
+++ pkg/src/Rcppsupport.h	2013-08-28 01:13:35 UTC (rev 516)
@@ -32,6 +32,52 @@
 struct enum_field{} ;
 struct message_field{} ;
 
+class Int64AsStringRepeatedFieldImporter {
+public:
+    // Probably want to convert to strings here.
+    typedef string r_import_type;
+    Int64AsStringRepeatedFieldImporter(const GPB::Reflection* ref_ ,
+                                       const GPB::Message& message_,
+                                       const GPB::FieldDescriptor* field_):
+            ref(ref_), message(message_), field(field_){}
+    inline int size() const {
+        return ref->FieldSize( message, field ) ;
+    }
+    inline string get(int i) const {
+        stringstream stream;
+        int64 val = ref->GetRepeatedInt64(message, field, i) ;
+        stream << val;
+        return stream.str();
+    }
+  private:
+    const GPB::Reflection* ref ;
+    const GPB::Message& message ;
+    const GPB::FieldDescriptor* field ;
+};
+
+class UInt64AsStringRepeatedFieldImporter {
+public:
+    // Probably want to convert to strings here.
+    typedef string r_import_type;
+    UInt64AsStringRepeatedFieldImporter(const GPB::Reflection* ref_ ,
+                                        const GPB::Message& message_,
+                                        const GPB::FieldDescriptor* field_):
+            ref(ref_), message(message_), field(field_){}
+    inline int size() const {
+        return ref->FieldSize( message, field ) ;
+    }
+    inline string get(int i) const {
+        stringstream stream;
+        uint64 val = ref->GetRepeatedUInt64(message, field, i) ;
+        stream << val;
+        return stream.str();
+    }
+  private:
+    const GPB::Reflection* ref ;
+    const GPB::Message& message ;
+    const GPB::FieldDescriptor* field ;
+};
+
 template <typename T> class RepeatedFieldImporter{} ;
 
 #undef GENERATE__FIELD__IMPORTER__DECL

Modified: pkg/src/extractors.cpp
===================================================================
--- pkg/src/extractors.cpp	2013-08-27 22:33:38 UTC (rev 515)
+++ pkg/src/extractors.cpp	2013-08-28 01:13:35 UTC (rev 516)
@@ -23,7 +23,50 @@
 #include "Rcppsupport.h"
 
 namespace rprotobuf{
-	
+
+SEXP kInt64AsStringOptionName = Rf_install("int64AsString");
+
+// Rcpp::wrap silently coerces 64-bit integers to numerics
+// which drop precision for values between 2^53 - 2^64.
+// So, if an option is set, we return as a character string.
+// TODO(mstokely): Do more of this in Rcpp.
+SEXP PreciseRInt64Type(int64 val) {
+	if (Rf_asLogical(Rf_GetOption1(kInt64AsStringOptionName))) {
+		std::stringstream ss;
+		if ((ss << val).fail()) {
+			// This should not happen, its a bug in the code.
+			throwException(
+				"Error converting int64 to STRSXP, unset int64AsString option.",
+				"ConversionException");
+		}
+		// Rcpp::wrap of the string returns vector of single characters.
+		// So we must create this vector first to wrap.
+		std::vector<string> retlist;
+		retlist.push_back(ss.str());
+		return Rcpp::wrap(retlist);
+	} else {
+		return Rcpp::wrap(val);
+	}
+}
+
+SEXP PreciseRUInt64Type(uint64 val) {
+	if (Rf_asLogical(Rf_GetOption1(kInt64AsStringOptionName))) {
+		std::stringstream ss;
+		if ((ss << val).fail()) {
+			// This should not happen, its a bug in the code.
+			throwException(
+				"Error converting uint64 to STRSXP, unset int64AsString option.",
+				"ConversionException");
+		}
+		// Wrap the string version of this int64.
+		std::vector<string> retlist;
+		retlist.push_back(ss.str());
+		return Rcpp::wrap(retlist);
+	} else {
+		return Rcpp::wrap(val);
+	}
+}
+
 /**
  * extract a field from a message
  *
@@ -71,15 +114,33 @@
 
 			HANDLE_REPEATED_FIELD(CPPTYPE_INT32, GPB::int32) ;
     		HANDLE_REPEATED_FIELD(CPPTYPE_UINT32, GPB::uint32) ;
-#ifdef RCPP_HAS_LONG_LONG_TYPES
-    		HANDLE_REPEATED_FIELD(CPPTYPE_INT64, GPB::int64) ;
-    		HANDLE_REPEATED_FIELD(CPPTYPE_UINT64, GPB::uint64) ;
-#endif
     		HANDLE_REPEATED_FIELD(CPPTYPE_DOUBLE, double) ;
     		HANDLE_REPEATED_FIELD(CPPTYPE_FLOAT, float) ;
     		HANDLE_REPEATED_FIELD(CPPTYPE_BOOL, bool) ;
     		HANDLE_REPEATED_FIELD(CPPTYPE_ENUM, enum_field ) ;
     		HANDLE_REPEATED_FIELD(CPPTYPE_MESSAGE, message_field ) ;
+#ifdef RCPP_HAS_LONG_LONG_TYPES
+            // We can't handle these the same way, because Rcpp::wrap silently
+            // casts int64s to doubles which may cause us to lose precision.
+            case CPPTYPE_INT64:
+                if (Rf_asLogical(Rf_GetOption1(kInt64AsStringOptionName))) {
+                    return Rcpp::wrap(
+                        Int64AsStringRepeatedFieldImporter(ref, *message,
+                                                           fieldDesc));
+                } else {
+                    return Rcpp::wrap(
+                        RepeatedFieldImporter<int64>(ref, *message, fieldDesc));
+                }
+            case CPPTYPE_UINT64:
+                if (Rf_asLogical(Rf_GetOption1(kInt64AsStringOptionName))) {
+                    return Rcpp::wrap(
+                        UInt64AsStringRepeatedFieldImporter(ref, *message,
+                                                            fieldDesc));
+                } else {
+                    return Rcpp::wrap(
+                        RepeatedFieldImporter<uint64>(ref, *message, fieldDesc));
+                }
+#endif
 #undef HANDLE_REPEATED_FIELD
 
 		case CPPTYPE_STRING:
@@ -112,13 +173,17 @@
 
 		HANDLE_SINGLE_FIELD( CPPTYPE_INT32,  Int32 ); 
 		HANDLE_SINGLE_FIELD( CPPTYPE_UINT32, UInt32 ); 
-#ifdef RCPP_HAS_LONG_LONG_TYPES
-		HANDLE_SINGLE_FIELD( CPPTYPE_INT64,  Int64 );
-		HANDLE_SINGLE_FIELD( CPPTYPE_UINT64, UInt64 );
-#endif
 		HANDLE_SINGLE_FIELD( CPPTYPE_DOUBLE, Double );
 		HANDLE_SINGLE_FIELD( CPPTYPE_FLOAT, Float );
 		HANDLE_SINGLE_FIELD( CPPTYPE_BOOL, Bool );
+#ifdef RCPP_HAS_LONG_LONG_TYPES
+        // Handle these types separately since Rcpp::wrap doesn't
+        // do the right thing.
+        case CPPTYPE_INT64:
+            return PreciseRInt64Type(ref->GetInt64(*message, fieldDesc));
+        case CPPTYPE_UINT64:
+            return PreciseRUInt64Type(ref->GetUInt64(*message, fieldDesc));
+#endif
 #undef HANDLE_SINGLE_FIELD
 
 		case CPPTYPE_STRING:
@@ -146,4 +211,3 @@
 }
 
 } // namespace rprotobuf
-

Modified: pkg/src/mutators.cpp
===================================================================
--- pkg/src/mutators.cpp	2013-08-27 22:33:38 UTC (rev 515)
+++ pkg/src/mutators.cpp	2013-08-28 01:13:35 UTC (rev 516)
@@ -1030,13 +1030,56 @@
 
 			HANDLE_SINGLE_FIELD( CPPTYPE_INT32, Int32, GPB::int32) ;
 			HANDLE_SINGLE_FIELD( CPPTYPE_UINT32, UInt32, GPB::uint32) ;
-#ifdef RCPP_HAS_LONG_LONG_TYPES
-			HANDLE_SINGLE_FIELD( CPPTYPE_INT64, Int64, GPB::int64) ;
-			HANDLE_SINGLE_FIELD( CPPTYPE_UINT64, UInt64, GPB::uint64) ;
-#endif
 			HANDLE_SINGLE_FIELD( CPPTYPE_DOUBLE, Double, double) ;
 			HANDLE_SINGLE_FIELD( CPPTYPE_FLOAT, Float, float) ;
 			HANDLE_SINGLE_FIELD( CPPTYPE_BOOL, Bool, bool) ;
+#ifdef RCPP_HAS_LONG_LONG_TYPES
+			case CPPTYPE_INT64 :
+				{
+					// TODO(mstokely) Rcpp::as<int64> of a STRSEXP
+					// should just work for strings representing
+					// int64s.
+					if (TYPEOF(value) == STRSXP) {
+						const string int64str = COPYSTRING(CHAR(
+							STRING_ELT(value, 0)));
+						std::stringstream ss(int64str);
+                        GPB::int64 ret;
+						if ((ss >> ret).fail() || !(ss>>std::ws).eof()) {
+							throwException(
+								"Provided STRSXP cannot be cast to int64",
+								"CastException");
+						}
+						ref->SetInt64(message, field_desc, ret);
+						break ;
+					} else {
+						ref->SetInt64( message, field_desc, Rcpp::as<GPB::int64>(value));
+						break;
+					}
+				}
+			case CPPTYPE_UINT64 :
+				{
+					// TODO(mstokely) Rcpp::as<int64> of a STRSEXP
+					// should just work for strings representing
+					// int64s.
+					if (TYPEOF(value) == STRSXP) {
+						const string int64str = COPYSTRING(CHAR(
+							STRING_ELT(value, 0)));
+						std::stringstream ss(int64str);
+                        GPB::uint64 ret;
+						if ((ss >> ret).fail() || !(ss>>std::ws).eof()) {
+							throwException(
+								"Provided STRSXP cannot be cast to uint64",
+								"CastException");
+						}
+						ref->SetUInt64(message, field_desc, ret);
+						break ;
+					} else {
+						ref->SetUInt64(message, field_desc,
+									   Rcpp::as<GPB::uint64>(value));
+						break;
+					}
+				}
+#endif
 #undef HANDLE_SINGLE_FIELD
 			default:
 				throwException("Unsupported type", "ConversionException");



More information about the Rprotobuf-commits mailing list