[Rcpp-commits] r3669 - in pkg/RcppCNPy: . inst man src
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Fri Jul 6 03:08:18 CEST 2012
Author: edd
Date: 2012-07-06 03:08:16 +0200 (Fri, 06 Jul 2012)
New Revision: 3669
Modified:
pkg/RcppCNPy/ChangeLog
pkg/RcppCNPy/DESCRIPTION
pkg/RcppCNPy/inst/NEWS.Rd
pkg/RcppCNPy/man/RcppCNPy-package.Rd
pkg/RcppCNPy/src/cnpy.cpp
pkg/RcppCNPy/src/cnpy.h
pkg/RcppCNPy/src/cnpyMod.cpp
Log:
Version 0.0.2 with automatic transpose-on-read as well as support for reading vectors and gzip-compressed files
Modified: pkg/RcppCNPy/ChangeLog
===================================================================
--- pkg/RcppCNPy/ChangeLog 2012-07-05 02:50:19 UTC (rev 3668)
+++ pkg/RcppCNPy/ChangeLog 2012-07-06 01:08:16 UTC (rev 3669)
@@ -1,3 +1,14 @@
+2012-07-05 Dirk Eddelbuettel <edd at dexter>
+
+ * DESCRIPTION (Version): Version 0.0.2
+
+ * src/cnpyMod.cpp: Added transpose() method to transparently deal
+ with the Fortran-vs-C storage order difference between Python and R.
+ Also added support for reading vectors.
+
+ * src/cnpy.{cpp,h}: Added support for loading from .npy.gz files
+ which is automagically enabled if the filename ends in ".gz"
+
2012-07-04 Dirk Eddelbuettel <edd at debian.org>
* Initial version 0.0.1
Modified: pkg/RcppCNPy/DESCRIPTION
===================================================================
--- pkg/RcppCNPy/DESCRIPTION 2012-07-05 02:50:19 UTC (rev 3668)
+++ pkg/RcppCNPy/DESCRIPTION 2012-07-06 01:08:16 UTC (rev 3669)
@@ -1,13 +1,14 @@
Package: RcppCNPy
Type: Package
Title: Rcpp bindings for NumPy files
-Version: 0.0.1
-Date: 2012-07-04
+Version: 0.0.2
+Date: $Date$
Author: Dirk Eddelbuettel
Maintainer: Dirk Eddelbuettel <edd at debian.org>
Description: This package provides access to the cnpy library by Carl Rogers
which provides read and write facilities for files created with (or for) the
- NumPY extension for Python.
+ NumPY extension for Python. Vectors and matrices of either numeric or
+ integer types can be read or written. Compressed files can be read as well.
License: GPL (>= 2)
LazyLoad: yes
Depends: methods, Rcpp (>= 0.9.13)
Property changes on: pkg/RcppCNPy/DESCRIPTION
___________________________________________________________________
Added: svn:keywords
+ Date
Modified: pkg/RcppCNPy/inst/NEWS.Rd
===================================================================
--- pkg/RcppCNPy/inst/NEWS.Rd 2012-07-05 02:50:19 UTC (rev 3668)
+++ pkg/RcppCNPy/inst/NEWS.Rd 2012-07-06 01:08:16 UTC (rev 3669)
@@ -2,6 +2,12 @@
\title{News for Package \pkg{RcppCNPy}}
\newcommand{\cpkg}{\href{http://CRAN.R-project.org/package=#1}{\pkg{#1}}}
+\section{Changes in version 0.0.2 (2012-07-05)}{
+ \itemize{
+ \item Added automatic use of transpose to automagically account for
+ Fortran-vs-C major storage defaults between Python and R.
+ \item Added support for reading gzip'ed files ending in ".npy.gz"
+ }
+}
\section{Changes in version 0.0.1 (2012-07-04)}{
\itemize{
\item Initial version, as a straightforward Rcpp modules wrap around
Modified: pkg/RcppCNPy/man/RcppCNPy-package.Rd
===================================================================
--- pkg/RcppCNPy/man/RcppCNPy-package.Rd 2012-07-05 02:50:19 UTC (rev 3668)
+++ pkg/RcppCNPy/man/RcppCNPy-package.Rd 2012-07-06 01:08:16 UTC (rev 3669)
@@ -15,8 +15,7 @@
Support is currently still pretty limited to reading and writing of
either vectors or matrices of either numeric or integer type.
- \emph{Note that matrices must be transposed first to make up for
- Fortran ordering.}
+ Files with \code{gzip} compression can be transparently read as well.
}
\usage{
npyLoad(filename, type="numeric")
Modified: pkg/RcppCNPy/src/cnpy.cpp
===================================================================
--- pkg/RcppCNPy/src/cnpy.cpp 2012-07-05 02:50:19 UTC (rev 3668)
+++ pkg/RcppCNPy/src/cnpy.cpp 2012-07-06 01:08:16 UTC (rev 3669)
@@ -133,6 +133,23 @@
return arr;
}
+// Reads one array from an already opened gzip stream positioned at the start
+// of an .npy file: the header is parsed first, then the payload is inflated
+// into a freshly allocated buffer stored in the returned NpyArray.
+// NOTE(review): arr.data is presumably released later via NpyArray::destruct() -- confirm in cnpy.h.
+// NOTE(review): gzread takes an unsigned length and returns int, so payloads
+// beyond INT_MAX bytes cannot be read in a single call -- confirm this is acceptable.
+cnpy::NpyArray gzload_the_npy_file(gzFile fp) {
+    unsigned int* shape = 0;      // filled in (new[]) by parse_npy_gzheader
+    unsigned int ndims = 0, word_size = 0;
+    cnpy::parse_npy_gzheader(fp,word_size,shape,ndims);
+    unsigned long size = 1;       // element count: product of all extents
+    for(unsigned int i = 0;i < ndims;i++) size *= shape[i];
+
+    cnpy::NpyArray arr;
+    arr.word_size = word_size;
+    arr.shape = std::vector<unsigned int>(shape,shape+ndims);
+    delete[] shape;               // the vector holds its own copy; do not leak the parser's array
+    const unsigned long nbytes = size*word_size;
+    arr.data = new char[nbytes];
+    // gzread returns the number of uncompressed bytes delivered (or -1), so a
+    // truncated stream shows up as a short count and not only as a negative value.
+    const int nread = gzread(fp,arr.data,nbytes);
+    if (nread < 0 || static_cast<unsigned long>(nread) != nbytes) REprintf("cnpy::gzload_the_npy_file error");
+    return arr;
+}
+
cnpy::npz_t cnpy::npz_load(std::string fname) {
FILE* fp = fopen(fname.c_str(),"rb");
@@ -225,5 +242,51 @@
return arr;
}
+// Loads a gzip-compressed .npy file into an NpyArray.
+//   fname - path of the compressed file
+// If the file cannot be opened the problem is reported and an empty array
+// (no shape, zero word size, null data) is returned; the null handle is no
+// longer passed on to the header parser or to gzclose.
+cnpy::NpyArray cnpy::npy_gzload(std::string fname) {
+    gzFile fp = gzopen(fname.c_str(), "rb");
+    if(!fp) {
+        REprintf("npy_gzload: Error! Unable to open file %s!\n",fname.c_str());
+        NpyArray empty;
+        empty.word_size = 0;
+        empty.data = 0;   // NOTE(review): assumes destruct() copes with a null data pointer -- confirm in cnpy.h
+        return empty;
+    }
+    NpyArray arr = gzload_the_npy_file(fp);
+    gzclose(fp);
+    return arr;
+}
+// Parses the header of a gzip-compressed .npy stream.
+//   fp        - open gzFile positioned at the very start of the file
+//   word_size - out: bytes per element, taken from the 'descr' entry
+//   shape     - out: new[]-allocated array with ndims extents; the caller must delete[] it
+//   ndims     - out: number of dimensions (0 for an empty shape tuple "()")
+// Fortran-ordered and non-little-endian files are rejected through Rassert.
+// NOTE(review): Rassert is presumed to abort on a false condition -- confirm in cnpy.cpp.
+void cnpy::parse_npy_gzheader(gzFile fp, unsigned int& word_size, unsigned int*& shape, unsigned int& ndims) {
+    char buffer[256];
+    // skip the 11 leading bytes that precede the dictionary text (per the NPY
+    // format the magic string, version and header length come first)
+    if (gzread(fp,buffer,sizeof(char)*11) != 11) REprintf("cnpy::parse_npy_gzheader read discrepancy");
+    // gzgets returns NULL on error or end of file; never build a std::string from a null pointer
+    const char* line = gzgets(fp, buffer, sizeof(buffer));
+    const std::string header = line ? line : "";
+    Rassert(!header.empty() && header[header.size()-1] == '\n', "header ended improperly");
+    int loc1, loc2;
+
+    //fortran order
+    loc1 = header.find("fortran_order")+16;   // 13 characters of the key plus "': "
+    // compare exactly four characters: a five-character slice can never equal "True"
+    bool fortran_order = (header.substr(loc1,4) == "True");
+    Rassert(!fortran_order, "fortran_order error");
+
+    //shape: the text between the parentheses, e.g. "3, 4" or "5,"
+    loc1 = header.find("(");
+    loc2 = header.find(")");
+    std::string str_shape = header.substr(loc1+1,loc2-loc1-1);
+    if(str_shape.empty()) ndims = 0;                               // "()" : do not index an empty string
+    else if(str_shape[str_shape.size()-1] == ',') ndims = 1;       // "(n,)" : one dimension
+    else ndims = std::count(str_shape.begin(),str_shape.end(),',')+1;
+    shape = new unsigned int[ndims];
+    for(unsigned int i = 0;i < ndims;i++) {
+        loc1 = str_shape.find(",");
+        shape[i] = atoi(str_shape.substr(0,loc1).c_str());
+        str_shape = str_shape.substr(loc1+1);                      // drop the extent just consumed
+    }
+
+    //endian, word size, data type
+    loc1 = header.find("descr")+9;            // 5 characters of the key plus "': '"
+    bool littleEndian = (header[loc1] == '<' ? true : false);
+    Rassert(littleEndian, "littleEndian error");
+
+    // the type character at header[loc1+1] is not validated here
+
+    std::string str_ws = header.substr(loc1+2);
+    loc2 = str_ws.find("'");
+    word_size = atoi(str_ws.substr(0,loc2).c_str());
+}
Modified: pkg/RcppCNPy/src/cnpy.h
===================================================================
--- pkg/RcppCNPy/src/cnpy.h 2012-07-05 02:50:19 UTC (rev 3668)
+++ pkg/RcppCNPy/src/cnpy.h 2012-07-06 01:08:16 UTC (rev 3669)
@@ -47,6 +47,8 @@
npz_t npz_load(std::string fname);
NpyArray npz_load(std::string fname, std::string varname);
NpyArray npy_load(std::string fname);
+ NpyArray npy_gzload(std::string fname);
+ void parse_npy_gzheader(gzFile fp,unsigned int& word_size, unsigned int*& shape, unsigned int& ndims);
template<typename T> std::vector<char>& operator+=(std::vector<char>& lhs, const T rhs) {
//write in little endian
Modified: pkg/RcppCNPy/src/cnpyMod.cpp
===================================================================
--- pkg/RcppCNPy/src/cnpyMod.cpp 2012-07-05 02:50:19 UTC (rev 3668)
+++ pkg/RcppCNPy/src/cnpyMod.cpp 2012-07-06 01:08:16 UTC (rev 3669)
@@ -22,20 +22,67 @@
#include <Rcpp.h> // need to include the main Rcpp header file only
#include "cnpy.h" // (local copy of) header for cnpy library
+template <typename T>
+T transpose(const T & m) { // transpose for IntegerMatrix / NumericMatrix, see array.c in R; returns a new matrix, m is not modified
+ int k = m.rows(), n = m.cols(); // m is k rows by n columns
+ //Rcpp::Rcout << "Transposing " << n << " by " << k << std::endl;
+ T z(n, k); // result has the dimensions swapped: n rows by k columns
+ int sz1 = n*k-1; // wrap-around distance: total element count minus one
+ typename T::iterator mit, zit;
+ for (mit = m.begin(), zit = z.begin(); mit != m.end(); mit++, zit += n) { // source advances by 1, destination by n
+ if (zit >= z.end()) zit -= sz1; // ran past the end of z: step back by n*k-1 and carry on from the next slot
+ *zit = *mit;
+ }
+ return(z);
+}
+
+// hasEnding: true when 'full' ends with the suffix 'ending'; cf stackoverflow.com/questions/874134
+bool hasEnding(std::string const &full, std::string const &ending) {
+ if (full.length() >= ending.length()) { // a suffix cannot be longer than the string itself
+ return(0 == full.compare(full.length() - ending.length(), ending.length(), ending)); // compare the tail of 'full' with 'ending'
+ } else {
+ return false;
+ }
+}
+
Rcpp::RObject npyLoad(const std::string & filename, const std::string & type) {
- cnpy::NpyArray arr = cnpy::npy_load(filename);
+
+ cnpy::NpyArray arr;
+
+ if (hasEnding(filename, ".gz")) {
+ arr = cnpy::npy_gzload(filename);
+ } else {
+ arr = cnpy::npy_load(filename);
+ }
+
std::vector<unsigned int> shape = arr.shape;
- if (shape.size() != 2) Rf_error("Wrong dimension");
SEXP ret = R_NilValue; // allows us to assign either int or numeric
- if (type == "numeric") {
- ret = Rcpp::NumericMatrix(shape[0], shape[1], reinterpret_cast<double*>(arr.data));
- } else if (type == "integer") {
- ret = Rcpp::IntegerMatrix(shape[0], shape[1], reinterpret_cast<int*>(arr.data));
+ if (shape.size() == 1) {
+ if (type == "numeric") {
+ double *p = reinterpret_cast<double*>(arr.data);
+ ret = Rcpp::NumericVector(p, p + shape[0]);
+ } else if (type == "integer") {
+ int *p = reinterpret_cast<int*>(arr.data);
+ ret = Rcpp::IntegerVector(p, p + shape[0]);
+ } else {
+ arr.destruct();
+ REprintf("Unsupported type in npyLoad");
+ }
+ } else if (shape.size() == 2) {
+ if (type == "numeric") {
+ // invert dimension for creation, and then tranpose to correct Fortran-vs-C storage
+ ret = transpose(Rcpp::NumericMatrix(shape[1], shape[0], reinterpret_cast<double*>(arr.data)));
+ } else if (type == "integer") {
+ // invert dimension for creation, and then tranpose to correct Fortran-vs-C storage
+ ret = transpose(Rcpp::IntegerMatrix(shape[1], shape[0], reinterpret_cast<double*>(arr.data)));
+ } else {
+ arr.destruct();
+ REprintf("Unsupported type in npyLoad");
+ }
} else {
+ Rf_error("Unsupported dimension in npyLoad");
arr.destruct();
- REprintf("Unsupported type in npyLoad");
}
- arr.destruct();
return ret;
}
@@ -77,7 +124,7 @@
&npyLoad, // function pointer to helper function defined above
List::create( Named("filename"), // function arguments including default value
Named("type") = "numeric"),
- "read an npy file into a numeric or integer matrix");
+ "read an npy file into a numeric or integer vector or matrix");
function("npySave", // name of the identifier at the R level
&npySave, // function pointer to helper function defined above
More information about the Rcpp-commits
mailing list