[Rcpp-commits] r3668 - in pkg: . RcppCNPy RcppCNPy/R RcppCNPy/inst RcppCNPy/man RcppCNPy/src
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Thu Jul 5 04:50:20 CEST 2012
Author: edd
Date: 2012-07-05 04:50:19 +0200 (Thu, 05 Jul 2012)
New Revision: 3668
Added:
pkg/RcppCNPy/
pkg/RcppCNPy/ChangeLog
pkg/RcppCNPy/DESCRIPTION
pkg/RcppCNPy/NAMESPACE
pkg/RcppCNPy/R/
pkg/RcppCNPy/R/cnpy.R
pkg/RcppCNPy/cleanup
pkg/RcppCNPy/inst/
pkg/RcppCNPy/inst/NEWS.Rd
pkg/RcppCNPy/inst/cnpy-LICENSE
pkg/RcppCNPy/man/
pkg/RcppCNPy/man/RcppCNPy-package.Rd
pkg/RcppCNPy/src/
pkg/RcppCNPy/src/Makevars
pkg/RcppCNPy/src/Makevars.win
pkg/RcppCNPy/src/cnpy.cpp
pkg/RcppCNPy/src/cnpy.h
pkg/RcppCNPy/src/cnpyMod.cpp
Log:
o New package to read and write Python 'NumPy' npy files
o Currently limited to vectors and matrices of integer or numeric type
o Matrices need a transpose too as NumPy uses Fortran mode
Added: pkg/RcppCNPy/ChangeLog
===================================================================
--- pkg/RcppCNPy/ChangeLog (rev 0)
+++ pkg/RcppCNPy/ChangeLog 2012-07-05 02:50:19 UTC (rev 3668)
@@ -0,0 +1,10 @@
+2012-07-04 Dirk Eddelbuettel <edd at debian.org>
+
+ * Initial version 0.0.1
+
+ * src/cnpy.{cpp,h}: Numerous minor changes to conform to CRAN Policy
+
+ * src/cnpyMod.cpp: Rcpp modules wrapping of two functions npyLoad and
+ npySave which work on one- and two-dimensional objects of integer or
+ numeric type. Note that Matrices need a transpose due to numpy
+ Fortran ordering.
Added: pkg/RcppCNPy/DESCRIPTION
===================================================================
--- pkg/RcppCNPy/DESCRIPTION (rev 0)
+++ pkg/RcppCNPy/DESCRIPTION 2012-07-05 02:50:19 UTC (rev 3668)
@@ -0,0 +1,15 @@
+Package: RcppCNPy
+Type: Package
+Title: Rcpp bindings for NumPy files
+Version: 0.0.1
+Date: 2012-07-04
+Author: Dirk Eddelbuettel
+Maintainer: Dirk Eddelbuettel <edd at debian.org>
+Description: This package provides access to the cnpy library by Carl Rogers
+ which provides read and write facilities for files created with (or for) the
+ NumPy extension for Python.
+License: GPL (>= 2)
+LazyLoad: yes
+Depends: methods, Rcpp (>= 0.9.13)
+LinkingTo: Rcpp
+RcppModules: cnpy
Added: pkg/RcppCNPy/NAMESPACE
===================================================================
--- pkg/RcppCNPy/NAMESPACE (rev 0)
+++ pkg/RcppCNPy/NAMESPACE 2012-07-05 02:50:19 UTC (rev 3668)
@@ -0,0 +1,3 @@
+useDynLib(RcppCNPy)
+exportPattern("^[[:alpha:]]+")
+import(Rcpp)
Added: pkg/RcppCNPy/R/cnpy.R
===================================================================
--- pkg/RcppCNPy/R/cnpy.R (rev 0)
+++ pkg/RcppCNPy/R/cnpy.R 2012-07-05 02:50:19 UTC (rev 3668)
@@ -0,0 +1,4 @@
+
+loadModule("cnpy", TRUE)
+
+
Added: pkg/RcppCNPy/cleanup
===================================================================
--- pkg/RcppCNPy/cleanup (rev 0)
+++ pkg/RcppCNPy/cleanup 2012-07-05 02:50:19 UTC (rev 3668)
@@ -0,0 +1,6 @@
+#!/bin/sh
+
+rm -f src/*.o src/*.so
+
+find . -name \*~ -exec rm {} \;
+
Property changes on: pkg/RcppCNPy/cleanup
___________________________________________________________________
Added: svn:executable
+ *
Added: pkg/RcppCNPy/inst/NEWS.Rd
===================================================================
--- pkg/RcppCNPy/inst/NEWS.Rd (rev 0)
+++ pkg/RcppCNPy/inst/NEWS.Rd 2012-07-05 02:50:19 UTC (rev 3668)
@@ -0,0 +1,15 @@
+\name{NEWS}
+\title{News for Package \pkg{RcppCNPy}}
+\newcommand{\cpkg}{\href{http://CRAN.R-project.org/package=#1}{\pkg{#1}}}
+
+\section{Changes in version 0.0.1 (2012-07-04)}{
+ \itemize{
+ \item Initial version, as a straightforward Rcpp modules wrap around
+ the \code{cnpy} library by Carl Rogers (on github under a MIT
+ license).
+ \item At present, \code{npy} files can be read and written for
+ vectors and matrices of either \code{numeric} or \code{integer} type.
+ Note however that matrices are currently \emph{transposed} because
+ of the default Fortran ordering done by numpy.
+ }
+}
Added: pkg/RcppCNPy/inst/cnpy-LICENSE
===================================================================
--- pkg/RcppCNPy/inst/cnpy-LICENSE (rev 0)
+++ pkg/RcppCNPy/inst/cnpy-LICENSE 2012-07-05 02:50:19 UTC (rev 3668)
@@ -0,0 +1,21 @@
+The MIT License
+
+Copyright (c) Carl Rogers, 2011
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
Added: pkg/RcppCNPy/man/RcppCNPy-package.Rd
===================================================================
--- pkg/RcppCNPy/man/RcppCNPy-package.Rd (rev 0)
+++ pkg/RcppCNPy/man/RcppCNPy-package.Rd 2012-07-05 02:50:19 UTC (rev 3668)
@@ -0,0 +1,78 @@
+\name{RcppCNPy-package}
+\alias{RcppCNPy-package}
+\alias{RcppCNPy}
+\alias{npyLoad}
+\alias{npySave}
+\docType{package}
+\title{
+ File access to data files written by (or for) NumPy (Numeric Python) modules
+}
+\description{
+ This package provides access to the \code{cnpy} library by Carl Rogers
+ which provides read and write facilities for files created with (or for) the
+ NumPy extension for Python.
+
+ Support is currently still pretty limited to reading and writing of
+ either vectors or matrices of either numeric or integer type.
+
+ \emph{Note that matrices must be transposed first to make up for
+ Fortran ordering.}
+}
+\usage{
+ npyLoad(filename, type="numeric")
+ npySave(filename, object, mode="w")
+}
+\arguments{
+ \item{filename}{string with (path and) filename for a \code{npy} object file}
+ \item{type}{string with type 'numeric' (default) or 'integer'}
+ \item{object}{an R object, currently limited to a vector or matrix of
+ either integer or numeric type}
+ \item{mode}{a one-character string indicating whether files are
+ appended to ("a") or written ("w", the default)}
+}
+\details{
+ \tabular{ll}{
+ Package: \tab RcppCNPy\cr
+ Type: \tab Package\cr
+ Version: \tab 0.0.1\cr
+ Date: \tab 2012-07-04\cr
+ License: \tab GPL (>= 2)\cr
+ LazyLoad: \tab yes\cr
+ }
+
+ The package uses Rcpp modules to provide R bindings \code{npyLoadNM()}
+ and \code{npyLoadIM()} which wrap the \code{npy_load()}
+ function. Currently, only two-dimensional matrices are supported but
+ this can be extended easily to vectors.
+
+ The following minor changes were made to \code{cnpy}:
+ \itemize{
+ \item the \code{printf(...); abort()} combination was replaced in
+ three instances with \code{REprintf(...)} per CRAN Policy guidelines.
+ \item \code{long long} was commented out in two places (which we can revert once
+ CRAN switches to a new compiler and c++11 becomes standard) and one
+ \code{unsigned long long} was replaced by \code{unsigned long}.
+ \item several unused variables were commented out.
+ }
+
+}
+\author{
+ Dirk Eddelbuettel provided the binding to R (using the Rcpp package).
+
+ Carl Rogers wrote the underlying \code{cnpy} library, which is
+ released under the MIT license.
+
+ Maintainer: Dirk Eddelbuettel <edd at debian.org>
+}
+\references{
+ Rcpp, in particular the Rcpp modules documentation.
+
+ The \code{cnpy} repository: \url{https://github.com/rogersce/cnpy}
+}
+\keyword{package}
+\seealso{
+ \code{\link[Rcpp:Rcpp-package]{Rcpp}}
+}
+\examples{
+ ## TODO
+}
Added: pkg/RcppCNPy/src/Makevars
===================================================================
--- pkg/RcppCNPy/src/Makevars (rev 0)
+++ pkg/RcppCNPy/src/Makevars 2012-07-05 02:50:19 UTC (rev 3668)
@@ -0,0 +1,27 @@
+## Use the R_HOME indirection to support installations of multiple R versions
+PKG_LIBS = `$(R_HOME)/bin/Rscript -e "Rcpp:::LdFlags()"`
+
+## As an alternative, one can also add this code in a file 'configure'
+##
+## PKG_LIBS=`${R_HOME}/bin/Rscript -e "Rcpp:::LdFlags()"`
+##
+## sed -e "s|@PKG_LIBS@|${PKG_LIBS}|" \
+## src/Makevars.in > src/Makevars
+##
+## which together with the following file 'src/Makevars.in'
+##
+## PKG_LIBS = @PKG_LIBS@
+##
+## can be used to create src/Makevars dynamically. This scheme is more
+## powerful and can be expanded to also check for and link with other
+## libraries. It should be complemented by a file 'cleanup'
+##
+## rm src/Makevars
+##
+## which removes the autogenerated file src/Makevars.
+##
+## Of course, autoconf can also be used to write configure files. This is
+## done by a number of packages, but recommended only for more advanced users
+## comfortable with autoconf and its related tools.
+
+
Added: pkg/RcppCNPy/src/Makevars.win
===================================================================
--- pkg/RcppCNPy/src/Makevars.win (rev 0)
+++ pkg/RcppCNPy/src/Makevars.win 2012-07-05 02:50:19 UTC (rev 3668)
@@ -0,0 +1,3 @@
+
+## Use the R_HOME indirection to support installations of multiple R versions
+PKG_LIBS = $(shell "${R_HOME}/bin${R_ARCH_BIN}/Rscript.exe" -e "Rcpp:::LdFlags()")
Added: pkg/RcppCNPy/src/cnpy.cpp
===================================================================
--- pkg/RcppCNPy/src/cnpy.cpp (rev 0)
+++ pkg/RcppCNPy/src/cnpy.cpp 2012-07-05 02:50:19 UTC (rev 3668)
@@ -0,0 +1,229 @@
+//Copyright (C) 2011 Carl Rogers
+//Released under MIT License
+//license available in LICENSE file, or at http://www.opensource.org/licenses/mit-license.php
+
+#include"cnpy.h"
+#include<complex>
+#include<cstdlib>
+#include<algorithm>
+#include<cstring>
+#include<iomanip>
+
+char cnpy::BigEndianTest() {
+ unsigned char x[] = {1,0};
+ short y = *(short*) x;
+ return y == 1 ? '<' : '>';
+}
+
+char cnpy::map_type(const std::type_info& t)
+{
+ if(t == typeid(float) ) return 'f';
+ if(t == typeid(double) ) return 'f';
+ if(t == typeid(long double) ) return 'f';
+
+ if(t == typeid(int) ) return 'i';
+ if(t == typeid(char) ) return 'i';
+ if(t == typeid(short) ) return 'i';
+ if(t == typeid(long) ) return 'i';
+ //if(t == typeid(long long) ) return 'i';
+
+ if(t == typeid(unsigned char) ) return 'u';
+ if(t == typeid(unsigned short) ) return 'u';
+ if(t == typeid(unsigned long) ) return 'u';
+ //if(t == typeid(unsigned long long) ) return 'u';
+ if(t == typeid(unsigned int) ) return 'u';
+
+ if(t == typeid(bool) ) return 'b';
+
+ if(t == typeid(std::complex<float>) ) return 'c';
+ if(t == typeid(std::complex<double>) ) return 'c';
+ if(t == typeid(std::complex<long double>) ) return 'c';
+
+ else return '?';
+}
+
+template<> std::vector<char>& cnpy::operator+=(std::vector<char>& lhs, const std::string rhs) {
+ lhs.insert(lhs.end(),rhs.begin(),rhs.end());
+ return lhs;
+}
+
+template<> std::vector<char>& cnpy::operator+=(std::vector<char>& lhs, const char* rhs) {
+ //write in little endian
+ unsigned char len = strlen(rhs);
+ for(unsigned char byte = 0; byte < len; byte++) {
+ lhs.push_back(rhs[byte]);
+ }
+ return lhs;
+}
+
+void cnpy::parse_npy_header(FILE* fp, unsigned int& word_size, unsigned int*& shape, unsigned int& ndims) {
+ char buffer[256];
+ if (fread(buffer,sizeof(char),11,fp) != 11) REprintf("cnpy::parse_npy_header read discprepancy");
+ std::string header = fgets(buffer,256,fp);
+ Rassert(header[header.size()-1] == '\n', "header ended improperly");
+
+ int loc1, loc2;
+
+ //fortran order
+ loc1 = header.find("fortran_order")+16;
+ bool fortran_order = (header.substr(loc1,5) == "True" ? true : false);
+ Rassert(!fortran_order, "fortran_order error");
+
+ //shape
+ loc1 = header.find("(");
+ loc2 = header.find(")");
+ std::string str_shape = header.substr(loc1+1,loc2-loc1-1);
+ if(str_shape[str_shape.size()-1] == ',') ndims = 1;
+ else ndims = std::count(str_shape.begin(),str_shape.end(),',')+1;
+ shape = new unsigned int[ndims];
+ for(unsigned int i = 0;i < ndims;i++) {
+ loc1 = str_shape.find(",");
+ shape[i] = atoi(str_shape.substr(0,loc1).c_str());
+ str_shape = str_shape.substr(loc1+1);
+ }
+
+ //endian, word size, data type
+ loc1 = header.find("descr")+9;
+ bool littleEndian = (header[loc1] == '<' ? true : false);
+ Rassert(littleEndian, "littleEndian error");
+
+ //char type = header[loc1+1];
+ //assert(type == map_type(T);
+
+ std::string str_ws = header.substr(loc1+2);
+ loc2 = str_ws.find("'");
+ word_size = atoi(str_ws.substr(0,loc2).c_str());
+}
+
+void cnpy::parse_zip_footer(FILE* fp, unsigned short& nrecs, unsigned int& global_header_size, unsigned int& global_header_offset)
+{
+ std::vector<char> footer(22);
+ fseek(fp,-22,SEEK_END);
+ if (fread(&footer[0],sizeof(char),22,fp) != 22) REprintf("cnpy::parse_zip_footer read discprepancy");
+
+ unsigned short disk_no, disk_start, nrecs_on_disk, comment_len;
+ disk_no = *(unsigned short*) &footer[4];
+ disk_start = *(unsigned short*) &footer[6];
+ nrecs_on_disk = *(unsigned short*) &footer[8];
+ nrecs = *(unsigned short*) &footer[10];
+ global_header_size = *(unsigned int*) &footer[12];
+ global_header_offset = *(unsigned int*) &footer[16];
+ comment_len = *(unsigned short*) &footer[20];
+
+ Rassert(disk_no == 0, "disk_no is != 0");
+ Rassert(disk_start == 0, "disk_start != 0");
+ Rassert(nrecs_on_disk == nrecs, "nrecs_on_disk != nrecs");
+ Rassert(comment_len == 0, "comment_len != 0");
+}
+
+cnpy::NpyArray load_the_npy_file(FILE* fp) {
+ unsigned int* shape;
+ unsigned int ndims, word_size;
+ cnpy::parse_npy_header(fp,word_size,shape,ndims);
+ //unsigned long long size = 1; //long long so no overflow when multiplying by word_size
+ unsigned long size = 1; //long long so no overflow when multiplying by word_size
+ for(unsigned int i = 0;i < ndims;i++) size *= shape[i];
+
+ cnpy::NpyArray arr;
+ arr.word_size = word_size;
+ arr.shape = std::vector<unsigned int>(shape,shape+ndims);
+ arr.data = new char[size*word_size];
+ //int nread = fread(arr.data,word_size,size,fp);
+ if (fread(arr.data,word_size,size,fp) != size) REprintf("cnpy::load_the_npy_file read size discrepancy");
+ return arr;
+}
+
+cnpy::npz_t cnpy::npz_load(std::string fname) {
+ FILE* fp = fopen(fname.c_str(),"rb");
+
+ if(!fp) REprintf("npz_load: Error! Unable to open file %s!\n",fname.c_str());
+ Rassert(fp, "fp error");
+
+ cnpy::npz_t arrays;
+
+ while(1) {
+ std::vector<char> local_header(30);
+ if (fread(&local_header[0],sizeof(char),30,fp) != 30) REprintf("cnpy::npz_load read discprepancy on header");
+
+ //if we've reached the global header, stop reading
+ if(local_header[2] != 0x03 || local_header[3] != 0x04) break;
+
+ //read in the variable name
+ unsigned short name_len = *(unsigned short*) &local_header[26];
+ std::string varname(name_len,' ');
+ if (fread(&varname[0],sizeof(char),name_len,fp) != name_len) REprintf("cnpy::npz_load read discprepancy on name_len");
+
+ //erase the lagging .npy
+ varname.erase(varname.end()-4,varname.end());
+
+ //read in the extra field
+ unsigned short extra_field_len = *(unsigned short*) &local_header[28];
+ if(extra_field_len > 0) {
+ std::vector<char> buff(extra_field_len);
+ if (fread(&buff[0],sizeof(char),extra_field_len,fp) != extra_field_len) REprintf("cnpy::npz_load read discprepancy on extra_field_len");
+ }
+
+ arrays[varname] = load_the_npy_file(fp);
+ }
+
+ fclose(fp);
+ return arrays;
+}
+
+cnpy::NpyArray cnpy::npz_load(std::string fname, std::string varname) {
+ FILE* fp = fopen(fname.c_str(),"rb");
+
+ if(!fp) {
+ REprintf("npz_load: Error! Unable to open file %s!\n",fname.c_str());
+ }
+
+ while(1) {
+ std::vector<char> local_header(30);
+ if (fread(&local_header[0],sizeof(char),30,fp) != 30) REprintf("cnpy::npz_load read discprepancy on header");
+
+ //if we've reached the global header, stop reading
+ if(local_header[2] != 0x03 || local_header[3] != 0x04) break;
+
+ //read in the variable name
+ unsigned short name_len = *(unsigned short*) &local_header[26];
+ std::string vname(name_len,' ');
+ if (fread(&vname[0],sizeof(char),name_len,fp) != name_len) REprintf("cnpy::npz_load read discprepancy on name_len");
+ vname.erase(vname.end()-4,vname.end()); //erase the lagging .npy
+
+ //read in the extra field
+ unsigned short extra_field_len = *(unsigned short*) &local_header[28];
+ fseek(fp,extra_field_len,SEEK_CUR); //skip past the extra field
+
+ if(vname == varname) {
+ NpyArray array = load_the_npy_file(fp);
+ fclose(fp);
+ return array;
+ }
+ else {
+ //skip past the data
+ unsigned int size = *(unsigned int*) &local_header[22];
+ fseek(fp,size,SEEK_CUR);
+ }
+ }
+
+ fclose(fp);
+ REprintf("npz_load: Error! Variable name %s not found in %s!\n",varname.c_str(),fname.c_str());
+ // never reached -- not satisfying -Wall -pedantic
+}
+
+cnpy::NpyArray cnpy::npy_load(std::string fname) {
+
+ FILE* fp = fopen(fname.c_str(), "rb");
+
+ if(!fp) {
+ REprintf("npy_load: Error! Unable to open file %s!\n",fname.c_str());
+ }
+
+ NpyArray arr = load_the_npy_file(fp);
+
+ fclose(fp);
+ return arr;
+}
+
+
+
Added: pkg/RcppCNPy/src/cnpy.h
===================================================================
--- pkg/RcppCNPy/src/cnpy.h (rev 0)
+++ pkg/RcppCNPy/src/cnpy.h 2012-07-05 02:50:19 UTC (rev 3668)
@@ -0,0 +1,240 @@
+//Copyright (C) 2011 Carl Rogers
+//Released under MIT License
+//license available in LICENSE file, or at http://www.opensource.org/licenses/mit-license.php
+
+#ifndef LIBCNPY_H_
+#define LIBCNPY_H_
+
+#include<string>
+#include<sstream>
+#include<vector>
+#include<cstdio>
+#include<typeinfo>
+#include<iostream>
+#include<cassert>
+#include<zlib.h>
+#include<map>
+
+#include <R_ext/Print.h> // for REprintf
+
+namespace cnpy {
+
+ inline void Rassert(bool val, std::string txt) {
+ if ( ! val) REprintf(txt.c_str());
+ }
+
+ struct NpyArray {
+ char* data;
+ std::vector<unsigned int> shape;
+ unsigned int word_size;
+ void destruct() {delete[] data;}
+ };
+
+ struct npz_t : public std::map<std::string, NpyArray>
+ {
+ void destruct()
+ {
+ npz_t::iterator it = this->begin();
+ for(; it != this->end(); ++it) (*it).second.destruct();
+ }
+ };
+
+ char BigEndianTest();
+ char map_type(const std::type_info& t);
+ template<typename T> std::vector<char> create_npy_header(const T* data, const unsigned int* shape, const unsigned int ndims);
+ void parse_npy_header(FILE* fp,unsigned int& word_size, unsigned int*& shape, unsigned int& ndims);
+ void parse_zip_footer(FILE* fp, unsigned short& nrecs, unsigned int& global_header_size, unsigned int& global_header_offset);
+ npz_t npz_load(std::string fname);
+ NpyArray npz_load(std::string fname, std::string varname);
+ NpyArray npy_load(std::string fname);
+
+ template<typename T> std::vector<char>& operator+=(std::vector<char>& lhs, const T rhs) {
+ //write in little endian
+ for(unsigned char byte = 0; byte < sizeof(T); byte++) {
+ char val = *((char*)&rhs+byte);
+ lhs.push_back(val);
+ }
+ return lhs;
+ }
+
+ template<> std::vector<char>& operator+=(std::vector<char>& lhs, const std::string rhs);
+ template<> std::vector<char>& operator+=(std::vector<char>& lhs, const char* rhs);
+
+
+ template<typename T> std::string tostring(T i, int pad = 0, char padval = ' ') {
+ std::stringstream s;
+ s << i;
+ return s.str();
+ }
+
+ template<typename T> void npy_save(std::string fname, const T* data, const unsigned int* shape, const unsigned int ndims, std::string mode = "w") {
+ FILE* fp = NULL;
+
+ if(mode == "a") fp = fopen(fname.c_str(),"r+b");
+
+ if(fp) {
+ //file exists. we need to append to it. read the header, modify the array size
+ unsigned int word_size, tmp_dims;
+ unsigned int* tmp_shape = 0;
+ parse_npy_header(fp,word_size,tmp_shape,tmp_dims);
+
+ if(word_size != sizeof(T)) {
+ REprintf("cnpy error: %s has word size %d but npy_save appending data sized %d\n", fname.c_str(), word_size, sizeof(T));
+ //assert( word_size == sizeof(T) );
+ }
+ if(tmp_dims != ndims) {
+ REprintf("cnpy error: npy_save attempting to append misdimensioned data to %s\n", fname.c_str());
+ //assert(tmp_dims == ndims);
+ }
+
+ for(unsigned int i = 1; i < ndims; i++) {
+ if(shape[i] != tmp_shape[i]) {
+ REprintf("cnpy error: npy_save attempting to append misshaped data to %s\n", fname.c_str());
+ //assert(shape[i] == tmp_shape[i]);
+ }
+ }
+ tmp_shape[0] += shape[0];
+
+ fseek(fp,0,SEEK_SET);
+ std::vector<char> header = create_npy_header(data,tmp_shape,ndims);
+ fwrite(&header[0],sizeof(char),header.size(),fp);
+ fseek(fp,0,SEEK_END);
+
+ delete[] tmp_shape;
+ }
+ else {
+ fp = fopen(fname.c_str(),"wb");
+ std::vector<char> header = create_npy_header(data,shape,ndims);
+ fwrite(&header[0],sizeof(char),header.size(),fp);
+ }
+
+ unsigned int nels = 1;
+ for(unsigned int i = 0;i < ndims;i++) nels *= shape[i];
+
+ fwrite(data,sizeof(T),nels,fp);
+ fclose(fp);
+ }
+
+ template<typename T> void npz_save(std::string zipname, std::string fname, const T* data, const unsigned int* shape, const unsigned int ndims, std::string mode = "w")
+ {
+ //first, append a .npy to the fname
+ fname += ".npy";
+
+ //now, on with the show
+ FILE* fp = NULL;
+ unsigned short nrecs = 0;
+ unsigned int global_header_offset = 0;
+ std::vector<char> global_header;
+
+ if(mode == "a") fp = fopen(zipname.c_str(),"r+b");
+
+ if(fp) {
+ //zip file exists. we need to add a new npy file to it.
+ //first read the footer. this gives us the offset and size of the global header
+ //then read and store the global header.
+ //below, we will write the the new data at the start of the global header then append the global header and footer below it
+ unsigned int global_header_size;
+ parse_zip_footer(fp,nrecs,global_header_size,global_header_offset);
+ fseek(fp,global_header_offset,SEEK_SET);
+ global_header.resize(global_header_size);
+ fread(&global_header[0],sizeof(char),global_header_size,fp);
+ fseek(fp,global_header_offset,SEEK_SET);
+ }
+ else {
+ fp = fopen(zipname.c_str(),"wb");
+ }
+
+ std::vector<char> npy_header = create_npy_header(data,shape,ndims);
+
+ unsigned long nels = 1;
+ for (unsigned int m=0; m<ndims; m++ ) nels *= shape[m];
+ int nbytes = nels*sizeof(T) + npy_header.size();
+
+ //get the CRC of the data to be added
+ unsigned int crc = crc32(0L,(unsigned char*)&npy_header[0],npy_header.size());
+ crc = crc32(crc,(unsigned char*)data,nels*sizeof(T));
+
+ //build the local header
+ std::vector<char> local_header;
+ local_header += "PK"; //first part of sig
+ local_header += (unsigned short) 0x0403; //second part of sig
+ local_header += (unsigned short) 20; //min version to extract
+ local_header += (unsigned short) 0; //general purpose bit flag
+ local_header += (unsigned short) 0; //compression method
+ local_header += (unsigned short) 0; //file last mod time
+ local_header += (unsigned short) 0; //file last mod date
+ local_header += (unsigned int) crc; //crc
+ local_header += (unsigned int) nbytes; //compressed size
+ local_header += (unsigned int) nbytes; //uncompressed size
+ local_header += (unsigned short) fname.size(); //fname length
+ local_header += (unsigned short) 0; //extra field length
+ local_header += fname;
+
+ //build global header
+ global_header += "PK"; //first part of sig
+ global_header += (unsigned short) 0x0201; //second part of sig
+ global_header += (unsigned short) 20; //version made by
+ global_header.insert(global_header.end(),local_header.begin()+4,local_header.begin()+30);
+ global_header += (unsigned short) 0; //file comment length
+ global_header += (unsigned short) 0; //disk number where file starts
+ global_header += (unsigned short) 0; //internal file attributes
+ global_header += (unsigned int) 0; //external file attributes
+ global_header += (unsigned int) global_header_offset; //relative offset of local file header, since it begins where the global header used to begin
+ global_header += fname;
+
+ //build footer
+ std::vector<char> footer;
+ footer += "PK"; //first part of sig
+ footer += (unsigned short) 0x0605; //second part of sig
+ footer += (unsigned short) 0; //number of this disk
+ footer += (unsigned short) 0; //disk where footer starts
+ footer += (unsigned short) (nrecs+1); //number of records on this disk
+ footer += (unsigned short) (nrecs+1); //total number of records
+ footer += (unsigned int) global_header.size(); //nbytes of global headers
+ footer += (unsigned int) (global_header_offset + nbytes + local_header.size()); //offset of start of global headers, since global header now starts after newly written array
+ footer += (unsigned short) 0; //zip file comment length
+
+ //write everything
+ fwrite(&local_header[0],sizeof(char),local_header.size(),fp);
+ fwrite(&npy_header[0],sizeof(char),npy_header.size(),fp);
+ fwrite(data,sizeof(T),nels,fp);
+ fwrite(&global_header[0],sizeof(char),global_header.size(),fp);
+ fwrite(&footer[0],sizeof(char),footer.size(),fp);
+ fclose(fp);
+ }
+
+ template<typename T> std::vector<char> create_npy_header(const T* data, const unsigned int* shape, const unsigned int ndims) {
+
+ std::vector<char> dict;
+ dict += "{'descr': '";
+ dict += BigEndianTest();
+ dict += map_type(typeid(T));
+ dict += tostring(sizeof(T));
+ dict += "', 'fortran_order': False, 'shape': (";
+ dict += tostring(shape[0]);
+ for(unsigned int i = 1;i < ndims;i++) {
+ dict += ", ";
+ dict += tostring(shape[i]);
+ }
+ if(ndims == 1) dict += ",";
+ dict += "), }";
+ //pad with spaces so that preamble+dict is modulo 16 bytes. preamble is 10 bytes. dict needs to end with \n
+ int remainder = 16 - (10 + dict.size()) % 16;
+ dict.insert(dict.end(),remainder,' ');
+ dict.back() = '\n';
+
+ std::vector<char> header;
+ header += (char) 0x93;
+ header += "NUMPY";
+ header += (char) 0x01; //major version of numpy format
+ header += (char) 0x00; //minor version of numpy format
+ header += (unsigned short) dict.size();
+ header.insert(header.end(),dict.begin(),dict.end());
+
+ return header;
+ }
+
+
+}
+
+#endif
Added: pkg/RcppCNPy/src/cnpyMod.cpp
===================================================================
--- pkg/RcppCNPy/src/cnpyMod.cpp (rev 0)
+++ pkg/RcppCNPy/src/cnpyMod.cpp 2012-07-05 02:50:19 UTC (rev 3668)
@@ -0,0 +1,89 @@
+// -*- mode: C++; c-indent-level: 4; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+//
+// cnpyMod.cpp: Rcpp R/C++ modules interface to cnpy
+//
+// Copyright (C) 2012 Dirk Eddelbuettel
+//
+// This file is part of RcppCNPy.
+//
+// RcppCNPy is free software: you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 2 of the License, or
+// (at your option) any later version.
+//
+// RcppCNPy is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with RcppCNPy. If not, see <http://www.gnu.org/licenses/>.
+
+#include <Rcpp.h> // need to include the main Rcpp header file only
+#include "cnpy.h" // (local copy of) header for cnpy library
+
+Rcpp::RObject npyLoad(const std::string & filename, const std::string & type) {
+ cnpy::NpyArray arr = cnpy::npy_load(filename);
+ std::vector<unsigned int> shape = arr.shape;
+ if (shape.size() != 2) Rf_error("Wrong dimension");
+ SEXP ret = R_NilValue; // allows us to assign either int or numeric
+ if (type == "numeric") {
+ ret = Rcpp::NumericMatrix(shape[0], shape[1], reinterpret_cast<double*>(arr.data));
+ } else if (type == "integer") {
+ ret = Rcpp::IntegerMatrix(shape[0], shape[1], reinterpret_cast<int*>(arr.data));
+ } else {
+ arr.destruct();
+ REprintf("Unsupported type in npyLoad");
+ }
+ arr.destruct();
+ return ret;
+}
+
+void npySave(std::string filename, Rcpp::RObject x, std::string mode) {
+ if (::Rf_isMatrix(x)) {
+ if (::Rf_isInteger(x)) {
+ Rcpp::IntegerMatrix mat(x);
+ std::vector<unsigned int> shape = Rcpp::as<std::vector<unsigned int> >(Rcpp::IntegerVector::create(mat.nrow(), mat.ncol()));
+ cnpy::npy_save(filename, mat.begin(), &(shape[0]), 2, mode);
+ } else if (::Rf_isNumeric(x)) {
+ Rcpp::NumericMatrix mat(x);
+ std::vector<unsigned int> shape = Rcpp::as<std::vector<unsigned int> >(Rcpp::IntegerVector::create(mat.nrow(), mat.ncol()));
+ cnpy::npy_save(filename, mat.begin(), &(shape[0]), 2, mode);
+ } else {
+ REprintf("Unsupported matrix type\n");
+ }
+ } else if (::Rf_isVector(x)) {
+ if (::Rf_isInteger(x)) {
+ Rcpp::IntegerVector vec(x);
+ std::vector<unsigned int> shape = Rcpp::as<std::vector<unsigned int> >(Rcpp::IntegerVector::create(vec.length()));
+ cnpy::npy_save(filename, vec.begin(), &(shape[0]), 1, mode);
+ } else if (::Rf_isNumeric(x)) {
+ Rcpp::NumericVector vec(x);
+ std::vector<unsigned int> shape = Rcpp::as<std::vector<unsigned int> >(Rcpp::IntegerVector::create(vec.length()));
+ cnpy::npy_save(filename, vec.begin(), &(shape[0]), 1, mode);
+ } else {
+ REprintf("Unsupported vector type\n");
+ }
+ } else {
+ REprintf("Unsupported type\n");
+ }
+}
+
+RCPP_MODULE(cnpy){
+
+ using namespace Rcpp;
+
+ function("npyLoad", // name of the identifier at the R level
+ &npyLoad, // function pointer to helper function defined above
+ List::create( Named("filename"), // function arguments including default value
+ Named("type") = "numeric"),
+ "read an npy file into a numeric or integer matrix");
+
+ function("npySave", // name of the identifier at the R level
+ &npySave, // function pointer to helper function defined above
+ List::create( Named("filename"), // function arguments including default value
+ Named("object"),
+ Named("mode") = "w"),
+ "save an R object (vector or matrix of type integer or numeric) to an npy file");
+
+}
More information about the Rcpp-commits
mailing list