[Dplr-commits] r730 - in pkg/dplR: . R inst/unitTests src

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Sun Jan 19 21:00:33 CET 2014


Author: mvkorpel
Date: 2014-01-19 21:00:33 +0100 (Sun, 19 Jan 2014)
New Revision: 730

Added:
   pkg/dplR/src/dplR.c
Modified:
   pkg/dplR/ChangeLog
   pkg/dplR/DESCRIPTION
   pkg/dplR/R/exactmean.R
   pkg/dplR/R/gini.coef.R
   pkg/dplR/R/read.tucson.R
   pkg/dplR/R/sens1.R
   pkg/dplR/R/sens2.R
   pkg/dplR/R/tbrm.R
   pkg/dplR/inst/unitTests/runit.dplR.R
   pkg/dplR/inst/unitTests/runit.io.R
   pkg/dplR/src/dplR.h
   pkg/dplR/src/exactmean.c
   pkg/dplR/src/exactsum.c
   pkg/dplR/src/exactsum.h
   pkg/dplR/src/gini.c
   pkg/dplR/src/rcompact.c
   pkg/dplR/src/readloop.c
   pkg/dplR/src/redfit.c
   pkg/dplR/src/sens.c
   pkg/dplR/src/tbrm.c
Log:
- First changes for future dplR version 1.5.9
- Using .Call() instead of .C() when interfacing with C code
- Fixed a probably unnoticeable bug in the C code called by read.tucson()
- More efficient computation of the gini coefficient
- Some reorganization of the C code / headers
- More RUnit tests for gini.coef(), also one more for read.tucson()


Modified: pkg/dplR/ChangeLog
===================================================================
--- pkg/dplR/ChangeLog	2014-01-16 12:22:26 UTC (rev 729)
+++ pkg/dplR/ChangeLog	2014-01-19 20:00:33 UTC (rev 730)
@@ -1,3 +1,48 @@
+* CHANGES IN dplR VERSION 1.5.9
+
+Files: dplR.h, rcompact.c, redfit.c
+-----------------------------------
+
+- Restructured header inclusions
+- dplR.h declares new function dplRlength (internal use)
+
+File: dplR.c
+------------
+
+- New file for general C functions internally used by dplR
+
+Files: exactmean.c, exactmean.R, gini.c, gini.coef.R,
+readloop.c, read.tucson.R, sens.c, sens1.R, sens2.R, tbrm.c, tbrm.R
+-------------------------------------------------------------------
+
+- Use .Call() instead of .C() for interfacing with C code
+- The C functions need less support (arguments, argument checking) from
+  the calling R function
+
+Files: exactsum.c, exactsum.h
+-----------------------------
+
+- Reuse of code between function by using the C preprocessor
+- New (internal to dplR C code) function cumsum (cumulative sum,
+  overwrites input array)
+- size_t n
+
+File: gini.c
+------------
+
+- Simplified expression for gini coefficent reduces number of
+  arithmetic operations performed
+- Fewer calls to other functions (use of the new cumulative sum function)
+
+File: readloop.c
+----------------
+
+- Fixed buggy handling of malformed Tucson format series that have
+  no data and no stop marker. Previously, this hypothetical case
+  could have caused a read from a location just outside the array
+  bounds. However, in most cases that would probably not have been a
+  problem.
+
 * CHANGES IN dplR VERSION 1.5.8
 
 File: tbrm.Rd

Modified: pkg/dplR/DESCRIPTION
===================================================================
--- pkg/dplR/DESCRIPTION	2014-01-16 12:22:26 UTC (rev 729)
+++ pkg/dplR/DESCRIPTION	2014-01-19 20:00:33 UTC (rev 730)
@@ -2,8 +2,8 @@
 Package: dplR
 Type: Package
 Title: Dendrochronology Program Library in R
-Version: 1.5.8
-Date: 2014-01-15
+Version: 1.5.9
+Date: 2014-01-19
 Authors at R: c(person("Andy", "Bunn", role = c("aut", "cph",
         "cre", "trl"), email = "andy.bunn at wwu.edu"), person("Mikko",
         "Korpela", role = c("aut", "trl")), person("Franco", "Biondi",

Modified: pkg/dplR/R/exactmean.R
===================================================================
--- pkg/dplR/R/exactmean.R	2014-01-16 12:22:26 UTC (rev 729)
+++ pkg/dplR/R/exactmean.R	2014-01-19 20:00:33 UTC (rev 730)
@@ -1,9 +1,5 @@
 `exactmean` <- function(x)
 {
     ## Drops NA and NaN values!
-    y <- as.double(x[!is.na(x)])
-    n <- as.integer(length(y))
-    stopifnot(!is.na(n))
-    .C(dplR.mean,
-       y, n, result=NaN*double(1L), NAOK=TRUE, DUP=FALSE)$result
+    .Call(dplR.mean, as.double(x[!is.na(x)]))
 }

Modified: pkg/dplR/R/gini.coef.R
===================================================================
--- pkg/dplR/R/gini.coef.R	2014-01-16 12:22:26 UTC (rev 729)
+++ pkg/dplR/R/gini.coef.R	2014-01-19 20:00:33 UTC (rev 730)
@@ -1,8 +1,4 @@
 `gini.coef` <- function(x)
 {
-    y <- as.double(x[!is.na(x)])
-    n <- as.integer(length(y))
-    stopifnot(!is.na(n))
-    .C(dplR.gini,
-       y, n, result=double(1L), NAOK=TRUE, DUP=FALSE)$result
+    .Call(dplR.gini, as.double(x[!is.na(x)]))
 }

Modified: pkg/dplR/R/read.tucson.R
===================================================================
--- pkg/dplR/R/read.tucson.R	2014-01-16 12:22:26 UTC (rev 729)
+++ pkg/dplR/R/read.tucson.R	2014-01-19 20:00:33 UTC (rev 730)
@@ -305,20 +305,30 @@
     }
     series.ids <- unique(series)
     nseries <- length(series.ids)
-    series.index <- match(series, series.ids)
+    ## At this time match does not support long vectors in the second
+    ## argument and always returns integers, but let's check the
+    ## result anyway.
+    series.index <- tryCatch(as.integer(match(series, series.ids)),
+                             warning = conditionMessage,
+                             error = conditionMessage)
+    if (!is.integer(series.index)) {
+        stop(gettextf("series.index must be integer: %s",
+                      paste(as.character(series.index), collapse = ", "),
+                      domain = "R-dplR"))
+    }
     extra.col <- dat[[13]]
-    min.year <- min(decade.yr)
-    max.year <- ((max(decade.yr)+10) %/% 10) * 10
-    span <- max.year - min.year + 1
-    rw.vec <- NA*vector(mode="numeric", length=nseries*span)
-    scratch <- rep.int(as.integer(min.year-1), nseries)
-    prec.rproc <- rep.int(as.integer(1), nseries)
 
-    .C(rwl.readloop, series.index, decade.yr, as.vector(x),
-       nrow(x), ncol(x), as.integer(min.year), rw.vec,
-       as.integer(span), as.integer(nseries), scratch, prec.rproc,
-       NAOK=TRUE, DUP=FALSE)
-    rw.mat <- matrix(rw.vec, ncol=nseries, nrow=span)
+    res <- .Call(rwl.readloop, series.index, decade.yr, x)
+    rw.mat <- res[[1]]
+    min.year <- res[[2]]
+    prec.rproc <- res[[3]]
+    span <- nrow(rw.mat)
+    if (span == 0) {
+        rw.df <- as.data.frame(rw.mat)
+        names(rw.df) <- as.character(series.ids)
+        return(rw.df)
+    }
+    max.year <- min.year + (span - 1)
     rownames(rw.mat) <- min.year:max.year
 
     ## The operations in the loop depend on the precision of each series.

Modified: pkg/dplR/R/sens1.R
===================================================================
--- pkg/dplR/R/sens1.R	2014-01-16 12:22:26 UTC (rev 729)
+++ pkg/dplR/R/sens1.R	2014-01-19 20:00:33 UTC (rev 730)
@@ -1,8 +1,4 @@
 `sens1` <- function(x)
 {
-    y <- as.double(x[!is.na(x)])
-    n <- as.integer(length(y))
-    stopifnot(!is.na(n))
-    .C(dplR.sens1,
-       y, n, result=NaN*double(1L), NAOK=TRUE, DUP=FALSE)$result
+    .Call(dplR.sens1, as.double(x[!is.na(x)]))
 }

Modified: pkg/dplR/R/sens2.R
===================================================================
--- pkg/dplR/R/sens2.R	2014-01-16 12:22:26 UTC (rev 729)
+++ pkg/dplR/R/sens2.R	2014-01-19 20:00:33 UTC (rev 730)
@@ -1,8 +1,4 @@
 `sens2` <- function(x)
 {
-    y <- as.double(x[!is.na(x)])
-    n <- as.integer(length(y))
-    stopifnot(!is.na(n))
-    .C(dplR.sens2,
-       y, n, result=NaN*double(1L), NAOK=TRUE, DUP=FALSE)$result
+    .Call(dplR.sens2, as.double(x[!is.na(x)]))
 }

Modified: pkg/dplR/R/tbrm.R
===================================================================
--- pkg/dplR/R/tbrm.R	2014-01-16 12:22:26 UTC (rev 729)
+++ pkg/dplR/R/tbrm.R	2014-01-19 20:00:33 UTC (rev 730)
@@ -1,9 +1,4 @@
 `tbrm` <- function(x, C=9)
 {
-    y <- as.double(x[!is.na(x)])
-    n <- as.integer(length(y))
-    C2 <- as.double(C)
-    stopifnot(!is.na(n), length(C2) == 1)
-    .C(dplR.tbrm, y, n, C2, result = double(1L), NAOK = TRUE,
-       DUP = FALSE)$result
+    .Call(dplR.tbrm, as.double(x[!is.na(x)]), C)
 }

Modified: pkg/dplR/inst/unitTests/runit.dplR.R
===================================================================
--- pkg/dplR/inst/unitTests/runit.dplR.R	2014-01-16 12:22:26 UTC (rev 729)
+++ pkg/dplR/inst/unitTests/runit.dplR.R	2014-01-19 20:00:33 UTC (rev 730)
@@ -461,11 +461,34 @@
 
 test.gini.coef <- function() {
     ## Setup
-    SAMP.SIZE <- 1000
+    MAX.SIZE <- 1000
+    NTIMES <- 10
+    samp <- sample(seq.int(2, MAX.SIZE), max(0, min(NTIMES, MAX.SIZE - 1)))
     ## Test
-    checkEquals(0, gini.coef(rep(42, SAMP.SIZE)),
+    coefs <- vapply(samp,
+                    function(x) {
+                        foo <- numeric(x)
+                        n <- sample(x - 1, 1)
+                        nonzeros <- sample(x, n)
+                        val <- runif(1, 1, 100)
+
+                        foo[nonzeros[1]] <- val
+                        a <- gini.coef(foo)
+
+                        foo[nonzeros] <- val
+                        b <- gini.coef(foo)
+
+                        foo[] <- val
+                        c <- gini.coef(foo)
+
+                        c(a, b, c, n)
+                    }, numeric(4))
+    checkEquals(1 - 1 / samp, coefs[1, ],
+                msg="Winner takes all: 1 - 1/n")
+    checkEquals(1 - coefs[4, ] / samp, coefs[2, ],
+                msg="k (random) equal winners, others get 0: 1 - k/n")
+    checkEquals(numeric(length(samp)), coefs[3, ],
                 msg="Gini coefficient of a set with total equality is 0")
-    ## Needs more tests
 }
 
 test.glk <- function() {

Modified: pkg/dplR/inst/unitTests/runit.io.R
===================================================================
--- pkg/dplR/inst/unitTests/runit.io.R	2014-01-16 12:22:26 UTC (rev 729)
+++ pkg/dplR/inst/unitTests/runit.io.R	2014-01-19 20:00:33 UTC (rev 730)
@@ -175,4 +175,14 @@
                 msg="Row names are correct (test 12)")
     checkEqualsNumeric(c(12.3, 4.56, 7.89, 0.12, 0.34, 0.05, 6.78),
                        res.tf12[[1]], msg="Data are correct (test 12)")
+
+    ## File has no data (invalid file)
+    tf13 <- tempfile()
+    fh13 <- file(tf13, "wt")
+    on.exit(unlink(tf13))
+    writeLines("TST13A  1734", fh13)
+    close(fh13)
+    checkEquals(0, nrow(read.tucson(tf13, header = FALSE)),
+                msg="Detect when file has no measurement data (test 13)")
+
 }

Added: pkg/dplR/src/dplR.c
===================================================================
--- pkg/dplR/src/dplR.c	                        (rev 0)
+++ pkg/dplR/src/dplR.c	2014-01-19 20:00:33 UTC (rev 730)
@@ -0,0 +1,16 @@
+#include "dplR.h"
+
+size_t dplRlength(SEXP x) {
+    size_t xlength;
+    SEXP sn, tmp, ncall;
+    PROTECT_INDEX ipx;
+    PROTECT(tmp = ncall = allocList(2));
+    SET_TYPEOF(ncall, LANGSXP);
+    SETCAR(tmp, install("length")); tmp = CDR(tmp);
+    SETCAR(tmp, x);
+    PROTECT_WITH_INDEX(sn = eval(ncall, R_BaseEnv), &ipx);
+    REPROTECT(sn = coerceVector(sn, REALSXP), ipx);
+    xlength = (size_t) *REAL(sn);
+    UNPROTECT(2);
+    return xlength;
+}

Modified: pkg/dplR/src/dplR.h
===================================================================
--- pkg/dplR/src/dplR.h	2014-01-16 12:22:26 UTC (rev 729)
+++ pkg/dplR/src/dplR.h	2014-01-19 20:00:33 UTC (rev 730)
@@ -2,6 +2,9 @@
 #define DPLR_H
 
 #include <R.h>  /* to include Rconfig.h */
+#include <Rversion.h>
+#include <Rinternals.h>
+size_t dplRlength(SEXP x);
           
 #ifdef ENABLE_NLS
 #include <libintl.h>
@@ -11,4 +14,8 @@
 #define dngettext(pkg, String, StringP, N) (N > 1 ? StringP: String)
 #endif
 
+#if defined(R_VERSION) && R_VERSION >= R_Version(3, 0, 0)
+#define DPLR_RGEQ3
 #endif
+
+#endif

Modified: pkg/dplR/src/exactmean.c
===================================================================
--- pkg/dplR/src/exactmean.c	2014-01-16 12:22:26 UTC (rev 729)
+++ pkg/dplR/src/exactmean.c	2014-01-19 20:00:33 UTC (rev 730)
@@ -1,10 +1,16 @@
 #include <stddef.h>
+#include "dplR.h"
 #include "exactsum.h"
 
-void exactmean(double *x, int *n_ptr, double *result){
-    int n = *n_ptr;
+SEXP exactmean(SEXP x){
+    SEXP ans;
     listnode expansion;
+    size_t n;
     expansion.next = NULL;
-
-    *result = msum(x, n, &expansion) / n;
+    n = dplRlength(x);
+    ans = PROTECT(allocVector(REALSXP, 1));
+    /* Note: x must be a numeric vector */
+    REAL(ans)[0] = msum(REAL(x), n, &expansion) / n;
+    UNPROTECT(1);
+    return ans;
 }

Modified: pkg/dplR/src/exactsum.c
===================================================================
--- pkg/dplR/src/exactsum.c	2014-01-16 12:22:26 UTC (rev 729)
+++ pkg/dplR/src/exactsum.c	2014-01-19 20:00:33 UTC (rev 730)
@@ -3,8 +3,50 @@
 #include "exactsum.h"
 
 /* Written by Mikko Korpela. */
-dplr_double msum(double *array, int n, listnode *expansion){
-    int k;
+
+/* Common building block for functions below */
+#define GROWEXP_Body							\
+    /* Grow-Expansion(expansion, a) */					\
+    readptr = expansion;						\
+    writeptr = expansion;						\
+    while(readptr != NULL && readptr->valid == TRUE) {			\
+	/* Updating readptr is easy: just do it once in the loop */	\
+	/* and stay ahead of writeptr */				\
+	b = readptr->data;						\
+	readptr = readptr->next;					\
+	/* Two-Sum(a,b): x + y == a + b */				\
+	x = a + b;							\
+	b_virtual = x - a;						\
+	a_virtual = x - b_virtual;					\
+	b_roundoff = b - b_virtual;					\
+	a_roundoff = a - a_virtual;					\
+	y = a_roundoff + b_roundoff;					\
+	if(y != 0){							\
+	    writeptr->data = y;						\
+	    /* Loosely specified invariant: always have writeptr */	\
+	    /* point to a writable location */				\
+	    if(writeptr->next != NULL){					\
+		writeptr = writeptr->next;				\
+	    } else{							\
+	        writeptr->next =					\
+		    (listnode *) R_alloc(1, sizeof(listnode));		\
+		writeptr = writeptr->next;				\
+		writeptr->next = NULL;					\
+	    }								\
+	}								\
+	a = x;								\
+    }									\
+    writeptr->data = a;							\
+    writeptr->valid = TRUE;						\
+    									\
+    /* The possible tail of the list is effectively cut (number of */	\
+    /* non-zero elements may decrease), but any allocated space */	\
+    /* remains there */							\
+    if(writeptr->next != NULL)						\
+	writeptr->next->valid = FALSE;
+
+dplr_double msum(double *array, size_t n, listnode *expansion){
+    size_t k;
     dplr_double a,b,a_virtual,b_virtual,a_roundoff,b_roundoff,x,y,total;
     listnode *readptr, *writeptr;
 
@@ -13,45 +55,8 @@
 
     /* Loop through array */
     for(k=0; k<n; k++) {
-	/* Grow-Expansion(expansion, array[k]) */
-	readptr = expansion;
-	writeptr = expansion;
 	a = array[k];
-	while(readptr != NULL && readptr->valid == TRUE) {
-	    /* Updating readptr is easy: just do it once in the loop
-	       and stay ahead of writeptr */
-	    b = readptr->data;
-	    readptr = readptr->next;
-	    /* Two-Sum(a,b): x + y == a + b */
-	    x = a + b;
-	    b_virtual = x - a;
-	    a_virtual = x - b_virtual;
-	    b_roundoff = b - b_virtual;
-	    a_roundoff = a - a_virtual;
-	    y = a_roundoff + b_roundoff;
-	    if(y != 0){
-		writeptr->data = y;
-		/* Loosely specified invariant: always have writeptr
-		   point to a writable location */
-		if(writeptr->next != NULL){
-		    writeptr = writeptr->next;
-		} else{
-		    writeptr->next =
-			(listnode *) R_alloc(1, sizeof(listnode));
-		    writeptr = writeptr->next;
-		    writeptr->next = NULL;
-		}
-	    }
-	    a = x;
-	}
-	writeptr->data = a; /* sum of the list is sum of array[0]..array[k] */
-	writeptr->valid = TRUE;
-
-	/* The possible tail of the list is effectively cut (number of
-	   non-zero elements may decrease), but any allocated space
-	   remains there */
-	if(writeptr->next != NULL)
-	    writeptr->next->valid = FALSE;
+	GROWEXP_Body
     }
 
     /* Add together the elements of the expansion */
@@ -63,47 +68,37 @@
     return(total);
 }
 
-/* Copy-paste of the insides of the for loop in msum */
-void grow_exp(listnode *expansion, dplr_double a){
-    dplr_double b,a_virtual,b_virtual,a_roundoff,b_roundoff,x,y;
-    listnode *readptr, *writeptr;
+/* Cumulative sum, overwrites array */
+dplr_double cumsum(double *array, size_t n, listnode *expansion){
+    size_t k;
+    dplr_double a,b,a_virtual,b_virtual,a_roundoff,b_roundoff,x,y,total;
+    listnode *readptr, *writeptr, *tmp;
+    total = 0.0f;
 
-    /* Grow-Expansion(expansion, array[k]) */
-    readptr = expansion;
-    writeptr = expansion;
-    while(readptr != NULL && readptr->valid == TRUE) {
-	/* Updating readptr is easy: just do it once in the loop
-	   and stay ahead of writeptr */
-	b = readptr->data;
-	readptr = readptr->next;
-	/* Two-Sum(a,b): x + y == a + b */
-	x = a + b;
-	b_virtual = x - a;
-	a_virtual = x - b_virtual;
-	b_roundoff = b - b_virtual;
-	a_roundoff = a - a_virtual;
-	y = a_roundoff + b_roundoff;
-	if(y != 0){
-	    writeptr->data = y;
-	    /* Loosely specified invariant: always have writeptr
-	       point to a writable location */
-	    if(writeptr->next != NULL){
-		writeptr = writeptr->next;
-	    } else{
-		writeptr->next = (listnode *) R_alloc(1, sizeof(listnode));
-		writeptr = writeptr->next;
-		writeptr->next = NULL;
-	    }
+    /* Old data are not valid anymore */
+    expansion->valid = FALSE;
+
+    /* Loop through array */
+    for(k=0; k<n; k++) {
+	a = array[k];
+	GROWEXP_Body;
+	/* Add together the elements of the expansion */
+	total = 0.0f;
+	tmp = expansion;
+	while(tmp != NULL && tmp->valid == TRUE){
+	    total += tmp->data;
+	    tmp = tmp->next;
 	}
-	a = x;
+	/* Overwrite array with cumulative sum */
+	array[k] = (double)total;
     }
-    writeptr->data = a; /* sum of the list is sum of array[0]..array[k] */
-    writeptr->valid = TRUE;
 
-    /* The possible tail of the list is effectively cut (number of
-       non-zero elements may decrease), but any allocated space
-       remains there */
-    if(writeptr->next != NULL)
-	writeptr->next->valid = FALSE;
+    return(total);
+}
 
+void grow_exp(listnode *expansion, dplr_double a){
+    dplr_double b,a_virtual,b_virtual,a_roundoff,b_roundoff,x,y;
+    listnode *readptr, *writeptr;
+
+    GROWEXP_Body
 }

Modified: pkg/dplR/src/exactsum.h
===================================================================
--- pkg/dplR/src/exactsum.h	2014-01-16 12:22:26 UTC (rev 729)
+++ pkg/dplR/src/exactsum.h	2014-01-19 20:00:33 UTC (rev 730)
@@ -100,8 +100,11 @@
   array, because the list usually only has a handful of elements.
   Output: the sum of the numbers
 */
-dplr_double msum(double *array, int n, listnode *expansion);
+dplr_double msum(double *array, size_t n, listnode *expansion);
 
+/* Cumulative sum, overwrites array */
+dplr_double cumsum(double *array, size_t n, listnode *expansion);
+
 /* Add number a to the sum represented by expansion */
 void grow_exp(listnode *expansion, dplr_double a);
 

Modified: pkg/dplR/src/gini.c
===================================================================
--- pkg/dplR/src/gini.c	2014-01-16 12:22:26 UTC (rev 729)
+++ pkg/dplR/src/gini.c	2014-01-19 20:00:33 UTC (rev 730)
@@ -1,68 +1,51 @@
-#include <R.h>
 #include <stddef.h>
+#include <limits.h>
+#include "dplR.h"
 #include "exactsum.h"
 
 /* Written by Mikko Korpela */
-void gini(double *x_const, int *n_ptr, double *result){
-    int i;
-    double *x;
-    dplr_double sum1, sum2;
-    listnode tmp1, tmp2, *tmp_p;
-    int n = *n_ptr;
+SEXP gini(SEXP x){
+    SEXP ans;
+    double *x_const, *x2;
+    dplr_double sum;
+    listnode tmp;
+    size_t i, n;
+    n = dplRlength(x);
+    ans = PROTECT(allocVector(REALSXP, 1));
 
     if(n < 2){
-	*result = 0.0f;
-	return;
+	REAL(ans)[0] = 0.0f;
+	UNPROTECT(1);
+	return ans;
     }
 
+    /* Note: x must be a numeric vector */
+    x_const = REAL(x);
     /* Sort the numbers */
-    x = (double *) R_alloc(n, sizeof(double));
+    x2 = (double *) R_alloc(n, sizeof(double));
     for(i = 0; i < n; i++)
-	x[i] = x_const[i];
-    R_qsort(x, 1, n);
+	x2[i] = x_const[i];
+#ifdef DPLR_RGEQ3
+    R_qsort(x2, 1, n);
+#else
+    /* In R < 3.0.0, n will not be larger than INT_MAX, and R_qsort
+     * takes int indices */
+    R_qsort(x2, 1, (int)n);
+#endif
 
-    /* Setup for grow_exp */
-    tmp1.next = NULL;
-    tmp1.data = x[0];
-    tmp1.valid = TRUE;
+    /* Setup for cumsum, msum */
+    tmp.next = NULL;
 
-    /* Cumulative sum */
-    for(i = 1; i < n; i++){
-	grow_exp(&tmp1, x[i]);
-	tmp_p = &tmp1;
-	sum1 = 0.0f;
-	while(tmp_p != NULL && tmp_p->valid == TRUE){
-	    sum1 += tmp_p->data;
-	    tmp_p = tmp_p->next;
-	}
-	x[i] = sum1;
-    }
+    /* Cumulative sum (overwrites x2) */
+    sum = cumsum(x2, n, &tmp);
 
-    /* Setup for grow_exp */
-    if(tmp1.next != NULL)
-	tmp1.next->valid = FALSE;
-    tmp2.next = NULL;
-
-    /* Gini */
-    tmp1.data = (dplr_double)x[n-1] * (n-1);
-    tmp2.data = x[0];
-    tmp2.valid = TRUE;
-    grow_exp(&tmp2, x[0]);
-    for(i = 1; i < n-1; i++){
-	grow_exp(&tmp1, (dplr_double)x[i] * i);
-	grow_exp(&tmp2, (dplr_double)x[i] * (i+2));
-    }
-    sum1 = 0.0f;
-    tmp_p = &tmp1;
-    while(tmp_p != NULL && tmp_p->valid == TRUE){
-	sum1 += tmp_p->data;
-	tmp_p = tmp_p->next;
-    }
-    sum2 = 0.0f;
-    tmp_p = &tmp2;
-    while(tmp_p != NULL && tmp_p->valid == TRUE){
-	sum2 += tmp_p->data;
-	tmp_p = tmp_p->next;
-    }
-    *result = (sum1-sum2) / ((dplr_double)x[n-1]*n);
+    /* Gini. The following reformulation highlights the "maximum
+     * inequality" gini coefficient of 1 - 1/n (assuming no negative
+     * samples):
+     *
+     * 1 - 1/n - 2 * msum(x2, n - 1, &tmp) / (sum * n)
+     */
+    REAL(ans)[0] = (sum * (n - 1) - 2 * msum(x2, n - 1, &tmp)) / (sum * n);
+    UNPROTECT(1);
+    return ans;
 }

Modified: pkg/dplR/src/rcompact.c
===================================================================
--- pkg/dplR/src/rcompact.c	2014-01-16 12:22:26 UTC (rev 729)
+++ pkg/dplR/src/rcompact.c	2014-01-19 20:00:33 UTC (rev 730)
@@ -1,5 +1,4 @@
 #include "dplR.h"
-#include <Rinternals.h>
 #include <stddef.h>
 #include <stdio.h>
 #include <stdlib.h>

Modified: pkg/dplR/src/readloop.c
===================================================================
--- pkg/dplR/src/readloop.c	2014-01-16 12:22:26 UTC (rev 729)
+++ pkg/dplR/src/readloop.c	2014-01-19 20:00:33 UTC (rev 730)
@@ -1,33 +1,117 @@
-#include <R.h>
+#include "dplR.h"
+#include <Rmath.h>
+#include <limits.h>
 
 /* A function to speed up the heaviest part of read.tucson.R */
 /* Written by Mikko Korpela */
-void readloop(int *series_index, int *decade,
-	      int *x, int *x_nrow_p, int *x_ncol_p, int *min_year_p,
-	      double *rw_mat, int *rw_nrow_p, int *rw_ncol_p,
-	      int *last_yr, int *prec_rproc){
-    int i, j, yr_idx, rw_idx, x_idx, this_series, this_val,
-	this_decade, last_valid;
+SEXP readloop(SEXP series_index, SEXP decade, SEXP x) {
+    SEXP ans, dims, rw_mat, prec_rproc;
+    size_t i, x_nrow, rw_nrow, rw_ncol, x_idx;
+    int j, x_ncol, yr_idx, rw_idx, this_series, this_val, min_year, max_year;
+    int span, this_decade, last_valid, nseries;
+    int *series_index_p, *decade_p, *x_p, *prec_rproc_p, *last_yr;
     double stop_marker;
-    int x_nrow = *x_nrow_p;
-    int x_ncol = *x_ncol_p;
-    int min_year = *min_year_p;
-    int rw_nrow = *rw_nrow_p;
-    int rw_ncol = *rw_ncol_p;
+    double *dims_p, *rw_vec;
 
+    /* Safety checks */
+    if (!(isInteger(series_index) && isInteger(decade) && isInteger(x))) {
+	error(_("all arguments must be integers"));
+    }
+
+    /* Dimensions of x */
+    dims = PROTECT(coerceVector(getAttrib(x, R_DimSymbol), REALSXP));
+    if (length(dims) != 2) {
+	UNPROTECT(1);
+	error(_("'x' must be a matrix"));
+    }
+    dims_p = REAL(dims);
+    /* Nominally max 10 years per row, allow a few more */
+    if (dims_p[1] > 100) {
+	UNPROTECT(1);
+	error(_("too many columns in 'x'"));
+    }
+    x_nrow = (size_t) dims_p[0];
+    x_ncol = (int) dims_p[1];
+    UNPROTECT(1);
+
+    /* More safety checks */
+    if (!(dplRlength(series_index) == x_nrow &&
+	  dplRlength(decade) == x_nrow)) {
+	error(_("dimensions of 'x', 'series_index' and 'decade' must match"));
+    }
+
+    series_index_p = INTEGER(series_index);
+    decade_p = INTEGER(decade);
+    x_p = INTEGER(x);
+
+    /* Calculate dimensions of result matrix */
+    nseries = 0;
+    min_year = INT_MAX;
+    max_year = INT_MIN;
+    for (i = 0; i < x_nrow; i++) {
+	if (series_index_p[i] < 1) {
+	    error(_("'series_index' must be positive"));
+	}
+	nseries = imax2(nseries, series_index_p[i]);
+	this_decade = decade_p[i];
+	j = x_ncol - 1;
+	x_idx = i + j * x_nrow;
+	while (j >= 0 && x_p[x_idx] == NA_INTEGER) {
+	    --j;
+	    x_idx -= x_nrow;
+	}
+	if (j >= 0) {
+	    min_year = imin2(min_year, this_decade);
+	    max_year = imax2(max_year, this_decade + j);
+	}
+    }
+    if (max_year >= min_year) {
+	span = max_year - min_year + 1;
+    } else {
+	min_year = NA_INTEGER;
+	span = 0;
+    }
+    rw_nrow = (size_t) span;
+    rw_ncol = (size_t) nseries;
+
+    /* List for results: rw_mat, min_year, prec_rproc */
+    ans = PROTECT(allocVector(VECSXP, 3));
+    rw_mat = SET_VECTOR_ELT(ans, 0, allocMatrix(REALSXP, span, nseries));
+    rw_vec = REAL(rw_mat);
+    for (i = 0; i < rw_nrow * rw_ncol; i++) {
+	rw_vec[i] = NA_REAL;
+    }
+    SET_VECTOR_ELT(ans, 1, ScalarInteger(min_year));
+    prec_rproc = SET_VECTOR_ELT(ans, 2, allocVector(INTSXP, nseries));
+    prec_rproc_p = INTEGER(prec_rproc);
+    if (span == 0) {
+	for(i = 0; i < rw_ncol; i++){
+	    prec_rproc_p[i] = NA_INTEGER;
+	}
+	warning(_("no data found in 'x'"));
+	UNPROTECT(1);
+	return ans;
+    }
+
+    /* Allocate internal storage */
+    last_yr = (int *) R_alloc(rw_ncol, sizeof(int));
+    for (i = 0; i < rw_ncol; i++) {
+	last_yr[i] = min_year;
+    }
+
     /* Convert between input and output formats */
     for(i = 0; i < x_nrow; i++){
-	this_decade = decade[i];
+	this_decade = decade_p[i];
 	yr_idx = this_decade - min_year;
-	this_series = series_index[i] - 1;
+	this_series = series_index_p[i] - 1;
 	rw_idx = this_series * rw_nrow + yr_idx;
 	x_idx = i;
 	last_valid = last_yr[this_series];
 	for(j = 0; j < x_ncol; j++){
-	    this_val = x[x_idx];
+	    this_val = x_p[x_idx];
 	    x_idx += x_nrow;
 	    if(this_val != NA_INTEGER){
-		rw_mat[rw_idx] = this_val;
+		rw_vec[rw_idx] = this_val;
 		last_valid = this_decade + j;
 	    }
 	    rw_idx++;
@@ -38,11 +122,16 @@
 	    last_yr[this_series] = last_valid;
     }
     for(i = 0; i < rw_ncol; i++){
-	stop_marker = rw_mat[i * rw_nrow + last_yr[i] - min_year];
+	stop_marker = rw_vec[i * rw_nrow + (last_yr[i] - min_year)];
 	if(stop_marker == 999.0f){
-	    prec_rproc[i] = 100;
+	    prec_rproc_p[i] = 100;
 	} else if(stop_marker == -9999.0f){
-	    prec_rproc[i] = 1000;
+	    prec_rproc_p[i] = 1000;
+	} else {
+	    prec_rproc_p[i] = 1;
 	}
     }
+
+    UNPROTECT(1);
+    return ans;
 }

Modified: pkg/dplR/src/redfit.c
===================================================================
--- pkg/dplR/src/redfit.c	2014-01-16 12:22:26 UTC (rev 729)
+++ pkg/dplR/src/redfit.c	2014-01-19 20:00:33 UTC (rev 730)
@@ -19,7 +19,6 @@
  */
 
 #include "dplR.h"
-#include <Rinternals.h>
 #include <Rmath.h>
 #include <complex.h>
 #include <string.h>

Modified: pkg/dplR/src/sens.c
===================================================================
--- pkg/dplR/src/sens.c	2014-01-16 12:22:26 UTC (rev 729)
+++ pkg/dplR/src/sens.c	2014-01-19 20:00:33 UTC (rev 730)
@@ -1,19 +1,25 @@
-#include <R.h>
 #include <stddef.h>
+#include "dplR.h"
 #include "exactsum.h"
 
 /* Written by Mikko Korpela */
-void sens2(double *x_const, int *n_ptr, double *result){
-    int i;
+SEXP sens2(SEXP x){
+    SEXP ans;
+    size_t i, n;
     double previous, this, next;
+    double *x_const;
     dplr_double sum1, sum2;
     listnode tmp, *tmp_p;
-    int n = *n_ptr;
+    n = dplRlength(x);
+    ans = PROTECT(allocVector(REALSXP, 1));
 
     if(n < 2){
-	*result = R_NaN;
-	return;
+	REAL(ans)[0] = R_NaN;
+	UNPROTECT(1);
+	return ans;
     }
+    /* Note: x must be a numeric vector */
+    x_const = REAL(x);
 
     /* Setup for grow_exp and msum */
     tmp.next = NULL;
@@ -70,20 +76,28 @@
     }
 
     sum2 = msum(x_const, n, &tmp);
-    *result = sum1/(sum2-sum2/n);
+    REAL(ans)[0] = sum1/(sum2-sum2/n);
+    UNPROTECT(1);
+    return ans;
 }
 
 /* Written by Mikko Korpela */
-void sens1(double *x_const, int *n_ptr, double *result){
-    int i;
+SEXP sens1(SEXP x){
+    SEXP ans;
+    size_t i, n;
     dplr_double sum, previous, this, term;
+    double *x_const;
     listnode tmp, *tmp_p;
-    int n = *n_ptr;
+    n = dplRlength(x);
+    ans = PROTECT(allocVector(REALSXP, 1));
 
     if(n < 2){
-	*result = R_NaN;
-	return;
+	REAL(ans)[0] = R_NaN;
+	UNPROTECT(1);
+	return ans;
     }
+    /* Note: x must be a numeric vector */
+    x_const = REAL(x);
 
     /* Setup for grow_exp */
     tmp.next = NULL;
@@ -105,5 +119,7 @@
 	tmp_p = tmp_p->next;
     }
 
-    *result = (sum+sum)/(n-1);
+    REAL(ans)[0] = (sum+sum)/(n-1);
+    UNPROTECT(1);
+    return ans;
 }

Modified: pkg/dplR/src/tbrm.c
===================================================================
--- pkg/dplR/src/tbrm.c	2014-01-16 12:22:26 UTC (rev 729)
+++ pkg/dplR/src/tbrm.c	2014-01-19 20:00:33 UTC (rev 730)
@@ -1,65 +1,79 @@
-#include <R.h>
+#include <limits.h>
 #include <stddef.h>
+#include "dplR.h"
 #include "exactsum.h"
 
 /* Tukey's Biweight Robust Mean (tbrm).
-   When called directly, there must be no NAs in 'x_const'.
-   This function only alters the argument 'result'
-   => DUP=FALSE is safe (and the fastest, preferred way).
+   There must be no NAs in 'x'.
    
    Input:
-   - x_const Array of numbers to be summarized by tbrm
-   - n_ptr   Pointer to the length of the array
-   - C_ptr   Pointer to parameter C which adjusts the scaling of the data
-   - result  Pointer to storage location of the result.
-   Output: No return value. The tbrm is written to *result.
+   - x   Array of numbers to be summarized by tbrm (double)
+   - C   Parameter C which adjusts the scaling of the data (double, length 1)
+   Output: numeric vector of length 1
 
    Written by Mikko Korpela.
 */
-void tbrm(double *x_const, int *n_ptr, double *C_ptr, double *result){
+SEXP tbrm(SEXP x, SEXP C){
+    SEXP ans, C2;
     Rboolean n_odd;
-    int i, half, my_count;
-    double this_val, min_val, div_const, x_med, this_wt;
-    double *x, *abs_x_dev, *wt, *wtx;
+    int i, half, my_count, n;
+    size_t nlong;
+    double C_val, this_val, min_val, div_const, x_med, this_wt;
+    double *x2, *abs_x_dev, *wt, *wtx, *x_p;
     listnode tmp;
-    int n = *n_ptr;
-    double C = *C_ptr;
+    nlong = dplRlength(x);
 
+    /* Long vectors not supported (limitation of rPsort) */
+    if (nlong > INT_MAX) {
+	error(_("long vectors not supported"));
+    }
+    C2 = PROTECT(coerceVector(C, REALSXP));
+    if (length(C2) != 1) {
+	UNPROTECT(1);
+	error(_("length of 'C' must be 1"));
+    }
+    C_val = REAL(C2)[0];
+    UNPROTECT(1);
+    n = (int) nlong;
+    ans = PROTECT(allocVector(REALSXP, 1));
     /* Avoid complexity and possible crash in case of empty input
      * vector */
     if(n == 0){
-	*result = R_NaN;
-	return;
+	REAL(ans)[0] = R_NaN;
+	UNPROTECT(1);
+	return ans;
     }
+    /* Note: x must be a numeric vector */
+    x_p = REAL(x);
 
-    /* x is a copy of the argument array x_const (the data) */
-    x = (double *) R_alloc(n, sizeof(double));
+    /* x2 is a copy of the data part of argument x */
+    x2 = (double *) R_alloc(n, sizeof(double));
     for(i = 0; i < n; i++)
-	x[i] = x_const[i];
+	x2[i] = x_p[i];
 
     /* Median of x */
     if((n & 0x1) == 1){ /* n is odd */
 	half = ((unsigned int)n) >> 1;
-	rPsort(x, n, half); /* Partial sort: */
-	x_med = x[half];    /* element at position half is correct.*/
+	rPsort(x2, n, half); /* Partial sort: */
+	x_med = x2[half];    /* element at position half is correct.*/
 	n_odd = TRUE;
     } else { /* n is even */
 	half = ((unsigned int)n) >> 1;
-	rPsort(x, n, half-1);       /* Elements at positions half-1 */
-	min_val = x[half];
+	rPsort(x2, n, half-1);       /* Elements at positions half-1 */
+	min_val = x2[half];
 	for(i = half+1; i < n; i++){/* and half */ 
-	    this_val = x[i];        /* (minimum in the */
+	    this_val = x2[i];        /* (minimum in the */
 	    if(this_val < min_val)  /* "larger than" side) */
 		min_val = this_val;
 	}
-	x_med = (x[half-1]+min_val)/2.0f; 
+	x_med = (x2[half-1]+min_val)/2.0f; 
 	n_odd = FALSE;
     }
 
     /* abs(x - median(x)) */
     abs_x_dev = (double *) R_alloc(n, sizeof(double));
     for(i = 0; i < n; i++){
-	this_val = x[i]-x_med;
+	this_val = x2[i]-x_med;
 	abs_x_dev[i] = this_val<0 ? -this_val : this_val;
     }
 
@@ -77,24 +91,24 @@
 	}
 	div_const = (abs_x_dev[half-1]+min_val)/2.0f;
     }
-    /* This is a normalization constant (well, constant over x[i]) */
-    div_const = div_const * C + 1e-6;
+    /* This is a normalization constant (well, constant over x2[i]) */
+    div_const = div_const * C_val + 1e-6;
 
-    /* Number of values x[i] with non-zero weights */
+    /* Number of values x2[i] with non-zero weights */
     my_count = 0;
 
     /* Recycling memory, i.e. renaming the same space */
     wt = abs_x_dev;
-    wtx = x; /* Have to be careful not to overwrite too soon */
+    wtx = x2; /* Have to be careful not to overwrite too soon */
 
     /* Weights (wt) and weighted data (wtx) */
     for(i = 0; i < n; i++){
-	this_wt = (x[i]-x_med) / div_const;
+	this_wt = (x2[i]-x_med) / div_const;
 	if(this_wt >= -1.0f && this_wt <= 1.0f){ /* absolute value <= 1 */
 	    this_wt = 1.0f - this_wt * this_wt;
 	    this_wt *= this_wt;
 	    wt[my_count] = this_wt;
-	    wtx[my_count++] = this_wt * x[i];
+	    wtx[my_count++] = this_wt * x2[i];
 	}
     }
 
@@ -102,14 +116,15 @@
        Sum of my_count values. No more, no less.
        The tails of the arrays are now garbage, not harmlessly zero. */
     if(my_count == 1){ /* Avoid call to sum function in border case */
-	*result = wtx[0] / wt[0];
+	REAL(ans)[0] = wtx[0] / wt[0];
     } else if(my_count > 0){
 	/* Setup for msum. */
 	tmp.next = NULL;
 	/* Not the usual 'sum of data divided by sum of ones' */
-	*result = msum(wtx, my_count, &tmp) / msum(wt, my_count, &tmp);
+	REAL(ans)[0] = msum(wtx, my_count, &tmp) / msum(wt, my_count, &tmp);
     } else{ /* Nothing to sum */
-	*result = R_NaN;
+	REAL(ans)[0] = R_NaN;
     }
-    return;
+    UNPROTECT(1);
+    return ans;
 }



More information about the Dplr-commits mailing list