[Dplr-commits] r730 - in pkg/dplR: . R inst/unitTests src
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Sun Jan 19 21:00:33 CET 2014
Author: mvkorpel
Date: 2014-01-19 21:00:33 +0100 (Sun, 19 Jan 2014)
New Revision: 730
Added:
pkg/dplR/src/dplR.c
Modified:
pkg/dplR/ChangeLog
pkg/dplR/DESCRIPTION
pkg/dplR/R/exactmean.R
pkg/dplR/R/gini.coef.R
pkg/dplR/R/read.tucson.R
pkg/dplR/R/sens1.R
pkg/dplR/R/sens2.R
pkg/dplR/R/tbrm.R
pkg/dplR/inst/unitTests/runit.dplR.R
pkg/dplR/inst/unitTests/runit.io.R
pkg/dplR/src/dplR.h
pkg/dplR/src/exactmean.c
pkg/dplR/src/exactsum.c
pkg/dplR/src/exactsum.h
pkg/dplR/src/gini.c
pkg/dplR/src/rcompact.c
pkg/dplR/src/readloop.c
pkg/dplR/src/redfit.c
pkg/dplR/src/sens.c
pkg/dplR/src/tbrm.c
Log:
- First changes for future dplR version 1.5.9
- Using .Call() instead of .C() when interfacing with C code
- Fixed a probably unnoticeable bug in the C code called by read.tucson()
- More efficient computation of the gini coefficient
- Some reorganization of the C code / headers
- More RUnit tests for gini.coef(), also one more for read.tucson()
Modified: pkg/dplR/ChangeLog
===================================================================
--- pkg/dplR/ChangeLog 2014-01-16 12:22:26 UTC (rev 729)
+++ pkg/dplR/ChangeLog 2014-01-19 20:00:33 UTC (rev 730)
@@ -1,3 +1,48 @@
+* CHANGES IN dplR VERSION 1.5.9
+
+Files: dplR.h, rcompact.c, redfit.c
+-----------------------------------
+
+- Restructured header inclusions
+- dplR.h declares new function dplRlength (internal use)
+
+File: dplR.c
+------------
+
+- New file for general C functions internally used by dplR
+
+Files: exactmean.c, exactmean.R, gini.c, gini.coef.R,
+readloop.c, read.tucson.R, sens.c, sens1.R, sens2.R, tbrm.c, tbrm.R
+-------------------------------------------------------------------
+
+- Use .Call() instead of .C() for interfacing with C code
+- The C functions need less support (arguments, argument checking) from
+ the calling R function
+
+Files: exactsum.c, exactsum.h
+-----------------------------
+
+- Reuse of code between function by using the C preprocessor
+- New (internal to dplR C code) function cumsum (cumulative sum,
+ overwrites input array)
+- size_t n
+
+File: gini.c
+------------
+
+- Simplified expression for gini coefficent reduces number of
+ arithmetic operations performed
+- Fewer calls to other functions (use of the new cumulative sum function)
+
+File: readloop.c
+----------------
+
+- Fixed buggy handling of malformed Tucson format series that have
+ no data and no stop marker. Previously, this hypothetical case
+ could have caused a read from a location just outside the array
+ bounds. However, in most cases that would probably not have been a
+ problem.
+
* CHANGES IN dplR VERSION 1.5.8
File: tbrm.Rd
Modified: pkg/dplR/DESCRIPTION
===================================================================
--- pkg/dplR/DESCRIPTION 2014-01-16 12:22:26 UTC (rev 729)
+++ pkg/dplR/DESCRIPTION 2014-01-19 20:00:33 UTC (rev 730)
@@ -2,8 +2,8 @@
Package: dplR
Type: Package
Title: Dendrochronology Program Library in R
-Version: 1.5.8
-Date: 2014-01-15
+Version: 1.5.9
+Date: 2014-01-19
Authors at R: c(person("Andy", "Bunn", role = c("aut", "cph",
"cre", "trl"), email = "andy.bunn at wwu.edu"), person("Mikko",
"Korpela", role = c("aut", "trl")), person("Franco", "Biondi",
Modified: pkg/dplR/R/exactmean.R
===================================================================
--- pkg/dplR/R/exactmean.R 2014-01-16 12:22:26 UTC (rev 729)
+++ pkg/dplR/R/exactmean.R 2014-01-19 20:00:33 UTC (rev 730)
@@ -1,9 +1,5 @@
`exactmean` <- function(x)
{
## Drops NA and NaN values!
- y <- as.double(x[!is.na(x)])
- n <- as.integer(length(y))
- stopifnot(!is.na(n))
- .C(dplR.mean,
- y, n, result=NaN*double(1L), NAOK=TRUE, DUP=FALSE)$result
+ .Call(dplR.mean, as.double(x[!is.na(x)]))
}
Modified: pkg/dplR/R/gini.coef.R
===================================================================
--- pkg/dplR/R/gini.coef.R 2014-01-16 12:22:26 UTC (rev 729)
+++ pkg/dplR/R/gini.coef.R 2014-01-19 20:00:33 UTC (rev 730)
@@ -1,8 +1,4 @@
`gini.coef` <- function(x)
{
- y <- as.double(x[!is.na(x)])
- n <- as.integer(length(y))
- stopifnot(!is.na(n))
- .C(dplR.gini,
- y, n, result=double(1L), NAOK=TRUE, DUP=FALSE)$result
+ .Call(dplR.gini, as.double(x[!is.na(x)]))
}
Modified: pkg/dplR/R/read.tucson.R
===================================================================
--- pkg/dplR/R/read.tucson.R 2014-01-16 12:22:26 UTC (rev 729)
+++ pkg/dplR/R/read.tucson.R 2014-01-19 20:00:33 UTC (rev 730)
@@ -305,20 +305,30 @@
}
series.ids <- unique(series)
nseries <- length(series.ids)
- series.index <- match(series, series.ids)
+ ## At this time match does not support long vectors in the second
+ ## argument and always returns integers, but let's check the
+ ## result anyway.
+ series.index <- tryCatch(as.integer(match(series, series.ids)),
+ warning = conditionMessage,
+ error = conditionMessage)
+ if (!is.integer(series.index)) {
+ stop(gettextf("series.index must be integer: %s",
+ paste(as.character(series.index), collapse = ", "),
+ domain = "R-dplR"))
+ }
extra.col <- dat[[13]]
- min.year <- min(decade.yr)
- max.year <- ((max(decade.yr)+10) %/% 10) * 10
- span <- max.year - min.year + 1
- rw.vec <- NA*vector(mode="numeric", length=nseries*span)
- scratch <- rep.int(as.integer(min.year-1), nseries)
- prec.rproc <- rep.int(as.integer(1), nseries)
- .C(rwl.readloop, series.index, decade.yr, as.vector(x),
- nrow(x), ncol(x), as.integer(min.year), rw.vec,
- as.integer(span), as.integer(nseries), scratch, prec.rproc,
- NAOK=TRUE, DUP=FALSE)
- rw.mat <- matrix(rw.vec, ncol=nseries, nrow=span)
+ res <- .Call(rwl.readloop, series.index, decade.yr, x)
+ rw.mat <- res[[1]]
+ min.year <- res[[2]]
+ prec.rproc <- res[[3]]
+ span <- nrow(rw.mat)
+ if (span == 0) {
+ rw.df <- as.data.frame(rw.mat)
+ names(rw.df) <- as.character(series.ids)
+ return(rw.df)
+ }
+ max.year <- min.year + (span - 1)
rownames(rw.mat) <- min.year:max.year
## The operations in the loop depend on the precision of each series.
Modified: pkg/dplR/R/sens1.R
===================================================================
--- pkg/dplR/R/sens1.R 2014-01-16 12:22:26 UTC (rev 729)
+++ pkg/dplR/R/sens1.R 2014-01-19 20:00:33 UTC (rev 730)
@@ -1,8 +1,4 @@
`sens1` <- function(x)
{
- y <- as.double(x[!is.na(x)])
- n <- as.integer(length(y))
- stopifnot(!is.na(n))
- .C(dplR.sens1,
- y, n, result=NaN*double(1L), NAOK=TRUE, DUP=FALSE)$result
+ .Call(dplR.sens1, as.double(x[!is.na(x)]))
}
Modified: pkg/dplR/R/sens2.R
===================================================================
--- pkg/dplR/R/sens2.R 2014-01-16 12:22:26 UTC (rev 729)
+++ pkg/dplR/R/sens2.R 2014-01-19 20:00:33 UTC (rev 730)
@@ -1,8 +1,4 @@
`sens2` <- function(x)
{
- y <- as.double(x[!is.na(x)])
- n <- as.integer(length(y))
- stopifnot(!is.na(n))
- .C(dplR.sens2,
- y, n, result=NaN*double(1L), NAOK=TRUE, DUP=FALSE)$result
+ .Call(dplR.sens2, as.double(x[!is.na(x)]))
}
Modified: pkg/dplR/R/tbrm.R
===================================================================
--- pkg/dplR/R/tbrm.R 2014-01-16 12:22:26 UTC (rev 729)
+++ pkg/dplR/R/tbrm.R 2014-01-19 20:00:33 UTC (rev 730)
@@ -1,9 +1,4 @@
`tbrm` <- function(x, C=9)
{
- y <- as.double(x[!is.na(x)])
- n <- as.integer(length(y))
- C2 <- as.double(C)
- stopifnot(!is.na(n), length(C2) == 1)
- .C(dplR.tbrm, y, n, C2, result = double(1L), NAOK = TRUE,
- DUP = FALSE)$result
+ .Call(dplR.tbrm, as.double(x[!is.na(x)]), C)
}
Modified: pkg/dplR/inst/unitTests/runit.dplR.R
===================================================================
--- pkg/dplR/inst/unitTests/runit.dplR.R 2014-01-16 12:22:26 UTC (rev 729)
+++ pkg/dplR/inst/unitTests/runit.dplR.R 2014-01-19 20:00:33 UTC (rev 730)
@@ -461,11 +461,34 @@
test.gini.coef <- function() {
## Setup
- SAMP.SIZE <- 1000
+ MAX.SIZE <- 1000
+ NTIMES <- 10
+ samp <- sample(seq.int(2, MAX.SIZE), max(0, min(NTIMES, MAX.SIZE - 1)))
## Test
- checkEquals(0, gini.coef(rep(42, SAMP.SIZE)),
+ coefs <- vapply(samp,
+ function(x) {
+ foo <- numeric(x)
+ n <- sample(x - 1, 1)
+ nonzeros <- sample(x, n)
+ val <- runif(1, 1, 100)
+
+ foo[nonzeros[1]] <- val
+ a <- gini.coef(foo)
+
+ foo[nonzeros] <- val
+ b <- gini.coef(foo)
+
+ foo[] <- val
+ c <- gini.coef(foo)
+
+ c(a, b, c, n)
+ }, numeric(4))
+ checkEquals(1 - 1 / samp, coefs[1, ],
+ msg="Winner takes all: 1 - 1/n")
+ checkEquals(1 - coefs[4, ] / samp, coefs[2, ],
+ msg="k (random) equal winners, others get 0: 1 - k/n")
+ checkEquals(numeric(length(samp)), coefs[3, ],
msg="Gini coefficient of a set with total equality is 0")
- ## Needs more tests
}
test.glk <- function() {
Modified: pkg/dplR/inst/unitTests/runit.io.R
===================================================================
--- pkg/dplR/inst/unitTests/runit.io.R 2014-01-16 12:22:26 UTC (rev 729)
+++ pkg/dplR/inst/unitTests/runit.io.R 2014-01-19 20:00:33 UTC (rev 730)
@@ -175,4 +175,14 @@
msg="Row names are correct (test 12)")
checkEqualsNumeric(c(12.3, 4.56, 7.89, 0.12, 0.34, 0.05, 6.78),
res.tf12[[1]], msg="Data are correct (test 12)")
+
+ ## File has no data (invalid file)
+ tf13 <- tempfile()
+ fh13 <- file(tf13, "wt")
+ on.exit(unlink(tf13))
+ writeLines("TST13A 1734", fh13)
+ close(fh13)
+ checkEquals(0, nrow(read.tucson(tf13, header = FALSE)),
+ msg="Detect when file has no measurement data (test 13)")
+
}
Added: pkg/dplR/src/dplR.c
===================================================================
--- pkg/dplR/src/dplR.c (rev 0)
+++ pkg/dplR/src/dplR.c 2014-01-19 20:00:33 UTC (rev 730)
@@ -0,0 +1,16 @@
+#include "dplR.h"
+
+size_t dplRlength(SEXP x) {
+ size_t xlength;
+ SEXP sn, tmp, ncall;
+ PROTECT_INDEX ipx;
+ PROTECT(tmp = ncall = allocList(2));
+ SET_TYPEOF(ncall, LANGSXP);
+ SETCAR(tmp, install("length")); tmp = CDR(tmp);
+ SETCAR(tmp, x);
+ PROTECT_WITH_INDEX(sn = eval(ncall, R_BaseEnv), &ipx);
+ REPROTECT(sn = coerceVector(sn, REALSXP), ipx);
+ xlength = (size_t) *REAL(sn);
+ UNPROTECT(2);
+ return xlength;
+}
Modified: pkg/dplR/src/dplR.h
===================================================================
--- pkg/dplR/src/dplR.h 2014-01-16 12:22:26 UTC (rev 729)
+++ pkg/dplR/src/dplR.h 2014-01-19 20:00:33 UTC (rev 730)
@@ -2,6 +2,9 @@
#define DPLR_H
#include <R.h> /* to include Rconfig.h */
+#include <Rversion.h>
+#include <Rinternals.h>
+size_t dplRlength(SEXP x);
#ifdef ENABLE_NLS
#include <libintl.h>
@@ -11,4 +14,8 @@
#define dngettext(pkg, String, StringP, N) (N > 1 ? StringP: String)
#endif
+#if defined(R_VERSION) && R_VERSION >= R_Version(3, 0, 0)
+#define DPLR_RGEQ3
#endif
+
+#endif
Modified: pkg/dplR/src/exactmean.c
===================================================================
--- pkg/dplR/src/exactmean.c 2014-01-16 12:22:26 UTC (rev 729)
+++ pkg/dplR/src/exactmean.c 2014-01-19 20:00:33 UTC (rev 730)
@@ -1,10 +1,16 @@
#include <stddef.h>
+#include "dplR.h"
#include "exactsum.h"
-void exactmean(double *x, int *n_ptr, double *result){
- int n = *n_ptr;
+SEXP exactmean(SEXP x){
+ SEXP ans;
listnode expansion;
+ size_t n;
expansion.next = NULL;
-
- *result = msum(x, n, &expansion) / n;
+ n = dplRlength(x);
+ ans = PROTECT(allocVector(REALSXP, 1));
+ /* Note: x must be a numeric vector */
+ REAL(ans)[0] = msum(REAL(x), n, &expansion) / n;
+ UNPROTECT(1);
+ return ans;
}
Modified: pkg/dplR/src/exactsum.c
===================================================================
--- pkg/dplR/src/exactsum.c 2014-01-16 12:22:26 UTC (rev 729)
+++ pkg/dplR/src/exactsum.c 2014-01-19 20:00:33 UTC (rev 730)
@@ -3,8 +3,50 @@
#include "exactsum.h"
/* Written by Mikko Korpela. */
-dplr_double msum(double *array, int n, listnode *expansion){
- int k;
+
+/* Common building block for functions below */
+#define GROWEXP_Body \
+ /* Grow-Expansion(expansion, a) */ \
+ readptr = expansion; \
+ writeptr = expansion; \
+ while(readptr != NULL && readptr->valid == TRUE) { \
+ /* Updating readptr is easy: just do it once in the loop */ \
+ /* and stay ahead of writeptr */ \
+ b = readptr->data; \
+ readptr = readptr->next; \
+ /* Two-Sum(a,b): x + y == a + b */ \
+ x = a + b; \
+ b_virtual = x - a; \
+ a_virtual = x - b_virtual; \
+ b_roundoff = b - b_virtual; \
+ a_roundoff = a - a_virtual; \
+ y = a_roundoff + b_roundoff; \
+ if(y != 0){ \
+ writeptr->data = y; \
+ /* Loosely specified invariant: always have writeptr */ \
+ /* point to a writable location */ \
+ if(writeptr->next != NULL){ \
+ writeptr = writeptr->next; \
+ } else{ \
+ writeptr->next = \
+ (listnode *) R_alloc(1, sizeof(listnode)); \
+ writeptr = writeptr->next; \
+ writeptr->next = NULL; \
+ } \
+ } \
+ a = x; \
+ } \
+ writeptr->data = a; \
+ writeptr->valid = TRUE; \
+ \
+ /* The possible tail of the list is effectively cut (number of */ \
+ /* non-zero elements may decrease), but any allocated space */ \
+ /* remains there */ \
+ if(writeptr->next != NULL) \
+ writeptr->next->valid = FALSE;
+
+dplr_double msum(double *array, size_t n, listnode *expansion){
+ size_t k;
dplr_double a,b,a_virtual,b_virtual,a_roundoff,b_roundoff,x,y,total;
listnode *readptr, *writeptr;
@@ -13,45 +55,8 @@
/* Loop through array */
for(k=0; k<n; k++) {
- /* Grow-Expansion(expansion, array[k]) */
- readptr = expansion;
- writeptr = expansion;
a = array[k];
- while(readptr != NULL && readptr->valid == TRUE) {
- /* Updating readptr is easy: just do it once in the loop
- and stay ahead of writeptr */
- b = readptr->data;
- readptr = readptr->next;
- /* Two-Sum(a,b): x + y == a + b */
- x = a + b;
- b_virtual = x - a;
- a_virtual = x - b_virtual;
- b_roundoff = b - b_virtual;
- a_roundoff = a - a_virtual;
- y = a_roundoff + b_roundoff;
- if(y != 0){
- writeptr->data = y;
- /* Loosely specified invariant: always have writeptr
- point to a writable location */
- if(writeptr->next != NULL){
- writeptr = writeptr->next;
- } else{
- writeptr->next =
- (listnode *) R_alloc(1, sizeof(listnode));
- writeptr = writeptr->next;
- writeptr->next = NULL;
- }
- }
- a = x;
- }
- writeptr->data = a; /* sum of the list is sum of array[0]..array[k] */
- writeptr->valid = TRUE;
-
- /* The possible tail of the list is effectively cut (number of
- non-zero elements may decrease), but any allocated space
- remains there */
- if(writeptr->next != NULL)
- writeptr->next->valid = FALSE;
+ GROWEXP_Body
}
/* Add together the elements of the expansion */
@@ -63,47 +68,37 @@
return(total);
}
-/* Copy-paste of the insides of the for loop in msum */
-void grow_exp(listnode *expansion, dplr_double a){
- dplr_double b,a_virtual,b_virtual,a_roundoff,b_roundoff,x,y;
- listnode *readptr, *writeptr;
+/* Cumulative sum, overwrites array */
+dplr_double cumsum(double *array, size_t n, listnode *expansion){
+ size_t k;
+ dplr_double a,b,a_virtual,b_virtual,a_roundoff,b_roundoff,x,y,total;
+ listnode *readptr, *writeptr, *tmp;
+ total = 0.0f;
- /* Grow-Expansion(expansion, array[k]) */
- readptr = expansion;
- writeptr = expansion;
- while(readptr != NULL && readptr->valid == TRUE) {
- /* Updating readptr is easy: just do it once in the loop
- and stay ahead of writeptr */
- b = readptr->data;
- readptr = readptr->next;
- /* Two-Sum(a,b): x + y == a + b */
- x = a + b;
- b_virtual = x - a;
- a_virtual = x - b_virtual;
- b_roundoff = b - b_virtual;
- a_roundoff = a - a_virtual;
- y = a_roundoff + b_roundoff;
- if(y != 0){
- writeptr->data = y;
- /* Loosely specified invariant: always have writeptr
- point to a writable location */
- if(writeptr->next != NULL){
- writeptr = writeptr->next;
- } else{
- writeptr->next = (listnode *) R_alloc(1, sizeof(listnode));
- writeptr = writeptr->next;
- writeptr->next = NULL;
- }
+ /* Old data are not valid anymore */
+ expansion->valid = FALSE;
+
+ /* Loop through array */
+ for(k=0; k<n; k++) {
+ a = array[k];
+ GROWEXP_Body;
+ /* Add together the elements of the expansion */
+ total = 0.0f;
+ tmp = expansion;
+ while(tmp != NULL && tmp->valid == TRUE){
+ total += tmp->data;
+ tmp = tmp->next;
}
- a = x;
+ /* Overwrite array with cumulative sum */
+ array[k] = (double)total;
}
- writeptr->data = a; /* sum of the list is sum of array[0]..array[k] */
- writeptr->valid = TRUE;
- /* The possible tail of the list is effectively cut (number of
- non-zero elements may decrease), but any allocated space
- remains there */
- if(writeptr->next != NULL)
- writeptr->next->valid = FALSE;
+ return(total);
+}
+void grow_exp(listnode *expansion, dplr_double a){
+ dplr_double b,a_virtual,b_virtual,a_roundoff,b_roundoff,x,y;
+ listnode *readptr, *writeptr;
+
+ GROWEXP_Body
}
Modified: pkg/dplR/src/exactsum.h
===================================================================
--- pkg/dplR/src/exactsum.h 2014-01-16 12:22:26 UTC (rev 729)
+++ pkg/dplR/src/exactsum.h 2014-01-19 20:00:33 UTC (rev 730)
@@ -100,8 +100,11 @@
array, because the list usually only has a handful of elements.
Output: the sum of the numbers
*/
-dplr_double msum(double *array, int n, listnode *expansion);
+dplr_double msum(double *array, size_t n, listnode *expansion);
+/* Cumulative sum, overwrites array */
+dplr_double cumsum(double *array, size_t n, listnode *expansion);
+
/* Add number a to the sum represented by expansion */
void grow_exp(listnode *expansion, dplr_double a);
Modified: pkg/dplR/src/gini.c
===================================================================
--- pkg/dplR/src/gini.c 2014-01-16 12:22:26 UTC (rev 729)
+++ pkg/dplR/src/gini.c 2014-01-19 20:00:33 UTC (rev 730)
@@ -1,68 +1,51 @@
-#include <R.h>
#include <stddef.h>
+#include <limits.h>
+#include "dplR.h"
#include "exactsum.h"
/* Written by Mikko Korpela */
-void gini(double *x_const, int *n_ptr, double *result){
- int i;
- double *x;
- dplr_double sum1, sum2;
- listnode tmp1, tmp2, *tmp_p;
- int n = *n_ptr;
+SEXP gini(SEXP x){
+ SEXP ans;
+ double *x_const, *x2;
+ dplr_double sum;
+ listnode tmp;
+ size_t i, n;
+ n = dplRlength(x);
+ ans = PROTECT(allocVector(REALSXP, 1));
if(n < 2){
- *result = 0.0f;
- return;
+ REAL(ans)[0] = 0.0f;
+ UNPROTECT(1);
+ return ans;
}
+ /* Note: x must be a numeric vector */
+ x_const = REAL(x);
/* Sort the numbers */
- x = (double *) R_alloc(n, sizeof(double));
+ x2 = (double *) R_alloc(n, sizeof(double));
for(i = 0; i < n; i++)
- x[i] = x_const[i];
- R_qsort(x, 1, n);
+ x2[i] = x_const[i];
+#ifdef DPLR_RGEQ3
+ R_qsort(x2, 1, n);
+#else
+ /* In R < 3.0.0, n will not be larger than INT_MAX, and R_qsort
+ * takes int indices */
+ R_qsort(x2, 1, (int)n);
+#endif
- /* Setup for grow_exp */
- tmp1.next = NULL;
- tmp1.data = x[0];
- tmp1.valid = TRUE;
+ /* Setup for cumsum, msum */
+ tmp.next = NULL;
- /* Cumulative sum */
- for(i = 1; i < n; i++){
- grow_exp(&tmp1, x[i]);
- tmp_p = &tmp1;
- sum1 = 0.0f;
- while(tmp_p != NULL && tmp_p->valid == TRUE){
- sum1 += tmp_p->data;
- tmp_p = tmp_p->next;
- }
- x[i] = sum1;
- }
+ /* Cumulative sum (overwrites x2) */
+ sum = cumsum(x2, n, &tmp);
- /* Setup for grow_exp */
- if(tmp1.next != NULL)
- tmp1.next->valid = FALSE;
- tmp2.next = NULL;
-
- /* Gini */
- tmp1.data = (dplr_double)x[n-1] * (n-1);
- tmp2.data = x[0];
- tmp2.valid = TRUE;
- grow_exp(&tmp2, x[0]);
- for(i = 1; i < n-1; i++){
- grow_exp(&tmp1, (dplr_double)x[i] * i);
- grow_exp(&tmp2, (dplr_double)x[i] * (i+2));
- }
- sum1 = 0.0f;
- tmp_p = &tmp1;
- while(tmp_p != NULL && tmp_p->valid == TRUE){
- sum1 += tmp_p->data;
- tmp_p = tmp_p->next;
- }
- sum2 = 0.0f;
- tmp_p = &tmp2;
- while(tmp_p != NULL && tmp_p->valid == TRUE){
- sum2 += tmp_p->data;
- tmp_p = tmp_p->next;
- }
- *result = (sum1-sum2) / ((dplr_double)x[n-1]*n);
+ /* Gini. The following reformulation highlights the "maximum
+ * inequality" gini coefficient of 1 - 1/n (assuming no negative
+ * samples):
+ *
+ * 1 - 1/n - 2 * msum(x2, n - 1, &tmp) / (sum * n)
+ */
+ REAL(ans)[0] = (sum * (n - 1) - 2 * msum(x2, n - 1, &tmp)) / (sum * n);
+ UNPROTECT(1);
+ return ans;
}
Modified: pkg/dplR/src/rcompact.c
===================================================================
--- pkg/dplR/src/rcompact.c 2014-01-16 12:22:26 UTC (rev 729)
+++ pkg/dplR/src/rcompact.c 2014-01-19 20:00:33 UTC (rev 730)
@@ -1,5 +1,4 @@
#include "dplR.h"
-#include <Rinternals.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
Modified: pkg/dplR/src/readloop.c
===================================================================
--- pkg/dplR/src/readloop.c 2014-01-16 12:22:26 UTC (rev 729)
+++ pkg/dplR/src/readloop.c 2014-01-19 20:00:33 UTC (rev 730)
@@ -1,33 +1,117 @@
-#include <R.h>
+#include "dplR.h"
+#include <Rmath.h>
+#include <limits.h>
/* A function to speed up the heaviest part of read.tucson.R */
/* Written by Mikko Korpela */
-void readloop(int *series_index, int *decade,
- int *x, int *x_nrow_p, int *x_ncol_p, int *min_year_p,
- double *rw_mat, int *rw_nrow_p, int *rw_ncol_p,
- int *last_yr, int *prec_rproc){
- int i, j, yr_idx, rw_idx, x_idx, this_series, this_val,
- this_decade, last_valid;
+SEXP readloop(SEXP series_index, SEXP decade, SEXP x) {
+ SEXP ans, dims, rw_mat, prec_rproc;
+ size_t i, x_nrow, rw_nrow, rw_ncol, x_idx;
+ int j, x_ncol, yr_idx, rw_idx, this_series, this_val, min_year, max_year;
+ int span, this_decade, last_valid, nseries;
+ int *series_index_p, *decade_p, *x_p, *prec_rproc_p, *last_yr;
double stop_marker;
- int x_nrow = *x_nrow_p;
- int x_ncol = *x_ncol_p;
- int min_year = *min_year_p;
- int rw_nrow = *rw_nrow_p;
- int rw_ncol = *rw_ncol_p;
+ double *dims_p, *rw_vec;
+ /* Safety checks */
+ if (!(isInteger(series_index) && isInteger(decade) && isInteger(x))) {
+ error(_("all arguments must be integers"));
+ }
+
+ /* Dimensions of x */
+ dims = PROTECT(coerceVector(getAttrib(x, R_DimSymbol), REALSXP));
+ if (length(dims) != 2) {
+ UNPROTECT(1);
+ error(_("'x' must be a matrix"));
+ }
+ dims_p = REAL(dims);
+ /* Nominally max 10 years per row, allow a few more */
+ if (dims_p[1] > 100) {
+ UNPROTECT(1);
+ error(_("too many columns in 'x'"));
+ }
+ x_nrow = (size_t) dims_p[0];
+ x_ncol = (int) dims_p[1];
+ UNPROTECT(1);
+
+ /* More safety checks */
+ if (!(dplRlength(series_index) == x_nrow &&
+ dplRlength(decade) == x_nrow)) {
+ error(_("dimensions of 'x', 'series_index' and 'decade' must match"));
+ }
+
+ series_index_p = INTEGER(series_index);
+ decade_p = INTEGER(decade);
+ x_p = INTEGER(x);
+
+ /* Calculate dimensions of result matrix */
+ nseries = 0;
+ min_year = INT_MAX;
+ max_year = INT_MIN;
+ for (i = 0; i < x_nrow; i++) {
+ if (series_index_p[i] < 1) {
+ error(_("'series_index' must be positive"));
+ }
+ nseries = imax2(nseries, series_index_p[i]);
+ this_decade = decade_p[i];
+ j = x_ncol - 1;
+ x_idx = i + j * x_nrow;
+ while (j >= 0 && x_p[x_idx] == NA_INTEGER) {
+ --j;
+ x_idx -= x_nrow;
+ }
+ if (j >= 0) {
+ min_year = imin2(min_year, this_decade);
+ max_year = imax2(max_year, this_decade + j);
+ }
+ }
+ if (max_year >= min_year) {
+ span = max_year - min_year + 1;
+ } else {
+ min_year = NA_INTEGER;
+ span = 0;
+ }
+ rw_nrow = (size_t) span;
+ rw_ncol = (size_t) nseries;
+
+ /* List for results: rw_mat, min_year, prec_rproc */
+ ans = PROTECT(allocVector(VECSXP, 3));
+ rw_mat = SET_VECTOR_ELT(ans, 0, allocMatrix(REALSXP, span, nseries));
+ rw_vec = REAL(rw_mat);
+ for (i = 0; i < rw_nrow * rw_ncol; i++) {
+ rw_vec[i] = NA_REAL;
+ }
+ SET_VECTOR_ELT(ans, 1, ScalarInteger(min_year));
+ prec_rproc = SET_VECTOR_ELT(ans, 2, allocVector(INTSXP, nseries));
+ prec_rproc_p = INTEGER(prec_rproc);
+ if (span == 0) {
+ for(i = 0; i < rw_ncol; i++){
+ prec_rproc_p[i] = NA_INTEGER;
+ }
+ warning(_("no data found in 'x'"));
+ UNPROTECT(1);
+ return ans;
+ }
+
+ /* Allocate internal storage */
+ last_yr = (int *) R_alloc(rw_ncol, sizeof(int));
+ for (i = 0; i < rw_ncol; i++) {
+ last_yr[i] = min_year;
+ }
+
/* Convert between input and output formats */
for(i = 0; i < x_nrow; i++){
- this_decade = decade[i];
+ this_decade = decade_p[i];
yr_idx = this_decade - min_year;
- this_series = series_index[i] - 1;
+ this_series = series_index_p[i] - 1;
rw_idx = this_series * rw_nrow + yr_idx;
x_idx = i;
last_valid = last_yr[this_series];
for(j = 0; j < x_ncol; j++){
- this_val = x[x_idx];
+ this_val = x_p[x_idx];
x_idx += x_nrow;
if(this_val != NA_INTEGER){
- rw_mat[rw_idx] = this_val;
+ rw_vec[rw_idx] = this_val;
last_valid = this_decade + j;
}
rw_idx++;
@@ -38,11 +122,16 @@
last_yr[this_series] = last_valid;
}
for(i = 0; i < rw_ncol; i++){
- stop_marker = rw_mat[i * rw_nrow + last_yr[i] - min_year];
+ stop_marker = rw_vec[i * rw_nrow + (last_yr[i] - min_year)];
if(stop_marker == 999.0f){
- prec_rproc[i] = 100;
+ prec_rproc_p[i] = 100;
} else if(stop_marker == -9999.0f){
- prec_rproc[i] = 1000;
+ prec_rproc_p[i] = 1000;
+ } else {
+ prec_rproc_p[i] = 1;
}
}
+
+ UNPROTECT(1);
+ return ans;
}
Modified: pkg/dplR/src/redfit.c
===================================================================
--- pkg/dplR/src/redfit.c 2014-01-16 12:22:26 UTC (rev 729)
+++ pkg/dplR/src/redfit.c 2014-01-19 20:00:33 UTC (rev 730)
@@ -19,7 +19,6 @@
*/
#include "dplR.h"
-#include <Rinternals.h>
#include <Rmath.h>
#include <complex.h>
#include <string.h>
Modified: pkg/dplR/src/sens.c
===================================================================
--- pkg/dplR/src/sens.c 2014-01-16 12:22:26 UTC (rev 729)
+++ pkg/dplR/src/sens.c 2014-01-19 20:00:33 UTC (rev 730)
@@ -1,19 +1,25 @@
-#include <R.h>
#include <stddef.h>
+#include "dplR.h"
#include "exactsum.h"
/* Written by Mikko Korpela */
-void sens2(double *x_const, int *n_ptr, double *result){
- int i;
+SEXP sens2(SEXP x){
+ SEXP ans;
+ size_t i, n;
double previous, this, next;
+ double *x_const;
dplr_double sum1, sum2;
listnode tmp, *tmp_p;
- int n = *n_ptr;
+ n = dplRlength(x);
+ ans = PROTECT(allocVector(REALSXP, 1));
if(n < 2){
- *result = R_NaN;
- return;
+ REAL(ans)[0] = R_NaN;
+ UNPROTECT(1);
+ return ans;
}
+ /* Note: x must be a numeric vector */
+ x_const = REAL(x);
/* Setup for grow_exp and msum */
tmp.next = NULL;
@@ -70,20 +76,28 @@
}
sum2 = msum(x_const, n, &tmp);
- *result = sum1/(sum2-sum2/n);
+ REAL(ans)[0] = sum1/(sum2-sum2/n);
+ UNPROTECT(1);
+ return ans;
}
/* Written by Mikko Korpela */
-void sens1(double *x_const, int *n_ptr, double *result){
- int i;
+SEXP sens1(SEXP x){
+ SEXP ans;
+ size_t i, n;
dplr_double sum, previous, this, term;
+ double *x_const;
listnode tmp, *tmp_p;
- int n = *n_ptr;
+ n = dplRlength(x);
+ ans = PROTECT(allocVector(REALSXP, 1));
if(n < 2){
- *result = R_NaN;
- return;
+ REAL(ans)[0] = R_NaN;
+ UNPROTECT(1);
+ return ans;
}
+ /* Note: x must be a numeric vector */
+ x_const = REAL(x);
/* Setup for grow_exp */
tmp.next = NULL;
@@ -105,5 +119,7 @@
tmp_p = tmp_p->next;
}
- *result = (sum+sum)/(n-1);
+ REAL(ans)[0] = (sum+sum)/(n-1);
+ UNPROTECT(1);
+ return ans;
}
Modified: pkg/dplR/src/tbrm.c
===================================================================
--- pkg/dplR/src/tbrm.c 2014-01-16 12:22:26 UTC (rev 729)
+++ pkg/dplR/src/tbrm.c 2014-01-19 20:00:33 UTC (rev 730)
@@ -1,65 +1,79 @@
-#include <R.h>
+#include <limits.h>
#include <stddef.h>
+#include "dplR.h"
#include "exactsum.h"
/* Tukey's Biweight Robust Mean (tbrm).
- When called directly, there must be no NAs in 'x_const'.
- This function only alters the argument 'result'
- => DUP=FALSE is safe (and the fastest, preferred way).
+ There must be no NAs in 'x'.
Input:
- - x_const Array of numbers to be summarized by tbrm
- - n_ptr Pointer to the length of the array
- - C_ptr Pointer to parameter C which adjusts the scaling of the data
- - result Pointer to storage location of the result.
- Output: No return value. The tbrm is written to *result.
+ - x Array of numbers to be summarized by tbrm (double)
+ - C Parameter C which adjusts the scaling of the data (double, length 1)
+ Output: numeric vector of length 1
Written by Mikko Korpela.
*/
-void tbrm(double *x_const, int *n_ptr, double *C_ptr, double *result){
+SEXP tbrm(SEXP x, SEXP C){
+ SEXP ans, C2;
Rboolean n_odd;
- int i, half, my_count;
- double this_val, min_val, div_const, x_med, this_wt;
- double *x, *abs_x_dev, *wt, *wtx;
+ int i, half, my_count, n;
+ size_t nlong;
+ double C_val, this_val, min_val, div_const, x_med, this_wt;
+ double *x2, *abs_x_dev, *wt, *wtx, *x_p;
listnode tmp;
- int n = *n_ptr;
- double C = *C_ptr;
+ nlong = dplRlength(x);
+ /* Long vectors not supported (limitation of rPsort) */
+ if (nlong > INT_MAX) {
+ error(_("long vectors not supported"));
+ }
+ C2 = PROTECT(coerceVector(C, REALSXP));
+ if (length(C2) != 1) {
+ UNPROTECT(1);
+ error(_("length of 'C' must be 1"));
+ }
+ C_val = REAL(C2)[0];
+ UNPROTECT(1);
+ n = (int) nlong;
+ ans = PROTECT(allocVector(REALSXP, 1));
/* Avoid complexity and possible crash in case of empty input
* vector */
if(n == 0){
- *result = R_NaN;
- return;
+ REAL(ans)[0] = R_NaN;
+ UNPROTECT(1);
+ return ans;
}
+ /* Note: x must be a numeric vector */
+ x_p = REAL(x);
- /* x is a copy of the argument array x_const (the data) */
- x = (double *) R_alloc(n, sizeof(double));
+ /* x2 is a copy of the data part of argument x */
+ x2 = (double *) R_alloc(n, sizeof(double));
for(i = 0; i < n; i++)
- x[i] = x_const[i];
+ x2[i] = x_p[i];
/* Median of x */
if((n & 0x1) == 1){ /* n is odd */
half = ((unsigned int)n) >> 1;
- rPsort(x, n, half); /* Partial sort: */
- x_med = x[half]; /* element at position half is correct.*/
+ rPsort(x2, n, half); /* Partial sort: */
+ x_med = x2[half]; /* element at position half is correct.*/
n_odd = TRUE;
} else { /* n is even */
half = ((unsigned int)n) >> 1;
- rPsort(x, n, half-1); /* Elements at positions half-1 */
- min_val = x[half];
+ rPsort(x2, n, half-1); /* Elements at positions half-1 */
+ min_val = x2[half];
for(i = half+1; i < n; i++){/* and half */
- this_val = x[i]; /* (minimum in the */
+ this_val = x2[i]; /* (minimum in the */
if(this_val < min_val) /* "larger than" side) */
min_val = this_val;
}
- x_med = (x[half-1]+min_val)/2.0f;
+ x_med = (x2[half-1]+min_val)/2.0f;
n_odd = FALSE;
}
/* abs(x - median(x)) */
abs_x_dev = (double *) R_alloc(n, sizeof(double));
for(i = 0; i < n; i++){
- this_val = x[i]-x_med;
+ this_val = x2[i]-x_med;
abs_x_dev[i] = this_val<0 ? -this_val : this_val;
}
@@ -77,24 +91,24 @@
}
div_const = (abs_x_dev[half-1]+min_val)/2.0f;
}
- /* This is a normalization constant (well, constant over x[i]) */
- div_const = div_const * C + 1e-6;
+ /* This is a normalization constant (well, constant over x2[i]) */
+ div_const = div_const * C_val + 1e-6;
- /* Number of values x[i] with non-zero weights */
+ /* Number of values x2[i] with non-zero weights */
my_count = 0;
/* Recycling memory, i.e. renaming the same space */
wt = abs_x_dev;
- wtx = x; /* Have to be careful not to overwrite too soon */
+ wtx = x2; /* Have to be careful not to overwrite too soon */
/* Weights (wt) and weighted data (wtx) */
for(i = 0; i < n; i++){
- this_wt = (x[i]-x_med) / div_const;
+ this_wt = (x2[i]-x_med) / div_const;
if(this_wt >= -1.0f && this_wt <= 1.0f){ /* absolute value <= 1 */
this_wt = 1.0f - this_wt * this_wt;
this_wt *= this_wt;
wt[my_count] = this_wt;
- wtx[my_count++] = this_wt * x[i];
+ wtx[my_count++] = this_wt * x2[i];
}
}
@@ -102,14 +116,15 @@
Sum of my_count values. No more, no less.
The tails of the arrays are now garbage, not harmlessly zero. */
if(my_count == 1){ /* Avoid call to sum function in border case */
- *result = wtx[0] / wt[0];
+ REAL(ans)[0] = wtx[0] / wt[0];
} else if(my_count > 0){
/* Setup for msum. */
tmp.next = NULL;
/* Not the usual 'sum of data divided by sum of ones' */
- *result = msum(wtx, my_count, &tmp) / msum(wt, my_count, &tmp);
+ REAL(ans)[0] = msum(wtx, my_count, &tmp) / msum(wt, my_count, &tmp);
} else{ /* Nothing to sum */
- *result = R_NaN;
+ REAL(ans)[0] = R_NaN;
}
- return;
+ UNPROTECT(1);
+ return ans;
}
More information about the Dplr-commits
mailing list