[CHNOSZ-commits] r26 - in pkg/CHNOSZ: . R inst inst/doc inst/tests man vignettes

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Sat Oct 13 08:08:15 CEST 2012


Author: jedick
Date: 2012-10-13 08:08:14 +0200 (Sat, 13 Oct 2012)
New Revision: 26

Added:
   pkg/CHNOSZ/R/objective.R
   pkg/CHNOSZ/man/objective.Rd
Removed:
   pkg/CHNOSZ/R/util.stat.R
   pkg/CHNOSZ/man/util.stat.Rd
Modified:
   pkg/CHNOSZ/DESCRIPTION
   pkg/CHNOSZ/R/diagram.R
   pkg/CHNOSZ/R/equilibrate.R
   pkg/CHNOSZ/R/examples.R
   pkg/CHNOSZ/R/findit.R
   pkg/CHNOSZ/R/revisit.R
   pkg/CHNOSZ/R/util.expression.R
   pkg/CHNOSZ/R/util.misc.R
   pkg/CHNOSZ/inst/NEWS
   pkg/CHNOSZ/inst/doc/anintro.pdf
   pkg/CHNOSZ/inst/tests/test-equilibrate.R
   pkg/CHNOSZ/inst/tests/test-findit.R
   pkg/CHNOSZ/inst/tests/test-revisit.R
   pkg/CHNOSZ/man/CHNOSZ-package.Rd
   pkg/CHNOSZ/man/diagram.Rd
   pkg/CHNOSZ/man/equilibrate.Rd
   pkg/CHNOSZ/man/findit.Rd
   pkg/CHNOSZ/man/revisit.Rd
   pkg/CHNOSZ/man/util.expression.Rd
   pkg/CHNOSZ/man/util.misc.Rd
   pkg/CHNOSZ/vignettes/anintro.Rnw
   pkg/CHNOSZ/vignettes/anintro.lyx
   pkg/CHNOSZ/vignettes/flowchart.dia
   pkg/CHNOSZ/vignettes/flowchart.pdf
Log:
revised objective functions


Modified: pkg/CHNOSZ/DESCRIPTION
===================================================================
--- pkg/CHNOSZ/DESCRIPTION	2012-10-02 14:15:53 UTC (rev 25)
+++ pkg/CHNOSZ/DESCRIPTION	2012-10-13 06:08:14 UTC (rev 26)
@@ -1,6 +1,6 @@
-Date: 2012-10-02
+Date: 2012-10-13
 Package: CHNOSZ
-Version: 0.9.8
+Version: 0.9.8-1
 Title: Chemical Thermodynamics and Activity Diagrams
 Author: Jeffrey M. Dick
 Maintainer: Jeffrey M. Dick <jmdick at asu.edu>

Modified: pkg/CHNOSZ/R/diagram.R
===================================================================
--- pkg/CHNOSZ/R/diagram.R	2012-10-02 14:15:53 UTC (rev 25)
+++ pkg/CHNOSZ/R/diagram.R	2012-10-13 06:08:14 UTC (rev 26)
@@ -42,7 +42,7 @@
     if(!"loga.equil" %in% names(eout)) {
       eout.is.aout <- TRUE
       # get the balancing coefficients
-      n.balance <- balance.coeffs(eout, balance)
+      n.balance <- balance(eout, balance)$n
     }
   } else if(what %in% rownames(eout$basis)) {
     # to calculate the loga of basis species at equilibrium

Modified: pkg/CHNOSZ/R/equilibrate.R
===================================================================
--- pkg/CHNOSZ/R/equilibrate.R	2012-10-02 14:15:53 UTC (rev 25)
+++ pkg/CHNOSZ/R/equilibrate.R	2012-10-13 06:08:14 UTC (rev 26)
@@ -10,7 +10,8 @@
   ## number of possible species
   nspecies <- length(aout$values)
   ## get the balancing coefficients
-  n.balance <- balance.coeffs(aout, balance)
+  balance <- balance(aout, balance)
+  n.balance <- balance$n
   ## take selected species in 'ispecies'
   if(length(ispecies)==0) stop("the length of ispecies is zero")
   # take out species that have NA affinities
@@ -29,13 +30,13 @@
   if(length(n.balance) < 100) msgout(paste("equilibrate: balancing coefficients are", c2s(n.balance), "\n"))
   ## logarithm of total activity of the balance
   if(is.numeric(loga.balance)) 
-    msgout(paste("equilibrate: log total activity of", balance, "from argument is", loga.balance, "\n"))
+    msgout(paste("equilibrate: logarithm of total", balance$description, "(from loga.balance) is", loga.balance, "\n"))
   else {
     # sum up the activities, then take absolute value
     # in case n.balance is negative
     sumact <- abs(sum(10^aout$species$logact * n.balance))
     loga.balance <- log10(sumact)
-    msgout(paste("equilibrate: log total activity of", balance, "from species is", loga.balance, "\n"))
+    msgout(paste("equilibrate: logarithm of total", balance$description, "is", loga.balance, "\n"))
   }
   ## normalize -- normalize the molar formula by the balance coefficients
 #  # this is the default for systems of proteins as of 20091119
@@ -224,7 +225,7 @@
   return(logact)
 }
 
-balance.coeffs <- function(aout, balance=NULL) {
+balance <- function(aout, balance=NULL) {
   ## generate n.balance from user-given or automatically identified basis species
   ## extracted from equilibrate() 20120929
   # 'balance' can be:
@@ -250,14 +251,15 @@
     n.balance <- rep(balance, length.out=length(aout$values))
     if(all(n.balance==1)) msgout("balance: coefficients are unity\n")
     # use 'balance' below as a name
-    if(all(n.balance==1)) balance <- "species"
-    else balance <- "[user defined]"
+    if(all(n.balance==1)) balance.description <- "moles of species"
+    else balance <- "moles of user-specified coefficients"
   } else {
     # "length" for balancing on protein length
     if(identical(balance, "length")) {
       if(!all(isprotein)) stop("length was the requested balance, but some species are not proteins")
       n.balance <- protein.length(aout$species$name)
-      msgout("balance: coefficients are protein length\n")
+      balance.description <- "protein length"
+      msgout(paste("balance: coefficients are", balance.description, "\n"))
     } else {
       # is the balance the name of a basis species?
       if(length(ibalance)==0) {
@@ -266,12 +268,13 @@
       }
       # the name of the basis species (need this if we got ibalance which which.balance, above)
       balance <- colnames(aout$species)[ibalance[1]]
-      msgout(paste("balance: coefficients are moles of", balance, "in formation reactions\n"))
+      balance.description <- paste("moles of", balance)
+      msgout(paste("balance: coefficients are", balance.description, "in formation reactions\n"))
       # the balance vector
       n.balance <- aout$species[, ibalance[1]]
       # we check if that all formation reactions contain this basis species
       if(any(n.balance==0)) stop("some species have no ", balance, " in the formation reaction")
     }
   }
-  return(n.balance)
+  return(list(n=n.balance, description=balance.description))
 }

Modified: pkg/CHNOSZ/R/examples.R
===================================================================
--- pkg/CHNOSZ/R/examples.R	2012-10-02 14:15:53 UTC (rev 25)
+++ pkg/CHNOSZ/R/examples.R	2012-10-13 06:08:14 UTC (rev 26)
@@ -9,10 +9,10 @@
     "util.args", "util.array", "util.blast", "util.character", 
     "util.data", "util.expression", "util.fasta", "util.formula", "util.matrix", 
     "util.misc", "util.program",
-    "util.seq", "util.stat", "util.units", "taxonomy", "info", "protein.info", "hkf", "water", "subcrt",
+    "util.seq", "util.units", "taxonomy", "info", "protein.info", "hkf", "water", "subcrt",
     "makeup", "basis", "swap.basis", "species", "affinity", "util.affinity", "equil.boltzmann", 
-    "diagram", "buffer", "iprotein", "protein", "ionize.aa", "more.aa", "read.expr", "revisit", 
-    "findit", "transfer", "anim", "EOSregress", "wjd")
+    "diagram", "buffer", "iprotein", "protein", "ionize.aa", "more.aa", "read.expr",
+    "objective", "revisit", "findit", "transfer", "anim", "EOSregress", "wjd")
   plot.it <- FALSE
   if(is.character(do.png))
     png(paste(do.png,"%d.png",sep=""),width=700,height=700,pointsize=18)

Modified: pkg/CHNOSZ/R/findit.R
===================================================================
--- pkg/CHNOSZ/R/findit.R	2012-10-02 14:15:53 UTC (rev 25)
+++ pkg/CHNOSZ/R/findit.R	2012-10-13 06:08:14 UTC (rev 26)
@@ -1,14 +1,14 @@
 # CHNOSZ/findit.R
-# find the minimum or maximum of a target 
+# find the minimum or maximum of a objective 
 # (eg cv, richness, shannon)
 # as a function the specified chemical potentials
 # 20100831 jmd
 
-findit <- function(lims=list(), target="cv", niter=NULL, iprotein=NULL, plot.it=TRUE,
-  T=25, P="Psat", res=NULL, labcex=0.6, loga.ref=NULL,
+findit <- function(lims=list(), objective="CV", niter=NULL, iprotein=NULL, plot.it=TRUE,
+  T=25, P="Psat", res=NULL, labcex=0.6, loga2=NULL,
   loga.balance=0, rat=NULL, balance=NULL) {
   # the lims list has the limits of the arguments to affinity()
-  # we iteratively move toward a higher/lower value of the target
+  # we iteratively move toward a higher/lower value of the objective
   # within these limits
   
   # fun stuff: when running on either side of 100 deg C,
@@ -117,19 +117,13 @@
 
     # now calculate the affinities
     a <- do.call(affinity,aargs)
-    # then calculate the values of the target function
+    # then calculate the values of the objective function
     e <- equilibrate(a, balance=balance, loga.balance=loga.balance)
-    dd <- revisit(e$loga.equil, target, loga.ref=loga.ref)$H
-    # find the extreme value
-    iext <- where.extreme(dd,target)
-    # find the extreme value
-    teststat <- c(teststat,dd[iext])
-    # what are its coordinates
-    dd1 <- dd
-    dd1[] <- 1
-    ai <- which(dd1==1,arr.ind=TRUE)
-    if(nd==1) ai <- ai[iext]
-    else ai <- ai[iext,]
+    dd <- revisit(e$loga.equil, objective, loga2=loga2)$H
+    # coordinates of the extreme value (take only the first set of coords)
+    iopt <- optimal.index(dd, objective)[1,, drop=FALSE]
+    # the extreme value
+    teststat <- c(teststat,dd[iopt])
 
     # loop to update the current parameters
     for(j in 1:length(lims)) {
@@ -139,7 +133,7 @@
       # the increments used
       myinc <- seq(mylims[1],mylims[2],length.out=mylims[3])
       # the value of the variable at the extreme of the function
-      myval <- myinc[ai[j]]
+      myval <- myinc[iopt[j]]
       # update the basis table, T or P
       if(names(lims)[j] %in% rownames(basis)) {
         ibasis <- match(names(lims)[j],rownames(basis))
@@ -162,20 +156,20 @@
       # add our search lines and extreme points
       # to the plot
       if(nd==1) {
-        if(i==1) revisit(e,target,loga.ref,xlim=lims[[1]])
+        if(i==1) revisit(e,objective,loga2,xlim=lims[[1]])
         # on a 1-D diagram we add vertical lines to show our search limits
         abline(v=outlims[[1]][1:2])
         lines(myinc,dd)
-        points(myval,dd[iext])
+        points(myval,dd[iopt])
       } else if(nd==2) {
-        if(i==1) revisit(e,target,loga.ref,xlim=lims[[1]],ylim=lims[[2]],labcex=labcex)
+        if(i==1) revisit(e,objective,loga2,xlim=lims[[1]],ylim=lims[[2]],labcex=labcex)
         else {
           # on a 2-D diagram we add a box around our search limits
           # and an updated map for this region
           ol1 <- outlims[[1]]
           ol2 <- outlims[[2]]
           rect(ol1[1],ol2[1],ol1[2],ol2[2],border=par("fg"),col="white")
-          revisit(e,target,loga.ref,xlim=lims[[1]],ylim=lims[[2]],add=TRUE,labcex=labcex)
+          revisit(e,objective,loga2,xlim=lims[[1]],ylim=lims[[2]],add=TRUE,labcex=labcex)
         }
         text(out[[1]],out[[2]])
         points(out[[1]],out[[2]],cex=2)
@@ -193,10 +187,10 @@
         # we extract the third dimension until only two remain
         for(j in 3:nd) {
           # ai[j] - which slice in this dimension has the extremum
-          for(k in 1:length(e$loga.equil)) e$loga.equil[[k]] <- slice(e$loga.equil[[k]],3,ai[j])
+          for(k in 1:length(e$loga.equil)) e$loga.equil[[k]] <- slice(e$loga.equil[[k]],3,iopt[j])
         }
         # now make the plot
-        revisit(e,target,loga.ref,xlim=lims[[1]],ylim=lims[[2]],add=add,labcex=labcex)
+        revisit(e,objective,loga2,xlim=lims[[1]],ylim=lims[[2]],add=add,labcex=labcex)
         # indicate the location of the extremum
         text(out[[1]],out[[2]])
         points(out[[1]],out[[2]],cex=2)
@@ -211,7 +205,7 @@
   }  # end main loop
   # build return list: values of chemical variables and test statistic
   teststat <- list(teststat)
-  names(teststat) <- target
+  names(teststat) <- objective
   value <- c(out,teststat)
   # build return list: limits at each step
   out <- list(value=value,lolim=lolim,hilim=hilim)

Added: pkg/CHNOSZ/R/objective.R
===================================================================
--- pkg/CHNOSZ/R/objective.R	                        (rev 0)
+++ pkg/CHNOSZ/R/objective.R	2012-10-13 06:08:14 UTC (rev 26)
@@ -0,0 +1,181 @@
+# CHNOSZ/objective.R
+# objective functions for revisit(), findit()
+# all objective functions in one file, added attributes  20121009 jmd 
+
+get.objfun <- function(objective) {
+  # get the function with the name given in 'objective'
+  objfun <- get(objective)
+  # perform a check on its usability
+  if(!"optimum" %in% names(attributes(objfun)))
+    stop(paste(objective, "is not a function with an attribute named 'optimum'"))
+  # return the function
+  return(objfun)
+}
+
+# input: a1 or loga1 is matrix with a column for each species and a row for each condition
+# output: vector with length equal to number of rows of a1 or loga1
+# attributes:
+# optimum - the target for optimization (minimal or maximal)
+## absolute - is the absolute value of the function used in the calculation? (TRUE or FALSE)
+
+SD <- structure(
+  function(a1) {
+    # calculate standard deviation
+    SD <- apply(a1, 1, sd)
+    return(SD)
+  },
+  optimum="minimal"
+)
+
+CV <- structure(
+  function(a1) {
+    # get the coefficient of variation
+    SD <- SD(a1)
+    CV <- SD / rowMeans(a1)
+    return(CV)
+  },
+  optimum="minimal"
+)
+
+shannon <- structure(
+  function(a1) {
+    # shannon entropy
+    p <- a1/rowSums(a1)
+    H <- rowSums(-p*log(p))
+    return(H)
+  },
+  optimum="maximal"
+)
+
+DGmix <- structure(
+  function(loga1) {
+    # Gibbs energy/2.303RT of ideal mixing
+    a1 <- 10^loga1
+    DGmix <- rowSums(a1*loga1)
+    return(DGmix)
+  },
+  optimum="minimal"
+)
+
+qqr <- structure(
+  function(loga1) {
+    # correlation coefficient of a q-q plot (for log-normality comparisons) 20100901
+    # first, a function to calculate qqr
+    qqrfun <- function(x) {
+      # this is to catch errors from qqnorm
+      qqn <- try(qqnorm(x, plot.it=FALSE), silent=TRUE)
+      if(class(qqn)=="try-error") qqr <- NA
+      else qqr <- cor(qqn[[1]], qqn[[2]])
+    }
+    # apply the function to the rows of loga1
+    qqr <- apply(loga1, 1, qqrfun)
+    return(qqr)
+  },
+  optimum="maximal"
+)
+
+logact <- structure(
+  function(loga1, loga2) {
+    # the logarithm of activity of the species identified in loga2 (species number)
+    return(loga1[, loga2[1]])
+  },
+  optimum="maximal"
+)
+
+spearman <- structure(
+  function(loga1, loga2) {
+    # Spearman's rho (rank correlation coefficient) 20100912
+    spearfun <- function(a, b) {
+      # based on help(dSpearman) in package SuppDists
+      if(length(a)!=length(b)) stop("a and b must be same length")
+      if(any(is.na(a)) | any(is.na(b))) return(NA)
+      ra <- rank(a)
+      rb <- rank(b)
+      dr <- rb - ra
+      d <- sum(dr^2)
+      r <- length(a)
+      x <- 1-6*d/(r*(r^2-1))
+      return(x)
+    }
+    rho <- apply(loga1, 1, spearfun, b=loga2)
+    return(rho)
+  },
+  optimum="maximal"
+)
+
+pearson <- structure(
+  function(loga1, loga2) {
+    # pearson correlation coefficient
+    H <- apply(loga1, 1, cor, y=loga2)
+    return(H)
+  },
+  optimum="maximal"
+)
+
+RMSD <- structure(
+  function(loga1, loga2) {
+    rmsd <- function(a, b) {
+      # to calculate the root mean square deviation
+      d <- b - a
+      sd <- d^2
+      msd <- mean(sd)
+      rmsd <- sqrt(msd)
+      return(rmsd)
+    }
+    RMSD <- apply(loga1, 1, rmsd, b=loga2)
+    return(RMSD)
+  },
+  optimum="minimal"
+)
+
+CVRMSD <- structure(
+  function(loga1, loga2) {
+    # coefficient of variation of the root mean squared deviation
+    RMSD <- RMSD(loga1, loga2)
+    CVRMSD <- RMSD/rowMeans(loga1)
+    return(CVRMSD)
+  },
+  optimum="minimal"
+)
+
+DDGmix <- structure(
+  function(loga1, loga2) {
+    # difference in Gibbs energy/2.303RT of ideal mixing
+    a2 <- 10^loga2
+    DGmix2 <- sum(a2*loga2)
+    DGmix1 <- DGmix(loga1)
+    DDGmix <- DGmix2 - DGmix1
+    return(DDGmix)
+  },
+  optimum="minimal"
+)
+
+DGtr <- structure(
+  function(loga1, loga2, Astar) {
+    dgtr <- function(loga1, loga2, Astar) {
+      # calculate the Gibbs energy/2.303RT of transformation
+      # from an initial to a final assemblage at constant T, P and 
+      # chemical potentials of basis species 20120917
+      # loga1 - logarithms of activity of species in the initial assemblage
+      # loga2 - logarithms of activity of species in the final assemblage
+      # Astar - starved (of activity of the species of interest) values of chemical affinity/2.303RT
+      # remove the logarithms
+      a1 <- 10^loga1
+      a2 <- 10^loga2
+      # the molal Gibbs energy in the initial and final states
+      # (derived from integrating A = Astar - RTln(a) from a1 to a2)
+      G1 <- -a1*Astar + a1*loga1 - a1/log(10)
+      G2 <- -a2*Astar + a2*loga2 - a2/log(10)
+      # calculate the change in molal Gibbs energy for each species
+      DG <- G2 - G1
+      # return the sum
+      return(sum(DG))
+    }
+    # we need to index both loga1 and Astar
+    DGtr <- unlist(palply(seq(nrow(loga1)), function(i) {
+      dgtr(loga1[i, ], loga2, Astar[i, ])
+    }))
+    return(DGtr)
+  },
+  optimum="minimal"
+)

Modified: pkg/CHNOSZ/R/revisit.R
===================================================================
--- pkg/CHNOSZ/R/revisit.R	2012-10-02 14:15:53 UTC (rev 25)
+++ pkg/CHNOSZ/R/revisit.R	2012-10-13 06:08:14 UTC (rev 26)
@@ -2,56 +2,39 @@
 # 20090415 functions related to diversity calculations
 # 20100929 merged draw.diversity and revisit
 
-where.extreme <- function(z, target, do.sat=FALSE) {
-  if(missing(target)) stop("no target specified")
-  # are we interested in a maximum or minimum?
-  if(tolower(target) %in% c("sd", "sd.log", "cv", "cv.log", "rmsd", "cvrmsd", "dgxf"))
-    myext <- "minimum" else myext <- "maximum"
-  # do we care about the sign of the index?
-  if(tolower(target) %in% c("sd", "sd.log", "cv", "cv.log", "rmsd", "cvrmsd")) 
-    doabs <- TRUE else doabs <- FALSE
-  # takes a matrix, returns the x,y coordinates of the extremum
-  if(doabs) z <- abs(z)
-  if(myext=="minimum") iext <- which.min(z)
-  else iext <- which.max(z)
-  ret.val <- iext
-  # for a matrix, it gets more complicated esp.
-  # if there are multiple instances of the extremum
-  # (happens with saturated richnesses)
-  if(do.sat & length(dim(z))==2) {
-    iext <- which(z==z[iext])
-    x.out <- y.out <- numeric()
-    xres <- ncol(z)
-    yres <- nrow(z)
-    for(i in 1:length(iext)) {
-      # column (x coord)
-      x <- ceiling(iext[i]/xres)
-      # and row (y coord)
-      y <- iext[i] - floor(iext[i]/yres)*yres
-      if(y==0) y <- yres  # there's a more eloquent way...
-      x.out <- c(x.out,x)
-      y.out <- c(y.out,y)
-    }
-    ret.val <- list(ix=y.out,iy=x.out)
-  }
+optimal.index <- function(z, objective) {
+  # for a vector, returns the index of the optimum value
+  # for a matrix, returns the x, y coordinates of the optimum
+  # find the minimum or maximum? look at attribute of objfun
+  objfun <- get.objfun(objective)
+  optimum <- attributes(objfun)$optimum
+#  # do we care about the sign of the index?
+#  if(tolower(target) %in% c("sd", "sd.log", "cv", "cv.log", "rmsd", "cvrmsd")) 
+#    doabs <- TRUE else doabs <- FALSE
+#  if(doabs) z <- abs(z)
+  # the value of the optimum
+  if(optimum=="minimal") optval <- z[which.min(z)]
+  else optval <- z[which.max(z)]
+  # the index of the optimum
+  # (or indices if multiple instances of the optimum)
+  ret.val <- which(z==optval, arr.ind=TRUE)
   return(ret.val)
 }
 
-extremes <- function(z, target) {
-  if(missing(target)) stop("no target specified")
+extremes <- function(z, objective) {
   # are we interested in a maximum or minimum?
-  if(tolower(target) %in% c("sd", "sd.log", "cv", "cv.log", "rmsd", "cvrmsd", "dgxf")) 
-    myext <- "minimum" else myext <- "maximum"
-  # do we care about the sign of the index?
-  if(tolower(target) %in% c("sd", "sd.log", "cv", "cv.log", "rmsd", "cvrmsd")) 
-    doabs <- TRUE else doabs <- FALSE
+  objfun <- get.objfun(objective)
+  optimum <- attributes(objfun)$optimum
+#  # do we care about the sign of the index?
+#  if(tolower(target) %in% c("sd", "sd.log", "cv", "cv.log", "rmsd", "cvrmsd")) 
+#    doabs <- TRUE else doabs <- FALSE
+#  if(doabs) z <- abs(z)
   # takes a matrix, returns the y as f(x) and x as f(y)
-  # trajectories of the extreme
-  if(doabs) z <- abs(z)
+  # trajectories of the optimum
   y <- x <- numeric()
   xres <- ncol(z)
   yres <- nrow(z)
-  if(myext=="minimum") {
+  if(optimum=="minimal") {
     for(i in 1:xres) y <- c(y, which.min(z[i,]))
     for(i in 1:yres) x <- c(x, which.min(z[,i]))
   } else {
@@ -61,336 +44,200 @@
   return(list(x=x, y=y))
 }
 
-revisit <- function(eout, target="cv", loga.ref=NULL,
-  plot.it=NULL, col=par("fg"), yline=2, ylim=NULL, ispecies=NULL, add=FALSE,
-  cex=par("cex"), lwd=par("lwd"), mar=NULL, side=1:4, xlim=NULL, labcex=0.6,
-  pch=1, legend="", legend.x=NULL, lpch=NULL, main=NULL, lograt.ref=NULL, plot.ext=TRUE, DGxf.swap12=FALSE) {
-  # calculate and plot diversity indices of relative abundances
-  # 20090316 jmd
+revisit <- function(eout, objective = "CV", loga2 = NULL, ispecies = NULL,
+  col = par("fg"), yline = 2, ylim = NULL, cex = par("cex"),
+  lwd = par("lwd"), mar = NULL, side = 1:4, xlim = NULL, labcex = 0.6,
+  pch = 1, main = NULL, plot.it = NULL, add = FALSE, plot.optval = TRUE,
+  style.2D = "contour") {
+  # given a list of logarithms of activities of species
+  # (as vectors or matrices or higher dimensional arrays) 
+  # calculate a diversity index or thermodynamic property 
+  # with the same dimensions   20090316 jmd v1
   # eout can be the output from equilibrate (enables plotting)
   # or simply a list of logarithms of activity 
   # (each list entry must have the same dimensions)
-  # test if the entries have the same dimensions
-  ud <- unique(lapply(1:length(eout),function(x) dim(eout[[x]])))
+
+  # if the entries have the same lengths they are assumed to be logarithms of activity
+  ud <- unique(sapply(eout, length))
   if(length(ud)==1) {
     # eout is list of logarithms of activity
     if(missing(plot.it)) plot.it <- FALSE
-    if(plot.it) stop("can't make a plot if argument 'eout' is not the output from equilibrate()")
-    logact <- eout
+    if(plot.it) stop("can't make a plot if 'eout' is not the output from equilibrate()")
+    loga1 <- eout
+    eout.is.eout <- FALSE
   } else {
-     # d is the output from diagram()
-    if(!"loga.equil" %in% names(eout)) {
+    # test if eout is the output from equilibrate()
+    if(!"loga.equil" %in% names(eout))
       stop(paste("the list provided in 'eout' is not the output from equilibrate()"))
-    }
     if(missing(plot.it)) plot.it <- TRUE
-    logact <- eout$loga.equil
+    loga1 <- eout$loga.equil
+    eout.is.eout <- TRUE
   }
-  # check that all needed arguments are present
-  target.lower <- tolower(target)
-  if(target.lower %in% c("richness", "cvrmsd", "spearman", "pearson", "logact", "dgxf") & is.null(loga.ref))
-    stop(paste("for '", target, "' target, loga.ref must be supplied", sep=""))
+
   # take a subset (or all) of the species
-  if(is.null(ispecies)) ispecies <- 1:length(logact)
-  logact <- logact[ispecies]
-  # number of species
-  ns <- length(logact)
+  if(is.null(ispecies)) ispecies <- 1:length(loga1)
+  loga1 <- loga1[ispecies]
   # the dimensions 
-  mydim <- dim(logact[[1]])
-  nd <- length(mydim)
-  if(nd==1) if(mydim==1) nd <- 0
-  msgout(paste("revisit: calculating", target, "in", nd, "dimensions\n"))
+  dim1 <- dim(as.array(loga1[[1]]))
+  # the number of dimensions
+  nd <- ifelse(identical(dim1, 1L), 0, length(dim1))
+  msgout(paste("revisit: calculating", objective, "in", nd, "dimensions\n"))
 
-  ## on to diversity calculations
-  # given a list of logarithms of activities of species
-  # (as vectors or matrices or higher dimensional arrays) 
-  # calculate a diversity index of the same dimensions
-  # these targets only depend on the logarithms of activities from diagram() (logact):
-  # "shannon" shannon entropy
-  # "sd" standard deviation
-  # "cv" coefficient of variation
-  # "sd.log", "cv.log" SD/CV for the logarithms of activity
-  # "qqr" correlation coefficient on q-q plot (i.e., normality test)
-  # these targets also depend on reference logarithms of activities (loga.ref):
-  # "richness" species richness
-  # "cvrmsd" coefficient of variation of rmsd
-  # "spearman" spearman correlation coefficient
-  # "pearson" pearson correlation coefficient
-  # "logact" maximize the activity of a species
-  # "DGxf" minimize the Gibbs energy of transformation
-  
-  # vectorize the entries in the logact list
-  for(i in 1:ns) {
-    logact[[i]] <- as.vector(logact[[i]])
+  # get the objective function
+  objfun <- get.objfun(objective)
+  # the arguments to the function (a1, [a2]) or (loga1, [loga2], [Astar])
+  objargs <- names(formals(objfun))
+
+  # these objectives only depend on the activities (a1/loga1):
+  # shannon, SD, CV, QQR
+  if(any(grepl("a1", objargs))) {
+    # vectorize the list entries: a1/loga1
+    loga1 <- sapply(loga1, c)
+    # for 0-D case we need to keep loga1 as a 1-row matrix (sapply simplifies to vector)
+    if(nd==0) loga1 <- t(loga1)
     # convert infinite values to NA
-    logact[[i]][is.infinite(logact[[i]])] <- NA
+    loga1[is.infinite(loga1)] <- NA
+    # remove logarithms if necessary
+    if(any(grepl("loga1", objargs))) a1 <- loga1
+    else a1 <- 10^loga1
   }
-  # make a place for the results
-  H <- logact[[1]]
-  H[] <- 0
-  # ratio-specific calculations
-  if(!is.null(lograt.ref)) {
-    if(!target.lower %in% c("rmsd", "cvrmsd", "spearman", "pearson"))
-      stop(paste("target",target,"not available when comparing activity ratios"))
-    else(msgout("revisit: calculating activity ratios\n"))
-    # instead of logact we use calculated log activity ratio
-    logact <- lograt(loga.ref, logact)
-    loga.ref <- lograt.ref
+
+  # these objectives also depend on reference activities (a2/loga2):
+  # richness, RMSD, CVRMSD, spearman, pearson, DGtr
+  if(any(grepl("a2", objargs))) {
+    # check that all needed arguments are present
+    if(is.null(loga2)) stop(paste("loga2 must be supplied for", objective))
+    # if loga2 is a single value, expand it into the dimensions of loga1
+    if(length(loga2)==1) loga2 <- rep(loga2, length.out=ncol(loga1))
+    # check that loga2 has the same length as loga1
+    if(!identical(ncol(loga1), length(loga2))) stop(paste("loga2 has different length (", 
+      length(loga2), ") than list in eout (", ncol(loga1), ")", sep=""))
+    # remove logarithms if necessary
+    if(any(grepl("loga2", objargs))) a2 <- loga2
+    else a2 <- 10^loga2
   }
-  # target-specific calculations
 
-  if(target.lower %in% c("sd", "cv")) {
-    # build a matrix; rows are species
-    myad <- t(list2array(logact))
-    # remove logarithms
-    myad <- 10^myad
-    # get the standard deviations
-    H <- palply(1:ncol(myad), function(i) sd(myad[,i]))
-    H <- as.numeric(H)
-    # for coefficient of variation, divide by the mean
-    if(target.lower=="cv") H <- H / colMeans(myad)
+  # construct array of values: Astar (for DGtr)
+  if(any(grepl("Astar", objargs))) {
+    Astar <- eout$Astar[ispecies]
+    Astar <- sapply(Astar, as.vector)
+  }
 
-  } else if(target.lower %in% c("sd.log", "cv.log")) {
-    # build a matrix; rows are species
-    myad <- t(list2array(logact))
-    # get the standard deviations
-    H <- palply(1:ncol(myad), function(i) sd(myad[,i]))
-    H <- as.numeric(H)
-    # for coefficient of variation, divide by the mean
-    if(target.lower=="cv.log") H <- H / colMeans(myad)
+  # calculation of the objective function
+  # "H" is a remnant of the first target, shannon entropy
+  if(length(objargs) == 1) H <- objfun(a1)
+  else if(length(objargs) == 2) H <- objfun(a1, a2)
+  else if(length(objargs) == 3) H <- objfun(a1, a2, Astar)
 
-  } else if(target.lower=="richness") {
-    # given a list of logarithms of activities of species
-    # (as matrices or vectors) calculate the richness 
-    # make a place for the results
-    # loop over species
-    for(j in 1:length(logact)) {
-      isthere <- logact[[j]] > loga.ref
-      H[isthere] <- H[isthere] + 1
-    }
-
-  } else if(target.lower=="shannon") {
-    # for this calculation we need to loop twice;
-    # first to convert logact to act and get acttotal
-    act <- logact
-    for(i in 1:ns) {
-      # exponentiate the logarithmic values
-      act[[i]] <- 10^logact[[i]]
-      if(i==1) acttotal <- act[[i]] else acttotal <- acttotal + act[[i]]
-    }
-    # now do the calculation
-    for(i in 1:ns) {
-      dH <- -act[[i]]/acttotal*log(act[[i]]/acttotal)
-      if(!any(is.na(dH))) H <- H + dH
-      else(warning(paste("revisit: skipping species", i, "which gives NA")))
-    }
-
-  } else if(target.lower=="qqr") {
-    # normality test using correlation coefficient of a q-q plot 20100901
-    actarr <- list2array(logact)
-    qqrfun <- function(i, actarr) {
-      y <- actarr[i,]
-      # this is to catch errors from qqr (qqnorm)
-      out <- try(qqr(y), silent=TRUE)
-      if(class(out)=="try-error") out <- NA
-      return(out)
-    }
-    H <- as.numeric(palply(1:length(H), qqrfun, actarr))
-
-  } else if(target.lower=="spearman") {
-    # spearman rank correlation coefficient 20100912
-    actarr <- list2array(logact)
-    spearfun <- function(i, actarr) {
-      y <- actarr[i,]
-      out <- spearman(y, loga.ref)
-      return(out)
-    }
-    H <- as.numeric(palply(1:length(H), spearfun, actarr))
-
-  } else if(target.lower=="pearson") {
-    # pearson correlation coefficient
-    actarr <- list2array(logact)
-    pearfun <- function(i, actarr) {
-      y <- actarr[i, ]
-      out <- cor(y, loga.ref)
-      return(out)
-    }
-    H <- as.numeric(palply(1:length(H), pearfun, actarr))
-
-  } else if(target.lower=="rmsd") {
-    # root mean squared deviation
-    actarr <- list2array(logact)
-    rmsdfun <- function(i, actarr) {
-      y <- actarr[i, ]
-      out <- rmsd(loga.ref, y)
-      return(out)
-    }
-    H <- as.numeric(palply(1:length(H), rmsdfun, actarr))
-
-  } else if(target.lower=="cvrmsd") {
-    # coefficient of variation of the root mean squared deviation
-    actarr <- list2array(logact)
-    cvrmsdfun <- function(i, actarr) {
-      y <- actarr[i, ]
-      out <- cvrmsd(loga.ref, y)
-      return(out)
-    }
-    H <- as.numeric(palply(1:length(H), cvrmsdfun, actarr))
-
-  } else if(target.lower=="logact") {
-    # where the activity of a species is maximal
-    H <- logact[[loga.ref]]
-
-  } else if(target.lower=="dgxf") {
-    # Gibbs energy of transformation to the observed assemblage 
-    actarr <- list2array(logact)
-    # select species, vectorize, then put the Astar values into an array
-    Astar <- eout$Astar[ispecies]
-    for(i in 1:ns) Astar[[i]] <- as.vector(Astar[[i]])
-    Astararr <- list2array(Astar)
-    Gfun <- function(i, actarr, Astararr) {
-      loga.equil <- actarr[i, ]
-      Astar <- Astararr[i, ]
-      # direction of transformation:
-      # swap12=FALSE: loga.equil(Astar) --> loga.ref
-      # swap12=TRUE:  loga.ref(Astar) --> loga.equil
-      if(DGxf.swap12) out <- -DGxf(loga.ref, loga.equil, Astar)
-      else out <- DGxf(loga.equil, loga.ref, Astar)
-      return(out)
-    }
-    H <- as.numeric(palply(1:length(H), Gfun, actarr, Astararr))
-  
-  } else stop(paste("specified target '", target, "' not available", sep=""))
   # replace dims
-  dim(H) <- mydim
+  dim(H) <- dim1
+
   ## now on to plotting + assembling return values
+  # for zero or more than two dimensions we'll just return the values
+  # of the objective function and the index of the optimal value
+  iopt <- optimal.index(H, objective)
+  ret.val <- list(H=H, iopt=iopt)
   # get information about the x-axis
-  if(plot.it & nd > 0 & nd < 3) {
+  if(eout.is.eout & nd > 0) {
     xname <- eout$vars[1]
-    yname <- eout$vars[2]
-    xres <- length(eout$vals[[1]])
-    xrange <- range(eout$vals[[1]])
-    # special operations for pH
-    if(xname=="H+") {
-      xname <- "pH"
-      xrange <- -xrange 
-    }
     # the x-values
-    xs <- seq(xrange[1],xrange[2],length.out=xres)
-  }
-  # make plots and return values
+    xs <- eout$vals[[1]]
+    xrange <- range(xs)
+  } else xs <- NA
+
+  # make plots and assemble return values
   if(nd==0) {
     # a 0-D plot
     if(plot.it) {
-      plotted <- FALSE
-      if(target.lower=="qqr") {
-        actarr <- list2array(logact)
-        qqnorm(actarr,col=col,pch=pch,main="")
-        qqline(actarr)
-        plotted <- TRUE
-      } else if(target.lower %in% c("rmsd","cvrmsd","spearman","pearson")) {
-        # plot the points
-        ylab <- "loga.calc"
-        xlab <- "loga.ref"
-        if(!is.null(lograt.ref)) {
-          ylab <- "lograt.calc"
-          xlab <- "lograt.ref"
-        }
-        plot(loga.ref,actarr,xlab=xlab,ylab=ylab,pch=pch,col=col)
+      if(objective=="qqr") {
+        # make a q-q plot for qqr
+        qqnorm(loga1, col=col, pch=pch)
+        qqline(loga1)
+      } else if(any(grepl("a2", objargs))) {
+        # plot the points for a referenced objective
+        ylab <- "loga1"
+        xlab <- "loga2"
+        plot(loga2, loga1, xlab=xlab, ylab=ylab, pch=pch, col=col)
         # add a 1:1 line
-        lines(range(loga.ref),range(loga.ref),col="grey")
+        lines(range(loga2), range(loga2), col="grey")
         # add a lowess line
-        ls <- loess.smooth(loga.ref,actarr)
-        lines(ls$x,ls$y,col="red")
-        plotted <- TRUE
-      }
-      if(plotted) {
-        # add a title
-        if(missing(main)) main <- paste(target,"=",round(H,3)) 
-        title(main=main)
-        if(!is.null(legend.x)) {
-          if(is.null(lpch)) lpch <- unique(pch)
-          legend(legend.x,legend=legend,pch=lpch)
-        }
-      }
+        ls <- loess.smooth(loga2, loga1)
+        lines(ls$x, ls$y, col="red")
+      } else plot.it <- FALSE
+      # add a title
+      if(missing(main)) main <- paste(objective, "=", round(H,3)) 
+      if(plot.it) title(main=main)
     }
-    ret.val <- list(H=H)
-
   } else if(nd==1) {
-    # locate the extremum
-    ix <- where.extreme(H,target)
-    extval <- H[ix]
+    # locate the optimal value
+    ixopt <- c(iopt)
+    xopt <- xs[ixopt]
+    optimum <- H[ixopt]
+    ret.val <- list(H=H, ixopt=ixopt, xopt=xopt, optimum=optimum)
     # a 1-D plot
     if(plot.it) {
-      if(is.null(ylim)) {
-        if(target.lower=="richness") {
-            ylim <- 0
-            if(max(H) > ylim) ylim <- max(H) + 1
-            ylim <- c(0,ylim)
-        }
-        else ylim <- extendrange(H,f=0.075)
-      }
+      if(is.null(ylim)) ylim <- extendrange(H, f=0.075)
       if(is.null(xlim)) xlim <- xrange
-      # format the target name if it's DGxf
-      if(target.lower=="dgxf") ylab <- expr.property("DGxf/2.303RT")
-      else ylab <- target
-      if(!add) thermo.plot.new(xlim=xlim,ylim=ylim,xlab=axis.label(xname),ylab=ylab,yline=yline,
-        cex=cex,lwd=lwd,mar=mar,side=side)
[TRUNCATED]

To get the complete diff run:
    svnlook diff /svnroot/chnosz -r 26


More information about the CHNOSZ-commits mailing list