[CHNOSZ-commits] r26 - in pkg/CHNOSZ: . R inst inst/doc inst/tests man vignettes
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Sat Oct 13 08:08:15 CEST 2012
Author: jedick
Date: 2012-10-13 08:08:14 +0200 (Sat, 13 Oct 2012)
New Revision: 26
Added:
pkg/CHNOSZ/R/objective.R
pkg/CHNOSZ/man/objective.Rd
Removed:
pkg/CHNOSZ/R/util.stat.R
pkg/CHNOSZ/man/util.stat.Rd
Modified:
pkg/CHNOSZ/DESCRIPTION
pkg/CHNOSZ/R/diagram.R
pkg/CHNOSZ/R/equilibrate.R
pkg/CHNOSZ/R/examples.R
pkg/CHNOSZ/R/findit.R
pkg/CHNOSZ/R/revisit.R
pkg/CHNOSZ/R/util.expression.R
pkg/CHNOSZ/R/util.misc.R
pkg/CHNOSZ/inst/NEWS
pkg/CHNOSZ/inst/doc/anintro.pdf
pkg/CHNOSZ/inst/tests/test-equilibrate.R
pkg/CHNOSZ/inst/tests/test-findit.R
pkg/CHNOSZ/inst/tests/test-revisit.R
pkg/CHNOSZ/man/CHNOSZ-package.Rd
pkg/CHNOSZ/man/diagram.Rd
pkg/CHNOSZ/man/equilibrate.Rd
pkg/CHNOSZ/man/findit.Rd
pkg/CHNOSZ/man/revisit.Rd
pkg/CHNOSZ/man/util.expression.Rd
pkg/CHNOSZ/man/util.misc.Rd
pkg/CHNOSZ/vignettes/anintro.Rnw
pkg/CHNOSZ/vignettes/anintro.lyx
pkg/CHNOSZ/vignettes/flowchart.dia
pkg/CHNOSZ/vignettes/flowchart.pdf
Log:
revised objective functions
Modified: pkg/CHNOSZ/DESCRIPTION
===================================================================
--- pkg/CHNOSZ/DESCRIPTION 2012-10-02 14:15:53 UTC (rev 25)
+++ pkg/CHNOSZ/DESCRIPTION 2012-10-13 06:08:14 UTC (rev 26)
@@ -1,6 +1,6 @@
-Date: 2012-10-02
+Date: 2012-10-13
Package: CHNOSZ
-Version: 0.9.8
+Version: 0.9.8-1
Title: Chemical Thermodynamics and Activity Diagrams
Author: Jeffrey M. Dick
Maintainer: Jeffrey M. Dick <jmdick at asu.edu>
Modified: pkg/CHNOSZ/R/diagram.R
===================================================================
--- pkg/CHNOSZ/R/diagram.R 2012-10-02 14:15:53 UTC (rev 25)
+++ pkg/CHNOSZ/R/diagram.R 2012-10-13 06:08:14 UTC (rev 26)
@@ -42,7 +42,7 @@
if(!"loga.equil" %in% names(eout)) {
eout.is.aout <- TRUE
# get the balancing coefficients
- n.balance <- balance.coeffs(eout, balance)
+ n.balance <- balance(eout, balance)$n
}
} else if(what %in% rownames(eout$basis)) {
# to calculate the loga of basis species at equilibrium
Modified: pkg/CHNOSZ/R/equilibrate.R
===================================================================
--- pkg/CHNOSZ/R/equilibrate.R 2012-10-02 14:15:53 UTC (rev 25)
+++ pkg/CHNOSZ/R/equilibrate.R 2012-10-13 06:08:14 UTC (rev 26)
@@ -10,7 +10,8 @@
## number of possible species
nspecies <- length(aout$values)
## get the balancing coefficients
- n.balance <- balance.coeffs(aout, balance)
+ balance <- balance(aout, balance)
+ n.balance <- balance$n
## take selected species in 'ispecies'
if(length(ispecies)==0) stop("the length of ispecies is zero")
# take out species that have NA affinities
@@ -29,13 +30,13 @@
if(length(n.balance) < 100) msgout(paste("equilibrate: balancing coefficients are", c2s(n.balance), "\n"))
## logarithm of total activity of the balance
if(is.numeric(loga.balance))
- msgout(paste("equilibrate: log total activity of", balance, "from argument is", loga.balance, "\n"))
+ msgout(paste("equilibrate: logarithm of total", balance$description, "(from loga.balance) is", loga.balance, "\n"))
else {
# sum up the activities, then take absolute value
# in case n.balance is negative
sumact <- abs(sum(10^aout$species$logact * n.balance))
loga.balance <- log10(sumact)
- msgout(paste("equilibrate: log total activity of", balance, "from species is", loga.balance, "\n"))
+ msgout(paste("equilibrate: logarithm of total", balance$description, "is", loga.balance, "\n"))
}
## normalize -- normalize the molar formula by the balance coefficients
# # this is the default for systems of proteins as of 20091119
@@ -224,7 +225,7 @@
return(logact)
}
-balance.coeffs <- function(aout, balance=NULL) {
+balance <- function(aout, balance=NULL) {
## generate n.balance from user-given or automatically identified basis species
## extracted from equilibrate() 20120929
# 'balance' can be:
@@ -250,14 +251,15 @@
n.balance <- rep(balance, length.out=length(aout$values))
if(all(n.balance==1)) msgout("balance: coefficients are unity\n")
# use 'balance' below as a name
- if(all(n.balance==1)) balance <- "species"
- else balance <- "[user defined]"
+ if(all(n.balance==1)) balance.description <- "moles of species"
+ else balance <- "moles of user-specified coefficients"
} else {
# "length" for balancing on protein length
if(identical(balance, "length")) {
if(!all(isprotein)) stop("length was the requested balance, but some species are not proteins")
n.balance <- protein.length(aout$species$name)
- msgout("balance: coefficients are protein length\n")
+ balance.description <- "protein length"
+ msgout(paste("balance: coefficients are", balance.description, "\n"))
} else {
# is the balance the name of a basis species?
if(length(ibalance)==0) {
@@ -266,12 +268,13 @@
}
# the name of the basis species (need this if we got ibalance which which.balance, above)
balance <- colnames(aout$species)[ibalance[1]]
- msgout(paste("balance: coefficients are moles of", balance, "in formation reactions\n"))
+ balance.description <- paste("moles of", balance)
+ msgout(paste("balance: coefficients are", balance.description, "in formation reactions\n"))
# the balance vector
n.balance <- aout$species[, ibalance[1]]
# we check if that all formation reactions contain this basis species
if(any(n.balance==0)) stop("some species have no ", balance, " in the formation reaction")
}
}
- return(n.balance)
+ return(list(n=n.balance, description=balance.description))
}
Modified: pkg/CHNOSZ/R/examples.R
===================================================================
--- pkg/CHNOSZ/R/examples.R 2012-10-02 14:15:53 UTC (rev 25)
+++ pkg/CHNOSZ/R/examples.R 2012-10-13 06:08:14 UTC (rev 26)
@@ -9,10 +9,10 @@
"util.args", "util.array", "util.blast", "util.character",
"util.data", "util.expression", "util.fasta", "util.formula", "util.matrix",
"util.misc", "util.program",
- "util.seq", "util.stat", "util.units", "taxonomy", "info", "protein.info", "hkf", "water", "subcrt",
+ "util.seq", "util.units", "taxonomy", "info", "protein.info", "hkf", "water", "subcrt",
"makeup", "basis", "swap.basis", "species", "affinity", "util.affinity", "equil.boltzmann",
- "diagram", "buffer", "iprotein", "protein", "ionize.aa", "more.aa", "read.expr", "revisit",
- "findit", "transfer", "anim", "EOSregress", "wjd")
+ "diagram", "buffer", "iprotein", "protein", "ionize.aa", "more.aa", "read.expr",
+ "objective", "revisit", "findit", "transfer", "anim", "EOSregress", "wjd")
plot.it <- FALSE
if(is.character(do.png))
png(paste(do.png,"%d.png",sep=""),width=700,height=700,pointsize=18)
Modified: pkg/CHNOSZ/R/findit.R
===================================================================
--- pkg/CHNOSZ/R/findit.R 2012-10-02 14:15:53 UTC (rev 25)
+++ pkg/CHNOSZ/R/findit.R 2012-10-13 06:08:14 UTC (rev 26)
@@ -1,14 +1,14 @@
# CHNOSZ/findit.R
-# find the minimum or maximum of a target
+# find the minimum or maximum of a objective
# (eg cv, richness, shannon)
# as a function the specified chemical potentials
# 20100831 jmd
-findit <- function(lims=list(), target="cv", niter=NULL, iprotein=NULL, plot.it=TRUE,
- T=25, P="Psat", res=NULL, labcex=0.6, loga.ref=NULL,
+findit <- function(lims=list(), objective="CV", niter=NULL, iprotein=NULL, plot.it=TRUE,
+ T=25, P="Psat", res=NULL, labcex=0.6, loga2=NULL,
loga.balance=0, rat=NULL, balance=NULL) {
# the lims list has the limits of the arguments to affinity()
- # we iteratively move toward a higher/lower value of the target
+ # we iteratively move toward a higher/lower value of the objective
# within these limits
# fun stuff: when running on either side of 100 deg C,
@@ -117,19 +117,13 @@
# now calculate the affinities
a <- do.call(affinity,aargs)
- # then calculate the values of the target function
+ # then calculate the values of the objective function
e <- equilibrate(a, balance=balance, loga.balance=loga.balance)
- dd <- revisit(e$loga.equil, target, loga.ref=loga.ref)$H
- # find the extreme value
- iext <- where.extreme(dd,target)
- # find the extreme value
- teststat <- c(teststat,dd[iext])
- # what are its coordinates
- dd1 <- dd
- dd1[] <- 1
- ai <- which(dd1==1,arr.ind=TRUE)
- if(nd==1) ai <- ai[iext]
- else ai <- ai[iext,]
+ dd <- revisit(e$loga.equil, objective, loga2=loga2)$H
+ # coordinates of the extreme value (take only the first set of coords)
+ iopt <- optimal.index(dd, objective)[1,, drop=FALSE]
+ # the extreme value
+ teststat <- c(teststat,dd[iopt])
# loop to update the current parameters
for(j in 1:length(lims)) {
@@ -139,7 +133,7 @@
# the increments used
myinc <- seq(mylims[1],mylims[2],length.out=mylims[3])
# the value of the variable at the extreme of the function
- myval <- myinc[ai[j]]
+ myval <- myinc[iopt[j]]
# update the basis table, T or P
if(names(lims)[j] %in% rownames(basis)) {
ibasis <- match(names(lims)[j],rownames(basis))
@@ -162,20 +156,20 @@
# add our search lines and extreme points
# to the plot
if(nd==1) {
- if(i==1) revisit(e,target,loga.ref,xlim=lims[[1]])
+ if(i==1) revisit(e,objective,loga2,xlim=lims[[1]])
# on a 1-D diagram we add vertical lines to show our search limits
abline(v=outlims[[1]][1:2])
lines(myinc,dd)
- points(myval,dd[iext])
+ points(myval,dd[iopt])
} else if(nd==2) {
- if(i==1) revisit(e,target,loga.ref,xlim=lims[[1]],ylim=lims[[2]],labcex=labcex)
+ if(i==1) revisit(e,objective,loga2,xlim=lims[[1]],ylim=lims[[2]],labcex=labcex)
else {
# on a 2-D diagram we add a box around our search limits
# and an updated map for this region
ol1 <- outlims[[1]]
ol2 <- outlims[[2]]
rect(ol1[1],ol2[1],ol1[2],ol2[2],border=par("fg"),col="white")
- revisit(e,target,loga.ref,xlim=lims[[1]],ylim=lims[[2]],add=TRUE,labcex=labcex)
+ revisit(e,objective,loga2,xlim=lims[[1]],ylim=lims[[2]],add=TRUE,labcex=labcex)
}
text(out[[1]],out[[2]])
points(out[[1]],out[[2]],cex=2)
@@ -193,10 +187,10 @@
# we extract the third dimension until only two remain
for(j in 3:nd) {
# ai[j] - which slice in this dimension has the extremum
- for(k in 1:length(e$loga.equil)) e$loga.equil[[k]] <- slice(e$loga.equil[[k]],3,ai[j])
+ for(k in 1:length(e$loga.equil)) e$loga.equil[[k]] <- slice(e$loga.equil[[k]],3,iopt[j])
}
# now make the plot
- revisit(e,target,loga.ref,xlim=lims[[1]],ylim=lims[[2]],add=add,labcex=labcex)
+ revisit(e,objective,loga2,xlim=lims[[1]],ylim=lims[[2]],add=add,labcex=labcex)
# indicate the location of the extremum
text(out[[1]],out[[2]])
points(out[[1]],out[[2]],cex=2)
@@ -211,7 +205,7 @@
} # end main loop
# build return list: values of chemical variables and test statistic
teststat <- list(teststat)
- names(teststat) <- target
+ names(teststat) <- objective
value <- c(out,teststat)
# build return list: limits at each step
out <- list(value=value,lolim=lolim,hilim=hilim)
Added: pkg/CHNOSZ/R/objective.R
===================================================================
--- pkg/CHNOSZ/R/objective.R (rev 0)
+++ pkg/CHNOSZ/R/objective.R 2012-10-13 06:08:14 UTC (rev 26)
@@ -0,0 +1,181 @@
+# CHNOSZ/objective.R
+# objective functions for revisit(), findit()
+# all objective functions in one file, added attributes 20121009 jmd
+
+get.objfun <- function(objective) {
+ # get the function with the name given in 'objective'
+ objfun <- get(objective)
+ # perform a check on its usability
+ if(!"optimum" %in% names(attributes(objfun)))
+ stop(paste(objective, "is not a function with an attribute named 'optimum'"))
+ # return the function
+ return(objfun)
+}
+
+# input: a1 or loga1 is matrix with a column for each species and a row for each condition
+# output: vector with length equal to number of rows of a1 or loga1
+# attributes:
+# optimum - the target for optimization (minimal or maximal)
+## absolute - is the absolute value of the function used in the calculation? (TRUE or FALSE)
+
+SD <- structure(
+ function(a1) {
+ # calculate standard deviation
+ SD <- apply(a1, 1, sd)
+ return(SD)
+ },
+ optimum="minimal"
+)
+
+CV <- structure(
+ function(a1) {
+ # get the coefficient of variation
+ SD <- SD(a1)
+ CV <- SD / rowMeans(a1)
+ return(CV)
+ },
+ optimum="minimal"
+)
+
+shannon <- structure(
+ function(a1) {
+ # shannon entropy
+ p <- a1/rowSums(a1)
+ H <- rowSums(-p*log(p))
+ return(H)
+ },
+ optimum="maximal"
+)
+
+DGmix <- structure(
+ function(loga1) {
+ # Gibbs energy/2.303RT of ideal mixing
+ a1 <- 10^loga1
+ DGmix <- rowSums(a1*loga1)
+ return(DGmix)
+ },
+ optimum="minimal"
+)
+
+qqr <- structure(
+ function(loga1) {
+ # correlation coefficient of a q-q plot (for log-normality comparisons) 20100901
+ # first, a function to calculate qqr
+ qqrfun <- function(x) {
+ # this is to catch errors from qqnorm
+ qqn <- try(qqnorm(x, plot.it=FALSE), silent=TRUE)
+ if(class(qqn)=="try-error") qqr <- NA
+ else qqr <- cor(qqn[[1]], qqn[[2]])
+ }
+ # apply the function to the rows of loga1
+ qqr <- apply(loga1, 1, qqrfun)
+ return(qqr)
+ },
+ optimum="maximal"
+)
+
+logact <- structure(
+ function(loga1, loga2) {
+ # the logarithm of activity of the species identified in loga2 (species number)
+ return(loga1[, loga2[1]])
+ },
+ optimum="maximal"
+)
+
+spearman <- structure(
+ function(loga1, loga2) {
+ # Spearman's rho (rank correlation coefficient) 20100912
+ spearfun <- function(a, b) {
+ # based on help(dSpearman) in package SuppDists
+ if(length(a)!=length(b)) stop("a and b must be same length")
+ if(any(is.na(a)) | any(is.na(b))) return(NA)
+ ra <- rank(a)
+ rb <- rank(b)
+ dr <- rb - ra
+ d <- sum(dr^2)
+ r <- length(a)
+ x <- 1-6*d/(r*(r^2-1))
+ return(x)
+ }
+ rho <- apply(loga1, 1, spearfun, b=loga2)
+ return(rho)
+ },
+ optimum="maximal"
+)
+
+pearson <- structure(
+ function(loga1, loga2) {
+ # pearson correlation coefficient
+ H <- apply(loga1, 1, cor, y=loga2)
+ return(H)
+ },
+ optimum="maximal"
+)
+
+RMSD <- structure(
+ function(loga1, loga2) {
+ rmsd <- function(a, b) {
+ # to calculate the root mean square deviation
+ d <- b - a
+ sd <- d^2
+ msd <- mean(sd)
+ rmsd <- sqrt(msd)
+ return(rmsd)
+ }
+ RMSD <- apply(loga1, 1, rmsd, b=loga2)
+ return(RMSD)
+ },
+ optimum="minimal"
+)
+
+CVRMSD <- structure(
+ function(loga1, loga2) {
+ # coefficient of variation of the root mean squared deviation
+ RMSD <- RMSD(loga1, loga2)
+ CVRMSD <- RMSD/rowMeans(loga1)
+ return(CVRMSD)
+ },
+ optimum="minimal"
+)
+
+DDGmix <- structure(
+ function(loga1, loga2) {
+ # difference in Gibbs energy/2.303RT of ideal mixing
+ a2 <- 10^loga2
+ DGmix2 <- sum(a2*loga2)
+ DGmix1 <- DGmix(loga1)
+ DDGmix <- DGmix2 - DGmix1
+ return(DDGmix)
+ },
+ optimum="minimal"
+)
+
+DGtr <- structure(
+ function(loga1, loga2, Astar) {
+ dgtr <- function(loga1, loga2, Astar) {
+ # calculate the Gibbs energy/2.303RT of transformation
+ # from an initial to a final assemblage at constant T, P and
+ # chemical potentials of basis species 20120917
+ # loga1 - logarithms of activity of species in the initial assemblage
+ # loga2 - logarithms of activity of species in the final assemblage
+ # Astar - starved (of activity of the species of interest) values of chemical affinity/2.303RT
+ # remove the logarithms
+ a1 <- 10^loga1
+ a2 <- 10^loga2
+ # the molal Gibbs energy in the initial and final states
+ # (derived from integrating A = Astar - RTln(a) from a1 to a2)
+ G1 <- -a1*Astar + a1*loga1 - a1/log(10)
+ G2 <- -a2*Astar + a2*loga2 - a2/log(10)
+ # calculate the change in molal Gibbs energy for each species
+ DG <- G2 - G1
+ # return the sum
+ return(sum(DG))
+ }
+ # we need to index both loga1 and Astar
+ DGtr <- unlist(palply(seq(nrow(loga1)), function(i) {
+ dgtr(loga1[i, ], loga2, Astar[i, ])
+ }))
+ return(DGtr)
+ },
+ optimum="minimal"
+)
Modified: pkg/CHNOSZ/R/revisit.R
===================================================================
--- pkg/CHNOSZ/R/revisit.R 2012-10-02 14:15:53 UTC (rev 25)
+++ pkg/CHNOSZ/R/revisit.R 2012-10-13 06:08:14 UTC (rev 26)
@@ -2,56 +2,39 @@
# 20090415 functions related to diversity calculations
# 20100929 merged draw.diversity and revisit
-where.extreme <- function(z, target, do.sat=FALSE) {
- if(missing(target)) stop("no target specified")
- # are we interested in a maximum or minimum?
- if(tolower(target) %in% c("sd", "sd.log", "cv", "cv.log", "rmsd", "cvrmsd", "dgxf"))
- myext <- "minimum" else myext <- "maximum"
- # do we care about the sign of the index?
- if(tolower(target) %in% c("sd", "sd.log", "cv", "cv.log", "rmsd", "cvrmsd"))
- doabs <- TRUE else doabs <- FALSE
- # takes a matrix, returns the x,y coordinates of the extremum
- if(doabs) z <- abs(z)
- if(myext=="minimum") iext <- which.min(z)
- else iext <- which.max(z)
- ret.val <- iext
- # for a matrix, it gets more complicated esp.
- # if there are multiple instances of the extremum
- # (happens with saturated richnesses)
- if(do.sat & length(dim(z))==2) {
- iext <- which(z==z[iext])
- x.out <- y.out <- numeric()
- xres <- ncol(z)
- yres <- nrow(z)
- for(i in 1:length(iext)) {
- # column (x coord)
- x <- ceiling(iext[i]/xres)
- # and row (y coord)
- y <- iext[i] - floor(iext[i]/yres)*yres
- if(y==0) y <- yres # there's a more eloquent way...
- x.out <- c(x.out,x)
- y.out <- c(y.out,y)
- }
- ret.val <- list(ix=y.out,iy=x.out)
- }
+optimal.index <- function(z, objective) {
+ # for a vector, returns the index of the optimum value
+ # for a matrix, returns the x, y coordinates of the optimum
+ # find the minimum or maximum? look at attribute of objfun
+ objfun <- get.objfun(objective)
+ optimum <- attributes(objfun)$optimum
+# # do we care about the sign of the index?
+# if(tolower(target) %in% c("sd", "sd.log", "cv", "cv.log", "rmsd", "cvrmsd"))
+# doabs <- TRUE else doabs <- FALSE
+# if(doabs) z <- abs(z)
+ # the value of the optimum
+ if(optimum=="minimal") optval <- z[which.min(z)]
+ else optval <- z[which.max(z)]
+ # the index of the optimum
+ # (or indices if multiple instances of the optimum)
+ ret.val <- which(z==optval, arr.ind=TRUE)
return(ret.val)
}
-extremes <- function(z, target) {
- if(missing(target)) stop("no target specified")
+extremes <- function(z, objective) {
# are we interested in a maximum or minimum?
- if(tolower(target) %in% c("sd", "sd.log", "cv", "cv.log", "rmsd", "cvrmsd", "dgxf"))
- myext <- "minimum" else myext <- "maximum"
- # do we care about the sign of the index?
- if(tolower(target) %in% c("sd", "sd.log", "cv", "cv.log", "rmsd", "cvrmsd"))
- doabs <- TRUE else doabs <- FALSE
+ objfun <- get.objfun(objective)
+ optimum <- attributes(objfun)$optimum
+# # do we care about the sign of the index?
+# if(tolower(target) %in% c("sd", "sd.log", "cv", "cv.log", "rmsd", "cvrmsd"))
+# doabs <- TRUE else doabs <- FALSE
+# if(doabs) z <- abs(z)
# takes a matrix, returns the y as f(x) and x as f(y)
- # trajectories of the extreme
- if(doabs) z <- abs(z)
+ # trajectories of the optimum
y <- x <- numeric()
xres <- ncol(z)
yres <- nrow(z)
- if(myext=="minimum") {
+ if(optimum=="minimal") {
for(i in 1:xres) y <- c(y, which.min(z[i,]))
for(i in 1:yres) x <- c(x, which.min(z[,i]))
} else {
@@ -61,336 +44,200 @@
return(list(x=x, y=y))
}
-revisit <- function(eout, target="cv", loga.ref=NULL,
- plot.it=NULL, col=par("fg"), yline=2, ylim=NULL, ispecies=NULL, add=FALSE,
- cex=par("cex"), lwd=par("lwd"), mar=NULL, side=1:4, xlim=NULL, labcex=0.6,
- pch=1, legend="", legend.x=NULL, lpch=NULL, main=NULL, lograt.ref=NULL, plot.ext=TRUE, DGxf.swap12=FALSE) {
- # calculate and plot diversity indices of relative abundances
- # 20090316 jmd
+revisit <- function(eout, objective = "CV", loga2 = NULL, ispecies = NULL,
+ col = par("fg"), yline = 2, ylim = NULL, cex = par("cex"),
+ lwd = par("lwd"), mar = NULL, side = 1:4, xlim = NULL, labcex = 0.6,
+ pch = 1, main = NULL, plot.it = NULL, add = FALSE, plot.optval = TRUE,
+ style.2D = "contour") {
+ # given a list of logarithms of activities of species
+ # (as vectors or matrices or higher dimensional arrays)
+ # calculate a diversity index or thermodynamic property
+ # with the same dimensions 20090316 jmd v1
# eout can be the output from equilibrate (enables plotting)
# or simply a list of logarithms of activity
# (each list entry must have the same dimensions)
- # test if the entries have the same dimensions
- ud <- unique(lapply(1:length(eout),function(x) dim(eout[[x]])))
+
+ # if the entries have the same lengths they are assumed to be logarithms of activity
+ ud <- unique(sapply(eout, length))
if(length(ud)==1) {
# eout is list of logarithms of activity
if(missing(plot.it)) plot.it <- FALSE
- if(plot.it) stop("can't make a plot if argument 'eout' is not the output from equilibrate()")
- logact <- eout
+ if(plot.it) stop("can't make a plot if 'eout' is not the output from equilibrate()")
+ loga1 <- eout
+ eout.is.eout <- FALSE
} else {
- # d is the output from diagram()
- if(!"loga.equil" %in% names(eout)) {
+ # test if eout is the output from equilibrate()
+ if(!"loga.equil" %in% names(eout))
stop(paste("the list provided in 'eout' is not the output from equilibrate()"))
- }
if(missing(plot.it)) plot.it <- TRUE
- logact <- eout$loga.equil
+ loga1 <- eout$loga.equil
+ eout.is.eout <- TRUE
}
- # check that all needed arguments are present
- target.lower <- tolower(target)
- if(target.lower %in% c("richness", "cvrmsd", "spearman", "pearson", "logact", "dgxf") & is.null(loga.ref))
- stop(paste("for '", target, "' target, loga.ref must be supplied", sep=""))
+
# take a subset (or all) of the species
- if(is.null(ispecies)) ispecies <- 1:length(logact)
- logact <- logact[ispecies]
- # number of species
- ns <- length(logact)
+ if(is.null(ispecies)) ispecies <- 1:length(loga1)
+ loga1 <- loga1[ispecies]
# the dimensions
- mydim <- dim(logact[[1]])
- nd <- length(mydim)
- if(nd==1) if(mydim==1) nd <- 0
- msgout(paste("revisit: calculating", target, "in", nd, "dimensions\n"))
+ dim1 <- dim(as.array(loga1[[1]]))
+ # the number of dimensions
+ nd <- ifelse(identical(dim1, 1L), 0, length(dim1))
+ msgout(paste("revisit: calculating", objective, "in", nd, "dimensions\n"))
- ## on to diversity calculations
- # given a list of logarithms of activities of species
- # (as vectors or matrices or higher dimensional arrays)
- # calculate a diversity index of the same dimensions
- # these targets only depend on the logarithms of activities from diagram() (logact):
- # "shannon" shannon entropy
- # "sd" standard deviation
- # "cv" coefficient of variation
- # "sd.log", "cv.log" SD/CV for the logarithms of activity
- # "qqr" correlation coefficient on q-q plot (i.e., normality test)
- # these targets also depend on reference logarithms of activities (loga.ref):
- # "richness" species richness
- # "cvrmsd" coefficient of variation of rmsd
- # "spearman" spearman correlation coefficient
- # "pearson" pearson correlation coefficient
- # "logact" maximize the activity of a species
- # "DGxf" minimize the Gibbs energy of transformation
-
- # vectorize the entries in the logact list
- for(i in 1:ns) {
- logact[[i]] <- as.vector(logact[[i]])
+ # get the objective function
+ objfun <- get.objfun(objective)
+ # the arguments to the function (a1, [a2]) or (loga1, [loga2], [Astar])
+ objargs <- names(formals(objfun))
+
+ # these objectives only depend on the activities (a1/loga1):
+ # shannon, SD, CV, QQR
+ if(any(grepl("a1", objargs))) {
+ # vectorize the list entries: a1/loga1
+ loga1 <- sapply(loga1, c)
+ # for 0-D case we need to keep loga1 as a 1-row matrix (sapply simplifies to vector)
+ if(nd==0) loga1 <- t(loga1)
# convert infinite values to NA
- logact[[i]][is.infinite(logact[[i]])] <- NA
+ loga1[is.infinite(loga1)] <- NA
+ # remove logarithms if necessary
+ if(any(grepl("loga1", objargs))) a1 <- loga1
+ else a1 <- 10^loga1
}
- # make a place for the results
- H <- logact[[1]]
- H[] <- 0
- # ratio-specific calculations
- if(!is.null(lograt.ref)) {
- if(!target.lower %in% c("rmsd", "cvrmsd", "spearman", "pearson"))
- stop(paste("target",target,"not available when comparing activity ratios"))
- else(msgout("revisit: calculating activity ratios\n"))
- # instead of logact we use calculated log activity ratio
- logact <- lograt(loga.ref, logact)
- loga.ref <- lograt.ref
+
+ # these objectives also depend on reference activities (a2/loga2):
+ # richness, RMSD, CVRMSD, spearman, pearson, DGtr
+ if(any(grepl("a2", objargs))) {
+ # check that all needed arguments are present
+ if(is.null(loga2)) stop(paste("loga2 must be supplied for", objective))
+ # if loga2 is a single value, expand it into the dimensions of loga1
+ if(length(loga2)==1) loga2 <- rep(loga2, length.out=ncol(loga1))
+ # check that loga2 has the same length as loga1
+ if(!identical(ncol(loga1), length(loga2))) stop(paste("loga2 has different length (",
+ length(loga2), ") than list in eout (", ncol(loga1), ")", sep=""))
+ # remove logarithms if necessary
+ if(any(grepl("loga2", objargs))) a2 <- loga2
+ else a2 <- 10^loga2
}
- # target-specific calculations
- if(target.lower %in% c("sd", "cv")) {
- # build a matrix; rows are species
- myad <- t(list2array(logact))
- # remove logarithms
- myad <- 10^myad
- # get the standard deviations
- H <- palply(1:ncol(myad), function(i) sd(myad[,i]))
- H <- as.numeric(H)
- # for coefficient of variation, divide by the mean
- if(target.lower=="cv") H <- H / colMeans(myad)
+ # construct array of values: Astar (for DGtr)
+ if(any(grepl("Astar", objargs))) {
+ Astar <- eout$Astar[ispecies]
+ Astar <- sapply(Astar, as.vector)
+ }
- } else if(target.lower %in% c("sd.log", "cv.log")) {
- # build a matrix; rows are species
- myad <- t(list2array(logact))
- # get the standard deviations
- H <- palply(1:ncol(myad), function(i) sd(myad[,i]))
- H <- as.numeric(H)
- # for coefficient of variation, divide by the mean
- if(target.lower=="cv.log") H <- H / colMeans(myad)
+ # calculation of the objective function
+ # "H" is a remnant of the first target, shannon entropy
+ if(length(objargs) == 1) H <- objfun(a1)
+ else if(length(objargs) == 2) H <- objfun(a1, a2)
+ else if(length(objargs) == 3) H <- objfun(a1, a2, Astar)
- } else if(target.lower=="richness") {
- # given a list of logarithms of activities of species
- # (as matrices or vectors) calculate the richness
- # make a place for the results
- # loop over species
- for(j in 1:length(logact)) {
- isthere <- logact[[j]] > loga.ref
- H[isthere] <- H[isthere] + 1
- }
-
- } else if(target.lower=="shannon") {
- # for this calculation we need to loop twice;
- # first to convert logact to act and get acttotal
- act <- logact
- for(i in 1:ns) {
- # exponentiate the logarithmic values
- act[[i]] <- 10^logact[[i]]
- if(i==1) acttotal <- act[[i]] else acttotal <- acttotal + act[[i]]
- }
- # now do the calculation
- for(i in 1:ns) {
- dH <- -act[[i]]/acttotal*log(act[[i]]/acttotal)
- if(!any(is.na(dH))) H <- H + dH
- else(warning(paste("revisit: skipping species", i, "which gives NA")))
- }
-
- } else if(target.lower=="qqr") {
- # normality test using correlation coefficient of a q-q plot 20100901
- actarr <- list2array(logact)
- qqrfun <- function(i, actarr) {
- y <- actarr[i,]
- # this is to catch errors from qqr (qqnorm)
- out <- try(qqr(y), silent=TRUE)
- if(class(out)=="try-error") out <- NA
- return(out)
- }
- H <- as.numeric(palply(1:length(H), qqrfun, actarr))
-
- } else if(target.lower=="spearman") {
- # spearman rank correlation coefficient 20100912
- actarr <- list2array(logact)
- spearfun <- function(i, actarr) {
- y <- actarr[i,]
- out <- spearman(y, loga.ref)
- return(out)
- }
- H <- as.numeric(palply(1:length(H), spearfun, actarr))
-
- } else if(target.lower=="pearson") {
- # pearson correlation coefficient
- actarr <- list2array(logact)
- pearfun <- function(i, actarr) {
- y <- actarr[i, ]
- out <- cor(y, loga.ref)
- return(out)
- }
- H <- as.numeric(palply(1:length(H), pearfun, actarr))
-
- } else if(target.lower=="rmsd") {
- # root mean squared deviation
- actarr <- list2array(logact)
- rmsdfun <- function(i, actarr) {
- y <- actarr[i, ]
- out <- rmsd(loga.ref, y)
- return(out)
- }
- H <- as.numeric(palply(1:length(H), rmsdfun, actarr))
-
- } else if(target.lower=="cvrmsd") {
- # coefficient of variation of the root mean squared deviation
- actarr <- list2array(logact)
- cvrmsdfun <- function(i, actarr) {
- y <- actarr[i, ]
- out <- cvrmsd(loga.ref, y)
- return(out)
- }
- H <- as.numeric(palply(1:length(H), cvrmsdfun, actarr))
-
- } else if(target.lower=="logact") {
- # where the activity of a species is maximal
- H <- logact[[loga.ref]]
-
- } else if(target.lower=="dgxf") {
- # Gibbs energy of transformation to the observed assemblage
- actarr <- list2array(logact)
- # select species, vectorize, then put the Astar values into an array
- Astar <- eout$Astar[ispecies]
- for(i in 1:ns) Astar[[i]] <- as.vector(Astar[[i]])
- Astararr <- list2array(Astar)
- Gfun <- function(i, actarr, Astararr) {
- loga.equil <- actarr[i, ]
- Astar <- Astararr[i, ]
- # direction of transformation:
- # swap12=FALSE: loga.equil(Astar) --> loga.ref
- # swap12=TRUE: loga.ref(Astar) --> loga.equil
- if(DGxf.swap12) out <- -DGxf(loga.ref, loga.equil, Astar)
- else out <- DGxf(loga.equil, loga.ref, Astar)
- return(out)
- }
- H <- as.numeric(palply(1:length(H), Gfun, actarr, Astararr))
-
- } else stop(paste("specified target '", target, "' not available", sep=""))
# replace dims
- dim(H) <- mydim
+ dim(H) <- dim1
+
## now on to plotting + assembling return values
+ # for zero or more than two dimensions we'll just return the values
+ # of the objective function and the index of the optimal value
+ iopt <- optimal.index(H, objective)
+ ret.val <- list(H=H, iopt=iopt)
# get information about the x-axis
- if(plot.it & nd > 0 & nd < 3) {
+ if(eout.is.eout & nd > 0) {
xname <- eout$vars[1]
- yname <- eout$vars[2]
- xres <- length(eout$vals[[1]])
- xrange <- range(eout$vals[[1]])
- # special operations for pH
- if(xname=="H+") {
- xname <- "pH"
- xrange <- -xrange
- }
# the x-values
- xs <- seq(xrange[1],xrange[2],length.out=xres)
- }
- # make plots and return values
+ xs <- eout$vals[[1]]
+ xrange <- range(xs)
+ } else xs <- NA
+
+ # make plots and assemble return values
if(nd==0) {
# a 0-D plot
if(plot.it) {
- plotted <- FALSE
- if(target.lower=="qqr") {
- actarr <- list2array(logact)
- qqnorm(actarr,col=col,pch=pch,main="")
- qqline(actarr)
- plotted <- TRUE
- } else if(target.lower %in% c("rmsd","cvrmsd","spearman","pearson")) {
- # plot the points
- ylab <- "loga.calc"
- xlab <- "loga.ref"
- if(!is.null(lograt.ref)) {
- ylab <- "lograt.calc"
- xlab <- "lograt.ref"
- }
- plot(loga.ref,actarr,xlab=xlab,ylab=ylab,pch=pch,col=col)
+ if(objective=="qqr") {
+ # make a q-q plot for qqr
+ qqnorm(loga1, col=col, pch=pch)
+ qqline(loga1)
+ } else if(any(grepl("a2", objargs))) {
+ # plot the points for a referenced objective
+ ylab <- "loga1"
+ xlab <- "loga2"
+ plot(loga2, loga1, xlab=xlab, ylab=ylab, pch=pch, col=col)
# add a 1:1 line
- lines(range(loga.ref),range(loga.ref),col="grey")
+ lines(range(loga2), range(loga2), col="grey")
# add a lowess line
- ls <- loess.smooth(loga.ref,actarr)
- lines(ls$x,ls$y,col="red")
- plotted <- TRUE
- }
- if(plotted) {
- # add a title
- if(missing(main)) main <- paste(target,"=",round(H,3))
- title(main=main)
- if(!is.null(legend.x)) {
- if(is.null(lpch)) lpch <- unique(pch)
- legend(legend.x,legend=legend,pch=lpch)
- }
- }
+ ls <- loess.smooth(loga2, loga1)
+ lines(ls$x, ls$y, col="red")
+ } else plot.it <- FALSE
+ # add a title
+ if(missing(main)) main <- paste(objective, "=", round(H,3))
+ if(plot.it) title(main=main)
}
- ret.val <- list(H=H)
-
} else if(nd==1) {
- # locate the extremum
- ix <- where.extreme(H,target)
- extval <- H[ix]
+ # locate the optimal value
+ ixopt <- c(iopt)
+ xopt <- xs[ixopt]
+ optimum <- H[ixopt]
+ ret.val <- list(H=H, ixopt=ixopt, xopt=xopt, optimum=optimum)
# a 1-D plot
if(plot.it) {
- if(is.null(ylim)) {
- if(target.lower=="richness") {
- ylim <- 0
- if(max(H) > ylim) ylim <- max(H) + 1
- ylim <- c(0,ylim)
- }
- else ylim <- extendrange(H,f=0.075)
- }
+ if(is.null(ylim)) ylim <- extendrange(H, f=0.075)
if(is.null(xlim)) xlim <- xrange
- # format the target name if it's DGxf
- if(target.lower=="dgxf") ylab <- expr.property("DGxf/2.303RT")
- else ylab <- target
- if(!add) thermo.plot.new(xlim=xlim,ylim=ylim,xlab=axis.label(xname),ylab=ylab,yline=yline,
- cex=cex,lwd=lwd,mar=mar,side=side)
[TRUNCATED]
To get the complete diff run:
svnlook diff /svnroot/chnosz -r 26
More information about the CHNOSZ-commits
mailing list