[CHNOSZ-commits] r902 - in pkg/CHNOSZ: . R inst inst/tinytest man

Thu May 22 13:33:02 CEST 2025

Author: jedick
Date: 2025-05-22 13:33:01 +0200 (Thu, 22 May 2025)
New Revision: 902

Added:
   pkg/CHNOSZ/inst/tinytest/test-rank.affinity.R
Modified:
   pkg/CHNOSZ/DESCRIPTION
   pkg/CHNOSZ/R/rank.affinity.R
   pkg/CHNOSZ/inst/NEWS.Rd
   pkg/CHNOSZ/man/rank.affinity.Rd
Log:
Add rescaling of average ranks to rank.affinity()


Modified: pkg/CHNOSZ/DESCRIPTION
===================================================================

--- pkg/CHNOSZ/DESCRIPTION	2025-05-22 00:12:15 UTC (rev 901)
+++ pkg/CHNOSZ/DESCRIPTION	2025-05-22 11:33:01 UTC (rev 902)
@@ -1,6 +1,6 @@
-Date: 2025-05-21
+Date: 2025-05-22
 Package: CHNOSZ
-Version: 2.1.0-73
+Version: 2.1.0-74
 Title: Thermodynamic Calculations and Diagrams for Geochemistry
 Authors at R: c(
     person("Jeffrey", "Dick", , "j3ffdick at gmail.com", role = c("aut", "cre"),

Modified: pkg/CHNOSZ/R/rank.affinity.R
===================================================================
--- pkg/CHNOSZ/R/rank.affinity.R	2025-05-22 00:12:15 UTC (rev 901)
+++ pkg/CHNOSZ/R/rank.affinity.R	2025-05-22 11:33:01 UTC (rev 902)
@@ -1,32 +1,82 @@
-# Calculate normalized sum of ranking of affinities for species in designated groups
+# Calculate average rank of affinities for species in different groups
 # 20220416 jmd first version
+# 20250522 rescale ranks
 
-rank.affinity <- function(aout, groups, percent = TRUE) {
+rank.affinity <- function(aout, groups, rescale = TRUE, percent = FALSE) {
+
   # Put the affinities into matrix form
   amat <- sapply(aout$values, as.numeric)
   # Calculate ranks
   # https://stackoverflow.com/questions/1412775/pmax-parallel-maximum-equivalent-for-rank-in-r
   arank <- apply(amat, 1, rank)
-  # Get the normalized ranks for each group
+
+  # Count total number of species in all groups
+  groups_vector <- unlist(groups)
+  if(is.integer(groups_vector)) ntot <- length(groups_vector)
+  if(is.numeric(groups_vector)) ntot <- length(groups_vector)
+  if(is.logical(groups_vector)) ntot <- sum(groups_vector)
+
+  # Get the bounds of average ranks for a group with one species
+  min1 <- 1
+  max1 <- ntot
+
+  # Get the average rank for species in each group
   grank <- sapply(groups, function(group) {
-    # Sum the ranks for this group and divide by number of species in the group
+
+    # Get number of species in this group
     if(inherits(group, "logical")) n <- sum(group)
     if(inherits(group, "integer")) n <- length(group)
-    colSums(arank[group, ]) / n
+    # Also handle indices classed as numeric 20250522
+    if(inherits(group, "numeric")) n <- length(group)
+    # Sum the ranks and divide by number of species
+    rank_avg <- colSums(arank[group, , drop = FALSE]) / n
+
+    if(rescale) {
+      # Rescale ranks 20250522
+      # Get the bounds of average ranks for a group with n species
+      # Minimum is the average of 1..n for n species
+      min <- sum(1:n) / n
+      # The margin is the difference between the minimum and 1
+      margin <- min - min1
+      # Lower and upper bounds are symmetric, so we subtract the margin from total number of species to get the max
+      max <- ntot - margin
+      ## Factor to rescale average ranks from an n-species group to a 1-species group
+      #scaling_factor <- (max1 - min1) / (max - min)
+      ## To center the range, we have to subtract the margin on both sides
+      #(rank_avg - 2 * margin) * scaling_factor
+
+      # Build a linear model mapping from x (bounds of group with n species) to y (bounds of group with 1 species)
+      x <- c(min, max)
+      y <- c(min1, max1)
+      rescale_lm <- lm(y ~ x)
+      # Rescale average ranks with the linear model
+      rank_avg <- predict(rescale_lm, data.frame(x = rank_avg))
+
+    } else {
+      rank_avg
+    }
+
   })
-  # Calculate rank-sum percentage 20240106
+
+  # Calculate average rank percentage 20240106
   if(percent) grank <- grank / rowSums(grank) * 100
+
   # Restore dims
   dims <- dim(aout$values[[1]])
-  # apply() got 'simplify' argument in R 4.1.0 20230313
-  # Using 'simplify = FALSE' in R < 4.1.0 caused error: 3 arguments passed to 'dim<-' which requires 2
-  if(getRversion() < "4.1.0") glist <- lapply(lapply(apply(grank, 2, list), "[[", 1), "dim<-", dims)
-  else glist <- apply(grank, 2, "dim<-", dims, simplify = FALSE)
+  if(getRversion() < "4.1.0") {
+    # Using 'simplify = FALSE' in R < 4.1.0 caused error: 3 arguments passed to 'dim<-' which requires 2
+    glist <- lapply(lapply(apply(grank, 2, list), "[[", 1), "dim<-", dims)
+  } else {
+    # apply() got 'simplify' argument in R 4.1.0 20230313
+    glist <- apply(grank, 2, "dim<-", dims, simplify = FALSE)
+  }
   aout$values <- glist
+
   # Rename species to group names (for use by diagram())
   aout$species <- aout$species[1:length(groups), ]
   aout$species$name <- names(groups)
-  # "Sign" the object with our function name
+  # Label the object with our function name
   aout$fun <- "rank.affinity"
   aout
+
 }

Modified: pkg/CHNOSZ/inst/NEWS.Rd
===================================================================
--- pkg/CHNOSZ/inst/NEWS.Rd	2025-05-22 00:12:15 UTC (rev 901)
+++ pkg/CHNOSZ/inst/NEWS.Rd	2025-05-22 11:33:01 UTC (rev 902)
@@ -15,7 +15,7 @@
 \newcommand{\Cp}{\ifelse{latex}{\eqn{C_P}}{\ifelse{html}{\out{<I>C<sub>P</sub></I>}}{Cp}}}
 \newcommand{\DG0}{\ifelse{latex}{\eqn{{\Delta}G^{\circ}}}{\ifelse{html}{\out{Δ<I>G</I>°}}{ΔG°}}}
 
-\section{Changes in CHNOSZ version 2.1.0-73 (2025-05-21)}{
+\section{Changes in CHNOSZ version 2.1.0-74 (2025-05-22)}{
 
   \subsection{OBIGT DEFAULT DATA}{
     \itemize{
@@ -140,6 +140,9 @@
       current default) or \samp{cr} (minerals using the Maier-Kelley equation
       in the \samp{CGL} model; a new method)).
 
+      \item \code{rank.affinity()} now rescales average ranks of groups to the
+      same bounds.
+
     }
   }
 

Added: pkg/CHNOSZ/inst/tinytest/test-rank.affinity.R
===================================================================
--- pkg/CHNOSZ/inst/tinytest/test-rank.affinity.R	                        (rev 0)
+++ pkg/CHNOSZ/inst/tinytest/test-rank.affinity.R	2025-05-22 11:33:01 UTC (rev 902)
@@ -0,0 +1,16 @@
+# Load default settings for CHNOSZ
+reset()
+
+# Test added 20250522
+basis("CHNOSe")
+species(c("SO4-2", "H2S", "HS-"))
+aout <- affinity(pH = c(0, 14, 10), Eh = c(-1, 1, 10))
+groups <- list(oxidized = 1, reduced = 2:3)
+# The tests only pass with rescale = TRUE (the default)
+arank <- rank.affinity(aout, groups)
+info <- "Different-sized groups have same range of average ranks"
+expect_equal(range(arank$values[[1]]), c(1, 3), info = info)
+expect_equal(range(arank$values[[2]]), c(1, 3), info = info)
+info <- "The sum of average ranks from both groups is 1 + the number of species"
+sum_average_ranks <- unique(as.integer(arank$values[[1]] + arank$values[[2]]))
+expect_equal(sum_average_ranks, 1 + nrow(species()), info = info)

Modified: pkg/CHNOSZ/man/rank.affinity.Rd
===================================================================
--- pkg/CHNOSZ/man/rank.affinity.Rd	2025-05-22 00:12:15 UTC (rev 901)
+++ pkg/CHNOSZ/man/rank.affinity.Rd	2025-05-22 11:33:01 UTC (rev 902)
@@ -7,19 +7,22 @@
 }
 
 \usage{
-  rank.affinity(aout, groups, percent = TRUE)
+  rank.affinity(aout, groups, rescale = TRUE, percent = FALSE)
 }
 
 \arguments{
   \item{aout}{list, output of \code{\link{affinity}}}
-  \item{groups}{named list of indices (integer or numeric) for species in each group}
-  \item{percent}{return average rank percentage for each group}
+  \item{groups}{named list of indices (integer or logical) for species in each group}
+  \item{rescale}{logical, rescale average ranks for each group to have the same bounds?}
+  \item{percent}{logical, take percentages of average ranks for each group (after rescaling)?}
 }
 
 \details{
-The affinities for all species are \code{\link{rank}}ed, then the mean ranking for the species in each group is calculated.
-The mean rankings of groups are converted to a percentage, or returned as-is if \code{percent} is FALSE.
-Note that the calculations are applied to each set of conditions individually (i.e., each grid point in the affinity \code{\link{affinity}} calculation).
+The affinities for all species are \code{\link{rank}}ed, then the average rank for the species in each group is calculated.
+The calculations are applied to each set of conditions individually (i.e., each grid point in the affinity \code{\link{affinity}} calculation).
+
+Unless \code{rescale} is FALSE, the average rank of each group is rescaled to have the same bounds (from 1 to the total number of species).
+If \code{percent} is TRUE, the average ranks of groups (after rescaling) are converted to percentages.
 }
 
 \value{