[Zooimage-commits] r202 - pkg/zooimage/R
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Mon Jun 18 16:01:56 CEST 2012
Author: kevin
Date: 2012-06-18 16:01:55 +0200 (Mon, 18 Jun 2012)
New Revision: 202
Modified:
pkg/zooimage/R/ZIConf.R
Log:
add additional parameters to the confusionStat function
Modified: pkg/zooimage/R/ZIConf.R
===================================================================
--- pkg/zooimage/R/ZIConf.R 2012-06-14 08:11:26 UTC (rev 201)
+++ pkg/zooimage/R/ZIConf.R 2012-06-18 14:01:55 UTC (rev 202)
@@ -332,67 +332,204 @@
}
## Table with stats per group: precision, recall, etc.
-confusionStat <- function (ZIClass, ZIConf = NULL, sort.by = "FN")
+confusionStat <- function(ZIClass, ZIConf = NULL, sort.by = NULL, decreasing = FALSE, NaN.rm = FALSE)
{
- if (is.null(ZIConf)) ZIConf <- ZIConf(ZIClass)
- ## True positive --> all organism on diagonal!
- TP <- diag(ZIConf)
- ## Sum of true positive
- SumTP <- sum(TP)
+ # Create confusion matrix
+ if(is.null(ZIConf)){
+ Confu <- ZIConf(ZIClass)
+ } else {
+ Confu <- ZIConf
+ }
+ ##### General parameters
+ # Number of groups
+ Ngp <- ncol(Confu)
- ## Sum rows and columns
- SumRow <- rowSums(ZIConf) # TP + FN
- SumCol <- colSums(ZIConf) # TP + FP
+ # Total : TP + TN + FP + FN
+ Tot <- sum(Confu)
- ## Out of diagonal
- # False negative item
- FN <- SumRow - TP
- ## False positive items
- FP <- SumCol - TP
+ # TP : True positive item : All items on diagonal
+ TP <- diag(Confu)
- ## Total
- Tot <- sum(ZIConf)
+ # TP + TN : sum of diagonal = All correct identification
+ TP_TN <- sum(TP)
- ## General stats
- ## Accuracy (TN + TP) / (TP + TN + FP + FN)
- Accuracy <- SumTP / Tot * 100
- Error <- 100 - Accuracy
+ # TP + FP : sum of columns : Automatic classification
+ TP_FP <- colSums(Confu)
- ## Stats by group
+ # TP + FN : sum of rows : Manual classification
+ TP_FN <- rowSums(Confu)
+
+ # FP : False positive items
+ FP <- TP_FP - TP
- ## Proportion of false negative
- FalseNeg <- FN / SumRow * 100
+ # FN : False negative item
+ FN <- TP_FN - TP
- ## Proportion of false positive
- FalsePos <- FP / SumCol * 100
+ # TN : True Negative = Total - TP - FP - FN
+ TN <- rep(Tot, Ngp) - TP - FP - FN
+
+ ##### General statistics
+ # Accuracy = (TP + TN) / (TP + TN + FP + FN)
+ Accuracy <- TP_TN / Tot
+
+ # Error = 1 - Accuracy
+ Error <- 1 - Accuracy
+
+ ##### The 8 basic ratios
+ # Recall = TP / (TP + FN) = 1 - FNR
+ Recall <- TP / (TP_FN)
- ## Recall = True positive rate = Sensitivity
- ## = Probability of detection = TP / (TP + FN)
- Recall <- TP / (TP + FN)
+ # Specificity = TN / (TN + FP) = 1 - FPR
+ Specificity <- TN / (TN + FP)
- ## Precision = TP / (TP + FP)
- Precision <- TP / (TP + FP)
+ # Precision = TP / (TP + FP) = 1 - FDR
+ Precision <- TP / (TP_FP)
+
+ # NPV : Negative predictive value = TN / (TN + FN) = 1 - FOR
+ NPV <- TN / (TN + FN)
+
+ # FPR : False positive rate = 1 - Specificity = FP / (FP + TN)
+ FPR <- FP / (FP + TN) #1 - Specificity
+
+ # FNR : False negative rate = 1 - Recall = FN / (TP + FN)
+ FNR <- FN / (TP + FN) #1 - Recall
- ## Specificity = 1 - FP = TN / (TN + FP)
- TN <- numeric()
- for (i in 1:length(TP)) TN[i] <- SumTP - TP[i]
- Specificity <- TN / (TN + FP) # 100 - FalsePos
+ # FDR : False Discovery Rate = 1 - Precision = FP / (TP + FP)
+ FDR <- FP / (TP_FP) #1 - Precision
+
+ # FOR : False omission rate = 1 - NPV = FN / (FN + TN)
+ FOR <- FN / (FN + TN) #1 - NPV
- ## Bias
- Bias <- SumCol - SumRow
+ ##### The 4 ratios of ratios
+ # LRPT = Likelihood Ratio for Positive Tests = Recall / FPR = Recall / (1 - Specificity)
+ LRPT <- Recall / (FPR)
+ # LRNT = Likelihood Ratio for Negative Tests = FNR / Specificity = (1 - Recall) / Specificity
+ LRNT <- FNR / (Specificity)
+
+ # LRPS : Likelihood Ratio for Positive Subjects = Precision / FOR = Precision / (1 - NPV)
+ LRPS <- Precision / (FOR)
+
+ # LRNS : Likelihood Ratio for Negative Subjects = FDR / NPV = (1 - Precision) / (1 - FOR)
+ LRNS <- FDR / (NPV)
+
+ ##### Additional statistics
+ # F-measure = F1 score = Harmonic mean of Precision and recall
+ Fmeasure <- 2 * ((Precision * Recall) / (Precision + Recall))
+
+ # Balanced accuracy = (Sensitivity + Specificity) / 2
+ BalAcc <- (Recall + Specificity) / 2
+
+ # MCC : Matthews correlation coefficient
+ Sum1 <- TP + FP
+ Sum2 <- TP + FN
+ Sum3 <- TN + FP
+ Sum4 <- TN + FN
+ Denominator <- sqrt(Sum1 * Sum2 * Sum3 * Sum4)
+ ZeroIdx <- Sum1 == 0 | Sum2 == 0 | Sum3 == 0 | Sum4 == 0
+ if(any(ZeroIdx)){
+ Denominator[ZeroIdx] <- 1
+ }
+ MCC <- ((TP * TN) - (FP * FN)) / Denominator
+
+ # Chisq : Significance
+ Chisq <- (((TP * TN) - (FP * FN))^2 * (TP + TN + FP + FN)) / ((TP + FP) * (TP + FN) * (TN + FP) * (TN + FN))
+
+ # Automatic classification - Manual classification
+ Auto_Manu <- TP_FP - TP_FN
+
+ # Dissimilarity Index of Bray Curtis
+ Dissimilarity <- abs(Auto_Manu) / (sum(TP_FP) + sum(TP_FN))
+
res <- data.frame(
- FN = round(FalseNeg, digits = 3),
- FP = round(FalsePos, digits = 3),
- Recall = round(Recall, digits = 3),
- Precision = round(Precision, digits = 3),
- SumTS = SumRow, SumPred = SumCol, Bias = Bias)
+ Auto = TP_FP, Manu = TP_FN, Auto_Manu = Auto_Manu, Dissimilarity = Dissimilarity,
+ TP = TP, FP = FP, FN = FN, TN = TN,
+ Recall = Recall, Precision = Precision, Specificity = Specificity, NPV = NPV,
+ FPR = FPR, FNR = FNR, FDR = FDR, FOR = FOR,
+ LRPT = LRPT, LRNT = LRNT, LRPS = LRPS, LRNS = LRNS,
+ Fmeasure = Fmeasure, BalAcc = BalAcc, MCC = MCC, Chisq = Chisq
+ )
+
+ rownames(res) <- rownames(Confu)
+ # Sort the table by the column named in sort.by, if one was provided (no sorting by default)
+ if(!is.null(sort.by)){
+ res <- res[order(res[, sort.by], decreasing = decreasing), ]
+ }
+ attr(res, "Accuracy") <- Accuracy
+ attr(res, "Error") <- Error
- ## Sort the table in function of one parameter by default FN
- res <- res[order(res[, sort.by]), ]
-
- attr(res, "GeneralStats") <- c(Accuracy = Accuracy, Error = Error)
- cat(paste("Accuracy:", round(Accuracy, digits = 2), "%", "\n", "Error:",
- round(Error, digits = 2), "%", "\n"))
+ # Replace NaN values arising from 0/0 divisions, if requested
+ if(isTRUE(NaN.rm)){
+ # Cases where it is impossible to calculate some statistics: 0/0 or X/0
+ # Case 1 : Everything is Correct --> FP = 0, FN = 0
+ Case1Idx <- FP == 0 & FN == 0 # no errors at all!
+ if(any(Case1Idx)){
+ Recall[Case1Idx] <- 1
+ FNR[Case1Idx] <- 0
+ Precision[Case1Idx] <- 1
+ FDR[Case1Idx] <- 0
+ Specificity[Case1Idx] <- 1
+ FPR[Case1Idx] <- 0
+ NPV[Case1Idx] <- 1
+ FOR[Case1Idx] <- 0
+ }
+ # Case 2 : Everything is Wrong and only false positive --> Impossible to calculate Recall and NPV
+ Case2Idx <- TP == 0 & TN == 0 & FP > 0 & FN == 0
+ if(any(Case2Idx)){
+ # Recall[Case2Idx] <- 0
+ # FNR[Case2Idx] <- 1
+ # NPV[Case2Idx] <- 0
+ # FOR[Case2Idx] <- 1
+ Recall[Case2Idx] <- 1
+ FNR[Case2Idx] <- 0
+ NPV[Case2Idx] <- 1
+ FOR[Case2Idx] <- 0
+ }
+ # Case 3 : Everything is Wrong and only false negative --> Impossible to calculate Precision and Specificity
+ Case3Idx <- TP == 0 & TN == 0 & FP == 0 & FN > 0
+ if(any(Case3Idx)){
+ # Precision[Case3Idx] <- 0
+ # FDR[Case3Idx] <- 1
+ # Specificity[Case3Idx] <- 0
+ # FPR[Case3Idx] <- 1
+ Precision[Case3Idx] <- 1
+ FDR[Case3Idx] <- 0
+ Specificity[Case3Idx] <- 1
+ FPR[Case3Idx] <- 0
+ }
+ # Case 4 : No FP and No TN --> Impossible to calculate Specificity
+ Case4Idx <- TP > 0 & TN == 0 & FP == 0 & FN > 0
+ if(any(Case4Idx)){
+ # Specificity[Case4Idx] <- 0
+ # FPR[Case4Idx] <- 1
+ Specificity[Case4Idx] <- 1
+ FPR[Case4Idx] <- 0
+ }
+ # Case 5 : No TP and No FP --> Impossible to calculate Precision
+ Case5Idx <- TP == 0 & TN > 0 & FP == 0 & FN > 0
+ if(any(Case5Idx)){
+ # Precision[Case5Idx] <- 0
+ # FDR[Case5Idx] <- 1
+ Precision[Case5Idx] <- 1
+ FDR[Case5Idx] <- 0
+ }
+ # Case 6 : No TP and no FN --> Impossible to calculate Recall
+ Case6Idx <- TP == 0 & TN > 0 & FP > 0 & FN == 0
+ if(any(Case6Idx)){
+ # Recall[Case6Idx] <- 0
+ # FNR[Case6Idx] <- 1
+ Recall[Case6Idx] <- 1
+ FNR[Case6Idx] <- 0
+ }
+ # Case 7 : No TN and no FN --> Impossible to calculate negative predictive value
+ Case7Idx <- TP > 0 & TN == 0 & FP > 0 & FN == 0
+ if(any(Case7Idx)){
+ # NPV[Case7Idx] <- 0
+ # FOR[Case7Idx] <- 1
+ NPV[Case7Idx] <- 1
+ FOR[Case7Idx] <- 0
+ }
+ }
+ cat(paste("Accuracy:", round(Accuracy * 100, digits = 2), "%", "\n", "Error:", round(Error * 100, digits = 2), "%", "\n"))
return(res)
}
More information about the Zooimage-commits
mailing list