[Qca-commits] r40 - pkg/R
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Thu Nov 20 19:12:20 CET 2014
Author: alrikthiem
Date: 2014-11-20 19:12:20 +0100 (Thu, 20 Nov 2014)
New Revision: 40
Added:
pkg/R/retention.R
Log:
no comment
Added: pkg/R/retention.R
===================================================================
--- pkg/R/retention.R (rev 0)
+++ pkg/R/retention.R 2014-11-20 18:12:20 UTC (rev 40)
@@ -0,0 +1,154 @@
+`retention` <-
+function(data, outcome = "", conditions = c(""), type = "error",
+ assump = "independent", n.cut = 1, incl.cut = 1, p.pert = 0.1,
+ n.pert = 1) {
+
+ names(mydata) <- toupper(names(data))
+ conditions <- toupper(conditions)
+ outcome <- toupper(outcome)
+ if (any(conditions == "")) {
+ conditions <- names(data)[which(names(data) != outcome)]
+ }
+
+ data <- data[, c(conditions, outcome)]
+
+ # unique combinations
+ udata <- unique(data[, conditions])
+ rownames(udata) <- seq(nrow(udata))
+ cpos <- cneg <- rep(0, nrow(udata))
+
+ # count how many combinations are positive and how many are negative
+ for (i in seq(nrow(udata))) {
+ for (j in seq(nrow(data))) {
+ if (all(udata[i, ] == data[j, conditions])) {
+ if (data[j, outcome] == 1) {
+ cpos[i] <- cpos[i] + 1 # positive
+ }
+ else if (data[j, outcome] == 0) {
+ cneg[i] <- cneg[i] + 1 # negative
+ }
+ }
+ }
+ }
+ total <- cpos + cneg
+ udata <- udata[total >= n.cut, , drop = FALSE]
+ cpos <- cpos[total >= n.cut]
+ cneg <- cneg[total >= n.cut]
+ total <- total[total >= n.cut]
+ calculatePairs <- function(x, n.pert, type = "deletion") {
+ pairsxl <- combn(nrow(udata), x)
+ nofsetsxl <- 0
+ for (j in seq(ncol(pairsxl))) {
+ cposneg <- NULL
+ for (l in seq(x)) {
+ cposneg <- c(cposneg, cbind(cpos, cneg)[pairsxl[l, j], ])
+ }
+ allpairs <- createMatrix(cposneg + 2) - 1
+ allpairs <- allpairs[apply(allpairs, 1, function(y) all(y >= 0)), , drop = FALSE]
+ linesubset <- rep(TRUE, nrow(allpairs))
+ for (l in seq(x)) {
+
+ # at least one measurement error for each configuration l
+ linesubset <- linesubset & rowSums(allpairs[, l*2 - c(1, 0)]) >= 1
+ }
+ allpairs <- allpairs[linesubset & rowSums(allpairs) <= n.pert, , drop = FALSE]
+ for (i in seq(nrow(allpairs))) {
+ lchanges <- rep(FALSE, x)
+ for (l in seq(x)) {
+ initially <- cpos[pairsxl[l, j]]/total[pairsxl[l, j]]
+ if (type == "deletion") {
+ newtotaless <- total[pairsxl[l, j]] - allpairs[i, l*2 - 1]
+ after <- (cpos[pairsxl[l, j]] - allpairs[i, l*2 - 1])/newtotaless
+ lchanges[l] <- ((initially >= incl.cut & after < incl.cut) | newtotaless < n.cut) |
+ ((initially < incl.cut & after >= incl.cut) & newtotaless >= n.cut)
+ }
+ else if (type == "error") {
+ after <- (cpos[pairsxl[l, j]] + allpairs[i, l*2] - allpairs[i, l*2 - 1])/total[pairsxl[l, j]]
+ lchanges[l] <- (initially >= incl.cut & after < incl.cut) |
+ (initially < incl.cut & after >= incl.cut)
+ }
+ }
+ if (all(lchanges)) {
+ combs <- 1
+ for (l in seq(x)) {
+ combs <- combs*choose(cpos[pairsxl[l, j]], allpairs[i, l*2 - 1])
+ combs <- combs*choose(cneg[pairsxl[l, j]], allpairs[i, l*2])
+ }
+ nofsetsxl <- nofsetsxl + combs*choose(nrow(dat) - sum(cposneg), n.pert - sum(allpairs[i, ]))
+ }
+ }
+ }
+ return(nofsetsxl)
+ }
+ if (assump == "dependent") {
+ if (nrow(udata) < n.pert) {
+ stop("\nCannot make pairs of ", n.pert, " lines with only ", nrow(udata), " unique configurations.\n\n", call. = FALSE)
+ }
+
+ # initialize number of sets of D cases that affect configurations of type l
+ nofsets <- 0
+ for (i in seq(n.pert)) {
+ nofsetsxl <- calculatePairs(i, n.pert, type = type)
+ nofsets <- nofsets + ifelse(i %% 2 == 1, nofsetsxl, -1*nofsetsxl)
+ }
+ return(as.vector(1 - nofsets/choose(nrow(dat), n.pert)))
+ }
+ else if (assump == "independent") {
+ pfinal <- 1
+ if (type == "deletion") {
+ for (l in seq(nrow(udata))) {
+ ptmp <- 1
+
+ # calculate all possible pairs of positives and negatives
+ allpairs <- createMatrix(c(cpos[l], cneg[l]) + 2) - 1
+ allpairs <- allpairs[apply(allpairs, 1, function(x) all(x >= 0)), , drop = FALSE]
+
+ # at least one positive or one negative must be deleted
+ allpairs <- allpairs[rowSums(allpairs) >= 1, , drop = FALSE]
+
+ # loop over all surviving combinations
+ for (i in seq(nrow(allpairs))) {
+ # is new total (after deleting some cases) less than n.cut?
+ newtotaless <- total[l] - allpairs[i, 1] - allpairs[i, 2]
+ initially <- cpos[l]/total[l]
+ # first column of all pairs is equivalent of "i" in formula;
+ # second column to "j"
+ after <- (cpos[l] - allpairs[i, 1])/newtotaless
+ # only valid for conservative solution type;
+ # if parsimonious, losing a negative initial combination due to new total less than n.cut might also change the solution
+ # here, we state new total at least n.cut only to make sure when a negative combination enters the group of positive ones
+ if (((initially >= incl.cut & after < incl.cut) | newtotaless < n.cut) |
+ ((initially < incl.cut & after >= incl.cut) & newtotaless >= n.cut)) {
+ ptmp <- ptmp - dbinom(allpairs[i, 1], cpos[l], p.pert) * dbinom(allpairs[i, 2], cneg[l], p.pert)
+ }
+ }
+
+ # final probability gets changed, for each unique combination
+ pfinal <- pfinal*ptmp
+ }
+ }
+ else if (type == "error") {
+ for (l in seq(nrow(udata))) {
+ ptmp <- 1
+ # calculate all possible pairs of positives and negatives
+ allpairs <- createMatrix(c(cpos[l], cneg[l]) + 2) - 1
+ allpairs <- allpairs[apply(allpairs, 1, function(x) all(x >= 0)), , drop = FALSE]
+ # at least one positive or one negative must be changed
+ allpairs <- allpairs[rowSums(allpairs) >= 1, , drop = FALSE]
+ # loop over all surviving combinations
+ for (i in seq(nrow(allpairs))) {
+ initially <- cpos[l]/total[l]
+ # first column of allpairs is the equivalent of "i" in the formula, second column to "j"
+ after <- (cpos[l] - allpairs[i, 1] + allpairs[i, 2])/total[l]
+ if ((initially >= incl.cut & after < incl.cut) | (initially < incl.cut & after >= incl.cut)) {
+ ptmp <- ptmp - dbinom(allpairs[i, 1], cpos[l], p.pert) * dbinom(allpairs[i, 2], cneg[l], p.pert)
+ }
+ }
+
+ # final probability gets changed, for each unique combination
+ pfinal <- pfinal*ptmp
+ }
+ }
+ return(pfinal)
+ }
+}
More information about the Qca-commits
mailing list