From noreply at r-forge.r-project.org Sun Mar 2 15:16:11 2014 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Sun, 2 Mar 2014 15:16:11 +0100 (CET) Subject: [Rflptools-commits] r15 - in pkg/RFLPtools: . R data inst/doc man Message-ID: <20140302141611.D3358186C01@r-forge.r-project.org> Author: stamats Date: 2014-03-02 15:16:11 +0100 (Sun, 02 Mar 2014) New Revision: 15 Added: pkg/RFLPtools/R/FragMatch.R pkg/RFLPtools/R/germ.R pkg/RFLPtools/R/simulateRFLPData.R pkg/RFLPtools/data/newDataGerm.RData pkg/RFLPtools/data/refDataGerm.RData pkg/RFLPtools/man/FragMatch.Rd pkg/RFLPtools/man/RFLPcombine.Rd pkg/RFLPtools/man/RFLPlod.Rd pkg/RFLPtools/man/germ.Rd pkg/RFLPtools/man/newDataGerm.Rd pkg/RFLPtools/man/refDataGerm.Rd pkg/RFLPtools/man/simulateRFLPdata.Rd Modified: pkg/RFLPtools/DESCRIPTION pkg/RFLPtools/NAMESPACE pkg/RFLPtools/NEWS pkg/RFLPtools/R/RFLPdist.R pkg/RFLPtools/R/RFLPdist2.R pkg/RFLPtools/R/RFLPdist2ref.R pkg/RFLPtools/inst/doc/RFLPtools.pdf pkg/RFLPtools/man/0RFLP-package.Rd pkg/RFLPtools/man/BLASTdata.Rd pkg/RFLPtools/man/RFLPdata.Rd pkg/RFLPtools/man/RFLPdist.Rd pkg/RFLPtools/man/RFLPdist2.Rd pkg/RFLPtools/man/RFLPdist2ref.Rd pkg/RFLPtools/man/RFLPplot.Rd pkg/RFLPtools/man/RFLPqc.Rd pkg/RFLPtools/man/RFLPref.Rd pkg/RFLPtools/man/RFLPrefplot.Rd pkg/RFLPtools/man/diffDist.Rd pkg/RFLPtools/man/linCombDist.Rd pkg/RFLPtools/man/nrBands.Rd pkg/RFLPtools/man/read.blast.Rd pkg/RFLPtools/man/read.rflp.Rd pkg/RFLPtools/man/sim2dist.Rd pkg/RFLPtools/man/simMatrix.Rd pkg/RFLPtools/man/write.hclust.Rd Log: several extensions of version 1.5; see NEWS file Modified: pkg/RFLPtools/DESCRIPTION =================================================================== --- pkg/RFLPtools/DESCRIPTION 2013-01-04 16:42:46 UTC (rev 14) +++ pkg/RFLPtools/DESCRIPTION 2014-03-02 14:16:11 UTC (rev 15) @@ -1,13 +1,13 @@ Package: RFLPtools Type: Package Title: Tools to analyse RFLP data -Version: 1.5 -Date: 2013-01-04 +Version: 1.6 +Date: 2014-03-02 Author: Fabienne Flessa, Alexandra Kehl, Matthias Kohl Maintainer: Matthias Kohl Description: RFLPtools provides functions to analyse DNA fragment samples (i.e. derived from RFLP-analysis) and standalone BLAST report files (i.e. DNA sequence analysis). -Depends: R(>= 2.10.0), stats, utils, graphics, grDevices, RColorBrewer +Depends: R(>= 3.0.0), stats, utils, graphics, grDevices, RColorBrewer Suggests: lattice, MKmisc(>= 0.8) License: LGPL-3 Modified: pkg/RFLPtools/NAMESPACE =================================================================== --- pkg/RFLPtools/NAMESPACE 2013-01-04 16:42:46 UTC (rev 14) +++ pkg/RFLPtools/NAMESPACE 2014-03-02 14:16:11 UTC (rev 15) @@ -1,5 +1,6 @@ -export(read.rflp, - diffDist, +export(diffDist, + FragMatch, + germ, linCombDist, RFLPqc, RFLPdist, @@ -11,6 +12,8 @@ RFLPlod, nrBands, read.blast, + read.rflp, simMatrix, sim2dist, + simulateRFLPdata, write.hclust) Modified: pkg/RFLPtools/NEWS =================================================================== --- pkg/RFLPtools/NEWS 2013-01-04 16:42:46 UTC (rev 14) +++ pkg/RFLPtools/NEWS 2014-03-02 14:16:11 UTC (rev 15) @@ -3,6 +3,22 @@ ############################################################################### ########################################################### +## Version 1.6 +########################################################### +- added reference Flessa et al. (2013) to man files. +- added some further checks for function input. +- functions RFLPdist and RFLPdist2ref obtained an argument LOD for handling + values below some given limit of detection (LOD). +- new example datasets refDataGerm and newDataGerm taken from the GERM + software +- added function germ which represents an implementation of the Good-Enough + RFLP Matcher (GERM) program. +- added function FragMatch which represents an implementation of the FragMatch + program. +- added function simulateRFLPdata to simulate RFLP data. + + +########################################################### ## Version 1.5 ########################################################### - update of vignette Added: pkg/RFLPtools/R/FragMatch.R =================================================================== --- pkg/RFLPtools/R/FragMatch.R (rev 0) +++ pkg/RFLPtools/R/FragMatch.R 2014-03-02 14:16:11 UTC (rev 15) @@ -0,0 +1,77 @@ +FragMatch <- function(newData, refData, maxValue = 1000, errorBound = 25, + weight = 1, na.rm = TRUE){ + if(length(maxValue) != length(errorBound)) + stop("'maxValue' and 'errorBound' must have identical lengths.") + if(any(maxValue <= 0)) + stop("'maxValue' has to be a vector of positive values.") + if(any(errorBound <= 0)) + stop("'errorBound' has to be a vector of positive values.") + if(length(weight) > 1){ + weight <- weight[1] + warning("Only first element of 'weight' is used.") + } + if(weight < 0) + stop("'weight' has to be a non-negative real.") + + errorBound <- errorBound[order(maxValue)] + maxValue <- sort(maxValue) + + refNames <- unique(refData$Sample) + newNames <- unique(newData$Sample) + + res <- matrix("", nrow = length(newNames), ncol = length(refNames)) + rownames(res) <- newNames + colnames(res) <- refNames + + for(i in 1:length(newNames)){ + for(j in 1:length(refNames)){ + New <- newData[newData$Sample == newNames[i],] + Ref <- refData[refData$Sample == refNames[j],] + nrEnz <- length(unique(Ref$Enzyme)) + res.enz.match <- max.pos.match <- numeric(nrEnz) + refenzNames <- unique(Ref$Enzyme) + newenzNames <- unique(New$Enzyme) + + if(length(refenzNames) != length(newenzNames)) + stop("Number of enzymes is different for new data and reference data!") + if(!all(refenzNames == newenzNames)) + stop("Names of enzymes are different for new data and reference data!") + + for(k in 1:nrEnz){ + Newk <- New[New$Enzyme == newenzNames[k],] + Refk <- Ref[Ref$Enzyme == newenzNames[k],] + ## number of matches + max.pos.match[k] <- nrow(Refk) + res.enz.match[k] <- .countMatches(abs(Newk$MW), abs(Refk$MW), + maxValue = maxValue, + errorBound = errorBound) + } + + ind.weights <- max.pos.match == res.enz.match + max.match <- sum(weight*max.pos.match) + res.match <- sum(ifelse(ind.weights, weight*res.enz.match, res.enz.match)) + res[i,j] <- paste(res.match, "_", max.match, sep = "") + } + } + res +} + +.countMatches <- function(x, y, maxValue, errorBound){ + nrMatch <- 0 + if(any(y > max(maxValue)) | any(x > max(maxValue))){ + errorBound <- c(errorBound, errorBound[which.max(maxValue)]) + maxValue <- c(maxValue, max(y,x)) + } + for(i in 1:length(maxValue)){ + yi <- y[y < maxValue[i]] + xi <- x[x < (maxValue[i]+errorBound[i])] + for(j in 1:length(yi)){ + ind <- (xi > (yi[j]-errorBound[i])) & (xi < (yi[j]+errorBound[i])) + if(any(ind)){ + nrMatch <- nrMatch + 1 + xi <- xi[-which(ind)[1]] + } + } + } + nrMatch +} Modified: pkg/RFLPtools/R/RFLPdist.R =================================================================== --- pkg/RFLPtools/R/RFLPdist.R 2013-01-04 16:42:46 UTC (rev 14) +++ pkg/RFLPtools/R/RFLPdist.R 2014-03-02 14:16:11 UTC (rev 15) @@ -4,10 +4,20 @@ ## x: data.frame with RFLP data ## distfun: function to compute distance (cf. ?dist) -RFLPdist <- function(x, distfun = dist, nrBands){ +RFLPdist <- function(x, distfun = dist, nrBands, LOD = 0){ stopifnot(is.data.frame(x)) stopifnot(is.function(distfun)) + if(!missing(nrBands)){ + if(nrBands <= 0) + stop("'nrBands' has to be a positive interger!") + } + if(LOD < 0) + stop("'LOD' has to be non-negative!") + if(LOD > 0){ + x <- x[x$MW >= LOD,] + } + x1 <- split(x, x$Sample) nrbands <- sort(unique(sapply(x1, nrow))) x1.bands <- sapply(x1, nrow) Modified: pkg/RFLPtools/R/RFLPdist2.R =================================================================== --- pkg/RFLPtools/R/RFLPdist2.R 2013-01-04 16:42:46 UTC (rev 14) +++ pkg/RFLPtools/R/RFLPdist2.R 2014-03-02 14:16:11 UTC (rev 15) @@ -14,15 +14,23 @@ ## diag: see ?dist ## upper: see ?dist ## compares samples with number of bands in: nrBands, nrBands + 1, ..., nrBands + nrMissing -RFLPdist2 <- function(x, distfun = dist, nrBands, nrMissing, LOD, diag = FALSE, upper = FALSE){ +RFLPdist2 <- function(x, distfun = dist, nrBands, nrMissing, LOD = 0, diag = FALSE, upper = FALSE){ stopifnot(is.data.frame(x)) stopifnot(is.function(distfun)) if(missing(nrMissing)) stop("'nrMissing' is not specified!") if(nrMissing == 0) stop("'nrMissing == 0', please use function 'RFLPdist'!") + if(nrMissing <= 0) + stop("'nrMissing' has to be a positive interger!") + if(missing(nrBands)) + stop("'nrBands' is not specified!") + if(nrBands <= 0) + stop("'nrBands' has to be a positive interger!") + if(LOD < 0) + stop("'LOD' has to be non-negative!") - if(missing(LOD)){ + if(LOD == 0){ x1 <- split(x, x$Sample) x1.bands <- sapply(x1, nrow) Modified: pkg/RFLPtools/R/RFLPdist2ref.R =================================================================== --- pkg/RFLPtools/R/RFLPdist2ref.R 2013-01-04 16:42:46 UTC (rev 14) +++ pkg/RFLPtools/R/RFLPdist2ref.R 2014-03-02 14:16:11 UTC (rev 15) @@ -5,13 +5,19 @@ ## x: data.frame with RFLP data ## ref: data.frame with RFLP reference data ## distfun: function to compute distance (cf. ?dist) -RFLPdist2ref <- function(x, ref, distfun = dist, nrBands){ +RFLPdist2ref <- function(x, ref, distfun = dist, nrBands, LOD = 0){ stopifnot(is.data.frame(x)) stopifnot(is.data.frame(ref)) stopifnot(is.function(distfun)) if(missing(nrBands)) stop("Number of Bands 'nrBands' is missing.") + if(nrBands <= 0) + stop("'nrBands' has to be a positive interger!") + if(LOD > 0){ + x <- x[x$MW >= LOD,] + ref <- ref[ref$MW >= LOD,] + } x1 <- split(x, x$Sample) ref1 <- split(ref, ref$Sample) Added: pkg/RFLPtools/R/germ.R =================================================================== --- pkg/RFLPtools/R/germ.R (rev 0) +++ pkg/RFLPtools/R/germ.R 2014-03-02 14:16:11 UTC (rev 15) @@ -0,0 +1,95 @@ +germ <- function(newData, refData, + parameters = list("Max forward error" = 25, + "Max backward error" = 25, + "Max sum error" = 100, + "Lower measurement limit" = 100), + method = "joint", na.rm = TRUE){ + if(!is.na(pmatch(method, "joint"))) method <- "joint" + METHODS <- c("joint", "forward", "backward", "sum") + method <- pmatch(method, METHODS) + if(is.na(method)) stop("invalid ranking method") + if (method == -1) stop("ambiguous ranking method") + + newNames <- unique(newData$Sample) + refNames <- unique(refData$Sample) + + res <- array(NA, dim = c(length(refNames), 4, length(newNames))) + rownames(res) <- refNames + colnames(res) <- c("Forward Max", "Backward Max", "Sum of Bands", "Joint") + dimnames(res)[[3]] <- newNames + + if(na.rm){ + newData <- newData[!is.na(newData$MW),] + refData <- refData[!is.na(refData$MW),] + } + + for(i in 1:length(newNames)){ + for(j in 1:length(refNames)){ + New <- newData[newData$Sample == newNames[i],] + Ref <- refData[refData$Sample == refNames[j],] + nrEnz <- length(unique(Ref$Enzyme)) + res.enz.sum <- res.enz.bw <- res.enz.fw <- numeric(nrEnz) + refenzNames <- unique(Ref$Enzyme) + newenzNames <- unique(New$Enzyme) + + if(length(refenzNames) != length(newenzNames)) + stop("Number of enzymes is different for new data and reference data!") + if(!all(refenzNames == newenzNames)) + stop("Names of enzymes are different for new data and reference data!") + + for(k in 1:nrEnz){ + Newk <- New[New$Enzyme == newenzNames[k],] + Refk <- Ref[Ref$Enzyme == newenzNames[k],] + ## forward + ind.fw <- Newk$MW > (parameters$"Max forward error" + + parameters$"Lower measurement limit") + res.enz.fw[k] <- .forward(abs(Newk$MW[ind.fw]), abs(Refk$MW)) + ## backward + ind.bw <- Refk$MW > (parameters$"Max backward error" + + parameters$"Lower measurement limit") + res.enz.bw[k] <- .backward(abs(Newk$MW), abs(Refk$MW[ind.bw])) + ## sum + res.enz.sum[k] <- abs(sum(abs(Newk$MW)) - sum(abs(Refk$MW))) + } +# cat(Ref$Sample[1], ":\t", res.enz.sum, "\n") + res[j,1,i] <- max(res.enz.fw, na.rm = TRUE) + res[j,2,i] <- max(res.enz.bw, na.rm = TRUE) + res[j,3,i] <- max(res.enz.sum, na.rm = TRUE) + res[j,4,i] <- res[j,1,i]+res[j,2,i] + } + } + res.ord <- vector("list", length = length(newNames)) + names(res.ord) <- newNames + if(method == 1){ + for(i in 1:length(newNames)){ + res.ord[[i]] <- res[order(res[,4,i]),,i] + } + } + if(method == 2){ + for(i in 1:length(newNames)){ + res.ord[[i]] <- res[order(res[,1,i]),,i] + } + } + if(method == 3){ + for(i in 1:length(newNames)){ + res.ord[[i]] <- res[order(res[,2,i]),,i] + } + } + if(method == 4){ + for(i in 1:length(newNames)){ + res.ord[[i]] <- res[order(res[,3,i]),,i] + } + } + res.ord +} +.backward <- function(x, y){ + res <- numeric(length(y)) + ## zero padding of x + if(length(x) < length(y)) + x <- c(x, rep(0, length(y)-length(x))) + for(i in 1:length(y)){ + res[i] <- min(abs(y[i]-x)) + } + max(res) +} +.forward <- function(x, y){ .backward(x = y, y = x) } Added: pkg/RFLPtools/R/simulateRFLPData.R =================================================================== --- pkg/RFLPtools/R/simulateRFLPData.R (rev 0) +++ pkg/RFLPtools/R/simulateRFLPData.R 2014-03-02 14:16:11 UTC (rev 15) @@ -0,0 +1,56 @@ +############################################################################### +## R Code to simulate reference data +############################################################################### + +## N: number of samples for each of the number of bands +## nrBands: number of bands for which data is simulated +## total number of samples: N*length(nrBands) +## BandCenters: molecular weights are randomly generated around "band centers" +## delta: use uniform distribution with +/- delta around the band centers +## refData: if TRUE also Taxonname and Accession are added +simulateRFLPdata <- function(N = 10, nrBands = 3:12, + bandCenters = seq(100, 800, by = 100), + delta = 50, refData = FALSE){ + if(length(N) > 1){ + N <- N[1] + warning("Only the first element of 'N' is used.") + } + if(N <= 0) stop("'N' has to be a positive integer!") + N <- trunc(N) + + if(any(nrBands <= 0)) stop("'nrBands' has to be a vector of positive integer!") + if(any(bandCenters <= 0)) stop("'bandCenters' has to be a vector of positive reals!") + if(length(delta) > 1){ + delta <- delta[1] + warning("Only the first element of 'delta' is used.") + } + if(delta <= 0) stop("'delta' has to be a positive real!") + + ## data matrix + simData <- matrix(NA, nrow = sum(N*nrBands), ncol = 3) + colnames(simData) <- c("Sample", "Band", "MW") + + row.count <- 0 + sample.count <- 1 + for(i in nrBands){ + for(j in 1:N){ + ## randomly select "band centers" (with replacement!) + Bcent <- sample(bandCenters, i, replace = TRUE) + ## simulate molecular weights + simData[row.count+(1:i),] <- c(rep(sample.count, i), + 1:i, + sort(runif(i, min = Bcent-delta, max = Bcent+delta))) + row.count <- row.count + i + sample.count <- sample.count + 1 + } + } + + ## Generate data.frame and add column Enzyme + simData <- data.frame(simData, Enzyme = "Enzyme 1") + simData$Sample <- paste("Sample", simData$Sample) + if(refData){ + simData$Taxonname <- simData$Sample + simData$Accession <- simData$Sample + } + simData +} Added: pkg/RFLPtools/data/newDataGerm.RData =================================================================== (Binary files differ) Property changes on: pkg/RFLPtools/data/newDataGerm.RData ___________________________________________________________________ Added: svn:mime-type + application/octet-stream Added: pkg/RFLPtools/data/refDataGerm.RData =================================================================== (Binary files differ) Property changes on: pkg/RFLPtools/data/refDataGerm.RData ___________________________________________________________________ Added: svn:mime-type + application/octet-stream Modified: pkg/RFLPtools/inst/doc/RFLPtools.pdf =================================================================== (Binary files differ) Modified: pkg/RFLPtools/man/0RFLP-package.Rd =================================================================== --- pkg/RFLPtools/man/0RFLP-package.Rd 2013-01-04 16:42:46 UTC (rev 14) +++ pkg/RFLPtools/man/0RFLP-package.Rd 2014-03-02 14:16:11 UTC (rev 15) @@ -14,9 +14,9 @@ \details{ \tabular{ll}{ Package: \tab RFLPtools\cr -Version: \tab 1.5 \cr -Date: \tab 2013-01-04 \cr -Depends: \tab R(>= 2.10.0), stats, utils, grDevices, RColorBrewer\cr +Version: \tab 1.6 \cr +Date: \tab 2014-03-02 \cr +Depends: \tab R(>= 3.0.0), stats, utils, grDevices, RColorBrewer\cr Suggests: \tab lattice, MKmisc(>= 0.8)\cr License: \tab LGPL-3\cr }} @@ -34,19 +34,31 @@ Local Blast download: \url{ftp://ftp.ncbi.nih.gov/blast/executables/release/} Blast News: \url{http://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=BlastNews} + + Ian A. Dickie, Peter G. Avis, David J. McLaughlin, Peter B. Reich. + Good-Enough RFLP Matcher (GERM) program. + Mycorrhiza 2003, 13:171-172. - Poussier, Stephane; Trigalet-Demery, Danielle; Vandewalle, Peggy; Goffinet, Bruno; Luisetti, Jacques; Trigalet, Andre. - Genetic diversity of Ralstonia solanacearum as assessed by PCR-RFLP of the hrp gene region, AFLP and 16S rRNA sequence - analysis, and identification of an African subdivision. - Microbiology 2000 146:1679-1692 - + Flessa, F., Kehl, A., Kohl, M. + Analysing diversity and community structures using PCR-RFLP: a new software application. + Molecular Ecology Resources 2013 Jul; 13(4):726-33. + Matsumoto, Masaru; Furuya, Naruto; Takanami, Yoichi; Matsuyama, Nobuaki. RFLP analysis of the PCR-amplified 28S rDNA in Rhizoctonia solani. - Mycoscience 1996 37:351 - 356 + Mycoscience 1996 37:351-356. Persoh, D., Melcher, M., Flessa, F., Rambold, G.: First fungal community analyses of endophytic ascomycetes associated with Viscum album ssp. austriacum and itshost Pinus sylvestris. Fungal Biology 2010 Jul;114(7):585-96. + + Poussier, Stephane; Trigalet-Demery, Danielle; Vandewalle, Peggy; Goffinet, Bruno; Luisetti, Jacques; Trigalet, Andre. + Genetic diversity of Ralstonia solanacearum as assessed by PCR-RFLP of the hrp gene region, AFLP and 16S rRNA sequence + analysis, and identification of an African subdivision. + Microbiology 2000 146:1679-1692. + + T. A. Saari, S. K. Saari, C. D. Campbell, I. J Alexander, I. C. Anderson. + FragMatch - a program for the analysis of DNA fragment data. + Mycorrhiza 2007, 17:133-136 } \examples{ data(RFLPdata) Modified: pkg/RFLPtools/man/BLASTdata.Rd =================================================================== --- pkg/RFLPtools/man/BLASTdata.Rd 2013-01-04 16:42:46 UTC (rev 14) +++ pkg/RFLPtools/man/BLASTdata.Rd 2014-03-02 14:16:11 UTC (rev 15) @@ -41,6 +41,10 @@ Blast News: http://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=BlastNews BioEdit v7.0.9: Tom Hall, Ibis Biosciences; http://www.mbio.ncsu.edu/BioEdit/bioedit.html + + Flessa, F., Kehl, A., Kohl, M. + Analysing diversity and community structures using PCR-RFLP: a new software application. + Molecular Ecology Resources 2013 Jul; 13(4):726-33. } \examples{ data(BLASTdata) Added: pkg/RFLPtools/man/FragMatch.Rd =================================================================== --- pkg/RFLPtools/man/FragMatch.Rd (rev 0) +++ pkg/RFLPtools/man/FragMatch.Rd 2014-03-02 14:16:11 UTC (rev 15) @@ -0,0 +1,52 @@ +\name{FragMatch} +\alias{FragMatch} +\title{ + Compute matches for RFLP data via FragMatch. +} +\description{ + Compute matches for RFLP data using FragMatch - a program for the analysis of DNA + fragment data. +} +\usage{ +FragMatch(newData, refData, maxValue = 1000, errorBound = 25, + weight = 1, na.rm = TRUE) +} +\arguments{ + \item{newData}{ data.frame with new RFLP data; see \code{\link{newDataGerm}}.} + \item{refData}{ data.frame with reference RFLP data; see \code{\link{refDataGerm}}.} + \item{maxValue}{ numeric: maximum value for which the error bound is applied. + Can be a vector of length larger than 1.} + \item{errorBound}{ numeric: error bound corresponding to \code{maxValue}. + Can be a vector of length larger than 1.} + \item{weight}{ numeric: weight for weighting partial matches; see details section.} + \item{na.rm}{ logical: indicating whether NA values should be stripped before the + computation proceeds.} +} +\details{ + A rather simple algorithm which consists of counting the number of matches where + it is considered a match if the value is inside a range of +/- \code{errorBound}. + + If there is more than one enzyme, one can use weights to give the partial perfect + matches for a certain enzyme a higher (or also smaller) weight. +} +\value{ + A character matrix with entries of the form \code{"a_b"} which means that there + were \code{a} out of \code{b} possible matches. +} +\references{ + T. A. Saari, S. K. Saari, C. D. Campbell, I. J Alexander, I. C. Anderson. + FragMatch - a program for the analysis of DNA fragment data. + Mycorrhiza 2007, 17:133-136 +} +\author{ + Matthias Kohl \email{Matthias.Kohl at stamats.de} +} +%\note{} +\seealso{ \code{\link{newDataGerm}}, \code{\link{refDataGerm}} } +\examples{ + data(refDataGerm) + data(newDataGerm) + + res <- FragMatch(newDataGerm, refDataGerm) +} +\keyword{multivariate} Added: pkg/RFLPtools/man/RFLPcombine.Rd =================================================================== --- pkg/RFLPtools/man/RFLPcombine.Rd (rev 0) +++ pkg/RFLPtools/man/RFLPcombine.Rd 2014-03-02 14:16:11 UTC (rev 15) @@ -0,0 +1,47 @@ +\name{RFLPcombine} +\alias{RFLPcombine} +\title{ + Combine RFLP data sets +} +\description{ + Function to combine an arbitrary number of RFLP data sets. +} +\usage{ +RFLPcombine(\dots) +} +\arguments{ + \item{\dots}{ two or more data.frames with RFLP data.} +} +\details{ + The data sets are combined using \code{\link{rbind}}. + + If data sets with identical sample identifiers are given, the + identifiers are made unique using \code{\link{make.unique}}. +} +\value{ + A \code{data.frame} with variables + \describe{ + \item{\code{Sample}}{character: sample identifier. } + \item{\code{Band}}{integer: band number. } + \item{\code{MW}}{integer: molecular weight. } + \item{\code{Gel}}{character: gel identifier. } + } +} +\references{ + Flessa, F., Kehl, A., Kohl, M. + Analysing diversity and community structures using PCR-RFLP: a new software application. + Molecular Ecology Resources 2013 Jul; 13(4):726-33. +} +\author{ + Fabienne Flessa \email{Fabienne.Flessa at uni-bayreuth.de},\cr + Alexandra Kehl \email{Alexandra.Kehl at botgarten.uni-tuebingen.de},\cr + Matthias Kohl \email{Matthias.Kohl at stamats.de} +} +%\note{} +\seealso{ \code{\link{RFLPdata}} } +\examples{ +data(RFLPdata) +res <- RFLPcombine(RFLPdata, RFLPdata, RFLPdata) +RFLPplot(res, nrBands = 4) +} +\keyword{manip} Modified: pkg/RFLPtools/man/RFLPdata.Rd =================================================================== --- pkg/RFLPtools/man/RFLPdata.Rd 2013-01-04 16:42:46 UTC (rev 14) +++ pkg/RFLPtools/man/RFLPdata.Rd 2014-03-02 14:16:11 UTC (rev 15) @@ -23,7 +23,11 @@ \source{ The data set was generated by F. Flessa. } -%\references{ ??? } +\references{ + Flessa, F., Kehl, A., Kohl, M. + Analysing diversity and community structures using PCR-RFLP: a new software application. + Molecular Ecology Resources 2013 Jul; 13(4):726-33. +} \examples{ data(RFLPdata) str(RFLPdata) Modified: pkg/RFLPtools/man/RFLPdist.Rd =================================================================== --- pkg/RFLPtools/man/RFLPdist.Rd 2013-01-04 16:42:46 UTC (rev 14) +++ pkg/RFLPtools/man/RFLPdist.Rd 2014-03-02 14:16:11 UTC (rev 15) @@ -8,7 +8,7 @@ the distance between the molecular weights is computed. } \usage{ -RFLPdist(x, distfun = dist, nrBands) +RFLPdist(x, distfun = dist, nrBands, LOD = 0) } \arguments{ \item{x}{ data.frame with RFLP data; see \code{\link{RFLPdata}}.} @@ -16,6 +16,7 @@ cf. \code{\link[stats]{dist}}.} \item{nrBands}{ if not missing, then only samples with the specified number of bands are considered. } + \item{LOD}{ threshold for low-bp bands. } } \details{ For each number of bands the given distance between the molecular weights is computed. @@ -23,6 +24,9 @@ bands which occur in each group. If \code{nrBands} is specified only samples with this number of bands are considered. + + If \code{LOD > 0} is specified, all values below \code{LOD} are removed before the + distances are calculated. } \value{ A named list with the distances; see \code{\link[stats]{dist}}. @@ -30,6 +34,10 @@ In case \code{nrBands} is not missing, an object of S3 class \code{dist}. } \references{ + Flessa, F., Kehl, A., Kohl, M. + Analysing diversity and community structures using PCR-RFLP: a new software application. + Molecular Ecology Resources 2013 Jul; 13(4):726-33. + Poussier, Stephane; Trigalet-Demery, Danielle; Vandewalle, Peggy; Goffinet, Bruno; Luisetti, Jacques; Trigalet, Andre. Genetic diversity of Ralstonia solanacearum as assessed by PCR-RFLP of the hrp gene region, AFLP and 16S rRNA sequence analysis, and identification of an African subdivision. Modified: pkg/RFLPtools/man/RFLPdist2.Rd =================================================================== --- pkg/RFLPtools/man/RFLPdist2.Rd 2013-01-04 16:42:46 UTC (rev 14) +++ pkg/RFLPtools/man/RFLPdist2.Rd 2014-03-02 14:16:11 UTC (rev 15) @@ -13,7 +13,7 @@ argument \code{nrMissing}. } \usage{ -RFLPdist2(x, distfun = dist, nrBands, nrMissing, LOD, +RFLPdist2(x, distfun = dist, nrBands, nrMissing, LOD = 0, diag = FALSE, upper = FALSE) } \arguments{ @@ -32,7 +32,7 @@ all samples with number of bands in nrBands, nrBands+1, ..., nrBands+nrMissing are compared. - If \code{LOD} is specified, it is assumed that missing bands can only occur for + If \code{LOD > 0} is specified, it is assumed that missing bands can only occur for molecular weights smaller than \code{LOD}. As a consequence only samples which have \code{nrBands} bands with molecular weight larger or equal to \code{LOD} are selected. @@ -42,7 +42,7 @@ and the molecular weight of all possible subsets of S2 with x bands are computed. The distance between S1 and S2 is then defined as the minimum of all these distances. - If \code{LOD} is specified, only all combinations of values below \code{LOD} are + If \code{LOD > 0} is specified, only all combinations of values below \code{LOD} are considered. This option may be useful, if gel image quality is low, and the detection of bands @@ -52,6 +52,10 @@ An object of class \code{"dist"} returned; cf. \code{\link[stats]{dist}}. } \references{ + Flessa, F., Kehl, A., Kohl, M. + Analysing diversity and community structures using PCR-RFLP: a new software application. + Molecular Ecology Resources 2013 Jul; 13(4):726-33. + Ian A. Dickie, Peter G. Avis, David J. McLaughlin, Peter B. Reich. Good-Enough RFLP Matcher (GERM) program. Mycorrhiza 2003, 13:171-172. Modified: pkg/RFLPtools/man/RFLPdist2ref.Rd =================================================================== --- pkg/RFLPtools/man/RFLPdist2ref.Rd 2013-01-04 16:42:46 UTC (rev 14) +++ pkg/RFLPtools/man/RFLPdist2ref.Rd 2014-03-02 14:16:11 UTC (rev 15) @@ -7,7 +7,7 @@ Function to compute distance between RFLP data and RFLP reference data. } \usage{ -RFLPdist2ref(x, ref, distfun = dist, nrBands) +RFLPdist2ref(x, ref, distfun = dist, nrBands, LOD = 0) } \arguments{ \item{x}{ data.frame with RFLP data; e.g. \code{\link{RFLPdata}}.} @@ -16,17 +16,25 @@ cf. \code{\link[stats]{dist}}.} \item{nrBands}{ only samples and reference samples with this number of bands are considered. } + \item{LOD}{ threshold for low-bp bands. } } \details{ For each sample with \code{nrBands} bands the distance to each reference sample with \code{nrBands} bands is computed. The result is a matrix with the corresponding distances where rows represent the samples and columns the reference samples. + + If \code{LOD > 0} is specified, all values below \code{LOD} are removed before the + distances are calculated. This applies to \code{x} and \code{ref}. } \value{ A matrix with distances. } -%\references{ ??? } +\references{ + Flessa, F., Kehl, A., Kohl, M. + Analysing diversity and community structures using PCR-RFLP: a new software application. + Molecular Ecology Resources 2013 Jul; 13(4):726-33. +} \author{ Fabienne Flessa \email{Fabienne.Flessa at uni-bayreuth.de},\cr Alexandra Kehl \email{Alexandra.Kehl at botgarten.uni-tuebingen.de},\cr Added: pkg/RFLPtools/man/RFLPlod.Rd =================================================================== --- pkg/RFLPtools/man/RFLPlod.Rd (rev 0) +++ pkg/RFLPtools/man/RFLPlod.Rd 2014-03-02 14:16:11 UTC (rev 15) @@ -0,0 +1,52 @@ +\name{RFLPlod} +\alias{RFLPlod} +\title{ + Remove bands below LOD +} +\description{ + Function to exclude bands below a given LOD. +} +\usage{ +RFLPlod(x, LOD) +} +\arguments{ + \item{x}{ data.frame with RFLP data.} + \item{LOD}{ threshold for low-bp bands.} +} +\details{ + Low-bp bands may be regarded as unreliable. Function + \code{RFLPlod} can be used to exclude such bands, which + are likely to be absent in some other samples, before + further analyses. +} +\value{ + A \code{data.frame} with variables + \describe{ + \item{\code{Sample}}{character: sample identifier. } + \item{\code{Band}}{integer: band number. } + \item{\code{MW}}{integer: molecular weight. } + \item{\code{Gel}}{character: gel identifier. } + } +} +\references{ + Flessa, F., Kehl, A., Kohl, M. + Analysing diversity and community structures using PCR-RFLP: a new software application. + Molecular Ecology Resources 2013 Jul; 13(4):726-33. +} +\author{ + Fabienne Flessa \email{Fabienne.Flessa at uni-bayreuth.de},\cr + Alexandra Kehl \email{Alexandra.Kehl at botgarten.uni-tuebingen.de},\cr + Matthias Kohl \email{Matthias.Kohl at stamats.de} +} +%\note{} +\seealso{ \code{\link{RFLPdata}} } +\examples{ +data(RFLPdata) +## remove bands with MW smaller than 60 +RFLPdata.lod <- RFLPlod(RFLPdata, LOD = 60) +par(mfrow = c(1, 2)) +RFLPplot(RFLPdata, nrBands = 4, ylim = c(40, 670)) +RFLPplot(RFLPdata.lod, nrBands = 4, ylim = c(40, 670)) +title(sub = "After applying RFLPlod") +} +\keyword{manip} Modified: pkg/RFLPtools/man/RFLPplot.Rd =================================================================== --- pkg/RFLPtools/man/RFLPplot.Rd 2013-01-04 16:42:46 UTC (rev 14) +++ pkg/RFLPtools/man/RFLPplot.Rd 2014-03-02 14:16:11 UTC (rev 15) @@ -28,7 +28,7 @@ \item{cex.axis}{ size of the x-axis annotation. } \item{colBands}{ color for the bands. Has to be of length 1 or number of samples. If missing, \code{"Set1"} of \pkg{RColorBrewer} is used; see - \code{\link[RColorBrewer]{brewer.pal}}. } + \code{\link[RColorBrewer]{ColorBrewer}}. } \item{xlab}{ passed to function \code{\link[graphics]{plot}}. } \item{ylab}{ passed to function \code{\link[graphics]{plot}}. } \item{ylim}{ passed to function \code{\link[graphics]{plot}}. If missing an appropriate @@ -44,7 +44,11 @@ and the detection of bands is doubtful. } \value{invisible} -%\references{ ??? } +\references{ + Flessa, F., Kehl, A., Kohl, M. + Analysing diversity and community structures using PCR-RFLP: a new software application. + Molecular Ecology Resources 2013 Jul; 13(4):726-33. +} \author{ Fabienne Flessa \email{Fabienne.Flessa at uni-bayreuth.de},\cr Alexandra Kehl \email{Alexandra.Kehl at botgarten.uni-tuebingen.de},\cr Modified: pkg/RFLPtools/man/RFLPqc.Rd =================================================================== --- pkg/RFLPtools/man/RFLPqc.Rd 2013-01-04 16:42:46 UTC (rev 14) +++ pkg/RFLPtools/man/RFLPqc.Rd 2014-03-02 14:16:11 UTC (rev 15) @@ -46,7 +46,11 @@ \item{\code{Gel}}{character: gel identifier. } } } -%\references{ ??? } +\references{ + Flessa, F., Kehl, A., Kohl, M. + Analysing diversity and community structures using PCR-RFLP: a new software application. + Molecular Ecology Resources 2013 Jul; 13(4):726-33. +} \author{ Fabienne Flessa \email{Fabienne.Flessa at uni-bayreuth.de},\cr Alexandra Kehl \email{Alexandra.Kehl at botgarten.uni-tuebingen.de},\cr Modified: pkg/RFLPtools/man/RFLPref.Rd =================================================================== --- pkg/RFLPtools/man/RFLPref.Rd 2013-01-04 16:42:46 UTC (rev 14) +++ pkg/RFLPtools/man/RFLPref.Rd 2014-03-02 14:16:11 UTC (rev 15) @@ -5,7 +5,7 @@ \description{ This is an example data set for RFLP reference. } -\usage{data(RFLPdata)} +\usage{data(RFLPref)} \format{ A data frame with 35 observations on the following five variables \describe{ @@ -25,7 +25,11 @@ \source{ The data set was generated by F. Flessa. } -%\references{ ??? } +\references{ + Flessa, F., Kehl, A., Kohl, M. + Analysing diversity and community structures using PCR-RFLP: a new software application. + Molecular Ecology Resources 2013 Jul; 13(4):726-33. +} \examples{ data(RFLPref) str(RFLPref) Modified: pkg/RFLPtools/man/RFLPrefplot.Rd =================================================================== --- pkg/RFLPtools/man/RFLPrefplot.Rd 2013-01-04 16:42:46 UTC (rev 14) +++ pkg/RFLPtools/man/RFLPrefplot.Rd 2014-03-02 14:16:11 UTC (rev 15) @@ -26,7 +26,7 @@ \item{devNew}{ logical. Open new graphics device for each plot. } \item{colBands}{ color for the bands. Has to be of length 1 or number of samples. If missing, \code{"Set1"} of \pkg{RColorBrewer} is used; see - \code{\link[RColorBrewer]{brewer.pal}}. } + \code{\link[RColorBrewer]{ColorBrewer}}. } \item{xlab}{ passed to function \code{\link[graphics]{plot}}. } \item{ylab}{ passed to function \code{\link[graphics]{plot}}. } [TRUNCATED] To get the complete diff run: svnlook diff /svnroot/rflptools -r 15 From noreply at r-forge.r-project.org Sun Mar 2 16:41:06 2014 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Sun, 2 Mar 2014 16:41:06 +0100 (CET) Subject: [Rflptools-commits] r16 - in pkg/RFLPtools/inst: . scripts Message-ID: <20140302154106.821EC18475B@r-forge.r-project.org> Author: stamats Date: 2014-03-02 16:41:06 +0100 (Sun, 02 Mar 2014) New Revision: 16 Added: pkg/RFLPtools/inst/scripts/ pkg/RFLPtools/inst/scripts/SimulationStudy.R Log: added R script with simulation study Added: pkg/RFLPtools/inst/scripts/SimulationStudy.R =================================================================== --- pkg/RFLPtools/inst/scripts/SimulationStudy.R (rev 0) +++ pkg/RFLPtools/inst/scripts/SimulationStudy.R 2014-03-02 15:41:06 UTC (rev 16) @@ -0,0 +1,542 @@ +## load packages RFLPtools (version >= 1.6), MKmisc +library(RFLPtools) +library(MKmisc) + +## general parameters +M <- 5 # finally use M <- 1000 +N <- 10 # increase N? +nrB <- 3:10 + +## helper functions +checkResultsGerm <- function(newName, resData){ + rownames(resData[[newName]])[1] == newName +} +dist2ref <- function(nB, newData, refData, distfun = dist, nrMissing = 0, LOD = 0){ + newData$Sample <- paste("New", newData$Sample, sep = "_") + complData <- rbind(newData, refData) + if(nrMissing == 0){ + res <- RFLPdist(complData, nrBands = nB, distfun = distfun, LOD = LOD) + }else{ + res <- RFLPdist2(complData, nrBands = nB, distfun = distfun, + nrMissing = nrMissing) + } + res +} +checkResultsRFLP <- function(resData){ + resData <- as.matrix(resData) + resData <- resData[,grepl("New", colnames(resData))] + resData <- resData[!grepl("New", rownames(resData)),] + newNam <- sapply(strsplit(colnames(resData), split = "_"), "[", 2) + refNam <- rownames(resData)[apply(resData, 2, which.min)] + sum(refNam == newNam) +} +checkResultsFragMatch <- function(resData, nrMissing = 0){ + Matches <- sapply(apply(resData, 2, strsplit, split = "_"), + function(x, nrMissing){ + sapply(x, + function(x, nrMissing){ + as.integer(x[1]) == (as.integer(x[2])-nrMissing) + }, + nrMissing = nrMissing) + }, nrMissing = nrMissing) + rowNams <- rownames(resData) + colNams <- colnames(resData) + nrMatch <- 0 + for(i in 1:ncol(Matches)){ + if(colNams[i] %in% rowNams[which(Matches[,i])]) nrMatch <- nrMatch + 1 + } + nrMatch +} + + +############################################################################### +## 1. Only Measurement error +############################################################################### +acc.germ.joint1 <- numeric(M) +acc.germ.forward1 <- numeric(M) +acc.germ.backward1 <- numeric(M) +acc.germ.sum1 <- numeric(M) +acc.rflptools.eucl1 <- numeric(M) +acc.rflptools.cor1 <- numeric(M) +acc.rflptools.diff1 <- numeric(M) +acc.FragMatch1.5 <- numeric(M) +acc.FragMatch1.10 <- numeric(M) +acc.FragMatch1.25 <- numeric(M) + +for(i in 1:M){ + print(i) + # generate reference data + refData1 <- simulateRFLPdata(N = N, bandCenters = seq(200, 800, by = 100), + nrBands = nrB, refData = TRUE) + # add measurement error + newData1 <- refData1 + newData1$MW <- newData1$MW + rnorm(nrow(newData1), mean = 0, sd = 5) + + ## check range of measurement error + #summary(newData1$MW - refData1$MW) + + # apply germ + res1.germ.joint <- germ(newData = newData1, refData = refData1) + res1.germ.forward <- lapply(res1.germ.joint, function(x) x[order(x[,"Forward Max"]),]) + res1.germ.backward <- lapply(res1.germ.joint, function(x) x[order(x[,"Backward Max"]),]) + res1.germ.sum <- lapply(res1.germ.joint, function(x) x[order(x[,"Sum of Bands"]),]) + + ## accuracy for GERM (in %) + SampleNames <- unique(refData1$Sample) + acc.germ.joint1[i] <- 100*sum(sapply(SampleNames, checkResultsGerm, resData = res1.germ.joint))/length(SampleNames) + acc.germ.forward1[i] <- 100*sum(sapply(SampleNames, checkResultsGerm, resData = res1.germ.forward))/length(SampleNames) + acc.germ.backward1[i] <- 100*sum(sapply(SampleNames, checkResultsGerm, resData = res1.germ.backward))/length(SampleNames) + acc.germ.sum1[i] <- 100*sum(sapply(SampleNames, checkResultsGerm, resData = res1.germ.sum))/length(SampleNames) + + ## results for RFLPtools + res1.rflptools.eucl <- lapply(nrB, dist2ref, newData = newData1, refData = refData1) + res1.rflptools.cor <- lapply(nrB, dist2ref, newData = newData1, refData = refData1, dist = corDist) + res1.rflptools.diff <- lapply(nrB, dist2ref, newData = newData1, refData = refData1, dist = diffDist) + + ## accuracy for RFLPtools + acc.rflptools.eucl1[i] <- 100*sum(sapply(res1.rflptools.eucl, checkResultsRFLP))/length(SampleNames) + acc.rflptools.cor1[i] <- 100*sum(sapply(res1.rflptools.cor, checkResultsRFLP))/length(SampleNames) + acc.rflptools.diff1[i] <- 100*sum(sapply(res1.rflptools.diff, checkResultsRFLP))/length(SampleNames) + + ## apply FragMatch + res1.FragMatch.5 <- FragMatch(newData = newData1, refData = refData1, errorBound = 5) + res1.FragMatch.10 <- FragMatch(newData = newData1, refData = refData1, errorBound = 10) + res1.FragMatch.25 <- FragMatch(newData = newData1, refData = refData1, errorBound = 25) + + ## accuracy for fraqMatch + acc.FragMatch1.5[i] <- 100*checkResultsFragMatch(res1.FragMatch.5)/length(SampleNames) + acc.FragMatch1.10[i] <- 100*checkResultsFragMatch(res1.FragMatch.10)/length(SampleNames) + acc.FragMatch1.25[i] <- 100*checkResultsFragMatch(res1.FragMatch.25)/length(SampleNames) +} +mean(acc.germ.joint1) +mean(acc.germ.forward1) +mean(acc.germ.backward1) +mean(acc.germ.sum1) +mean(acc.rflptools.eucl1) +mean(acc.rflptools.cor1) +mean(acc.rflptools.diff1) +mean(acc.FragMatch1.5) +mean(acc.FragMatch1.10) +mean(acc.FragMatch1.25) + + +############################################################################### +## 2. Measurement error + 1 band below LOD +############################################################################### +acc.germ.joint2 <- numeric(M) +acc.germ.forward2 <- numeric(M) +acc.germ.backward2 <- numeric(M) +acc.germ.sum2 <- numeric(M) +acc.rflptools.eucl2 <- numeric(M) +acc.rflptools.cor2 <- numeric(M) +acc.rflptools.diff2 <- numeric(M) +acc.rflptools.eucl21 <- numeric(M) +acc.rflptools.cor21 <- numeric(M) +acc.rflptools.diff21 <- numeric(M) +acc.FragMatch2.5 <- numeric(M) +acc.FragMatch2.10 <- numeric(M) +acc.FragMatch2.25 <- numeric(M) + +for(i in 1:M){ + print(i) + # generate reference data + refData2 <- simulateRFLPdata(N = N, bandCenters = seq(200, 800, by = 100), + nrBands = nrB, refData = TRUE) + # add band below LOD + SampleNames <- unique(refData2$Sample) + for(j in 1:length(SampleNames)){ + temp <- refData2[refData2$Sample == SampleNames[j],] + addLOD <- data.frame(Sample = SampleNames[j], + Band = 0, + MW = runif(1, min = 0, max = 100), + Enzyme = temp$Enzyme[1], + Taxonname = temp$Taxonname[1], + Accession = temp$Accession[1]) + temp <- rbind(addLOD, temp) + temp$Band <- temp$Band + 1 + if(j == 1) + newData2 <- temp + else + newData2 <- rbind(newData2, temp) + } + rownames(newData2) <- 1:nrow(newData2) + + # add measurement error + newData2$MW <- newData2$MW + rnorm(nrow(newData2), mean = 0, sd = 5) + + # apply germ + res2.germ.joint <- germ(newData = newData2, refData = refData2) + res2.germ.forward <- lapply(res2.germ.joint, function(x) x[order(x[,"Forward Max"]),]) + res2.germ.backward <- lapply(res2.germ.joint, function(x) x[order(x[,"Backward Max"]),]) + res2.germ.sum <- lapply(res2.germ.joint, function(x) x[order(x[,"Sum of Bands"]),]) + + ## accuracy for GERM (in %) + acc.germ.joint2[i] <- 100*sum(sapply(SampleNames, checkResultsGerm, resData = res2.germ.joint))/length(SampleNames) + acc.germ.forward2[i] <- 100*sum(sapply(SampleNames, checkResultsGerm, resData = res2.germ.forward))/length(SampleNames) + acc.germ.backward2[i] <- 100*sum(sapply(SampleNames, checkResultsGerm, resData = res2.germ.backward))/length(SampleNames) + acc.germ.sum2[i] <- 100*sum(sapply(SampleNames, checkResultsGerm, resData = res2.germ.sum))/length(SampleNames) + + ## results for RFLPtools + res2.rflptools.eucl <- lapply(nrB, dist2ref, newData = newData2, refData = refData2, LOD = 125) + res2.rflptools.cor <- lapply(nrB, dist2ref, newData = newData2, refData = refData2, dist = corDist, LOD = 125) + res2.rflptools.diff <- lapply(nrB, dist2ref, newData = newData2, refData = refData2, dist = diffDist, LOD = 125) + res2.rflptools.eucl1 <- lapply(nrB, dist2ref, newData = newData2, refData = refData2, nrMissing = 1) + res2.rflptools.cor1 <- lapply(nrB, dist2ref, newData = newData2, refData = refData2, dist = corDist, nrMissing = 1) + res2.rflptools.diff1 <- lapply(nrB, dist2ref, newData = newData2, refData = refData2, dist = diffDist, nrMissing = 1) + + ## accuracy for RFLPtools + acc.rflptools.eucl2[i] <- 100*sum(sapply(res2.rflptools.eucl, checkResultsRFLP))/length(SampleNames) + acc.rflptools.cor2[i] <- 100*sum(sapply(res2.rflptools.cor, checkResultsRFLP))/length(SampleNames) + acc.rflptools.diff2[i] <- 100*sum(sapply(res2.rflptools.diff, checkResultsRFLP))/length(SampleNames) + acc.rflptools.eucl21[i] <- 100*sum(sapply(res2.rflptools.eucl1, checkResultsRFLP))/length(SampleNames) + acc.rflptools.cor21[i] <- 100*sum(sapply(res2.rflptools.cor1, checkResultsRFLP))/length(SampleNames) + acc.rflptools.diff21[i] <- 100*sum(sapply(res2.rflptools.diff1, checkResultsRFLP))/length(SampleNames) + + ## apply FragMatch + res2.FragMatch.5 <- FragMatch(newData = newData2, refData = refData2, errorBound = 5) + res2.FragMatch.10 <- FragMatch(newData = newData2, refData = refData2, errorBound = 10) + res2.FragMatch.25 <- FragMatch(newData = newData2, refData = refData2, errorBound = 25) + + ## accuracy for fraqMatch + acc.FragMatch2.5[i] <- 100*checkResultsFragMatch(res2.FragMatch.5)/length(SampleNames) + acc.FragMatch2.10[i] <- 100*checkResultsFragMatch(res2.FragMatch.10)/length(SampleNames) + acc.FragMatch2.25[i] <- 100*checkResultsFragMatch(res2.FragMatch.25)/length(SampleNames) +} +mean(acc.germ.joint2) +mean(acc.germ.forward2) +mean(acc.germ.backward2) +mean(acc.germ.sum2) +mean(acc.rflptools.eucl2) +mean(acc.rflptools.cor2) +mean(acc.rflptools.diff2) +mean(acc.rflptools.eucl21) +mean(acc.rflptools.cor21) +mean(acc.rflptools.diff21) +mean(acc.FragMatch2.5) +mean(acc.FragMatch2.10) +mean(acc.FragMatch2.25) + + +############################################################################### +## 3. Measurement error + 1 faint band +############################################################################### +acc.germ.joint3 <- numeric(M) +acc.germ.forward3 <- numeric(M) +acc.germ.backward3 <- numeric(M) +acc.germ.sum3 <- numeric(M) +acc.rflptools.eucl3 <- numeric(M) +acc.rflptools.cor3 <- numeric(M) +acc.rflptools.diff3 <- numeric(M) +acc.FragMatch3.5 <- numeric(M) +acc.FragMatch3.10 <- numeric(M) +acc.FragMatch3.25 <- numeric(M) + +for(i in 1:M){ + print(i) + # generate reference data + refData3 <- simulateRFLPdata(N = N, bandCenters = seq(200, 800, by = 100), + nrBands = nrB, refData = TRUE) + # add one addtional band "somewhere" + SampleNames <- unique(refData3$Sample) + for(j in 1:length(SampleNames)){ + temp <- refData3[refData3$Sample == SampleNames[j],] + addBand <- data.frame(Sample = SampleNames[j], + Band = max(temp$Band)+1, + MW = -runif(1, min = 150, max = 850), + Enzyme = temp$Enzyme[1], + Taxonname = temp$Taxonname[1], + Accession = temp$Accession[1]) + temp <- rbind(temp, addBand) + temp <- temp[order(abs(temp$MW)),] + temp$Band <- 1:nrow(temp) + if(j == 1) + newData3 <- temp + else + newData3 <- rbind(newData3, temp) + } + rownames(newData3) <- 1:nrow(newData3) + + # add measurement error + newData3$MW <- newData3$MW + rnorm(nrow(newData3), mean = 0, sd = 5) + + # apply germ + res3.germ.joint <- germ(newData = newData3, refData = refData3) + res3.germ.forward <- lapply(res3.germ.joint, function(x) x[order(x[,"Forward Max"]),]) + res3.germ.backward <- lapply(res3.germ.joint, function(x) x[order(x[,"Backward Max"]),]) + res3.germ.sum <- lapply(res3.germ.joint, function(x) x[order(x[,"Sum of Bands"]),]) + + ## accuracy for GERM (in %) + acc.germ.joint3[i] <- 100*sum(sapply(SampleNames, checkResultsGerm, resData = res3.germ.joint))/length(SampleNames) + acc.germ.forward3[i] <- 100*sum(sapply(SampleNames, checkResultsGerm, resData = res3.germ.forward))/length(SampleNames) + acc.germ.backward3[i] <- 100*sum(sapply(SampleNames, checkResultsGerm, resData = res3.germ.backward))/length(SampleNames) + acc.germ.sum3[i] <- 100*sum(sapply(SampleNames, checkResultsGerm, resData = res3.germ.sum))/length(SampleNames) + + ## results for RFLPtools + newData31 <- newData3 + newData31$MW <- abs(newData31$MW) + res3.rflptools.eucl1 <- lapply(nrB, dist2ref, newData = newData31, refData = refData3, nrMissing = 1) + res3.rflptools.cor1 <- lapply(nrB, dist2ref, newData = newData31, refData = refData3, dist = corDist, nrMissing = 1) + res3.rflptools.diff1 <- lapply(nrB, dist2ref, newData = newData31, refData = refData3, dist = diffDist, nrMissing = 1) + + ## accuracy for RFLPtools + acc.rflptools.eucl3[i] <- 100*sum(sapply(res3.rflptools.eucl1, checkResultsRFLP))/length(SampleNames) + acc.rflptools.cor3[i] <- 100*sum(sapply(res3.rflptools.cor1, checkResultsRFLP))/length(SampleNames) + acc.rflptools.diff3[i] <- 100*sum(sapply(res3.rflptools.diff1, checkResultsRFLP))/length(SampleNames) + + ## apply FragMatch + res3.FragMatch.5 <- FragMatch(newData = newData31, refData = refData3, errorBound = 5) + res3.FragMatch.10 <- FragMatch(newData = newData31, refData = refData3, errorBound = 10) + res3.FragMatch.25 <- FragMatch(newData = newData31, refData = refData3, errorBound = 25) + + ## accuracy for fraqMatch + acc.FragMatch3.5[i] <- 100*checkResultsFragMatch(res3.FragMatch.5)/length(SampleNames) + acc.FragMatch3.10[i] <- 100*checkResultsFragMatch(res3.FragMatch.10)/length(SampleNames) + acc.FragMatch3.25[i] <- 100*checkResultsFragMatch(res3.FragMatch.25)/length(SampleNames) +} +mean(acc.germ.joint3) +mean(acc.germ.forward3) +mean(acc.germ.backward3) +mean(acc.germ.sum3) +mean(acc.rflptools.eucl3) +mean(acc.rflptools.cor3) +mean(acc.rflptools.diff3) +mean(acc.FragMatch3.5) +mean(acc.FragMatch3.10) +mean(acc.FragMatch3.25) + + +############################################################################### +## 4. Measurement error + 1 missing band +############################################################################### +acc.germ.joint4 <- numeric(M) +acc.germ.forward4 <- numeric(M) +acc.germ.backward4 <- numeric(M) +acc.germ.sum4 <- numeric(M) +acc.rflptools.eucl4 <- numeric(M) +acc.rflptools.cor4 <- numeric(M) +acc.rflptools.diff4 <- numeric(M) +acc.FragMatch4.5 <- numeric(M) +acc.FragMatch4.10 <- numeric(M) +acc.FragMatch4.25 <- numeric(M) + +for(i in 1:M){ + print(i) + # generate reference data + refData4 <- simulateRFLPdata(N = N, bandCenters = seq(200, 800, by = 100), + nrBands = nrB, refData = TRUE) + # remove one band randomly + SampleNames <- unique(refData4$Sample) + for(j in 1:length(SampleNames)){ + temp <- refData4[refData4$Sample == SampleNames[j],] + temp <- temp[-sample(1:nrow(temp), 1),] + temp$Band <- 1:nrow(temp) + if(j == 1) + newData4 <- temp + else + newData4 <- rbind(newData4, temp) + } + rownames(newData4) <- 1:nrow(newData4) + + # add measurement error + newData4$MW <- newData4$MW + rnorm(nrow(newData4), mean = 0, sd = 5) + + # apply germ + res4.germ.joint <- germ(newData = newData4, refData = refData4) + res4.germ.forward <- lapply(res4.germ.joint, function(x) x[order(x[,"Forward Max"]),]) + res4.germ.backward <- lapply(res4.germ.joint, function(x) x[order(x[,"Backward Max"]),]) + res4.germ.sum <- lapply(res4.germ.joint, function(x) x[order(x[,"Sum of Bands"]),]) + + ## accuracy for GERM (in %) + acc.germ.joint4[i] <- 100*sum(sapply(SampleNames, checkResultsGerm, resData = res4.germ.joint))/length(SampleNames) + acc.germ.forward4[i] <- 100*sum(sapply(SampleNames, checkResultsGerm, resData = res4.germ.forward))/length(SampleNames) + acc.germ.backward4[i] <- 100*sum(sapply(SampleNames, checkResultsGerm, resData = res4.germ.backward))/length(SampleNames) + acc.germ.sum4[i] <- 100*sum(sapply(SampleNames, checkResultsGerm, resData = res4.germ.sum))/length(SampleNames) + + ## results for RFLPtools + res4.rflptools.eucl1 <- lapply(nrB, dist2ref, newData = newData4, refData = refData4, nrMissing = 1) + res4.rflptools.cor1 <- lapply(nrB, dist2ref, newData = newData4, refData = refData4, dist = corDist, nrMissing = 1) + res4.rflptools.diff1 <- lapply(nrB, dist2ref, newData = newData4, refData = refData4, dist = diffDist, nrMissing = 1) + + ## accuracy for RFLPtools + acc.rflptools.eucl4[i] <- 100*sum(sapply(res4.rflptools.eucl1[-length(nrB)], checkResultsRFLP))/length(SampleNames) + acc.rflptools.cor4[i] <- 100*sum(sapply(res4.rflptools.cor1[-length(nrB)], checkResultsRFLP))/length(SampleNames) + acc.rflptools.diff4[i] <- 100*sum(sapply(res4.rflptools.diff1[-length(nrB)], checkResultsRFLP))/length(SampleNames) + + ## apply FragMatch + res4.FragMatch.5 <- FragMatch(newData = newData4, refData = refData4, errorBound = 5) + res4.FragMatch.10 <- FragMatch(newData = newData4, refData = refData4, errorBound = 10) + res4.FragMatch.25 <- FragMatch(newData = newData4, refData = refData4, errorBound = 25) + + ## accuracy for fraqMatch + acc.FragMatch4.5[i] <- 100*checkResultsFragMatch(res4.FragMatch.5, nrMissing = 1)/length(SampleNames) + acc.FragMatch4.10[i] <- 100*checkResultsFragMatch(res4.FragMatch.10, nrMissing = 1)/length(SampleNames) + acc.FragMatch4.25[i] <- 100*checkResultsFragMatch(res4.FragMatch.25, nrMissing = 1)/length(SampleNames) +} +mean(acc.germ.joint4) +mean(acc.germ.forward4) +mean(acc.germ.backward4) +mean(acc.germ.sum4) +mean(acc.rflptools.eucl4) +mean(acc.rflptools.cor4) +mean(acc.rflptools.diff4) +mean(acc.FragMatch4.5) +mean(acc.FragMatch4.10) +mean(acc.FragMatch4.25) + + +############################################################################### +## 5. Measurement error + 1 replicated band +############################################################################### +acc.germ.joint5 <- numeric(M) +acc.germ.forward5 <- numeric(M) +acc.germ.backward5 <- numeric(M) +acc.germ.sum5 <- numeric(M) +acc.rflptools.eucl5 <- numeric(M) +acc.rflptools.cor5 <- numeric(M) +acc.rflptools.diff5 <- numeric(M) +acc.FragMatch5.5 <- numeric(M) +acc.FragMatch5.10 <- numeric(M) +acc.FragMatch5.25 <- numeric(M) + +for(i in 1:M){ + print(i) + # generate reference data + refData5 <- simulateRFLPdata(N = N, bandCenters = seq(200, 800, by = 100), + nrBands = nrB, refData = TRUE) + # select randomly one band, replicate this band, add measurement error + SampleNames <- unique(refData5$Sample) + for(j in 1:length(SampleNames)){ + temp <- refData5[refData5$Sample == SampleNames[j],] + addBand <- temp[sample(1:nrow(temp), 1),, drop = FALSE] + temp <- rbind(temp, addBand) + temp <- temp[order(abs(temp$MW)),] + temp$Band <- 1:nrow(temp) + if(j == 1) + newData5 <- temp + else + newData5 <- rbind(newData5, temp) + } + rownames(newData5) <- 1:nrow(newData5) + + # add measurement error + newData5$MW <- newData5$MW + rnorm(nrow(newData5), mean = 0, sd = 5) + + # apply germ + res5.germ.joint <- germ(newData = newData5, refData = refData5) + res5.germ.forward <- lapply(res5.germ.joint, function(x) x[order(x[,"Forward Max"]),]) + res5.germ.backward <- lapply(res5.germ.joint, function(x) x[order(x[,"Backward Max"]),]) + res5.germ.sum <- lapply(res5.germ.joint, function(x) x[order(x[,"Sum of Bands"]),]) + + ## accuracy for GERM (in %) + acc.germ.joint5[i] <- 100*sum(sapply(SampleNames, checkResultsGerm, resData = res5.germ.joint))/length(SampleNames) + acc.germ.forward5[i] <- 100*sum(sapply(SampleNames, checkResultsGerm, resData = res5.germ.forward))/length(SampleNames) + acc.germ.backward5[i] <- 100*sum(sapply(SampleNames, checkResultsGerm, resData = res5.germ.backward))/length(SampleNames) + acc.germ.sum5[i] <- 100*sum(sapply(SampleNames, checkResultsGerm, resData = res5.germ.sum))/length(SampleNames) + + ## results for RFLPtools + res5.rflptools.eucl1 <- lapply(nrB, dist2ref, newData = newData5, refData = refData5, nrMissing = 1) + res5.rflptools.cor1 <- lapply(nrB, dist2ref, newData = newData5, refData = refData5, dist = corDist, nrMissing = 1) + res5.rflptools.diff1 <- lapply(nrB, dist2ref, newData = newData5, refData = refData5, dist = diffDist, nrMissing = 1) + + ## accuracy for RFLPtools + acc.rflptools.eucl5[i] <- 100*sum(sapply(res5.rflptools.eucl1, checkResultsRFLP))/length(SampleNames) + acc.rflptools.cor5[i] <- 100*sum(sapply(res5.rflptools.cor1, checkResultsRFLP))/length(SampleNames) + acc.rflptools.diff5[i] <- 100*sum(sapply(res5.rflptools.diff1, checkResultsRFLP))/length(SampleNames) + + ## apply FragMatch + res5.FragMatch.5 <- FragMatch(newData = newData5, refData = refData5, errorBound = 5) + res5.FragMatch.10 <- FragMatch(newData = newData5, refData = refData5, errorBound = 10) + res5.FragMatch.25 <- FragMatch(newData = newData5, refData = refData5, errorBound = 25) + + ## accuracy for fraqMatch + acc.FragMatch5.5[i] <- 100*checkResultsFragMatch(res5.FragMatch.5)/length(SampleNames) + acc.FragMatch5.10[i] <- 100*checkResultsFragMatch(res5.FragMatch.10)/length(SampleNames) + acc.FragMatch5.25[i] <- 100*checkResultsFragMatch(res5.FragMatch.25)/length(SampleNames) +} +mean(acc.germ.joint5) +mean(acc.germ.forward5) +mean(acc.germ.backward5) +mean(acc.germ.sum5) +mean(acc.rflptools.eucl5) +mean(acc.rflptools.cor5) +mean(acc.rflptools.diff5) +mean(acc.FragMatch5.5) +mean(acc.FragMatch5.10) +mean(acc.FragMatch5.25) + + +############################################################################### +## 6. Measurement error + positive shift of bands +############################################################################### +# small shifts are no problem for the algorithms -> use larger shift +shift <- 50 +acc.germ.joint6 <- numeric(M) +acc.germ.forward6 <- numeric(M) +acc.germ.backward6 <- numeric(M) +acc.germ.sum6 <- numeric(M) +acc.rflptools.eucl6 <- numeric(M) +acc.rflptools.cor6 <- numeric(M) +acc.rflptools.diff6 <- numeric(M) +acc.FragMatch6.5 <- numeric(M) +acc.FragMatch6.10 <- numeric(M) +acc.FragMatch6.25 <- numeric(M) + +for(i in 1:M){ + print(i) + # generate reference data + refData6 <- simulateRFLPdata(N = N, bandCenters = seq(200, 800, by = 100), + nrBands = nrB, refData = TRUE) + # fixed shift of bands + newData6 <- refData6 + newData6$MW <- newData6$MW + shift + + # add measurement error + newData6$MW <- newData6$MW + rnorm(nrow(newData6), mean = 0, sd = 5) + + ## check range of measurement error + #summary(newData6$MW - refData6$MW) + + # apply germ + res6.germ.joint <- germ(newData = newData6, refData = refData6) + res6.germ.forward <- lapply(res6.germ.joint, function(x) x[order(x[,"Forward Max"]),]) + res6.germ.backward <- lapply(res6.germ.joint, function(x) x[order(x[,"Backward Max"]),]) + res6.germ.sum <- lapply(res6.germ.joint, function(x) x[order(x[,"Sum of Bands"]),]) + + ## accuracy for GERM (in %) + SampleNames <- unique(refData6$Sample) + acc.germ.joint6[i] <- 100*sum(sapply(SampleNames, checkResultsGerm, resData = res6.germ.joint))/length(SampleNames) + acc.germ.forward6[i] <- 100*sum(sapply(SampleNames, checkResultsGerm, resData = res6.germ.forward))/length(SampleNames) + acc.germ.backward6[i] <- 100*sum(sapply(SampleNames, checkResultsGerm, resData = res6.germ.backward))/length(SampleNames) + acc.germ.sum6[i] <- 100*sum(sapply(SampleNames, checkResultsGerm, resData = res6.germ.sum))/length(SampleNames) + + ## results for RFLPtools + res6.rflptools.eucl <- lapply(nrB, dist2ref, newData = newData6, refData = refData6) + res6.rflptools.cor <- lapply(nrB, dist2ref, newData = newData6, refData = refData6, dist = corDist) + res6.rflptools.diff <- lapply(nrB, dist2ref, newData = newData6, refData = refData6, dist = diffDist) + + ## accuracy for RFLPtools + acc.rflptools.eucl6[i] <- 100*sum(sapply(res6.rflptools.eucl, checkResultsRFLP))/length(SampleNames) + acc.rflptools.cor6[i] <- 100*sum(sapply(res6.rflptools.cor, checkResultsRFLP))/length(SampleNames) + acc.rflptools.diff6[i] <- 100*sum(sapply(res6.rflptools.diff, checkResultsRFLP))/length(SampleNames) + + ## apply FragMatch + res6.FragMatch.5 <- FragMatch(newData = newData6, refData = refData6, errorBound = 5) + res6.FragMatch.10 <- FragMatch(newData = newData6, refData = refData6, errorBound = 10) + res6.FragMatch.25 <- FragMatch(newData = newData6, refData = refData6, errorBound = 25) + + ## accuracy for fraqMatch + acc.FragMatch6.5[i] <- 100*checkResultsFragMatch(res6.FragMatch.5)/length(SampleNames) + acc.FragMatch6.10[i] <- 100*checkResultsFragMatch(res6.FragMatch.10)/length(SampleNames) + acc.FragMatch6.25[i] <- 100*checkResultsFragMatch(res6.FragMatch.25)/length(SampleNames) +} +mean(acc.germ.joint6) +mean(acc.germ.forward6) +mean(acc.germ.backward6) +mean(acc.germ.sum6) +mean(acc.rflptools.eucl6) +mean(acc.rflptools.cor6) +mean(acc.rflptools.diff6) +mean(acc.FragMatch6.5) +mean(acc.FragMatch6.10) +mean(acc.FragMatch6.25) + From noreply at r-forge.r-project.org Sun Mar 2 20:32:36 2014 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Sun, 2 Mar 2014 20:32:36 +0100 (CET) Subject: [Rflptools-commits] r17 - pkg/RFLPtools Message-ID: <20140302193236.B8E48186C89@r-forge.r-project.org> Author: stamats Date: 2014-03-02 20:32:36 +0100 (Sun, 02 Mar 2014) New Revision: 17 Modified: pkg/RFLPtools/NAMESPACE Log: now builds and checks without warnings or errors under R 3.0.3 RC and R devel. Modified: pkg/RFLPtools/NAMESPACE =================================================================== --- pkg/RFLPtools/NAMESPACE 2014-03-02 15:41:06 UTC (rev 16) +++ pkg/RFLPtools/NAMESPACE 2014-03-02 19:32:36 UTC (rev 17) @@ -1,3 +1,4 @@ +import("RColorBrewer") export(diffDist, FragMatch, germ,