[Wavetiling-commits] r15 - in pkg: . R

Mon Dec 19 10:12:01 CET 2011

Author: kdbeuf
Date: 2011-12-19 10:12:00 +0100 (Mon, 19 Dec 2011)
New Revision: 15

Modified:
   pkg/DESCRIPTION
   pkg/R/methods-WaveTilingFeatureSet.R
   pkg/TODO
Log:
Fixed filterOverlap,WaveTilingFeatureSet-method


Modified: pkg/DESCRIPTION
===================================================================

--- pkg/DESCRIPTION	2011-12-13 15:37:21 UTC (rev 14)
+++ pkg/DESCRIPTION	2011-12-19 09:12:00 UTC (rev 15)
@@ -5,8 +5,8 @@
 Title: Wavelet-Based Models for Tiling Array Transcriptome Analysis
 Author: Kristof De Beuf <kristof.debeuf at UGent.be>, Peter Pipelers <peter.pipelers at ugent.be> and Lieven Clement <lieven.clement at gmail.com>
 Maintainer: Kristof De Beuf <kristof.debeuf at UGent.be>
-Depends: oligoClasses, Biobase, GenomeGraphs 
-Imports: oligo, affy, IRanges, Biostrings, waveslim, methods
+Depends: oligo, oligoClasses, Biobase, GenomeGraphs, IRanges, Biostrings, GenomicRanges
+Imports: affy, waveslim, methods
 Suggests:
 Description: This package is designed to conduct transcriptome analysis for tiling arrays based on fast wavelet-based functional models.
 Collate: allClasses.R allGenerics.R helperFunctions.R

Modified: pkg/R/methods-WaveTilingFeatureSet.R
===================================================================
--- pkg/R/methods-WaveTilingFeatureSet.R	2011-12-13 15:37:21 UTC (rev 14)
+++ pkg/R/methods-WaveTilingFeatureSet.R	2011-12-19 09:12:00 UTC (rev 15)
@@ -1,3 +1,16 @@
+# temporary (?)
+# I can only use this after inserting strand information to the database when parsing the bpmap and Cel file in the (updated) package PDInfoBuilder
+setMethod("pmStrand","WaveTilingFeatureSet",function(object)
+{
+	conn <- db(object)
+	sql <- paste("SELECT fid, strand", "FROM pmfeature", "INNER JOIN chrom_dict", "USING(chrom)")
+	tmp <- dbGetQuery(conn, sql)
+	tmp <- tmp[order(tmp[["fid"]]),]
+        return(tmp[["strand"]])
+}
+)
+
+
 setMethod("addPheno",signature("WaveTilingFeatureSet"),function(object,noGroups,groupNames,replics,...)
 {
 	if (dim(object)[2] != sum(replics))
@@ -74,7 +87,7 @@
 	dataPMSeq <- pmSequence(object)
 	if (remap)
 	{
-		setTrustBand <- min(unique(nchar(dataPMSeq)))
+		setTrustBand <- min(unique(width(dataPMSeq)))
 		if (setTrustBand < 15)
 		{
 			warning("There are probes smaller than 15 bp")
@@ -86,11 +99,17 @@
 			cat("begin remapping forward strand...\n")
 			cat("extract probe sequences\n")
 			pmSeqDict <- PDict(dataPMSeq,tb.start=1,tb.end=trBand)
-			chrSeq <- readFASTA(fastaFile,strip.desc=FALSE)
-			chrDNAString <- lapply(chrSeq,function(x) DNAString(paste(x[[2]],collapse="")))
-			names(chrDNAString) <- paste("chr",chrId,sep="")
+			chrSeqAll <- read.DNAStringSet(fastaFile,format="fasta")
+			chrSeq <- chrSeqAll[chrId]
+			names(chrSeq) <- paste("chr",chrId,sep="")
 			cat("match probe sequences to DNA sequence\n")
-			startPM <- lapply(chrDNAString,function(x) startIndex(matchPDict(pmSeqDict,x)))
+			chrSeqList <- list()
+			for (i in 1:length(chrSeq))
+			{
+				chrSeqList[[i]] <- chrSeq[[i]]
+			}
+			names(chrSeqList) <- names(chrSeq)
+			startPM <- lapply(chrSeqList,function(x) startIndex(matchPDict(pmSeqDict,x)))
 			nposChrPM <- lapply(startPM,function(x) sapply(x,length))
 			pmMatch <- Reduce("+",nposChrPM)==1
 			pmMatchIndex <- (1:length(pmMatch))[pmMatch]
@@ -105,25 +124,30 @@
 			posInit <- c()
 			posInit[mp[,1]] <- unlist(lapply(startPM,function(x) unlist(x[pmMatch])))
 			strandInit <- rep("forward",length(pmMatchIndex))
-			cat("remapping reverse strand done\n")
+			cat("remapping forward strand done\n")
 		}
 		if ((strand=="reverse") | (strand=="both"))
 		{
 			cat("begin remapping reverse strand...\n")
 			## Fix me: sometimes reverseComplement/sometimes not?
-			#dataPMSeqRevComp <- reverseComplement(pmSequence(object))
-			dataPMSeqRevComp <- pmSequence(object)
+			dataPMSeqRevComp <- reverseComplement(pmSequence(object))
+			#dataPMSeqRevComp <- pmSequence(object)
 			cat("extract probe sequences\n")
 			pmSeqDictRevComp <- PDict(dataPMSeqRevComp,tb.start=1,tb.end=trBand)
 			if (strand=="reverse")
 			{
-				
-				chrSeq <- readFASTA(fastaFile,strip.desc=FALSE)
-				chrDNAString <- lapply(chrSeq,function(x) DNAString(paste(x[[2]],collapse="")))
-				names(chrDNAString) <- paste("chr",chrId,sep="")
+				chrSeqAll <- read.DNAStringSet(fastaFile,format="fasta")
+				chrSeq <- chrSeqAll[chrId]
+				names(chrSeq) <- paste("chr",chrId,sep="")
+				chrSeqList <- list()
+				for (i in 1:length(chrSeq))
+				{
+					chrSeqList[[i]] <- chrSeq[[i]]
+				}
+				names(chrSeqList) <- names(chrSeq)
 			}
 			cat("match probe sequences to DNA sequence\n")
-			startPMRevComp <- lapply(chrDNAString,function(x) startIndex(matchPDict(pmSeqDictRevComp,x)))
+			startPMRevComp <- lapply(chrSeqList,function(x) startIndex(matchPDict(pmSeqDictRevComp,x)))
 			nposChrPMRevComp <- lapply(startPMRevComp,function(x) sapply(x,length))
 			pmMatchRevComp <- Reduce("+",nposChrPMRevComp)==1
 			pmMatchIndexRevComp <- (1:length(pmMatchRevComp))[pmMatchRevComp]
@@ -145,8 +169,9 @@
 	{
 		if ((strand=="forward") | (strand=="both"))
 		{
-			pmMatch <- pmStrand(object)=="+"
-			pmMatchIndex <- (1:length(pmMatch))[pmMatch]
+			pmMatchIndex <- which(pmStrand(object)==1 & pmChr(object) %in% paste("Chr",chrId,sep=""))
+			## Fix me: does not work yet for special chromosomes like Chr C and Chr M in Arabidopsis
+			# sometimes 0/1, sometimes +/- ?
 			chrInit <- pmChr(object)[pmMatchIndex]
 			posInit <- pmPosition(object)[pmMatchIndex]
 			strandInit <- pmStrand(object)[pmMatchIndex]
@@ -154,8 +179,7 @@
 		if ((strand=="reverse") | (strand=="both"))
 		{
 			dataPMSeqRevComp <- reverseComplement(pmSequence(object))
-			pmMatchRevComp <- pmStrand(object)=="-"
-			pmMatchIndexRevComp <- (1:length(pmMatchRevComp))[pmMatchRevComp]
+			pmMatchIndexRevComp <- which(pmStrand(object)==0 & pmChr(object) %in% paste("Chr",chrId,sep=""))
 			chrInitRevComp <- pmChr(object)[pmMatchIndexRevComp]
 			posInitRevComp <- pmPosition(object)[pmMatchIndexRevComp]
 			strandInitRevComp <- pmStrand(object)[pmMatchIndexRevComp]
@@ -167,8 +191,11 @@
 	if (strand=="both")
 	{
 		cat("filter overlaps forward-reverse strand\n")
-		forwardReverseOverlap <- (1:length(pmMatch))[(dataPMSeq %in% dataPMSeqRevComp)]
-		reverseForwardOverlap <- (1:length(pmMatchRevComp))[(dataPMSeqRevComp %in% dataPMSeq)]
+		## should be done more efficiently with something in Biostrings... ("%in%" does not work)
+		dataPMSeqChar <- as.character(dataPMSeq)
+		dataPMSeqRevCompChar <- as.character(dataPMSeqRevComp)
+		forwardReverseOverlap <- which(dataPMSeqChar %in% dataPMSeqRevCompChar)
+		reverseForwardOverlap <- which(dataPMSeqRevCompChar %in% dataPMSeqChar)
 	}
 	if (MM)
 	{
@@ -176,12 +203,16 @@
 		if (strand=="forward")
 		{
 			dataMMSeq <- pm2mm(dataPMSeq)
-			pmMmOverlap <- (1:length(pmMatch))[(dataPMSeq %in% dataMMSeq)]
+			dataPMSeqChar <- as.character(dataPMSeq)
+			dataMMSeqChar <- as.character(dataMMSeq)
+			pmMmOverlap <- which(dataPMSeqChar %in% dataMMSeqChar)
 		}
 		if (strand=="reverse")
 		{
 			dataMMSeqRevComp <- pm2mm(dataPMSeqRevComp)
-			pmMmOverlap <- (1:length(pmMatchRevComp))[(dataPMSeqRevComp %in% dataMMSeqRevComp)]
+			dataPMSeqRevCompChar <- as.character(dataPMSeqRevComp)
+			dataMMSeqRevCompChar <- ac.character(dataMMSegRevComp)
+			pmMmOverlap <- which(dataPMSeqRevCompChar %in% dataMMSeqRevCompChar)
 		}
 	}
 	indicesForward <- NULL

Modified: pkg/TODO
===================================================================
--- pkg/TODO	2011-12-13 15:37:21 UTC (rev 14)
+++ pkg/TODO	2011-12-19 09:12:00 UTC (rev 15)
@@ -1,12 +1,16 @@
-KDB:
-1) wfm.analysis,WaveTilingFeatureSet-method: check calculation of confidence intervals
-2) wfm.analysis,WaveTilingFeatureSet-method: check implementation of "two.sided" (adapt according to SAGMB paper)
-3) plotWfm,Wfm-method: introduce option to give groupnames to put on Y-axis of plot
-4) getNonAnnotatedRegions,Wfm-method: make function more generic wrt how the strands and features in the annotation file are defined
-5) getNonAnnotatedRegions,Wfm-method: include option to give max./mean expression / FC per region
-6) getNonAnnotatedRegions,Wfm-method: add option to set threshold for minimum density of probes within the regions
-7) getSigGenes,Wfm-method: add option to include thresholds (density, outliers,...)
-8) getSigGenes,Wfm-method: add option to also detect regions that don't overlap but are in the neighbourhood of annotated regions
-9) getSigGenes,Wfm-method: add option to include mean / max expression / FC for each gene
-10) filterOverlap,WaveTilingFeatureSet-method: check if this function makes use of all the TilingFeatureSet functionalities as constructed in the oligo-package (eg. pmChr, pmStrand...) remark: pmStrand does not seem to work properly
 
+1) filterOverlap,WaveTilingFeatureSet-method: adapting function
+2) mail Benilton Carvalho to make available pmStrand for TilingFeatureSet
+3) wfm.analysis,WaveTilingFeatureSet-method: adapt function to obtain identical smoothing (C-code)
+4) filterOverlap,WaveTilingFeatureSet-method: check if this function makes use of all the TilingFeatureSet functionalities as constructed in the oligo-package (eg. pmChr, pmStrand...) remark: pmStrand does not seem to work properly
+5) wfm.analysis,WaveTilingFeatureSet-method: check calculation of confidence intervals
+6) wfm.analysis,WaveTilingFeatureSet-method: check implementation of "two.sided" (adapt according to SAGMB paper)
+7) plotWfm,Wfm-method: introduce option to give groupnames to put on Y-axis of plot
+8) getNonAnnotatedRegions,Wfm-method: make function more generic wrt how the strands and features in the annotation file are defined
+9) getNonAnnotatedRegions,Wfm-method: include option to give max./mean expression / FC per region
+10) getNonAnnotatedRegions,Wfm-method: add option to set threshold for minimum density of probes within the regions
+11) getSigGenes,Wfm-method: add option to include thresholds (density, outliers,...)
+12) getSigGenes,Wfm-method: add option to also detect regions that don't overlap but are in the neighbourhood of annotated regions
+13) getSigGenes,Wfm-method: add option to include mean / max expression / FC for each gene
+
+