[Wavetiling-commits] r15 - in pkg: . R
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Mon Dec 19 10:12:01 CET 2011
Author: kdbeuf
Date: 2011-12-19 10:12:00 +0100 (Mon, 19 Dec 2011)
New Revision: 15
Modified:
pkg/DESCRIPTION
pkg/R/methods-WaveTilingFeatureSet.R
pkg/TODO
Log:
Fixed filterOverlap,WaveTilingFeatureSet-method
Modified: pkg/DESCRIPTION
===================================================================
--- pkg/DESCRIPTION 2011-12-13 15:37:21 UTC (rev 14)
+++ pkg/DESCRIPTION 2011-12-19 09:12:00 UTC (rev 15)
@@ -5,8 +5,8 @@
Title: Wavelet-Based Models for Tiling Array Transcriptome Analysis
Author: Kristof De Beuf <kristof.debeuf at UGent.be>, Peter Pipelers <peter.pipelers at ugent.be> and Lieven Clement <lieven.clement at gmail.com>
Maintainer: Kristof De Beuf <kristof.debeuf at UGent.be>
-Depends: oligoClasses, Biobase, GenomeGraphs
-Imports: oligo, affy, IRanges, Biostrings, waveslim, methods
+Depends: oligo, oligoClasses, Biobase, GenomeGraphs, IRanges, Biostrings, GenomicRanges
+Imports: affy, waveslim, methods
Suggests:
Description: This package is designed to conduct transcriptome analysis for tiling arrays based on fast wavelet-based functional models.
Collate: allClasses.R allGenerics.R helperFunctions.R
Modified: pkg/R/methods-WaveTilingFeatureSet.R
===================================================================
--- pkg/R/methods-WaveTilingFeatureSet.R 2011-12-13 15:37:21 UTC (rev 14)
+++ pkg/R/methods-WaveTilingFeatureSet.R 2011-12-19 09:12:00 UTC (rev 15)
@@ -1,3 +1,16 @@
+# temporary (?)
+# I can only use this after inserting strand information to the database when parsing the bpmap and Cel file in the (updated) package PDInfoBuilder
+setMethod("pmStrand","WaveTilingFeatureSet",function(object)
+{
+ conn <- db(object)
+ sql <- paste("SELECT fid, strand", "FROM pmfeature", "INNER JOIN chrom_dict", "USING(chrom)")
+ tmp <- dbGetQuery(conn, sql)
+ tmp <- tmp[order(tmp[["fid"]]),]
+ return(tmp[["strand"]])
+}
+)
+
+
setMethod("addPheno",signature("WaveTilingFeatureSet"),function(object,noGroups,groupNames,replics,...)
{
if (dim(object)[2] != sum(replics))
@@ -74,7 +87,7 @@
dataPMSeq <- pmSequence(object)
if (remap)
{
- setTrustBand <- min(unique(nchar(dataPMSeq)))
+ setTrustBand <- min(unique(width(dataPMSeq)))
if (setTrustBand < 15)
{
warning("There are probes smaller than 15 bp")
@@ -86,11 +99,17 @@
cat("begin remapping forward strand...\n")
cat("extract probe sequences\n")
pmSeqDict <- PDict(dataPMSeq,tb.start=1,tb.end=trBand)
- chrSeq <- readFASTA(fastaFile,strip.desc=FALSE)
- chrDNAString <- lapply(chrSeq,function(x) DNAString(paste(x[[2]],collapse="")))
- names(chrDNAString) <- paste("chr",chrId,sep="")
+ chrSeqAll <- read.DNAStringSet(fastaFile,format="fasta")
+ chrSeq <- chrSeqAll[chrId]
+ names(chrSeq) <- paste("chr",chrId,sep="")
cat("match probe sequences to DNA sequence\n")
- startPM <- lapply(chrDNAString,function(x) startIndex(matchPDict(pmSeqDict,x)))
+ chrSeqList <- list()
+ for (i in 1:length(chrSeq))
+ {
+ chrSeqList[[i]] <- chrSeq[[i]]
+ }
+ names(chrSeqList) <- names(chrSeq)
+ startPM <- lapply(chrSeqList,function(x) startIndex(matchPDict(pmSeqDict,x)))
nposChrPM <- lapply(startPM,function(x) sapply(x,length))
pmMatch <- Reduce("+",nposChrPM)==1
pmMatchIndex <- (1:length(pmMatch))[pmMatch]
@@ -105,25 +124,30 @@
posInit <- c()
posInit[mp[,1]] <- unlist(lapply(startPM,function(x) unlist(x[pmMatch])))
strandInit <- rep("forward",length(pmMatchIndex))
- cat("remapping reverse strand done\n")
+ cat("remapping forward strand done\n")
}
if ((strand=="reverse") | (strand=="both"))
{
cat("begin remapping reverse strand...\n")
## Fix me: sometimes reverseComplement/sometimes not?
- #dataPMSeqRevComp <- reverseComplement(pmSequence(object))
- dataPMSeqRevComp <- pmSequence(object)
+ dataPMSeqRevComp <- reverseComplement(pmSequence(object))
+ #dataPMSeqRevComp <- pmSequence(object)
cat("extract probe sequences\n")
pmSeqDictRevComp <- PDict(dataPMSeqRevComp,tb.start=1,tb.end=trBand)
if (strand=="reverse")
{
-
- chrSeq <- readFASTA(fastaFile,strip.desc=FALSE)
- chrDNAString <- lapply(chrSeq,function(x) DNAString(paste(x[[2]],collapse="")))
- names(chrDNAString) <- paste("chr",chrId,sep="")
+ chrSeqAll <- read.DNAStringSet(fastaFile,format="fasta")
+ chrSeq <- chrSeqAll[chrId]
+ names(chrSeq) <- paste("chr",chrId,sep="")
+ chrSeqList <- list()
+ for (i in 1:length(chrSeq))
+ {
+ chrSeqList[[i]] <- chrSeq[[i]]
+ }
+ names(chrSeqList) <- names(chrSeq)
}
cat("match probe sequences to DNA sequence\n")
- startPMRevComp <- lapply(chrDNAString,function(x) startIndex(matchPDict(pmSeqDictRevComp,x)))
+ startPMRevComp <- lapply(chrSeqList,function(x) startIndex(matchPDict(pmSeqDictRevComp,x)))
nposChrPMRevComp <- lapply(startPMRevComp,function(x) sapply(x,length))
pmMatchRevComp <- Reduce("+",nposChrPMRevComp)==1
pmMatchIndexRevComp <- (1:length(pmMatchRevComp))[pmMatchRevComp]
@@ -145,8 +169,9 @@
{
if ((strand=="forward") | (strand=="both"))
{
- pmMatch <- pmStrand(object)=="+"
- pmMatchIndex <- (1:length(pmMatch))[pmMatch]
+ pmMatchIndex <- which(pmStrand(object)==1 & pmChr(object) %in% paste("Chr",chrId,sep=""))
+ ## Fix me: does not work yet for special chromosomes like Chr C and Chr M in Arabidopsis
+ # sometimes 0/1, sometimes +/- ?
chrInit <- pmChr(object)[pmMatchIndex]
posInit <- pmPosition(object)[pmMatchIndex]
strandInit <- pmStrand(object)[pmMatchIndex]
@@ -154,8 +179,7 @@
if ((strand=="reverse") | (strand=="both"))
{
dataPMSeqRevComp <- reverseComplement(pmSequence(object))
- pmMatchRevComp <- pmStrand(object)=="-"
- pmMatchIndexRevComp <- (1:length(pmMatchRevComp))[pmMatchRevComp]
+ pmMatchIndexRevComp <- which(pmStrand(object)==0 & pmChr(object) %in% paste("Chr",chrId,sep=""))
chrInitRevComp <- pmChr(object)[pmMatchIndexRevComp]
posInitRevComp <- pmPosition(object)[pmMatchIndexRevComp]
strandInitRevComp <- pmStrand(object)[pmMatchIndexRevComp]
@@ -167,8 +191,11 @@
if (strand=="both")
{
cat("filter overlaps forward-reverse strand\n")
- forwardReverseOverlap <- (1:length(pmMatch))[(dataPMSeq %in% dataPMSeqRevComp)]
- reverseForwardOverlap <- (1:length(pmMatchRevComp))[(dataPMSeqRevComp %in% dataPMSeq)]
+ ## should be done more efficiently with something in Biostrings... ("%in%" does not work)
+ dataPMSeqChar <- as.character(dataPMSeq)
+ dataPMSeqRevCompChar <- as.character(dataPMSeqRevComp)
+ forwardReverseOverlap <- which(dataPMSeqChar %in% dataPMSeqRevCompChar)
+ reverseForwardOverlap <- which(dataPMSeqRevCompChar %in% dataPMSeqChar)
}
if (MM)
{
@@ -176,12 +203,16 @@
if (strand=="forward")
{
dataMMSeq <- pm2mm(dataPMSeq)
- pmMmOverlap <- (1:length(pmMatch))[(dataPMSeq %in% dataMMSeq)]
+ dataPMSeqChar <- as.character(dataPMSeq)
+ dataMMSeqChar <- as.character(dataMMSeq)
+ pmMmOverlap <- which(dataPMSeqChar %in% dataMMSeqChar)
}
if (strand=="reverse")
{
dataMMSeqRevComp <- pm2mm(dataPMSeqRevComp)
- pmMmOverlap <- (1:length(pmMatchRevComp))[(dataPMSeqRevComp %in% dataMMSeqRevComp)]
+ dataPMSeqRevCompChar <- as.character(dataPMSeqRevComp)
+ dataMMSeqRevCompChar <- ac.character(dataMMSegRevComp)
+ pmMmOverlap <- which(dataPMSeqRevCompChar %in% dataMMSeqRevCompChar)
}
}
indicesForward <- NULL
Modified: pkg/TODO
===================================================================
--- pkg/TODO 2011-12-13 15:37:21 UTC (rev 14)
+++ pkg/TODO 2011-12-19 09:12:00 UTC (rev 15)
@@ -1,12 +1,16 @@
-KDB:
-1) wfm.analysis,WaveTilingFeatureSet-method: check calculation of confidence intervals
-2) wfm.analysis,WaveTilingFeatureSet-method: check implementation of "two.sided" (adapt according to SAGMB paper)
-3) plotWfm,Wfm-method: introduce option to give groupnames to put on Y-axis of plot
-4) getNonAnnotatedRegions,Wfm-method: make function more generic wrt how the strands and features in the annotation file are defined
-5) getNonAnnotatedRegions,Wfm-method: include option to give max./mean expression / FC per region
-6) getNonAnnotatedRegions,Wfm-method: add option to set threshold for minimum density of probes within the regions
-7) getSigGenes,Wfm-method: add option to include thresholds (density, outliers,...)
-8) getSigGenes,Wfm-method: add option to also detect regions that don't overlap but are in the neighbourhood of annotated regions
-9) getSigGenes,Wfm-method: add option to include mean / max expression / FC for each gene
-10) filterOverlap,WaveTilingFeatureSet-method: check if this function makes use of all the TilingFeatureSet functionalities as constructed in the oligo-package (eg. pmChr, pmStrand...) remark: pmStrand does not seem to work properly
+1) filterOverlap,WaveTilingFeatureSet-method: adapting function
+2) mail Benilton Carvalho to make available pmStrand for TilingFeatureSet
+3) wfm.analysis,WaveTilingFeatureSet-method: adapt function to obtain identical smoothing (C-code)
+4) filterOverlap,WaveTilingFeatureSet-method: check if this function makes use of all the TilingFeatureSet functionalities as constructed in the oligo-package (eg. pmChr, pmStrand...) remark: pmStrand does not seem to work properly
+5) wfm.analysis,WaveTilingFeatureSet-method: check calculation of confidence intervals
+6) wfm.analysis,WaveTilingFeatureSet-method: check implementation of "two.sided" (adapt according to SAGMB paper)
+7) plotWfm,Wfm-method: introduce option to give groupnames to put on Y-axis of plot
+8) getNonAnnotatedRegions,Wfm-method: make function more generic wrt how the strands and features in the annotation file are defined
+9) getNonAnnotatedRegions,Wfm-method: include option to give max./mean expression / FC per region
+10) getNonAnnotatedRegions,Wfm-method: add option to set threshold for minimum density of probes within the regions
+11) getSigGenes,Wfm-method: add option to include thresholds (density, outliers,...)
+12) getSigGenes,Wfm-method: add option to also detect regions that don't overlap but are in the neighbourhood of annotated regions
+13) getSigGenes,Wfm-method: add option to include mean / max expression / FC for each gene
+
+
More information about the Wavetiling-commits
mailing list