[Patchwork-commits] r109 - in pkg: patchwork patchwork/R patchwork/man patchworkCG patchworkCG/R patchworkCG/man patchworkData patchworkData/man
noreply at r-forge.r-project.org
Mon Mar 12 17:10:08 CET 2012
Author: sebastian_d
Date: 2012-03-12 17:10:08 +0100 (Mon, 12 Mar 2012)
New Revision: 109
Added:
pkg/patchwork/R/patchwork.copynumbers.r
pkg/patchwork/R/patchwork.plot.r
pkg/patchwork/R/subfunctions_copynumbers.r
pkg/patchwork/man/patchwork.copynumbers.Rd
pkg/patchwork/man/patchwork.plot.Rd
pkg/patchwork/man/subfunctions_copynumbers.Rd
pkg/patchworkCG/R/patchwork.CG.copynumbers.r
pkg/patchworkCG/R/patchwork.CG.plot.r
pkg/patchworkCG/R/subfunctions_copynumbers.r
pkg/patchworkCG/man/patchwork.CG.copynumbers.Rd
pkg/patchworkCG/man/patchwork.CG.plot.Rd
pkg/patchworkCG/man/subfunctions_copynumbers.Rd
Removed:
pkg/patchwork/R/patchwork.CNA.r
pkg/patchwork/R/patchwork.findCNs.r
pkg/patchwork/R/subfunctions_findCNs.r
pkg/patchwork/man/patchwork.CNA.Rd
pkg/patchwork/man/patchwork.findCNs.Rd
pkg/patchwork/man/subfunctions_findCNs.Rd
pkg/patchworkCG/R/patchwork.CG.r
pkg/patchworkCG/R/patchwork.CGCNV.r
pkg/patchworkCG/R/subfunctions_CGCNV.r
pkg/patchworkCG/man/patchwork.CG.Rd
pkg/patchworkCG/man/patchwork.CGCNV.Rd
pkg/patchworkCG/man/subfunctions_CGCNV.Rd
Modified:
pkg/patchwork/NAMESPACE
pkg/patchwork/R/patchwork.GCNorm.r
pkg/patchwork/R/patchwork.createreference.r
pkg/patchwork/R/patchwork.readChroms.r
pkg/patchwork/README
pkg/patchworkCG/NAMESPACE
pkg/patchworkCG/R/CG_KaChCN.r
pkg/patchworkCG/README
pkg/patchworkCG/man/CG_KaCh.Rd
pkg/patchworkCG/man/CG_KaChCN.Rd
pkg/patchworkCG/man/CG_Ka_check.Rd
pkg/patchworkCG/man/CG_karyotype.Rd
pkg/patchworkCG/man/ideogram.Rd
pkg/patchworkData/DESCRIPTION
pkg/patchworkData/man/ideogram.Rd
Log:
big commit after changing function names
Modified: pkg/patchwork/NAMESPACE
===================================================================
--- pkg/patchwork/NAMESPACE 2012-03-12 16:05:58 UTC (rev 108)
+++ pkg/patchwork/NAMESPACE 2012-03-12 16:10:08 UTC (rev 109)
@@ -6,17 +6,17 @@
karyotype,
patchwork.alleledata,
patchwork.applyref,
- patchwork.CNA,
+ patchwork.plot,
patchwork.createreference,
- patchwork.findCNs,
+ patchwork.copynumbers,
patchwork.GCNorm,
patchwork.Medians_n_AI,
patchwork.readChroms,
patchwork.segment,
patchwork.smoothing,
- #subfunctions_findCNs
+ #subfunctions_copynumbers
weightedMedian,
weightedMean,
is.autosome,
deChrom_ucsc,
- chrom_ucsc)
+ chrom_ucsc)
\ No newline at end of file
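
For reference, a minimal usage sketch of the renamed entry points exported above. The file paths and numeric values are placeholders, patchwork.plot is assumed to keep the argument list of the old patchwork.CNA shown further down (its new source is truncated in this mail), and only the patchwork.copynumbers signature is visible in this commit.

    library(patchwork)

    # Coverage, allele data, segmentation and plots (formerly patchwork.CNA).
    patchwork.plot("tumor.bam", "tumor.pileup",
                   normal.bam = "normal.bam", normal.pileup = "normal.pileup")

    # Absolute copy numbers (formerly patchwork.findCNs). cn2, delta, het and hom
    # are values the user reads off the plots above; the numbers here are made up.
    patchwork.copynumbers(cn2 = 0.8, delta = 0.28, het = 0.21, hom = 0.79)
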
Deleted: pkg/patchwork/R/patchwork.CNA.r
===================================================================
--- pkg/patchwork/R/patchwork.CNA.r 2012-03-12 16:05:58 UTC (rev 108)
+++ pkg/patchwork/R/patchwork.CNA.r 2012-03-12 16:10:08 UTC (rev 109)
@@ -1,122 +0,0 @@
-patchwork.CNA <- function(BamFile,Pileup,reference=NULL,normal.bam=NULL,normal.pileup=NULL,Alpha=0.0001,SD=1)
- {
-
- #Attempt to read file pile.alleles, incase its already been created.
- alf <- normalalf <- NULL
- try( load("pile.alleles.Rdata"), silent=TRUE )
-
- #If it wasnt created, create it using the perl script pile2alleles.pl
- #which is included in the package. Creates pile.alleles in whichever folder
- #you are running R from. (getwd())
- if(is.null(alf))
- {
- cat("Initiating Allele Data Generation \n")
- if (!is.null(normal.pileup)) normalalf <- patchwork.alleledata(normal.pileup)
- alf = patchwork.alleledata(Pileup, normalalf=normalalf)
- cat("Allele Data Generation Complete \n")
- save(alf,file="pile.alleles.Rdata")
- }
-
- #Generate chroms object depending on the naming in pileup (alf).
- #either chr1...chr22,chrX,chrY or 1...22,X,Y
- data(ideogram,package="patchworkData")
-
- chroms = as.character(unique(ideogram$chr))
-
- ## Read coverage data. If already done, will load "data.Rdata" instead.
- data <- normaldata <- NULL
- try ( load('data.Rdata'), silent=TRUE )
-
- #If data object does not exist, IE it was not loaded in the previous line
- #perform the function on the chromosomes to create the object.
- if(is.null(data)) {
- cat("Initiating Read Chromosomal Coverage \n")
- data = patchwork.readChroms(BamFile,chroms)
- cat("Read Chromosomal Coverage Complete \n")
- save(data,file='data.Rdata')
- }
-
- #Perform GC normalization if the amount of columns indicate that gc normalization
- #has not already been performed.
- if(length(data) < 7) {
- cat("Initiating GC Content Normalization \n")
- data = patchwork.GCNorm(data)
- cat("GC Content Normalization Complete \n")
- save(data,file='data.Rdata')
- }
- #after this, no further normalization was done on reference sequences.
-
- #If matched normal bam file was defined, load and normalize it the same way.
- if (!is.null(normal.bam) & is.null(reference)) {
- normaldata=NULL
- try ( load('normaldata.Rdata'), silent=TRUE )
- if(is.null(normaldata)) {
- cat("Initiating Read Chromosomal Coverage (matched normal) \n")
- normaldata = patchwork.readChroms(normal.bam,chroms)
- cat("Read Chromosomal Coverage Complete (matched normal) \n")
- }
- if(length(normaldata) != 3) {
- cat("Initiating GC Content Normalization (matched normal) \n")
- normaldata = patchwork.GCNorm(normaldata[,1:6])
- cat("GC Content Normalization Complete (matched normal) \n")
- normaldata <- data.frame(chr=normaldata$chr,pos=normaldata$pos,normal=normaldata$norm)
- save(normaldata,file='normaldata.Rdata')
- }
- #save(normaldata,file='normaldata.Rdata')
- }
-
-
- # Smooth the data.
- kbsegs = NULL
- try( load("smoothed.Rdata"), silent=TRUE )
- if(length(kbsegs) == 0)
- {
- cat("Initiating Smoothing \n")
- kbsegs = patchwork.smoothing(data,normaldata,reference,chroms)
- save(kbsegs,file="smoothed.Rdata")
- cat("Smoothing Complete \n")
- }
-
- #Segment the data.
- library(DNAcopy)
- segs = NULL
- try( load("Segments.Rdata"), silent=TRUE )
- if(length(segs) == 0)
- {
- cat("Initiating Segmentation \n")
- cat("Note: If segmentation fails to initiate the probable reason is that you have not ")
- cat("installed the R package DNAcopy. See patchwork's README for installation instructions. \n")
- segs = patchwork.segment(kbsegs,chroms,Alpha,SD)
- save(segs,file="Segments.Rdata")
- cat("Segmentation Complete \n")
- }
-
- if(length(segs) == 6)
- {
- #Get medians and AI for the segments.
- cat("Initiating Segment data extraction (Medians and AI) \n")
- segs = patchwork.Medians_n_AI(segs,kbsegs,alf)
- save(segs,file="Segments.Rdata")
- cat("Segment data extraction Complete \n \n \n")
- }
-
- cat("Saving information objects needed for patchwork.findCNs in findCNs.Rdata \n \n \n")
- save(segs,alf,kbsegs,file="findCNs.Rdata")
-
- #Plot it
- cat("Initiating Plotting \n")
- karyotype(segs$chr,segs$start,segs$end,segs$median,segs$ai,
- name=as.character(BamFile),
- xlim=c(0,2.4),
- ylim=c(0.1,1))
- karyotype_chroms(segs$chr,segs$start,segs$end,segs$median,segs$ai,
- kbsegs$chr,kbsegs$pos,kbsegs$ratio,
- alf$achr,alf$apos,(1-alf$amin/alf$amax),
- name=as.character(BamFile),
- xlim=c(0,2.4),
- ylim=c(0.1,1))
- cat("Plotting Complete \n")
- cat("Shutting down..... \n")
- }
-
-
\ No newline at end of file
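
The deleted wrapper checkpointed every stage with the same load-or-compute idiom: try to load a saved .Rdata file, otherwise compute and save. Reduced to its core (and assuming the replacement patchwork.plot keeps the pattern; BamFile and chroms stand in for the arguments used above):

    # Checkpoint idiom used throughout the pipeline: reuse a previous run's
    # result if its .Rdata file exists, otherwise compute it and save it.
    data <- NULL
    try(load("data.Rdata"), silent = TRUE)   # sets 'data' if the file exists
    if (is.null(data)) {
        data <- patchwork.readChroms(BamFile, chroms)
        save(data, file = "data.Rdata")
    }
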
Modified: pkg/patchwork/R/patchwork.GCNorm.r
===================================================================
--- pkg/patchwork/R/patchwork.GCNorm.r 2012-03-12 16:05:58 UTC (rev 108)
+++ pkg/patchwork/R/patchwork.GCNorm.r 2012-03-12 16:10:08 UTC (rev 109)
@@ -1,7 +1,6 @@
patchwork.GCNorm <- function(data)
{
#Load data included in package
- #packagepath = system.file(package="CNA")
#load(paste(packagepath,"/data/commonSnps132.RData",sep=""))
#load(paste(packagepath,"/data/ideogram.RData",sep=""))
#load(paste(packagepath,"/data/normaldata.RData",sep=""))
Added: pkg/patchwork/R/patchwork.copynumbers.r
===================================================================
--- pkg/patchwork/R/patchwork.copynumbers.r (rev 0)
+++ pkg/patchwork/R/patchwork.copynumbers.r 2012-03-12 16:10:08 UTC (rev 109)
@@ -0,0 +1,294 @@
+patchwork.copynumbers = function(name = "copynumbers_",cn2,delta,het,hom,maxCn=8,ceiling=1,forcedelta=F)
+ {
+
+ #packagepath = system.file(package="patchwork")
+ #load(paste(packagepath,"/data/commonSnps132.RData",sep=""))
+ #load(paste(packagepath,"/data/ideogram.RData",sep=""))
+ data(ideogram,package="patchworkData")
+ load("copynumbers.Rdata")
+
+ voidchrom <- c('chrX','chrY') # may add non-integer chroms here....
+
+ tix=NULL #temporary index
+ int=NULL ## contains coverage estimate of each (total) copy number
+ ai=NULL ## contains Allelic Imbalance Ratio estimate of each copy number variant.
+ regions <- segs
+	regions <- regions[(is.autosome(regions$chr)&regions$np>50)&(!is.nan(regions$ai)),] ## will use these regions
+ regions <- regions[!is.na(regions$median),]
+
+ ## likely cn2 regions sit within delta/3 from cn2.
+ expectedAt <- cn2
+ tix$cn2 <- abs(regions$median - expectedAt) < (delta/3) ## index of likely cn2 regions
+ temp <- regions[tix$cn2,] ## cn2 regions
+	med <- weightedMedian(temp$median,temp$np) ## improved value of Log-R at cn2 (returns NULL if there's nothing there)
+ int$cn2 <- ifelse(!is.null(med),med,expectedAt) ## saved to int.
+
+ ## likely cn1 regions sit at about cn2 - delta:
+ d <- delta ## the (Log-R) distance to cn1
+ expectedAt <- int$cn2-d ## cn1 is expected here
+ tix$cn1 <- abs(regions$median - expectedAt) < (d/3) ## index of likely cn1 regions
+ temp <- regions[tix$cn1,]
+ med <- weightedMedian(temp$median,temp$np)
+ int$cn1 <- ifelse(!is.null(med),med,expectedAt)
+ if (forcedelta) int$cn1 <- int$cn2-delta
+
+ ## likely cn0 regions sit below cn1 - delta:
+ d <- int$cn2-int$cn1
+ expectedAt <- int$cn1-d
+ tix$cn0 <- regions$median < expectedAt
+ temp <- regions[tix$cn0,]
+ med <- weightedMedian(temp$median,temp$np)
+ int$cn0 <- ifelse(!is.null(med),med,expectedAt)
+ if (forcedelta) int$cn0 <- int$cn1-delta
+
+
+ ## likely cn3 regions sit at about cn2+delta
+ d <- delta
+ expectedAt <- int$cn2+d
+ tix$cn3 <- abs(regions$median - expectedAt) < (d/3)
+ temp <- regions[tix$cn3,]
+ med <- weightedMedian(temp$median,temp$np)
+ int$cn3 <- ifelse(!is.null(med),med,expectedAt)
+ if (forcedelta) int$cn3 <- int$cn2+delta
+
+
+ ## cn4 follows at ...
+ d <- delta
+ expectedAt <- int$cn3+d
+ tix$cn4 <- abs(regions$median - expectedAt) < (d/4)
+ temp <- regions[tix$cn4,]
+ med <- weightedMedian(temp$median,temp$np)
+ int$cn4 <- ifelse(!is.null(med),med,expectedAt)
+ if (forcedelta) int$cn4 <- int$cn3+delta
+
+ ## generalized for higher cns
+ for (cn in 5:maxCn)
+ {
+ thisCn <- paste('cn',cn,sep='')
+ prevCn <- paste('cn',cn-1,sep='')
+ pprevCn <- paste('cn',cn-2,sep='')
+ d <- delta #dmin*(int[prevCn][[1]]-int[pprevCn][[1]])
+ expectedAt <- int[prevCn][[1]]+d
+ tix[[thisCn]] <- abs(regions$median - expectedAt) < (d/5)
+ temp <- regions[tix[thisCn][[1]],]
+ med <- weightedMedian(temp$median,temp$np)
+ int[thisCn] <- ifelse(!is.null(med),med,expectedAt)
+ if (forcedelta) int[[thisCn]] <- int[[prevCn]]+delta
+ }
+
+ # smooth the cn relationship ?
+
+ ## at cn2, find the variant clusters (normal and CNNLOH)
+ ix <- (abs(regions$median - int$cn2) < 0.2*(int$cn3-int$cn2) ) # taking only closely-matching segments
+ data <- regions[ix,]
+ data <- data[!is.na(data$ai),] # ...with a calculated allelic imbalance.
+
+ expectedAt <- het
+ ix <- 2*abs(data$ai-het) < abs(data$ai-hom)
+ med <- weightedMedian(data$ai[ix],data$snvs[ix])
+ ai$cn2m1 <- ifelse (!is.null(med),med,expectedAt)
+
+ expectedAt <- hom
+ ix <- 2*abs(data$ai-hom) < abs(data$ai-het)
+ med <- weightedMedian(data$ai[ix],data$snvs[ix])
+ ai$cn2m0 <- ifelse (!is.null(med),med,expectedAt)
+
+ ## for cn1 (and 0)
+ ix <- (abs(regions$median - int$cn1) < 0.2*(int$cn2-int$cn1) )
+ data <- regions[ix,]
+ data <- data[!is.na(data$ai),]
+ expectedAt <- (ai$cn2m0+ai$cn2m1)*3/5 ## Decent estimate.
+ med <- weightedMedian(data$ai,data$snvs) ## Average allelic imbalance weighted on snp count
+ ai$cn1m0 <- ifelse (!is.null(med),med,expectedAt) ## Will be NA if there was no CNNLOH
+ ai$cn0m0 <- NA #unimportant
+
+ ## for cn3:
+ ix <- (abs(regions$median - int$cn3) < 0.2*(int$cn4-int$cn3) )
+ data <- regions[ix,]
+ data <- data[!is.na(data$ai),]
+
+ range <- ai$cn2m0-ai$cn2m1 #the distance between 2normal and CNNLOH
+ # get the 3(1) regions:
+ expectedAt <- ai$cn2m1+range/3 # this is an approx if cn3m1 is absent.
+
+ ix <- (data$ai<ai$cn2m0) & (data$ai>ai$cn2m1) # take regions with less AI than cn2m0 but more than cn2m1
+ med <- weightedMedian(data$ai[ix],data$snvs[ix]) # average allelic imbalance weighted on snp count
+ ai$cn3m1 <- ifelse (!is.null(med),med,expectedAt)
+
+ # now for cn3m0
+ expectedAt <- ai$cn3m1 + range # approx of cn3m0
+ ix <- (abs(data$ai-expectedAt) < (expectedAt-ai$cn3m1)/4) # take those much closer to exp than to cn3m1
+ med <- weightedMedian(data$ai[ix],data$snvs[ix])
+ try (if (med<ai$cn2m0) med <- NULL, silent=T) ## in case of heterogeneities or strange signals, we disallow this effect.
+ ai$cn3m0 <- ifelse (!is.null(med),med,expectedAt)
+
+ if (is.na(ai$cn1m0)) ai$cn1m0 <- mean(ai$cn3m1,ai$cn2m0) ## If deletions were missing, place an estimate from cn3
+
+ ## now for cn4
+ ix <- abs(regions$median - int$cn4) < 0.2*(int$cn4-int$cn3)
+ data <- regions[ix,]
+ data <- data[!is.na(data$ai),]
+
+ # get the 4(2) regions: they are at AI about cn2m1
+ expectedAt <- ai$cn2m1 # this is a good approx
+
+ ix <- data$ai<ai$cn3m1 # let all below cn3m1 in
+ med <- weightedMedian(data$ai[ix],data$snvs[ix])
+ ai$cn4m2 <- ifelse (!is.null(med),med,expectedAt)
+ if (ai$cn4m2>ai$cn2m1) ai$cn4m2 <- ai$cn2m1 # sanity check.
+
+	data <- data[!ix,] # continue with the remaining
+ # now 4(1) has less ai than 3(0) -> sits at about cn3m1+(cn3m1-cn2m1).
+ expectedAt <- ai$cn3m1+(ai$cn3m1-ai$cn2m1)
+ ix <- abs(data$ai-expectedAt)<abs(data$ai-ai$cn3m0) # take those closer to exp than to 3(0)
+ med <- weightedMedian(data$ai[ix],data$snvs[ix])
+ ai$cn4m1 <- ifelse (!is.null(med),med,expectedAt)
+
+ # now 4(0) is at about 3(0) + [0.9 - 3(0)] * [3(0)-2(0)] / [0.9-2(0)]
+ expectedAt <- ai$cn3m0 + (ceiling-ai$cn3m0)*(ai$cn3m0-ai$cn2m0)/(ceiling-ai$cn2m0)
+ ix <- abs(data$ai-expectedAt) < 0.2*(expectedAt-ai$cn4m2) # we take those very close to exp
+ med <- weightedMedian(data$ai[ix],data$snvs[ix])
+ try (if (med<ai$cn3m0) med <- NULL, silent=T) ## in case of heterogeneities or strange signals, we disallow this effect.
+ ai$cn4m0 <- ifelse (!is.null(med),med,expectedAt)
+
+ ## generalization for higher copy numbers. it gets complicated.
+ for (cn in 5:maxCn) {
+ thisCn <- paste('cn',cn,sep='')
+ prevCn <- paste('cn',cn-1,sep='')
+ pprevCn <- paste('cn',cn-2,sep='')
+
+ ix <- (abs(regions$median - int[thisCn][[1]]) < 0.2*(int[thisCn][[1]]-int[prevCn][[1]]) )
+ data <- regions[ix,]
+ data <- data[!is.na(data$ai),]
+ data <- data[data$np>50,] # long regions for safety
+
+ ## try to find variants, starting with LOH
+ # LOH such as 5(0)
+ m <- 0
+ thisVariant=paste(thisCn,'m',0,sep='')
+ c4m0 <- ai[paste(prevCn,'m',m,sep='')][[1]] # relative naming for clarity
+ c3m0 <- ai[paste(pprevCn,'m',m,sep='')][[1]]
+
+ expectedAt <- ceiling-((ceiling-c4m0)*(ceiling-max(c4m0,c3m0))/(ceiling-min(c3m0,c4m0)))
+ #ai[thisVariant] <- expectedAt
+ # we then take those close to exp (within delta[3(0),4(0)])
+ ix <- abs(data$ai-expectedAt) < abs(c4m0 - c3m0)
+ med <- weightedMedian(data$ai[ix],data$snvs[ix])
+ try (if (med<ai$cn4m0) med <- NULL, silent=T) ## in case of heterogeneities or strange signals, we disallow this effect.
+ ai[thisVariant] <- ifelse (!is.null(med),med,expectedAt)
+
+ ## then from balanced to less balanced
+ minorVariants=trunc(cn/2):1
+ first <- T
+ for (m in minorVariants) {
+ thisVariant=paste(thisCn,'m',m,sep='')
+ if (m==cn/2) {
+ # We have balanced variant, so rather easy.
+ expectedAt <- ai[paste(pprevCn,'m',m-1,sep='')][[1]] # this is a good approx, balanced at cn-2
+ ix <- data$ai<ai[paste(prevCn,'m',m-1,sep='')][[1]] # let all below (cn-1, mcn-1) in
+ med <- weightedMedian(data$ai[ix],data$snvs[ix])
+ ai[thisVariant] <- ifelse (!is.null(med),med,expectedAt)
+ if (ai[thisVariant] > ai[paste(pprevCn,'m',m-1,sep='')][[1]]) ai[thisVariant] <- ai[paste(pprevCn,'m',m-1,sep='')][[1]] # sanity check.
+ first <- F
+ } else if (first) {
+			# it's not balanced but it's the most balanced of the unbalanced, something like 5(2)
+ expectedAt <- 0.5*( ai[paste(prevCn,'m',m,sep='')][[1]] + ai[paste(pprevCn,'m',m-1,sep='')][[1]] ) # that means between 4(2) and 3(1)
+ ix <- abs(data$ai-expectedAt) < ( ai[paste(prevCn,'m',m,sep='')][[1]] - ai[paste(pprevCn,'m',m-1,sep='')][[1]] ) /3 # let all "between" 4(2) and 3(1) in
+ med <- weightedMedian(data$ai[ix],data$snvs[ix])
+ ai[thisVariant] <- ifelse (!is.null(med),med,expectedAt)
+ first <- F
+ } else {
+ # not the most balanced unbalanced variant, for example 5(1):
+ expectedAt <- ai[paste(thisCn,'m',minorVariants[1],sep='')][[1]] + (minorVariants[1]-m) * (ai[paste(thisCn,'m',0,sep='')][[1]] - ai[paste(thisCn,'m',minorVariants[1],sep='')][[1]]) / trunc(cn/2) # 5(2) + (which)* 5(0)-5(2) /(n)
+ ix <- abs(data$ai-expectedAt) < ( ai[paste(prevCn,'m',m,sep='')][[1]] - ai[paste(pprevCn,'m',m-1,sep='')][[1]] ) /3 # let all "between" 4(1) and 3(0) in
+ med <- weightedMedian(data$ai[ix],data$snvs[ix])
+ ai[thisVariant] <- ifelse (!is.null(med),med,expectedAt)
+ }
+ } # done with minor variants
+ } # done with copy numbers
+
+ sampleInfo <- list('int'=int,'ai'=ai)
+
+ ## Assign total and minor copy numbers to all segments.
+ regions <- segs[!is.na(segs$median),] ## This time, work on all segments available.
+
+ Cn <- NULL ## Total copy number
+ mCn <- NULL ## Minor allele copy number
+ fullCN <- NULL ## Variant label. ('cnXmY')
+
+ intDist <- NULL ## distance to certain Log-R
+ imbaDist <- NULL ## distance to certain allelic imbalance
+
+
+
+ ## give each segment the best matching CN and mCN
+ for (i in 1:nrow(regions))
+ {
+ # set total copy number
+ distance <- Inf
+ for (cn in 0:maxCn)
+ {
+ t_int <- int[paste('cn',cn,sep='')][[1]] ## get Log-R of particular cn from 'int'
+ t_dis <- abs(regions$median[i]-t_int) ## distance to that particular cn
+ if (t_dis < distance)
+ { ## nearest so far, save.
+ distance <- t_dis -> intDist[i]
+ Cn[i] <- cn
+ }
+ }
+ }
+ # Y makes CN0 sit very low, this is a fix on non-Y Cn0
+ #if ((Cn[i] == 1)&(intDist[i] > (int$cn2-int$cn1))) Cn[i] <- 0 ## currently not needed.
+ for (i in 1:nrow(regions))
+ {
+ # set minor CN
+ distance <- Inf
+ if (Cn[i]<=1)
+ {
+ mCn[i] <- 0
+ }
+ else if (!is.na(regions$ai[i]))
+ if (!is.nan(regions$ai[i]))
+ for (m in 0:trunc(Cn[i]/2))
+ {
+ t_ai <- ai[paste('cn',Cn[i],'m',m,sep='')][[1]]
+ t_dis <- abs(regions$ai[i]-t_ai)
+ if (t_dis < distance)
+ {
+ distance <- t_dis -> imbaDist[i]
+ mCn[i] <- m
+ }
+ }
+ else
+ mCn[i] <- NA
+ fullCN[i] <- paste('cn',Cn[i],'m',mCn[i],sep='') # Full description
+ }
+
+ regions$Cn <- Cn
+ regions$mCn <- mCn
+ regions$fullCN <- fullCN
+
+ meanCn <- weightedMean(regions$Cn,regions$np)
+ ix1 <- regions$Cn==trunc(meanCn)
+ ix2 <- regions$Cn==ceiling(meanCn)
+ xdelta <- weightedMean(regions$median[ix2],regions$np[ix2]) - weightedMean(regions$median[ix1],regions$np[ix1])
+ expected_delta <- 1/meanCn
+
+ tumor_percentDNA <- xdelta / expected_delta
+ tumor_percent <- tumor_percentDNA/(meanCn/2) / ( tumor_percentDNA/(meanCn/2) + (1-tumor_percentDNA) )
+
+ regions$meanCn <- meanCn
+ regions$tumor_percent <- tumor_percent
+ write.csv(regions,file='Copynumbers.csv')
+
+
+ karyotype_check(regions$chr,regions$start,regions$end,regions$mean,regions$ai,
+ regions$Cn,regions$mCn,list(int=int,ai=ai),name=name,
+ xlim=c(0,2.4),ylim=c(0.1,1))
+
+ karyotype_chromsCN(regions$chr,regions$start,regions$end,regions$mean,regions$ai,
+ regions$Cn,regions$mCn,kbsegs$chr,
+ kbsegs$pos,kbsegs$ratio,alf$achr,alf$apos,
+ (1-alf$amin/alf$amax),name=name,xlim=c(0,2.4),
+ ylim=c(0.1,1))
+ }
\ No newline at end of file
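
The tail of patchwork.copynumbers estimates the average copy number and the tumor content from the spacing of the copy-number levels. A standalone restatement of that arithmetic, with numbers invented purely for illustration:

    meanCn <- 2.4                        # coverage-weighted mean copy number
    xdelta <- 0.21                       # observed Log-R step between the levels flanking meanCn (Cn 2 and 3 here)
    expected_delta <- 1 / meanCn         # step expected for a 100% pure tumor
    tumor_percentDNA <- xdelta / expected_delta
    tumor_percent <- (tumor_percentDNA / (meanCn/2)) /
                     (tumor_percentDNA / (meanCn/2) + (1 - tumor_percentDNA))
    tumor_percent                        # estimated tumor cell fraction, here ~0.46
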
Modified: pkg/patchwork/R/patchwork.createreference.r
===================================================================
--- pkg/patchwork/R/patchwork.createreference.r 2012-03-12 16:05:58 UTC (rev 108)
+++ pkg/patchwork/R/patchwork.createreference.r 2012-03-12 16:10:08 UTC (rev 109)
@@ -60,6 +60,6 @@
cat("Saving to ",output,".Rdata \n",sep="")
- save(normaldata,file=paste(output,".RData",sep=""))
+ save(normaldata,file=paste(output,".Rdata",sep=""))
return(normaldata)
}
\ No newline at end of file
Deleted: pkg/patchwork/R/patchwork.findCNs.r
===================================================================
--- pkg/patchwork/R/patchwork.findCNs.r 2012-03-12 16:05:58 UTC (rev 108)
+++ pkg/patchwork/R/patchwork.findCNs.r 2012-03-12 16:10:08 UTC (rev 109)
@@ -1,294 +0,0 @@
-patchwork.findCNs = function(name = "findCNs_",cn2,delta,het,hom,maxCn=8,ceiling=1,forcedelta=F)
- {
-
- #packagepath = system.file(package="patchwork")
- #load(paste(packagepath,"/data/commonSnps132.RData",sep=""))
- #load(paste(packagepath,"/data/ideogram.RData",sep=""))
- data(ideogram,package="patchworkData")
- load("findCNs.Rdata")
-
- voidchrom <- c('chrX','chrY') # may add non-integer chroms here....
-
- tix=NULL #temporary index
- int=NULL ## contains coverage estimate of each (total) copy number
- ai=NULL ## contains Allelic Imbalance Ratio estimate of each copy number variant.
- regions <- segs
-	regions <- regions[(is.autosome(regions$chr)&regions$np>50)&(!is.nan(regions$ai)),] ## will use these regions
- regions <- regions[!is.na(regions$median),]
-
- ## likely cn2 regions sit within delta/3 from cn2.
- expectedAt <- cn2
- tix$cn2 <- abs(regions$median - expectedAt) < (delta/3) ## index of likely cn2 regions
- temp <- regions[tix$cn2,] ## cn2 regions
- med <- weightedMedian(temp$median,temp$np) ## improved value of Log-R at cn2 (returns NULL if theres nothing there)
- int$cn2 <- ifelse(!is.null(med),med,expectedAt) ## saved to int.
-
- ## likely cn1 regions sit at about cn2 - delta:
- d <- delta ## the (Log-R) distance to cn1
- expectedAt <- int$cn2-d ## cn1 is expected here
- tix$cn1 <- abs(regions$median - expectedAt) < (d/3) ## index of likely cn1 regions
- temp <- regions[tix$cn1,]
- med <- weightedMedian(temp$median,temp$np)
- int$cn1 <- ifelse(!is.null(med),med,expectedAt)
- if (forcedelta) int$cn1 <- int$cn2-delta
-
- ## likely cn0 regions sit below cn1 - delta:
- d <- int$cn2-int$cn1
- expectedAt <- int$cn1-d
- tix$cn0 <- regions$median < expectedAt
- temp <- regions[tix$cn0,]
- med <- weightedMedian(temp$median,temp$np)
- int$cn0 <- ifelse(!is.null(med),med,expectedAt)
- if (forcedelta) int$cn0 <- int$cn1-delta
-
-
- ## likely cn3 regions sit at about cn2+delta
- d <- delta
- expectedAt <- int$cn2+d
- tix$cn3 <- abs(regions$median - expectedAt) < (d/3)
- temp <- regions[tix$cn3,]
- med <- weightedMedian(temp$median,temp$np)
- int$cn3 <- ifelse(!is.null(med),med,expectedAt)
- if (forcedelta) int$cn3 <- int$cn2+delta
-
-
- ## cn4 follows at ...
- d <- delta
- expectedAt <- int$cn3+d
- tix$cn4 <- abs(regions$median - expectedAt) < (d/4)
- temp <- regions[tix$cn4,]
- med <- weightedMedian(temp$median,temp$np)
- int$cn4 <- ifelse(!is.null(med),med,expectedAt)
- if (forcedelta) int$cn4 <- int$cn3+delta
-
- ## generalized for higher cns
- for (cn in 5:maxCn)
- {
- thisCn <- paste('cn',cn,sep='')
- prevCn <- paste('cn',cn-1,sep='')
- pprevCn <- paste('cn',cn-2,sep='')
- d <- delta #dmin*(int[prevCn][[1]]-int[pprevCn][[1]])
- expectedAt <- int[prevCn][[1]]+d
- tix[[thisCn]] <- abs(regions$median - expectedAt) < (d/5)
- temp <- regions[tix[thisCn][[1]],]
- med <- weightedMedian(temp$median,temp$np)
- int[thisCn] <- ifelse(!is.null(med),med,expectedAt)
- if (forcedelta) int[[thisCn]] <- int[[prevCn]]+delta
- }
-
- # smooth the cn relationship ?
-
- ## at cn2, find the variant clusters (normal and CNNLOH)
- ix <- (abs(regions$median - int$cn2) < 0.2*(int$cn3-int$cn2) ) # taking only closely-matching segments
- data <- regions[ix,]
- data <- data[!is.na(data$ai),] # ...with a calculated allelic imbalance.
-
- expectedAt <- het
- ix <- 2*abs(data$ai-het) < abs(data$ai-hom)
- med <- weightedMedian(data$ai[ix],data$snvs[ix])
- ai$cn2m1 <- ifelse (!is.null(med),med,expectedAt)
-
- expectedAt <- hom
- ix <- 2*abs(data$ai-hom) < abs(data$ai-het)
- med <- weightedMedian(data$ai[ix],data$snvs[ix])
- ai$cn2m0 <- ifelse (!is.null(med),med,expectedAt)
-
- ## for cn1 (and 0)
- ix <- (abs(regions$median - int$cn1) < 0.2*(int$cn2-int$cn1) )
- data <- regions[ix,]
- data <- data[!is.na(data$ai),]
- expectedAt <- (ai$cn2m0+ai$cn2m1)*3/5 ## Decent estimate.
- med <- weightedMedian(data$ai,data$snvs) ## Average allelic imbalance weighted on snp count
- ai$cn1m0 <- ifelse (!is.null(med),med,expectedAt) ## Will be NA if there was no CNNLOH
- ai$cn0m0 <- NA #unimportant
-
- ## for cn3:
- ix <- (abs(regions$median - int$cn3) < 0.2*(int$cn4-int$cn3) )
- data <- regions[ix,]
- data <- data[!is.na(data$ai),]
-
- range <- ai$cn2m0-ai$cn2m1 #the distance between 2normal and CNNLOH
- # get the 3(1) regions:
- expectedAt <- ai$cn2m1+range/3 # this is an approx if cn3m1 is absent.
-
- ix <- (data$ai<ai$cn2m0) & (data$ai>ai$cn2m1) # take regions with less AI than cn2m0 but more than cn2m1
- med <- weightedMedian(data$ai[ix],data$snvs[ix]) # average allelic imbalance weighted on snp count
- ai$cn3m1 <- ifelse (!is.null(med),med,expectedAt)
-
- # now for cn3m0
- expectedAt <- ai$cn3m1 + range # approx of cn3m0
- ix <- (abs(data$ai-expectedAt) < (expectedAt-ai$cn3m1)/4) # take those much closer to exp than to cn3m1
- med <- weightedMedian(data$ai[ix],data$snvs[ix])
- try (if (med<ai$cn2m0) med <- NULL, silent=T) ## in case of heterogeneities or strange signals, we disallow this effect.
- ai$cn3m0 <- ifelse (!is.null(med),med,expectedAt)
-
- if (is.na(ai$cn1m0)) ai$cn1m0 <- mean(ai$cn3m1,ai$cn2m0) ## If deletions were missing, place an estimate from cn3
-
- ## now for cn4
- ix <- abs(regions$median - int$cn4) < 0.2*(int$cn4-int$cn3)
- data <- regions[ix,]
- data <- data[!is.na(data$ai),]
-
- # get the 4(2) regions: they are at AI about cn2m1
- expectedAt <- ai$cn2m1 # this is a good approx
-
- ix <- data$ai<ai$cn3m1 # let all below cn3m1 in
- med <- weightedMedian(data$ai[ix],data$snvs[ix])
- ai$cn4m2 <- ifelse (!is.null(med),med,expectedAt)
- if (ai$cn4m2>ai$cn2m1) ai$cn4m2 <- ai$cn2m1 # sanity check.
-
- data <- data[!ix,] # coutinue with the remaining
- # now 4(1) has less ai than 3(0) -> sits at about cn3m1+(cn3m1-cn2m1).
- expectedAt <- ai$cn3m1+(ai$cn3m1-ai$cn2m1)
- ix <- abs(data$ai-expectedAt)<abs(data$ai-ai$cn3m0) # take those closer to exp than to 3(0)
- med <- weightedMedian(data$ai[ix],data$snvs[ix])
- ai$cn4m1 <- ifelse (!is.null(med),med,expectedAt)
-
- # now 4(0) is at about 3(0) + [0.9 - 3(0)] * [3(0)-2(0)] / [0.9-2(0)]
- expectedAt <- ai$cn3m0 + (ceiling-ai$cn3m0)*(ai$cn3m0-ai$cn2m0)/(ceiling-ai$cn2m0)
- ix <- abs(data$ai-expectedAt) < 0.2*(expectedAt-ai$cn4m2) # we take those very close to exp
- med <- weightedMedian(data$ai[ix],data$snvs[ix])
- try (if (med<ai$cn3m0) med <- NULL, silent=T) ## in case of heterogeneities or strange signals, we disallow this effect.
- ai$cn4m0 <- ifelse (!is.null(med),med,expectedAt)
-
- ## generalization for higher copy numbers. it gets complicated.
- for (cn in 5:maxCn) {
- thisCn <- paste('cn',cn,sep='')
- prevCn <- paste('cn',cn-1,sep='')
- pprevCn <- paste('cn',cn-2,sep='')
-
- ix <- (abs(regions$median - int[thisCn][[1]]) < 0.2*(int[thisCn][[1]]-int[prevCn][[1]]) )
- data <- regions[ix,]
- data <- data[!is.na(data$ai),]
- data <- data[data$np>50,] # long regions for safety
-
- ## try to find variants, starting with LOH
- # LOH such as 5(0)
- m <- 0
- thisVariant=paste(thisCn,'m',0,sep='')
- c4m0 <- ai[paste(prevCn,'m',m,sep='')][[1]] # relative naming for clarity
- c3m0 <- ai[paste(pprevCn,'m',m,sep='')][[1]]
-
- expectedAt <- ceiling-((ceiling-c4m0)*(ceiling-max(c4m0,c3m0))/(ceiling-min(c3m0,c4m0)))
- #ai[thisVariant] <- expectedAt
- # we then take those close to exp (within delta[3(0),4(0)])
- ix <- abs(data$ai-expectedAt) < abs(c4m0 - c3m0)
- med <- weightedMedian(data$ai[ix],data$snvs[ix])
- try (if (med<ai$cn4m0) med <- NULL, silent=T) ## in case of heterogeneities or strange signals, we disallow this effect.
- ai[thisVariant] <- ifelse (!is.null(med),med,expectedAt)
-
- ## then from balanced to less balanced
- minorVariants=trunc(cn/2):1
- first <- T
- for (m in minorVariants) {
- thisVariant=paste(thisCn,'m',m,sep='')
- if (m==cn/2) {
- # We have balanced variant, so rather easy.
- expectedAt <- ai[paste(pprevCn,'m',m-1,sep='')][[1]] # this is a good approx, balanced at cn-2
- ix <- data$ai<ai[paste(prevCn,'m',m-1,sep='')][[1]] # let all below (cn-1, mcn-1) in
- med <- weightedMedian(data$ai[ix],data$snvs[ix])
- ai[thisVariant] <- ifelse (!is.null(med),med,expectedAt)
- if (ai[thisVariant] > ai[paste(pprevCn,'m',m-1,sep='')][[1]]) ai[thisVariant] <- ai[paste(pprevCn,'m',m-1,sep='')][[1]] # sanity check.
- first <- F
- } else if (first) {
- # its not balanced but its the most balanced of the unbalanced. something like 5(2)
- expectedAt <- 0.5*( ai[paste(prevCn,'m',m,sep='')][[1]] + ai[paste(pprevCn,'m',m-1,sep='')][[1]] ) # that means between 4(2) and 3(1)
- ix <- abs(data$ai-expectedAt) < ( ai[paste(prevCn,'m',m,sep='')][[1]] - ai[paste(pprevCn,'m',m-1,sep='')][[1]] ) /3 # let all "between" 4(2) and 3(1) in
- med <- weightedMedian(data$ai[ix],data$snvs[ix])
- ai[thisVariant] <- ifelse (!is.null(med),med,expectedAt)
- first <- F
- } else {
- # not the most balanced unbalanced variant, for example 5(1):
- expectedAt <- ai[paste(thisCn,'m',minorVariants[1],sep='')][[1]] + (minorVariants[1]-m) * (ai[paste(thisCn,'m',0,sep='')][[1]] - ai[paste(thisCn,'m',minorVariants[1],sep='')][[1]]) / trunc(cn/2) # 5(2) + (which)* 5(0)-5(2) /(n)
- ix <- abs(data$ai-expectedAt) < ( ai[paste(prevCn,'m',m,sep='')][[1]] - ai[paste(pprevCn,'m',m-1,sep='')][[1]] ) /3 # let all "between" 4(1) and 3(0) in
- med <- weightedMedian(data$ai[ix],data$snvs[ix])
- ai[thisVariant] <- ifelse (!is.null(med),med,expectedAt)
- }
- } # done with minor variants
- } # done with copy numbers
-
- sampleInfo <- list('int'=int,'ai'=ai)
-
- ## Assign total and minor copy numbers to all segments.
- regions <- segs[!is.na(segs$median),] ## This time, work on all segments available.
-
- Cn <- NULL ## Total copy number
- mCn <- NULL ## Minor allele copy number
- fullCN <- NULL ## Variant label. ('cnXmY')
-
- intDist <- NULL ## distance to certain Log-R
- imbaDist <- NULL ## distance to certain allelic imbalance
-
-
-
- ## give each segment the best matching CN and mCN
- for (i in 1:nrow(regions))
- {
- # set total copy number
- distance <- Inf
- for (cn in 0:maxCn)
- {
- t_int <- int[paste('cn',cn,sep='')][[1]] ## get Log-R of particular cn from 'int'
- t_dis <- abs(regions$median[i]-t_int) ## distance to that particular cn
- if (t_dis < distance)
- { ## nearest so far, save.
- distance <- t_dis -> intDist[i]
- Cn[i] <- cn
- }
- }
- }
- # Y makes CN0 sit very low, this is a fix on non-Y Cn0
- #if ((Cn[i] == 1)&(intDist[i] > (int$cn2-int$cn1))) Cn[i] <- 0 ## currently not needed.
- for (i in 1:nrow(regions))
- {
- # set minor CN
- distance <- Inf
- if (Cn[i]<=1)
- {
- mCn[i] <- 0
- }
- else if (!is.na(regions$ai[i]))
- if (!is.nan(regions$ai[i]))
- for (m in 0:trunc(Cn[i]/2))
- {
- t_ai <- ai[paste('cn',Cn[i],'m',m,sep='')][[1]]
- t_dis <- abs(regions$ai[i]-t_ai)
- if (t_dis < distance)
- {
- distance <- t_dis -> imbaDist[i]
- mCn[i] <- m
- }
- }
- else
- mCn[i] <- NA
- fullCN[i] <- paste('cn',Cn[i],'m',mCn[i],sep='') # Full description
- }
-
- regions$Cn <- Cn
- regions$mCn <- mCn
- regions$fullCN <- fullCN
-
- meanCn <- weightedMean(regions$Cn,regions$np)
- ix1 <- regions$Cn==trunc(meanCn)
- ix2 <- regions$Cn==ceiling(meanCn)
- xdelta <- weightedMean(regions$median[ix2],regions$np[ix2]) - weightedMean(regions$median[ix1],regions$np[ix1])
- expected_delta <- 1/meanCn
-
- tumor_percentDNA <- xdelta / expected_delta
- tumor_percent <- tumor_percentDNA/(meanCn/2) / ( tumor_percentDNA/(meanCn/2) + (1-tumor_percentDNA) )
-
- regions$meanCn <- meanCn
- regions$tumor_percent <- tumor_percent
- write.csv(regions,file='Copynumbers.csv')
-
-
- karyotype_check(regions$chr,regions$start,regions$end,regions$mean,regions$ai,
- regions$Cn,regions$mCn,list(int=int,ai=ai),name=name,
- xlim=c(0,2.4),ylim=c(0.1,1))
-
- karyotype_chromsCN(regions$chr,regions$start,regions$end,regions$mean,regions$ai,
- regions$Cn,regions$mCn,kbsegs$chr,
- kbsegs$pos,kbsegs$ratio,alf$achr,alf$apos,
- (1-alf$amin/alf$amax),name=name,xlim=c(0,2.4),
- ylim=c(0.1,1))
- }
\ No newline at end of file
Added: pkg/patchwork/R/patchwork.plot.r
===================================================================
--- pkg/patchwork/R/patchwork.plot.r (rev 0)
[TRUNCATED]
To get the complete diff run:
svnlook diff /svnroot/patchwork -r 109