[Patchwork-commits] r109 - in pkg: patchwork patchwork/R patchwork/man patchworkCG patchworkCG/R patchworkCG/man patchworkData patchworkData/man

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Mon Mar 12 17:10:08 CET 2012


Author: sebastian_d
Date: 2012-03-12 17:10:08 +0100 (Mon, 12 Mar 2012)
New Revision: 109

Added:
   pkg/patchwork/R/patchwork.copynumbers.r
   pkg/patchwork/R/patchwork.plot.r
   pkg/patchwork/R/subfunctions_copynumbers.r
   pkg/patchwork/man/patchwork.copynumbers.Rd
   pkg/patchwork/man/patchwork.plot.Rd
   pkg/patchwork/man/subfunctions_copynumbers.Rd
   pkg/patchworkCG/R/patchwork.CG.copynumbers.r
   pkg/patchworkCG/R/patchwork.CG.plot.r
   pkg/patchworkCG/R/subfunctions_copynumbers.r
   pkg/patchworkCG/man/patchwork.CG.copynumbers.Rd
   pkg/patchworkCG/man/patchwork.CG.plot.Rd
   pkg/patchworkCG/man/subfunctions_copynumbers.Rd
Removed:
   pkg/patchwork/R/patchwork.CNA.r
   pkg/patchwork/R/patchwork.findCNs.r
   pkg/patchwork/R/subfunctions_findCNs.r
   pkg/patchwork/man/patchwork.CNA.Rd
   pkg/patchwork/man/patchwork.findCNs.Rd
   pkg/patchwork/man/subfunctions_findCNs.Rd
   pkg/patchworkCG/R/patchwork.CG.r
   pkg/patchworkCG/R/patchwork.CGCNV.r
   pkg/patchworkCG/R/subfunctions_CGCNV.r
   pkg/patchworkCG/man/patchwork.CG.Rd
   pkg/patchworkCG/man/patchwork.CGCNV.Rd
   pkg/patchworkCG/man/subfunctions_CGCNV.Rd
Modified:
   pkg/patchwork/NAMESPACE
   pkg/patchwork/R/patchwork.GCNorm.r
   pkg/patchwork/R/patchwork.createreference.r
   pkg/patchwork/R/patchwork.readChroms.r
   pkg/patchwork/README
   pkg/patchworkCG/NAMESPACE
   pkg/patchworkCG/R/CG_KaChCN.r
   pkg/patchworkCG/README
   pkg/patchworkCG/man/CG_KaCh.Rd
   pkg/patchworkCG/man/CG_KaChCN.Rd
   pkg/patchworkCG/man/CG_Ka_check.Rd
   pkg/patchworkCG/man/CG_karyotype.Rd
   pkg/patchworkCG/man/ideogram.Rd
   pkg/patchworkData/DESCRIPTION
   pkg/patchworkData/man/ideogram.Rd
Log:
big commit after changing function names

Modified: pkg/patchwork/NAMESPACE
===================================================================
--- pkg/patchwork/NAMESPACE	2012-03-12 16:05:58 UTC (rev 108)
+++ pkg/patchwork/NAMESPACE	2012-03-12 16:10:08 UTC (rev 109)
@@ -6,17 +6,17 @@
 		karyotype,
 		patchwork.alleledata,
 		patchwork.applyref,
-		patchwork.CNA,
+		patchwork.plot,
 		patchwork.createreference,
-		patchwork.findCNs,
+		patchwork.copynumbers,
 		patchwork.GCNorm,
 		patchwork.Medians_n_AI,
 		patchwork.readChroms,
 		patchwork.segment,
 		patchwork.smoothing,
-		#subfunctions_findCNs
+		#subfunctions_copynumbers
 		weightedMedian,
 		weightedMean,
 		is.autosome,
 		deChrom_ucsc,
-		chrom_ucsc)
+		chrom_ucsc)
\ No newline at end of file

Deleted: pkg/patchwork/R/patchwork.CNA.r
===================================================================
--- pkg/patchwork/R/patchwork.CNA.r	2012-03-12 16:05:58 UTC (rev 108)
+++ pkg/patchwork/R/patchwork.CNA.r	2012-03-12 16:10:08 UTC (rev 109)
@@ -1,122 +0,0 @@
-patchwork.CNA <- function(BamFile,Pileup,reference=NULL,normal.bam=NULL,normal.pileup=NULL,Alpha=0.0001,SD=1)
-	{
-		
-	#Attempt to read file pile.alleles, incase its already been created.
-	alf <- normalalf <- NULL
-	try( load("pile.alleles.Rdata"), silent=TRUE )
-	
-	#If it wasnt created, create it using the perl script pile2alleles.pl
-	#which is included in the package. Creates pile.alleles in whichever folder
-	#you are running R from. (getwd())
-	if(is.null(alf))
-		{
-		cat("Initiating Allele Data Generation \n")
-		if (!is.null(normal.pileup)) normalalf <- patchwork.alleledata(normal.pileup)
-		alf = patchwork.alleledata(Pileup, normalalf=normalalf)
-		cat("Allele Data Generation Complete \n")
-		save(alf,file="pile.alleles.Rdata")
-		}
-	
-	#Generate chroms object depending on the naming in pileup (alf).
-	#either chr1...chr22,chrX,chrY or 1...22,X,Y
-	data(ideogram,package="patchworkData")
-	
-	chroms = as.character(unique(ideogram$chr))	
-	
-	## Read coverage data. If already done, will load "data.Rdata" instead.
-	data <- normaldata <- NULL
-	try ( load('data.Rdata'), silent=TRUE )
-	
-	#If data object does not exist, IE it was not loaded in the previous line
-	#perform the function on the chromosomes to create the object.
-	if(is.null(data)) {
-		cat("Initiating Read Chromosomal Coverage \n")
-		data = patchwork.readChroms(BamFile,chroms)
-		cat("Read Chromosomal Coverage Complete \n")
-		save(data,file='data.Rdata')
-	}
-	
-	#Perform GC normalization if the amount of columns indicate that gc normalization
-	#has not already been performed.
-	if(length(data) < 7) {
-		cat("Initiating GC Content Normalization \n")
-		data = patchwork.GCNorm(data)
-		cat("GC Content Normalization Complete \n")
-		save(data,file='data.Rdata')
-	}
-	#after this, no further normalization was done on reference sequences.
-
-	#If matched normal bam file was defined, load and normalize it the same way.
-	if (!is.null(normal.bam) & is.null(reference)) {
-		normaldata=NULL
-		try ( load('normaldata.Rdata'), silent=TRUE )
-		if(is.null(normaldata)) {
-			cat("Initiating Read Chromosomal Coverage (matched normal) \n")
-			normaldata = patchwork.readChroms(normal.bam,chroms)
-			cat("Read Chromosomal Coverage Complete (matched normal) \n")
-		}	
-		if(length(normaldata) != 3) {
-			cat("Initiating GC Content Normalization (matched normal) \n")
-			normaldata = patchwork.GCNorm(normaldata[,1:6])
-			cat("GC Content Normalization Complete (matched normal) \n")
-			normaldata <- data.frame(chr=normaldata$chr,pos=normaldata$pos,normal=normaldata$norm)
-			save(normaldata,file='normaldata.Rdata')
-		}
-		#save(normaldata,file='normaldata.Rdata')
-	} 
-
-	
-	# Smooth the data.
-	kbsegs = NULL
-	try( load("smoothed.Rdata"), silent=TRUE )	
-	if(length(kbsegs) == 0)
-		{
-		cat("Initiating Smoothing \n")
-		kbsegs = patchwork.smoothing(data,normaldata,reference,chroms)
-		save(kbsegs,file="smoothed.Rdata")
-		cat("Smoothing Complete \n")
-		}
-	
-	#Segment the data.
-	library(DNAcopy)
-	segs = NULL
-	try( load("Segments.Rdata"), silent=TRUE )
-	if(length(segs) == 0)
-		{
-		cat("Initiating Segmentation \n")
-		cat("Note: If segmentation fails to initiate the probable reason is that you have not ")
-		cat("installed the R package DNAcopy. See patchwork's README for installation instructions. \n")
-		segs = patchwork.segment(kbsegs,chroms,Alpha,SD)
-		save(segs,file="Segments.Rdata")
-		cat("Segmentation Complete \n")
-		}
-		
-	if(length(segs) == 6)
-		{
-		#Get medians and AI for the segments.
-		cat("Initiating Segment data extraction (Medians and AI) \n")
-		segs = patchwork.Medians_n_AI(segs,kbsegs,alf)
-		save(segs,file="Segments.Rdata")
-		cat("Segment data extraction Complete \n \n \n")
-		}
-	
-	cat("Saving information objects needed for patchwork.findCNs in findCNs.Rdata \n \n \n")
-	save(segs,alf,kbsegs,file="findCNs.Rdata")
-	
-	#Plot it
-	cat("Initiating Plotting \n")
-	karyotype(segs$chr,segs$start,segs$end,segs$median,segs$ai,
-			name=as.character(BamFile),
-			xlim=c(0,2.4),
-			ylim=c(0.1,1))
-	karyotype_chroms(segs$chr,segs$start,segs$end,segs$median,segs$ai,
-			kbsegs$chr,kbsegs$pos,kbsegs$ratio,
-			alf$achr,alf$apos,(1-alf$amin/alf$amax),
-			name=as.character(BamFile),
-			xlim=c(0,2.4),
-			ylim=c(0.1,1))
-	cat("Plotting Complete \n")
-	cat("Shutting down..... \n")
-	}
-	
-	
\ No newline at end of file

Modified: pkg/patchwork/R/patchwork.GCNorm.r
===================================================================
--- pkg/patchwork/R/patchwork.GCNorm.r	2012-03-12 16:05:58 UTC (rev 108)
+++ pkg/patchwork/R/patchwork.GCNorm.r	2012-03-12 16:10:08 UTC (rev 109)
@@ -1,7 +1,6 @@
 patchwork.GCNorm <- function(data)
 	{
 	#Load data included in package
-	#packagepath = system.file(package="CNA")
 	#load(paste(packagepath,"/data/commonSnps132.RData",sep=""))
 	#load(paste(packagepath,"/data/ideogram.RData",sep=""))
 	#load(paste(packagepath,"/data/normaldata.RData",sep=""))

Added: pkg/patchwork/R/patchwork.copynumbers.r
===================================================================
--- pkg/patchwork/R/patchwork.copynumbers.r	                        (rev 0)
+++ pkg/patchwork/R/patchwork.copynumbers.r	2012-03-12 16:10:08 UTC (rev 109)
@@ -0,0 +1,294 @@
+patchwork.copynumbers = function(name = "copynumbers_",cn2,delta,het,hom,maxCn=8,ceiling=1,forcedelta=F)
+	{
+	
+	#packagepath = system.file(package="patchwork")
+	#load(paste(packagepath,"/data/commonSnps132.RData",sep=""))
+	#load(paste(packagepath,"/data/ideogram.RData",sep=""))
+	data(ideogram,package="patchworkData")
+	load("copynumbers.Rdata")
+
+	voidchrom <- c('chrX','chrY') # may add non-integer chroms here....
+
+    tix=NULL     #temporary index
+    int=NULL     ## contains coverage estimate of each (total) copy number
+    ai=NULL         ## contains Allelic Imbalance Ratio estimate of each copy number variant.
+    regions <- segs
+    regions <- regions[(is.autosome(regions$chr)&regions$np>50)&(!is.nan(regions$ai)),] ## will use these regions
+    regions <- regions[!is.na(regions$median),]
+
+    ## likely cn2 regions sit within delta/3 from cn2.
+    expectedAt <- cn2
+    tix$cn2 <- abs(regions$median - expectedAt) < (delta/3)    ## index of likely cn2 regions
+    temp <- regions[tix$cn2,]                                ## cn2 regions
+    med <- weightedMedian(temp$median,temp$np)            ## improved value of Log-R at cn2 (returns NULL if theres nothing there)
+    int$cn2 <- ifelse(!is.null(med),med,expectedAt)            ## saved to int.
+
+    ## likely cn1 regions sit at about cn2 - delta:
+    d <- delta                                                 ## the (Log-R) distance to cn1
+    expectedAt <- int$cn2-d                                     ## cn1 is expected here
+    tix$cn1 <- abs(regions$median - expectedAt) < (d/3)        ## index of likely cn1 regions
+    temp <- regions[tix$cn1,]
+    med <- weightedMedian(temp$median,temp$np)
+    int$cn1 <- ifelse(!is.null(med),med,expectedAt)
+    if (forcedelta) int$cn1 <- int$cn2-delta
+
+    ## likely cn0 regions sit below cn1 - delta:
+    d <- int$cn2-int$cn1
+    expectedAt <- int$cn1-d                                     
+    tix$cn0 <- regions$median < expectedAt
+    temp <- regions[tix$cn0,]
+    med <- weightedMedian(temp$median,temp$np)
+    int$cn0 <- ifelse(!is.null(med),med,expectedAt)
+    if (forcedelta) int$cn0 <- int$cn1-delta
+
+
+    ## likely cn3 regions sit at about cn2+delta
+    d <- delta
+    expectedAt <- int$cn2+d
+    tix$cn3 <- abs(regions$median - expectedAt) < (d/3)
+    temp <- regions[tix$cn3,]
+    med <- weightedMedian(temp$median,temp$np)
+    int$cn3 <- ifelse(!is.null(med),med,expectedAt)
+    if (forcedelta) int$cn3 <- int$cn2+delta
+
+
+    ## cn4 follows at ...
+    d <- delta
+    expectedAt <- int$cn3+d
+    tix$cn4 <- abs(regions$median - expectedAt) < (d/4)
+    temp <- regions[tix$cn4,]
+    med <- weightedMedian(temp$median,temp$np)
+    int$cn4 <- ifelse(!is.null(med),med,expectedAt)
+    if (forcedelta) int$cn4 <- int$cn3+delta
+
+    ## generalized for higher cns
+    for (cn in 5:maxCn)
+    	{
+    	thisCn <- paste('cn',cn,sep='')
+    	prevCn <- paste('cn',cn-1,sep='')
+    	pprevCn <- paste('cn',cn-2,sep='')
+    	d <- delta #dmin*(int[prevCn][[1]]-int[pprevCn][[1]])
+    	expectedAt <- int[prevCn][[1]]+d
+    	tix[[thisCn]] <- abs(regions$median - expectedAt) < (d/5)
+    	temp <- regions[tix[thisCn][[1]],]
+    	med <- weightedMedian(temp$median,temp$np)
+    	int[thisCn] <- ifelse(!is.null(med),med,expectedAt)
+        if (forcedelta) int[[thisCn]] <- int[[prevCn]]+delta
+    	}
+
+	#  smooth the cn relationship ?
+
+    ## at cn2, find the variant clusters (normal and CNNLOH)
+    ix <- (abs(regions$median - int$cn2) < 0.2*(int$cn3-int$cn2) ) # taking only closely-matching segments
+    data <- regions[ix,]
+    data <- data[!is.na(data$ai),] # ...with a calculated allelic imbalance.
+
+    expectedAt <- het
+    ix <- 2*abs(data$ai-het) < abs(data$ai-hom)
+    med <- weightedMedian(data$ai[ix],data$snvs[ix]) 
+    ai$cn2m1 <- ifelse (!is.null(med),med,expectedAt)
+
+    expectedAt <- hom
+    ix <- 2*abs(data$ai-hom) < abs(data$ai-het)
+    med <- weightedMedian(data$ai[ix],data$snvs[ix]) 
+    ai$cn2m0 <- ifelse (!is.null(med),med,expectedAt)
+
+    ## for cn1 (and 0)
+    ix <- (abs(regions$median - int$cn1) < 0.2*(int$cn2-int$cn1) )
+    data <- regions[ix,]
+    data <- data[!is.na(data$ai),]
+    expectedAt <- (ai$cn2m0+ai$cn2m1)*3/5     ## Decent estimate.
+    med <- weightedMedian(data$ai,data$snvs) ## Average allelic imbalance weighted on snp count
+    ai$cn1m0 <- ifelse (!is.null(med),med,expectedAt)    ## Will be NA if there was no CNNLOH
+    ai$cn0m0 <- NA #unimportant
+
+    ## for cn3:
+    ix <- (abs(regions$median - int$cn3) < 0.2*(int$cn4-int$cn3) )
+    data <- regions[ix,]
+    data <- data[!is.na(data$ai),]
+  
+    range <- ai$cn2m0-ai$cn2m1 #the distance between 2normal and CNNLOH
+    # get the 3(1) regions: 
+    expectedAt <- ai$cn2m1+range/3 # this is an approx if cn3m1 is absent.
+
+    ix <- (data$ai<ai$cn2m0) & (data$ai>ai$cn2m1) # take regions with less AI than cn2m0 but more than cn2m1
+    med <- weightedMedian(data$ai[ix],data$snvs[ix]) # average allelic imbalance weighted on snp count
+    ai$cn3m1 <- ifelse (!is.null(med),med,expectedAt)
+
+    # now for cn3m0
+    expectedAt <- ai$cn3m1 + range # approx of cn3m0
+    ix <- (abs(data$ai-expectedAt) < (expectedAt-ai$cn3m1)/4)  # take those much closer to exp than to cn3m1
+    med <- weightedMedian(data$ai[ix],data$snvs[ix]) 
+    try (if (med<ai$cn2m0) med <- NULL, silent=T)    ## in case of heterogeneities or strange signals, we disallow this effect.
+    ai$cn3m0 <- ifelse (!is.null(med),med,expectedAt)
+
+    if (is.na(ai$cn1m0)) ai$cn1m0 <- mean(ai$cn3m1,ai$cn2m0) ## If deletions were missing, place an estimate from cn3 
+    
+    ## now for cn4
+    ix <- abs(regions$median - int$cn4) < 0.2*(int$cn4-int$cn3) 
+    data <- regions[ix,]
+    data <- data[!is.na(data$ai),]
+
+    # get the 4(2) regions: they are at AI about cn2m1
+    expectedAt <- ai$cn2m1 # this is a good approx 
+
+    ix <- data$ai<ai$cn3m1 # let all below cn3m1 in
+    med <- weightedMedian(data$ai[ix],data$snvs[ix]) 
+    ai$cn4m2 <- ifelse (!is.null(med),med,expectedAt)
+    if (ai$cn4m2>ai$cn2m1) ai$cn4m2 <- ai$cn2m1 # sanity check.
+
+    data <- data[!ix,] # coutinue with the remaining
+    # now 4(1) has less ai than 3(0) -> sits at about cn3m1+(cn3m1-cn2m1).
+    expectedAt <- ai$cn3m1+(ai$cn3m1-ai$cn2m1)
+    ix <- abs(data$ai-expectedAt)<abs(data$ai-ai$cn3m0) # take those closer to exp than to 3(0)
+    med <- weightedMedian(data$ai[ix],data$snvs[ix]) 
+    ai$cn4m1 <- ifelse (!is.null(med),med,expectedAt)
+
+    # now 4(0) is at about 3(0) + [0.9 - 3(0)] * [3(0)-2(0)] / [0.9-2(0)] 
+    expectedAt <- ai$cn3m0 + (ceiling-ai$cn3m0)*(ai$cn3m0-ai$cn2m0)/(ceiling-ai$cn2m0)
+    ix <- abs(data$ai-expectedAt) < 0.2*(expectedAt-ai$cn4m2) # we take those very close to exp
+    med <- weightedMedian(data$ai[ix],data$snvs[ix])
+     try (if (med<ai$cn3m0) med <- NULL, silent=T)    ## in case of heterogeneities or strange signals, we disallow this effect.
+    ai$cn4m0 <- ifelse (!is.null(med),med,expectedAt)
+
+    ## generalization for higher copy numbers. it gets complicated.
+    for (cn in 5:maxCn) {
+    thisCn <- paste('cn',cn,sep='')
+    prevCn <- paste('cn',cn-1,sep='')
+    pprevCn <- paste('cn',cn-2,sep='')
+    
+    ix <- (abs(regions$median - int[thisCn][[1]]) < 0.2*(int[thisCn][[1]]-int[prevCn][[1]]) )
+    data <- regions[ix,]
+    data <- data[!is.na(data$ai),]
+    data <- data[data$np>50,] # long regions for safety
+
+    ## try to find variants, starting with LOH
+    # LOH such as 5(0)
+    m <- 0
+    thisVariant=paste(thisCn,'m',0,sep='')
+    c4m0 <- ai[paste(prevCn,'m',m,sep='')][[1]] # relative naming for clarity
+    c3m0 <- ai[paste(pprevCn,'m',m,sep='')][[1]]
+    
+    expectedAt <- ceiling-((ceiling-c4m0)*(ceiling-max(c4m0,c3m0))/(ceiling-min(c3m0,c4m0)))
+    #ai[thisVariant] <- expectedAt
+    # we then take those close to exp (within delta[3(0),4(0)])
+    ix <- abs(data$ai-expectedAt) < abs(c4m0 - c3m0) 
+    med <- weightedMedian(data$ai[ix],data$snvs[ix]) 
+    try (if (med<ai$cn4m0) med <- NULL, silent=T)    ## in case of heterogeneities or strange signals, we disallow this effect.
+    ai[thisVariant] <- ifelse (!is.null(med),med,expectedAt)
+    
+    ## then from balanced to less balanced
+    minorVariants=trunc(cn/2):1 
+    first <- T
+    for (m in minorVariants) {
+        thisVariant=paste(thisCn,'m',m,sep='')
+        if (m==cn/2) {
+        # We have balanced variant, so rather easy.
+        expectedAt <- ai[paste(pprevCn,'m',m-1,sep='')][[1]] # this is a good approx, balanced at cn-2
+        ix <- data$ai<ai[paste(prevCn,'m',m-1,sep='')][[1]] # let all below (cn-1, mcn-1) in
+        med <- weightedMedian(data$ai[ix],data$snvs[ix]) 
+        ai[thisVariant] <- ifelse (!is.null(med),med,expectedAt)
+	if (ai[thisVariant] > ai[paste(pprevCn,'m',m-1,sep='')][[1]]) ai[thisVariant] <- ai[paste(pprevCn,'m',m-1,sep='')][[1]] # sanity check.
+        first <- F
+        } else if (first) { 
+        # its not balanced but its the most balanced of the unbalanced. something like 5(2)
+        expectedAt <- 0.5*( ai[paste(prevCn,'m',m,sep='')][[1]] + ai[paste(pprevCn,'m',m-1,sep='')][[1]] ) # that means between 4(2) and 3(1)
+        ix <- abs(data$ai-expectedAt) < ( ai[paste(prevCn,'m',m,sep='')][[1]] - ai[paste(pprevCn,'m',m-1,sep='')][[1]] ) /3 # let all "between" 4(2) and 3(1) in
+        med <- weightedMedian(data$ai[ix],data$snvs[ix]) 
+        ai[thisVariant] <- ifelse (!is.null(med),med,expectedAt)
+        first <- F
+        } else {
+        # not the most balanced unbalanced variant, for example 5(1):
+        expectedAt <- ai[paste(thisCn,'m',minorVariants[1],sep='')][[1]] + (minorVariants[1]-m) * (ai[paste(thisCn,'m',0,sep='')][[1]] - ai[paste(thisCn,'m',minorVariants[1],sep='')][[1]]) / trunc(cn/2) # 5(2) + (which)* 5(0)-5(2) /(n)
+        ix <- abs(data$ai-expectedAt) < ( ai[paste(prevCn,'m',m,sep='')][[1]] - ai[paste(pprevCn,'m',m-1,sep='')][[1]] ) /3 # let all "between" 4(1) and 3(0) in
+        med <- weightedMedian(data$ai[ix],data$snvs[ix]) 
+        ai[thisVariant] <- ifelse (!is.null(med),med,expectedAt)
+        } 
+    } # done with minor variants
+    } # done with copy numbers
+
+    sampleInfo <- list('int'=int,'ai'=ai)
+
+	##  Assign total and minor copy numbers to all segments.
+    regions <- segs[!is.na(segs$median),]    ## This time, work on all segments available.
+
+    Cn <- NULL            ## Total copy number
+    mCn <- NULL            ## Minor allele copy number
+    fullCN <- NULL        ## Variant label. ('cnXmY')
+
+    intDist <- NULL        ## distance to certain Log-R
+    imbaDist <- NULL    ## distance to certain allelic imbalance
+
+	
+
+    ## give each segment the best matching CN and mCN
+    for (i in 1:nrow(regions)) 
+    	{
+   	 	# set total copy number
+    	distance <- Inf
+    	for (cn in 0:maxCn) 
+    		{
+        	t_int <- int[paste('cn',cn,sep='')][[1]]    ## get Log-R of particular cn from 'int'
+        	t_dis <- abs(regions$median[i]-t_int)            ## distance to that particular cn
+        	if (t_dis < distance) 
+        		{                        ## nearest so far, save.
+        		distance <- t_dis -> intDist[i]
+        		Cn[i] <- cn
+        		}
+    		}
+    	}
+    # Y makes CN0 sit very low, this is a fix on non-Y Cn0
+    #if ((Cn[i] == 1)&(intDist[i] > (int$cn2-int$cn1))) Cn[i] <- 0 ## currently not needed.
+	for (i in 1:nrow(regions)) 
+		{
+   		# set minor CN
+    	distance <- Inf
+    	if (Cn[i]<=1)
+    		{
+        	mCn[i] <- 0
+    		} 
+    	else if (!is.na(regions$ai[i])) 
+    		if (!is.nan(regions$ai[i])) 
+    			for (m in 0:trunc(Cn[i]/2)) 
+    				{
+        			t_ai <- ai[paste('cn',Cn[i],'m',m,sep='')][[1]]
+        			t_dis <- abs(regions$ai[i]-t_ai)
+        			if (t_dis < distance) 
+        				{
+        				distance <- t_dis -> imbaDist[i]
+        				mCn[i] <- m
+        				}
+    				}
+    		else 
+    			mCn[i] <- NA
+    			fullCN[i] <- paste('cn',Cn[i],'m',mCn[i],sep='') # Full description
+  		}
+ 
+    regions$Cn <- Cn
+    regions$mCn <- mCn
+    regions$fullCN <- fullCN
+	
+	meanCn <- weightedMean(regions$Cn,regions$np)
+	ix1 <- regions$Cn==trunc(meanCn)
+	ix2 <- regions$Cn==ceiling(meanCn)
+	xdelta <- weightedMean(regions$median[ix2],regions$np[ix2]) - weightedMean(regions$median[ix1],regions$np[ix1])
+	expected_delta <- 1/meanCn
+	
+	tumor_percentDNA <- xdelta / expected_delta
+    tumor_percent <- tumor_percentDNA/(meanCn/2) / ( tumor_percentDNA/(meanCn/2) + (1-tumor_percentDNA) )
+
+    regions$meanCn <- meanCn
+    regions$tumor_percent <- tumor_percent
+    write.csv(regions,file='Copynumbers.csv')
+
+
+	karyotype_check(regions$chr,regions$start,regions$end,regions$mean,regions$ai,
+					regions$Cn,regions$mCn,list(int=int,ai=ai),name=name,
+					xlim=c(0,2.4),ylim=c(0.1,1))
+					
+	karyotype_chromsCN(regions$chr,regions$start,regions$end,regions$mean,regions$ai,
+						regions$Cn,regions$mCn,kbsegs$chr,
+						kbsegs$pos,kbsegs$ratio,alf$achr,alf$apos,
+						(1-alf$amin/alf$amax),name=name,xlim=c(0,2.4),
+						ylim=c(0.1,1))
+	}
\ No newline at end of file

Modified: pkg/patchwork/R/patchwork.createreference.r
===================================================================
--- pkg/patchwork/R/patchwork.createreference.r	2012-03-12 16:05:58 UTC (rev 108)
+++ pkg/patchwork/R/patchwork.createreference.r	2012-03-12 16:10:08 UTC (rev 109)
@@ -60,6 +60,6 @@
 
 	
 	cat("Saving to ",output,".Rdata \n",sep="")
-	save(normaldata,file=paste(output,".RData",sep=""))
+	save(normaldata,file=paste(output,".Rdata",sep=""))
 	return(normaldata)
 	}
\ No newline at end of file

Deleted: pkg/patchwork/R/patchwork.findCNs.r
===================================================================
--- pkg/patchwork/R/patchwork.findCNs.r	2012-03-12 16:05:58 UTC (rev 108)
+++ pkg/patchwork/R/patchwork.findCNs.r	2012-03-12 16:10:08 UTC (rev 109)
@@ -1,294 +0,0 @@
-patchwork.findCNs = function(name = "findCNs_",cn2,delta,het,hom,maxCn=8,ceiling=1,forcedelta=F)
-	{
-	
-	#packagepath = system.file(package="patchwork")
-	#load(paste(packagepath,"/data/commonSnps132.RData",sep=""))
-	#load(paste(packagepath,"/data/ideogram.RData",sep=""))
-	data(ideogram,package="patchworkData")
-	load("findCNs.Rdata")
-
-	voidchrom <- c('chrX','chrY') # may add non-integer chroms here....
-
-    tix=NULL     #temporary index
-    int=NULL     ## contains coverage estimate of each (total) copy number
-    ai=NULL         ## contains Allelic Imbalance Ratio estimate of each copy number variant.
-    regions <- segs
-    regions <- regions[(is.autosome(regions$chr)&regions$np>50)&(!is.nan(regions$ai)),] ## will use these regions
-    regions <- regions[!is.na(regions$median),]
-
-    ## likely cn2 regions sit within delta/3 from cn2.
-    expectedAt <- cn2
-    tix$cn2 <- abs(regions$median - expectedAt) < (delta/3)    ## index of likely cn2 regions
-    temp <- regions[tix$cn2,]                                ## cn2 regions
-    med <- weightedMedian(temp$median,temp$np)            ## improved value of Log-R at cn2 (returns NULL if theres nothing there)
-    int$cn2 <- ifelse(!is.null(med),med,expectedAt)            ## saved to int.
-
-    ## likely cn1 regions sit at about cn2 - delta:
-    d <- delta                                                 ## the (Log-R) distance to cn1
-    expectedAt <- int$cn2-d                                     ## cn1 is expected here
-    tix$cn1 <- abs(regions$median - expectedAt) < (d/3)        ## index of likely cn1 regions
-    temp <- regions[tix$cn1,]
-    med <- weightedMedian(temp$median,temp$np)
-    int$cn1 <- ifelse(!is.null(med),med,expectedAt)
-    if (forcedelta) int$cn1 <- int$cn2-delta
-
-    ## likely cn0 regions sit below cn1 - delta:
-    d <- int$cn2-int$cn1
-    expectedAt <- int$cn1-d                                     
-    tix$cn0 <- regions$median < expectedAt
-    temp <- regions[tix$cn0,]
-    med <- weightedMedian(temp$median,temp$np)
-    int$cn0 <- ifelse(!is.null(med),med,expectedAt)
-    if (forcedelta) int$cn0 <- int$cn1-delta
-
-
-    ## likely cn3 regions sit at about cn2+delta
-    d <- delta
-    expectedAt <- int$cn2+d
-    tix$cn3 <- abs(regions$median - expectedAt) < (d/3)
-    temp <- regions[tix$cn3,]
-    med <- weightedMedian(temp$median,temp$np)
-    int$cn3 <- ifelse(!is.null(med),med,expectedAt)
-    if (forcedelta) int$cn3 <- int$cn2+delta
-
-
-    ## cn4 follows at ...
-    d <- delta
-    expectedAt <- int$cn3+d
-    tix$cn4 <- abs(regions$median - expectedAt) < (d/4)
-    temp <- regions[tix$cn4,]
-    med <- weightedMedian(temp$median,temp$np)
-    int$cn4 <- ifelse(!is.null(med),med,expectedAt)
-    if (forcedelta) int$cn4 <- int$cn3+delta
-
-    ## generalized for higher cns
-    for (cn in 5:maxCn)
-    	{
-    	thisCn <- paste('cn',cn,sep='')
-    	prevCn <- paste('cn',cn-1,sep='')
-    	pprevCn <- paste('cn',cn-2,sep='')
-    	d <- delta #dmin*(int[prevCn][[1]]-int[pprevCn][[1]])
-    	expectedAt <- int[prevCn][[1]]+d
-    	tix[[thisCn]] <- abs(regions$median - expectedAt) < (d/5)
-    	temp <- regions[tix[thisCn][[1]],]
-    	med <- weightedMedian(temp$median,temp$np)
-    	int[thisCn] <- ifelse(!is.null(med),med,expectedAt)
-        if (forcedelta) int[[thisCn]] <- int[[prevCn]]+delta
-    	}
-
-	#  smooth the cn relationship ?
-
-    ## at cn2, find the variant clusters (normal and CNNLOH)
-    ix <- (abs(regions$median - int$cn2) < 0.2*(int$cn3-int$cn2) ) # taking only closely-matching segments
-    data <- regions[ix,]
-    data <- data[!is.na(data$ai),] # ...with a calculated allelic imbalance.
-
-    expectedAt <- het
-    ix <- 2*abs(data$ai-het) < abs(data$ai-hom)
-    med <- weightedMedian(data$ai[ix],data$snvs[ix]) 
-    ai$cn2m1 <- ifelse (!is.null(med),med,expectedAt)
-
-    expectedAt <- hom
-    ix <- 2*abs(data$ai-hom) < abs(data$ai-het)
-    med <- weightedMedian(data$ai[ix],data$snvs[ix]) 
-    ai$cn2m0 <- ifelse (!is.null(med),med,expectedAt)
-
-    ## for cn1 (and 0)
-    ix <- (abs(regions$median - int$cn1) < 0.2*(int$cn2-int$cn1) )
-    data <- regions[ix,]
-    data <- data[!is.na(data$ai),]
-    expectedAt <- (ai$cn2m0+ai$cn2m1)*3/5     ## Decent estimate.
-    med <- weightedMedian(data$ai,data$snvs) ## Average allelic imbalance weighted on snp count
-    ai$cn1m0 <- ifelse (!is.null(med),med,expectedAt)    ## Will be NA if there was no CNNLOH
-    ai$cn0m0 <- NA #unimportant
-
-    ## for cn3:
-    ix <- (abs(regions$median - int$cn3) < 0.2*(int$cn4-int$cn3) )
-    data <- regions[ix,]
-    data <- data[!is.na(data$ai),]
-  
-    range <- ai$cn2m0-ai$cn2m1 #the distance between 2normal and CNNLOH
-    # get the 3(1) regions: 
-    expectedAt <- ai$cn2m1+range/3 # this is an approx if cn3m1 is absent.
-
-    ix <- (data$ai<ai$cn2m0) & (data$ai>ai$cn2m1) # take regions with less AI than cn2m0 but more than cn2m1
-    med <- weightedMedian(data$ai[ix],data$snvs[ix]) # average allelic imbalance weighted on snp count
-    ai$cn3m1 <- ifelse (!is.null(med),med,expectedAt)
-
-    # now for cn3m0
-    expectedAt <- ai$cn3m1 + range # approx of cn3m0
-    ix <- (abs(data$ai-expectedAt) < (expectedAt-ai$cn3m1)/4)  # take those much closer to exp than to cn3m1
-    med <- weightedMedian(data$ai[ix],data$snvs[ix]) 
-    try (if (med<ai$cn2m0) med <- NULL, silent=T)    ## in case of heterogeneities or strange signals, we disallow this effect.
-    ai$cn3m0 <- ifelse (!is.null(med),med,expectedAt)
-
-    if (is.na(ai$cn1m0)) ai$cn1m0 <- mean(ai$cn3m1,ai$cn2m0) ## If deletions were missing, place an estimate from cn3 
-    
-    ## now for cn4
-    ix <- abs(regions$median - int$cn4) < 0.2*(int$cn4-int$cn3) 
-    data <- regions[ix,]
-    data <- data[!is.na(data$ai),]
-
-    # get the 4(2) regions: they are at AI about cn2m1
-    expectedAt <- ai$cn2m1 # this is a good approx 
-
-    ix <- data$ai<ai$cn3m1 # let all below cn3m1 in
-    med <- weightedMedian(data$ai[ix],data$snvs[ix]) 
-    ai$cn4m2 <- ifelse (!is.null(med),med,expectedAt)
-    if (ai$cn4m2>ai$cn2m1) ai$cn4m2 <- ai$cn2m1 # sanity check.
-
-    data <- data[!ix,] # coutinue with the remaining
-    # now 4(1) has less ai than 3(0) -> sits at about cn3m1+(cn3m1-cn2m1).
-    expectedAt <- ai$cn3m1+(ai$cn3m1-ai$cn2m1)
-    ix <- abs(data$ai-expectedAt)<abs(data$ai-ai$cn3m0) # take those closer to exp than to 3(0)
-    med <- weightedMedian(data$ai[ix],data$snvs[ix]) 
-    ai$cn4m1 <- ifelse (!is.null(med),med,expectedAt)
-
-    # now 4(0) is at about 3(0) + [0.9 - 3(0)] * [3(0)-2(0)] / [0.9-2(0)] 
-    expectedAt <- ai$cn3m0 + (ceiling-ai$cn3m0)*(ai$cn3m0-ai$cn2m0)/(ceiling-ai$cn2m0)
-    ix <- abs(data$ai-expectedAt) < 0.2*(expectedAt-ai$cn4m2) # we take those very close to exp
-    med <- weightedMedian(data$ai[ix],data$snvs[ix])
-     try (if (med<ai$cn3m0) med <- NULL, silent=T)    ## in case of heterogeneities or strange signals, we disallow this effect.
-    ai$cn4m0 <- ifelse (!is.null(med),med,expectedAt)
-
-    ## generalization for higher copy numbers. it gets complicated.
-    for (cn in 5:maxCn) {
-    thisCn <- paste('cn',cn,sep='')
-    prevCn <- paste('cn',cn-1,sep='')
-    pprevCn <- paste('cn',cn-2,sep='')
-    
-    ix <- (abs(regions$median - int[thisCn][[1]]) < 0.2*(int[thisCn][[1]]-int[prevCn][[1]]) )
-    data <- regions[ix,]
-    data <- data[!is.na(data$ai),]
-    data <- data[data$np>50,] # long regions for safety
-
-    ## try to find variants, starting with LOH
-    # LOH such as 5(0)
-    m <- 0
-    thisVariant=paste(thisCn,'m',0,sep='')
-    c4m0 <- ai[paste(prevCn,'m',m,sep='')][[1]] # relative naming for clarity
-    c3m0 <- ai[paste(pprevCn,'m',m,sep='')][[1]]
-    
-    expectedAt <- ceiling-((ceiling-c4m0)*(ceiling-max(c4m0,c3m0))/(ceiling-min(c3m0,c4m0)))
-    #ai[thisVariant] <- expectedAt
-    # we then take those close to exp (within delta[3(0),4(0)])
-    ix <- abs(data$ai-expectedAt) < abs(c4m0 - c3m0) 
-    med <- weightedMedian(data$ai[ix],data$snvs[ix]) 
-    try (if (med<ai$cn4m0) med <- NULL, silent=T)    ## in case of heterogeneities or strange signals, we disallow this effect.
-    ai[thisVariant] <- ifelse (!is.null(med),med,expectedAt)
-    
-    ## then from balanced to less balanced
-    minorVariants=trunc(cn/2):1 
-    first <- T
-    for (m in minorVariants) {
-        thisVariant=paste(thisCn,'m',m,sep='')
-        if (m==cn/2) {
-        # We have balanced variant, so rather easy.
-        expectedAt <- ai[paste(pprevCn,'m',m-1,sep='')][[1]] # this is a good approx, balanced at cn-2
-        ix <- data$ai<ai[paste(prevCn,'m',m-1,sep='')][[1]] # let all below (cn-1, mcn-1) in
-        med <- weightedMedian(data$ai[ix],data$snvs[ix]) 
-        ai[thisVariant] <- ifelse (!is.null(med),med,expectedAt)
-	if (ai[thisVariant] > ai[paste(pprevCn,'m',m-1,sep='')][[1]]) ai[thisVariant] <- ai[paste(pprevCn,'m',m-1,sep='')][[1]] # sanity check.
-        first <- F
-        } else if (first) { 
-        # its not balanced but its the most balanced of the unbalanced. something like 5(2)
-        expectedAt <- 0.5*( ai[paste(prevCn,'m',m,sep='')][[1]] + ai[paste(pprevCn,'m',m-1,sep='')][[1]] ) # that means between 4(2) and 3(1)
-        ix <- abs(data$ai-expectedAt) < ( ai[paste(prevCn,'m',m,sep='')][[1]] - ai[paste(pprevCn,'m',m-1,sep='')][[1]] ) /3 # let all "between" 4(2) and 3(1) in
-        med <- weightedMedian(data$ai[ix],data$snvs[ix]) 
-        ai[thisVariant] <- ifelse (!is.null(med),med,expectedAt)
-        first <- F
-        } else {
-        # not the most balanced unbalanced variant, for example 5(1):
-        expectedAt <- ai[paste(thisCn,'m',minorVariants[1],sep='')][[1]] + (minorVariants[1]-m) * (ai[paste(thisCn,'m',0,sep='')][[1]] - ai[paste(thisCn,'m',minorVariants[1],sep='')][[1]]) / trunc(cn/2) # 5(2) + (which)* 5(0)-5(2) /(n)
-        ix <- abs(data$ai-expectedAt) < ( ai[paste(prevCn,'m',m,sep='')][[1]] - ai[paste(pprevCn,'m',m-1,sep='')][[1]] ) /3 # let all "between" 4(1) and 3(0) in
-        med <- weightedMedian(data$ai[ix],data$snvs[ix]) 
-        ai[thisVariant] <- ifelse (!is.null(med),med,expectedAt)
-        } 
-    } # done with minor variants
-    } # done with copy numbers
-
-    sampleInfo <- list('int'=int,'ai'=ai)
-
-	##  Assign total and minor copy numbers to all segments.
-    regions <- segs[!is.na(segs$median),]    ## This time, work on all segments available.
-
-    Cn <- NULL            ## Total copy number
-    mCn <- NULL            ## Minor allele copy number
-    fullCN <- NULL        ## Variant label. ('cnXmY')
-
-    intDist <- NULL        ## distance to certain Log-R
-    imbaDist <- NULL    ## distance to certain allelic imbalance
-
-	
-
-    ## give each segment the best matching CN and mCN
-    for (i in 1:nrow(regions)) 
-    	{
-   	 	# set total copy number
-    	distance <- Inf
-    	for (cn in 0:maxCn) 
-    		{
-        	t_int <- int[paste('cn',cn,sep='')][[1]]    ## get Log-R of particular cn from 'int'
-        	t_dis <- abs(regions$median[i]-t_int)            ## distance to that particular cn
-        	if (t_dis < distance) 
-        		{                        ## nearest so far, save.
-        		distance <- t_dis -> intDist[i]
-        		Cn[i] <- cn
-        		}
-    		}
-    	}
-    # Y makes CN0 sit very low, this is a fix on non-Y Cn0
-    #if ((Cn[i] == 1)&(intDist[i] > (int$cn2-int$cn1))) Cn[i] <- 0 ## currently not needed.
-	for (i in 1:nrow(regions)) 
-		{
-   		# set minor CN
-    	distance <- Inf
-    	if (Cn[i]<=1)
-    		{
-        	mCn[i] <- 0
-    		} 
-    	else if (!is.na(regions$ai[i])) 
-    		if (!is.nan(regions$ai[i])) 
-    			for (m in 0:trunc(Cn[i]/2)) 
-    				{
-        			t_ai <- ai[paste('cn',Cn[i],'m',m,sep='')][[1]]
-        			t_dis <- abs(regions$ai[i]-t_ai)
-        			if (t_dis < distance) 
-        				{
-        				distance <- t_dis -> imbaDist[i]
-        				mCn[i] <- m
-        				}
-    				}
-    		else 
-    			mCn[i] <- NA
-    			fullCN[i] <- paste('cn',Cn[i],'m',mCn[i],sep='') # Full description
-  		}
- 
-    regions$Cn <- Cn
-    regions$mCn <- mCn
-    regions$fullCN <- fullCN
-	
-	meanCn <- weightedMean(regions$Cn,regions$np)
-	ix1 <- regions$Cn==trunc(meanCn)
-	ix2 <- regions$Cn==ceiling(meanCn)
-	xdelta <- weightedMean(regions$median[ix2],regions$np[ix2]) - weightedMean(regions$median[ix1],regions$np[ix1])
-	expected_delta <- 1/meanCn
-	
-	tumor_percentDNA <- xdelta / expected_delta
-    tumor_percent <- tumor_percentDNA/(meanCn/2) / ( tumor_percentDNA/(meanCn/2) + (1-tumor_percentDNA) )
-
-    regions$meanCn <- meanCn
-    regions$tumor_percent <- tumor_percent
-    write.csv(regions,file='Copynumbers.csv')
-
-
-	karyotype_check(regions$chr,regions$start,regions$end,regions$mean,regions$ai,
-					regions$Cn,regions$mCn,list(int=int,ai=ai),name=name,
-					xlim=c(0,2.4),ylim=c(0.1,1))
-					
-	karyotype_chromsCN(regions$chr,regions$start,regions$end,regions$mean,regions$ai,
-						regions$Cn,regions$mCn,kbsegs$chr,
-						kbsegs$pos,kbsegs$ratio,alf$achr,alf$apos,
-						(1-alf$amin/alf$amax),name=name,xlim=c(0,2.4),
-						ylim=c(0.1,1))
-	}
\ No newline at end of file

Added: pkg/patchwork/R/patchwork.plot.r
===================================================================
--- pkg/patchwork/R/patchwork.plot.r	                        (rev 0)
[TRUNCATED]

To get the complete diff run:
    svnlook diff /svnroot/patchwork -r 109


More information about the Patchwork-commits mailing list