[Patchwork-commits] r65 - in pkg: . patchworkCG patchworkCG/R patchworkCG/data patchworkCG/man

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Tue Jan 10 17:12:55 CET 2012


Author: sebastian_d
Date: 2012-01-10 17:12:55 +0100 (Tue, 10 Jan 2012)
New Revision: 65

Added:
   pkg/patchworkCG/
   pkg/patchworkCG/DESCRIPTION
   pkg/patchworkCG/NAMESPACE
   pkg/patchworkCG/R/
   pkg/patchworkCG/R/CG_KaCh.r
   pkg/patchworkCG/R/CG_KaChCN.r
   pkg/patchworkCG/R/CG_Ka_check.r
   pkg/patchworkCG/R/CG_karyotype.r
   pkg/patchworkCG/R/patchwork.AI.r
   pkg/patchworkCG/R/patchwork.CG.r
   pkg/patchworkCG/R/patchwork.CGCNV.r
   pkg/patchworkCG/R/subfunctions_CGCNV.r
   pkg/patchworkCG/README
   pkg/patchworkCG/data/
   pkg/patchworkCG/data/ideogram.RData
   pkg/patchworkCG/man/
   pkg/patchworkCG/man/CG_KaCh.Rd
   pkg/patchworkCG/man/CG_KaChCN.Rd
   pkg/patchworkCG/man/CG_Ka_check.Rd
   pkg/patchworkCG/man/CG_karyotype.Rd
   pkg/patchworkCG/man/ideogram.Rd
   pkg/patchworkCG/man/patchwork.AI.Rd
   pkg/patchworkCG/man/patchwork.CG.Rd
   pkg/patchworkCG/man/patchwork.CGCNV.Rd
   pkg/patchworkCG/man/subfunctions_CGCNV.Rd
Log:
Adding patchworkCG, which deals with CompleteGenomics Data.

Added: pkg/patchworkCG/DESCRIPTION
===================================================================
--- pkg/patchworkCG/DESCRIPTION	                        (rev 0)
+++ pkg/patchworkCG/DESCRIPTION	2012-01-10 16:12:55 UTC (rev 65)
@@ -0,0 +1,9 @@
+Package: patchwork.CG
+Type: Package
+Title: Allele-specific Copy Number Analysis of CompleteGenomics Whole Genome data
+Version: 2.0
+Date: 2011-10-13
+Author: Markus Rasmussen, Sebastian DiLorenzo
+Maintainer: Markus Rasmussen <Markus.Rasmussen at medsci.uu.se>
+Description: Visualizes whole genome data
+License: GPL-2

Added: pkg/patchworkCG/NAMESPACE
===================================================================
--- pkg/patchworkCG/NAMESPACE	                        (rev 0)
+++ pkg/patchworkCG/NAMESPACE	2012-01-10 16:12:55 UTC (rev 65)
@@ -0,0 +1,13 @@
+export(patchwork.AI,
+		patchwork.CG,
+		CG_Ka_check,
+		CG_KaCh,
+		CG_KaChCN,
+		CG_karyotype,
+		patchwork.CGCNV,
+		#subfunctions_findCNs
+		weightedMedian,
+		weightedMean,
+		is.autosome,
+		deChrom_ucsc,
+		chrom_ucsc)
\ No newline at end of file

Added: pkg/patchworkCG/R/CG_KaCh.r
===================================================================
--- pkg/patchworkCG/R/CG_KaCh.r	                        (rev 0)
+++ pkg/patchworkCG/R/CG_KaCh.r	2012-01-10 16:12:55 UTC (rev 65)
@@ -0,0 +1,112 @@
+CG_KaCh <- function(chr,start,end,int,ai,mchr,mpos,mval,
+								sval,name='',xlim=c(0,2.4),ylim=c(0.3,1))  
+	{   
+    
+    #packagepath = system.file(package="patchwork")
+ 	
+    #load(paste(packagepath,"/data/ideogram.RData",sep=""))
+    
+    data(ideogram,package="patchworkGC")
+    
+    #create a color ramp palette to segue the color scheme for plot.
+    
+    colors_p <- colorRampPalette(c("#6600FF","#9900CC"),space="rgb")
+    colors_q <- colorRampPalette(c("#CC0099","#CC0000"),space="rgb")
+
+    ai[is.na(ai)]=0
+    aix=ai!=0
+    chr=chr[aix]
+    start=start[aix]
+    end=end[aix]
+    int=int[aix]
+    ai=ai[aix]
+    pos <- (start+end)/2
+    length=end-start
+    
+    size=rep(1,length(chr))
+    size[length>2000000]=2
+    size[length>5000000]=3
+    size[length>10000000]=4
+       
+    for (c in 1:24) 
+    	{
+        this <- ideogram[ideogram$c==c,]
+        
+        png(paste(name,'_KaCh_',this$chr,'.png',sep=''),width=1080,height=1300)
+        	layout(matrix(1:3,nrow=3,byrow=T), widths=10,heights=c(10,5,5))
+        	
+        	mar.default <- c(5,4,4,2) + 0.1
+			par(mar = mar.default + c(0, 4, 0, 0)) 
+        
+       	 	ix <- chr==this$chr
+        	col <- rep('#B0B0B070',length(chr))       
+        	col[ix & (pos < this$mid)] <- paste(colors_p(sum(ix & (pos < this$mid))), '70', sep='')  
+        	col[ix & (pos > this$mid)] <- paste(colors_q(sum(ix & (pos > this$mid))), '70', sep='') 
+        
+        	plot(c(int[!ix],int[ix]),c(ai[!ix],ai[ix]),
+            	pch=16,
+            	cex=c(size[!ix],size[ix]),
+            	cex.lab=3,
+            	#mar=c(0.1,0.1,0.1,0.1),
+		    	main = "",
+		    	xlab = "Total intensity -->",
+		    	ylab = '',
+		    	col = c(col[!ix],col[ix]),
+		    	xlim = xlim,
+		    	ylim = ylim,
+		    	yaxt="n",
+        		xaxt="n")
+        	axis(1,cex.axis=2)
+        	axis(2,cex.axis=2)
+        	mtext(text="Allelic imbalance -->",side=2,line=4,las=3,cex=2)
+		
+        	par(new=F)
+        	mix <- mchr==as.character(this$chr)
+        	#col <- rep('#D0D0D0D0',sum(mix))       
+        	#col[ix & (pos < this$mid)] <- paste(colors_p(sum(ix & (pos < this$mid))), '70', sep='')  
+        	#col[ix & (pos > this$mid)] <- paste(colors_q(sum(ix & (pos > this$mid))), '70', sep='') 
+        	col=rep('#000000',sum(ix))
+        	col[pos[ix] < this$mid] <- colors_p(sum(pos[ix] < this$mid))
+        	col[pos[ix] > this$mid] <- colors_q(sum(pos[ix] > this$mid))
+        
+        	plot(mpos[mix],mval[mix],
+            	pch=16,
+            	cex=2,
+            	cex.lab=3,
+            	#mar=c(0.1,0.1,0.1,0.1),
+    	    	main = "",
+		    	xlab = "Total intensity",
+		    	ylab = "",
+		    	col = '#00000010',
+		    	xlim = c(0,this$length),
+		    	ylim = xlim,
+		    	yaxt="n",
+        		xaxt="n")
+        	axis(1,cex.axis=2)
+        	axis(2,cex.axis=2)
+		    
+        	segments(x0=start[ix],x1=end[ix],
+        	    y0=int[ix],y1=int[ix],                
+            	col=col,
+            	lwd=4,)
+            
+       	 	par(new=F)       
+        	six <- mchr==as.character(this$chr)
+        	plot(mpos[six],sval[six],
+            	pch=16,
+            	cex=2,
+            	cex.lab=3,
+            	#mar=c(0.1,0.1,0.1,0.1),
+            	main = "",
+		    	xlab = "Allelic imbalance",
+		    	ylab = "",
+		    	col = '#00000010',
+		    	xlim = c(0,this$length),
+		    	ylim = c(0,1),
+		    	yaxt="n",
+        		xaxt="n")
+        	axis(1,cex.axis=2)
+        	axis(2,cex.axis=2)
+    	dev.off()
+    	}
+	}
\ No newline at end of file

Added: pkg/patchworkCG/R/CG_KaChCN.r
===================================================================
--- pkg/patchworkCG/R/CG_KaChCN.r	                        (rev 0)
+++ pkg/patchworkCG/R/CG_KaChCN.r	2012-01-10 16:12:55 UTC (rev 65)
@@ -0,0 +1,134 @@
+CG_KaChCN <- function(chr,start,end,int,ai,Cn,mCn,mchr,mpos,mval,
+								sval,name='',xlim=c(0,2.4),ylim=c(0,1),
+								maxCn=8)  
+	{   
+    
+    #packagepath = system.file(package="patchwork")
+ 	
+    #load(paste(packagepath,"/data/ideogram.RData",sep=""))
+    
+    data(ideogram,package="patchworkCG")
+    
+    colors_p <- colorRampPalette(c("#6600FF","#9900CC"),space="rgb")
+    colors_q <- colorRampPalette(c("#CC0099","#CC0000"),space="rgb")
+
+    ai[is.na(ai)]=0
+    aix=ai!=0
+    chr=chr[aix]
+    start=start[aix]
+    end=end[aix]
+    int=int[aix]
+    ai=ai[aix]
+    Cn=Cn[aix]
+    mCn=mCn[aix]
+    pos <- (start+end)/2
+    length=end-start
+    
+    size=rep(1,length(chr))
+    size[length>2000000]=2
+    size[length>5000000]=3
+    size[length>10000000]=4
+       
+    for (c in 1:24) 
+    	{
+        this <- ideogram[ideogram$c==c,]
+        
+        png(paste(name,'_KaChCN_',this$chr,'.png',sep=''),width=1080,height=1300)
+        	layout(matrix(1:4,nrow=4,byrow=T), widths=10,heights=c(10,5,5,5))
+        	
+        	mar.default <- c(5,4,4,2) + 0.1
+			par(mar = mar.default + c(0, 4, 0, 0)) 
+        	
+        	ix <- chr==this$chr
+        	col <- rep('#B0B0B070',length(chr))       
+        	col[ix & (pos < this$mid)] <- paste(colors_p(sum(ix & (pos < this$mid))), '70', sep='')  
+        	col[ix & (pos > this$mid)] <- paste(colors_q(sum(ix & (pos > this$mid))), '70', sep='') 
+        
+        	plot(c(int[!ix],int[ix]),c(ai[!ix],ai[ix]),
+            	pch=16,
+            	cex=c(size[!ix],size[ix]),
+            	cex.lab=3,
+            	#mar=c(0.1,0.1,0.1,0.1),
+    	    	main = "",
+		    	xlab = "Total intensity -->",
+		    	#ylab = "Allelic imbalance -->",
+		    	ylab = '',
+		    	col = c(col[!ix],col[ix]),
+		    	xlim = xlim,
+		    	ylim = ylim,
+		    	yaxt="n",
+        		xaxt="n")
+        	axis(1,cex.axis=2)
+        	axis(2,cex.axis=2)
+        	mtext(text="Allelic imbalance -->",side=2,line=4,las=3,cex=2)
+		    	
+			col=rep('#000000',sum(ix))
+        	col[pos[ix] < this$mid] <- colors_p(sum(pos[ix] < this$mid))
+        	col[pos[ix] > this$mid] <- colors_q(sum(pos[ix] > this$mid))
+        	par(new=F)
+        	
+        	plot(1,1,type='n',
+            	xlab='Total and minor copy number',
+            	ylab='',
+            	cex.lab=3,
+            	xlim = c(0,this$length),
+            	ylim = c(-0.1,8.1),
+		    	yaxt="n",
+        		xaxt="n")
+        	axis(1,cex.axis=2)
+        	axis(2,cex.axis=2)
+            	
+        	segments(x0=start[ix],x1=end[ix],
+         	   y0=Cn[ix],y1=Cn[ix],                
+            	col=col,
+            	lwd=5,)   #Om fel, prova ta bort detta komma
+            	
+        	segments(x0=start[ix],x1=end[ix],
+            	y0=mCn[ix],y1=mCn[ix],                
+            	col='#BBBBBB',
+            	lwd=5,) #Om fel, prova ta bort detta komma
+ 
+        	par(new=F)
+        	mix <- as.character(mchr)==this$chr        
+        	plot(mpos[mix],mval[mix],
+            	pch=16,
+            	cex=2,
+            	cex.lab=3,
+            	#mar=c(0.1,0.1,0.1,0.1),
+    	    	main = "",
+		    	xlab = "Total intensity",
+		    	ylab = "",
+		    	col = '#00000010',
+		    	xlim = c(0,this$length),
+		    	ylim = xlim,
+		    	yaxt="n",
+        		xaxt="n")
+        	axis(1,cex.axis=2)
+        	axis(2,cex.axis=2)
+		    	
+        	segments(x0=start[ix],x1=end[ix],
+            	y0=int[ix],y1=int[ix],                
+            	col=col,
+            	lwd=4,)
+            	
+        	par(new=F)       
+        	six <- as.character(mchr)==this$chr
+        	
+        	plot(mpos[six],sval[six],
+        	    pch=16,
+            	cex=2,
+            	cex.lab=3,
+            	#mar=c(0.1,0.1,0.1,0.1),
+            	main = "",
+		    	xlab = "Allelic imbalance",
+		    	ylab = "",
+		    	col = '#00000010',
+		    	xlim = c(0,this$length),
+		    	ylim = c(0,1),
+		    	yaxt="n",
+        		xaxt="n")
+        	axis(1,cex.axis=2)
+        	axis(2,cex.axis=2)
+        dev.off()
+    	}
+	}
\ No newline at end of file

Added: pkg/patchworkCG/R/CG_Ka_check.r
===================================================================
--- pkg/patchworkCG/R/CG_Ka_check.r	                        (rev 0)
+++ pkg/patchworkCG/R/CG_Ka_check.r	2012-01-10 16:12:55 UTC (rev 65)
@@ -0,0 +1,66 @@
+CG_Ka_check<- function(chr,start,end,int,ai,Cn,mCn,t,name='',xlim=c(0,2.4),ylim=c(0,1))
+	{
+	## TAPS scatter plot of a full sample, used for visual quality control. 
+    png(paste(name,'_Ka_check.png',sep=''),width=1300,height=1300)
+ 
+
+    	colors_p <- colorRampPalette(c("#6600FF","#9900CC"),space="rgb")
+    	colors_q <- colorRampPalette(c("#CC0099","#CC0000"),space="rgb")
+
+    	#layout(matrix(1:25,nrow=5,byrow=T), widths=1,heights=1)
+    
+    	aix=!(is.na(mCn)|is.na(Cn))
+    	chr=chr[aix]
+    	start=start[aix]
+    	end=end[aix]
+    	int=int[aix]
+    	ai=ai[aix]
+    	Cn=Cn[aix]
+    	mCn=mCn[aix]
+    	pos <- (start+end)/2
+    	length=end-start
+    	labels=paste(Cn,mCn,sep='m')
+    
+    	size=rep(1,length(chr))
+    	size[length>2000000]=2
+    	size[length>5000000]=3
+    	size[length>10000000]=4
+    	
+		## The variants present in this data
+    	variants <- unique(labels)
+    	variants_data <- NULL
+    	
+    	## Extracts the Log-R and Allelic Imbalance Ratio of variants
+    	for (v in variants) 
+    		{ 
+        	t_cn <- paste('cn',strsplit(v,'m')[[1]][1],sep='')
+        	t_int <- as.numeric(t$int[t_cn][[1]])
+        	t_ai <- as.numeric(t$ai[paste('cn',v,sep='')][[1]])
+        	variants_data <- rbind(variants_data,c(t_int,t_ai))
+    		}
+  
+    	col <- rep('#B0B0B070',length(chr))
+         
+   	 	plot(int,ai,
+        	pch=16,
+        	cex=c(size),
+        	cex.lab=3,
+        	mar=c(0.1,0.1,0.1,0.1),
+    		main = "",
+			xlab = name,
+			ylab = "",
+			col = col,
+			xlim = xlim,
+			ylim = ylim,
+			yaxt="n",
+        	xaxt="n")
+        	axis(1,cex.axis=2)
+        	axis(2,cex.axis=2)
+        	
+		text(variants_data[,1],variants_data[,2],
+        	labels=variants,
+        	col='black',
+        	cex=2)
+    
+    dev.off()
+	}

Added: pkg/patchworkCG/R/CG_karyotype.r
===================================================================
--- pkg/patchworkCG/R/CG_karyotype.r	                        (rev 0)
+++ pkg/patchworkCG/R/CG_karyotype.r	2012-01-10 16:12:55 UTC (rev 65)
@@ -0,0 +1,69 @@
+CG_karyotype <- function(chr,start,end,int,ai,
+						name='',xlim=c(0,2.4),ylim=c(0.3,1)) 
+	{   
+    png(paste(name,'_karyotype.png',sep=''),width=1300,height=1300)
+ 
+    	#packagepath = system.file(package="patchwork")
+ 	
+    	#load(paste(packagepath,"/data/ideogram.RData",sep=""))
+    	
+    	data(ideogram,package="patchworkCG")
+    	
+		colors_p <- colorRampPalette(c("#6600FF","#9900CC"),space="rgb")
+    	colors_q <- colorRampPalette(c("#CC0099","#CC0000"),space="rgb")
+
+    	layout(matrix(1:25,nrow=5,byrow=T), widths=1,heights=1)
+    	
+    	aix=ai!=0
+    	chr=chr[aix]
+    	start=start[aix]
+    	end=end[aix]
+    	int=int[aix]
+    	ai=ai[aix]
+    	pos <- (start+end)/2
+    	length=end-start
+    
+    	size=rep(0.5,length(chr))
+    	size[length>2000000]=1
+    	size[length>5000000]=2
+    	size[length>10000000]=3
+ 
+    
+    	for (c in 1:24) 
+    		{
+
+        	this <- ideogram[ideogram$c==c,]
+        	ix <- chr==this$chr
+        	ix[is.na(ix)]=FALSE
+
+        	col <- rep('#B0B0B070',length(chr))
+        
+        	col[ix & (pos < this$mid)] <- paste(colors_p(sum(ix & (pos < this$mid))), '70', sep='')  
+        	col[ix & (pos > this$mid)] <- paste(colors_q(sum(ix & (pos > this$mid))), '70', sep='')
+ 
+        	plot(c(int[!ix],int[ix]),c(ai[!ix],ai[ix]),
+        		pch=16,
+        		cex=c(size[!ix],size[ix]),
+        		cex.lab=3,
+        		mar=c(0.1,0.1,0.1,0.1),
+				main = "",
+				xlab = this$chr,
+				ylab = "",
+				col = c(col[!ix],col[ix]),
+				xlim = xlim,
+				ylim = ylim,
+		    	yaxt="n",
+        		xaxt="n")
+        	axis(1,cex.axis=1.5)
+        	axis(2,cex.axis=1.5)
+			par(new=F)  
+    		}
+    dev.off()
+	}
+
+
+    
+    
+
+
+

Added: pkg/patchworkCG/R/patchwork.AI.r
===================================================================
--- pkg/patchworkCG/R/patchwork.AI.r	                        (rev 0)
+++ pkg/patchworkCG/R/patchwork.AI.r	2012-01-10 16:12:55 UTC (rev 65)
@@ -0,0 +1,20 @@
+patchwork.AI <- function(segs,mastervar)
+	{
+	segs$ai <- segs$snvs <- segs$ratio <- NA
+	
+	for (i in 1:nrow(segs))
+		{
+		#Calculate ratio between relcov and segment lengths
+		segs$ratio[i] = segs$relcov[i] / (sum(as.numeric(segs$relcov*(segs$end-segs$start))) / sum(as.numeric(segs$end - segs$start)))
+		
+		#Index of correct snp positions to pull snp data from
+		#                right chromosome
+		ix = mastervar$chr==as.character(segs$chr[i]) &
+		#    right area (between start and end)
+			(mastervar$begin > segs$start[i] & mastervar$begin < segs$end[i])
+		segs$snvs[i] = sum(ix)
+		segs$ai[i] = 1 - sum(mastervar$min[ix]) / sum(mastervar$max[ix])
+		}
+	segs$ai[is.nan(segs$ai)]=0
+	return(segs)
+	}
\ No newline at end of file

Added: pkg/patchworkCG/R/patchwork.CG.r
===================================================================
--- pkg/patchworkCG/R/patchwork.CG.r	                        (rev 0)
+++ pkg/patchworkCG/R/patchwork.CG.r	2012-01-10 16:12:55 UTC (rev 65)
@@ -0,0 +1,135 @@
+patchwork.CG <- function(path,name='CG_sample_',override_file_input = FALSE)
+	{
+	
+	#------------------------------------------------#
+	# Read input files
+	#------------------------------------------------#
+	
+	if(override_file_input==TRUE)
+		{
+		#Prompt for input of path to your files.
+		cat("ex: ~/user/documents/projekt/masterVarBeta-123-456-ASM-T1-N1.tsv")
+		cat("\n Enter complete path AND filename to your masterVarBeta file:")
+		mv = scan(n=1)
+		cat("\n ex: ~/user/documents/projekt/somaticCnvSegmentsNondiploidBeta-123-456-ASM-T1-N1.tsv")
+		cat("\n Enter complete path AND filename to your somaticCnvSegmentsNondiploidBeta file:")
+		sm = scan(n=1)
+		
+		cat("\n Reading files from ASM folder \n")
+		
+		#Read masterVarBeta
+		mastervar = read.table(mv, sep="\t",skip=21,
+						colClasses=c("NULL","NULL","character","integer","integer",
+						"NULL","character",rep("NULL",16),"numeric","numeric",
+						rep("NULL",17),"integer","integer",rep("NULL",5)))
+						
+		#Read somaticCnvSegmentsNondiploidBeta in ASM/CNV folder
+		segm = read.table(sm,sep="\t",skip=21,
+				colClasses=c("character","integer","integer","numeric","numeric",
+				rep("NULL",7)))
+				
+		cat("File input complete \n")
+		}
+	else
+		{
+		mastervar_ = list.files(path=path,pattern="masterVar")
+		path_segm = paste(path,"/CNV",sep="")
+		segm_ = list.files(path=path_segm,pattern="somaticCnvSegmentsNondiploid")
+	
+		cat("\n Reading files from ASM folder \n")
+	
+		#Read mastervarbeta file in ASM folder
+		mastervar = read.table(paste(path,"/",mastervar_,sep=""),
+							sep="\t",
+							colClasses=c("NULL","NULL","character","integer","integer",
+							"NULL","character",rep("NULL",16),"numeric","numeric",
+							rep("NULL",17),"integer","integer",rep("NULL",5)),
+							skip=21)
+	
+		#Read somaticCnvSegmentsNondiploidBeta in ASM/CNV folder
+		segm = read.table(paste(path_segm,"/",segm_,sep=""),
+					sep="\t",skip=21,
+					colClasses=c("character","integer","integer","numeric","numeric",
+					rep("NULL",7)))
+
+		colnames(segm)=c("chr","start","end","avgnormcov","relcov")
+							
+		colnames(mastervar)=c("chr","begin","end","vartype","ref_count","tot_count",
+									"ref_countN","tot_countN")
+	
+		cat("File input complete \n")
+		}
+	
+	#------------------------------------------------#
+	# Filter out unecessary/unwanted values
+	#------------------------------------------------#
+	
+	#Remove all rows that have NA values in column 8.
+	mastervar = mastervar[!is.na(mastervar[,8]),]
+	
+	#Only keep when referenceallelereadcount/totalreadcount is between 0.2 and 0.8
+	ix = ((mastervar$ref_countN/mastervar$tot_countN) > 0.2 &
+		(mastervar$ref_countN/mastervar$tot_countN) < 0.8)
+		
+	mastervar = mastervar[ix,]
+	
+	#Remove NA values generated by for example 0/0
+	mastervar = mastervar[!is.na(mastervar[,8]),]
+
+	#only keep stuff in column 4 that is snp
+	#mastervar = mastervar[mastervar$vartype == "snp",]
+	#norm_mv = norm_mv[norm_mv$vartype == "snp",]
+	
+	#remove all NA values. (there are no reads on that position)
+	#mastervar = mastervar[!is.na(mastervar[,6]),]
+
+	#ix = mastervar$ref_count < 3 | mastervar$ref_count == 0 | mastervar$ref_count == mastervar$tot_count
+	#mastervar = mastervar[!ix,]
+	
+	#ix = mastervar$ref_count == 0
+	#mastervar = mastervar[!ix,]
+	
+	#ix = mastervar$ref_count == mastervar$tot_count
+	#mastervar = mastervar[!ix,]
+	
+	#------------------------------------------------#
+	# Add and calculate some columns for mastervar
+	#------------------------------------------------#
+	
+	cat("Performing calculations \n")
+	
+	mastervar$ratio <- mastervar$min <- mastervar$max <- mastervar$mut_count <- NA
+
+	#Calculate and fill min/max columns
+	mastervar$mut_count = mastervar$tot_count - mastervar$ref_count
+	mastervar$max = apply(mastervar[,c(5,9)],1,max)
+	mastervar$min = mastervar$tot_count - mastervar$max
+	mastervar$ratio = mastervar$tot_count / mean(mastervar$tot_count)
+	
+	#------------------------------------------------#
+	# Create segs using patchwork.AI, save files
+	#------------------------------------------------#
+	
+	segs = patchwork.AI(segm,mastervar)
+	
+	cat("Calculations complete \n")
+	
+	cat("Saving objects to CG.Rdata \n")
+	
+	save(segs,mastervar,file="CG.Rdata")
+	
+	#------------------------------------------------#
+	# Plot
+	#------------------------------------------------#
+	
+	
+	cat("Initiating Plotting \n")
+	CG_karyotype(segs$chr,segs$start,segs$end,segs$ratio,segs$ai,
+					name=name,xlim=c(0,2.4),ylim=c(0,1))
+	
+	CG_KaCh(segs$chr,segs$start,segs$end,segs$ratio,segs$ai,
+					mastervar$chr,mastervar$begin,mastervar$ratio,(1 - mastervar$min/mastervar$max),
+					name=name,xlim=c(0,2.4),ylim=c(0,1))
+	cat("Plotting Complete \n")
+	cat("patchwork.CG Complete \n Please read documentation on running patchwork.CGCNV \n")
+	}
\ No newline at end of file

Added: pkg/patchworkCG/R/patchwork.CGCNV.r
===================================================================
--- pkg/patchworkCG/R/patchwork.CGCNV.r	                        (rev 0)
+++ pkg/patchworkCG/R/patchwork.CGCNV.r	2012-01-10 16:12:55 UTC (rev 65)
@@ -0,0 +1,284 @@
+patchwork.CGCNV = function(cn2,delta,het,hom,maxCn=8,ceiling=1,name="CGCNV_")
+	{
+	
+	#packagepath = system.file(package="patchwork")
+	#load(paste(packagepath,"/data/commonSnps132.RData",sep=""))
+	#load(paste(packagepath,"/data/ideogram.RData",sep=""))
+	data(ideogram,package="patchworkCG")
+	load("CG.Rdata")
+
+	voidchrom <- c('chrX','chrY') # may add non-integer chroms here....
+
+    tix=NULL     #temporary index
+    int=NULL     ## contains coverage estimate of each (total) copy number
+    ai=NULL         ## contains Allelic Imbalance Ratio estimate of each copy number variant.
+    regions <- segs
+    regions$np <- round((regions$end - regions$start)/10000)
+    regions <- regions[(is.autosome(regions$chr)&regions$np>50)&(!is.nan(regions$ai)),] ## will use these regions
+    regions <- regions[!is.na(regions$ratio),]
+
+    ## likely cn2 regions sit within delta/3 from cn2.
+    expectedAt <- cn2
+    tix$cn2 <- abs(regions$ratio - expectedAt) < (delta/3)    ## index of likely cn2 regions
+    temp <- regions[tix$cn2,]                                ## cn2 regions
+    med <- weightedMedian(temp$median,temp$np)            ## improved value of Log-R at cn2 (returns NULL if theres nothing there)
+    int$cn2 <- ifelse(!is.null(med),med,expectedAt)            ## saved to int.
+
+    ## likely cn1 regions sit at about cn2 - delta:
+    d <- delta                                                 ## the (Log-R) distance to cn1
+    expectedAt <- int$cn2-d                                     ## cn1 is expected here
+    tix$cn1 <- abs(regions$ratio - expectedAt) < (d/3)        ## index of likely cn1 regions
+    temp <- regions[tix$cn1,]
+    med <- weightedMedian(temp$median,temp$np)
+    int$cn1 <- ifelse(!is.null(med),med,expectedAt)
+
+    ## likely cn0 regions sit below cn1 - delta:
+    d <- int$cn2-int$cn1
+    expectedAt <- int$cn1-d                                     
+    tix$cn0 <- regions$ratio < expectedAt
+    temp <- regions[tix$cn0,]
+    med <- weightedMedian(temp$median,temp$np)
+    int$cn0 <- ifelse(!is.null(med),med,expectedAt)
+
+    ## likely cn3 regions sit at about cn2+delta
+    d <- delta
+    expectedAt <- int$cn2+d
+    tix$cn3 <- abs(regions$ratio - expectedAt) < (d/3)
+    temp <- regions[tix$cn3,]
+    med <- weightedMedian(temp$median,temp$np)
+    int$cn3 <- ifelse(!is.null(med),med,expectedAt)
+
+    ## cn4 follows at ...
+    d <- delta
+    expectedAt <- int$cn3+d
+    tix$cn4 <- abs(regions$ratio - expectedAt) < (d/4)
+    temp <- regions[tix$cn4,]
+    med <- weightedMedian(temp$median,temp$np)
+    int$cn4 <- ifelse(!is.null(med),med,expectedAt)
+
+    ## generalized for higher cns
+    for (cn in 5:maxCn)
+    	{
+    	thisCn <- paste('cn',cn,sep='')
+    	prevCn <- paste('cn',cn-1,sep='')
+    	pprevCn <- paste('cn',cn-2,sep='')
+    	d <- delta #dmin*(int[prevCn][[1]]-int[pprevCn][[1]])
+    	expectedAt <- int[prevCn][[1]]+d
+    	tix[[thisCn]] <- abs(regions$ratio - expectedAt) < (d/5)
+    	temp <- regions[tix[thisCn][[1]],]
+    	med <- weightedMedian(temp$median,temp$np)
+    	int[thisCn] <- ifelse(!is.null(med),med,expectedAt)
+    	}
+
+	#  smooth the cn relationship ?
+
+    ## at cn2, find the variant clusters (normal and CNNLOH)
+    ix <- (abs(regions$ratio - int$cn2) < 0.2*(int$cn3-int$cn2) ) # taking only closely-matching segments
+    data <- regions[ix,]
+    data <- data[!is.na(data$ai),] # ...with a calculated allelic imbalance.
+
+    expectedAt <- het
+    ix <- 2*abs(data$ai-het) < abs(data$ai-hom)
+    med <- weightedMedian(data$ai[ix],data$snvs[ix]) 
+    ai$cn2m1 <- ifelse (!is.null(med),med,expectedAt)
+
+    expectedAt <- hom
+    ix <- 2*abs(data$ai-hom) < abs(data$ai-het)
+    med <- weightedMedian(data$ai[ix],data$snvs[ix]) 
+    ai$cn2m0 <- ifelse (!is.null(med),med,expectedAt)
+
+    ## for cn1 (and 0)
+    ix <- (abs(regions$ratio - int$cn1) < 0.2*(int$cn2-int$cn1) )
+    data <- regions[ix,]
+    data <- data[!is.na(data$ai),]
+    expectedAt <- (ai$cn2m0+ai$cn2m1)*3/5     ## Decent estimate.
+    med <- weightedMedian(data$ai,data$snvs) ## Average allelic imbalance weighted on snp count
+    ai$cn1m0 <- ifelse (!is.null(med),med,expectedAt)    ## Will be NA if there was no CNNLOH
+    ai$cn0m0 <- NA #unimportant
+
+    ## for cn3:
+    ix <- (abs(regions$ratio - int$cn3) < 0.2*(int$cn4-int$cn3) )
+    data <- regions[ix,]
+    data <- data[!is.na(data$ai),]
+  
+    range <- ai$cn2m0-ai$cn2m1 #the distance between 2normal and CNNLOH
+    # get the 3(1) regions: 
+    expectedAt <- ai$cn2m1+range/3 # this is an approx if cn3m1 is absent.
+
+    ix <- (data$ai<ai$cn2m0) & (data$ai>ai$cn2m1) # take regions with less AI than cn2m0 but more than cn2m1
+    med <- weightedMedian(data$ai[ix],data$snvs[ix]) # average allelic imbalance weighted on snp count
+    ai$cn3m1 <- ifelse (!is.null(med),med,expectedAt)
+
+    # now for cn3m0
+    expectedAt <- ai$cn3m1 + range # approx of cn3m0
+    ix <- (abs(data$ai-expectedAt) < (expectedAt-ai$cn3m1)/4)  # take those much closer to exp than to cn3m1
+    med <- weightedMedian(data$ai[ix],data$snvs[ix]) 
+    try (if (med<ai$cn2m0) med <- NULL, silent=T)    ## in case of heterogeneities or strange signals, we disallow this effect.
+    ai$cn3m0 <- ifelse (!is.null(med),med,expectedAt)
+
+    if (is.na(ai$cn1m0)) ai$cn1m0 <- mean(ai$cn3m1,ai$cn2m0) ## If deletions were missing, place an estimate from cn3 
+    
+    ## now for cn4
+    ix <- abs(regions$ratio - int$cn4) < 0.2*(int$cn4-int$cn3) 
+    data <- regions[ix,]
+    data <- data[!is.na(data$ai),]
+
+    # get the 4(2) regions: they are at AI about cn2m1
+    expectedAt <- ai$cn2m1 # this is a good approx 
+
+    ix <- data$ai<ai$cn3m1 # let all below cn3m1 in
+    med <- weightedMedian(data$ai[ix],data$snvs[ix]) 
+    ai$cn4m2 <- ifelse (!is.null(med),med,expectedAt)
+    if (ai$cn4m2>ai$cn2m1) ai$cn4m2 <- ai$cn2m1 # sanity check.
+
+    data <- data[!ix,] # coutinue with the remaining
+    # now 4(1) has less ai than 3(0) -> sits at about cn3m1+(cn3m1-cn2m1).
+    expectedAt <- ai$cn3m1+(ai$cn3m1-ai$cn2m1)
+    ix <- abs(data$ai-expectedAt)<abs(data$ai-ai$cn3m0) # take those closer to exp than to 3(0)
+    med <- weightedMedian(data$ai[ix],data$snvs[ix]) 
+    ai$cn4m1 <- ifelse (!is.null(med),med,expectedAt)
+
+    # now 4(0) is at about 3(0) + [0.9 - 3(0)] * [3(0)-2(0)] / [0.9-2(0)] 
+    expectedAt <- ai$cn3m0 + (ceiling-ai$cn3m0)*(ai$cn3m0-ai$cn2m0)/(ceiling-ai$cn2m0)
+    ix <- abs(data$ai-expectedAt) < 0.2*(expectedAt-ai$cn4m2) # we take those very close to exp
+    med <- weightedMedian(data$ai[ix],data$snvs[ix])
+     try (if (med<ai$cn3m0) med <- NULL, silent=T)    ## in case of heterogeneities or strange signals, we disallow this effect.
+    ai$cn4m0 <- ifelse (!is.null(med),med,expectedAt)
+
+    ## generalization for higher copy numbers. it gets complicated.
+    for (cn in 5:maxCn) {
+    thisCn <- paste('cn',cn,sep='')
+    prevCn <- paste('cn',cn-1,sep='')
+    pprevCn <- paste('cn',cn-2,sep='')
+    
+    ix <- (abs(regions$ratio - int[thisCn][[1]]) < 0.2*(int[thisCn][[1]]-int[prevCn][[1]]) )
+    data <- regions[ix,]
+    data <- data[!is.na(data$ai),]
+    data <- data[data$np>50,] # long regions for safety
+
+    ## try to find variants, starting with LOH
+    # LOH such as 5(0)
+    m <- 0
+    thisVariant=paste(thisCn,'m',0,sep='')
+    c4m0 <- ai[paste(prevCn,'m',m,sep='')][[1]] # relative naming for clarity
+    c3m0 <- ai[paste(pprevCn,'m',m,sep='')][[1]]
+    
+    expectedAt <- ceiling-((ceiling-c4m0)*(ceiling-max(c4m0,c3m0))/(ceiling-min(c3m0,c4m0)))
+    #ai[thisVariant] <- expectedAt
+    # we then take those close to exp (within delta[3(0),4(0)])
+    ix <- abs(data$ai-expectedAt) < abs(c4m0 - c3m0) 
+    med <- weightedMedian(data$ai[ix],data$snvs[ix]) 
+    try (if (med<ai$cn4m0) med <- NULL, silent=T)    ## in case of heterogeneities or strange signals, we disallow this effect.
+    ai[thisVariant] <- ifelse (!is.null(med),med,expectedAt)
+    
+    ## then from balanced to less balanced
+    minorVariants=trunc(cn/2):1 
+    first <- T
+    for (m in minorVariants) {
+        thisVariant=paste(thisCn,'m',m,sep='')
+        if (m==cn/2) {
+        # We have balanced variant, so rather easy.
+        expectedAt <- ai[paste(pprevCn,'m',m-1,sep='')][[1]] # this is a good approx, balanced at cn-2
+        ix <- data$ai<ai[paste(prevCn,'m',m-1,sep='')][[1]] # let all below (cn-1, mcn-1) in
+        med <- weightedMedian(data$ai[ix],data$snvs[ix]) 
+        ai[thisVariant] <- ifelse (!is.null(med),med,expectedAt)
+	if (ai[thisVariant] > ai[paste(pprevCn,'m',m-1,sep='')][[1]]) ai[thisVariant] <- ai[paste(pprevCn,'m',m-1,sep='')][[1]] # sanity check.
+        first <- F
+        } else if (first) { 
+        # its not balanced but its the most balanced of the unbalanced. something like 5(2)
+        expectedAt <- 0.5*( ai[paste(prevCn,'m',m,sep='')][[1]] + ai[paste(pprevCn,'m',m-1,sep='')][[1]] ) # that means between 4(2) and 3(1)
+        ix <- abs(data$ai-expectedAt) < ( ai[paste(prevCn,'m',m,sep='')][[1]] - ai[paste(pprevCn,'m',m-1,sep='')][[1]] ) /3 # let all "between" 4(2) and 3(1) in
+        med <- weightedMedian(data$ai[ix],data$snvs[ix]) 
+        ai[thisVariant] <- ifelse (!is.null(med),med,expectedAt)
+        first <- F
+        } else {
+        # not the most balanced unbalanced variant, for example 5(1):
+        expectedAt <- ai[paste(thisCn,'m',minorVariants[1],sep='')][[1]] + (minorVariants[1]-m) * (ai[paste(thisCn,'m',0,sep='')][[1]] - ai[paste(thisCn,'m',minorVariants[1],sep='')][[1]]) / trunc(cn/2) # 5(2) + (which)* 5(0)-5(2) /(n)
+        ix <- abs(data$ai-expectedAt) < ( ai[paste(prevCn,'m',m,sep='')][[1]] - ai[paste(pprevCn,'m',m-1,sep='')][[1]] ) /3 # let all "between" 4(1) and 3(0) in
+        med <- weightedMedian(data$ai[ix],data$snvs[ix]) 
+        ai[thisVariant] <- ifelse (!is.null(med),med,expectedAt)
+        } 
+    } # done with minor variants
+    } # done with copy numbers
+
+    sampleInfo <- list('int'=int,'ai'=ai)
+
+	##  Assign total and minor copy numbers to all segments.
+    regions <- segs[!is.na(segs$ratio),]    ## This time, work on all segments available.
+
+    Cn <- NULL            ## Total copy number
+    mCn <- NULL            ## Minor allele copy number
+    fullCN <- NULL        ## Variant label. ('cnXmY')
+
+    intDist <- NULL        ## distance to certain Log-R
+    imbaDist <- NULL    ## distance to certain allelic imbalance
+
+	
+
+    ## give each segment the best matching CN and mCN
+    for (i in 1:nrow(regions)) 
+    	{
+   	 	# set total copy number
+    	distance <- Inf
+    	for (cn in 0:maxCn) 
+    		{
+        	t_int <- int[paste('cn',cn,sep='')][[1]]    ## get Log-R of particular cn from 'int'
+        	t_dis <- abs(regions$ratio[i]-t_int)            ## distance to that particular cn
+        	if (t_dis < distance) 
+        		{                        ## nearest so far, save.
+        		distance <- t_dis -> intDist[i]
+        		Cn[i] <- cn
+        		}
+    		}
+    	}
+    # Y makes CN0 sit very low, this is a fix on non-Y Cn0
+    #if ((Cn[i] == 1)&(intDist[i] > (int$cn2-int$cn1))) Cn[i] <- 0 ## currently not needed.
+	for (i in 1:nrow(regions)) 
+		{
+   		# set minor CN
+    	distance <- Inf
+    	if (Cn[i]<=1)
+    		{
+        	mCn[i] <- 0
+    		} 
+    	else if (!is.na(regions$ai[i])) 
+    		if (!is.nan(regions$ai[i])) 
+    			for (m in 0:trunc(Cn[i]/2)) 
+    				{
+        			t_ai <- ai[paste('cn',Cn[i],'m',m,sep='')][[1]]
+        			t_dis <- abs(regions$ai[i]-t_ai)
+        			if (t_dis < distance) 
+        				{
+        				distance <- t_dis -> imbaDist[i]
+        				mCn[i] <- m
+        				}
+    				}
+    		else 
+    			mCn[i] <- NA
+    			fullCN[i] <- paste('cn',Cn[i],'m',mCn[i],sep='') # Full description
+  		}
+ 
+    regions$Cn <- Cn
+    regions$mCn <- mCn
+    regions$fullCN <- fullCN
+
+	write.csv(regions,file='Copynumbers.csv')
+	
+	meanCn <- weightedMean(regions$Cn,regions$np)
+	ix1 <- regions$Cn==trunc(meanCn)
+	ix2 <- regions$Cn==ceiling(meanCn)
+	xdelta <- weightedMean(regions$ratio[ix2],regions$np[ix2]) - weightedMean(regions$ratio[ix1],regions$np[ix1])
+	expected_delta <- 1/meanCn
+	
+	tumor_percent <- xdelta / expected_delta
+
+	CG_Ka_check(regions$chr,regions$start,regions$end,regions$ratio,regions$ai,
+					regions$Cn,regions$mCn,list(int=int,ai=ai),name=name,
+					xlim=c(0,2.4),ylim=c(0,1))
+					
+	CG_KaChCN(regions$chr,regions$start,regions$end,regions$ratio,regions$ai,
+						regions$Cn,regions$mCn,mastervar$chr,
+						mastervar$begin,mastervar$ratio,
+						(1 - mastervar$min/mastervar$max),name=name,xlim=c(0,2.4),
+						ylim=c(0,1))
+	}
\ No newline at end of file

Added: pkg/patchworkCG/R/subfunctions_CGCNV.r
===================================================================
--- pkg/patchworkCG/R/subfunctions_CGCNV.r	                        (rev 0)
+++ pkg/patchworkCG/R/subfunctions_CGCNV.r	2012-01-10 16:12:55 UTC (rev 65)
@@ -0,0 +1,50 @@
+###################### functions
+weightedMedian <- function(data,weights) {
+    ix <- rep(T,length(data))
+    if (length(ix)==0) return(NULL)
+    try( ix <- !is.na(data+weights) & !is.nan(data+weights), silent=T)
+    try( return (median(rep(data[ix],ceiling(weights[ix])))), silent=T)
+    return (NULL)
+}
+weightedMean <- function(data,weights) {
+    ix <- rep(T,length(data))
+    if (length(ix)==0) return(NULL)
+    try( ix <- !is.na(data+weights) & !is.nan(data+weights), silent=T)
+    try( return (mean(rep(data[ix],ceiling(weights[ix])))), silent=T)
+    return (NULL)
+}
+is.autosome <- function(vector) {
+    temp <- deChrom_ucsc(vector)
+    return (temp<23)
+}
+deChrom_ucsc <- function(data) {
+##  chroms in 1:24 and chr1:chrY format conversion
+if (length(data)==0) return (data)
+keep_index=NULL
+CHR=rep(0,length(data))
+existing=unique(data)
+for (i in 1:length(existing))   {
+  temp=strsplit(as.character(existing[i]),'_')
[TRUNCATED]

To get the complete diff run:
    svnlook diff /svnroot/patchwork -r 65


More information about the Patchwork-commits mailing list