[Patchwork-commits] r35 - in pkg: . patchwork patchwork/R patchwork/inst patchwork/inst/perl patchwork/inst/python patchwork/inst/python/pysam patchwork/man

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Wed Nov 30 17:32:01 CET 2011


Author: sebastian_d
Date: 2011-11-30 17:32:00 +0100 (Wed, 30 Nov 2011)
New Revision: 35

Added:
   pkg/patchwork/
   pkg/patchwork/._.DS_Store
   pkg/patchwork/DESCRIPTION
   pkg/patchwork/NAMESPACE
   pkg/patchwork/R/
   pkg/patchwork/R/karyotype.r
   pkg/patchwork/R/karyotype_check.r
   pkg/patchwork/R/karyotype_chroms.r
   pkg/patchwork/R/karyotype_chromsCN.r
   pkg/patchwork/R/patchwork.CNA.r
   pkg/patchwork/R/patchwork.GCNorm.r
   pkg/patchwork/R/patchwork.Medians_n_AI.r
   pkg/patchwork/R/patchwork.alleledata.r
   pkg/patchwork/R/patchwork.applyref.r
   pkg/patchwork/R/patchwork.createreference.r
   pkg/patchwork/R/patchwork.findCNs.r
   pkg/patchwork/R/patchwork.readChroms.r
   pkg/patchwork/R/patchwork.segment.r
   pkg/patchwork/R/patchwork.smoothing.r
   pkg/patchwork/R/subfunctions_findCNs.r
   pkg/patchwork/inst/
   pkg/patchwork/inst/._.DS_Store
   pkg/patchwork/inst/perl/
   pkg/patchwork/inst/perl/pile2alleles.pl
   pkg/patchwork/inst/python/
   pkg/patchwork/inst/python/Pysamloader.py
   pkg/patchwork/inst/python/pysam/
   pkg/patchwork/inst/python/pysam/Pileup.py
   pkg/patchwork/inst/python/pysam/TabProxies.c
   pkg/patchwork/inst/python/pysam/TabProxies.pxd
   pkg/patchwork/inst/python/pysam/TabProxies.pyx
   pkg/patchwork/inst/python/pysam/VCF.py
   pkg/patchwork/inst/python/pysam/__init__.py
   pkg/patchwork/inst/python/pysam/csamtools.c
   pkg/patchwork/inst/python/pysam/csamtools.pxd
   pkg/patchwork/inst/python/pysam/csamtools.pyx
   pkg/patchwork/inst/python/pysam/ctabix.c
   pkg/patchwork/inst/python/pysam/ctabix.pxd
   pkg/patchwork/inst/python/pysam/ctabix.pyx
   pkg/patchwork/inst/python/pysam/cvcf.c
   pkg/patchwork/inst/python/pysam/cvcf.pxd
   pkg/patchwork/inst/python/pysam/cvcf.pyx
   pkg/patchwork/inst/python/pysam/namedtuple.py
   pkg/patchwork/inst/python/pysam/pysam_util.c
   pkg/patchwork/inst/python/pysam/pysam_util.h
   pkg/patchwork/inst/python/pysam/tabix_util.c
   pkg/patchwork/inst/python/pysam/version.py
   pkg/patchwork/man/
   pkg/patchwork/man/karyotype.Rd
   pkg/patchwork/man/karyotype_check.Rd
   pkg/patchwork/man/karyotype_chroms.Rd
   pkg/patchwork/man/karyotype_chromsCN.Rd
   pkg/patchwork/man/patchwork.CNA.Rd
   pkg/patchwork/man/patchwork.GCNorm.Rd
   pkg/patchwork/man/patchwork.Medians_n_AI.Rd
   pkg/patchwork/man/patchwork.alleledata.Rd
   pkg/patchwork/man/patchwork.applyref.Rd
   pkg/patchwork/man/patchwork.createreference.Rd
   pkg/patchwork/man/patchwork.findCNs.Rd
   pkg/patchwork/man/patchwork.readChroms.Rd
   pkg/patchwork/man/patchwork.segment.Rd
   pkg/patchwork/man/patchwork.smoothing.Rd
   pkg/patchwork/man/subfunctions_findCNs.Rd
Log:
Initial recommit post separation,,, again

Added: pkg/patchwork/._.DS_Store
===================================================================
(Binary files differ)


Property changes on: pkg/patchwork/._.DS_Store
___________________________________________________________________
Added: svn:mime-type
   + application/octet-stream

Added: pkg/patchwork/DESCRIPTION
===================================================================
--- pkg/patchwork/DESCRIPTION	                        (rev 0)
+++ pkg/patchwork/DESCRIPTION	2011-11-30 16:32:00 UTC (rev 35)
@@ -0,0 +1,13 @@
+Package: patchwork
+Type: Package
+Title: Allele-specific Copy Number Analysis of Whole genome data
+Version: 2.0
+Date: 2011-10-13
+Author: Markus Rasmussen, Sebastian DiLorenzo
+Maintainer: Markus Rasmussen <Markus.Rasmussen at medsci.uu.se>
+Description: Performs a copy number analysis of whole genome data.
+License: GPL-2
+LazyLoad: no
+#LazyData: yes
+Depends: DNAcopy, patchworkData
+Imports: DNAcopy, patchworkData

Added: pkg/patchwork/NAMESPACE
===================================================================
--- pkg/patchwork/NAMESPACE	                        (rev 0)
+++ pkg/patchwork/NAMESPACE	2011-11-30 16:32:00 UTC (rev 35)
@@ -0,0 +1,22 @@
+export(karyotype_check,
+		karyotype_chroms,
+		karyotype_chromsCN,
+		karyotype,
+		patchwork.alleledata,
+		patchwork.applyref,
+		patchwork.CNA,
+		patchwork.createreference,
+		patchwork.findCNs,
+		patchwork.GCNorm,
+		patchwork.Medians_n_AI,
+		patchwork.readChroms,
+		patchwork.segment,
+		patchwork.smoothing,
+		#subfunctions_findCNs
+		weightedMedian,
+		weightedMean,
+		is.autosome,
+		deChrom_ucsc,
+		chrom_ucsc)
+		
+imports(DNAcopy,patchworkData)
\ No newline at end of file

Added: pkg/patchwork/R/karyotype.r
===================================================================
--- pkg/patchwork/R/karyotype.r	                        (rev 0)
+++ pkg/patchwork/R/karyotype.r	2011-11-30 16:32:00 UTC (rev 35)
@@ -0,0 +1,74 @@
+karyotype <- function(chr,start,end,int,ai,chroms,
+						name='',xlim=c(-1.02,1.02),ylim=0:1) 
+	{   
+    png(paste(name,'.karyotype.png',sep=''),width=1300,height=1300)
+ 
+    	#packagepath = system.file(package="patchwork")
+ 	
+    	#load(paste(packagepath,"/data/ideogram.RData",sep=""))
+    	
+    	data(ideogram,package="patchworkData")
+    	
+    	#substitute ideogram chr names with chroms-object chr names
+    	# so it will work independent of chr field name.
+    
+    	ideogram$chr = chroms
+    	
+		colors_p <- colorRampPalette(c("#6600FF","#9900CC"),space="rgb")
+    	colors_q <- colorRampPalette(c("#CC0099","#CC0000"),space="rgb")
+
+    	layout(matrix(1:25,nrow=5,byrow=T), widths=1,heights=1)
+    	
+    	aix=ai!=0
+    	chr=chr[aix]
+    	start=start[aix]
+    	end=end[aix]
+    	int=int[aix]
+    	ai=ai[aix]
+    	pos <- (start+end)/2
+    	length=end-start
+    
+    	size=rep(0.5,length(chr))
+    	size[length>2000000]=1
+    	size[length>5000000]=2
+    	size[length>10000000]=3
+ 
+    
+    	for (c in chroms) 
+    		{
+
+        	this <- ideogram[ideogram$c==c,]
+        	ix <- chr==this$chr
+        	ix[is.na(ix)]=FALSE
+
+        	col <- rep('#B0B0B070',length(chr))
+        
+        	col[ix & (pos < this$mid)] <- paste(colors_p(sum(ix & (pos < this$mid))), '70', sep='')  
+        	col[ix & (pos > this$mid)] <- paste(colors_q(sum(ix & (pos > this$mid))), '70', sep='')
+ 
+        	plot(c(int[!ix],int[ix]),c(ai[!ix],ai[ix]),
+        		pch=16,
+        		cex=c(size[!ix],size[ix]),
+        		cex.lab=3,
+        		mar=c(0.1,0.1,0.1,0.1),
+				main = "",
+				xlab = this$chr,
+				ylab = "",
+				col = c(col[!ix],col[ix]),
+				xlim = xlim,
+				ylim = ylim,
+		    	yaxt="n",
+        		xaxt="n")
+        	axis(1,cex.axis=1.5)
+        	axis(2,cex.axis=1.5)
+			par(new=F)  
+    		}
+    dev.off()
+	}
+
+
+    
+    
+
+
+

Added: pkg/patchwork/R/karyotype_check.r
===================================================================
--- pkg/patchwork/R/karyotype_check.r	                        (rev 0)
+++ pkg/patchwork/R/karyotype_check.r	2011-11-30 16:32:00 UTC (rev 35)
@@ -0,0 +1,66 @@
+karyotype_check <- function(chr,start,end,int,ai,Cn,mCn,t,name='',xlim=c(-1.02,1.02),ylim=0:1)
+	{
+	## TAPS scatter plot of a full sample, used for visual quality control. 
+    png(paste(name,'_karyotype_check.png',sep=''),width=1300,height=1300)
+ 
+
+    	colors_p <- colorRampPalette(c("#6600FF","#9900CC"),space="rgb")
+    	colors_q <- colorRampPalette(c("#CC0099","#CC0000"),space="rgb")
+
+    	#layout(matrix(1:25,nrow=5,byrow=T), widths=1,heights=1)
+    
+    	aix=!(is.na(mCn)|is.na(Cn))
+    	chr=chr[aix]
+    	start=start[aix]
+    	end=end[aix]
+    	int=int[aix]
+    	ai=ai[aix]
+    	Cn=Cn[aix]
+    	mCn=mCn[aix]
+    	pos <- (start+end)/2
+    	length=end-start
+    	labels=paste(Cn,mCn,sep='m')
+    
+    	size=rep(1,length(chr))
+    	size[length>2000000]=2
+    	size[length>5000000]=3
+    	size[length>10000000]=4
+    	
+		## The variants present in this data
+    	variants <- unique(labels)
+    	variants_data <- NULL
+    	
+    	## Extracts the Log-R and Allelic Imbalance Ratio of variants
+    	for (v in variants) 
+    		{ 
+        	t_cn <- paste('cn',strsplit(v,'m')[[1]][1],sep='')
+        	t_int <- as.numeric(t$int[t_cn][[1]])
+        	t_ai <- as.numeric(t$ai[paste('cn',v,sep='')][[1]])
+        	variants_data <- rbind(variants_data,c(t_int,t_ai))
+    		}
+  
+    	col <- rep('#B0B0B070',length(chr))
+         
+   	 	plot(int,ai,
+        	pch=16,
+        	cex=c(size),
+        	cex.lab=3,
+        	mar=c(0.1,0.1,0.1,0.1),
+    		main = "",
+			xlab = name,
+			ylab = "",
+			col = col,
+			xlim = xlim,
+			ylim = ylim,
+			yaxt="n",
+        	xaxt="n")
+        	axis(1,cex.axis=2)
+        	axis(2,cex.axis=2)
+        	
+		text(variants_data[,1],variants_data[,2],
+        	labels=variants,
+        	col='black',
+        	cex=2)
+    
+    dev.off()
+	}

Added: pkg/patchwork/R/karyotype_chroms.r
===================================================================
--- pkg/patchwork/R/karyotype_chroms.r	                        (rev 0)
+++ pkg/patchwork/R/karyotype_chroms.r	2011-11-30 16:32:00 UTC (rev 35)
@@ -0,0 +1,117 @@
+karyotype_chroms <- function(chr,start,end,int,ai,chroms,mchr,mpos,mval,
+								schr,spos,sval,name='',xlim=c(0,2.4),ylim=c(0.3,1))  
+	{   
+    
+    #packagepath = system.file(package="patchwork")
+ 	
+    #load(paste(packagepath,"/data/ideogram.RData",sep=""))
+    
+    data(ideogram,package="patchworkData")
+    
+    #substitute ideogram chr names with chroms-object chr names
+    # so it will work independent of chr field name.
+    
+    ideogram$chr = chroms
+    
+    #create a color ramp palette to segue the color scheme for plot.
+    
+    colors_p <- colorRampPalette(c("#6600FF","#9900CC"),space="rgb")
+    colors_q <- colorRampPalette(c("#CC0099","#CC0000"),space="rgb")
+
+    ai[is.na(ai)]=0
+    aix=ai!=0
+    chr=chr[aix]
+    start=start[aix]
+    end=end[aix]
+    int=int[aix]
+    ai=ai[aix]
+    pos <- (start+end)/2
+    length=end-start
+    
+    size=rep(1,length(chr))
+    size[length>2000000]=2
+    size[length>5000000]=3
+    size[length>10000000]=4
+       
+    for (c in chroms) 
+    	{
+        this <- ideogram[ideogram$c==c,]
+        
+        png(paste(name,'_karyotype.',this$chr,'.png',sep=''),width=1080,height=1300)
+        	layout(matrix(1:3,nrow=3,byrow=T), widths=10,heights=c(10,5,5))
+        	
+        	mar.default <- c(5,4,4,2) + 0.1
+			par(mar = mar.default + c(0, 4, 0, 0)) 
+        
+       	 	ix <- chr==this$chr
+        	col <- rep('#B0B0B070',length(chr))       
+        	col[ix & (pos < this$mid)] <- paste(colors_p(sum(ix & (pos < this$mid))), '70', sep='')  
+        	col[ix & (pos > this$mid)] <- paste(colors_q(sum(ix & (pos > this$mid))), '70', sep='') 
+        
+        	plot(c(int[!ix],int[ix]),c(ai[!ix],ai[ix]),
+            	pch=16,
+            	cex=c(size[!ix],size[ix]),
+            	cex.lab=3,
+            	#mar=c(0.1,0.1,0.1,0.1),
+		    	main = "",
+		    	xlab = "Total intensity -->",
+		    	ylab = '',
+		    	col = c(col[!ix],col[ix]),
+		    	xlim = xlim,
+		    	ylim = ylim,
+		    	yaxt="n",
+        		xaxt="n")
+        	axis(1,cex.axis=2)
+        	axis(2,cex.axis=2)
+        	mtext(text="Allelic imbalance -->",side=2,line=4,las=3,cex=2)
+		
+        	par(new=F)
+        	mix <- mchr==as.character(this$chr)
+        	#col <- rep('#D0D0D0D0',sum(mix))       
+        	#col[ix & (pos < this$mid)] <- paste(colors_p(sum(ix & (pos < this$mid))), '70', sep='')  
+        	#col[ix & (pos > this$mid)] <- paste(colors_q(sum(ix & (pos > this$mid))), '70', sep='') 
+        	col=rep('#000000',sum(ix))
+        	col[pos[ix] < this$mid] <- colors_p(sum(pos[ix] < this$mid))
+        	col[pos[ix] > this$mid] <- colors_q(sum(pos[ix] > this$mid))
+        
+        	plot(mpos[mix],mval[mix],
+            	pch=16,
+            	cex=2,
+            	cex.lab=3,
+            	#mar=c(0.1,0.1,0.1,0.1),
+    	    	main = "",
+		    	xlab = "Total intensity",
+		    	ylab = "",
+		    	col = '#00000010',
+		    	xlim = c(0,this$length),
+		    	ylim = xlim,
+		    	yaxt="n",
+        		xaxt="n")
+        	axis(1,cex.axis=2)
+        	axis(2,cex.axis=2)
+		    
+        	segments(x0=start[ix],x1=end[ix],
+        	    y0=int[ix],y1=int[ix],                
+            	col=col,
+            	lwd=4,)
+            
+       	 	par(new=F)       
+        	six <- schr==as.character(this$chr)
+        	plot(spos[six],sval[six],
+            	pch=16,
+            	cex=2,
+            	cex.lab=3,
+            	#mar=c(0.1,0.1,0.1,0.1),
+            	main = "",
+		    	xlab = "Allelic imbalance",
+		    	ylab = "",
+		    	col = '#00000010',
+		    	xlim = c(0,this$length),
+		    	ylim = c(0,1),
+		    	yaxt="n",
+        		xaxt="n")
+        	axis(1,cex.axis=2)
+        	axis(2,cex.axis=2)
+    	dev.off()
+    	}
+	}
\ No newline at end of file

Added: pkg/patchwork/R/karyotype_chromsCN.r
===================================================================
--- pkg/patchwork/R/karyotype_chromsCN.r	                        (rev 0)
+++ pkg/patchwork/R/karyotype_chromsCN.r	2011-11-30 16:32:00 UTC (rev 35)
@@ -0,0 +1,142 @@
+karyotype_chromsCN <- function(chr,start,end,int,ai,Cn,mCn,mchr,mpos,mval,
+								schr,spos,sval,name='',xlim=c(-1.02,1.82),ylim=0:1,
+								maxCn=8)  
+	{   
+    
+    #packagepath = system.file(package="patchwork")
+ 	
+    #load(paste(packagepath,"/data/ideogram.RData",sep=""))
+    
+    data(ideogram,package="patchworkData")
+    
+    #substitute ideogram chr names with chroms-object chr names
+    # so it will work independent of chr field name.
+    
+    chroms = as.character(unique(schr))
+    chroms = chroms[chroms != chroms[grep("M",chroms)]]
+    
+    ideogram$chr = chroms
+    
+    colors_p <- colorRampPalette(c("#6600FF","#9900CC"),space="rgb")
+    colors_q <- colorRampPalette(c("#CC0099","#CC0000"),space="rgb")
+
+    ai[is.na(ai)]=0
+    aix=ai!=0
+    chr=chr[aix]
+    start=start[aix]
+    end=end[aix]
+    int=int[aix]
+    ai=ai[aix]
+    Cn=Cn[aix]
+    mCn=mCn[aix]
+    pos <- (start+end)/2
+    length=end-start
+    
+    size=rep(1,length(chr))
+    size[length>2000000]=2
+    size[length>5000000]=3
+    size[length>10000000]=4
+       
+    for (c in chroms) 
+    	{
+        this <- ideogram[ideogram$c==c,]
+        
+        png(paste(name,'_karyotypeCN.',this$chr,'.png',sep=''),width=1080,height=1300)
+        	layout(matrix(1:4,nrow=4,byrow=T), widths=10,heights=c(10,5,5,5))
+        	
+        	mar.default <- c(5,4,4,2) + 0.1
+			par(mar = mar.default + c(0, 4, 0, 0)) 
+        	
+        	ix <- chr==this$chr
+        	col <- rep('#B0B0B070',length(chr))       
+        	col[ix & (pos < this$mid)] <- paste(colors_p(sum(ix & (pos < this$mid))), '70', sep='')  
+        	col[ix & (pos > this$mid)] <- paste(colors_q(sum(ix & (pos > this$mid))), '70', sep='') 
+        
+        	plot(c(int[!ix],int[ix]),c(ai[!ix],ai[ix]),
+            	pch=16,
+            	cex=c(size[!ix],size[ix]),
+            	cex.lab=3,
+            	#mar=c(0.1,0.1,0.1,0.1),
+    	    	main = "",
+		    	xlab = "Total intensity -->",
+		    	#ylab = "Allelic imbalance -->",
+		    	ylab = '',
+		    	col = c(col[!ix],col[ix]),
+		    	xlim = xlim,
+		    	ylim = ylim,
+		    	yaxt="n",
+        		xaxt="n")
+        	axis(1,cex.axis=2)
+        	axis(2,cex.axis=2)
+        	mtext(text="Allelic imbalance -->",side=2,line=4,las=3,cex=2)
+		    	
+			col=rep('#000000',sum(ix))
+        	col[pos[ix] < this$mid] <- colors_p(sum(pos[ix] < this$mid))
+        	col[pos[ix] > this$mid] <- colors_q(sum(pos[ix] > this$mid))
+        	par(new=F)
+        	
+        	plot(1,1,type='n',
+            	xlab='Total and minor copy number',
+            	ylab='',
+            	cex.lab=3,
+            	xlim = c(0,this$length),
+            	ylim = c(-0.1,8.1),
+		    	yaxt="n",
+        		xaxt="n")
+        	axis(1,cex.axis=2)
+        	axis(2,cex.axis=2)
+            	
+        	segments(x0=start[ix],x1=end[ix],
+         	   y0=Cn[ix],y1=Cn[ix],                
+            	col=col,
+            	lwd=5,)   #Om fel, prova ta bort detta komma
+            	
+        	segments(x0=start[ix],x1=end[ix],
+            	y0=mCn[ix],y1=mCn[ix],                
+            	col='#BBBBBB',
+            	lwd=5,) #Om fel, prova ta bort detta komma
+ 
+        	par(new=F)
+        	mix <- as.character(mchr)==this$chr        
+        	plot(mpos[mix],mval[mix],
+            	pch=16,
+            	cex=2,
+            	cex.lab=3,
+            	#mar=c(0.1,0.1,0.1,0.1),
+    	    	main = "",
+		    	xlab = "Total intensity",
+		    	ylab = "",
+		    	col = '#00000010',
+		    	xlim = c(0,this$length),
+		    	ylim = xlim,
+		    	yaxt="n",
+        		xaxt="n")
+        	axis(1,cex.axis=2)
+        	axis(2,cex.axis=2)
+		    	
+        	segments(x0=start[ix],x1=end[ix],
+            	y0=int[ix],y1=int[ix],                
+            	col=col,
+            	lwd=4,)
+            	
+        	par(new=F)       
+        	six <- as.character(schr)==this$chr
+        	
+        	plot(spos[six],sval[six],
+        	    pch=16,
+            	cex=2,
+            	cex.lab=3,
+            	#mar=c(0.1,0.1,0.1,0.1),
+            	main = "",
+		    	xlab = "Allelic imbalance",
+		    	ylab = "",
+		    	col = '#00000010',
+		    	xlim = c(0,this$length),
+		    	ylim = c(0,1),
+		    	yaxt="n",
+        		xaxt="n")
+        	axis(1,cex.axis=2)
+        	axis(2,cex.axis=2)
+        dev.off()
+    	}
+	}
\ No newline at end of file

Added: pkg/patchwork/R/patchwork.CNA.r
===================================================================
--- pkg/patchwork/R/patchwork.CNA.r	                        (rev 0)
+++ pkg/patchwork/R/patchwork.CNA.r	2011-11-30 16:32:00 UTC (rev 35)
@@ -0,0 +1,107 @@
+patchwork.CNA <- function(BamFile,Pileup,reference,Alpha=0.0001,SD=1)
+	{
+	
+	#Load DNAcopy
+	#library(DNAcopy)
+	#Load data included in package
+	#packagepath = system.file(package="patchwork")
+	#load(paste(packagepath,"/data/commonSnps132.RData",sep=""))
+	#load(paste(packagepath,"/data/ideogram.RData",sep=""))
+	#load(paste(packagepath,"/data/normaldata.RData",sep=""))
+	
+	#Attempt to read file pile.alleles, incase its already been created.
+	alf = NULL
+	try( load("pile.alleles.Rdata"), silent=TRUE )
+	
+	#If it wasnt created, create it using the perl script pile2alleles.pl
+	#which is included in the package. Creates pile.alleles in whichever folder
+	#you are running R from. (getwd())
+	if(length(alf) == 1)
+		{
+		cat("Initiating Allele Data Generation \n")
+		alf = patchwork.alleledata(Pileup)
+		cat("Allele Data Generation Complete \n")
+		save(alf,file="pile.alleles.Rdata")
+		}
+	
+	chroms=as.character(unique(alf$achr))
+	chroms = chroms[chroms != chroms[grep("M",chroms)]]
+	
+	## Read coverage data. If already done, will load "data.Rdata" instead.
+	data=NULL
+	try ( load('data.Rdata'), silent=TRUE )
+	
+	#If data object does not exist, IE it was not loaded in the previous line
+	#perform the function on the chromosomes to create the object.
+	if(is.null(data))
+		{
+		cat("Initiating Read Chroms \n")
+		data = patchwork.readChroms(BamFile,chroms)
+		cat("Read Chroms Complete \n")
+		save(data,file='data.Rdata')
+		}
+	
+	#Perform GC normalization if the amount of columns indicate that gc normalization
+	#has not already been performed.
+	if(length(data) < 7)
+		{
+		cat("Initiating Gc Normalization \n")
+		data = patchwork.GCNorm(data)
+		cat("GC Normalization Complete \n")
+		save(data,file='data.Rdata')
+		}
+	#after this, no further normalization was done on reference sequences.
+		
+	#Apply normal reference to data.
+	#If solid is true it will runa gainst a SOLiD reference, else
+	# it will go with Illumina.
+	if(length(data) < 9)
+		{
+		cat("Initiating Application of Reference \n")
+		data = patchwork.applyref(data,reference)
+		cat("Application of Reference Complete \n")
+		save(data,file='data.Rdata')
+		}
+
+	kbsegs = NULL
+	try( load("smoothed.Rdata"), silent=TRUE )
+	
+	if(length(kbsegs) == 1)
+		{
+		#Apply smoothing to the data.
+		cat("Initiating Smoothing \n")
+		kbsegs = patchwork.smoothing(data,chroms)
+		save(kbsegs,file="smoothed.Rdata")
+		cat("Smoothing Complete \n")
+		}
+	
+	#Segment the data.
+	cat("Initiating Segmentation \n")
+	segs = patchwork.segment(kbsegs,chroms,Alpha,SD)
+	cat("Segmentation Complete \n")
+	
+	#Get medians and AI for the segments.
+	cat("Initiating Segment data extraction (Medians and AI) \n")
+	segs = patchwork.Medians_n_AI(segs,kbsegs,alf)
+	save(segs,file="Segments.Rdata")
+	cat("Segment data extraction Complete \n \n \n")
+	
+	cat("Saving information objects needed for patchwork.findCNs in findCNs.Rdata \n \n \n")
+	save(segs,alf,kbsegs,BamFile,file="findCNs.Rdata")
+	
+	#Plot it
+	cat("Initiating Plotting \n")
+	karyotype(segs$chr,segs$start,segs$end,segs$median,segs$ai,
+			chroms,name=as.character(BamFile),
+			xlim=c(0,2.4),ylim=c(0.3,1))
+	karyotype_chroms(segs$chr,segs$start,segs$end,segs$median,segs$ai,
+			chroms,kbsegs$chr,kbsegs$pos,kbsegs$ratio,
+			alf$achr,alf$apos,(1-alf$amin/alf$amax),
+			name=as.character(BamFile),xlim=c(0,2.4),
+			ylim=c(0.3,1))
+	cat("Plotting Complete \n")
+	cat("Shutting down..... \n")
+	
+	}
+	
+	
\ No newline at end of file

Added: pkg/patchwork/R/patchwork.GCNorm.r
===================================================================
--- pkg/patchwork/R/patchwork.GCNorm.r	                        (rev 0)
+++ pkg/patchwork/R/patchwork.GCNorm.r	2011-11-30 16:32:00 UTC (rev 35)
@@ -0,0 +1,58 @@
+patchwork.GCNorm <- function(data)
+	{
+	#Load data included in package
+	#packagepath = system.file(package="CNA")
+	#load(paste(packagepath,"/data/commonSnps132.RData",sep=""))
+	#load(paste(packagepath,"/data/ideogram.RData",sep=""))
+	#load(paste(packagepath,"/data/normaldata.RData",sep=""))
+
+	## GC content normalization
+	info = data.frame(from=c(0,seq(10,90,2)),to=c(seq(10,90,2),100),mean=NA,median=NA,np=NA)
+	xnorm = data$counts
+	
+	#1st round
+	for (i in 1:nrow(info))
+		{
+		ix = (data$gc >= info$from[i]) & (data$gc <= info$to[i])
+		info$mean[i] = mean(data$counts[ix])
+		info$median[i] = median(data$counts[ix])
+		info$sd[i] = sd(data$counts[ix])
+		info$np[i] = sum(ix)
+		xnorm[ix] = 1 * ( data$counts[ix] / info$mean[i] )
+		}
+	
+	#xnorm = data$counts
+	
+	#for (i in 1:nrow(info))
+	#	{
+	#	ix = (data$gc >= info$from[i]) & (data$gc <= info$to[i])
+	#	xnorm[ix] = 1 * ( data$counts[ix] / info$mean[i] )
+	#	}
+		
+	xnorm = round(xnorm,2)
+		
+	# 2nd round
+	info2 = data.frame(from=c(0,seq(20,80,2)),to=c(seq(20,80,2),100),mean=NA,median=NA,np=NA)
+	xnorm2 = xnorm
+	
+	for (i in 1:nrow(info2))
+		{
+		ix = (data$gck >= info2$from[i]) & (data$gck <= info2$to[i])
+		info2$mean[i] = mean(xnorm[ix])
+		info2$median[i] = median(xnorm[ix])
+		info2$sd[i] = sd(xnorm[ix])
+		info2$np[i] = sum(ix)
+		xnorm2[ix] = 1 * ( xnorm[ix] / info2$mean[i] )
+		}
+		
+	#xnorm2 = xnorm
+	#for (i in 1:nrow(info2))
+	#	{
+	#	ix = (data$gck >= info2$from[i]) & (data$gck <= info2$to[i])
+	#	xnorm2[ix] = 1 * ( xnorm[ix] / info2$mean[i] )
+	#	}
+		
+	xnorm2 = round(xnorm2,2)
+	data$norm=xnorm2
+	return(data)
+	}

Added: pkg/patchwork/R/patchwork.Medians_n_AI.r
===================================================================
--- pkg/patchwork/R/patchwork.Medians_n_AI.r	                        (rev 0)
+++ pkg/patchwork/R/patchwork.Medians_n_AI.r	2011-11-30 16:32:00 UTC (rev 35)
@@ -0,0 +1,16 @@
+patchwork.Medians_n_AI <- function(segs,kbsegs,alf)	
+	{
+	## Get medians and AI for them segs
+	segs$median <- segs$ai <- segs$snvs <- NA
+	for (i in 1:nrow(segs))
+		{
+		ix = kbsegs$chr==as.character(segs$chr[i]) & ( kbsegs$pos > segs$start[i] & kbsegs$pos < segs$end[i] )
+		segs$median[i] = median(kbsegs$ratio[ix])
+
+		ix = alf$achr==as.character(segs$chr[i]) & ( alf$apos > segs$start[i] & alf$apos < segs$end[i] )
+		segs$snvs[i] = sum(ix)
+		segs$ai[i] = 1 - sum(alf$amin[ix]) / sum(alf$amax[ix])
+		}
+	segs$ai[is.nan(segs$ai)]=0
+	return(segs)
+	}
\ No newline at end of file

Added: pkg/patchwork/R/patchwork.alleledata.r
===================================================================
--- pkg/patchwork/R/patchwork.alleledata.r	                        (rev 0)
+++ pkg/patchwork/R/patchwork.alleledata.r	2011-11-30 16:32:00 UTC (rev 35)
@@ -0,0 +1,27 @@
+patchwork.alleledata <- function(Pileup)
+	{
+	
+	#Load data included in package
+	packagepath = system.file(package="patchwork")
+	#load(paste(packagepath,"/data/commonSnps132.RData",sep=""))
+	
+	data(commonSnps132,package="patchworkData")
+	#load(paste(packagepath,"/data/ideogram.RData",sep=""))
+	#load(paste(packagepath,"/data/normaldata.RData",sep=""))
+	system(paste("cp -r ",packagepath,"/perl .perl",sep=""))
+	system(paste("cat ",Pileup," | perl .perl/pile2alleles.pl > ",getwd(),"/pile.alleles",sep=""))
+	system("rm -r .perl")
+	alf = read.csv('pile.alleles', sep='\t',header=F)[,1:6]
+	system("rm pile.alleles")
+	
+	colnames(alf) = c('achr','apos','atype','aqual','atot','amut')
+	alf$aref = alf$atot - alf$amut
+	alf = alf [alf$amut >= 1 & alf$aref >= 1,]
+	alf$amax = apply(alf[,6:7],1,max)
+	alf$amin = alf$atot-alf$amax
+	#alf=alf[alf$atot<100,] # outlierz
+
+	alf=merge(alf,dbSnp[,c(1,3)], all=F, by=1:2)
+	
+	return(alf)
+	}

Added: pkg/patchwork/R/patchwork.applyref.r
===================================================================
--- pkg/patchwork/R/patchwork.applyref.r	                        (rev 0)
+++ pkg/patchwork/R/patchwork.applyref.r	2011-11-30 16:32:00 UTC (rev 35)
@@ -0,0 +1,33 @@
+patchwork.applyref <- function(data,reference)
+	{
+	#Load data included in package
+	#packagepath = system.file(package="patchwork")
+	#load(paste(packagepath,"/data/commonSnps132.RData",sep=""))
+	#load(paste(packagepath,"/data/ideogram.RData",sep=""))
+	
+	if(tolower(reference) == "solid")
+		{
+		#load(paste(packagepath,"/data/datasolid.RData",sep=""))
+		data(datasolid,package="patchworkData")
+		normaldata = datasolid
+		}
+	else if(tolower(reference) %in% c("solexa","illumina"))
+		{
+		#load(paste(packagepath,"/data/datasolexa.RData",sep=""))
+		data(datasolexa,package="patchworkData")
+		normaldata = datasolexa
+		}
+	else
+		{
+		load(reference)
+		}
+
+	## read and apply normal/reference
+	#load('/proj/b2010074/GC/normaldata.Rdata')				
+	data <- merge(data[,1:7],normaldata,all=F,by=1:2)
+	#data$ratio <- data$norm / data$norm_mean
+	data$reference <- data$norm_mean  ## thats temporary
+	data=data[order(data$pos),]
+	data=data[order(data$chr),]
+	return(data)
+	}
\ No newline at end of file

Added: pkg/patchwork/R/patchwork.createreference.r
===================================================================
--- pkg/patchwork/R/patchwork.createreference.r	                        (rev 0)
+++ pkg/patchwork/R/patchwork.createreference.r	2011-11-30 16:32:00 UTC (rev 35)
@@ -0,0 +1,75 @@
+patchwork.createreference = function(...,output="REFOUT")
+	{
+	
+	data=NULL
+	
+	files = list(...)
+	
+	#packagepath = system.file(package="patchwork")
+	#load(paste(packagepath,"/data/ideogram.RData",sep=""))
+	alf = NULL
+	try( load("pile.alleles.Rdata"), silent=TRUE )
+	
+	if(length(alf) > 1)
+		{
+		chroms=as.character(unique(alf$achr))
+		chroms = chroms[chroms != chroms[grep("M",chroms)]]
+		}
+	else
+		{
+		data(ideogram,package="patchworkData")
+		chroms=as.character(unique(ideogram$chr))
+		}
+	j = 0
+	
+	for (i in files)
+		{
+		j = j + 1
+		cat(paste("Reading Chromosomes of file ",i," \n",sep=""))
+		data[[j]] = patchwork.readChroms(i,chroms)
+		cat(paste("Initiating Gc Normalization of file ",i," \n",sep=""))
+		data[[j]] = patchwork.GCNorm(data[[j]])
+		data[[j]] = data[[j]][c("chr","pos",
+											#"counts",
+											"norm")]
+		}
+	
+	if (j > 1)
+		{
+		cat("Merging the files into reference. \n")
+		data_ = merge(data[[1]],data[[2]],all=TRUE,by=1:2)
+		if (j > 2)
+			{
+			for (c in 3:j)
+				{
+				data_ = merge(data_,data[[c]],all=TRUE,by=1:2)
+				}
+			}
+		#counts_ = seq(from=3,to=(length(data_) - 1),by=2)
+		norm_ 	= seq(from=3,to=length(data_),by=1) 
+	
+		cat("Creating and applying mean functions to the reference. \n")
+		normaldata = data.frame(chr=data_$chr,pos=data_$pos,
+							#counts_mean=apply(data_[,counts_],
+							#1,mean,na.rm=TRUE),
+							norm_mean=apply(data_[,norm_],
+							1,mean,na.rm=TRUE))
+		}
+	else
+		{
+		data = data[[j]]
+		cat("Creating and applying mean functions to the reference. \n")
+		cat("Note: Creating from single bamfile. Cannot obtain mean ")
+		cat("counts or norm however naming columns counts_mean and ")
+		cat("norm_mean for compatability \n")
+		normaldata = data.frame(chr=data$chr,pos=data$pos,gc=data$gc,
+							gck=data$gck,gcm=data$gcm,
+							counts_mean=data$counts,
+							norm_mean=data$norm)
+		}
+
+	
+	cat("Saving to ",output,".Rdata \n",sep="")
+	save(normaldata,file=paste(output,".RData",sep=""))
+	return(normaldata)
+	}
\ No newline at end of file

Added: pkg/patchwork/R/patchwork.findCNs.r
===================================================================
--- pkg/patchwork/R/patchwork.findCNs.r	                        (rev 0)
+++ pkg/patchwork/R/patchwork.findCNs.r	2011-11-30 16:32:00 UTC (rev 35)
@@ -0,0 +1,283 @@
+patchwork.findCNs = function(cn2,delta,het,hom,maxCn=8,ceiling=1)
+	{
+	
+	#packagepath = system.file(package="patchwork")
+	#load(paste(packagepath,"/data/commonSnps132.RData",sep=""))
+	#load(paste(packagepath,"/data/ideogram.RData",sep=""))
+	data(ideogram,package="patchworkData")
+	load("findCNs.Rdata")
+
+	voidchrom <- c('chrX','chrY') # may add non-integer chroms here....
+
+    tix=NULL     #temporary index
+    int=NULL     ## contains coverage estimate of each (total) copy number
+    ai=NULL         ## contains Allelic Imbalance Ratio estimate of each copy number variant.
+    regions <- segs
+    regions <- regions[(is.autosome(regions$chr)&regions$np>50)&(!is.nan(regions$ai)),] ## will use these regions
+    regions <- regions[!is.na(regions$median),]
+
+    ## likely cn2 regions sit within delta/3 from cn2.
+    expectedAt <- cn2
+    tix$cn2 <- abs(regions$median - expectedAt) < (delta/3)    ## index of likely cn2 regions
+    temp <- regions[tix$cn2,]                                ## cn2 regions
+    med <- weightedMedian(temp$median,temp$np)            ## improved value of Log-R at cn2 (returns NULL if theres nothing there)
+    int$cn2 <- ifelse(!is.null(med),med,expectedAt)            ## saved to int.
+
+    ## likely cn1 regions sit at about cn2 - delta:
+    d <- delta                                                 ## the (Log-R) distance to cn1
+    expectedAt <- int$cn2-d                                     ## cn1 is expected here
+    tix$cn1 <- abs(regions$median - expectedAt) < (d/3)        ## index of likely cn1 regions
+    temp <- regions[tix$cn1,]
+    med <- weightedMedian(temp$median,temp$np)
+    int$cn1 <- ifelse(!is.null(med),med,expectedAt)
+
+    ## likely cn0 regions sit below cn1 - delta:
+    d <- int$cn2-int$cn1
+    expectedAt <- int$cn1-d                                     
+    tix$cn0 <- regions$median < expectedAt
+    temp <- regions[tix$cn0,]
+    med <- weightedMedian(temp$median,temp$np)
+    int$cn0 <- ifelse(!is.null(med),med,expectedAt)
+
+    ## likely cn3 regions sit at about cn2+delta
+    d <- delta
+    expectedAt <- int$cn2+d
+    tix$cn3 <- abs(regions$median - expectedAt) < (d/3)
+    temp <- regions[tix$cn3,]
+    med <- weightedMedian(temp$median,temp$np)
+    int$cn3 <- ifelse(!is.null(med),med,expectedAt)
+
+    ## cn4 follows at ...
+    d <- delta
+    expectedAt <- int$cn3+d
+    tix$cn4 <- abs(regions$median - expectedAt) < (d/4)
+    temp <- regions[tix$cn4,]
+    med <- weightedMedian(temp$median,temp$np)
+    int$cn4 <- ifelse(!is.null(med),med,expectedAt)
+
+    ## generalized for higher cns
+    for (cn in 5:maxCn)
+    	{
+    	thisCn <- paste('cn',cn,sep='')
+    	prevCn <- paste('cn',cn-1,sep='')
+    	pprevCn <- paste('cn',cn-2,sep='')
+    	d <- delta #dmin*(int[prevCn][[1]]-int[pprevCn][[1]])
+    	expectedAt <- int[prevCn][[1]]+d
+    	tix[[thisCn]] <- abs(regions$median - expectedAt) < (d/5)
+    	temp <- regions[tix[thisCn][[1]],]
+    	med <- weightedMedian(temp$median,temp$np)
+    	int[thisCn] <- ifelse(!is.null(med),med,expectedAt)
+    	}
+
+	#  smooth the cn relationship ?
+
+    ## at cn2, find the variant clusters (normal and CNNLOH)
+    ix <- (abs(regions$median - int$cn2) < 0.2*(int$cn3-int$cn2) ) # taking only closely-matching segments
+    data <- regions[ix,]
+    data <- data[!is.na(data$ai),] # ...with a calculated allelic imbalance.
+
+    expectedAt <- het
+    ix <- 2*abs(data$ai-het) < abs(data$ai-hom)
+    med <- weightedMedian(data$ai[ix],data$snvs[ix]) 
+    ai$cn2m1 <- ifelse (!is.null(med),med,expectedAt)
+
+    expectedAt <- hom
+    ix <- 2*abs(data$ai-hom) < abs(data$ai-het)
+    med <- weightedMedian(data$ai[ix],data$snvs[ix]) 
+    ai$cn2m0 <- ifelse (!is.null(med),med,expectedAt)
+
+    ## for cn1 (and 0)
+    ix <- (abs(regions$median - int$cn1) < 0.2*(int$cn2-int$cn1) )
+    data <- regions[ix,]
+    data <- data[!is.na(data$ai),]
+    expectedAt <- (ai$cn2m0+ai$cn2m1)*3/5     ## Decent estimate.
+    med <- weightedMedian(data$ai,data$snvs) ## Average allelic imbalance weighted on snp count
+    ai$cn1m0 <- ifelse (!is.null(med),med,expectedAt)    ## Will be NA if there was no CNNLOH
+    ai$cn0m0 <- NA #unimportant
+
+    ## for cn3:
+    ix <- (abs(regions$median - int$cn3) < 0.2*(int$cn4-int$cn3) )
+    data <- regions[ix,]
+    data <- data[!is.na(data$ai),]
+  
+    range <- ai$cn2m0-ai$cn2m1 #the distance between 2normal and CNNLOH
+    # get the 3(1) regions: 
+    expectedAt <- ai$cn2m1+range/3 # this is an approx if cn3m1 is absent.
+
+    ix <- (data$ai<ai$cn2m0) & (data$ai>ai$cn2m1) # take regions with less AI than cn2m0 but more than cn2m1
+    med <- weightedMedian(data$ai[ix],data$snvs[ix]) # average allelic imbalance weighted on snp count
+    ai$cn3m1 <- ifelse (!is.null(med),med,expectedAt)
+
+    # now for cn3m0
+    expectedAt <- ai$cn3m1 + range # approx of cn3m0
+    ix <- (abs(data$ai-expectedAt) < (expectedAt-ai$cn3m1)/4)  # take those much closer to exp than to cn3m1
+    med <- weightedMedian(data$ai[ix],data$snvs[ix]) 
+    try (if (med<ai$cn2m0) med <- NULL, silent=T)    ## in case of heterogeneities or strange signals, we disallow this effect.
+    ai$cn3m0 <- ifelse (!is.null(med),med,expectedAt)
+
+    if (is.na(ai$cn1m0)) ai$cn1m0 <- mean(ai$cn3m1,ai$cn2m0) ## If deletions were missing, place an estimate from cn3 
+    
+    ## now for cn4
+    ix <- abs(regions$median - int$cn4) < 0.2*(int$cn4-int$cn3) 
+    data <- regions[ix,]
+    data <- data[!is.na(data$ai),]
+
+    # get the 4(2) regions: they are at AI about cn2m1
+    expectedAt <- ai$cn2m1 # this is a good approx 
+
+    ix <- data$ai<ai$cn3m1 # let all below cn3m1 in
+    med <- weightedMedian(data$ai[ix],data$snvs[ix]) 
+    ai$cn4m2 <- ifelse (!is.null(med),med,expectedAt)
+    if (ai$cn4m2>ai$cn2m1) ai$cn4m2 <- ai$cn2m1 # sanity check.
+
+    data <- data[!ix,] # coutinue with the remaining
+    # now 4(1) has less ai than 3(0) -> sits at about cn3m1+(cn3m1-cn2m1).
+    expectedAt <- ai$cn3m1+(ai$cn3m1-ai$cn2m1)
+    ix <- abs(data$ai-expectedAt)<abs(data$ai-ai$cn3m0) # take those closer to exp than to 3(0)
+    med <- weightedMedian(data$ai[ix],data$snvs[ix]) 
+    ai$cn4m1 <- ifelse (!is.null(med),med,expectedAt)
+
+    # now 4(0) is at about 3(0) + [0.9 - 3(0)] * [3(0)-2(0)] / [0.9-2(0)] 
+    expectedAt <- ai$cn3m0 + (ceiling-ai$cn3m0)*(ai$cn3m0-ai$cn2m0)/(ceiling-ai$cn2m0)
+    ix <- abs(data$ai-expectedAt) < 0.2*(expectedAt-ai$cn4m2) # we take those very close to exp
+    med <- weightedMedian(data$ai[ix],data$snvs[ix])
+     try (if (med<ai$cn3m0) med <- NULL, silent=T)    ## in case of heterogeneities or strange signals, we disallow this effect.
+    ai$cn4m0 <- ifelse (!is.null(med),med,expectedAt)
+
+    ## generalization for higher copy numbers. it gets complicated.
+    for (cn in 5:maxCn) {
+    thisCn <- paste('cn',cn,sep='')
+    prevCn <- paste('cn',cn-1,sep='')
+    pprevCn <- paste('cn',cn-2,sep='')
+    
+    ix <- (abs(regions$median - int[thisCn][[1]]) < 0.2*(int[thisCn][[1]]-int[prevCn][[1]]) )
+    data <- regions[ix,]
+    data <- data[!is.na(data$ai),]
+    data <- data[data$np>50,] # long regions for safety
+
+    ## try to find variants, starting with LOH
+    # LOH such as 5(0)
+    m <- 0
+    thisVariant=paste(thisCn,'m',0,sep='')
+    c4m0 <- ai[paste(prevCn,'m',m,sep='')][[1]] # relative naming for clarity
+    c3m0 <- ai[paste(pprevCn,'m',m,sep='')][[1]]
+    
[TRUNCATED]

To get the complete diff run:
    svnlook diff /svnroot/patchwork -r 35


More information about the Patchwork-commits mailing list