[Patchwork-commits] r65 - in pkg: . patchworkCG patchworkCG/R patchworkCG/data patchworkCG/man
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Tue Jan 10 17:12:55 CET 2012
Author: sebastian_d
Date: 2012-01-10 17:12:55 +0100 (Tue, 10 Jan 2012)
New Revision: 65
Added:
pkg/patchworkCG/
pkg/patchworkCG/DESCRIPTION
pkg/patchworkCG/NAMESPACE
pkg/patchworkCG/R/
pkg/patchworkCG/R/CG_KaCh.r
pkg/patchworkCG/R/CG_KaChCN.r
pkg/patchworkCG/R/CG_Ka_check.r
pkg/patchworkCG/R/CG_karyotype.r
pkg/patchworkCG/R/patchwork.AI.r
pkg/patchworkCG/R/patchwork.CG.r
pkg/patchworkCG/R/patchwork.CGCNV.r
pkg/patchworkCG/R/subfunctions_CGCNV.r
pkg/patchworkCG/README
pkg/patchworkCG/data/
pkg/patchworkCG/data/ideogram.RData
pkg/patchworkCG/man/
pkg/patchworkCG/man/CG_KaCh.Rd
pkg/patchworkCG/man/CG_KaChCN.Rd
pkg/patchworkCG/man/CG_Ka_check.Rd
pkg/patchworkCG/man/CG_karyotype.Rd
pkg/patchworkCG/man/ideogram.Rd
pkg/patchworkCG/man/patchwork.AI.Rd
pkg/patchworkCG/man/patchwork.CG.Rd
pkg/patchworkCG/man/patchwork.CGCNV.Rd
pkg/patchworkCG/man/subfunctions_CGCNV.Rd
Log:
Adding patchworkCG, which deals with CompleteGenomics Data.
Added: pkg/patchworkCG/DESCRIPTION
===================================================================
--- pkg/patchworkCG/DESCRIPTION (rev 0)
+++ pkg/patchworkCG/DESCRIPTION 2012-01-10 16:12:55 UTC (rev 65)
@@ -0,0 +1,9 @@
+Package: patchwork.CG
+Type: Package
+Title: Allele-specific Copy Number Analysis of CompleteGenomics Whole Genome data
+Version: 2.0
+Date: 2011-10-13
+Author: Markus Rasmussen, Sebastian DiLorenzo
+Maintainer: Markus Rasmussen <Markus.Rasmussen at medsci.uu.se>
+Description: Visualizes whole genome data
+License: GPL-2
Added: pkg/patchworkCG/NAMESPACE
===================================================================
--- pkg/patchworkCG/NAMESPACE (rev 0)
+++ pkg/patchworkCG/NAMESPACE 2012-01-10 16:12:55 UTC (rev 65)
@@ -0,0 +1,13 @@
+export(patchwork.AI,
+ patchwork.CG,
+ CG_Ka_check,
+ CG_KaCh,
+ CG_KaChCN,
+ CG_karyotype,
+ patchwork.CGCNV,
+ #subfunctions_findCNs
+ weightedMedian,
+ weightedMean,
+ is.autosome,
+ deChrom_ucsc,
+ chrom_ucsc)
\ No newline at end of file
Added: pkg/patchworkCG/R/CG_KaCh.r
===================================================================
--- pkg/patchworkCG/R/CG_KaCh.r (rev 0)
+++ pkg/patchworkCG/R/CG_KaCh.r 2012-01-10 16:12:55 UTC (rev 65)
@@ -0,0 +1,112 @@
+CG_KaCh <- function(chr,start,end,int,ai,mchr,mpos,mval,
+ sval,name='',xlim=c(0,2.4),ylim=c(0.3,1))
+ {
+
+ #packagepath = system.file(package="patchwork")
+
+ #load(paste(packagepath,"/data/ideogram.RData",sep=""))
+
+ data(ideogram,package="patchworkGC")
+
+ #create a color ramp palette to segue the color scheme for plot.
+
+ colors_p <- colorRampPalette(c("#6600FF","#9900CC"),space="rgb")
+ colors_q <- colorRampPalette(c("#CC0099","#CC0000"),space="rgb")
+
+ ai[is.na(ai)]=0
+ aix=ai!=0
+ chr=chr[aix]
+ start=start[aix]
+ end=end[aix]
+ int=int[aix]
+ ai=ai[aix]
+ pos <- (start+end)/2
+ length=end-start
+
+ size=rep(1,length(chr))
+ size[length>2000000]=2
+ size[length>5000000]=3
+ size[length>10000000]=4
+
+ for (c in 1:24)
+ {
+ this <- ideogram[ideogram$c==c,]
+
+ png(paste(name,'_KaCh_',this$chr,'.png',sep=''),width=1080,height=1300)
+ layout(matrix(1:3,nrow=3,byrow=T), widths=10,heights=c(10,5,5))
+
+ mar.default <- c(5,4,4,2) + 0.1
+ par(mar = mar.default + c(0, 4, 0, 0))
+
+ ix <- chr==this$chr
+ col <- rep('#B0B0B070',length(chr))
+ col[ix & (pos < this$mid)] <- paste(colors_p(sum(ix & (pos < this$mid))), '70', sep='')
+ col[ix & (pos > this$mid)] <- paste(colors_q(sum(ix & (pos > this$mid))), '70', sep='')
+
+ plot(c(int[!ix],int[ix]),c(ai[!ix],ai[ix]),
+ pch=16,
+ cex=c(size[!ix],size[ix]),
+ cex.lab=3,
+ #mar=c(0.1,0.1,0.1,0.1),
+ main = "",
+ xlab = "Total intensity -->",
+ ylab = '',
+ col = c(col[!ix],col[ix]),
+ xlim = xlim,
+ ylim = ylim,
+ yaxt="n",
+ xaxt="n")
+ axis(1,cex.axis=2)
+ axis(2,cex.axis=2)
+ mtext(text="Allelic imbalance -->",side=2,line=4,las=3,cex=2)
+
+ par(new=F)
+ mix <- mchr==as.character(this$chr)
+ #col <- rep('#D0D0D0D0',sum(mix))
+ #col[ix & (pos < this$mid)] <- paste(colors_p(sum(ix & (pos < this$mid))), '70', sep='')
+ #col[ix & (pos > this$mid)] <- paste(colors_q(sum(ix & (pos > this$mid))), '70', sep='')
+ col=rep('#000000',sum(ix))
+ col[pos[ix] < this$mid] <- colors_p(sum(pos[ix] < this$mid))
+ col[pos[ix] > this$mid] <- colors_q(sum(pos[ix] > this$mid))
+
+ plot(mpos[mix],mval[mix],
+ pch=16,
+ cex=2,
+ cex.lab=3,
+ #mar=c(0.1,0.1,0.1,0.1),
+ main = "",
+ xlab = "Total intensity",
+ ylab = "",
+ col = '#00000010',
+ xlim = c(0,this$length),
+ ylim = xlim,
+ yaxt="n",
+ xaxt="n")
+ axis(1,cex.axis=2)
+ axis(2,cex.axis=2)
+
+ segments(x0=start[ix],x1=end[ix],
+ y0=int[ix],y1=int[ix],
+ col=col,
+ lwd=4,)
+
+ par(new=F)
+ six <- mchr==as.character(this$chr)
+ plot(mpos[six],sval[six],
+ pch=16,
+ cex=2,
+ cex.lab=3,
+ #mar=c(0.1,0.1,0.1,0.1),
+ main = "",
+ xlab = "Allelic imbalance",
+ ylab = "",
+ col = '#00000010',
+ xlim = c(0,this$length),
+ ylim = c(0,1),
+ yaxt="n",
+ xaxt="n")
+ axis(1,cex.axis=2)
+ axis(2,cex.axis=2)
+ dev.off()
+ }
+ }
\ No newline at end of file
Added: pkg/patchworkCG/R/CG_KaChCN.r
===================================================================
--- pkg/patchworkCG/R/CG_KaChCN.r (rev 0)
+++ pkg/patchworkCG/R/CG_KaChCN.r 2012-01-10 16:12:55 UTC (rev 65)
@@ -0,0 +1,134 @@
+CG_KaChCN <- function(chr,start,end,int,ai,Cn,mCn,mchr,mpos,mval,
+ sval,name='',xlim=c(0,2.4),ylim=c(0,1),
+ maxCn=8)
+ {
+
+ #packagepath = system.file(package="patchwork")
+
+ #load(paste(packagepath,"/data/ideogram.RData",sep=""))
+
+ data(ideogram,package="patchworkCG")
+
+ colors_p <- colorRampPalette(c("#6600FF","#9900CC"),space="rgb")
+ colors_q <- colorRampPalette(c("#CC0099","#CC0000"),space="rgb")
+
+ ai[is.na(ai)]=0
+ aix=ai!=0
+ chr=chr[aix]
+ start=start[aix]
+ end=end[aix]
+ int=int[aix]
+ ai=ai[aix]
+ Cn=Cn[aix]
+ mCn=mCn[aix]
+ pos <- (start+end)/2
+ length=end-start
+
+ size=rep(1,length(chr))
+ size[length>2000000]=2
+ size[length>5000000]=3
+ size[length>10000000]=4
+
+ for (c in 1:24)
+ {
+ this <- ideogram[ideogram$c==c,]
+
+ png(paste(name,'_KaChCN_',this$chr,'.png',sep=''),width=1080,height=1300)
+ layout(matrix(1:4,nrow=4,byrow=T), widths=10,heights=c(10,5,5,5))
+
+ mar.default <- c(5,4,4,2) + 0.1
+ par(mar = mar.default + c(0, 4, 0, 0))
+
+ ix <- chr==this$chr
+ col <- rep('#B0B0B070',length(chr))
+ col[ix & (pos < this$mid)] <- paste(colors_p(sum(ix & (pos < this$mid))), '70', sep='')
+ col[ix & (pos > this$mid)] <- paste(colors_q(sum(ix & (pos > this$mid))), '70', sep='')
+
+ plot(c(int[!ix],int[ix]),c(ai[!ix],ai[ix]),
+ pch=16,
+ cex=c(size[!ix],size[ix]),
+ cex.lab=3,
+ #mar=c(0.1,0.1,0.1,0.1),
+ main = "",
+ xlab = "Total intensity -->",
+ #ylab = "Allelic imbalance -->",
+ ylab = '',
+ col = c(col[!ix],col[ix]),
+ xlim = xlim,
+ ylim = ylim,
+ yaxt="n",
+ xaxt="n")
+ axis(1,cex.axis=2)
+ axis(2,cex.axis=2)
+ mtext(text="Allelic imbalance -->",side=2,line=4,las=3,cex=2)
+
+ col=rep('#000000',sum(ix))
+ col[pos[ix] < this$mid] <- colors_p(sum(pos[ix] < this$mid))
+ col[pos[ix] > this$mid] <- colors_q(sum(pos[ix] > this$mid))
+ par(new=F)
+
+ plot(1,1,type='n',
+ xlab='Total and minor copy number',
+ ylab='',
+ cex.lab=3,
+ xlim = c(0,this$length),
+ ylim = c(-0.1,8.1),
+ yaxt="n",
+ xaxt="n")
+ axis(1,cex.axis=2)
+ axis(2,cex.axis=2)
+
+ segments(x0=start[ix],x1=end[ix],
+ y0=Cn[ix],y1=Cn[ix],
+ col=col,
+ lwd=5,) #Om fel, prova ta bort detta komma
+
+ segments(x0=start[ix],x1=end[ix],
+ y0=mCn[ix],y1=mCn[ix],
+ col='#BBBBBB',
+ lwd=5,) #Om fel, prova ta bort detta komma
+
+ par(new=F)
+ mix <- as.character(mchr)==this$chr
+ plot(mpos[mix],mval[mix],
+ pch=16,
+ cex=2,
+ cex.lab=3,
+ #mar=c(0.1,0.1,0.1,0.1),
+ main = "",
+ xlab = "Total intensity",
+ ylab = "",
+ col = '#00000010',
+ xlim = c(0,this$length),
+ ylim = xlim,
+ yaxt="n",
+ xaxt="n")
+ axis(1,cex.axis=2)
+ axis(2,cex.axis=2)
+
+ segments(x0=start[ix],x1=end[ix],
+ y0=int[ix],y1=int[ix],
+ col=col,
+ lwd=4,)
+
+ par(new=F)
+ six <- as.character(mchr)==this$chr
+
+ plot(mpos[six],sval[six],
+ pch=16,
+ cex=2,
+ cex.lab=3,
+ #mar=c(0.1,0.1,0.1,0.1),
+ main = "",
+ xlab = "Allelic imbalance",
+ ylab = "",
+ col = '#00000010',
+ xlim = c(0,this$length),
+ ylim = c(0,1),
+ yaxt="n",
+ xaxt="n")
+ axis(1,cex.axis=2)
+ axis(2,cex.axis=2)
+ dev.off()
+ }
+ }
\ No newline at end of file
Added: pkg/patchworkCG/R/CG_Ka_check.r
===================================================================
--- pkg/patchworkCG/R/CG_Ka_check.r (rev 0)
+++ pkg/patchworkCG/R/CG_Ka_check.r 2012-01-10 16:12:55 UTC (rev 65)
@@ -0,0 +1,66 @@
+CG_Ka_check<- function(chr,start,end,int,ai,Cn,mCn,t,name='',xlim=c(0,2.4),ylim=c(0,1))
+ {
+ ## TAPS scatter plot of a full sample, used for visual quality control.
+ png(paste(name,'_Ka_check.png',sep=''),width=1300,height=1300)
+
+
+ colors_p <- colorRampPalette(c("#6600FF","#9900CC"),space="rgb")
+ colors_q <- colorRampPalette(c("#CC0099","#CC0000"),space="rgb")
+
+ #layout(matrix(1:25,nrow=5,byrow=T), widths=1,heights=1)
+
+ aix=!(is.na(mCn)|is.na(Cn))
+ chr=chr[aix]
+ start=start[aix]
+ end=end[aix]
+ int=int[aix]
+ ai=ai[aix]
+ Cn=Cn[aix]
+ mCn=mCn[aix]
+ pos <- (start+end)/2
+ length=end-start
+ labels=paste(Cn,mCn,sep='m')
+
+ size=rep(1,length(chr))
+ size[length>2000000]=2
+ size[length>5000000]=3
+ size[length>10000000]=4
+
+ ## The variants present in this data
+ variants <- unique(labels)
+ variants_data <- NULL
+
+ ## Extracts the Log-R and Allelic Imbalance Ratio of variants
+ for (v in variants)
+ {
+ t_cn <- paste('cn',strsplit(v,'m')[[1]][1],sep='')
+ t_int <- as.numeric(t$int[t_cn][[1]])
+ t_ai <- as.numeric(t$ai[paste('cn',v,sep='')][[1]])
+ variants_data <- rbind(variants_data,c(t_int,t_ai))
+ }
+
+ col <- rep('#B0B0B070',length(chr))
+
+ plot(int,ai,
+ pch=16,
+ cex=c(size),
+ cex.lab=3,
+ mar=c(0.1,0.1,0.1,0.1),
+ main = "",
+ xlab = name,
+ ylab = "",
+ col = col,
+ xlim = xlim,
+ ylim = ylim,
+ yaxt="n",
+ xaxt="n")
+ axis(1,cex.axis=2)
+ axis(2,cex.axis=2)
+
+ text(variants_data[,1],variants_data[,2],
+ labels=variants,
+ col='black',
+ cex=2)
+
+ dev.off()
+ }
Added: pkg/patchworkCG/R/CG_karyotype.r
===================================================================
--- pkg/patchworkCG/R/CG_karyotype.r (rev 0)
+++ pkg/patchworkCG/R/CG_karyotype.r 2012-01-10 16:12:55 UTC (rev 65)
@@ -0,0 +1,69 @@
+CG_karyotype <- function(chr,start,end,int,ai,
+ name='',xlim=c(0,2.4),ylim=c(0.3,1))
+ {
+ png(paste(name,'_karyotype.png',sep=''),width=1300,height=1300)
+
+ #packagepath = system.file(package="patchwork")
+
+ #load(paste(packagepath,"/data/ideogram.RData",sep=""))
+
+ data(ideogram,package="patchworkCG")
+
+ colors_p <- colorRampPalette(c("#6600FF","#9900CC"),space="rgb")
+ colors_q <- colorRampPalette(c("#CC0099","#CC0000"),space="rgb")
+
+ layout(matrix(1:25,nrow=5,byrow=T), widths=1,heights=1)
+
+ aix=ai!=0
+ chr=chr[aix]
+ start=start[aix]
+ end=end[aix]
+ int=int[aix]
+ ai=ai[aix]
+ pos <- (start+end)/2
+ length=end-start
+
+ size=rep(0.5,length(chr))
+ size[length>2000000]=1
+ size[length>5000000]=2
+ size[length>10000000]=3
+
+
+ for (c in 1:24)
+ {
+
+ this <- ideogram[ideogram$c==c,]
+ ix <- chr==this$chr
+ ix[is.na(ix)]=FALSE
+
+ col <- rep('#B0B0B070',length(chr))
+
+ col[ix & (pos < this$mid)] <- paste(colors_p(sum(ix & (pos < this$mid))), '70', sep='')
+ col[ix & (pos > this$mid)] <- paste(colors_q(sum(ix & (pos > this$mid))), '70', sep='')
+
+ plot(c(int[!ix],int[ix]),c(ai[!ix],ai[ix]),
+ pch=16,
+ cex=c(size[!ix],size[ix]),
+ cex.lab=3,
+ mar=c(0.1,0.1,0.1,0.1),
+ main = "",
+ xlab = this$chr,
+ ylab = "",
+ col = c(col[!ix],col[ix]),
+ xlim = xlim,
+ ylim = ylim,
+ yaxt="n",
+ xaxt="n")
+ axis(1,cex.axis=1.5)
+ axis(2,cex.axis=1.5)
+ par(new=F)
+ }
+ dev.off()
+ }
+
+
+
+
+
+
+
Added: pkg/patchworkCG/R/patchwork.AI.r
===================================================================
--- pkg/patchworkCG/R/patchwork.AI.r (rev 0)
+++ pkg/patchworkCG/R/patchwork.AI.r 2012-01-10 16:12:55 UTC (rev 65)
@@ -0,0 +1,20 @@
+patchwork.AI <- function(segs,mastervar)
+ {
+ segs$ai <- segs$snvs <- segs$ratio <- NA
+
+ for (i in 1:nrow(segs))
+ {
+ #Calculate ratio between relcov and segment lengths
+ segs$ratio[i] = segs$relcov[i] / (sum(as.numeric(segs$relcov*(segs$end-segs$start))) / sum(as.numeric(segs$end - segs$start)))
+
+ #Index of correct snp positions to pull snp data from
+ # right chromosome
+ ix = mastervar$chr==as.character(segs$chr[i]) &
+ # right area (between start and end)
+ (mastervar$begin > segs$start[i] & mastervar$begin < segs$end[i])
+ segs$snvs[i] = sum(ix)
+ segs$ai[i] = 1 - sum(mastervar$min[ix]) / sum(mastervar$max[ix])
+ }
+ segs$ai[is.nan(segs$ai)]=0
+ return(segs)
+ }
\ No newline at end of file
Added: pkg/patchworkCG/R/patchwork.CG.r
===================================================================
--- pkg/patchworkCG/R/patchwork.CG.r (rev 0)
+++ pkg/patchworkCG/R/patchwork.CG.r 2012-01-10 16:12:55 UTC (rev 65)
@@ -0,0 +1,135 @@
+patchwork.CG <- function(path,name='CG_sample_',override_file_input = FALSE)
+ {
+
+ #------------------------------------------------#
+ # Read input files
+ #------------------------------------------------#
+
+ if(override_file_input==TRUE)
+ {
+ #Prompt for input of path to your files.
+ cat("ex: ~/user/documents/projekt/masterVarBeta-123-456-ASM-T1-N1.tsv")
+ cat("\n Enter complete path AND filename to your masterVarBeta file:")
+ mv = scan(n=1)
+ cat("\n ex: ~/user/documents/projekt/somaticCnvSegmentsNondiploidBeta-123-456-ASM-T1-N1.tsv")
+ cat("\n Enter complete path AND filename to your somaticCnvSegmentsNondiploidBeta file:")
+ sm = scan(n=1)
+
+ cat("\n Reading files from ASM folder \n")
+
+ #Read masterVarBeta
+ mastervar = read.table(mv, sep="\t",skip=21,
+ colClasses=c("NULL","NULL","character","integer","integer",
+ "NULL","character",rep("NULL",16),"numeric","numeric",
+ rep("NULL",17),"integer","integer",rep("NULL",5)))
+
+ #Read somaticCnvSegmentsNondiploidBeta in ASM/CNV folder
+ segm = read.table(sm,sep="\t",skip=21,
+ colClasses=c("character","integer","integer","numeric","numeric",
+ rep("NULL",7)))
+
+ cat("File input complete \n")
+ }
+ else
+ {
+ mastervar_ = list.files(path=path,pattern="masterVar")
+ path_segm = paste(path,"/CNV",sep="")
+ segm_ = list.files(path=path_segm,pattern="somaticCnvSegmentsNondiploid")
+
+ cat("\n Reading files from ASM folder \n")
+
+ #Read mastervarbeta file in ASM folder
+ mastervar = read.table(paste(path,"/",mastervar_,sep=""),
+ sep="\t",
+ colClasses=c("NULL","NULL","character","integer","integer",
+ "NULL","character",rep("NULL",16),"numeric","numeric",
+ rep("NULL",17),"integer","integer",rep("NULL",5)),
+ skip=21)
+
+ #Read somaticCnvSegmentsNondiploidBeta in ASM/CNV folder
+ segm = read.table(paste(path_segm,"/",segm_,sep=""),
+ sep="\t",skip=21,
+ colClasses=c("character","integer","integer","numeric","numeric",
+ rep("NULL",7)))
+
+ colnames(segm)=c("chr","start","end","avgnormcov","relcov")
+
+ colnames(mastervar)=c("chr","begin","end","vartype","ref_count","tot_count",
+ "ref_countN","tot_countN")
+
+ cat("File input complete \n")
+ }
+
+ #------------------------------------------------#
+ # Filter out unecessary/unwanted values
+ #------------------------------------------------#
+
+ #Remove all rows that have NA values in column 8.
+ mastervar = mastervar[!is.na(mastervar[,8]),]
+
+ #Only keep when referenceallelereadcount/totalreadcount is between 0.2 and 0.8
+ ix = ((mastervar$ref_countN/mastervar$tot_countN) > 0.2 &
+ (mastervar$ref_countN/mastervar$tot_countN) < 0.8)
+
+ mastervar = mastervar[ix,]
+
+ #Remove NA values generated by for example 0/0
+ mastervar = mastervar[!is.na(mastervar[,8]),]
+
+ #only keep stuff in column 4 that is snp
+ #mastervar = mastervar[mastervar$vartype == "snp",]
+ #norm_mv = norm_mv[norm_mv$vartype == "snp",]
+
+ #remove all NA values. (there are no reads on that position)
+ #mastervar = mastervar[!is.na(mastervar[,6]),]
+
+ #ix = mastervar$ref_count < 3 | mastervar$ref_count == 0 | mastervar$ref_count == mastervar$tot_count
+ #mastervar = mastervar[!ix,]
+
+ #ix = mastervar$ref_count == 0
+ #mastervar = mastervar[!ix,]
+
+ #ix = mastervar$ref_count == mastervar$tot_count
+ #mastervar = mastervar[!ix,]
+
+ #------------------------------------------------#
+ # Add and calculate some columns for mastervar
+ #------------------------------------------------#
+
+ cat("Performing calculations \n")
+
+ mastervar$ratio <- mastervar$min <- mastervar$max <- mastervar$mut_count <- NA
+
+ #Calculate and fill min/max columns
+ mastervar$mut_count = mastervar$tot_count - mastervar$ref_count
+ mastervar$max = apply(mastervar[,c(5,9)],1,max)
+ mastervar$min = mastervar$tot_count - mastervar$max
+ mastervar$ratio = mastervar$tot_count / mean(mastervar$tot_count)
+
+ #------------------------------------------------#
+ # Create segs using patchwork.AI, save files
+ #------------------------------------------------#
+
+ segs = patchwork.AI(segm,mastervar)
+
+ cat("Calculations complete \n")
+
+ cat("Saving objects to CG.Rdata \n")
+
+ save(segs,mastervar,file="CG.Rdata")
+
+ #------------------------------------------------#
+ # Plot
+ #------------------------------------------------#
+
+
+ cat("Initiating Plotting \n")
+ CG_karyotype(segs$chr,segs$start,segs$end,segs$ratio,segs$ai,
+ name=name,xlim=c(0,2.4),ylim=c(0,1))
+
+ CG_KaCh(segs$chr,segs$start,segs$end,segs$ratio,segs$ai,
+ mastervar$chr,mastervar$begin,mastervar$ratio,(1 - mastervar$min/mastervar$max),
+ name=name,xlim=c(0,2.4),ylim=c(0,1))
+ cat("Plotting Complete \n")
+ cat("patchwork.CG Complete \n Please read documentation on running patchwork.CGCNV \n")
+ }
\ No newline at end of file
Added: pkg/patchworkCG/R/patchwork.CGCNV.r
===================================================================
--- pkg/patchworkCG/R/patchwork.CGCNV.r (rev 0)
+++ pkg/patchworkCG/R/patchwork.CGCNV.r 2012-01-10 16:12:55 UTC (rev 65)
@@ -0,0 +1,284 @@
+patchwork.CGCNV = function(cn2,delta,het,hom,maxCn=8,ceiling=1,name="CGCNV_")
+ {
+
+ #packagepath = system.file(package="patchwork")
+ #load(paste(packagepath,"/data/commonSnps132.RData",sep=""))
+ #load(paste(packagepath,"/data/ideogram.RData",sep=""))
+ data(ideogram,package="patchworkCG")
+ load("CG.Rdata")
+
+ voidchrom <- c('chrX','chrY') # may add non-integer chroms here....
+
+ tix=NULL #temporary index
+ int=NULL ## contains coverage estimate of each (total) copy number
+ ai=NULL ## contains Allelic Imbalance Ratio estimate of each copy number variant.
+ regions <- segs
+ regions$np <- round((regions$end - regions$start)/10000)
+ regions <- regions[(is.autosome(regions$chr)®ions$np>50)&(!is.nan(regions$ai)),] ## will use these regions
+ regions <- regions[!is.na(regions$ratio),]
+
+ ## likely cn2 regions sit within delta/3 from cn2.
+ expectedAt <- cn2
+ tix$cn2 <- abs(regions$ratio - expectedAt) < (delta/3) ## index of likely cn2 regions
+ temp <- regions[tix$cn2,] ## cn2 regions
+ med <- weightedMedian(temp$median,temp$np) ## improved value of Log-R at cn2 (returns NULL if theres nothing there)
+ int$cn2 <- ifelse(!is.null(med),med,expectedAt) ## saved to int.
+
+ ## likely cn1 regions sit at about cn2 - delta:
+ d <- delta ## the (Log-R) distance to cn1
+ expectedAt <- int$cn2-d ## cn1 is expected here
+ tix$cn1 <- abs(regions$ratio - expectedAt) < (d/3) ## index of likely cn1 regions
+ temp <- regions[tix$cn1,]
+ med <- weightedMedian(temp$median,temp$np)
+ int$cn1 <- ifelse(!is.null(med),med,expectedAt)
+
+ ## likely cn0 regions sit below cn1 - delta:
+ d <- int$cn2-int$cn1
+ expectedAt <- int$cn1-d
+ tix$cn0 <- regions$ratio < expectedAt
+ temp <- regions[tix$cn0,]
+ med <- weightedMedian(temp$median,temp$np)
+ int$cn0 <- ifelse(!is.null(med),med,expectedAt)
+
+ ## likely cn3 regions sit at about cn2+delta
+ d <- delta
+ expectedAt <- int$cn2+d
+ tix$cn3 <- abs(regions$ratio - expectedAt) < (d/3)
+ temp <- regions[tix$cn3,]
+ med <- weightedMedian(temp$median,temp$np)
+ int$cn3 <- ifelse(!is.null(med),med,expectedAt)
+
+ ## cn4 follows at ...
+ d <- delta
+ expectedAt <- int$cn3+d
+ tix$cn4 <- abs(regions$ratio - expectedAt) < (d/4)
+ temp <- regions[tix$cn4,]
+ med <- weightedMedian(temp$median,temp$np)
+ int$cn4 <- ifelse(!is.null(med),med,expectedAt)
+
+ ## generalized for higher cns
+ for (cn in 5:maxCn)
+ {
+ thisCn <- paste('cn',cn,sep='')
+ prevCn <- paste('cn',cn-1,sep='')
+ pprevCn <- paste('cn',cn-2,sep='')
+ d <- delta #dmin*(int[prevCn][[1]]-int[pprevCn][[1]])
+ expectedAt <- int[prevCn][[1]]+d
+ tix[[thisCn]] <- abs(regions$ratio - expectedAt) < (d/5)
+ temp <- regions[tix[thisCn][[1]],]
+ med <- weightedMedian(temp$median,temp$np)
+ int[thisCn] <- ifelse(!is.null(med),med,expectedAt)
+ }
+
+ # smooth the cn relationship ?
+
+ ## at cn2, find the variant clusters (normal and CNNLOH)
+ ix <- (abs(regions$ratio - int$cn2) < 0.2*(int$cn3-int$cn2) ) # taking only closely-matching segments
+ data <- regions[ix,]
+ data <- data[!is.na(data$ai),] # ...with a calculated allelic imbalance.
+
+ expectedAt <- het
+ ix <- 2*abs(data$ai-het) < abs(data$ai-hom)
+ med <- weightedMedian(data$ai[ix],data$snvs[ix])
+ ai$cn2m1 <- ifelse (!is.null(med),med,expectedAt)
+
+ expectedAt <- hom
+ ix <- 2*abs(data$ai-hom) < abs(data$ai-het)
+ med <- weightedMedian(data$ai[ix],data$snvs[ix])
+ ai$cn2m0 <- ifelse (!is.null(med),med,expectedAt)
+
+ ## for cn1 (and 0)
+ ix <- (abs(regions$ratio - int$cn1) < 0.2*(int$cn2-int$cn1) )
+ data <- regions[ix,]
+ data <- data[!is.na(data$ai),]
+ expectedAt <- (ai$cn2m0+ai$cn2m1)*3/5 ## Decent estimate.
+ med <- weightedMedian(data$ai,data$snvs) ## Average allelic imbalance weighted on snp count
+ ai$cn1m0 <- ifelse (!is.null(med),med,expectedAt) ## Will be NA if there was no CNNLOH
+ ai$cn0m0 <- NA #unimportant
+
+ ## for cn3:
+ ix <- (abs(regions$ratio - int$cn3) < 0.2*(int$cn4-int$cn3) )
+ data <- regions[ix,]
+ data <- data[!is.na(data$ai),]
+
+ range <- ai$cn2m0-ai$cn2m1 #the distance between 2normal and CNNLOH
+ # get the 3(1) regions:
+ expectedAt <- ai$cn2m1+range/3 # this is an approx if cn3m1 is absent.
+
+ ix <- (data$ai<ai$cn2m0) & (data$ai>ai$cn2m1) # take regions with less AI than cn2m0 but more than cn2m1
+ med <- weightedMedian(data$ai[ix],data$snvs[ix]) # average allelic imbalance weighted on snp count
+ ai$cn3m1 <- ifelse (!is.null(med),med,expectedAt)
+
+ # now for cn3m0
+ expectedAt <- ai$cn3m1 + range # approx of cn3m0
+ ix <- (abs(data$ai-expectedAt) < (expectedAt-ai$cn3m1)/4) # take those much closer to exp than to cn3m1
+ med <- weightedMedian(data$ai[ix],data$snvs[ix])
+ try (if (med<ai$cn2m0) med <- NULL, silent=T) ## in case of heterogeneities or strange signals, we disallow this effect.
+ ai$cn3m0 <- ifelse (!is.null(med),med,expectedAt)
+
+ if (is.na(ai$cn1m0)) ai$cn1m0 <- mean(ai$cn3m1,ai$cn2m0) ## If deletions were missing, place an estimate from cn3
+
+ ## now for cn4
+ ix <- abs(regions$ratio - int$cn4) < 0.2*(int$cn4-int$cn3)
+ data <- regions[ix,]
+ data <- data[!is.na(data$ai),]
+
+ # get the 4(2) regions: they are at AI about cn2m1
+ expectedAt <- ai$cn2m1 # this is a good approx
+
+ ix <- data$ai<ai$cn3m1 # let all below cn3m1 in
+ med <- weightedMedian(data$ai[ix],data$snvs[ix])
+ ai$cn4m2 <- ifelse (!is.null(med),med,expectedAt)
+ if (ai$cn4m2>ai$cn2m1) ai$cn4m2 <- ai$cn2m1 # sanity check.
+
+ data <- data[!ix,] # coutinue with the remaining
+ # now 4(1) has less ai than 3(0) -> sits at about cn3m1+(cn3m1-cn2m1).
+ expectedAt <- ai$cn3m1+(ai$cn3m1-ai$cn2m1)
+ ix <- abs(data$ai-expectedAt)<abs(data$ai-ai$cn3m0) # take those closer to exp than to 3(0)
+ med <- weightedMedian(data$ai[ix],data$snvs[ix])
+ ai$cn4m1 <- ifelse (!is.null(med),med,expectedAt)
+
+ # now 4(0) is at about 3(0) + [0.9 - 3(0)] * [3(0)-2(0)] / [0.9-2(0)]
+ expectedAt <- ai$cn3m0 + (ceiling-ai$cn3m0)*(ai$cn3m0-ai$cn2m0)/(ceiling-ai$cn2m0)
+ ix <- abs(data$ai-expectedAt) < 0.2*(expectedAt-ai$cn4m2) # we take those very close to exp
+ med <- weightedMedian(data$ai[ix],data$snvs[ix])
+ try (if (med<ai$cn3m0) med <- NULL, silent=T) ## in case of heterogeneities or strange signals, we disallow this effect.
+ ai$cn4m0 <- ifelse (!is.null(med),med,expectedAt)
+
+ ## generalization for higher copy numbers. it gets complicated.
+ for (cn in 5:maxCn) {
+ thisCn <- paste('cn',cn,sep='')
+ prevCn <- paste('cn',cn-1,sep='')
+ pprevCn <- paste('cn',cn-2,sep='')
+
+ ix <- (abs(regions$ratio - int[thisCn][[1]]) < 0.2*(int[thisCn][[1]]-int[prevCn][[1]]) )
+ data <- regions[ix,]
+ data <- data[!is.na(data$ai),]
+ data <- data[data$np>50,] # long regions for safety
+
+ ## try to find variants, starting with LOH
+ # LOH such as 5(0)
+ m <- 0
+ thisVariant=paste(thisCn,'m',0,sep='')
+ c4m0 <- ai[paste(prevCn,'m',m,sep='')][[1]] # relative naming for clarity
+ c3m0 <- ai[paste(pprevCn,'m',m,sep='')][[1]]
+
+ expectedAt <- ceiling-((ceiling-c4m0)*(ceiling-max(c4m0,c3m0))/(ceiling-min(c3m0,c4m0)))
+ #ai[thisVariant] <- expectedAt
+ # we then take those close to exp (within delta[3(0),4(0)])
+ ix <- abs(data$ai-expectedAt) < abs(c4m0 - c3m0)
+ med <- weightedMedian(data$ai[ix],data$snvs[ix])
+ try (if (med<ai$cn4m0) med <- NULL, silent=T) ## in case of heterogeneities or strange signals, we disallow this effect.
+ ai[thisVariant] <- ifelse (!is.null(med),med,expectedAt)
+
+ ## then from balanced to less balanced
+ minorVariants=trunc(cn/2):1
+ first <- T
+ for (m in minorVariants) {
+ thisVariant=paste(thisCn,'m',m,sep='')
+ if (m==cn/2) {
+ # We have balanced variant, so rather easy.
+ expectedAt <- ai[paste(pprevCn,'m',m-1,sep='')][[1]] # this is a good approx, balanced at cn-2
+ ix <- data$ai<ai[paste(prevCn,'m',m-1,sep='')][[1]] # let all below (cn-1, mcn-1) in
+ med <- weightedMedian(data$ai[ix],data$snvs[ix])
+ ai[thisVariant] <- ifelse (!is.null(med),med,expectedAt)
+ if (ai[thisVariant] > ai[paste(pprevCn,'m',m-1,sep='')][[1]]) ai[thisVariant] <- ai[paste(pprevCn,'m',m-1,sep='')][[1]] # sanity check.
+ first <- F
+ } else if (first) {
+ # its not balanced but its the most balanced of the unbalanced. something like 5(2)
+ expectedAt <- 0.5*( ai[paste(prevCn,'m',m,sep='')][[1]] + ai[paste(pprevCn,'m',m-1,sep='')][[1]] ) # that means between 4(2) and 3(1)
+ ix <- abs(data$ai-expectedAt) < ( ai[paste(prevCn,'m',m,sep='')][[1]] - ai[paste(pprevCn,'m',m-1,sep='')][[1]] ) /3 # let all "between" 4(2) and 3(1) in
+ med <- weightedMedian(data$ai[ix],data$snvs[ix])
+ ai[thisVariant] <- ifelse (!is.null(med),med,expectedAt)
+ first <- F
+ } else {
+ # not the most balanced unbalanced variant, for example 5(1):
+ expectedAt <- ai[paste(thisCn,'m',minorVariants[1],sep='')][[1]] + (minorVariants[1]-m) * (ai[paste(thisCn,'m',0,sep='')][[1]] - ai[paste(thisCn,'m',minorVariants[1],sep='')][[1]]) / trunc(cn/2) # 5(2) + (which)* 5(0)-5(2) /(n)
+ ix <- abs(data$ai-expectedAt) < ( ai[paste(prevCn,'m',m,sep='')][[1]] - ai[paste(pprevCn,'m',m-1,sep='')][[1]] ) /3 # let all "between" 4(1) and 3(0) in
+ med <- weightedMedian(data$ai[ix],data$snvs[ix])
+ ai[thisVariant] <- ifelse (!is.null(med),med,expectedAt)
+ }
+ } # done with minor variants
+ } # done with copy numbers
+
+ sampleInfo <- list('int'=int,'ai'=ai)
+
+ ## Assign total and minor copy numbers to all segments.
+ regions <- segs[!is.na(segs$ratio),] ## This time, work on all segments available.
+
+ Cn <- NULL ## Total copy number
+ mCn <- NULL ## Minor allele copy number
+ fullCN <- NULL ## Variant label. ('cnXmY')
+
+ intDist <- NULL ## distance to certain Log-R
+ imbaDist <- NULL ## distance to certain allelic imbalance
+
+
+
+ ## give each segment the best matching CN and mCN
+ for (i in 1:nrow(regions))
+ {
+ # set total copy number
+ distance <- Inf
+ for (cn in 0:maxCn)
+ {
+ t_int <- int[paste('cn',cn,sep='')][[1]] ## get Log-R of particular cn from 'int'
+ t_dis <- abs(regions$ratio[i]-t_int) ## distance to that particular cn
+ if (t_dis < distance)
+ { ## nearest so far, save.
+ distance <- t_dis -> intDist[i]
+ Cn[i] <- cn
+ }
+ }
+ }
+ # Y makes CN0 sit very low, this is a fix on non-Y Cn0
+ #if ((Cn[i] == 1)&(intDist[i] > (int$cn2-int$cn1))) Cn[i] <- 0 ## currently not needed.
+ for (i in 1:nrow(regions))
+ {
+ # set minor CN
+ distance <- Inf
+ if (Cn[i]<=1)
+ {
+ mCn[i] <- 0
+ }
+ else if (!is.na(regions$ai[i]))
+ if (!is.nan(regions$ai[i]))
+ for (m in 0:trunc(Cn[i]/2))
+ {
+ t_ai <- ai[paste('cn',Cn[i],'m',m,sep='')][[1]]
+ t_dis <- abs(regions$ai[i]-t_ai)
+ if (t_dis < distance)
+ {
+ distance <- t_dis -> imbaDist[i]
+ mCn[i] <- m
+ }
+ }
+ else
+ mCn[i] <- NA
+ fullCN[i] <- paste('cn',Cn[i],'m',mCn[i],sep='') # Full description
+ }
+
+ regions$Cn <- Cn
+ regions$mCn <- mCn
+ regions$fullCN <- fullCN
+
+ write.csv(regions,file='Copynumbers.csv')
+
+ meanCn <- weightedMean(regions$Cn,regions$np)
+ ix1 <- regions$Cn==trunc(meanCn)
+ ix2 <- regions$Cn==ceiling(meanCn)
+ xdelta <- weightedMean(regions$ratio[ix2],regions$np[ix2]) - weightedMean(regions$ratio[ix1],regions$np[ix1])
+ expected_delta <- 1/meanCn
+
+ tumor_percent <- xdelta / expected_delta
+
+ CG_Ka_check(regions$chr,regions$start,regions$end,regions$ratio,regions$ai,
+ regions$Cn,regions$mCn,list(int=int,ai=ai),name=name,
+ xlim=c(0,2.4),ylim=c(0,1))
+
+ CG_KaChCN(regions$chr,regions$start,regions$end,regions$ratio,regions$ai,
+ regions$Cn,regions$mCn,mastervar$chr,
+ mastervar$begin,mastervar$ratio,
+ (1 - mastervar$min/mastervar$max),name=name,xlim=c(0,2.4),
+ ylim=c(0,1))
+ }
\ No newline at end of file
Added: pkg/patchworkCG/R/subfunctions_CGCNV.r
===================================================================
--- pkg/patchworkCG/R/subfunctions_CGCNV.r (rev 0)
+++ pkg/patchworkCG/R/subfunctions_CGCNV.r 2012-01-10 16:12:55 UTC (rev 65)
@@ -0,0 +1,50 @@
+###################### functions
+weightedMedian <- function(data,weights) {
+ ix <- rep(T,length(data))
+ if (length(ix)==0) return(NULL)
+ try( ix <- !is.na(data+weights) & !is.nan(data+weights), silent=T)
+ try( return (median(rep(data[ix],ceiling(weights[ix])))), silent=T)
+ return (NULL)
+}
+weightedMean <- function(data,weights) {
+ ix <- rep(T,length(data))
+ if (length(ix)==0) return(NULL)
+ try( ix <- !is.na(data+weights) & !is.nan(data+weights), silent=T)
+ try( return (mean(rep(data[ix],ceiling(weights[ix])))), silent=T)
+ return (NULL)
+}
+is.autosome <- function(vector) {
+ temp <- deChrom_ucsc(vector)
+ return (temp<23)
+}
+deChrom_ucsc <- function(data) {
+## chroms in 1:24 and chr1:chrY format conversion
+if (length(data)==0) return (data)
+keep_index=NULL
+CHR=rep(0,length(data))
+existing=unique(data)
+for (i in 1:length(existing)) {
+ temp=strsplit(as.character(existing[i]),'_')
[TRUNCATED]
To get the complete diff run:
svnlook diff /svnroot/patchwork -r 65
More information about the Patchwork-commits
mailing list