[Patchwork-commits] r189 - .git .git/logs .git/logs/refs/heads .git/logs/refs/remotes/origin .git/refs/heads .git/refs/remotes/origin pkg/TAPS pkg/TAPS/R pkg/TAPS/man pkg/patchwork pkg/patchwork/R pkg/patchwork/man www
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Wed Sep 11 14:49:44 CEST 2013
Author: sebastian_d
Date: 2013-09-11 14:49:43 +0200 (Wed, 11 Sep 2013)
New Revision: 189
Added:
pkg/TAPS/man/TAPS_compare.Rd
pkg/TAPS/man/TAPS_freq.Rd
Modified:
.git/COMMIT_EDITMSG
.git/index
.git/logs/HEAD
.git/logs/refs/heads/master
.git/logs/refs/remotes/origin/master
.git/refs/heads/master
.git/refs/remotes/origin/master
pkg/TAPS/DESCRIPTION
pkg/TAPS/NAMESPACE
pkg/TAPS/R/TAPS.r
pkg/TAPS/R/zzz.r
pkg/TAPS/man/TAPS_call.Rd
pkg/patchwork/DESCRIPTION
pkg/patchwork/NAMESPACE
pkg/patchwork/R/karyotype.r
pkg/patchwork/R/karyotype_check.r
pkg/patchwork/R/karyotype_chroms.r
pkg/patchwork/R/karyotype_chromsCN.r
pkg/patchwork/R/patchwork.alleledata.r
pkg/patchwork/R/patchwork.copynumbers.r
pkg/patchwork/R/patchwork.plot.r
pkg/patchwork/man/karyotype.Rd
pkg/patchwork/man/karyotype_chroms.Rd
pkg/patchwork/man/karyotype_chromsCN.Rd
www/TAPS_inst.php
www/changelog.php
Log:
Extensive changes to patchwork plotting. New functions, TAPS_compare and TAPS_freq, to TAPS
Modified: .git/COMMIT_EDITMSG
===================================================================
--- .git/COMMIT_EDITMSG 2013-09-05 07:39:10 UTC (rev 188)
+++ .git/COMMIT_EDITMSG 2013-09-11 12:49:43 UTC (rev 189)
@@ -1 +1 @@
-homepage info update pysam
+some updates to taps homepage info
Modified: .git/index
===================================================================
(Binary files differ)
Modified: .git/logs/HEAD
===================================================================
--- .git/logs/HEAD 2013-09-05 07:39:10 UTC (rev 188)
+++ .git/logs/HEAD 2013-09-11 12:49:43 UTC (rev 189)
@@ -58,3 +58,10 @@
5a2afd158f70d21ed8bf6a45f61a19dbbb6b79c2 c08551d489e8bc28692c600dfffa7e7d4cb37ca5 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1372256038 +0200 commit: Move pysam out of package
c08551d489e8bc28692c600dfffa7e7d4cb37ca5 b617a63c7595a2cb1b2a85e2ed73a67eccac8b32 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1372346578 +0200 commit: homepage info update pysam
b617a63c7595a2cb1b2a85e2ed73a67eccac8b32 f626f1a4b4965df8008d349712318cedf6dde9eb Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1372427760 +0200 pull : Fast-forward
+f626f1a4b4965df8008d349712318cedf6dde9eb 9327100077803d89d70ede58fe98f0949fe726e8 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1376640199 +0200 commit: Updated plotting procedures significantly of patchwork.
+9327100077803d89d70ede58fe98f0949fe726e8 43c186b570d94002d0576a8f805be76267cf2b6a Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1376644958 +0200 commit: deleted two unused pngs
+43c186b570d94002d0576a8f805be76267cf2b6a d63069fedd7b5d39e37a64432b37a149711b9b21 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1377264167 +0200 commit: some changes to pw_requ to avoid missunderstandings
+d63069fedd7b5d39e37a64432b37a149711b9b21 74262096bdcb8c53bcf9f4719ff9ae6b23565262 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1378892982 +0200 commit: some updates to taps homepage info
+74262096bdcb8c53bcf9f4719ff9ae6b23565262 884409fb1057881cbdbd2db1c86c85d72bf477c6 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1378894727 +0200 pull : Fast-forward
+884409fb1057881cbdbd2db1c86c85d72bf477c6 7375bc63263a290cbfb537af8c38af2df6ba0f0c Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1378902806 +0200 pull : Fast-forward
+7375bc63263a290cbfb537af8c38af2df6ba0f0c 6f560c07eeb2bc5a9c449af56829257933ebcf87 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1378903472 +0200 pull : Fast-forward
Modified: .git/logs/refs/heads/master
===================================================================
--- .git/logs/refs/heads/master 2013-09-05 07:39:10 UTC (rev 188)
+++ .git/logs/refs/heads/master 2013-09-11 12:49:43 UTC (rev 189)
@@ -58,3 +58,10 @@
5a2afd158f70d21ed8bf6a45f61a19dbbb6b79c2 c08551d489e8bc28692c600dfffa7e7d4cb37ca5 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1372256038 +0200 commit: Move pysam out of package
c08551d489e8bc28692c600dfffa7e7d4cb37ca5 b617a63c7595a2cb1b2a85e2ed73a67eccac8b32 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1372346578 +0200 commit: homepage info update pysam
b617a63c7595a2cb1b2a85e2ed73a67eccac8b32 f626f1a4b4965df8008d349712318cedf6dde9eb Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1372427760 +0200 pull : Fast-forward
+f626f1a4b4965df8008d349712318cedf6dde9eb 9327100077803d89d70ede58fe98f0949fe726e8 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1376640199 +0200 commit: Updated plotting procedures significantly of patchwork.
+9327100077803d89d70ede58fe98f0949fe726e8 43c186b570d94002d0576a8f805be76267cf2b6a Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1376644958 +0200 commit: deleted two unused pngs
+43c186b570d94002d0576a8f805be76267cf2b6a d63069fedd7b5d39e37a64432b37a149711b9b21 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1377264167 +0200 commit: some changes to pw_requ to avoid missunderstandings
+d63069fedd7b5d39e37a64432b37a149711b9b21 74262096bdcb8c53bcf9f4719ff9ae6b23565262 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1378892982 +0200 commit: some updates to taps homepage info
+74262096bdcb8c53bcf9f4719ff9ae6b23565262 884409fb1057881cbdbd2db1c86c85d72bf477c6 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1378894727 +0200 pull : Fast-forward
+884409fb1057881cbdbd2db1c86c85d72bf477c6 7375bc63263a290cbfb537af8c38af2df6ba0f0c Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1378902806 +0200 pull : Fast-forward
+7375bc63263a290cbfb537af8c38af2df6ba0f0c 6f560c07eeb2bc5a9c449af56829257933ebcf87 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1378903472 +0200 pull : Fast-forward
Modified: .git/logs/refs/remotes/origin/master
===================================================================
--- .git/logs/refs/remotes/origin/master 2013-09-05 07:39:10 UTC (rev 188)
+++ .git/logs/refs/remotes/origin/master 2013-09-11 12:49:43 UTC (rev 189)
@@ -56,3 +56,10 @@
5a2afd158f70d21ed8bf6a45f61a19dbbb6b79c2 c08551d489e8bc28692c600dfffa7e7d4cb37ca5 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1372256058 +0200 update by push
c08551d489e8bc28692c600dfffa7e7d4cb37ca5 b617a63c7595a2cb1b2a85e2ed73a67eccac8b32 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1372346594 +0200 update by push
b617a63c7595a2cb1b2a85e2ed73a67eccac8b32 f626f1a4b4965df8008d349712318cedf6dde9eb Sebastian DiLorenzo <S_D at imv096.medsci.uu.se> 1372427760 +0200 pull : fast-forward
+f626f1a4b4965df8008d349712318cedf6dde9eb 9327100077803d89d70ede58fe98f0949fe726e8 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1376640235 +0200 update by push
+9327100077803d89d70ede58fe98f0949fe726e8 43c186b570d94002d0576a8f805be76267cf2b6a Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1376644972 +0200 update by push
+43c186b570d94002d0576a8f805be76267cf2b6a d63069fedd7b5d39e37a64432b37a149711b9b21 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1377264183 +0200 update by push
+d63069fedd7b5d39e37a64432b37a149711b9b21 74262096bdcb8c53bcf9f4719ff9ae6b23565262 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1378893000 +0200 update by push
+74262096bdcb8c53bcf9f4719ff9ae6b23565262 884409fb1057881cbdbd2db1c86c85d72bf477c6 Sebastian DiLorenzo <S_D at imv092.medsci.uu.se> 1378894674 +0200 fetch: fast-forward
+884409fb1057881cbdbd2db1c86c85d72bf477c6 7375bc63263a290cbfb537af8c38af2df6ba0f0c Sebastian DiLorenzo <S_D at imv092.medsci.uu.se> 1378902806 +0200 pull : fast-forward
+7375bc63263a290cbfb537af8c38af2df6ba0f0c 6f560c07eeb2bc5a9c449af56829257933ebcf87 Sebastian DiLorenzo <S_D at imv092.medsci.uu.se> 1378903403 +0200 pull : fast-forward
Modified: .git/refs/heads/master
===================================================================
--- .git/refs/heads/master 2013-09-05 07:39:10 UTC (rev 188)
+++ .git/refs/heads/master 2013-09-11 12:49:43 UTC (rev 189)
@@ -1 +1 @@
-f626f1a4b4965df8008d349712318cedf6dde9eb
+6f560c07eeb2bc5a9c449af56829257933ebcf87
Modified: .git/refs/remotes/origin/master
===================================================================
--- .git/refs/remotes/origin/master 2013-09-05 07:39:10 UTC (rev 188)
+++ .git/refs/remotes/origin/master 2013-09-11 12:49:43 UTC (rev 189)
@@ -1 +1 @@
-f626f1a4b4965df8008d349712318cedf6dde9eb
+6f560c07eeb2bc5a9c449af56829257933ebcf87
Modified: pkg/TAPS/DESCRIPTION
===================================================================
--- pkg/TAPS/DESCRIPTION 2013-09-05 07:39:10 UTC (rev 188)
+++ pkg/TAPS/DESCRIPTION 2013-09-11 12:49:43 UTC (rev 189)
@@ -1,8 +1,8 @@
Package: TAPS
Type: Package
Title: Tumor Abberation Prediction Suite
-Version: 1.0
-Date: 2013-05-11
+Version: 2.0
+Date: 2013-09-10
Author: Markus Mayrhofer, Hanna Goransson-Kultima, Sebastian DiLorenzo
Maintainer: Sebastian DiLorenzo <Sebastian.DiLorenzo at medsci.uu.se> #Markus Mayrhofer <Markus.Mayrhofer at medsci.uu.se>
Description: Performs a allele-specific copy number analysis of array data.
Modified: pkg/TAPS/NAMESPACE
===================================================================
--- pkg/TAPS/NAMESPACE 2013-09-05 07:39:10 UTC (rev 188)
+++ pkg/TAPS/NAMESPACE 2013-09-11 12:49:43 UTC (rev 189)
@@ -1,5 +1,7 @@
export(TAPS_plot,
TAPS_call,
- TAPS_region)
+ TAPS_region,
+ TAPS_freq,
+ TAPS_compare)
Modified: pkg/TAPS/R/TAPS.r
===================================================================
--- pkg/TAPS/R/TAPS.r 2013-09-05 07:39:10 UTC (rev 188)
+++ pkg/TAPS/R/TAPS.r 2013-09-11 12:49:43 UTC (rev 189)
@@ -16,6 +16,7 @@
suppressPackageStartupMessages(library(stats))
suppressPackageStartupMessages(library(DNAcopy))
suppressPackageStartupMessages(library(fields))
+ suppressPackageStartupMessages(library(xlsx))
#list.of.packages <- c("stats", "fields")
@@ -34,23 +35,25 @@
## This function takes a directory as input, then builds short-segment TAPS scatter plots for each sample (subdirectory) in the directory.
setwd(directory)
subs <- getSubdirs()
+ subs=subs[subs!='frequencies' & subs!='frequencies_comp']
if (is.null(subs)) { ## check samples = subdirectories or a single sample = current directory
subs=thisSubdir()
setwd('..')
}
- # create SampleData file if there is none.
- if (length(grep('SampleData.txt',dir()))==0) {
- sampleData=data.frame(Sample=subs,cn2='',delta='',loh='', MAPD=NA, MHOF=NA, calculate.copynumbers='yes')
+ # create SampleData file if there is none.
+ if (length(grep('SampleData.xlsx',dir()))==0) {
+ sampleData <- data.frame(Sample=subs,cn1= -0.5, cn2=0, cn3=NA, loh=0.7, MAPD=NA, MHOF=NA)
+ write.xlsx(sampleData,'SampleData.xlsx',row.names=F)
} else {
- sampleData=load.txt('SampleData.txt')
+ sampleData=read.xlsx('SampleData.xlsx',1)
}
for (i in 1:length(subs)) {
setwd(subs[i])
name <- subs[i]
- #if (length(grep('SampleData.txt',dir()))==0) save.txt(data.frame(Sample=name,cn2='',delta='',loh='',completed.analysis='no'),file='SampleData.txt')
+ #if (length(grep('sampleData.csv',dir()))==0) save.txt(data.frame(Sample=name,cn2='',delta='',loh='',completed.analysis='no'),file='sampleData.csv')
cat(' ..loading', subs[i])
if(length(grep("*.cyhd.cychp",dir()))==1) ##cyhd sample
@@ -137,13 +140,13 @@
save.txt(segments,'_segments.txt')
}
- segments$Value <- segments$Value-median(Log2$Value) ## Median-centering
- Log2$Value <- Log2$Value-median(Log2$Value) ## Median-centering
+ segments$Value <- segments$Value-mean(Log2$Value) ## Median-centering
+ Log2$Value <- Log2$Value-mean(Log2$Value) ## Median-centering
- allRegions=NULL; try(load('allRegions.Rdata'),silent=T)
+ allRegions=NULL; if ('allRegions.Rdata' %in% dir()) load('allRegions.Rdata')
if (is.null(allRegions)) allRegions <- makeRegions(Log2, alf, segments) ## Calculates necessary data for segments (all functions are in this file)
save(allRegions,file='allRegions.Rdata')
- regs=NULL; try(load('shortRegions.Rdata'),silent=T)
+ regs=NULL; if ('shortRegions.Rdata' %in% dir()) load('shortRegions.Rdata')
if (is.null(regs)) {
regs <- regsFromSegs(Log2,alf,segments,bin=bin,min=5) ## Calculates the same data for shortened segments
save(regs,file='shortRegions.Rdata')
@@ -151,14 +154,14 @@
## Sample QC
sampleData$MAPD[i] <- MAPD <- round(median(abs(diff(Log2$Value[Log2$Chromosome=='chr1'][order(Log2$Start[Log2$Chromosome=='chr1'])]))),2)
- sampleData$MHOF[i] <- MHOF <- round(100*median(0.5+abs(0.5-alf$Value)),1)
+ sampleData$MHOF[i] <- MHOF <- round(100*median(0.5+abs(0.5-alf$Value[alf$Chromosome!='chrX'])),1)
#round(100*median(0.5+regs$hom[regs$scores<0.5],na.rm=T),1)
#MAID=round(median(abs(diff(regs$scores[!is.na(regs$scores)]))),3)
#Save for TAPS_region()
save(regs,Log2,alf,segments,file="TAPS_plot_output.Rdata")
- save.txt(sampleData,file='../SampleData.txt')
+ #save.txt(sampleData,file='../sampleData.csv')
#Clear warnings generated previously so hopefully I can see what is actually causing the program to fail.
#assign("last.warning", NULL, envir = baseenv())
@@ -179,12 +182,16 @@
cat('..done\n')
setwd('..')
}
+ write.xlsx(sampleData,'SampleData.xlsx',row.names=F)
}
###
###
-TAPS_call <- function(directory=NULL,#xlim=c(-1,1),ylim=c(0,1),
- minseg=1,maxCn=12) {
+TAPS_call <- function(samples='all',directory=getwd()) {
+ minseg=1
+ maxCn=12
+ suppressPackageStartupMessages(library(xlsx))
+
## TAPS_call outputs the total and minor allele copy numbers of all segments as a text file, and as images for visual confirmation.
## sampleInfo_TAPS.txt must be present in each sample folder. If TAPS_plot could not make a good guess of the Log-R of copy number 2
## and the Log-R difference to a deletion, you must interpret the scatter plots and edit sampleInfo_TAPS.txt.
@@ -197,28 +204,33 @@
#directory = readline("Please supply such a directory now: ")
}
-
setwd(directory)
#subs <- getSubdirs()
- if (length(grep('SampleData.txt',dir()))==1)
+ if (length(grep('SampleData.xlsx',dir()))==1)
{
- sampleData <- load.txt('SampleData.txt')
+ sampleData=read.xlsx('SampleData.xlsx',1)
}
else
{
- sampleData <- load.txt('../SampleData.txt')
+ sampleData <- read.xlsx('../SampleData.xlsx',1)
}
subs=as.character(sampleData$Sample)
if (is.null(subs)) {
subs=thisSubdir()
+ subs=subs[subs!='frequencies' & subs!='frequencies_comp']
setwd('..')
}
- for (i in 1:length(subs)) if (sampleData$calculate.copynumbers[i]=='yes') {
+
+ if (samples[1]=='all') samples=rep(T,length(subs))
+ if (is.logical(samples)) samples=which(samples)
+ subs=subs[samples]
+
+ for (i in 1:length(subs)) {
setwd(subs[i])
name <- subs[i]
- sampleInfo <- sampleData[sampleData$Sample==subs[i],]
+ sampleInfo <- sampleData[sampleData$Sample==subs[i],2:5]
if (nrow(sampleInfo)==1) {
cat(' ..loading', subs[i])
@@ -236,38 +248,47 @@
segments <- segments[!is.nan(segments$Value),]
segments <- segments[!is.na(segments$Value),]
- segments$Value <- segments$Value-median(Log2$Value)
- Log2$Value <- Log2$Value-median(Log2$Value)
+ segments$Value <- segments$Value-mean(Log2$Value)
+ Log2$Value <- Log2$Value-mean(Log2$Value)
cat(' ..processing.\n')
load('allRegions.Rdata') ## These were prepared in TAPS_plot
+ load('shortRegions.Rdata')
#allRegions <- makeRegions(Log2, alf, segments)
## estimates the Log-R and Allelic Imbalance Ration of all variants up to maxCn
- t <- findCNs(Log2,alf,allRegions,dmin=0.9,maxCn=maxCn,ceiling=1,sampleInfo=sampleInfo)
+ t <- findCNs(Log2,alf,allRegions,regs,name,maxCn=maxCn,ceiling=1,sampleInfo=sampleInfo)
+ save(t,file='t.Rdata')
- u <- setCNs(allRegions,t$int,t$ai,maxCn) ## Assigns copy number variant for all segments
+ u <- setCNs(allRegions,t$int,t$ai,t$model,maxCn) ## Assigns copy number variant for all segments
allRegions$regions <- u$regions
## adjacent segments with idendical copy number are merged (except over centromere) and all are saved to a text file
save.txt(u$regions,file=paste(name,'_segmentCN.txt',sep=''))
regions=allRegions$regions
+<<<<<<< HEAD
+ #save(u$model,file="model.Rdata")
+ write.table(t(as.data.frame(u$model)),file='model.txt',row.names=T)
+=======
save(t,regions,file="regions_t.Rdata")
+
+ #save parameters as strings
+ parameters=paste("Parameters given: cn2:",sampleInfo$cn2," delta:",sampleInfo$delta," loh:",sampleInfo$loh)
+>>>>>>> d63069fedd7b5d39e37a64432b37a149711b9b21
karyotype_check(regions$Chromosome,regions$Start,regions$End,regions$log2,regions$imba,regions$Cn,regions$mCn,t,ideogram=NULL,name=name)
karyotype_chromsCN(regions$Chromosome,regions$Start,regions$End,regions$log2,
regions$imba,regions$Cn,regions$mCn,ideogram=NULL,
as.character(Log2$Chromosome),Log2$Start,Log2$Value,as.character(alf$Chromosome),
- alf$Start,alf$Value,t,name=name,xlim=c(-1,1),ylim=c(0,1))
+ alf$Start,alf$Value,t,name=name,xlim=c(-1,1),ylim=c(0,1),parameters=parameters)
cat('..done\n')
- sampleData$completed.analysis[i] <- ''
} else cat('Skipped',name,'\n')
setwd('..')
}
- save.txt(sampleData,file='SampleData.txt')
+ #save.txt(sampleData,file='sampleData.csv')
}
###
regsFromSegs <- function (Log2,alf, segments, bin=200,min=1) {
@@ -602,96 +623,109 @@
##
###
-findCNs <- function(Log2,alf,allRegions,name=thisSubdir(),dmin=0.9,maxCn=10,ceiling=1,sampleInfo=NULL) {
- ## This function takes an estimate of the Log-R of copy number two (shift) and the difference in log-R between copy numbers 2 and 1 (delta)
- ## (3 and 2 works too). Then, the Log-R and Allelic Imbalance Ratio of all possible copy number variants up to maxCn are estimated from
- ## the Log-R and Allelic Imbalance Ratio of all the segments. This function will NOT be useful unless there is already a solid estimate
- ## of 'shift' and 'delta'. See the previous function.
+findCNs <- function(Log2,alf,allRegions,regs,name=thisSubdir(),maxCn=10,ceiling=1,sampleInfo=NULL) {
+ ## This function takes an estimate of the Log-R of copy numbers 1, 2 and 3. At least two of these should be entered.
+ ## Then, the Log-R and Allelic Imbalance Ratio of all possible copy number variants up to maxCn are estimated from
+ ## the Log-R and Allelic Imbalance Ratio of all the segments.
+ if (is.null(sampleInfo)) cat ('there was no estimation available for',name)
- shift=sampleInfo$cn2
- delta=sampleInfo$delta
+ cns=1:maxCn; est=sampleInfo[1:3]; est[est==' ']=NA; est=as.numeric(est)
+ m <- lm(2^est ~ cns[1:3])$coefficients # can handle one NA in est. This model is for "ratio as a function of copy number".
+ est[is.na(est)] = log2(m[1]+cns[which(is.na(est))]*m[2]) # simple linear regression to fill the missing
tix=NULL #temporary index
+ probes=NULL #number of probes at each copy number, for weighting
int=NULL ## contains Log-R estimate of each (total) copy number
ai=NULL ## contains Allelic Imbalance Ratio estimate of each copy number variant.
regions <- allRegions$regions
regions <- regions[(is.autosome(regions$Chromosome)®ions$lengthMB>1)&(!is.na(regions$imba)),] ## will use these regions
- ## likely cn2 regions sit within delta/3 from shift.
- expectedAt <- shift
- tix$cn2 <- abs(regions$log2 - expectedAt) < (delta/3) ## index of likely cn2 regions
+ ## likely cn2 regions sit near the estimate.
+ expectedAt <- est[2]
+ tix$cn2 <- abs(regions$log2 - expectedAt) < diff(est)[2]/3 ## index of likely cn2 regions
temp <- regions[tix$cn2,] ## cn2 regions
med <- weightedMedian(temp$log2,temp$probes) ## improved value of Log-R at cn2 (returns NULL if theres nothing there)
- int$cn2 <- ifelse(!is.null(med),med,expectedAt) ## saved to int.
+ probes[2] <- sum(temp$probes)
+ int[2] <- ifelse(!is.null(med),med,expectedAt) ## saved to int.
- ## likely cn1 regions sit at about cn2 - delta:
- d <- delta ## the (Log-R) distance to cn1
- expectedAt <- int$cn2-d ## cn1 is expected here
- tix$cn1 <- abs(regions$log2 - expectedAt) < (d/3) ## index of likely cn1 regions
+ ## likely cn1 regions sit near estimate
+ expectedAt <- est[1] ## cn1 is expected here
+ tix$cn1 <- abs(regions$log2 - expectedAt) < diff(est)[1]/3 ## index of likely cn1 regions
temp <- regions[tix$cn1,]
med <- weightedMedian(temp$log2,temp$probes)
- int$cn1 <- ifelse(!is.null(med),med,expectedAt)
+ probes[1] <- sum(temp$probes)
+ int[1] <- ifelse(!is.null(med),med,expectedAt)
- ## likely cn0 regions sit below cn1 - delta:
- d <- int$cn2-int$cn1
- expectedAt <- int$cn1-d
- tix$cn0 <- regions$log2 < expectedAt
- temp <- regions[tix$cn0,]
- med <- weightedMedian(temp$log2,temp$probes)
- int$cn0 <- ifelse(!is.null(med),med,expectedAt)
-
- ## likely cn3 regions sit at about cn2+delta*dmin (dmin is about 0.9, the factor by which the distance between consecutive copy numbers diminish)
- d <- delta*dmin
- expectedAt <- int$cn2+d
- tix$cn3 <- abs(regions$log2 - expectedAt) < (d/3)
+ ## likely cn3 regions sit near estimate
+ expectedAt <- est[3]
+ tix$cn3 <- abs(regions$log2 - expectedAt) < diff(est)[2]/3
temp <- regions[tix$cn3,]
med <- weightedMedian(temp$log2,temp$probes)
- int$cn3 <- ifelse(!is.null(med),med,expectedAt)
+ probes[3] <- sum(temp$probes)
+ int[3] <- ifelse(!is.null(med),med,expectedAt)
## cn4 follows at ...
- d <- dmin*(int$cn3-int$cn2)
- expectedAt <- int$cn3+d
- tix$cn4 <- abs(regions$log2 - expectedAt) < (d/4)
+ m <- lm(2^int[1:3] ~ cns[1:3], weights=probes[1:3])$coefficients ## use regression to estimate.
+ expectedAt <- log2(m[1]+4*m[2])
+ tix$cn4 <- abs(regions$log2 - expectedAt) < mean(diff(int))/3
temp <- regions[tix$cn4,]
med <- weightedMedian(temp$log2,temp$probes)
- int$cn4 <- ifelse(!is.null(med),med,expectedAt)
+ probes[4] <- sum(temp$probes)
+ int[4] <- ifelse(!is.null(med),med,expectedAt)
## generalized for higher cns
for (cn in 5:maxCn) {
thisCn <- paste('cn',cn,sep='')
prevCn <- paste('cn',cn-1,sep='')
pprevCn <- paste('cn',cn-2,sep='')
- d <- dmin*(int[prevCn][[1]]-int[pprevCn][[1]])
- expectedAt <- int[prevCn][[1]]+d
- tix[[thisCn]] <- abs(regions$log2 - expectedAt) < (d/5)
+ m <- lm(2^int[1:(cn-1)] ~ cns[1:(cn-1)], weights=probes[1:(cn-1)])$coefficients
+ expectedAt <- log2(m[1]+cn*m[2])
+ tix[[thisCn]] <- abs(regions$log2 - expectedAt) < mean(diff(int))/5
temp <- regions[tix[thisCn][[1]],]
med <- weightedMedian(temp$log2,temp$probes)
- int[thisCn] <- ifelse(!is.null(med),med,expectedAt)
+ probes[cn] <- sum(temp$probes)
+ int[cn] <- ifelse(!is.null(med),med,expectedAt)
}
+ ## likely cn0 regions sit below cn1 - delta:
+ expectedAt <- log2(m[1]+0*m[2])
+ tix$cn0 <- abs(regions$log2 - expectedAt) < 0.5*(int[2]-int[1])
+ temp <- regions[tix$cn0,]
+ med <- weightedMedian(temp$log2,temp$probes)
+ int0 <- ifelse(!is.null(med),med,expectedAt) ## "int0"
+
+ ## Estimate tumor dna content from intensity-cn relationship and average ploidy --UNRELIABLE
+ md <- lm(2^int ~ cns, weights=probes)$coefficients
+ m=NULL; m$intercept=md[1][[1]]; m$k=md[2][[1]]
+ probes[is.na(probes)]=0
+ m$meanCn <- mean(rep(cns, probes), na.rm=T)
+ m$theoretical_delta=1/m$meanCn
+ #m$real_delta=0.57*m$theoretical_delta ## The 0.57 is empirical from cancer cell lines
+ #m$dnafrac=m$k/m$real_delta
+ #m$cellfrac=1/(1+m$meanCn/2*(1/m$dnafrac-1))
+
+
+
+ loh_exp <- as.numeric(sampleInfo[4])
## at cn2, find the variant clusters (normal and CNNLOH)
- ix <- (abs(regions$log2 - int$cn2) < 0.2*(int$cn3-int$cn2) ) # taking only closely-matching segments
+ ix <- (abs(regions$log2 - int[2]) < 0.2*(int[3]-int[2]) ) # taking only closely-matching segments
data <- regions[ix,]
data <- data[!is.na(data$imba),] # ...with a calculated allelic imbalance.
-
- ix <- (abs(regions$log2 - int$cn3) < 0.2*(int$cn4-int$cn3) )
- data3 <- regions[ix,]
- data3 <- data[!is.na(data$imba),]
- expectedAt <- 0.1
- ix <- data$imba<min(data3$imba)
+ ## 2m1
+ expectedAt=0.1
+ ix <- abs(data$imba-.1) < abs(data$imba-loh_exp)
med <- weightedMedian(data$imba[ix],data$snps[ix]) ## Average allelic imbalance weighted on snp count
ai$cn2m1 <- ifelse (!is.null(med),med,expectedAt)
-
- expectedAt <- sampleInfo$loh
- ix <- abs(data$imba-sampleInfo$loh) < 0.2* (sampleInfo$loh-ai$cn2m1)
+ ## loh
+ expextedAt=loh_exp
+ ix <- !ix
med <- weightedMedian(data$imba[ix],data$snps[ix])
ai$cn2m0 <- ifelse (!is.null(med),med,expectedAt)
-
## for cn1 (and 0)
- ix <- (abs(regions$log2 - int$cn1) < 0.2*(int$cn2-int$cn1) )
+ ix <- (abs(regions$log2 - int[1]) < 0.2*(int[2]-int[1]) )
data <- regions[ix,]
data <- data[!is.na(data$imba),]
expectedAt <- (ai$cn2m0+ai$cn2m1)*3/5 ## Decent estimate.
@@ -700,7 +734,7 @@
ai$cn0m0 <- NA #unimportant
## for cn3:
- ix <- (abs(regions$log2 - int$cn3) < 0.2*(int$cn4-int$cn3) )
+ ix <- (abs(regions$log2 - int[3]) < 0.2*(int[4]-int[3]) )
data <- regions[ix,]
data <- data[!is.na(data$imba),]
@@ -714,7 +748,7 @@
ai$cn3m1 <- ifelse (!is.null(med),med,expectedAt)
# now for cn3m0
- expectedAt <- ai$cn3m1 + range*dmin # approx of cn3m0
+ expectedAt <- ai$cn3m1 + range*0.9 # approx of cn3m0
ix <- (abs(data$imba-expectedAt) / abs(data$imba-ai$cn3m1)) < 0.5 # take those much closer to exp than to cn3m1
med <- weightedMedian(data$imba[ix],data$snps[ix])
@@ -737,7 +771,7 @@
if (is.na(ai$cn1m0)) ai$cn1m0 <- (ai$cn3m1+ai$cn2m0)/2 ## If deletions were missing, place an estimate from cn3
## now for cn4
- ix <- abs(regions$log2 - int$cn4) < 0.2*(int$cn4-int$cn3)
+ ix <- abs(regions$log2 - int[4]) < 0.2*(int[4]-int[3])
data <- regions[ix,]
data <- data[!is.na(data$imba),]
@@ -769,17 +803,17 @@
prevCn <- paste('cn',cn-1,sep='')
pprevCn <- paste('cn',cn-2,sep='')
- ix <- (abs(regions$log2 - int[thisCn][[1]]) < 0.2*(int[thisCn][[1]]-int[prevCn][[1]]) )
+ ix <- (abs(regions$log2 - int[cn]) < 0.2*(int[cn]-int[cn-1]) )
data <- regions[ix,]
data <- data[!is.na(data$imba),]
data <- data[data$lengthMB>3,] # long regions for safety
## try to find variants, starting with LOH
# LOH such as 5(0)
- m <- 0
+ mi <- 0
thisVariant=paste(thisCn,'m',0,sep='')
- c4m0 <- ai[paste(prevCn,'m',m,sep='')][[1]] # relative naming for clarity
- c3m0 <- ai[paste(pprevCn,'m',m,sep='')][[1]]
+ c4m0 <- ai[paste(prevCn,'m',mi,sep='')][[1]] # relative naming for clarity
+ c3m0 <- ai[paste(pprevCn,'m',mi,sep='')][[1]]
expectedAt <- ceiling-((ceiling-c4m0)*(ceiling-max(c4m0,c3m0))/(ceiling-min(c3m0,c4m0)))
#ai[thisVariant] <- expectedAt
@@ -792,44 +826,48 @@
## then from balanced to less balanced
minorVariants=trunc(cn/2):1
first <- T
- for (m in minorVariants) {
- thisVariant=paste(thisCn,'m',m,sep='')
- if (m==cn/2) {
+ for (mi in minorVariants) {
+ thisVariant=paste(thisCn,'m',mi,sep='')
+ if (mi==cn/2) {
# We have balanced variant, so rather easy.
- expectedAt <- ai[paste(pprevCn,'m',m-1,sep='')][[1]] # this is a good approx, balanced at cn-2
- ix <- data$imba<ai[paste(prevCn,'m',m-1,sep='')][[1]] # let all below (cn-1, mcn-1) in
+ expectedAt <- ai[paste(pprevCn,'m',mi-1,sep='')][[1]] # this is a good approx, balanced at cn-2
+ ix <- data$imba<ai[paste(prevCn,'m',mi-1,sep='')][[1]] # let all below (cn-1, mcn-1) in
med <- weightedMedian(data$imba[ix],data$snps[ix])
ai[thisVariant] <- ifelse (!is.null(med),med,expectedAt)
- if (ai[thisVariant] > ai[paste(pprevCn,'m',m-1,sep='')][[1]]) ai[thisVariant] <- ai[paste(pprevCn,'m',m-1,sep='')][[1]] # don't let it sneak off.'
+ if (ai[thisVariant] > ai[paste(pprevCn,'m',mi-1,sep='')][[1]]) ai[thisVariant] <- ai[paste(pprevCn,'m',mi-1,sep='')][[1]] # don't let it sneak off.'
first <- F
} else if (first) {
# its not balanced but its the most balanced of the unbalanced. something like 5(2)
- expectedAt <- 0.5*( ai[paste(prevCn,'m',m,sep='')][[1]] + ai[paste(pprevCn,'m',m-1,sep='')][[1]] ) # that means between 4(2) and 3(1)
- ix <- abs(data$imba-expectedAt) < ( ai[paste(prevCn,'m',m,sep='')][[1]] - ai[paste(pprevCn,'m',m-1,sep='')][[1]] ) /3 # let all "between" 4(2) and 3(1) in
+ expectedAt <- 0.5*( ai[paste(prevCn,'m',mi,sep='')][[1]] + ai[paste(pprevCn,'m',mi-1,sep='')][[1]] ) # that means between 4(2) and 3(1)
+ ix <- abs(data$imba-expectedAt) < ( ai[paste(prevCn,'m',mi,sep='')][[1]] - ai[paste(pprevCn,'m',mi-1,sep='')][[1]] ) /3 # let all "between" 4(2) and 3(1) in
med <- weightedMedian(data$imba[ix],data$snps[ix])
ai[thisVariant] <- ifelse (!is.null(med),med,expectedAt)
first <- F
} else {
# not the most balanced unbalanced variant, for example 5(1):
expectedAt <- ai[paste(thisCn,'m',minorVariants[1],sep='')][[1]] +
- (minorVariants[1]-m) * (ai[paste(thisCn,'m',0,sep='')][[1]] - ai[paste(thisCn,'m',minorVariants[1],sep='')][[1]]) / trunc(cn/2) # 5(2) + (which)* 5(0)-5(2) /(n)
- ix <- abs(data$imba-expectedAt) < ( ai[paste(prevCn,'m',m,sep='')][[1]] - ai[paste(pprevCn,'m',m-1,sep='')][[1]] ) /3 # let all "between" 4(1) and 3(0) in
+ (minorVariants[1]-mi) * (ai[paste(thisCn,'m',0,sep='')][[1]] -
+ ai[paste(thisCn,'m',minorVariants[1],sep='')][[1]]) / trunc(cn/2) # 5(2) + (which)* 5(0)-5(2) /(n)
+ ix <- abs(data$imba-expectedAt) < ( ai[paste(prevCn,'m',mi,sep='')][[1]] - ai[paste(pprevCn,'m',mi-1,sep='')][[1]] ) /3 # let all "between" 4(1) and 3(0) in
med <- weightedMedian(data$imba[ix],data$snps[ix])
ai[thisVariant] <- ifelse (!is.null(med),med,expectedAt)
}
} # done with minor variants
} # done with copy numbers
-
- return(list('int'=int,'ai'=ai))
+ int=as.list(c(int0,int)); names(int)=paste('cn',0:maxCn,sep='')
+ return(list('int'=int,'ai'=ai,'model'=m))
}
+
+
+
###
-setCNs <- function(allRegions,int,ai,maxCn=12) {
+setCNs <- function(allRegions,int,ai,model,maxCn=12) {
## Assign total and minor copy numbers to all segments.
regions <- allRegions$regions[,-4] ## This time, work on all segments available.
Cn <- NULL ## Total copy number
mCn <- NULL ## Minor allele copy number
- fullCN <- NULL ## Variant label. ('cnXmY')
+ Cnx <- NULL ## Variant label. ('cnXmY')
intDist <- NULL ## distance to certain Log-R
imbaDist <- NULL ## distance to certain allelic imbalance
@@ -839,48 +877,58 @@
# set total copy number
distance <- Inf
for (cn in 0:maxCn) {
- t_int <- int[paste('cn',cn,sep='')][[1]] ## get Log-R of particular cn from 'int'
+ t_int <- int[[paste('cn',cn,sep='')]] ## get Log-R of particular cn from 'int'
t_dis <- abs(regions$log2[i]-t_int) ## distance to that particular cn
if (t_dis < distance) { ## nearest so far, save.
distance <- t_dis -> intDist[i]
Cn[i] <- cn
}
}
-
- # Y makes CN0 sit very low, this is a fix on non-Y Cn0
- #if ((Cn[i] == 1)&(intDist[i] > (int$cn2-int$cn1))) Cn[i] <- 0 ## currently not needed.
-
+ }
+
+ ### Calculate model based Cns
+ Cnx=Cn; for (cn in 0:maxCn) {
+ Cnx[Cn==cn]=cn + (2^regions$log2[Cn==cn]-2^int[[paste('cn',cn,sep='')]])/model$k
+ }; Cnx[Cnx<0]=0
+ Cn=round(Cnx)
+
+ ## Set minor CN
+ for (i in 1:nrow(regions)) {
# set minor CN
distance <- Inf
if (Cn[i]<=1) {
mCn[i] <- 0
- } else if (!is.na(regions$imba[i])) for (m in 0:trunc(Cn[i]/2)) {
+ } else if (Cn[i]<=maxCn & !is.na(regions$imba[i])) for (m in 0:trunc(Cn[i]/2)) {
t_ai <- ai[paste('cn',Cn[i],'m',m,sep='')][[1]]
t_dis <- abs(regions$imba[i]-t_ai)
[TRUNCATED]
To get the complete diff run:
svnlook diff /svnroot/patchwork -r 189
More information about the Patchwork-commits
mailing list