From noreply at r-forge.r-project.org Wed Nov 6 13:33:03 2013 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Wed, 6 Nov 2013 13:33:03 +0100 (CET) Subject: [Patchwork-commits] r197 - .git .git/logs .git/logs/refs/heads .git/logs/refs/remotes/origin .git/refs/heads .git/refs/remotes/origin pkg/TAPS/R pkg/TAPS/man www Message-ID: <20131106123303.31542184724@r-forge.r-project.org> Author: sebastian_d Date: 2013-11-06 13:33:02 +0100 (Wed, 06 Nov 2013) New Revision: 197 Modified: .git/COMMIT_EDITMSG .git/index .git/logs/HEAD .git/logs/refs/heads/master .git/logs/refs/remotes/origin/master .git/refs/heads/master .git/refs/remotes/origin/master pkg/TAPS/R/TAPS.r pkg/TAPS/man/TAPS_call.Rd pkg/TAPS/man/TAPS_plot.Rd www/TAPS_exec.php Log: remove xlsx Modified: .git/COMMIT_EDITMSG =================================================================== --- .git/COMMIT_EDITMSG 2013-10-21 11:08:15 UTC (rev 196) +++ .git/COMMIT_EDITMSG 2013-11-06 12:33:02 UTC (rev 197) @@ -1 +1 @@ -small update to TAPS_plot +merga och ta bort xlsx Modified: .git/index =================================================================== (Binary files differ) Modified: .git/logs/HEAD =================================================================== --- .git/logs/HEAD 2013-10-21 11:08:15 UTC (rev 196) +++ .git/logs/HEAD 2013-11-06 12:33:02 UTC (rev 197) @@ -71,3 +71,5 @@ 2347ca003cf8ce861e1d4ec1dc0f5c96a0ac3071 85d1e6948ab98a6bd2c58f5dc820740c198a5444 Sebastian DiLorenzo 1380028151 +0200 pull : Fast-forward 85d1e6948ab98a6bd2c58f5dc820740c198a5444 0044e7bf9b5beb763bd5f47e54c8b778999eede2 Sebastian DiLorenzo 1381839717 +0200 commit: Update to TAPS call homepage by markus 0044e7bf9b5beb763bd5f47e54c8b778999eede2 64da542e6e940d1ec28e8b99c82be94c86a2e3fd Sebastian DiLorenzo 1382000849 +0200 commit: small update to TAPS_plot +64da542e6e940d1ec28e8b99c82be94c86a2e3fd 5ef4202a68e5b1b59947297330a5175922c464ae Sebastian DiLorenzo 1383741292 +0100 commit: merga och ta bort xlsx +5ef4202a68e5b1b59947297330a5175922c464ae 6af27c3b92782d6bad79f2b4ff84cbaf8db0ac7e Sebastian DiLorenzo 1383741298 +0100 pull: Merge made by the 'recursive' strategy. Modified: .git/logs/refs/heads/master =================================================================== --- .git/logs/refs/heads/master 2013-10-21 11:08:15 UTC (rev 196) +++ .git/logs/refs/heads/master 2013-11-06 12:33:02 UTC (rev 197) @@ -71,3 +71,5 @@ 2347ca003cf8ce861e1d4ec1dc0f5c96a0ac3071 85d1e6948ab98a6bd2c58f5dc820740c198a5444 Sebastian DiLorenzo 1380028151 +0200 pull : Fast-forward 85d1e6948ab98a6bd2c58f5dc820740c198a5444 0044e7bf9b5beb763bd5f47e54c8b778999eede2 Sebastian DiLorenzo 1381839717 +0200 commit: Update to TAPS call homepage by markus 0044e7bf9b5beb763bd5f47e54c8b778999eede2 64da542e6e940d1ec28e8b99c82be94c86a2e3fd Sebastian DiLorenzo 1382000849 +0200 commit: small update to TAPS_plot +64da542e6e940d1ec28e8b99c82be94c86a2e3fd 5ef4202a68e5b1b59947297330a5175922c464ae Sebastian DiLorenzo 1383741292 +0100 commit: merga och ta bort xlsx +5ef4202a68e5b1b59947297330a5175922c464ae 6af27c3b92782d6bad79f2b4ff84cbaf8db0ac7e Sebastian DiLorenzo 1383741298 +0100 pull: Merge made by the 'recursive' strategy. Modified: .git/logs/refs/remotes/origin/master =================================================================== --- .git/logs/refs/remotes/origin/master 2013-10-21 11:08:15 UTC (rev 196) +++ .git/logs/refs/remotes/origin/master 2013-11-06 12:33:02 UTC (rev 197) @@ -69,3 +69,4 @@ 2347ca003cf8ce861e1d4ec1dc0f5c96a0ac3071 85d1e6948ab98a6bd2c58f5dc820740c198a5444 Sebastian DiLorenzo 1380028151 +0200 pull : fast-forward 85d1e6948ab98a6bd2c58f5dc820740c198a5444 0044e7bf9b5beb763bd5f47e54c8b778999eede2 Sebastian DiLorenzo 1381839752 +0200 update by push 0044e7bf9b5beb763bd5f47e54c8b778999eede2 64da542e6e940d1ec28e8b99c82be94c86a2e3fd Sebastian DiLorenzo 1382000861 +0200 update by push +64da542e6e940d1ec28e8b99c82be94c86a2e3fd 261f0569cd0eb75824fa8da974f7e7c74232a16a Sebastian DiLorenzo 1383741141 +0100 pull: fast-forward Modified: .git/refs/heads/master =================================================================== --- .git/refs/heads/master 2013-10-21 11:08:15 UTC (rev 196) +++ .git/refs/heads/master 2013-11-06 12:33:02 UTC (rev 197) @@ -1 +1 @@ -64da542e6e940d1ec28e8b99c82be94c86a2e3fd +6af27c3b92782d6bad79f2b4ff84cbaf8db0ac7e Modified: .git/refs/remotes/origin/master =================================================================== --- .git/refs/remotes/origin/master 2013-10-21 11:08:15 UTC (rev 196) +++ .git/refs/remotes/origin/master 2013-11-06 12:33:02 UTC (rev 197) @@ -1 +1 @@ -64da542e6e940d1ec28e8b99c82be94c86a2e3fd +261f0569cd0eb75824fa8da974f7e7c74232a16a Modified: pkg/TAPS/R/TAPS.r =================================================================== --- pkg/TAPS/R/TAPS.r 2013-10-21 11:08:15 UTC (rev 196) +++ pkg/TAPS/R/TAPS.r 2013-11-06 12:33:02 UTC (rev 197) @@ -16,7 +16,7 @@ TAPS_plot <- function(#samples='all', - directory=NULL,#xlim=c(-1,2),ylim=c(0,1), + directory=NULL,autoEstimate=FALSE, bin=400) { #Automatically check, and if needed install, packages stats and fields @@ -54,18 +54,18 @@ } # create SampleData file if there is none. - if (length(grep('SampleData.xlsx',dir()))==0) { + if (length(grep('SampleData.csv',dir()))==0) { if(!is.null(subsToSampleData)) { - sampleData <- data.frame(Sample=subsToSampleData,cn1= -0.5, cn2=0, cn3=NA, loh=0.7, MAPD=NA, MHOF=NA) + sampleData <- data.frame(Sample=subsToSampleData,cn1= NA, cn2=NA, cn3=NA, loh=NA, MAPD=NA, MHOF=NA) } else { - sampleData <- data.frame(Sample=subs,cn1= -0.5, cn2=0, cn3=NA, loh=0.7, MAPD=NA, MHOF=NA) + sampleData <- data.frame(Sample=subs,cn1= NA, cn2=NA, cn3=NA, loh=NA, MAPD=NA, MHOF=NA) } - write.xlsx(sampleData,'SampleData.xlsx',row.names=F) + save.txt(sampleData,'SampleData.csv') } else { - sampleData=read.xlsx('SampleData.xlsx',1) + sampleData=load.txt('SampleData.csv') } #if (samples[1]=='all') samples=rep(T,length(subs)) @@ -214,10 +214,27 @@ as.character(Log2$Chromosome),Log2$Start,Log2$Value,as.character(alf$Chromosome),alf$Start, alf$Value,name=name,MAPD=MAPD,MHOF=MHOF) + ## Finally add estimates if needed: + e=NULL + if (is.logical(autoEstimate)) { + if (length(autoEstimate)>1) { + if (autoEstimate[i]) e=getEstimates(regs$logs,regs$scores) + } else if (autoEstimate[1]) e=getEstimates(regs$logs,regs$scores) + } else if (is.numeric(autoEstimate)) { + if (i %in% autoEstimate) e=getEstimates(regs$logs,regs$scores) + } else if (is.character(autoEstimate)) if (name %in% autoEstimate) + e=getEstimates(regs$logs,regs$scores) + if (!is.null(e)) { + sampleData$cn1=e[1] + sampleData$cn2=e[2] + sampleData$cn3=e[3] + sampleData$loh=e[4] + } + cat('..done\n') setwd('..') } - write.xlsx(sampleData,'SampleData.xlsx',row.names=F) + write.txt(sampleData,'SampleData.csv') } ### @@ -235,6 +252,7 @@ maxCn=12 suppressPackageStartupMessages(library(xlsx)) + ## TAPS_call outputs the total and minor allele copy numbers of all segments as a text file, and as images for visual confirmation. ## sampleInfo_TAPS.txt must be present in each sample folder. If TAPS_plot could not make a good guess of the Log-R of copy number 2 ## and the Log-R difference to a deletion, you must interpret the scatter plots and edit sampleInfo_TAPS.txt. @@ -250,13 +268,23 @@ setwd(directory) #subs <- getSubdirs() - if (length(grep('SampleData.xlsx',dir()))==1) + if (length(grep('SampleData.csv',dir()))==0) { - sampleData=read.xlsx('SampleData.xlsx',1) + if (length(grep('SampleData.xlsx',dir()))==1) + { + sampleData=read.xlsx('SampleData.xlsx',1) + save.txt(sampleData,'SampleData.csv') + } + sampleData=load.txt('SampleData.csv') } + + if (length(grep('SampleData.csv',dir()))==1) + { + sampleData=load.txt('SampleData.csv') + } else { - sampleData <- read.xlsx('../SampleData.xlsx',1) + sampleData <- load.txt('../SampleData.csv') } subs=as.character(sampleData$Sample) @@ -273,15 +301,15 @@ for (i in 1:length(subs)) { setwd(subs[i]) name <- subs[i] - sampleInfo <- sampleData[sampleData$Sample==subs[i],2:5] - if (nrow(sampleInfo)==1) { + sampleInfo <- sampleData[i,c('cn1','cn2','cn3','loh')] + if (nrow(sampleInfo)==1) if (sum(is.na(sampleInfo))<4) { cat(' ..loading', subs[i]) Log2 <- readLog2() alf <- readAlf(localDir) segments <- readSegments() - #Some samples throw NA values, we simply remove these. + #Some samples contain NA values, we simply remove these. Log2=Log2[!is.nan(Log2$Value),] Log2=Log2[!is.na(Log2$Value),] @@ -683,7 +711,7 @@ ## the Log-R and Allelic Imbalance Ratio of all the segments. if (is.null(sampleInfo)) cat ('there was no estimation available for',name) - cns=1:maxCn; est=sampleInfo[1:3]; est[est==' ']=NA; est=as.numeric(est) + cns=1:maxCn; est=sampleInfo[1:3]; est[est==' ']=NA; est=as.numeric(as.character(est)) m <- lm(2^est ~ cns[1:3])$coefficients # can handle one NA in est. This model is for "ratio as a function of copy number". est[is.na(est)] = log2(m[1]+cns[which(is.na(est))]*m[2]) # simple linear regression to fill the missing @@ -933,10 +961,10 @@ for (cn in 0:maxCn) { t_int <- int[[paste('cn',cn,sep='')]] ## get Log-R of particular cn from 'int' t_dis <- abs(regions$log2[i]-t_int) ## distance to that particular cn - if (t_dis < distance) { ## nearest so far, save. + try (if (t_dis < distance) { ## nearest so far, save. distance <- t_dis -> intDist[i] Cn[i] <- cn - } + }, silent=T) } } @@ -956,10 +984,10 @@ t_ai <- ai[paste('cn',Cn[i],'m',m,sep='')][[1]] t_dis <- abs(regions$imba[i]-t_ai) #cat('Line',i,'Cn', Cn[i],'m:',m,'Comparing',t_dis,'with',t_int,'.......') - if (t_dis < distance) { + try( if (t_dis < distance) { distance <- t_dis -> imbaDist[i] mCn[i] <- m - } + }, silent=T) } else mCn[i] <- NA #fullCN[i] <- paste('cn',Cn[i],'m',mCn[i],sep='') # Full description } @@ -1344,7 +1372,7 @@ suppressPackageStartupMessages(library(xlsx)) - sampleData <- read.xlsx('SampleData.xlsx',1) + sampleData <- load.txt('SampleData.txt') olddir <- getwd() if (!is.na(outdir)) { try(dir.create(outdir), silent=T) @@ -1507,7 +1535,7 @@ suppressPackageStartupMessages(library(xlsx)) - sampleData=read.xlsx('SampleData.xlsx',1) + sampleData=load.txt('SampleData.txt') subs=as.character(sampleData$Sample) @@ -1597,7 +1625,7 @@ comparison='', name1='1', name2='2', color='#000000') { - + p_cutoff <- 0.05 freq_cutoff <- 0 @@ -2628,11 +2656,11 @@ #Check if name of sample is too long to be graphically pleasing and cut it if that is the case if(nchar(name)>12) { - mtext(paste("Detailed view with copynumbers of sample: ",substring(name,1,12),"\n Chromsome ",c,sep=""),side=3) + mtext(paste("Sample: ",substring(name,1,12),"\n Chromsome ",c,sep=""),side=3) } else { - mtext(paste("Detailed view with copynumbers of sample: ",name,"\n Chromsome ",c,sep=""),side=3) + mtext(paste("Sample: ",name,"\n Chromsome ",c,sep=""),side=3) } #------------------------------------------------------------ @@ -3062,7 +3090,7 @@ #Titles,date/time and axis labels mtext(text="log-ratio",side=1,line=1.1,cex=1) mtext(text="Allelic imbalance",side=2,line=1.5,cex=1) - mtext(paste("Detailed view of sample: ",name,"\n Chromosome ",c,", Region: ",Rstart,"-",Rend,sep=""),side=3) + mtext(paste("Sample: ",name,"\n Chromosome ",c,", Region: ",Rstart,"-",Rend,sep=""),side=3) #------------------------------------------------------------ #Top Right - Signal @@ -3441,7 +3469,70 @@ +getEstimates <- function(logR, imba) { + #load('shortRegions.Rdata') + #logR=allRegions$regions$log2[allRegions$regions$lengthMB>5] + #imba=allRegions$regions$imba[allRegions$regions$lengthMB>5] + #logR=regs$logs + #imba=regs$scores + + # Find the max point, that is likely an integer copy number. + d=density(logR[abs(logR)<0.15],na.rm=T) + max=d$x[order(d$y,decreasing=T)][1] + + # Find optimal logR's for integer copy numbers + R=2^logR-2^max + deltas=seq(0.05,0.5,0.01) # Allow a delta-ratio of 5% to 50% + n=length(deltas) + dev=rep(NA,n) + for (i in 1:n) { + delta=deltas[i] + mod=R %% delta + mod[mod>(delta/2)]=delta-mod[mod>(delta/2)] + dev[i]=mean(mod) + } + + best=deltas[dev/deltas==min(dev/deltas)] + + # guess the copy number at "max" + imbas=imba[abs(R)0 & diff(d$y[2:n])<0) + ## remove crappy maxes: + maxes=maxes[d$y[maxes] > 0.05*max(d$y[maxes])] + + cn=3 + if (d$x[maxes][1]<0.15) # even copy number + cn=2 + if (length(maxes)>2) + if (min(diff(maxes))/max(diff(maxes))>0.7) + cn=4 + if (length(maxes)==1) + if (d$x[maxes]>0.35) + cn=1 + + cn1=log2(2^max-(cn-1)*best) + try(cn1 <- median(logR[abs(logR-cn1)<0.1]),silent=T) + cn2=log2(2^max-(cn-2)*best) + try(cn2 <- median(logR[abs(logR-cn2)<0.1]),silent=T) + cn3=log2(2^max-(cn-3)*best) + try(cn3 <- median(logR[abs(logR-cn3)<0.1]),silent=T) + + R2=2^cn2 + imba2=imba[abs(2^logR-R2)0 & diff(d$y[2:n])<0) + ## remove crappy maxes: + maxes=maxes[d$y[maxes] > 0.05*max(d$y[maxes])] + if (length(maxes)==1) { + loh=0.75 + } else loh=d$x[maxes[length(maxes)]] + return(round(c(cn1,cn2,cn3,loh),2)) +} @@ -3492,3 +3583,5 @@ + + Modified: pkg/TAPS/man/TAPS_call.Rd =================================================================== --- pkg/TAPS/man/TAPS_call.Rd 2013-10-21 11:08:15 UTC (rev 196) +++ pkg/TAPS/man/TAPS_call.Rd 2013-11-06 12:33:02 UTC (rev 197) @@ -28,7 +28,7 @@ \details{ TAPS_call is the second step in the TAPS analysis, to be run after TAPS_plot. It requires: \cr -1) Your interpretation of plots which you add to the SampleData.xlsx file: \cr +1) Your interpretation of plots which you add to the SampleData.csv file: \cr cn1-cn3: The log-ratio of copy number 1-3 (in the main clone), any two or three of these values are required. \cr loh: The allelic imbalance of copy number 2 LOH. \cr (Default values are fine for fairly pure, near-diploid samples) \cr Modified: pkg/TAPS/man/TAPS_plot.Rd =================================================================== --- pkg/TAPS/man/TAPS_plot.Rd 2013-10-21 11:08:15 UTC (rev 196) +++ pkg/TAPS/man/TAPS_plot.Rd 2013-11-06 12:33:02 UTC (rev 197) @@ -68,9 +68,10 @@ Workflow:\cr 1. From the folder containing your samples (sample folders) run TAPS_plot(). \cr 2. Investigate the scatter plots generated in your sample folders. \cr -3. To proceed with copy number calls, find and open the file "SampleData.txt". \cr -4. For each sample, enter an interpretation of Log-Ratio @ copy number 2 ("cn2"), the difference in - Log-Ratio to a deletion ("delta") and the allelic imbalance ratio of CNNLOH ("loh"). Save the file. \cr +3. To proceed with copy number calls, find and open the file "SampleData.csv". \cr +4. For each sample, enter an interpretation of Log-Ratio @ copy number 1-3 ("cn1-3", + two+ of the three values are required) and the allelic imbalance ratio of LOH and + cn2 ("loh"). Save the file. \cr 5. Run TAPS_call(). \cr 6. Inspect the karyotype_check images, and the new chromosome-wise images. \cr 7. If all looks reasonable, you will find good copy number estimates in 'Copynumbers.csv'. \cr Modified: www/TAPS_exec.php =================================================================== --- www/TAPS_exec.php 2013-10-21 11:08:15 UTC (rev 196) +++ www/TAPS_exec.php 2013-11-06 12:33:02 UTC (rev 197) @@ -100,7 +100,7 @@ This will have generated one "<samplename>_overview.jpeg" in the main directory (should there be one), several plots and .Rdata files in the sample folder and a file named -simply "SampleData.xlsx" which will be covered shortly.

+simply "SampleData.csv" which will be covered shortly.


@@ -116,18 +116,18 @@

TAPS_call() also has "directory" as its main argument, the parameters are read from the file -"SampleData.xlsx". If you desire to execute TAPS_call() on many samples, add the arguments to the -main directory SampleData.xlsx. If you want to execute TAPS_call() on just one sample, add the arguments -for that sample to SampleData.xlsx and specify that sample using the "directory" argument of TAPS_call(). +"SampleData.csv". If you desire to execute TAPS_call() on many samples, add the arguments to the +main directory SampleData.csv. If you want to execute TAPS_call() on just one sample, add the arguments +for that sample to SampleData.csv and specify that sample using the "directory" argument of TAPS_call().

-An example run of TAPS_call(), where "mainsamplefolder" contains SampleData.xlsx:
+An example run of TAPS_call(), where "mainsamplefolder" contains SampleData.csv:
 	TAPS_call(directory="path/to/mainsamplefolder")
 
-To infer the arguments for SampleData.xlsx that TAPS_call() uses you will need to look at one of +To infer the arguments for SampleData.csv that TAPS_call() uses you will need to look at one of the chromosomal plots generated using TAPS_plot(). The structure and relationships in the plot can be interpreted to figure out the most probable locations of the allele-specific copynumbers.