[Patchwork-commits] r197 - .git .git/logs .git/logs/refs/heads .git/logs/refs/remotes/origin .git/refs/heads .git/refs/remotes/origin pkg/TAPS/R pkg/TAPS/man www

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Wed Nov 6 13:33:03 CET 2013


Author: sebastian_d
Date: 2013-11-06 13:33:02 +0100 (Wed, 06 Nov 2013)
New Revision: 197

Modified:
   .git/COMMIT_EDITMSG
   .git/index
   .git/logs/HEAD
   .git/logs/refs/heads/master
   .git/logs/refs/remotes/origin/master
   .git/refs/heads/master
   .git/refs/remotes/origin/master
   pkg/TAPS/R/TAPS.r
   pkg/TAPS/man/TAPS_call.Rd
   pkg/TAPS/man/TAPS_plot.Rd
   www/TAPS_exec.php
Log:
remove xlsx

Modified: .git/COMMIT_EDITMSG
===================================================================
--- .git/COMMIT_EDITMSG	2013-10-21 11:08:15 UTC (rev 196)
+++ .git/COMMIT_EDITMSG	2013-11-06 12:33:02 UTC (rev 197)
@@ -1 +1 @@
-small update to TAPS_plot
+merga och ta bort xlsx

Modified: .git/index
===================================================================
(Binary files differ)

Modified: .git/logs/HEAD
===================================================================
--- .git/logs/HEAD	2013-10-21 11:08:15 UTC (rev 196)
+++ .git/logs/HEAD	2013-11-06 12:33:02 UTC (rev 197)
@@ -71,3 +71,5 @@
 2347ca003cf8ce861e1d4ec1dc0f5c96a0ac3071 85d1e6948ab98a6bd2c58f5dc820740c198a5444 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1380028151 +0200	pull : Fast-forward
 85d1e6948ab98a6bd2c58f5dc820740c198a5444 0044e7bf9b5beb763bd5f47e54c8b778999eede2 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1381839717 +0200	commit: Update to TAPS call homepage by markus
 0044e7bf9b5beb763bd5f47e54c8b778999eede2 64da542e6e940d1ec28e8b99c82be94c86a2e3fd Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1382000849 +0200	commit: small update to TAPS_plot
+64da542e6e940d1ec28e8b99c82be94c86a2e3fd 5ef4202a68e5b1b59947297330a5175922c464ae Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1383741292 +0100	commit: merga och ta bort xlsx
+5ef4202a68e5b1b59947297330a5175922c464ae 6af27c3b92782d6bad79f2b4ff84cbaf8db0ac7e Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1383741298 +0100	pull: Merge made by the 'recursive' strategy.

Modified: .git/logs/refs/heads/master
===================================================================
--- .git/logs/refs/heads/master	2013-10-21 11:08:15 UTC (rev 196)
+++ .git/logs/refs/heads/master	2013-11-06 12:33:02 UTC (rev 197)
@@ -71,3 +71,5 @@
 2347ca003cf8ce861e1d4ec1dc0f5c96a0ac3071 85d1e6948ab98a6bd2c58f5dc820740c198a5444 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1380028151 +0200	pull : Fast-forward
 85d1e6948ab98a6bd2c58f5dc820740c198a5444 0044e7bf9b5beb763bd5f47e54c8b778999eede2 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1381839717 +0200	commit: Update to TAPS call homepage by markus
 0044e7bf9b5beb763bd5f47e54c8b778999eede2 64da542e6e940d1ec28e8b99c82be94c86a2e3fd Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1382000849 +0200	commit: small update to TAPS_plot
+64da542e6e940d1ec28e8b99c82be94c86a2e3fd 5ef4202a68e5b1b59947297330a5175922c464ae Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1383741292 +0100	commit: merga och ta bort xlsx
+5ef4202a68e5b1b59947297330a5175922c464ae 6af27c3b92782d6bad79f2b4ff84cbaf8db0ac7e Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1383741298 +0100	pull: Merge made by the 'recursive' strategy.

Modified: .git/logs/refs/remotes/origin/master
===================================================================
--- .git/logs/refs/remotes/origin/master	2013-10-21 11:08:15 UTC (rev 196)
+++ .git/logs/refs/remotes/origin/master	2013-11-06 12:33:02 UTC (rev 197)
@@ -69,3 +69,4 @@
 2347ca003cf8ce861e1d4ec1dc0f5c96a0ac3071 85d1e6948ab98a6bd2c58f5dc820740c198a5444 Sebastian DiLorenzo <S_D at imv092.medsci.uu.se> 1380028151 +0200	pull : fast-forward
 85d1e6948ab98a6bd2c58f5dc820740c198a5444 0044e7bf9b5beb763bd5f47e54c8b778999eede2 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1381839752 +0200	update by push
 0044e7bf9b5beb763bd5f47e54c8b778999eede2 64da542e6e940d1ec28e8b99c82be94c86a2e3fd Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1382000861 +0200	update by push
+64da542e6e940d1ec28e8b99c82be94c86a2e3fd 261f0569cd0eb75824fa8da974f7e7c74232a16a Sebastian DiLorenzo <S_D at imv091.medsci.uu.se> 1383741141 +0100	pull: fast-forward

Modified: .git/refs/heads/master
===================================================================
--- .git/refs/heads/master	2013-10-21 11:08:15 UTC (rev 196)
+++ .git/refs/heads/master	2013-11-06 12:33:02 UTC (rev 197)
@@ -1 +1 @@
-64da542e6e940d1ec28e8b99c82be94c86a2e3fd
+6af27c3b92782d6bad79f2b4ff84cbaf8db0ac7e

Modified: .git/refs/remotes/origin/master
===================================================================
--- .git/refs/remotes/origin/master	2013-10-21 11:08:15 UTC (rev 196)
+++ .git/refs/remotes/origin/master	2013-11-06 12:33:02 UTC (rev 197)
@@ -1 +1 @@
-64da542e6e940d1ec28e8b99c82be94c86a2e3fd
+261f0569cd0eb75824fa8da974f7e7c74232a16a

Modified: pkg/TAPS/R/TAPS.r
===================================================================
--- pkg/TAPS/R/TAPS.r	2013-10-21 11:08:15 UTC (rev 196)
+++ pkg/TAPS/R/TAPS.r	2013-11-06 12:33:02 UTC (rev 197)
@@ -16,7 +16,7 @@
 
 
 TAPS_plot <- function(#samples='all',
-                     directory=NULL,#xlim=c(-1,2),ylim=c(0,1),
+                     directory=NULL,autoEstimate=FALSE,
                       bin=400) {
     #Automatically check, and if needed install, packages stats and fields
     
@@ -54,18 +54,18 @@
     }
     
    # create SampleData file if there is none.   
-    if (length(grep('SampleData.xlsx',dir()))==0) {
+    if (length(grep('SampleData.csv',dir()))==0) {
         if(!is.null(subsToSampleData))
             {
-            sampleData <- data.frame(Sample=subsToSampleData,cn1= -0.5, cn2=0, cn3=NA, loh=0.7, MAPD=NA, MHOF=NA)
+            sampleData <- data.frame(Sample=subsToSampleData,cn1= NA, cn2=NA, cn3=NA, loh=NA, MAPD=NA, MHOF=NA)
             }
         else
             {
-            sampleData <- data.frame(Sample=subs,cn1= -0.5, cn2=0, cn3=NA, loh=0.7, MAPD=NA, MHOF=NA)
+            sampleData <- data.frame(Sample=subs,cn1= NA, cn2=NA, cn3=NA, loh=NA, MAPD=NA, MHOF=NA)
             }
-        write.xlsx(sampleData,'SampleData.xlsx',row.names=F)
+        save.txt(sampleData,'SampleData.csv')
     } else {
-        sampleData=read.xlsx('SampleData.xlsx',1)
+        sampleData=load.txt('SampleData.csv')
     }
 
     #if (samples[1]=='all') samples=rep(T,length(subs))
@@ -214,10 +214,27 @@
                          as.character(Log2$Chromosome),Log2$Start,Log2$Value,as.character(alf$Chromosome),alf$Start,
                          alf$Value,name=name,MAPD=MAPD,MHOF=MHOF)
         
+        ## Finally add estimates if needed:
+        e=NULL
+        if (is.logical(autoEstimate)) {
+            if (length(autoEstimate)>1) {
+                if (autoEstimate[i]) e=getEstimates(regs$logs,regs$scores)
+            } else if (autoEstimate[1]) e=getEstimates(regs$logs,regs$scores)
+        } else if (is.numeric(autoEstimate)) { 
+            if (i %in% autoEstimate) e=getEstimates(regs$logs,regs$scores)
+        } else if (is.character(autoEstimate)) if (name %in% autoEstimate)
+            e=getEstimates(regs$logs,regs$scores)
+        if (!is.null(e)) {
+            sampleData$cn1=e[1]
+            sampleData$cn2=e[2]
+            sampleData$cn3=e[3]
+            sampleData$loh=e[4]
+        }
+        
         cat('..done\n')
         setwd('..')
     }
-    write.xlsx(sampleData,'SampleData.xlsx',row.names=F)
+    write.txt(sampleData,'SampleData.csv')
 }
 ###
 
@@ -235,6 +252,7 @@
     maxCn=12
     suppressPackageStartupMessages(library(xlsx))    
     
+    
     ## TAPS_call outputs the total and minor allele copy numbers of all segments as a text file, and as images for visual confirmation.
     ## sampleInfo_TAPS.txt must be present in each sample folder. If TAPS_plot could not make a good guess of the Log-R of copy number 2 
     ## and the Log-R difference to a deletion, you must interpret the scatter plots and edit sampleInfo_TAPS.txt.
@@ -250,13 +268,23 @@
     setwd(directory)
     #subs <- getSubdirs()
     
-    if (length(grep('SampleData.xlsx',dir()))==1)
+    if (length(grep('SampleData.csv',dir()))==0)
     {
-        sampleData=read.xlsx('SampleData.xlsx',1)
+        if (length(grep('SampleData.xlsx',dir()))==1)
+        {
+            sampleData=read.xlsx('SampleData.xlsx',1)
+            save.txt(sampleData,'SampleData.csv')
+        }
+        sampleData=load.txt('SampleData.csv')
     }
+    
+    if (length(grep('SampleData.csv',dir()))==1)
+    {
+        sampleData=load.txt('SampleData.csv')
+    }
     else
     {
-        sampleData <- read.xlsx('../SampleData.xlsx',1)
+        sampleData <- load.txt('../SampleData.csv')
     }
     subs=as.character(sampleData$Sample)
     
@@ -273,15 +301,15 @@
     for (i in 1:length(subs)) {
         setwd(subs[i])
         name <- subs[i]
-        sampleInfo <- sampleData[sampleData$Sample==subs[i],2:5]
-        if (nrow(sampleInfo)==1) {
+        sampleInfo <- sampleData[i,c('cn1','cn2','cn3','loh')]
+        if (nrow(sampleInfo)==1) if (sum(is.na(sampleInfo))<4) {
             
             cat(' ..loading', subs[i])
             Log2 <- readLog2()
             alf <- readAlf(localDir)
             segments <- readSegments()
             
-            #Some samples throw NA values, we simply remove these.
+            #Some samples contain NA values, we simply remove these.
             Log2=Log2[!is.nan(Log2$Value),]
             Log2=Log2[!is.na(Log2$Value),]
             
@@ -683,7 +711,7 @@
     ## the Log-R and Allelic Imbalance Ratio of all the segments. 
     if (is.null(sampleInfo)) cat ('there was no estimation available for',name)
     
-    cns=1:maxCn; est=sampleInfo[1:3]; est[est==' ']=NA; est=as.numeric(est)
+    cns=1:maxCn; est=sampleInfo[1:3]; est[est==' ']=NA; est=as.numeric(as.character(est))
     m <- lm(2^est ~ cns[1:3])$coefficients # can handle one NA in est. This model is for "ratio as a function of copy number".
     est[is.na(est)] = log2(m[1]+cns[which(is.na(est))]*m[2]) # simple linear regression to fill the missing
     
@@ -933,10 +961,10 @@
         for (cn in 0:maxCn) {
             t_int <- int[[paste('cn',cn,sep='')]]   ## get Log-R of particular cn from 'int'
             t_dis <- abs(regions$log2[i]-t_int)            ## distance to that particular cn
-            if (t_dis < distance) {                        ## nearest so far, save.
+            try (if (t_dis < distance) {                        ## nearest so far, save.
                 distance <- t_dis -> intDist[i]
                 Cn[i] <- cn
-            }
+            }, silent=T)
         }
     }        
 
@@ -956,10 +984,10 @@
             t_ai <- ai[paste('cn',Cn[i],'m',m,sep='')][[1]]
             t_dis <- abs(regions$imba[i]-t_ai)
             #cat('Line',i,'Cn', Cn[i],'m:',m,'Comparing',t_dis,'with',t_int,'.......')
-            if (t_dis < distance) {
+            try( if (t_dis < distance) {
                 distance <- t_dis -> imbaDist[i]
                 mCn[i] <- m
-            }
+            }, silent=T)
         } else mCn[i] <- NA
         #fullCN[i] <- paste('cn',Cn[i],'m',mCn[i],sep='') # Full description
     }
@@ -1344,7 +1372,7 @@
     
     suppressPackageStartupMessages(library(xlsx))    
     
-    sampleData <- read.xlsx('SampleData.xlsx',1)
+    sampleData <- load.txt('SampleData.txt')
     olddir <- getwd()
     if (!is.na(outdir)) {
         try(dir.create(outdir), silent=T)
@@ -1507,7 +1535,7 @@
     
     suppressPackageStartupMessages(library(xlsx))    
 
-    sampleData=read.xlsx('SampleData.xlsx',1)
+    sampleData=load.txt('SampleData.txt')
     
     subs=as.character(sampleData$Sample)
     
@@ -1597,7 +1625,7 @@
                               comparison='', 
                               name1='1', name2='2', 
                               color='#000000') {    
-    
+
     p_cutoff <- 0.05
     freq_cutoff <- 0
     
@@ -2628,11 +2656,11 @@
         #Check if name of sample is too long to be graphically pleasing and cut it if that is the case
         if(nchar(name)>12)
         {
-            mtext(paste("Detailed view with copynumbers of sample: ",substring(name,1,12),"\n Chromsome ",c,sep=""),side=3)
+            mtext(paste("Sample: ",substring(name,1,12),"\n Chromsome ",c,sep=""),side=3)
         }
         else
         {
-            mtext(paste("Detailed view with copynumbers of sample: ",name,"\n Chromsome ",c,sep=""),side=3)
+            mtext(paste("Sample: ",name,"\n Chromsome ",c,sep=""),side=3)
         }
         
         #------------------------------------------------------------
@@ -3062,7 +3090,7 @@
     #Titles,date/time and axis labels
     mtext(text="log-ratio",side=1,line=1.1,cex=1)
     mtext(text="Allelic imbalance",side=2,line=1.5,cex=1)
-    mtext(paste("Detailed view of sample: ",name,"\n Chromosome ",c,", Region: ",Rstart,"-",Rend,sep=""),side=3)
+    mtext(paste("Sample: ",name,"\n Chromosome ",c,", Region: ",Rstart,"-",Rend,sep=""),side=3)
     
     #------------------------------------------------------------
     #Top Right - Signal
@@ -3441,7 +3469,70 @@
 
 
 
+getEstimates <- function(logR, imba) {
+    #load('shortRegions.Rdata')
+    #logR=allRegions$regions$log2[allRegions$regions$lengthMB>5]
+    #imba=allRegions$regions$imba[allRegions$regions$lengthMB>5]
+    #logR=regs$logs
+    #imba=regs$scores
+    
+    # Find the max point, that is likely an integer copy number.
+    d=density(logR[abs(logR)<0.15],na.rm=T)
+    max=d$x[order(d$y,decreasing=T)][1]
+    
+    # Find optimal logR's for integer copy numbers
+    R=2^logR-2^max
+    deltas=seq(0.05,0.5,0.01) # Allow a delta-ratio of 5% to 50%
+    n=length(deltas)
+    dev=rep(NA,n)
+    for (i in 1:n) { 
+        delta=deltas[i]
+        mod=R %% delta
+        mod[mod>(delta/2)]=delta-mod[mod>(delta/2)]
+        dev[i]=mean(mod)
+    }
+    
+    best=deltas[dev/deltas==min(dev/deltas)]
+    
+    # guess the copy number at "max"
+    imbas=imba[abs(R)<best/4]
+    imbas=imbas[!is.na(imbas)]
+    d=density(imbas,na.rm=T)
+    n=length(d$y)
+    maxes=which(diff(d$y[1:(n-1)])>0 & diff(d$y[2:n])<0)
+    ## remove crappy maxes:
+    maxes=maxes[d$y[maxes] > 0.05*max(d$y[maxes])]
+    
+    cn=3
+    if (d$x[maxes][1]<0.15) # even copy number
+        cn=2
+    if (length(maxes)>2)
+        if (min(diff(maxes))/max(diff(maxes))>0.7)
+            cn=4
+    if (length(maxes)==1)
+        if (d$x[maxes]>0.35)
+            cn=1
+    
+    cn1=log2(2^max-(cn-1)*best)
+    try(cn1 <- median(logR[abs(logR-cn1)<0.1]),silent=T)
+    cn2=log2(2^max-(cn-2)*best)
+    try(cn2 <- median(logR[abs(logR-cn2)<0.1]),silent=T)
+    cn3=log2(2^max-(cn-3)*best)
+    try(cn3 <- median(logR[abs(logR-cn3)<0.1]),silent=T)
 
+    
+    R2=2^cn2
+    imba2=imba[abs(2^logR-R2)<best/4]
+    d=density(imba2,na.rm=T)
+    n=length(d$y)
+    maxes=which(diff(d$y[1:(n-1)])>0 & diff(d$y[2:n])<0)
+    ## remove crappy maxes:
+    maxes=maxes[d$y[maxes] > 0.05*max(d$y[maxes])]
+    if (length(maxes)==1) {
+        loh=0.75
+    } else loh=d$x[maxes[length(maxes)]]
+    return(round(c(cn1,cn2,cn3,loh),2))
+}
 
 
 
@@ -3492,3 +3583,5 @@
 
 
 
+
+

Modified: pkg/TAPS/man/TAPS_call.Rd
===================================================================
--- pkg/TAPS/man/TAPS_call.Rd	2013-10-21 11:08:15 UTC (rev 196)
+++ pkg/TAPS/man/TAPS_call.Rd	2013-11-06 12:33:02 UTC (rev 197)
@@ -28,7 +28,7 @@
 
 \details{
 TAPS_call is the second step in the TAPS analysis, to be run after TAPS_plot. It requires: \cr
-1) Your interpretation of plots which you add to the SampleData.xlsx file: \cr
+1) Your interpretation of plots which you add to the SampleData.csv file: \cr
     cn1-cn3: The log-ratio of copy number 1-3 (in the main clone), any two or three of these values are required. \cr
     loh: The allelic imbalance of copy number 2 LOH. \cr
     (Default values are fine for fairly pure, near-diploid samples) \cr

Modified: pkg/TAPS/man/TAPS_plot.Rd
===================================================================
--- pkg/TAPS/man/TAPS_plot.Rd	2013-10-21 11:08:15 UTC (rev 196)
+++ pkg/TAPS/man/TAPS_plot.Rd	2013-11-06 12:33:02 UTC (rev 197)
@@ -68,9 +68,10 @@
 Workflow:\cr
 1. From the folder containing your samples (sample folders) run TAPS_plot(). \cr
 2. Investigate the scatter plots generated in your sample folders. \cr
-3. To proceed with copy number calls, find and open the file "SampleData.txt". \cr
-4. For each sample, enter an interpretation of Log-Ratio @ copy number 2 ("cn2"), the difference in
-   Log-Ratio to a deletion ("delta") and the allelic imbalance ratio of CNNLOH ("loh"). Save the file. \cr
+3. To proceed with copy number calls, find and open the file "SampleData.csv". \cr
+4. For each sample, enter an interpretation of Log-Ratio @ copy number 1-3 ("cn1-3",
+   two+ of the three values are required) and the allelic imbalance ratio of LOH and 
+   cn2 ("loh"). Save the file. \cr
 5. Run TAPS_call(). \cr
 6. Inspect the karyotype_check images, and the new chromosome-wise images. \cr
 7. If all looks reasonable, you will find good copy number estimates in 'Copynumbers.csv'. \cr

Modified: www/TAPS_exec.php
===================================================================
--- www/TAPS_exec.php	2013-10-21 11:08:15 UTC (rev 196)
+++ www/TAPS_exec.php	2013-11-06 12:33:02 UTC (rev 197)
@@ -100,7 +100,7 @@
 
 This will have generated one "<samplename>_overview.jpeg" in the main directory (should
 there be one), several plots and .Rdata files in the sample folder and a file named
-simply "SampleData.xlsx" which will be covered shortly. <br /><br />
+simply "SampleData.csv" which will be covered shortly. <br /><br />
 
 <hr class="alt1" />
 
@@ -116,18 +116,18 @@
 <br /><br />
 
 TAPS_call() also has "directory" as its main argument, the parameters are read from the file
-"SampleData.xlsx". If you desire to execute TAPS_call() on many samples, add the arguments to the
-main directory SampleData.xlsx. If you want to execute TAPS_call() on just one sample, add the arguments
-for that sample to SampleData.xlsx and specify that sample using the "directory" argument of TAPS_call().
+"SampleData.csv". If you desire to execute TAPS_call() on many samples, add the arguments to the
+main directory SampleData.csv. If you want to execute TAPS_call() on just one sample, add the arguments
+for that sample to SampleData.csv and specify that sample using the "directory" argument of TAPS_call().
 <br /><br />
 
-An example run of TAPS_call(), where "mainsamplefolder" contains SampleData.xlsx: <br />
+An example run of TAPS_call(), where "mainsamplefolder" contains SampleData.csv: <br />
 
 <pre>
 	TAPS_call(directory="path/to/mainsamplefolder")
 </pre>
 
-To infer the arguments for SampleData.xlsx that TAPS_call() uses you will need to look at one of
+To infer the arguments for SampleData.csv that TAPS_call() uses you will need to look at one of
 the chromosomal plots generated using TAPS_plot(). The structure and relationships in the plot
 can be interpreted to figure out the most probable locations of the allele-specific copynumbers.
 <br /><br />



More information about the Patchwork-commits mailing list