[Patchwork-commits] r200 - .git .git/logs .git/logs/refs/heads .git/logs/refs/remotes/origin .git/refs/heads .git/refs/remotes/origin pkg/TAPS pkg/TAPS/R pkg/patchwork/inst/perl

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Mon Jan 13 09:50:27 CET 2014


Author: sebastian_d
Date: 2014-01-13 09:50:26 +0100 (Mon, 13 Jan 2014)
New Revision: 200

Modified:
   .git/COMMIT_EDITMSG
   .git/index
   .git/logs/HEAD
   .git/logs/refs/heads/master
   .git/logs/refs/remotes/origin/master
   .git/refs/heads/master
   .git/refs/remotes/origin/master
   pkg/TAPS/NAMESPACE
   pkg/TAPS/R/TAPS.r
   pkg/patchwork/inst/perl/mpile2alleles.pl
   pkg/patchwork/inst/perl/pile2alleles.pl
Log:
2014 making sure everything is synced rforge/git

Modified: .git/COMMIT_EDITMSG
===================================================================
--- .git/COMMIT_EDITMSG	2013-12-13 15:28:31 UTC (rev 199)
+++ .git/COMMIT_EDITMSG	2014-01-13 08:50:26 UTC (rev 200)
@@ -1 +1 @@
-merga och ta bort xlsx
+2014 making sure everything is synced rforge/git

Modified: .git/index
===================================================================
(Binary files differ)

Modified: .git/logs/HEAD
===================================================================
--- .git/logs/HEAD	2013-12-13 15:28:31 UTC (rev 199)
+++ .git/logs/HEAD	2014-01-13 08:50:26 UTC (rev 200)
@@ -73,3 +73,8 @@
 0044e7bf9b5beb763bd5f47e54c8b778999eede2 64da542e6e940d1ec28e8b99c82be94c86a2e3fd Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1382000849 +0200	commit: small update to TAPS_plot
 64da542e6e940d1ec28e8b99c82be94c86a2e3fd 5ef4202a68e5b1b59947297330a5175922c464ae Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1383741292 +0100	commit: merga och ta bort xlsx
 5ef4202a68e5b1b59947297330a5175922c464ae 6af27c3b92782d6bad79f2b4ff84cbaf8db0ac7e Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1383741298 +0100	pull: Merge made by the 'recursive' strategy.
+6af27c3b92782d6bad79f2b4ff84cbaf8db0ac7e e1235f0d290d0d320c282b182e3a1b8d74fa032b Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1386326202 +0100	commit: updates to mpile2alleles.pl to avoid interruption if a line seems incorrect
+e1235f0d290d0d320c282b182e3a1b8d74fa032b a3d6911bbe093e6a2149615473f958a9081f4745 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1386326256 +0100	pull: Merge made by the 'recursive' strategy.
+a3d6911bbe093e6a2149615473f958a9081f4745 c66991c3bf461c1c1ad49a9f3381716cefcc0e32 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1386336664 +0100	commit: small fix mppile2alleles
+c66991c3bf461c1c1ad49a9f3381716cefcc0e32 1c3b0b54598dac7863316a79427364b873bec3f3 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1389602878 +0100	commit: 2014 making sure everything is synced rforge/git
+1c3b0b54598dac7863316a79427364b873bec3f3 78c55c83e03724a24900fbaf881864197d23ab4d Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1389602903 +0100	pull: Merge made by the 'recursive' strategy.

Modified: .git/logs/refs/heads/master
===================================================================
--- .git/logs/refs/heads/master	2013-12-13 15:28:31 UTC (rev 199)
+++ .git/logs/refs/heads/master	2014-01-13 08:50:26 UTC (rev 200)
@@ -73,3 +73,8 @@
 0044e7bf9b5beb763bd5f47e54c8b778999eede2 64da542e6e940d1ec28e8b99c82be94c86a2e3fd Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1382000849 +0200	commit: small update to TAPS_plot
 64da542e6e940d1ec28e8b99c82be94c86a2e3fd 5ef4202a68e5b1b59947297330a5175922c464ae Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1383741292 +0100	commit: merga och ta bort xlsx
 5ef4202a68e5b1b59947297330a5175922c464ae 6af27c3b92782d6bad79f2b4ff84cbaf8db0ac7e Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1383741298 +0100	pull: Merge made by the 'recursive' strategy.
+6af27c3b92782d6bad79f2b4ff84cbaf8db0ac7e e1235f0d290d0d320c282b182e3a1b8d74fa032b Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1386326202 +0100	commit: updates to mpile2alleles.pl to avoid interruption if a line seems incorrect
+e1235f0d290d0d320c282b182e3a1b8d74fa032b a3d6911bbe093e6a2149615473f958a9081f4745 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1386326256 +0100	pull: Merge made by the 'recursive' strategy.
+a3d6911bbe093e6a2149615473f958a9081f4745 c66991c3bf461c1c1ad49a9f3381716cefcc0e32 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1386336664 +0100	commit: small fix mppile2alleles
+c66991c3bf461c1c1ad49a9f3381716cefcc0e32 1c3b0b54598dac7863316a79427364b873bec3f3 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1389602878 +0100	commit: 2014 making sure everything is synced rforge/git
+1c3b0b54598dac7863316a79427364b873bec3f3 78c55c83e03724a24900fbaf881864197d23ab4d Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1389602903 +0100	pull: Merge made by the 'recursive' strategy.

Modified: .git/logs/refs/remotes/origin/master
===================================================================
--- .git/logs/refs/remotes/origin/master	2013-12-13 15:28:31 UTC (rev 199)
+++ .git/logs/refs/remotes/origin/master	2014-01-13 08:50:26 UTC (rev 200)
@@ -70,3 +70,8 @@
 85d1e6948ab98a6bd2c58f5dc820740c198a5444 0044e7bf9b5beb763bd5f47e54c8b778999eede2 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1381839752 +0200	update by push
 0044e7bf9b5beb763bd5f47e54c8b778999eede2 64da542e6e940d1ec28e8b99c82be94c86a2e3fd Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1382000861 +0200	update by push
 64da542e6e940d1ec28e8b99c82be94c86a2e3fd 261f0569cd0eb75824fa8da974f7e7c74232a16a Sebastian DiLorenzo <S_D at imv091.medsci.uu.se> 1383741141 +0100	pull: fast-forward
+261f0569cd0eb75824fa8da974f7e7c74232a16a 2990bb534c6bc1939f29fc9477f36979214d519e Sebastian DiLorenzo <S_D at imv091.medsci.uu.se> 1386326256 +0100	pull: fast-forward
+2990bb534c6bc1939f29fc9477f36979214d519e a3d6911bbe093e6a2149615473f958a9081f4745 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1386326296 +0100	update by push
+a3d6911bbe093e6a2149615473f958a9081f4745 c66991c3bf461c1c1ad49a9f3381716cefcc0e32 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1386336669 +0100	update by push
+c66991c3bf461c1c1ad49a9f3381716cefcc0e32 65601241ae236432ef8a7f3aab8d7af3068d4344 Sebastian DiLorenzo <S_D at array-47-13.medsci.uu.se> 1389602902 +0100	pull: fast-forward
+65601241ae236432ef8a7f3aab8d7af3068d4344 78c55c83e03724a24900fbaf881864197d23ab4d Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1389602990 +0100	update by push

Modified: .git/refs/heads/master
===================================================================
--- .git/refs/heads/master	2013-12-13 15:28:31 UTC (rev 199)
+++ .git/refs/heads/master	2014-01-13 08:50:26 UTC (rev 200)
@@ -1 +1 @@
-6af27c3b92782d6bad79f2b4ff84cbaf8db0ac7e
+78c55c83e03724a24900fbaf881864197d23ab4d

Modified: .git/refs/remotes/origin/master
===================================================================
--- .git/refs/remotes/origin/master	2013-12-13 15:28:31 UTC (rev 199)
+++ .git/refs/remotes/origin/master	2014-01-13 08:50:26 UTC (rev 200)
@@ -1 +1 @@
-261f0569cd0eb75824fa8da974f7e7c74232a16a
+78c55c83e03724a24900fbaf881864197d23ab4d

Modified: pkg/TAPS/NAMESPACE
===================================================================
--- pkg/TAPS/NAMESPACE	2013-12-13 15:28:31 UTC (rev 199)
+++ pkg/TAPS/NAMESPACE	2014-01-13 08:50:26 UTC (rev 200)
@@ -2,6 +2,8 @@
 		TAPS_call,
 		TAPS_region,
 		TAPS_freq,
-		TAPS_compare)
+		TAPS_compare,
+                TAPS_estimates)
 
 
+

Modified: pkg/TAPS/R/TAPS.r
===================================================================
--- pkg/TAPS/R/TAPS.r	2013-12-13 15:28:31 UTC (rev 199)
+++ pkg/TAPS/R/TAPS.r	2014-01-13 08:50:26 UTC (rev 200)
@@ -14,10 +14,9 @@
 #   ##    ##     ## ##        ##    ##    ##        ##       ##     ##    ##    
 #   ##    ##     ## ##         ######     ##        ########  #######     ##  
 
-
 TAPS_plot <- function(#samples='all',
                      directory=NULL,autoEstimate=FALSE,
-                      bin=400) {
+                      bin=400,cores=1) {
     #Automatically check, and if needed install, packages stats and fields
     
     #Load stats. It should be in all, at least semi-new, R distributions so we dont need to install.package it or
@@ -27,6 +26,9 @@
     suppressPackageStartupMessages(library(DNAcopy))
     suppressPackageStartupMessages(library(fields))
     suppressPackageStartupMessages(library(xlsx))    
+    suppressPackageStartupMessages(library(foreach))
+    suppressPackageStartupMessages(library(doMC))
+    suppressPackageStartupMessages(registerDoMC(cores=cores))
 
 
     #list.of.packages <- c("stats", "fields")
@@ -35,7 +37,7 @@
     
     if (is.null(directory))
     {
-        cat("Using working directory\n")
+        print("Using working directory")
         directory = "."
         #cat("You have not assigned a directory containing one or more folders of samples for TAPS_plot to execute. \n")
         #cat("Example: \"/user/mysamples/\" or, to run it in your current working directory, \".\" \n")
@@ -71,14 +73,18 @@
     #if (samples[1]=='all') samples=rep(T,length(subs))
     #if (is.logical(samples)) samples=which(samples)
     #subs=subs[samples]
-
-         
+    root <- getwd()
+    print(paste('root: ',root,sep=''))
     
-    for (i in 1:length(subs)) {
-        setwd(subs[i])
+    # for (i in 1:length(subs)) {
+    #junk only stores the list from foreach.
+    junk <- foreach (i = 1:length(subs)) %dopar% {
+        setwd(paste(root,'/',subs[i],sep=''))
         name <- subs[i]
         #if (length(grep('sampleData.csv',dir()))==0) save.txt(data.frame(Sample=name,cn2='',delta='',loh='',completed.analysis='no'),file='sampleData.csv')
-        cat(' ..loading', subs[i])
+        # cat(' ..loading', subs[i])
+        # print(paste(i,': ',subs[i],': Opening',sep=''))
+        print(paste(i,'/',length(subs),': ',subs[i],' Loading',sep=''))
         
         if(length(grep("*.cyhd.cychp",dir()))==1)				##cyhd sample
         {
@@ -158,12 +164,13 @@
         segments <- segments[!is.na(segments$Value),]
         
         
-        cat(' ..processing')
+        # cat(' ..processing')
         if (is.null(segments)) {                                 ## segmentation using DNA-copy if needed (must then be installed)
             segments <- segment_DNAcopy(Log2)
             save.txt(segments,'_segments.txt') 
         }
         
+        
         segments$Value <- segments$Value-mean(Log2$Value)     ## Median-centering
         Log2$Value <- Log2$Value-mean(Log2$Value)             ## Median-centering
         
@@ -177,12 +184,20 @@
         }
         
         ## Sample QC 
-        sampleData$MAPD[i] <- MAPD <- round(median(abs(diff(Log2$Value[Log2$Chromosome=='chr1'][order(Log2$Start[Log2$Chromosome=='chr1'])]))),2)
-        sampleData$MHOF[i] <- MHOF <- round(100*median(0.5+abs(0.5-alf$Value[alf$Chromosome!='chrX'])),1)
+        # print(paste(i,': ',segments,', ',' sample QC',sep=''))
+        # sampleData$MAPD[i] <- MAPD <- round(median(abs(diff(Log2$Value[Log2$Chromosome=='chr1'][order(Log2$Start[Log2$Chromosome=='chr1'])]))),2)
+        # sampleData$MHOF[i] <- MHOF <- round(100*median(0.5+abs(0.5-alf$Value[alf$Chromosome!='chrX'])),1)
+        # print(round(median(abs(diff(Log2$Value[Log2$Chromosome=='chr1'][order(Log2$Start[Log2$Chromosome=='chr1'])]))),2))
+        # print(round(100*median(0.5+abs(0.5-alf$Value[alf$Chromosome!='chrX'])),1))
+
+        sampleData$MAPD[i] <- round(median(abs(diff(Log2$Value[Log2$Chromosome=='chr1'][order(Log2$Start[Log2$Chromosome=='chr1'])]))),2)
+        sampleData$MHOF[i] <- round(100*median(0.5+abs(0.5-alf$Value[alf$Chromosome!='chrX'])),1)
+
             #round(100*median(0.5+regs$hom[regs$scores<0.5],na.rm=T),1)        
         #MAID=round(median(abs(diff(regs$scores[!is.na(regs$scores)]))),3)
         
         #Save for TAPS_region()
+        # print(paste(i,': taps_region()',sep=''))
         save(regs,Log2,alf,segments,file="TAPS_plot_output.Rdata")
         
         #save.txt(sampleData,file='../sampleData.csv')
@@ -201,18 +216,17 @@
             hg18=F
             }
 
-        cat('..plotting.\n')
-        
+        # cat('..plotting.\n')
         OverviewPlot(regs$chr,regs$start,regs$end,regs$logs,regs$scores,hg18=hg18,
                      as.character(Log2$Chromosome),Log2$Start,Log2$Value,as.character(alf$Chromosome),alf$Start,alf$Value,
-                     name=name,MAPD=MAPD,MHOF=MHOF)                
+                     name=name,MAPD=sampleData$MAPD[i],MHOF=sampleData$MHOF[i])                
         
         ## Chromosome-wise plots for manual analysis
         regions=allRegions$regions
         
         karyotype_chroms(regs$chr,regs$start,regs$end,regs$logs,regs$scores,hg18=hg18,
                          as.character(Log2$Chromosome),Log2$Start,Log2$Value,as.character(alf$Chromosome),alf$Start,
-                         alf$Value,name=name,MAPD=MAPD,MHOF=MHOF)
+                         alf$Value,name=name,MAPD=sampleData$MAPD[i],MHOF=sampleData$MHOF[i])
         
         ## Finally add estimates if needed:
         e=NULL
@@ -231,8 +245,11 @@
             sampleData$loh=e[4]
         }
         
-        cat('..done\n')
-        setwd('..')
+        # cat('..done\n')
+        # print(paste(i,': ',subs[i],': OK',sep=''))
+        print(paste(i,'/',length(subs),': ',subs[i],' OK',sep=''))
+        setwd(root)
+        1
     }
     save.txt(sampleData,'SampleData.csv')
 }
@@ -247,10 +264,13 @@
 #   ##    ##     ## ##         ######      ######  ##     ## ######## ######## 
 
 ###
-TAPS_call <- function(samples='all',directory=getwd()) {
+TAPS_call <- function(samples='all',directory=getwd(),cores=1) {
     minseg=1
     maxCn=12
     suppressPackageStartupMessages(library(xlsx))    
+    suppressPackageStartupMessages(library(foreach))
+    suppressPackageStartupMessages(library(doMC))
+    suppressPackageStartupMessages(registerDoMC(cores=cores))
     
     
     ## TAPS_call outputs the total and minor allele copy numbers of all segments as a text file, and as images for visual confirmation.
@@ -258,7 +278,7 @@
     ## and the Log-R difference to a deletion, you must interpret the scatter plots and edit sampleInfo_TAPS.txt.
     if (is.null(directory))
     {
-        cat("No directory supplied, using working directory.")
+        print("No directory supplied, using working directory.")
         directory = "."
         #cat("You have not assigned a directory containing one or more folders of samples for TAPS_call to execute. \n")
         #cat("Example: \"/user/mysamples/\" or, to run it in your current working directory, \".\" \n")
@@ -281,8 +301,7 @@
     if (length(grep('SampleData.csv',dir()))==1)
     {
         sampleData=load.txt('SampleData.csv')
-    }
-    else
+    } else
     {
         sampleData <- load.txt('../SampleData.csv')
     }
@@ -297,14 +316,18 @@
     if (samples[1]=='all') samples=rep(T,length(subs))
     if (is.logical(samples)) samples=which(samples)
     subs=subs[samples]
-    
-    for (i in 1:length(subs)) {
+    sampleData <- sampleData[samples,]
+
+    # for (i in 1:length(subs)) {
+    #junk only stores the list from foreach.
+    junk <- foreach(i=1:length(subs)) %dopar% {
         setwd(subs[i])
         name <- subs[i]
         sampleInfo <- sampleData[i,c('cn1','cn2','cn3','loh')]
         if (nrow(sampleInfo)==1) if (sum(is.na(sampleInfo))<4) {
             
-            cat(' ..loading', subs[i])
+                # cat(' ..loading', subs[i])
+            print(paste(i,'/',length(subs),': ',subs[i],' Loading',sep=''))
             Log2 <- readLog2()
             alf <- readAlf(localDir)
             segments <- readSegments()
@@ -322,7 +345,7 @@
             segments$Value <- segments$Value-mean(Log2$Value) 
             Log2$Value <- Log2$Value-mean(Log2$Value)
             
-            cat(' ..processing.\n')
+            # cat(' ..processing.\n')
             
             load('allRegions.Rdata')                            ## These were prepared in TAPS_plot
             load('shortRegions.Rdata')
@@ -334,7 +357,7 @@
             
             u <- setCNs(allRegions,t$int,t$ai,t$model,maxCn)            ## Assigns copy number variant for all segments
             allRegions$regions <- u$regions
-            ## adjacent segments with idendical copy number are merged (except over centromere) and all are saved to a text file
+            ## adjacent segments with idendical copy number are NOT... merged (except over centromere) and all are saved to a text file
             save.txt(u$regions,file=paste(name,'_segmentCN.txt',sep='')) 
             regions=allRegions$regions
 
@@ -358,19 +381,21 @@
             #save parameters as strings
             parameters=paste("Parameters given: cn2:",sampleInfo$cn2," delta:",sampleInfo$delta," loh:",sampleInfo$loh)
             
-            karyotype_check(regions$Chromosome,regions$Start,regions$End,regions$log2,regions$imba,regions$Cn,regions$mCn,t,name=name)
+            #karyotype_check(regions$Chromosome,regions$Start,regions$End,regions$log2,regions$imba,regions$Cn,regions$mCn,t,name=name)
             
             karyotype_chromsCN(regions$Chromosome,regions$Start,regions$End,regions$log2,
                                regions$imba,regions$Cn,regions$mCn,hg18=hg18,
                                as.character(Log2$Chromosome),Log2$Start,Log2$Value,as.character(alf$Chromosome),
                                alf$Start,alf$Value,t,name=name,xlim=c(-1,1),ylim=c(0,1),parameters=parameters)
             
-            cat('..done\n')
-        } else cat('Skipped',name,'\n')
+            # cat('..done\n')
+            print(paste(i,'/',length(subs),': ',name,' ', 'OK',sep=''))
+        } else print(paste(i,'/',length(subs),': ',name,' ', 'Skipped',sep=''))
         
         setwd('..')
     }
     #save.txt(sampleData,file='sampleData.csv')
+    1
 }
 ###
 regsFromSegs <- function (Log2,alf, segments, bin=200,min=1) {
@@ -454,7 +479,7 @@
     ## This function reads Log-ratio from the file "probes.txt" which must be present in the current directory.
     Log2=NULL
     try( Log2 <- read.csv(file='probes.txt',header=T,sep='\t'), silent=T)
-    if (!is.null(Log2)) cat(' ..found probes.txt')
+    # if (!is.null(Log2)) cat(' ..found probes.txt')
     if (is.null(Log2)) {
         try( Log2 <- read.csv(file='_probes.txt',header=T,sep='\t'), silent=T)
         if (!is.null(Log2)) cat(' ..found _probes.txt')
@@ -493,7 +518,7 @@
     ## This funciton reads allele frequency [B/(A+B)] from the file 'snps.txt', which must be present in the current directory.
     alf=NULL
     try( alf <- read.csv(file='snps.txt',header=T,sep='\t'), silent=T)
-    if (!is.null(alf)) cat(' ..found snps.txt')
+    # if (!is.null(alf)) cat(' ..found snps.txt')
     if (is.null(alf)) {
         try( alf <- read.csv(file='_snps.txt',header=T,sep='\t'), silent=T)
         if (!is.null(alf)) cat(' ..found _snps.txt')
@@ -508,7 +533,7 @@
     ## Using a HMM is not recommended unless you have a homogenous, diploid sample. (And then there is more user-friendly software anyway.)
     segments=NULL
     try( segments <- read.csv(file='segments.txt',header=T,sep='\t'),silent=T)
-    if (!is.null(segments)) cat(' ..found segments.txt')
+    # if (!is.null(segments)) cat(' ..found segments.txt')
     if (is.null(segments)) { 
         try( segments <- read.csv(file='_segments.txt',header=T,sep='\t'),silent=T)
         if (!is.null(segments)) cat(' ..found _segments.txt')
@@ -735,7 +760,7 @@
     tix$cn1 <- abs(regions$log2 - expectedAt) < diff(est)[1]/3        ## index of likely cn1 regions
     temp <- regions[tix$cn1,]
     med <- weightedMedian(temp$log2,temp$probes)
-    probes[1] <- sum(temp$probes)
+    probes[1] <- ifelse(is.null(med), 50, sum(temp$probes))
     int[1] <- ifelse(!is.null(med),med,expectedAt)
     
     ## likely cn3 regions sit near estimate
@@ -743,7 +768,7 @@
     tix$cn3 <- abs(regions$log2 - expectedAt) < diff(est)[2]/3
     temp <- regions[tix$cn3,]
     med <- weightedMedian(temp$log2,temp$probes)
-    probes[3] <- sum(temp$probes)
+    probes[3] <- ifelse(is.null(med), 50, sum(temp$probes))
     int[3] <- ifelse(!is.null(med),med,expectedAt)
     
     ## cn4 follows at ...
@@ -752,7 +777,7 @@
     tix$cn4 <- abs(regions$log2 - expectedAt) < mean(diff(int))/3
     temp <- regions[tix$cn4,]
     med <- weightedMedian(temp$log2,temp$probes)
-    probes[4] <- sum(temp$probes)
+    probes[4] <- ifelse(is.null(med), 50, sum(temp$probes))
     int[4] <- ifelse(!is.null(med),med,expectedAt)
     
     ## generalized for higher cns
@@ -765,7 +790,7 @@
         tix[[thisCn]] <- abs(regions$log2 - expectedAt) < mean(diff(int))/5
         temp <- regions[tix[thisCn][[1]],]
         med <- weightedMedian(temp$log2,temp$probes)
-        probes[cn] <- sum(temp$probes)
+        probes[cn] <- ifelse(is.null(med), 0, sum(temp$probes))
         int[cn] <- ifelse(!is.null(med),med,expectedAt)
     }
     
@@ -800,7 +825,7 @@
     med <- weightedMedian(data$imba[ix],data$snps[ix]) ## Average allelic imbalance weighted on snp count
     ai$cn2m1 <- ifelse (!is.null(med),med,expectedAt)
     ## loh
-    expextedAt=loh_exp
+    expectedAt=loh_exp
     ix <- !ix
     med <- weightedMedian(data$imba[ix],data$snps[ix]) 
     ai$cn2m0 <- ifelse (!is.null(med),med,expectedAt)
@@ -1372,7 +1397,7 @@
     
     suppressPackageStartupMessages(library(xlsx))    
     
-    sampleData <- load.txt('SampleData.txt')
+    sampleData <- load.txt('SampleData.csv')
     olddir <- getwd()
     if (!is.na(outdir)) {
         try(dir.create(outdir), silent=T)
@@ -1535,7 +1560,7 @@
     
     suppressPackageStartupMessages(library(xlsx))    
 
-    sampleData=load.txt('SampleData.txt')
+    sampleData=load.txt('SampleData.csv')
     
     subs=as.character(sampleData$Sample)
     
@@ -3465,11 +3490,37 @@
     
 }
 
+#Wrapper for getEstimates(). It will execute getEstimates in every sample folder.
+#If SampleData.csv already exists it will create the file SampleData_YYYY-MM-DD_HH-MM-SS.csv instead.
+TAPS_estimates <- function(path=getwd()) {
+    library(foreach)
+    library(TAPS)
+    # setwd("/media/safe/COAD_TCGA/NexusTxtFiles/tumorCELFiles")
+    root <- path
+    samples <- dir()[file.info(dir())$isdir]
+    # samples <- samples[grep('^[A-Z]',samples)]
+    datat <- foreach(sample=samples) %do% {
+        setwd(paste(root,'/',sample,sep=''))
+        if(file.exists('shortRegions.Rdata')) {
+            load('shortRegions.Rdata')
+            cn <- getEstimates(regs$logs,regs$scores)
+            try(rm(regs),T)
+            data.frame(Sample = sample,cn1=cn[1],cn2=cn[2],cn3=cn[3],loh=cn[4])
+        } else {
+            data.frame(Sample = sample,cn1=NA,cn2=NA,cn3=NA,loh=NA)
+        }
+    }
+    sampledata <- do.call(rbind,datat)
+    setwd(root)
+    if (file.exists('SampleData.csv')) {
+        write.table(sampledata,paste('SampleData',format(Sys.time(), "_%F_%T.csv"),sep=''),sep='\t',row.names=F)
+    } else {
+        write.table(sampledata,'SampleData.csv',sep='\t',row.names=F)
+    }
+}
 
 
-
-
-getEstimates <- function(logR, imba) {
+getEstimates <- function(logR, imba, cellLines=F) {
     #load('shortRegions.Rdata')
     #logR=allRegions$regions$log2[allRegions$regions$lengthMB>5]
     #imba=allRegions$regions$imba[allRegions$regions$lengthMB>5]
@@ -3482,7 +3533,7 @@
     
     # Find optimal logR's for integer copy numbers
     R=2^logR-2^max
-    deltas=seq(0.05,0.5,0.01) # Allow a delta-ratio of 5% to 50%
+    deltas=seq(0.05,0.33,0.01) # Allow a delta-ratio of 5% to 50%
     n=length(deltas)
     dev=rep(NA,n)
     for (i in 1:n) { 
@@ -3501,7 +3552,7 @@
     n=length(d$y)
     maxes=which(diff(d$y[1:(n-1)])>0 & diff(d$y[2:n])<0)
     ## remove crappy maxes:
-    maxes=maxes[d$y[maxes] > 0.05*max(d$y[maxes])]
+    maxes=maxes[d$y[maxes] > 0.1*max(d$y[maxes])]
     
     cn=3
     if (d$x[maxes][1]<0.15) # even copy number
@@ -3513,7 +3564,7 @@
         if (d$x[maxes]>0.35)
             cn=1
     
-    cn1=log2(2^max-(cn-1)*best)
+    try(cn1 <- log2(2^max-(cn-1)*best),silent=T)
     try(cn1 <- median(logR[abs(logR-cn1)<0.1]),silent=T)
     cn2=log2(2^max-(cn-2)*best)
     try(cn2 <- median(logR[abs(logR-cn2)<0.1]),silent=T)
@@ -3523,6 +3574,7 @@
     
     R2=2^cn2
     imba2=imba[abs(2^logR-R2)<best/4]
+    if(sum(is.na(imba2)) > (length(imba2)/2) | length(imba2) < 4) return(c(NA,NA,NA,NA))
     d=density(imba2,na.rm=T)
     n=length(d$y)
     maxes=which(diff(d$y[1:(n-1)])>0 & diff(d$y[2:n])<0)
@@ -3534,54 +3586,3 @@
     return(round(c(cn1,cn2,cn3,loh),2))
 }
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-

Modified: pkg/patchwork/inst/perl/mpile2alleles.pl
===================================================================
--- pkg/patchwork/inst/perl/mpile2alleles.pl	2013-12-13 15:28:31 UTC (rev 199)
+++ pkg/patchwork/inst/perl/mpile2alleles.pl	2014-01-13 08:50:26 UTC (rev 200)
@@ -56,92 +56,113 @@
 while (<VCF>)
 	{
 	chomp;
+	#Next if header
 	next if /^#/;
-	my ($vchr, $vpos, $cons, $consQual) = (split /\t/, $_)[0,1,4,5];
+	# 			 CHROM  POS   ID   REF   ALT    QUAL  FILTER INFO FORMAT HCC1954BL.bam
+	#            chr1   11035 .    G     A      4.77  .    DP++ GT++  0/1:33,0,25:29    // the '++' means there was more information there but i shortened it
+	if ($_ =~ /(\S+)\t(\S+)\t\S+\t\S+\t(\S+)\t(\S+)\t\S+\t\S+\t\S+\t\S+/)
+		{
+		my ($vchr, $vpos, $cons, $consQual) = ($1,$2,$3,$4); #(split /\t/, $_)[0,1,4,5];
 
-	next if $cons eq 'N';
-	next if length $cons > 1;
-	$cons = uc $cons;
+		next if $cons eq 'N';
+		next if length $cons > 1;
+		$cons = uc $cons;
 
-	while (<PILEUP>) 
-		{
-		chomp;
-		my ($chr, $pos, $ref, $depth, $baseString) = (split /\t/, $_)[0,1,2,3,4];
+		while (<PILEUP>) 
+			{
+			chomp;
+			#            CHROM  POS    REF    COV    BASESTR            ?
+			#            chr1   10296  c      30     ,$++  9<(A8<.#=#>#<!+:51!@!#!B#!!!!# // the '++' means there was more information there but i shortened it
+			if ($_ =~ /(\S+)\t(\S+)\t(\S+)\t(\S+)\t(\S+)\t\S+/)
+				{
+				my ($chr, $pos, $ref, $depth, $baseString) = ($1,$2,$3,$4,$5); #(split /\t/, $_)[0,1,2,3,4];
 
-		#For testing purposes
-		# $j++;
+				#For testing purposes
+				# $j++;
 
-		#uc upper case
-		$ref = uc $ref;
-		next if $ref eq '*';
+				#uc upper case
+				$ref = uc $ref;
+				next if $ref eq '*';
 
-		#If the position and chromosome match between pileup and vcf
-		# add if smaller than
-		if ($vchr eq $chr && $vpos == $pos)
-			{
-			my $snp = ($cons =~ m/[AGCT]/) ? $cons : $iupac{$ref}{$cons};
-			$snps{$chr}{$pos}{'ref'} = $ref;
-			$snps{$chr}{$pos}{'snp'} = $snp;
-			$snps{$chr}{$pos}{'qual'} = $consQual;
+				#If the position and chromosome match between pileup and vcf
+				# add if smaller than
+				if ($vchr eq $chr && $vpos == $pos)
+					{
+					my $snp = ($cons =~ m/[AGCT]/) ? $cons : $iupac{$ref}{$cons};
+					$snps{$chr}{$pos}{'ref'} = $ref;
+					$snps{$chr}{$pos}{'snp'} = $snp;
+					$snps{$chr}{$pos}{'qual'} = $consQual;
 
-		# 	#For testing purposes
-		# $j == the faulty line number so we can see what is actually happening there
-		# 	if($j == 576673 || $j == 683784 || $j == 733447)
-		# 		{
-		# 		print OUT "Line: $. \n";
-		# 		print OUT "VCF: $vchr $vpos $cons $consQual \n";
-		# 		print OUT "PILEUP: $chr $pos $ref $depth $baseString \n";
-		# 		print OUT "snp: $snp \n";
-		# 		print OUT "assigned: $snps{$chr}{$pos}{'snp'} \n";
-		# 		$i++;
-		# 		}
+					# 	#For testing purposes
+					# $j == the faulty line number so we can see what is actually happening there
+					# 	if($j == 576673 || $j == 683784 || $j == 733447)
+					# 		{
+					# 		print OUT "Line: $. \n";
+					# 		print OUT "VCF: $vchr $vpos $cons $consQual \n";
+					# 		print OUT "PILEUP: $chr $pos $ref $depth $baseString \n";
+					# 		print OUT "snp: $snp \n";
+					# 		print OUT "assigned: $snps{$chr}{$pos}{'snp'} \n";
+					# 		$i++;
+					# 		}
 
-		# 	print OK "Line: $. \n";
-		# 	print OK "VCF: $vchr $vpos $cons $consQual \n";
-		# 	print OK "PILEUP: $chr $pos $ref $depth $baseString \n";
-		# 	print OK "snp: $snp \n";
-		# 	print OK "assigned: $snps{$chr}{$pos}{'snp'} \n";
-			
-		# 	if ($i == 3)
-		# 	{
-		# 		close(OUT);
-		# 		close(OK);
-		# 	}
+					# 	print OK "Line: $. \n";
+					# 	print OK "VCF: $vchr $vpos $cons $consQual \n";
+					# 	print OK "PILEUP: $chr $pos $ref $depth $baseString \n";
+					# 	print OK "snp: $snp \n";
+					# 	print OK "assigned: $snps{$chr}{$pos}{'snp'} \n";
+						
+					# 	if ($i == 3)
+					# 	{
+					# 		close(OUT);
+					# 		close(OK);
+					# 	}
 
-			my $snpCount;
-			if ($snps{$chr}{$pos}{'snp'} eq 'A') {
-				$snpCount = $baseString =~ tr/Aa/Aa/;
-			} elsif ($snps{$chr}{$pos}{'snp'} eq 'C') {
-				$snpCount = $baseString =~ tr/Cc/Cc/;
-			} elsif ($snps{$chr}{$pos}{'snp'} eq 'G') {
-				$snpCount = $baseString =~ tr/Gg/Gg/;
-			} elsif ($snps{$chr}{$pos}{'snp'} eq 'T') {
-				$snpCount = $baseString =~ tr/Tt/Tt/;
-			} elsif ($snps{$chr}{$pos}{'snp'} eq 'A|C') {
-				$snpCount = $baseString =~ tr/AaCc/AaCc/;
-			} elsif ($snps{$chr}{$pos}{'snp'} eq 'A|G') {
-				$snpCount = $baseString =~ tr/AaGg/AaGg/;
-			} elsif ($snps{$chr}{$pos}{'snp'} eq 'A|T') {
-				$snpCount = $baseString =~ tr/AaTt/AaTt/;
-			} elsif ($snps{$chr}{$pos}{'snp'} eq 'G|C') {
-				$snpCount = $baseString =~ tr/GgCc/GgCc/;
-			} elsif ($snps{$chr}{$pos}{'snp'} eq 'G|T') {
-				$snpCount = $baseString =~ tr/GgTt/GgTt/;
-			} elsif ($snps{$chr}{$pos}{'snp'} eq 'C|T') {
-				$snpCount = $baseString =~ tr/CcTt/CcTt/;
-			} else {
-				$snpCount = 0;
-			}
-			$snps{$chr}{$pos}{'depth'} = $depth;
-			$snps{$chr}{$pos}{'freq'} = $snpCount;
-			$snps{$chr}{$pos}{'pct'} = sprintf '%.2f', ($snpCount/$depth);
+					my $snpCount;
+					if ($snps{$chr}{$pos}{'snp'} eq 'A') {
+						$snpCount = $baseString =~ tr/Aa/Aa/;
+					} elsif ($snps{$chr}{$pos}{'snp'} eq 'C') {
+						$snpCount = $baseString =~ tr/Cc/Cc/;
+					} elsif ($snps{$chr}{$pos}{'snp'} eq 'G') {
+						$snpCount = $baseString =~ tr/Gg/Gg/;
+					} elsif ($snps{$chr}{$pos}{'snp'} eq 'T') {
+						$snpCount = $baseString =~ tr/Tt/Tt/;
+					} elsif ($snps{$chr}{$pos}{'snp'} eq 'A|C') {
+						$snpCount = $baseString =~ tr/AaCc/AaCc/;
+					} elsif ($snps{$chr}{$pos}{'snp'} eq 'A|G') {
+						$snpCount = $baseString =~ tr/AaGg/AaGg/;
+					} elsif ($snps{$chr}{$pos}{'snp'} eq 'A|T') {
+						$snpCount = $baseString =~ tr/AaTt/AaTt/;
+					} elsif ($snps{$chr}{$pos}{'snp'} eq 'G|C') {
+						$snpCount = $baseString =~ tr/GgCc/GgCc/;
+					} elsif ($snps{$chr}{$pos}{'snp'} eq 'G|T') {
+						$snpCount = $baseString =~ tr/GgTt/GgTt/;
+					} elsif ($snps{$chr}{$pos}{'snp'} eq 'C|T') {
+						$snpCount = $baseString =~ tr/CcTt/CcTt/;
+					} else {
+						$snpCount = 0;
+					}
+					$snps{$chr}{$pos}{'depth'} = $depth;
+					$snps{$chr}{$pos}{'freq'} = $snpCount;
+					$snps{$chr}{$pos}{'pct'} = sprintf '%.2f', ($snpCount/$depth);
 
-			last;
+					last;
+					}
+					elsif ($vchr eq $chr && $vpos < $pos) {
+						last;
+					}
+				}
+			else
+				{
+				#print STDERR "In file $filename, line incompatible: $_ \n";
+				print STDERR "MPILEUP line $. incompatible: $_ \n";
+				}
 			}
-			elsif ($vchr eq $chr && $vpos < $pos) {
-				last;
-			}
 		}
+	else
+		{
+		#print STDERR "In file $filename, line incompatible: $_ \n";
+		print STDERR "VCF line $. incompatible: $_ \n";
+		}
 	}
 
 close PILEUP;

Modified: pkg/patchwork/inst/perl/pile2alleles.pl
===================================================================
--- pkg/patchwork/inst/perl/pile2alleles.pl	2013-12-13 15:28:31 UTC (rev 199)
+++ pkg/patchwork/inst/perl/pile2alleles.pl	2014-01-13 08:50:26 UTC (rev 200)
@@ -39,6 +39,7 @@
 {
 	chomp;
 	#if ($_ == ""){next;}
+	#            chr1   10884  c      G      6     44   60    3      GGG   ??>
 	if ($_ =~ /(\S+)\t(\S+)\t(\S+)\t(\S+)\t(\S+)\t\S+\t\S+\t(\S+)\t(\S+)\t\S+/)
 	{
 		my ($chr, $pos, $ref, $cons, $consQual, $depth, $baseString) = ($1,$2,$3,$4,$5,$6,$7); #(split /\t/, $_)[0,1,2,3,4,7,8];
@@ -80,7 +81,7 @@
 	else
 	{
 		#print STDERR "In file $filename, line incompatible: $_ \n";
-		print STDERR "Line incompatible: $_ \n";
+		print STDERR "Line $. incompatible: $_ \n";
 	}
 }
 



More information about the Patchwork-commits mailing list