[Patchwork-commits] r200 - .git .git/logs .git/logs/refs/heads .git/logs/refs/remotes/origin .git/refs/heads .git/refs/remotes/origin pkg/TAPS pkg/TAPS/R pkg/patchwork/inst/perl
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Mon Jan 13 09:50:27 CET 2014
Author: sebastian_d
Date: 2014-01-13 09:50:26 +0100 (Mon, 13 Jan 2014)
New Revision: 200
Modified:
.git/COMMIT_EDITMSG
.git/index
.git/logs/HEAD
.git/logs/refs/heads/master
.git/logs/refs/remotes/origin/master
.git/refs/heads/master
.git/refs/remotes/origin/master
pkg/TAPS/NAMESPACE
pkg/TAPS/R/TAPS.r
pkg/patchwork/inst/perl/mpile2alleles.pl
pkg/patchwork/inst/perl/pile2alleles.pl
Log:
2014 making sure everything is synced rforge/git
Modified: .git/COMMIT_EDITMSG
===================================================================
--- .git/COMMIT_EDITMSG 2013-12-13 15:28:31 UTC (rev 199)
+++ .git/COMMIT_EDITMSG 2014-01-13 08:50:26 UTC (rev 200)
@@ -1 +1 @@
-merga och ta bort xlsx
+2014 making sure everything is synced rforge/git
Modified: .git/index
===================================================================
(Binary files differ)
Modified: .git/logs/HEAD
===================================================================
--- .git/logs/HEAD 2013-12-13 15:28:31 UTC (rev 199)
+++ .git/logs/HEAD 2014-01-13 08:50:26 UTC (rev 200)
@@ -73,3 +73,8 @@
0044e7bf9b5beb763bd5f47e54c8b778999eede2 64da542e6e940d1ec28e8b99c82be94c86a2e3fd Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1382000849 +0200 commit: small update to TAPS_plot
64da542e6e940d1ec28e8b99c82be94c86a2e3fd 5ef4202a68e5b1b59947297330a5175922c464ae Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1383741292 +0100 commit: merga och ta bort xlsx
5ef4202a68e5b1b59947297330a5175922c464ae 6af27c3b92782d6bad79f2b4ff84cbaf8db0ac7e Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1383741298 +0100 pull: Merge made by the 'recursive' strategy.
+6af27c3b92782d6bad79f2b4ff84cbaf8db0ac7e e1235f0d290d0d320c282b182e3a1b8d74fa032b Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1386326202 +0100 commit: updates to mpile2alleles.pl to avoid interruption if a line seems incorrect
+e1235f0d290d0d320c282b182e3a1b8d74fa032b a3d6911bbe093e6a2149615473f958a9081f4745 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1386326256 +0100 pull: Merge made by the 'recursive' strategy.
+a3d6911bbe093e6a2149615473f958a9081f4745 c66991c3bf461c1c1ad49a9f3381716cefcc0e32 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1386336664 +0100 commit: small fix mppile2alleles
+c66991c3bf461c1c1ad49a9f3381716cefcc0e32 1c3b0b54598dac7863316a79427364b873bec3f3 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1389602878 +0100 commit: 2014 making sure everything is synced rforge/git
+1c3b0b54598dac7863316a79427364b873bec3f3 78c55c83e03724a24900fbaf881864197d23ab4d Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1389602903 +0100 pull: Merge made by the 'recursive' strategy.
Modified: .git/logs/refs/heads/master
===================================================================
--- .git/logs/refs/heads/master 2013-12-13 15:28:31 UTC (rev 199)
+++ .git/logs/refs/heads/master 2014-01-13 08:50:26 UTC (rev 200)
@@ -73,3 +73,8 @@
0044e7bf9b5beb763bd5f47e54c8b778999eede2 64da542e6e940d1ec28e8b99c82be94c86a2e3fd Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1382000849 +0200 commit: small update to TAPS_plot
64da542e6e940d1ec28e8b99c82be94c86a2e3fd 5ef4202a68e5b1b59947297330a5175922c464ae Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1383741292 +0100 commit: merga och ta bort xlsx
5ef4202a68e5b1b59947297330a5175922c464ae 6af27c3b92782d6bad79f2b4ff84cbaf8db0ac7e Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1383741298 +0100 pull: Merge made by the 'recursive' strategy.
+6af27c3b92782d6bad79f2b4ff84cbaf8db0ac7e e1235f0d290d0d320c282b182e3a1b8d74fa032b Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1386326202 +0100 commit: updates to mpile2alleles.pl to avoid interruption if a line seems incorrect
+e1235f0d290d0d320c282b182e3a1b8d74fa032b a3d6911bbe093e6a2149615473f958a9081f4745 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1386326256 +0100 pull: Merge made by the 'recursive' strategy.
+a3d6911bbe093e6a2149615473f958a9081f4745 c66991c3bf461c1c1ad49a9f3381716cefcc0e32 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1386336664 +0100 commit: small fix mppile2alleles
+c66991c3bf461c1c1ad49a9f3381716cefcc0e32 1c3b0b54598dac7863316a79427364b873bec3f3 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1389602878 +0100 commit: 2014 making sure everything is synced rforge/git
+1c3b0b54598dac7863316a79427364b873bec3f3 78c55c83e03724a24900fbaf881864197d23ab4d Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1389602903 +0100 pull: Merge made by the 'recursive' strategy.
Modified: .git/logs/refs/remotes/origin/master
===================================================================
--- .git/logs/refs/remotes/origin/master 2013-12-13 15:28:31 UTC (rev 199)
+++ .git/logs/refs/remotes/origin/master 2014-01-13 08:50:26 UTC (rev 200)
@@ -70,3 +70,8 @@
85d1e6948ab98a6bd2c58f5dc820740c198a5444 0044e7bf9b5beb763bd5f47e54c8b778999eede2 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1381839752 +0200 update by push
0044e7bf9b5beb763bd5f47e54c8b778999eede2 64da542e6e940d1ec28e8b99c82be94c86a2e3fd Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1382000861 +0200 update by push
64da542e6e940d1ec28e8b99c82be94c86a2e3fd 261f0569cd0eb75824fa8da974f7e7c74232a16a Sebastian DiLorenzo <S_D at imv091.medsci.uu.se> 1383741141 +0100 pull: fast-forward
+261f0569cd0eb75824fa8da974f7e7c74232a16a 2990bb534c6bc1939f29fc9477f36979214d519e Sebastian DiLorenzo <S_D at imv091.medsci.uu.se> 1386326256 +0100 pull: fast-forward
+2990bb534c6bc1939f29fc9477f36979214d519e a3d6911bbe093e6a2149615473f958a9081f4745 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1386326296 +0100 update by push
+a3d6911bbe093e6a2149615473f958a9081f4745 c66991c3bf461c1c1ad49a9f3381716cefcc0e32 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1386336669 +0100 update by push
+c66991c3bf461c1c1ad49a9f3381716cefcc0e32 65601241ae236432ef8a7f3aab8d7af3068d4344 Sebastian DiLorenzo <S_D at array-47-13.medsci.uu.se> 1389602902 +0100 pull: fast-forward
+65601241ae236432ef8a7f3aab8d7af3068d4344 78c55c83e03724a24900fbaf881864197d23ab4d Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1389602990 +0100 update by push
Modified: .git/refs/heads/master
===================================================================
--- .git/refs/heads/master 2013-12-13 15:28:31 UTC (rev 199)
+++ .git/refs/heads/master 2014-01-13 08:50:26 UTC (rev 200)
@@ -1 +1 @@
-6af27c3b92782d6bad79f2b4ff84cbaf8db0ac7e
+78c55c83e03724a24900fbaf881864197d23ab4d
Modified: .git/refs/remotes/origin/master
===================================================================
--- .git/refs/remotes/origin/master 2013-12-13 15:28:31 UTC (rev 199)
+++ .git/refs/remotes/origin/master 2014-01-13 08:50:26 UTC (rev 200)
@@ -1 +1 @@
-261f0569cd0eb75824fa8da974f7e7c74232a16a
+78c55c83e03724a24900fbaf881864197d23ab4d
Modified: pkg/TAPS/NAMESPACE
===================================================================
--- pkg/TAPS/NAMESPACE 2013-12-13 15:28:31 UTC (rev 199)
+++ pkg/TAPS/NAMESPACE 2014-01-13 08:50:26 UTC (rev 200)
@@ -2,6 +2,8 @@
TAPS_call,
TAPS_region,
TAPS_freq,
- TAPS_compare)
+ TAPS_compare,
+ TAPS_estimates)
+
Modified: pkg/TAPS/R/TAPS.r
===================================================================
--- pkg/TAPS/R/TAPS.r 2013-12-13 15:28:31 UTC (rev 199)
+++ pkg/TAPS/R/TAPS.r 2014-01-13 08:50:26 UTC (rev 200)
@@ -14,10 +14,9 @@
# ## ## ## ## ## ## ## ## ## ## ##
# ## ## ## ## ###### ## ######## ####### ##
-
TAPS_plot <- function(#samples='all',
directory=NULL,autoEstimate=FALSE,
- bin=400) {
+ bin=400,cores=1) {
#Automatically check, and if needed install, packages stats and fields
#Load stats. It should be in all, at least semi-new, R distributions so we dont need to install.package it or
@@ -27,6 +26,9 @@
suppressPackageStartupMessages(library(DNAcopy))
suppressPackageStartupMessages(library(fields))
suppressPackageStartupMessages(library(xlsx))
+ suppressPackageStartupMessages(library(foreach))
+ suppressPackageStartupMessages(library(doMC))
+ suppressPackageStartupMessages(registerDoMC(cores=cores))
#list.of.packages <- c("stats", "fields")
@@ -35,7 +37,7 @@
if (is.null(directory))
{
- cat("Using working directory\n")
+ print("Using working directory")
directory = "."
#cat("You have not assigned a directory containing one or more folders of samples for TAPS_plot to execute. \n")
#cat("Example: \"/user/mysamples/\" or, to run it in your current working directory, \".\" \n")
@@ -71,14 +73,18 @@
#if (samples[1]=='all') samples=rep(T,length(subs))
#if (is.logical(samples)) samples=which(samples)
#subs=subs[samples]
-
-
+ root <- getwd()
+ print(paste('root: ',root,sep=''))
- for (i in 1:length(subs)) {
- setwd(subs[i])
+ # for (i in 1:length(subs)) {
+ #junk only stores the list from foreach.
+ junk <- foreach (i = 1:length(subs)) %dopar% {
+ setwd(paste(root,'/',subs[i],sep=''))
name <- subs[i]
#if (length(grep('sampleData.csv',dir()))==0) save.txt(data.frame(Sample=name,cn2='',delta='',loh='',completed.analysis='no'),file='sampleData.csv')
- cat(' ..loading', subs[i])
+ # cat(' ..loading', subs[i])
+ # print(paste(i,': ',subs[i],': Opening',sep=''))
+ print(paste(i,'/',length(subs),': ',subs[i],' Loading',sep=''))
if(length(grep("*.cyhd.cychp",dir()))==1) ##cyhd sample
{
@@ -158,12 +164,13 @@
segments <- segments[!is.na(segments$Value),]
- cat(' ..processing')
+ # cat(' ..processing')
if (is.null(segments)) { ## segmentation using DNA-copy if needed (must then be installed)
segments <- segment_DNAcopy(Log2)
save.txt(segments,'_segments.txt')
}
+
segments$Value <- segments$Value-mean(Log2$Value) ## Median-centering
Log2$Value <- Log2$Value-mean(Log2$Value) ## Median-centering
@@ -177,12 +184,20 @@
}
## Sample QC
- sampleData$MAPD[i] <- MAPD <- round(median(abs(diff(Log2$Value[Log2$Chromosome=='chr1'][order(Log2$Start[Log2$Chromosome=='chr1'])]))),2)
- sampleData$MHOF[i] <- MHOF <- round(100*median(0.5+abs(0.5-alf$Value[alf$Chromosome!='chrX'])),1)
+ # print(paste(i,': ',segments,', ',' sample QC',sep=''))
+ # sampleData$MAPD[i] <- MAPD <- round(median(abs(diff(Log2$Value[Log2$Chromosome=='chr1'][order(Log2$Start[Log2$Chromosome=='chr1'])]))),2)
+ # sampleData$MHOF[i] <- MHOF <- round(100*median(0.5+abs(0.5-alf$Value[alf$Chromosome!='chrX'])),1)
+ # print(round(median(abs(diff(Log2$Value[Log2$Chromosome=='chr1'][order(Log2$Start[Log2$Chromosome=='chr1'])]))),2))
+ # print(round(100*median(0.5+abs(0.5-alf$Value[alf$Chromosome!='chrX'])),1))
+
+ sampleData$MAPD[i] <- round(median(abs(diff(Log2$Value[Log2$Chromosome=='chr1'][order(Log2$Start[Log2$Chromosome=='chr1'])]))),2)
+ sampleData$MHOF[i] <- round(100*median(0.5+abs(0.5-alf$Value[alf$Chromosome!='chrX'])),1)
+
#round(100*median(0.5+regs$hom[regs$scores<0.5],na.rm=T),1)
#MAID=round(median(abs(diff(regs$scores[!is.na(regs$scores)]))),3)
#Save for TAPS_region()
+ # print(paste(i,': taps_region()',sep=''))
save(regs,Log2,alf,segments,file="TAPS_plot_output.Rdata")
#save.txt(sampleData,file='../sampleData.csv')
@@ -201,18 +216,17 @@
hg18=F
}
- cat('..plotting.\n')
-
+ # cat('..plotting.\n')
OverviewPlot(regs$chr,regs$start,regs$end,regs$logs,regs$scores,hg18=hg18,
as.character(Log2$Chromosome),Log2$Start,Log2$Value,as.character(alf$Chromosome),alf$Start,alf$Value,
- name=name,MAPD=MAPD,MHOF=MHOF)
+ name=name,MAPD=sampleData$MAPD[i],MHOF=sampleData$MHOF[i])
## Chromosome-wise plots for manual analysis
regions=allRegions$regions
karyotype_chroms(regs$chr,regs$start,regs$end,regs$logs,regs$scores,hg18=hg18,
as.character(Log2$Chromosome),Log2$Start,Log2$Value,as.character(alf$Chromosome),alf$Start,
- alf$Value,name=name,MAPD=MAPD,MHOF=MHOF)
+ alf$Value,name=name,MAPD=sampleData$MAPD[i],MHOF=sampleData$MHOF[i])
## Finally add estimates if needed:
e=NULL
@@ -231,8 +245,11 @@
sampleData$loh=e[4]
}
- cat('..done\n')
- setwd('..')
+ # cat('..done\n')
+ # print(paste(i,': ',subs[i],': OK',sep=''))
+ print(paste(i,'/',length(subs),': ',subs[i],' OK',sep=''))
+ setwd(root)
+ 1
}
save.txt(sampleData,'SampleData.csv')
}
@@ -247,10 +264,13 @@
# ## ## ## ## ###### ###### ## ## ######## ########
###
-TAPS_call <- function(samples='all',directory=getwd()) {
+TAPS_call <- function(samples='all',directory=getwd(),cores=1) {
minseg=1
maxCn=12
suppressPackageStartupMessages(library(xlsx))
+ suppressPackageStartupMessages(library(foreach))
+ suppressPackageStartupMessages(library(doMC))
+ suppressPackageStartupMessages(registerDoMC(cores=cores))
## TAPS_call outputs the total and minor allele copy numbers of all segments as a text file, and as images for visual confirmation.
@@ -258,7 +278,7 @@
## and the Log-R difference to a deletion, you must interpret the scatter plots and edit sampleInfo_TAPS.txt.
if (is.null(directory))
{
- cat("No directory supplied, using working directory.")
+ print("No directory supplied, using working directory.")
directory = "."
#cat("You have not assigned a directory containing one or more folders of samples for TAPS_call to execute. \n")
#cat("Example: \"/user/mysamples/\" or, to run it in your current working directory, \".\" \n")
@@ -281,8 +301,7 @@
if (length(grep('SampleData.csv',dir()))==1)
{
sampleData=load.txt('SampleData.csv')
- }
- else
+ } else
{
sampleData <- load.txt('../SampleData.csv')
}
@@ -297,14 +316,18 @@
if (samples[1]=='all') samples=rep(T,length(subs))
if (is.logical(samples)) samples=which(samples)
subs=subs[samples]
-
- for (i in 1:length(subs)) {
+ sampleData <- sampleData[samples,]
+
+ # for (i in 1:length(subs)) {
+ #junk only stores the list from foreach.
+ junk <- foreach(i=1:length(subs)) %dopar% {
setwd(subs[i])
name <- subs[i]
sampleInfo <- sampleData[i,c('cn1','cn2','cn3','loh')]
if (nrow(sampleInfo)==1) if (sum(is.na(sampleInfo))<4) {
- cat(' ..loading', subs[i])
+ # cat(' ..loading', subs[i])
+ print(paste(i,'/',length(subs),': ',subs[i],' Loading',sep=''))
Log2 <- readLog2()
alf <- readAlf(localDir)
segments <- readSegments()
@@ -322,7 +345,7 @@
segments$Value <- segments$Value-mean(Log2$Value)
Log2$Value <- Log2$Value-mean(Log2$Value)
- cat(' ..processing.\n')
+ # cat(' ..processing.\n')
load('allRegions.Rdata') ## These were prepared in TAPS_plot
load('shortRegions.Rdata')
@@ -334,7 +357,7 @@
u <- setCNs(allRegions,t$int,t$ai,t$model,maxCn) ## Assigns copy number variant for all segments
allRegions$regions <- u$regions
- ## adjacent segments with idendical copy number are merged (except over centromere) and all are saved to a text file
+ ## adjacent segments with idendical copy number are NOT... merged (except over centromere) and all are saved to a text file
save.txt(u$regions,file=paste(name,'_segmentCN.txt',sep=''))
regions=allRegions$regions
@@ -358,19 +381,21 @@
#save parameters as strings
parameters=paste("Parameters given: cn2:",sampleInfo$cn2," delta:",sampleInfo$delta," loh:",sampleInfo$loh)
- karyotype_check(regions$Chromosome,regions$Start,regions$End,regions$log2,regions$imba,regions$Cn,regions$mCn,t,name=name)
+ #karyotype_check(regions$Chromosome,regions$Start,regions$End,regions$log2,regions$imba,regions$Cn,regions$mCn,t,name=name)
karyotype_chromsCN(regions$Chromosome,regions$Start,regions$End,regions$log2,
regions$imba,regions$Cn,regions$mCn,hg18=hg18,
as.character(Log2$Chromosome),Log2$Start,Log2$Value,as.character(alf$Chromosome),
alf$Start,alf$Value,t,name=name,xlim=c(-1,1),ylim=c(0,1),parameters=parameters)
- cat('..done\n')
- } else cat('Skipped',name,'\n')
+ # cat('..done\n')
+ print(paste(i,'/',length(subs),': ',name,' ', 'OK',sep=''))
+ } else print(paste(i,'/',length(subs),': ',name,' ', 'Skipped',sep=''))
setwd('..')
}
#save.txt(sampleData,file='sampleData.csv')
+ 1
}
###
regsFromSegs <- function (Log2,alf, segments, bin=200,min=1) {
@@ -454,7 +479,7 @@
## This function reads Log-ratio from the file "probes.txt" which must be present in the current directory.
Log2=NULL
try( Log2 <- read.csv(file='probes.txt',header=T,sep='\t'), silent=T)
- if (!is.null(Log2)) cat(' ..found probes.txt')
+ # if (!is.null(Log2)) cat(' ..found probes.txt')
if (is.null(Log2)) {
try( Log2 <- read.csv(file='_probes.txt',header=T,sep='\t'), silent=T)
if (!is.null(Log2)) cat(' ..found _probes.txt')
@@ -493,7 +518,7 @@
## This funciton reads allele frequency [B/(A+B)] from the file 'snps.txt', which must be present in the current directory.
alf=NULL
try( alf <- read.csv(file='snps.txt',header=T,sep='\t'), silent=T)
- if (!is.null(alf)) cat(' ..found snps.txt')
+ # if (!is.null(alf)) cat(' ..found snps.txt')
if (is.null(alf)) {
try( alf <- read.csv(file='_snps.txt',header=T,sep='\t'), silent=T)
if (!is.null(alf)) cat(' ..found _snps.txt')
@@ -508,7 +533,7 @@
## Using a HMM is not recommended unless you have a homogenous, diploid sample. (And then there is more user-friendly software anyway.)
segments=NULL
try( segments <- read.csv(file='segments.txt',header=T,sep='\t'),silent=T)
- if (!is.null(segments)) cat(' ..found segments.txt')
+ # if (!is.null(segments)) cat(' ..found segments.txt')
if (is.null(segments)) {
try( segments <- read.csv(file='_segments.txt',header=T,sep='\t'),silent=T)
if (!is.null(segments)) cat(' ..found _segments.txt')
@@ -735,7 +760,7 @@
tix$cn1 <- abs(regions$log2 - expectedAt) < diff(est)[1]/3 ## index of likely cn1 regions
temp <- regions[tix$cn1,]
med <- weightedMedian(temp$log2,temp$probes)
- probes[1] <- sum(temp$probes)
+ probes[1] <- ifelse(is.null(med), 50, sum(temp$probes))
int[1] <- ifelse(!is.null(med),med,expectedAt)
## likely cn3 regions sit near estimate
@@ -743,7 +768,7 @@
tix$cn3 <- abs(regions$log2 - expectedAt) < diff(est)[2]/3
temp <- regions[tix$cn3,]
med <- weightedMedian(temp$log2,temp$probes)
- probes[3] <- sum(temp$probes)
+ probes[3] <- ifelse(is.null(med), 50, sum(temp$probes))
int[3] <- ifelse(!is.null(med),med,expectedAt)
## cn4 follows at ...
@@ -752,7 +777,7 @@
tix$cn4 <- abs(regions$log2 - expectedAt) < mean(diff(int))/3
temp <- regions[tix$cn4,]
med <- weightedMedian(temp$log2,temp$probes)
- probes[4] <- sum(temp$probes)
+ probes[4] <- ifelse(is.null(med), 50, sum(temp$probes))
int[4] <- ifelse(!is.null(med),med,expectedAt)
## generalized for higher cns
@@ -765,7 +790,7 @@
tix[[thisCn]] <- abs(regions$log2 - expectedAt) < mean(diff(int))/5
temp <- regions[tix[thisCn][[1]],]
med <- weightedMedian(temp$log2,temp$probes)
- probes[cn] <- sum(temp$probes)
+ probes[cn] <- ifelse(is.null(med), 0, sum(temp$probes))
int[cn] <- ifelse(!is.null(med),med,expectedAt)
}
@@ -800,7 +825,7 @@
med <- weightedMedian(data$imba[ix],data$snps[ix]) ## Average allelic imbalance weighted on snp count
ai$cn2m1 <- ifelse (!is.null(med),med,expectedAt)
## loh
- expextedAt=loh_exp
+ expectedAt=loh_exp
ix <- !ix
med <- weightedMedian(data$imba[ix],data$snps[ix])
ai$cn2m0 <- ifelse (!is.null(med),med,expectedAt)
@@ -1372,7 +1397,7 @@
suppressPackageStartupMessages(library(xlsx))
- sampleData <- load.txt('SampleData.txt')
+ sampleData <- load.txt('SampleData.csv')
olddir <- getwd()
if (!is.na(outdir)) {
try(dir.create(outdir), silent=T)
@@ -1535,7 +1560,7 @@
suppressPackageStartupMessages(library(xlsx))
- sampleData=load.txt('SampleData.txt')
+ sampleData=load.txt('SampleData.csv')
subs=as.character(sampleData$Sample)
@@ -3465,11 +3490,37 @@
}
+#Wrapper for getEstimates(). It will execute getEstimates in every sample folder.
+#If SampleData.csv already exists it will create the file SampleData_YYYY-MM-DD_HH-MM-SS.csv instead.
+TAPS_estimates <- function(path=getwd()) {
+ library(foreach)
+ library(TAPS)
+ # setwd("/media/safe/COAD_TCGA/NexusTxtFiles/tumorCELFiles")
+ root <- path
+ samples <- dir()[file.info(dir())$isdir]
+ # samples <- samples[grep('^[A-Z]',samples)]
+ datat <- foreach(sample=samples) %do% {
+ setwd(paste(root,'/',sample,sep=''))
+ if(file.exists('shortRegions.Rdata')) {
+ load('shortRegions.Rdata')
+ cn <- getEstimates(regs$logs,regs$scores)
+ try(rm(regs),T)
+ data.frame(Sample = sample,cn1=cn[1],cn2=cn[2],cn3=cn[3],loh=cn[4])
+ } else {
+ data.frame(Sample = sample,cn1=NA,cn2=NA,cn3=NA,loh=NA)
+ }
+ }
+ sampledata <- do.call(rbind,datat)
+ setwd(root)
+ if (file.exists('SampleData.csv')) {
+ write.table(sampledata,paste('SampleData',format(Sys.time(), "_%F_%T.csv"),sep=''),sep='\t',row.names=F)
+ } else {
+ write.table(sampledata,'SampleData.csv',sep='\t',row.names=F)
+ }
+}
-
-
-getEstimates <- function(logR, imba) {
+getEstimates <- function(logR, imba, cellLines=F) {
#load('shortRegions.Rdata')
#logR=allRegions$regions$log2[allRegions$regions$lengthMB>5]
#imba=allRegions$regions$imba[allRegions$regions$lengthMB>5]
@@ -3482,7 +3533,7 @@
# Find optimal logR's for integer copy numbers
R=2^logR-2^max
- deltas=seq(0.05,0.5,0.01) # Allow a delta-ratio of 5% to 50%
+ deltas=seq(0.05,0.33,0.01) # Allow a delta-ratio of 5% to 50%
n=length(deltas)
dev=rep(NA,n)
for (i in 1:n) {
@@ -3501,7 +3552,7 @@
n=length(d$y)
maxes=which(diff(d$y[1:(n-1)])>0 & diff(d$y[2:n])<0)
## remove crappy maxes:
- maxes=maxes[d$y[maxes] > 0.05*max(d$y[maxes])]
+ maxes=maxes[d$y[maxes] > 0.1*max(d$y[maxes])]
cn=3
if (d$x[maxes][1]<0.15) # even copy number
@@ -3513,7 +3564,7 @@
if (d$x[maxes]>0.35)
cn=1
- cn1=log2(2^max-(cn-1)*best)
+ try(cn1 <- log2(2^max-(cn-1)*best),silent=T)
try(cn1 <- median(logR[abs(logR-cn1)<0.1]),silent=T)
cn2=log2(2^max-(cn-2)*best)
try(cn2 <- median(logR[abs(logR-cn2)<0.1]),silent=T)
@@ -3523,6 +3574,7 @@
R2=2^cn2
imba2=imba[abs(2^logR-R2)<best/4]
+ if(sum(is.na(imba2)) > (length(imba2)/2) | length(imba2) < 4) return(c(NA,NA,NA,NA))
d=density(imba2,na.rm=T)
n=length(d$y)
maxes=which(diff(d$y[1:(n-1)])>0 & diff(d$y[2:n])<0)
@@ -3534,54 +3586,3 @@
return(round(c(cn1,cn2,cn3,loh),2))
}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Modified: pkg/patchwork/inst/perl/mpile2alleles.pl
===================================================================
--- pkg/patchwork/inst/perl/mpile2alleles.pl 2013-12-13 15:28:31 UTC (rev 199)
+++ pkg/patchwork/inst/perl/mpile2alleles.pl 2014-01-13 08:50:26 UTC (rev 200)
@@ -56,92 +56,113 @@
while (<VCF>)
{
chomp;
+ #Next if header
next if /^#/;
- my ($vchr, $vpos, $cons, $consQual) = (split /\t/, $_)[0,1,4,5];
+ # CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HCC1954BL.bam
+ # chr1 11035 . G A 4.77 . DP++ GT++ 0/1:33,0,25:29 // the '++' means there was more information there but i shortened it
+ if ($_ =~ /(\S+)\t(\S+)\t\S+\t\S+\t(\S+)\t(\S+)\t\S+\t\S+\t\S+\t\S+/)
+ {
+ my ($vchr, $vpos, $cons, $consQual) = ($1,$2,$3,$4); #(split /\t/, $_)[0,1,4,5];
- next if $cons eq 'N';
- next if length $cons > 1;
- $cons = uc $cons;
+ next if $cons eq 'N';
+ next if length $cons > 1;
+ $cons = uc $cons;
- while (<PILEUP>)
- {
- chomp;
- my ($chr, $pos, $ref, $depth, $baseString) = (split /\t/, $_)[0,1,2,3,4];
+ while (<PILEUP>)
+ {
+ chomp;
+ # CHROM POS REF COV BASESTR ?
+ # chr1 10296 c 30 ,$++ 9<(A8<.#=#>#<!+:51!@!#!B#!!!!# // the '++' means there was more information there but i shortened it
+ if ($_ =~ /(\S+)\t(\S+)\t(\S+)\t(\S+)\t(\S+)\t\S+/)
+ {
+ my ($chr, $pos, $ref, $depth, $baseString) = ($1,$2,$3,$4,$5); #(split /\t/, $_)[0,1,2,3,4];
- #For testing purposes
- # $j++;
+ #For testing purposes
+ # $j++;
- #uc upper case
- $ref = uc $ref;
- next if $ref eq '*';
+ #uc upper case
+ $ref = uc $ref;
+ next if $ref eq '*';
- #If the position and chromosome match between pileup and vcf
- # add if smaller than
- if ($vchr eq $chr && $vpos == $pos)
- {
- my $snp = ($cons =~ m/[AGCT]/) ? $cons : $iupac{$ref}{$cons};
- $snps{$chr}{$pos}{'ref'} = $ref;
- $snps{$chr}{$pos}{'snp'} = $snp;
- $snps{$chr}{$pos}{'qual'} = $consQual;
+ #If the position and chromosome match between pileup and vcf
+ # add if smaller than
+ if ($vchr eq $chr && $vpos == $pos)
+ {
+ my $snp = ($cons =~ m/[AGCT]/) ? $cons : $iupac{$ref}{$cons};
+ $snps{$chr}{$pos}{'ref'} = $ref;
+ $snps{$chr}{$pos}{'snp'} = $snp;
+ $snps{$chr}{$pos}{'qual'} = $consQual;
- # #For testing purposes
- # $j == the faulty line number so we can see what is actually happening there
- # if($j == 576673 || $j == 683784 || $j == 733447)
- # {
- # print OUT "Line: $. \n";
- # print OUT "VCF: $vchr $vpos $cons $consQual \n";
- # print OUT "PILEUP: $chr $pos $ref $depth $baseString \n";
- # print OUT "snp: $snp \n";
- # print OUT "assigned: $snps{$chr}{$pos}{'snp'} \n";
- # $i++;
- # }
+ # #For testing purposes
+ # $j == the faulty line number so we can see what is actually happening there
+ # if($j == 576673 || $j == 683784 || $j == 733447)
+ # {
+ # print OUT "Line: $. \n";
+ # print OUT "VCF: $vchr $vpos $cons $consQual \n";
+ # print OUT "PILEUP: $chr $pos $ref $depth $baseString \n";
+ # print OUT "snp: $snp \n";
+ # print OUT "assigned: $snps{$chr}{$pos}{'snp'} \n";
+ # $i++;
+ # }
- # print OK "Line: $. \n";
- # print OK "VCF: $vchr $vpos $cons $consQual \n";
- # print OK "PILEUP: $chr $pos $ref $depth $baseString \n";
- # print OK "snp: $snp \n";
- # print OK "assigned: $snps{$chr}{$pos}{'snp'} \n";
-
- # if ($i == 3)
- # {
- # close(OUT);
- # close(OK);
- # }
+ # print OK "Line: $. \n";
+ # print OK "VCF: $vchr $vpos $cons $consQual \n";
+ # print OK "PILEUP: $chr $pos $ref $depth $baseString \n";
+ # print OK "snp: $snp \n";
+ # print OK "assigned: $snps{$chr}{$pos}{'snp'} \n";
+
+ # if ($i == 3)
+ # {
+ # close(OUT);
+ # close(OK);
+ # }
- my $snpCount;
- if ($snps{$chr}{$pos}{'snp'} eq 'A') {
- $snpCount = $baseString =~ tr/Aa/Aa/;
- } elsif ($snps{$chr}{$pos}{'snp'} eq 'C') {
- $snpCount = $baseString =~ tr/Cc/Cc/;
- } elsif ($snps{$chr}{$pos}{'snp'} eq 'G') {
- $snpCount = $baseString =~ tr/Gg/Gg/;
- } elsif ($snps{$chr}{$pos}{'snp'} eq 'T') {
- $snpCount = $baseString =~ tr/Tt/Tt/;
- } elsif ($snps{$chr}{$pos}{'snp'} eq 'A|C') {
- $snpCount = $baseString =~ tr/AaCc/AaCc/;
- } elsif ($snps{$chr}{$pos}{'snp'} eq 'A|G') {
- $snpCount = $baseString =~ tr/AaGg/AaGg/;
- } elsif ($snps{$chr}{$pos}{'snp'} eq 'A|T') {
- $snpCount = $baseString =~ tr/AaTt/AaTt/;
- } elsif ($snps{$chr}{$pos}{'snp'} eq 'G|C') {
- $snpCount = $baseString =~ tr/GgCc/GgCc/;
- } elsif ($snps{$chr}{$pos}{'snp'} eq 'G|T') {
- $snpCount = $baseString =~ tr/GgTt/GgTt/;
- } elsif ($snps{$chr}{$pos}{'snp'} eq 'C|T') {
- $snpCount = $baseString =~ tr/CcTt/CcTt/;
- } else {
- $snpCount = 0;
- }
- $snps{$chr}{$pos}{'depth'} = $depth;
- $snps{$chr}{$pos}{'freq'} = $snpCount;
- $snps{$chr}{$pos}{'pct'} = sprintf '%.2f', ($snpCount/$depth);
+ my $snpCount;
+ if ($snps{$chr}{$pos}{'snp'} eq 'A') {
+ $snpCount = $baseString =~ tr/Aa/Aa/;
+ } elsif ($snps{$chr}{$pos}{'snp'} eq 'C') {
+ $snpCount = $baseString =~ tr/Cc/Cc/;
+ } elsif ($snps{$chr}{$pos}{'snp'} eq 'G') {
+ $snpCount = $baseString =~ tr/Gg/Gg/;
+ } elsif ($snps{$chr}{$pos}{'snp'} eq 'T') {
+ $snpCount = $baseString =~ tr/Tt/Tt/;
+ } elsif ($snps{$chr}{$pos}{'snp'} eq 'A|C') {
+ $snpCount = $baseString =~ tr/AaCc/AaCc/;
+ } elsif ($snps{$chr}{$pos}{'snp'} eq 'A|G') {
+ $snpCount = $baseString =~ tr/AaGg/AaGg/;
+ } elsif ($snps{$chr}{$pos}{'snp'} eq 'A|T') {
+ $snpCount = $baseString =~ tr/AaTt/AaTt/;
+ } elsif ($snps{$chr}{$pos}{'snp'} eq 'G|C') {
+ $snpCount = $baseString =~ tr/GgCc/GgCc/;
+ } elsif ($snps{$chr}{$pos}{'snp'} eq 'G|T') {
+ $snpCount = $baseString =~ tr/GgTt/GgTt/;
+ } elsif ($snps{$chr}{$pos}{'snp'} eq 'C|T') {
+ $snpCount = $baseString =~ tr/CcTt/CcTt/;
+ } else {
+ $snpCount = 0;
+ }
+ $snps{$chr}{$pos}{'depth'} = $depth;
+ $snps{$chr}{$pos}{'freq'} = $snpCount;
+ $snps{$chr}{$pos}{'pct'} = sprintf '%.2f', ($snpCount/$depth);
- last;
+ last;
+ }
+ elsif ($vchr eq $chr && $vpos < $pos) {
+ last;
+ }
+ }
+ else
+ {
+ #print STDERR "In file $filename, line incompatible: $_ \n";
+ print STDERR "MPILEUP line $. incompatible: $_ \n";
+ }
}
- elsif ($vchr eq $chr && $vpos < $pos) {
- last;
- }
}
+ else
+ {
+ #print STDERR "In file $filename, line incompatible: $_ \n";
+ print STDERR "VCF line $. incompatible: $_ \n";
+ }
}
close PILEUP;
Modified: pkg/patchwork/inst/perl/pile2alleles.pl
===================================================================
--- pkg/patchwork/inst/perl/pile2alleles.pl 2013-12-13 15:28:31 UTC (rev 199)
+++ pkg/patchwork/inst/perl/pile2alleles.pl 2014-01-13 08:50:26 UTC (rev 200)
@@ -39,6 +39,7 @@
{
chomp;
#if ($_ == ""){next;}
+ # chr1 10884 c G 6 44 60 3 GGG ??>
if ($_ =~ /(\S+)\t(\S+)\t(\S+)\t(\S+)\t(\S+)\t\S+\t\S+\t(\S+)\t(\S+)\t\S+/)
{
my ($chr, $pos, $ref, $cons, $consQual, $depth, $baseString) = ($1,$2,$3,$4,$5,$6,$7); #(split /\t/, $_)[0,1,2,3,4,7,8];
@@ -80,7 +81,7 @@
else
{
#print STDERR "In file $filename, line incompatible: $_ \n";
- print STDERR "Line incompatible: $_ \n";
+ print STDERR "Line $. incompatible: $_ \n";
}
}
More information about the Patchwork-commits
mailing list