From noreply at r-forge.r-project.org Thu Mar 20 10:35:35 2014 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Thu, 20 Mar 2014 10:35:35 +0100 (CET) Subject: [Patchwork-commits] r204 - .git .git/logs .git/logs/refs/heads .git/logs/refs/remotes/origin .git/refs/heads .git/refs/remotes/origin pkg/TAPS pkg/TAPS/R pkg/TAPS/man www www/css www/css/fonts www/css/fonts/fontawesome www/css/fonts/fontawesome/css www/css/fonts/fontawesome/font www/css/img www/js Message-ID: <20140320093535.A7460186EED@r-forge.r-project.org> Author: sebastian_d Date: 2014-03-20 10:35:35 +0100 (Thu, 20 Mar 2014) New Revision: 204 Added: pkg/TAPS/R/sysdata.rda pkg/TAPS/man/TAPS_click.Rd pkg/TAPS/man/TAPS_estimates.Rd www/composer.json www/css/fonts/fontawesome/ www/css/fonts/fontawesome/css/ www/css/fonts/fontawesome/css/font-awesome-ie7.min.css www/css/fonts/fontawesome/css/font-awesome.css www/css/fonts/fontawesome/css/font-awesome.min.css www/css/fonts/fontawesome/font/ www/css/fonts/fontawesome/font/FontAwesome.otf www/css/fonts/fontawesome/font/fontawesome-webfont.eot www/css/fonts/fontawesome/font/fontawesome-webfont.svg www/css/fonts/fontawesome/font/fontawesome-webfont.ttf www/css/fonts/fontawesome/font/fontawesome-webfont.woff www/css/img/6295_chr1_region_18000000-23000000.jpeg www/css/img/TAPS_Menu1.png www/css/img/TAPS_Menu2.png www/css/img/TAPS_Slide1.png www/css/img/TAPS_Slide2.png www/css/img/TAPS_Slide3.png www/css/img/TAPS_Slide4.png www/css/img/TAPS_Slide5.png www/css/img/TAPS_Slide6.png www/css/img/patchwork_logo.png www/css/kickstart-slideshow.css Removed: pkg/TAPS/R/sysdata.rda Modified: .git/COMMIT_EDITMSG .git/index .git/logs/HEAD .git/logs/refs/heads/master .git/logs/refs/remotes/origin/master .git/refs/heads/master .git/refs/remotes/origin/master pkg/TAPS/NAMESPACE pkg/TAPS/R/TAPS.r pkg/TAPS/man/TAPS_call.Rd pkg/TAPS/man/TAPS_compare.Rd pkg/TAPS/man/TAPS_freq.Rd pkg/TAPS/man/TAPS_plot.Rd pkg/TAPS/man/TAPS_region.Rd www/TAPS_exec.php www/TAPS_inst.php www/TAPS_requ.php www/TAPS_resu.php www/changelog.php www/css/kickstart.css www/index.php www/js/kickstart.js www/pw_exec.php www/pw_inst.php www/pw_requ.php www/pw_resu.php Log: Big changes to homepage and gene lists as well as other things. TAPS_click(), TAPS_estimate(), TAPS_region() Modified: .git/COMMIT_EDITMSG =================================================================== --- .git/COMMIT_EDITMSG 2014-01-14 15:20:19 UTC (rev 203) +++ .git/COMMIT_EDITMSG 2014-03-20 09:35:35 UTC (rev 204) @@ -1 +1 @@ -small fix xlsx +Big upgrades to homepage and genelist Modified: .git/index =================================================================== (Binary files differ) Modified: .git/logs/HEAD =================================================================== --- .git/logs/HEAD 2014-01-14 15:20:19 UTC (rev 203) +++ .git/logs/HEAD 2014-03-20 09:35:35 UTC (rev 204) @@ -80,3 +80,9 @@ 1c3b0b54598dac7863316a79427364b873bec3f3 78c55c83e03724a24900fbaf881864197d23ab4d Sebastian DiLorenzo 1389602903 +0100 pull: Merge made by the 'recursive' strategy. 78c55c83e03724a24900fbaf881864197d23ab4d 218c5c8fe2ba9eb21c155ba9c5696b8eebfb90e8 Sebastian DiLorenzo 1389712335 +0100 commit: update to xlsx handling (barely needed anymore) 218c5c8fe2ba9eb21c155ba9c5696b8eebfb90e8 a4cb05b888a8eef33784f0ee1fb4f2de5a8c3c61 Sebastian DiLorenzo 1389712434 +0100 commit: small fix xlsx +a4cb05b888a8eef33784f0ee1fb4f2de5a8c3c61 88f638af68fd6e9c505d9cbe0b9f164d6386f681 Sebastian DiLorenzo 1389712866 +0100 commit: update to homepage regarding TAPS installation +88f638af68fd6e9c505d9cbe0b9f164d6386f681 686925e27a9b454ceae8db67f1c5ff2351c0dedd Sebastian DiLorenzo 1393509665 +0100 pull: Fast-forward +686925e27a9b454ceae8db67f1c5ff2351c0dedd d6dbac6a1a12bfaf8dccbe161c2f509dcc53b73d Sebastian DiLorenzo 1393853775 +0100 commit: updates to TAPS functions and homepage +d6dbac6a1a12bfaf8dccbe161c2f509dcc53b73d 0fc6463401c39565b48199c6f4adfe077510620e Sebastian DiLorenzo 1393856755 +0100 commit: last commit before destroying everything as i try to update kickstart +0fc6463401c39565b48199c6f4adfe077510620e 53a860b490c7ac5501d8bb4f6b95e700491e12d4 Sebastian DiLorenzo 1395307680 +0100 commit: Big upgrades to homepage and genelist +53a860b490c7ac5501d8bb4f6b95e700491e12d4 3e3f8aafc4dd6607de55b08e676598fcdd1ac8c4 Sebastian DiLorenzo 1395307764 +0100 pull: Merge made by the 'recursive' strategy. Modified: .git/logs/refs/heads/master =================================================================== --- .git/logs/refs/heads/master 2014-01-14 15:20:19 UTC (rev 203) +++ .git/logs/refs/heads/master 2014-03-20 09:35:35 UTC (rev 204) @@ -80,3 +80,9 @@ 1c3b0b54598dac7863316a79427364b873bec3f3 78c55c83e03724a24900fbaf881864197d23ab4d Sebastian DiLorenzo 1389602903 +0100 pull: Merge made by the 'recursive' strategy. 78c55c83e03724a24900fbaf881864197d23ab4d 218c5c8fe2ba9eb21c155ba9c5696b8eebfb90e8 Sebastian DiLorenzo 1389712335 +0100 commit: update to xlsx handling (barely needed anymore) 218c5c8fe2ba9eb21c155ba9c5696b8eebfb90e8 a4cb05b888a8eef33784f0ee1fb4f2de5a8c3c61 Sebastian DiLorenzo 1389712434 +0100 commit: small fix xlsx +a4cb05b888a8eef33784f0ee1fb4f2de5a8c3c61 88f638af68fd6e9c505d9cbe0b9f164d6386f681 Sebastian DiLorenzo 1389712866 +0100 commit: update to homepage regarding TAPS installation +88f638af68fd6e9c505d9cbe0b9f164d6386f681 686925e27a9b454ceae8db67f1c5ff2351c0dedd Sebastian DiLorenzo 1393509665 +0100 pull: Fast-forward +686925e27a9b454ceae8db67f1c5ff2351c0dedd d6dbac6a1a12bfaf8dccbe161c2f509dcc53b73d Sebastian DiLorenzo 1393853775 +0100 commit: updates to TAPS functions and homepage +d6dbac6a1a12bfaf8dccbe161c2f509dcc53b73d 0fc6463401c39565b48199c6f4adfe077510620e Sebastian DiLorenzo 1393856755 +0100 commit: last commit before destroying everything as i try to update kickstart +0fc6463401c39565b48199c6f4adfe077510620e 53a860b490c7ac5501d8bb4f6b95e700491e12d4 Sebastian DiLorenzo 1395307680 +0100 commit: Big upgrades to homepage and genelist +53a860b490c7ac5501d8bb4f6b95e700491e12d4 3e3f8aafc4dd6607de55b08e676598fcdd1ac8c4 Sebastian DiLorenzo 1395307764 +0100 pull: Merge made by the 'recursive' strategy. Modified: .git/logs/refs/remotes/origin/master =================================================================== --- .git/logs/refs/remotes/origin/master 2014-01-14 15:20:19 UTC (rev 203) +++ .git/logs/refs/remotes/origin/master 2014-03-20 09:35:35 UTC (rev 204) @@ -77,3 +77,9 @@ 65601241ae236432ef8a7f3aab8d7af3068d4344 78c55c83e03724a24900fbaf881864197d23ab4d Sebastian DiLorenzo 1389602990 +0100 update by push 78c55c83e03724a24900fbaf881864197d23ab4d 218c5c8fe2ba9eb21c155ba9c5696b8eebfb90e8 Sebastian DiLorenzo 1389712343 +0100 update by push 218c5c8fe2ba9eb21c155ba9c5696b8eebfb90e8 a4cb05b888a8eef33784f0ee1fb4f2de5a8c3c61 Sebastian DiLorenzo 1389712440 +0100 update by push +a4cb05b888a8eef33784f0ee1fb4f2de5a8c3c61 88f638af68fd6e9c505d9cbe0b9f164d6386f681 Sebastian DiLorenzo 1389712872 +0100 update by push +88f638af68fd6e9c505d9cbe0b9f164d6386f681 686925e27a9b454ceae8db67f1c5ff2351c0dedd Sebastian DiLorenzo 1393509665 +0100 pull: fast-forward +686925e27a9b454ceae8db67f1c5ff2351c0dedd d6dbac6a1a12bfaf8dccbe161c2f509dcc53b73d Sebastian DiLorenzo 1393853806 +0100 update by push +d6dbac6a1a12bfaf8dccbe161c2f509dcc53b73d 0fc6463401c39565b48199c6f4adfe077510620e Sebastian DiLorenzo 1393856763 +0100 update by push +0fc6463401c39565b48199c6f4adfe077510620e bf30fc707ee93417a8fad1f8ab1d8dc7426e128a Sebastian DiLorenzo 1395307764 +0100 pull: fast-forward +bf30fc707ee93417a8fad1f8ab1d8dc7426e128a 3e3f8aafc4dd6607de55b08e676598fcdd1ac8c4 Sebastian DiLorenzo 1395307814 +0100 update by push Modified: .git/refs/heads/master =================================================================== --- .git/refs/heads/master 2014-01-14 15:20:19 UTC (rev 203) +++ .git/refs/heads/master 2014-03-20 09:35:35 UTC (rev 204) @@ -1 +1 @@ -a4cb05b888a8eef33784f0ee1fb4f2de5a8c3c61 +3e3f8aafc4dd6607de55b08e676598fcdd1ac8c4 Modified: .git/refs/remotes/origin/master =================================================================== --- .git/refs/remotes/origin/master 2014-01-14 15:20:19 UTC (rev 203) +++ .git/refs/remotes/origin/master 2014-03-20 09:35:35 UTC (rev 204) @@ -1 +1 @@ -a4cb05b888a8eef33784f0ee1fb4f2de5a8c3c61 +3e3f8aafc4dd6607de55b08e676598fcdd1ac8c4 Modified: pkg/TAPS/NAMESPACE =================================================================== --- pkg/TAPS/NAMESPACE 2014-01-14 15:20:19 UTC (rev 203) +++ pkg/TAPS/NAMESPACE 2014-03-20 09:35:35 UTC (rev 204) @@ -3,7 +3,8 @@ TAPS_region, TAPS_freq, TAPS_compare, - TAPS_estimates) + TAPS_estimates, + TAPS_click) Modified: pkg/TAPS/R/TAPS.r =================================================================== --- pkg/TAPS/R/TAPS.r 2014-01-14 15:20:19 UTC (rev 203) +++ pkg/TAPS/R/TAPS.r 2014-03-20 09:35:35 UTC (rev 204) @@ -1,5 +1,3 @@ -## Det h?r ?r nya versionen. - ## load('~/patchwork/pkg/TAPS/R/sysdata.rda') ## load('shortRegions.Rdata') ## load('allRegions.Rdata') @@ -16,7 +14,7 @@ TAPS_plot <- function(#samples='all', directory=NULL,autoEstimate=FALSE, - bin=400,cores=1) { + bin=400,cores=1,matched=FALSE,allelePeaks=FALSE) { #Automatically check, and if needed install, packages stats and fields #Load stats. It should be in all, at least semi-new, R distributions so we dont need to install.package it or @@ -25,7 +23,7 @@ suppressPackageStartupMessages(library(stats)) suppressPackageStartupMessages(library(DNAcopy)) suppressPackageStartupMessages(library(fields)) - #suppressPackageStartupMessages(library(xlsx)) + suppressPackageStartupMessages(library(xlsx)) suppressPackageStartupMessages(library(foreach)) suppressPackageStartupMessages(library(doMC)) suppressPackageStartupMessages(registerDoMC(cores=cores)) @@ -106,7 +104,6 @@ Chromosome[i]=paste('chr', tempChr[i], sep="") } - Log2=as.data.frame(cbind(Chromosome, Start, End, Value)) colnames(Log2)=c("Chromosome","Start","End","Value") @@ -153,33 +150,33 @@ Log2=Log2[!is.nan(Log2$Value),] Log2=Log2[!is.na(Log2$Value),] + Log2 <- Log2[which(Log2$Value != -Inf & Log2$Value != +Inf ),] alf=alf[!is.nan(alf$Value),] alf=alf[!is.na(alf$Value),] alf$Value[alf$Value<0]=0; alf$Value[alf$Value>1]=1 + alf <- alf[which(alf$Value != -Inf & alf$Value != +Inf ),] segments <- readSegments() ## segments if available (CBS recommended) - - #Remove NA values that some samples give. - segments <- segments[!is.nan(segments$Value),] - segments <- segments[!is.na(segments$Value),] - - + # cat(' ..processing') if (is.null(segments)) { ## segmentation using DNA-copy if needed (must then be installed) segments <- segment_DNAcopy(Log2) save.txt(segments,'_segments.txt') } + + #Remove NA values that some samples give. + segments <- segments[!is.nan(segments$Value),] + segments <- segments[!is.na(segments$Value),] + segments$Value <- segments$Value-median(Log2$Value,na.rm=T) ## Median-centering + Log2$Value <- Log2$Value-median(Log2$Value,na.rm=T) ## Median-centering - segments$Value <- segments$Value-mean(Log2$Value) ## Median-centering - Log2$Value <- Log2$Value-mean(Log2$Value) ## Median-centering - - allRegions=NULL; if ('allRegions.Rdata' %in% dir()) load('allRegions.Rdata') - if (is.null(allRegions)) allRegions <- makeRegions(Log2, alf, segments) ## Calculates necessary data for segments (all functions are in this file) + allRegions=NULL; #if ('allRegions.Rdata' %in% dir()) load('allRegions.Rdata') + if (is.null(allRegions)) allRegions <- makeRegions(Log2, alf, segments,matched=matched,allelePeaks=allelePeaks) ## Calculates necessary data for segments (all functions are in this file) save(allRegions,file='allRegions.Rdata') - regs=NULL; if ('shortRegions.Rdata' %in% dir()) load('shortRegions.Rdata') + regs=NULL;# if ('shortRegions.Rdata' %in% dir()) load('shortRegions.Rdata') if (is.null(regs)) { - regs <- regsFromSegs(Log2,alf,segments,bin=bin,min=5) ## Calculates the same data for shortened segments + regs <- regsFromSegs(Log2,alf,segments,bin=bin,min=30,matched=matched,allelePeaks=allelePeaks) ## Calculates the same data for shortened segments save(regs,file='shortRegions.Rdata') } @@ -207,14 +204,11 @@ #Test if hg18 or hg19 should be used. length of (hg18 chr19) > (hg19 chr19) hgtest=regs[regs$chr=="chr19",] - if(hgtest$end[length(hgtest$chr)] > 60000000) - { + if(hgtest$end[length(hgtest$chr)] > 60000000) { hg18=T - } - else - { + } else { hg18=F - } + } # cat('..plotting.\n') OverviewPlot(regs$chr,regs$start,regs$end,regs$logs,regs$scores,hg18=hg18, @@ -399,11 +393,11 @@ 1 } ### -regsFromSegs <- function (Log2,alf, segments, bin=200,min=1) { +regsFromSegs <- function (Log2,alf, segments, bin=200,min=1,matched=F,allelePeaks=FALSE) { ## This function builds short segments and calcualtes their average Log-R and Allelic Imbalance Ratio. rownames(Log2)=1:nrows(Log2) rownames(alf)=1:nrows(alf) - regs=list('chr'=NULL,'start'=NULL,'end'=NULL,'logs'=NULL,'scores'=NULL,'het'=NULL,'hom'=NULL,'probes'=NULL,'snps'=NULL) + regs=list('chr'=NULL,'start'=NULL,'end'=NULL,'logs'=NULL,'scores'=NULL,'probes'=NULL,'snps'=NULL) #,'key1'=rep(NA,nrow(Log2)),'key2'=rep(NA,nrow(alf))) n=nrow(segments) s_check=NULL @@ -434,40 +428,72 @@ #regs$or_seg=c(regs$or_seg,c) regs$start=c(regs$start,s_) ## store start and end positions regs$end=c(regs$end,e_) - - if (nrow(thisalf)>min) { ## Time to calculate Allelic Imbalance Ratio (if enough SNPs) - t1=sort( abs(thisalf$Value-0.5) ) ## distance from middle (het) in the allele freq pattern, ascending - if (length(unique(t1))>3) { ## do not attempt clustering with too few snps - xx=NULL - try(xx <- kmeans(t1, 2),silent=T) ## Attempt k-means (Hartigan-Wong: has proven very stable) - if (!is.null(xx)) if (min(xx$size) > 0.05*max(xx$size)) { ## On some occations data quality is poor, requiring 5%+ heterozygous SNPs avoids most such cases. - xx=xx$centers - } else xx=NA - } else xx=NA - } else xx=NA - #try (if (is.na(xx)) xx=0:1, silent=T) - try (if (length(xx)==0) xx=0:1, silent=T) - regs$scores=c(regs$scores, min(xx)/max(xx) ) ## Allelic Imbalance Ratio = inner / outer cluster. - regs$het=c(regs$het, min(xx)) ## $het and $hom are no longer in use. - regs$hom=c(regs$hom, max(xx)) + temp = NA + try(temp <- allelicImbalance(thisalf$Value,min,matched,allelePeaks),silent=T) + regs$scores =c(regs$scores,temp) + # if (nrow(thisalf)>min) { ## Time to calculate Allelic Imbalance Ratio (if enough SNPs) + # t1=sort( abs(thisalf$Value-0.5) ) ## distance from middle (het) in the allele freq pattern, ascending + # if (length(unique(t1))>3) { ## do not attempt clustering with too few snps + # xx=NULL + # try(xx <- kmeans(t1, 2),silent=T) ## Attempt k-means (Hartigan-Wong: has proven very stable) + # if (!is.null(xx)) if (min(xx$size) > 0.05*max(xx$size)) { ## On some occations data quality is poor, requiring 5%+ heterozygous SNPs avoids most such cases. + # xx=xx$centers + # } else xx=NA + # } else xx=NA + # } else xx=NA + ##try (if (is.na(xx)) xx=0:1, silent=T) + # try (if (length(xx)==0) xx=0:1, silent=T) + # regs$scores=c(regs$scores, min(xx)/max(xx) ) ## Allelic Imbalance Ratio = inner / outer cluster. + # regs$het=c(regs$het, min(xx)) ## $het and $hom are no longer in use. + # regs$hom=c(regs$hom, max(xx)) + # if(matched == T | is.na(regs$scores[length(regs$scores)])) { + # regs$scores[length(regs$scores)] <- 2*median(abs(thisalf$Value-.5),na.rm=T) + # } } } regs=as.data.frame(regs) regs=regs[!is.na(regs$logs),] ### MODDAT MARKUS MAJ 2013 return (regs) } +allelicImbalance <- function (data,min,matched=F,allelePeaks=F) { + if(matched == T ) { + return(2*median(abs(data$Value-.5),na.rm=T)) + } + if(allelePeaks == F) { + if (length(data)>min) { ## Time to calculate Allelic Imbalance Ratio (if enough SNPs) + t1=sort( abs(data-0.5) ) ## distance from middle (het) in the allele freq pattern, ascending + if (length(unique(t1))>3) { ## do not attempt clustering with too few snps + xx=NULL + try(xx <- kmeans(t1, 2),silent=T) ## Attempt k-means (Hartigan-Wong: has proven very stable) + if (!is.null(xx)) if (min(xx$size) > 0.05*max(xx$size)) { ## On some occations data quality is poor, requiring 5%+ heterozygous SNPs avoids most such cases. + xx=xx$centers + } else xx=NA + } else xx=NA + } else xx=NA + #try (if (is.na(xx)) xx=0:1, silent=T) + #try (if (length(xx)==0) xx=0:1, silent=T) + #regs$scores=c(regs$scores, min(xx)/max(xx) ) ## Allelic Imbalance Ratio = inner / outer cluster. + #regs$het=c(regs$het, min(xx)) ## $het and $hom are no longer in use. + #regs$hom=c(regs$hom, max(xx)) + return( min(xx)/max(xx) ) + } + #The new shiet + if(allelePeaks == T) { + return(1-max(kmeans(data,2)$centers)) + } +} ### segment_DNAcopy <- function(Log2) { ## If segmentation is required, DNAcopy is a good choice. Must be installed. #library(DNAcopy) - cat('..Using DNAcopy to create segments:\n') + # cat('..Using DNAcopy to create segments:\n') segs=NULL chroms=c(as.character(1:22),'X','Y') chroms=paste('chr',chroms,sep='') for (c in 1:24) { # segment chromosome tlog=Log2[Log2$Chromosome==chroms[c],] ## Log-R of this chromosome if (nrow(tlog)>0) { ## (ChrY may be absent) - cnaObject=segment(smooth.CNA(CNA(tlog$Value, rep(c,nrow(tlog)), tlog$Start, data.type='logratio',sampleid=paste('chr',c))), undo.splits='sdundo', undo.SD=1) + cnaObject=segment(smooth.CNA(CNA(tlog$Value, rep(c,nrow(tlog)), tlog$Start, data.type='logratio',sampleid=paste('chr',c))), undo.splits='sdundo', undo.SD=1,verbose=0) segs=rbind(segs,cnaObject$output) ## Add result to data frame } } @@ -478,13 +504,31 @@ ### readLog2 <- function() { ## This function reads Log-ratio from the file "probes.txt" which must be present in the current directory. - Log2=NULL - try( Log2 <- read.csv(file='probes.txt',header=T,sep='\t'), silent=T) - # if (!is.null(Log2)) cat(' ..found probes.txt') - if (is.null(Log2)) { - try( Log2 <- read.csv(file='_probes.txt',header=T,sep='\t'), silent=T) - if (!is.null(Log2)) cat(' ..found _probes.txt') + # Log2=NULL + # try( Log2 <- read.csv(file='probes.txt',header=T,sep='\t'), silent=T) + # if (is.null(Log2)) { + # try( Log2 <- read.csv(file='_probes.txt',header=T,sep='\t'), silent=T) + # if (!is.null(Log2)) cat(' ..found _probes.txt') + # } + + if(file.exists('probes.txt')) { + Log2 <- read.csv(file='probes.txt',header=T,sep='\t') + } else if(file.exists('_probes.txt')) { + Log2 <- read.csv(file='_probes.txt',header=T,sep='\t') + } else if(file.exists('raw.txt')) { + Log2 <- read.csv(file='raw.txt',header=T,sep='\t') + colnames(Log2)[4:5] <- c('Start','Value') + Log2$Chromosome <- paste('chr',Log2$Chromosome,sep='') + Log2$Chromosome[Log2$Chromosome == 'chr24'] <- 'chrX' + Log2$Chromosome[Log2$Chromosome == 'chr25'] <- 'chrY' + } else { + print('No probes.txt found!') } + + Log2$Chromosome <- as.character(Log2$Chromosome) + #c("Chromosome", "Start", "End", "Value", "Array") + + ## This code was used if Log-R must be read from .CNCHP file (Affymetrix Genotyping Console or APT). ## NOT currently supported downstream as .CNCHP is lacks allele-specific information for Affy 250k/500k # if (is.null(Log2)) { @@ -517,14 +561,28 @@ ### readAlf <- function(localDir=NULL) { ## This funciton reads allele frequency [B/(A+B)] from the file 'snps.txt', which must be present in the current directory. - alf=NULL - try( alf <- read.csv(file='snps.txt',header=T,sep='\t'), silent=T) - # if (!is.null(alf)) cat(' ..found snps.txt') - if (is.null(alf)) { - try( alf <- read.csv(file='_snps.txt',header=T,sep='\t'), silent=T) - if (!is.null(alf)) cat(' ..found _snps.txt') + # alf=NULL + # try( alf <- read.csv(file='snps.txt',header=T,sep='\t'), silent=T) + # if (is.null(alf)) { + # try( alf <- read.csv(file='_snps.txt',header=T,sep='\t'), silent=T) + # if (!is.null(alf)) cat(' ..found _snps.txt') + # } + + if(file.exists('snps.txt')) { + alf <- read.csv(file='snps.txt',header=T,sep='\t') + } else if(file.exists('_snps.txt')) { + alf <- read.csv(file='_snps.txt',header=T,sep='\t') + } else if(file.exists('raw_snps.txt')) { + alf <- read.csv(file='raw_snps.txt',header=T,sep='\t') + colnames(alf)[c(4,6)] <- c('Start','Value') + alf$Chromosome <- paste('chr',alf$Chromosome,sep='') + alf$Chromosome[alf$Chromosome == 'chr24'] <- 'chrX' + alf$Chromosome[alf$Chromosome == 'chr25'] <- 'chrY' + } else { + print('No snps.txt found!') } - #alf=alf[alf$Value!=0.5,] + #c("Chromosome", "Start", "End", "Value", "Array") + return (alf) } ### @@ -533,16 +591,22 @@ ## The author recommends SNP-rank segmentation (NEXUS) or another CBS such as that in DNACopy. ## Using a HMM is not recommended unless you have a homogenous, diploid sample. (And then there is more user-friendly software anyway.) segments=NULL - try( segments <- read.csv(file='segments.txt',header=T,sep='\t'),silent=T) + if(file.exists('segments.txt')) { + segments <- read.csv(file='segments.txt',header=T,sep='\t') + } + # try( segments <- read.csv(file='segments.txt',header=T,sep='\t'),silent=T) # if (!is.null(segments)) cat(' ..found segments.txt') if (is.null(segments)) { - try( segments <- read.csv(file='_segments.txt',header=T,sep='\t'),silent=T) - if (!is.null(segments)) cat(' ..found _segments.txt') + # try( segments <- read.csv(file='_segments.txt',header=T,sep='\t'),silent=T) + # if (!is.null(segments)) cat(' ..found _segments.txt') } + if(!is.null(segments)) { + segments$Chromosome <- as.character(segments$Chromosome) + } return (segments) } ### -makeRegions <- function(Log2, alf, segments,dataType='Nexus') { +makeRegions <- function(Log2, alf, segments,dataType='Nexus',matched=FALSE,allelePeaks=F) { ## makeRegions is similar to "regsfromsegs" except regions are not subdivided before calculation of mean Log-R and Allelic Imbalance Ratio. regions=segments regions$Chromosome=as.character(segments$Chromosome) ## Chromosome @@ -552,6 +616,8 @@ #regions$log2=round(regions$Value,4) regions$imba=NA ## Allelic Imbalance Ratio regionIx=NULL ## Not currently used + regionIx$Log2 <- list() + regionIx$alf <- list() for (i in 1:nrows(regions)) { log2temp=which(equals(Log2$Chromosome,regions$Chromosome[i])) ## index of Log-R (current chrom) alftemp=which(equals(alf$Chromosome,regions$Chromosome[i])) ## index of Allele frequency (current chrom) @@ -565,18 +631,25 @@ log2temp=Log2$Value[log2temp] regions$log2[i]=median(log2temp) alftemp=alf$Value[alftemp] - if (length(alftemp)>3) { ## Prepare to calculate Allelic Imbalance Ratio - if (dataType=="Nexus") t1=sort( abs(alftemp-0.5) ) ## distance from middle (het), ascending - if (dataType=="CNCHP") t1=sort( abs(alftemp)-0 ) ## This is not currently in use, was intended for analysis of SNP6 .CNCHP data. - if (length(unique(t1))>5) { ## Avoid calculating Allelic Imbalance Ratio unless there are several different values to cluster - xx=NA - xx=try(kmeans(t1, 2), silent=T) ## This part is nearly identical to that of 'regsfromsegs()' - try( if (min(xx$size) > 0.05*max(xx$size)) { - xx=xx$centers - } else xx=NA, silent=T) - } else xx=NA - try(regions$imba[i] <- round( min(xx)/max(xx) ,2), silent=T) - } + temp = NA + try(temp <- allelicImbalance(alftemp,min,matched=matched,allelePeaks=allelePeaks),silent=T) + regions$imba[i] <- temp + + # if (length(alftemp)>3) { ## Prepare to calculate Allelic Imbalance Ratio + # if (dataType=="Nexus") t1=sort( abs(alftemp-0.5) ) ## distance from middle (het), ascending + # if (dataType=="CNCHP") t1=sort( abs(alftemp)-0 ) ## This is not currently in use, was intended for analysis of SNP6 .CNCHP data. + # if (length(unique(t1))>5) { ## Avoid calculating Allelic Imbalance Ratio unless there are several different values to cluster + # xx=NA + # xx=try(kmeans(t1, 2), silent=T) ## This part is nearly identical to that of 'regsfromsegs()' + # try( if (min(xx$size) > 0.05*max(xx$size)) { + # xx=xx$centers + # } else xx=NA, silent=T) + # } else xx=NA + # try(regions$imba[i] <- round( min(xx)/max(xx) ,2), silent=T) + # } + # if(matched == T | is.na(regions$imba[i])) { + # regions$imba[i] <- 2*median(abs(alftemp-.5),na.rm=T) + # } } return(list('regions'=regions,'regionIx'=regionIx)) } @@ -996,12 +1069,14 @@ ### Calculate model based Cns Cnx=Cn; for (cn in 0:maxCn) { - Cnx[Cn==cn]=cn + (2^regions$log2[Cn==cn]-2^int[[paste('cn',cn,sep='')]])/model$k + ix <- Cn==cn + ix[is.na(ix)] <- F + Cnx[ix]=(cn + (2^regions$log2[ix]-2^int[[paste('cn',cn,sep='')]])/model$k) }; Cnx[Cnx<0]=0 Cn=round(Cnx) ## Set minor CN - for (i in 1:nrow(regions)) { + for (i in (1:nrow(regions))[!is.na(Cn)]) { # set minor CN distance <- Inf if (Cn[i]<=1) { @@ -1048,7 +1123,7 @@ } } - ### ber?kna tum?rcellshalt!! + ### ber?kna tumorcellshalt!! # Mha modellparameter ## empirical delta for whole copy: 0.55 model$k2=model$k*model$meanCn @@ -1388,7 +1463,6 @@ ix=which(genes$chrom==as.character(data$Chromosome[i]) & genes$gtxStartdata$Start[i]) if (length(ix)>0) data$genes[i]=paste(sort(unique(genes$name2[ix])),collapse=', ') } - #browser() return(data) } @@ -1488,7 +1562,7 @@ ### Total frequency plot #quartz(file=paste(comparison,'freq_dif',name1,name2,'png',sep='.'),width=15,height=4.5,dpi=300,type='png') - png(file=paste(comparison,'png',sep='.'),width=3000,height=800) + png(filename=paste(comparison,'png',sep='.'),width=3000,height=800) ylim <- c( -15, 110) plot(1,1,type='n', bty='n', ann=T, mar=c(0,0,0,0), oma=c(0,0,0,0), @@ -1669,7 +1743,7 @@ ### Total and Differential frequency plot #quartz(file=paste(comparison,'freq_dif',name1,name2,'png',sep='.'),width=15,height=4.5,dpi=300,type='png') - png(file=paste(comparison,'allFreqs',name1,name2,'png',sep='.'),width=3000,height=800) + png(filename=paste(comparison,'allFreqs',name1,name2,'png',sep='.'),width=3000,height=800) ylim <- c(-100, 110) plot(1,1,type='n', bty='n', ann=T, mar=c(0,0,0,0), oma=c(0,0,0,0), @@ -1980,6 +2054,9 @@ par(mgp =c(1,0.5,0)) par(lend=1) + #Remove chrY,M,XY + mval[mchr=='chrY'|mchr=='chrXY'|mchr=='chrM']=NA + #Calculate previous distance of whole genome so the next chromsome is added #at the correct coordinate pre=rep(NA,23) @@ -2134,6 +2211,9 @@ par(oma = c(0,0,0,0)) par(mgp =c(1,0.5,0)) + #Remove chrY,M,XY + sval[mchr=='chrY'|mchr=='chrXY'|mchr=='chrM']=NA + #Index to avoid spos/sval values that are all the way at 0 or 1. ix = !(sval %in% c(0,1)) @@ -3513,11 +3593,16 @@ } sampledata <- do.call(rbind,datat) setwd(root) + # if (file.exists('SampleData.csv')) { + # write.table(sampledata,paste('SampleData',format(Sys.time(), "_%F_%T.csv"),sep=''),sep='\t',row.names=F) + # } else { + # write.table(sampledata,'SampleData.csv',sep='\t',row.names=F) + # } if (file.exists('SampleData.csv')) { - write.table(sampledata,paste('SampleData',format(Sys.time(), "_%F_%T.csv"),sep=''),sep='\t',row.names=F) - } else { - write.table(sampledata,'SampleData.csv',sep='\t',row.names=F) + file.rename('SampleData.csv',paste('SampleData',format(Sys.time(), "_old_TAPS_estimates_%F_%T.csv"),sep='')) } + write.table(sampledata,'SampleData.csv',sep='\t',row.names=F) + } @@ -3587,3 +3672,315 @@ return(round(c(cn1,cn2,cn3,loh),2)) } + + +# _ _ _ +# ___| (_) ___| | __ +# / __| | |/ __| |/ / +#| (__| | | (__| < +# \___|_|_|\___|_|\_\ +# +#click + +drawImage <- function(file,dev=T,xlim,ylim) { + jpeg <- readJPEG(file) + if(dev==T) dev.new(height=11,width=15.5) + par(mar=c(2,3,1,1),las=1,xaxs='i',yaxs='i',lend=1) + plot(1, type="n", xlim=xlim, ylim=ylim,axes=F) + rasterImage(jpeg,xlim[1], ylim[1], xlim[2], ylim[2], interpolate =T) + # axis(1,at=seq(-3,3,0.25),lend=1) + # axis(2,at=seq(ylim[1],ylim[2],0.05),lend=1) +} + +clickWrap <- function(n,xlim,ylim) { + y <- list() + opar <- par + for(i in 1:n) { + print(i) + col <- ifelse(i==1,'green','red') + y[[i]] <- clicker(col) + } + dev.new() + par(mfrow=c(n,1),mar=c(2,2.5,.5,.5),xaxs='i',yaxs='i') + lapply(y, FUN = function(y) { + x <- 1:length(y) + fit <- lm(y ~ x) + xlim <- c(0.5,length(x)+.5) + ylim <- c(min(y)-min(y)*.1,max(y)+max(y)*.1) + plot(x,x*fit$coefficients[2]+fit$coefficients[1],type='l',ylim=ylim,xlim=xlim,las=1,lend=1) + par(new=T) + plot(x,y,xlab='',ylab='',axes=F,ylim=ylim,xlim=xlim,pch=20,cex=1.5) + legend('topright',legend=paste('R^2: ',round(as.numeric(summary(fit)[8]),4),sep='')) + }) + locator(1) + try(dev.off(),T) + par(opar) +} + +clicker <- function(col='black') { + print('Click on your values. Right click to close (This will redraw the image).') + print(col) + lastLogR <- locator(1,type='p',pch=20,col=col,cex=1.8)$x + y <- 2^lastLogR + cat(paste('#','logR','R','delta-R','\n',sep='\t')) + cat(paste(1,round(lastLogR,2),round(2^lastLogR,2),0,'\n',sep="\t")) + j <- 2 + while(T) { + logR <- locator(1,type='p',pch=20,col=col,cex=1.8)$x + if(is.null(logR)) break + R <- 2^logR + y <- append(y,R) + lastR <- 2^lastLogR + diff <- R - lastR + cat(paste(j,round(logR,3),round(R,3),round(diff,3),'\n',sep="\t")) + lastLogR <- logR + j = j + 1 + } + return(y) +} + +copyNumberClicker <- function(maxy,miny) { + coord <- list() + for(i in 1:4) { + if(i<4) { + print(paste("Left click on cn",i,". Right click if it doesn't exist",sep='')) + } else { + print('Please select LOH') + } + tmp <- locator(n=1,type='p',pch=20,col='green',cex=1.8) + if (is.null(tmp$x)) tmp$x <- tmp$y <- NA + text(tmp, labels=c('cn1','cn2','cn3','LOH')[i],adj=1.5) + text(tmp, labels=if(i < 4) round(tmp$x,2) else round((tmp$y - miny) / (maxy - miny),2) ,adj=c(1.3,+2.2)) + coord$x[i] <- tmp$x + coord$y[i] <- tmp$y + } + return(coord) +} + + +returnSkippedValues <- function(sample,sampleDataOri,done,skipped){ + index <- sampleDataOri$Sample == sample + return(list(sample,sampleDataOri[index,2],sampleDataOri[index,3],sampleDataOri[index,4],sampleDataOri[index,5],done,skipped)) +} + +TAPS_click <- function(path = getwd()) { + + suppressPackageStartupMessages(library(tcltk)) + suppressPackageStartupMessages(library(jpeg)) + suppressPackageStartupMessages(library(foreach)) + # require(xlsx) + + root <- path + chr <- paste('Chromosome',c(1:22,'X')) + xlim <- c(-2.387, 2.16) + ylim <- c(0, 1) + maxy <- 0.9336337 + miny <- 0.5170546 + + # sampleData <- read.csv('SampleData.csv',sep='\t',header=T,colClasses=c('character',rep('numeric',4)),stringsAsFactors=F) [TRUNCATED] To get the complete diff run: svnlook diff /svnroot/patchwork -r 204 From noreply at r-forge.r-project.org Wed Mar 26 11:37:57 2014 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Wed, 26 Mar 2014 11:37:57 +0100 (CET) Subject: [Patchwork-commits] r205 - .git .git/logs .git/logs/refs/heads .git/logs/refs/remotes/origin .git/refs/heads .git/refs/remotes/origin pkg/TAPS/R Message-ID: <20140326103757.E955418728E@r-forge.r-project.org> Author: sebastian_d Date: 2014-03-26 11:37:57 +0100 (Wed, 26 Mar 2014) New Revision: 205 Modified: .git/index .git/logs/HEAD .git/logs/refs/heads/master .git/logs/refs/remotes/origin/master .git/refs/heads/master .git/refs/remotes/origin/master pkg/TAPS/R/TAPS.r Log: bugfixing bjorn Modified: .git/index =================================================================== (Binary files differ) Modified: .git/logs/HEAD =================================================================== --- .git/logs/HEAD 2014-03-20 09:35:35 UTC (rev 204) +++ .git/logs/HEAD 2014-03-26 10:37:57 UTC (rev 205) @@ -86,3 +86,4 @@ d6dbac6a1a12bfaf8dccbe161c2f509dcc53b73d 0fc6463401c39565b48199c6f4adfe077510620e Sebastian DiLorenzo 1393856755 +0100 commit: last commit before destroying everything as i try to update kickstart 0fc6463401c39565b48199c6f4adfe077510620e 53a860b490c7ac5501d8bb4f6b95e700491e12d4 Sebastian DiLorenzo 1395307680 +0100 commit: Big upgrades to homepage and genelist 53a860b490c7ac5501d8bb4f6b95e700491e12d4 3e3f8aafc4dd6607de55b08e676598fcdd1ac8c4 Sebastian DiLorenzo 1395307764 +0100 pull: Merge made by the 'recursive' strategy. +3e3f8aafc4dd6607de55b08e676598fcdd1ac8c4 79c9a62cd0b934e0f252e2fb660c26ffeeeea825 Sebastian DiLorenzo 1395830196 +0100 pull: Fast-forward Modified: .git/logs/refs/heads/master =================================================================== --- .git/logs/refs/heads/master 2014-03-20 09:35:35 UTC (rev 204) +++ .git/logs/refs/heads/master 2014-03-26 10:37:57 UTC (rev 205) @@ -86,3 +86,4 @@ d6dbac6a1a12bfaf8dccbe161c2f509dcc53b73d 0fc6463401c39565b48199c6f4adfe077510620e Sebastian DiLorenzo 1393856755 +0100 commit: last commit before destroying everything as i try to update kickstart 0fc6463401c39565b48199c6f4adfe077510620e 53a860b490c7ac5501d8bb4f6b95e700491e12d4 Sebastian DiLorenzo 1395307680 +0100 commit: Big upgrades to homepage and genelist 53a860b490c7ac5501d8bb4f6b95e700491e12d4 3e3f8aafc4dd6607de55b08e676598fcdd1ac8c4 Sebastian DiLorenzo 1395307764 +0100 pull: Merge made by the 'recursive' strategy. +3e3f8aafc4dd6607de55b08e676598fcdd1ac8c4 79c9a62cd0b934e0f252e2fb660c26ffeeeea825 Sebastian DiLorenzo 1395830196 +0100 pull: Fast-forward Modified: .git/logs/refs/remotes/origin/master =================================================================== --- .git/logs/refs/remotes/origin/master 2014-03-20 09:35:35 UTC (rev 204) +++ .git/logs/refs/remotes/origin/master 2014-03-26 10:37:57 UTC (rev 205) @@ -83,3 +83,4 @@ d6dbac6a1a12bfaf8dccbe161c2f509dcc53b73d 0fc6463401c39565b48199c6f4adfe077510620e Sebastian DiLorenzo 1393856763 +0100 update by push 0fc6463401c39565b48199c6f4adfe077510620e bf30fc707ee93417a8fad1f8ab1d8dc7426e128a Sebastian DiLorenzo 1395307764 +0100 pull: fast-forward bf30fc707ee93417a8fad1f8ab1d8dc7426e128a 3e3f8aafc4dd6607de55b08e676598fcdd1ac8c4 Sebastian DiLorenzo 1395307814 +0100 update by push +3e3f8aafc4dd6607de55b08e676598fcdd1ac8c4 79c9a62cd0b934e0f252e2fb660c26ffeeeea825 Sebastian DiLorenzo 1395830195 +0100 pull: fast-forward Modified: .git/refs/heads/master =================================================================== --- .git/refs/heads/master 2014-03-20 09:35:35 UTC (rev 204) +++ .git/refs/heads/master 2014-03-26 10:37:57 UTC (rev 205) @@ -1 +1 @@ -3e3f8aafc4dd6607de55b08e676598fcdd1ac8c4 +79c9a62cd0b934e0f252e2fb660c26ffeeeea825 Modified: .git/refs/remotes/origin/master =================================================================== --- .git/refs/remotes/origin/master 2014-03-20 09:35:35 UTC (rev 204) +++ .git/refs/remotes/origin/master 2014-03-26 10:37:57 UTC (rev 205) @@ -1 +1 @@ -3e3f8aafc4dd6607de55b08e676598fcdd1ac8c4 +79c9a62cd0b934e0f252e2fb660c26ffeeeea825 Modified: pkg/TAPS/R/TAPS.r =================================================================== --- pkg/TAPS/R/TAPS.r 2014-03-20 09:35:35 UTC (rev 204) +++ pkg/TAPS/R/TAPS.r 2014-03-26 10:37:57 UTC (rev 205) @@ -172,7 +172,7 @@ Log2$Value <- Log2$Value-median(Log2$Value,na.rm=T) ## Median-centering allRegions=NULL; #if ('allRegions.Rdata' %in% dir()) load('allRegions.Rdata') - if (is.null(allRegions)) allRegions <- makeRegions(Log2, alf, segments,matched=matched,allelePeaks=allelePeaks) ## Calculates necessary data for segments (all functions are in this file) + if (is.null(allRegions)) allRegions <- makeRegions(Log2, alf, segments,matched=matched,min=30,allelePeaks=allelePeaks) ## Calculates necessary data for segments (all functions are in this file) save(allRegions,file='allRegions.Rdata') regs=NULL;# if ('shortRegions.Rdata' %in% dir()) load('shortRegions.Rdata') if (is.null(regs)) { @@ -365,13 +365,11 @@ #Test if hg18 or hg19 should be used. length of (hg18 chr19) > (hg19 chr19) hgtest=regions[regions$Chromosome=="chr19",] if(hgtest$End[length(hgtest$Chromosome)] > 60000000) - { + { hg18=T - } - else - { + } else { hg18=F - } + } #save parameters as strings parameters=paste("Parameters given: cn2:",sampleInfo$cn2," delta:",sampleInfo$delta," loh:",sampleInfo$loh) @@ -455,7 +453,7 @@ regs=regs[!is.na(regs$logs),] ### MODDAT MARKUS MAJ 2013 return (regs) } -allelicImbalance <- function (data,min,matched=F,allelePeaks=F) { +allelicImbalance <- function (data,min=30,matched=F,allelePeaks=F) { if(matched == T ) { return(2*median(abs(data$Value-.5),na.rm=T)) } @@ -606,7 +604,7 @@ return (segments) } ### -makeRegions <- function(Log2, alf, segments,dataType='Nexus',matched=FALSE,allelePeaks=F) { +makeRegions <- function(Log2, alf, segments,dataType='Nexus',min=30,matched=FALSE,allelePeaks=F) { ## makeRegions is similar to "regsfromsegs" except regions are not subdivided before calculation of mean Log-R and Allelic Imbalance Ratio. regions=segments regions$Chromosome=as.character(segments$Chromosome) ## Chromosome @@ -2605,11 +2603,10 @@ karyotype_chromsCN <- function(chr,start,end,int,ai,Cn,mCn,hg18,mchr,mpos,mval,schr,spos,sval,t,name='',xlim=c(-1.02,1.82),ylim=0:1, maxCn=8,parameters) { - if(hg18==T) - { + if(hg18==T) { chroms=chroms_hg18 chromData=chromData_hg18 - } + } #Get ideogram #ideogram=getIdeogram() @@ -2706,8 +2703,7 @@ par(oma = c(0,0,0,0)) par(mgp =c(0.5,0.25,0)) - if(c!=23) - { + if(c!=23) { #Whole genome overview plot #Note that we are plotting chrY in the background, despite the fact that it is not an "active" chromosome. plot(c(int[notix],int[ix]),c(ai[notix],ai[ix]), @@ -2720,9 +2716,7 @@ col = c(col[notix],col[ix]), xlim = c(-1,1.5), ylim = ylim) - } - else - { + } else { plot(int[notix],ai[notix], pch=16, cex=size[notix], @@ -2736,12 +2730,9 @@ } xix=chr=="chrX" - if(c!=23) - { + if(c!=23) { points(int[xix],ai[xix],pch=4,col='#46464680') - } - else - { + } else { points(int[xix],ai[xix],pch=4,col=col[ix]) } @@ -3779,6 +3770,7 @@ # sampleData <- read.csv('SampleData.csv',sep='\t',header=T,colClasses=c('character',rep('numeric',4)),stringsAsFactors=F) sampleData <- read.csv('SampleData.csv',sep='\t',header=T,stringsAsFactors=F) + if(any(colnames(sampleData) %in% c('MAPD','MHOF'))) sampleData <- sampleData[,1:5] sampleDataOri <- sampleData if(any(T == sampleData$done)) {