[Patchwork-commits] r204 - .git .git/logs .git/logs/refs/heads .git/logs/refs/remotes/origin .git/refs/heads .git/refs/remotes/origin pkg/TAPS pkg/TAPS/R pkg/TAPS/man www www/css www/css/fonts www/css/fonts/fontawesome www/css/fonts/fontawesome/css www/css/fonts/fontawesome/font www/css/img www/js
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Thu Mar 20 10:35:35 CET 2014
Author: sebastian_d
Date: 2014-03-20 10:35:35 +0100 (Thu, 20 Mar 2014)
New Revision: 204
Added:
pkg/TAPS/R/sysdata.rda
pkg/TAPS/man/TAPS_click.Rd
pkg/TAPS/man/TAPS_estimates.Rd
www/composer.json
www/css/fonts/fontawesome/
www/css/fonts/fontawesome/css/
www/css/fonts/fontawesome/css/font-awesome-ie7.min.css
www/css/fonts/fontawesome/css/font-awesome.css
www/css/fonts/fontawesome/css/font-awesome.min.css
www/css/fonts/fontawesome/font/
www/css/fonts/fontawesome/font/FontAwesome.otf
www/css/fonts/fontawesome/font/fontawesome-webfont.eot
www/css/fonts/fontawesome/font/fontawesome-webfont.svg
www/css/fonts/fontawesome/font/fontawesome-webfont.ttf
www/css/fonts/fontawesome/font/fontawesome-webfont.woff
www/css/img/6295_chr1_region_18000000-23000000.jpeg
www/css/img/TAPS_Menu1.png
www/css/img/TAPS_Menu2.png
www/css/img/TAPS_Slide1.png
www/css/img/TAPS_Slide2.png
www/css/img/TAPS_Slide3.png
www/css/img/TAPS_Slide4.png
www/css/img/TAPS_Slide5.png
www/css/img/TAPS_Slide6.png
www/css/img/patchwork_logo.png
www/css/kickstart-slideshow.css
Removed:
pkg/TAPS/R/sysdata.rda
Modified:
.git/COMMIT_EDITMSG
.git/index
.git/logs/HEAD
.git/logs/refs/heads/master
.git/logs/refs/remotes/origin/master
.git/refs/heads/master
.git/refs/remotes/origin/master
pkg/TAPS/NAMESPACE
pkg/TAPS/R/TAPS.r
pkg/TAPS/man/TAPS_call.Rd
pkg/TAPS/man/TAPS_compare.Rd
pkg/TAPS/man/TAPS_freq.Rd
pkg/TAPS/man/TAPS_plot.Rd
pkg/TAPS/man/TAPS_region.Rd
www/TAPS_exec.php
www/TAPS_inst.php
www/TAPS_requ.php
www/TAPS_resu.php
www/changelog.php
www/css/kickstart.css
www/index.php
www/js/kickstart.js
www/pw_exec.php
www/pw_inst.php
www/pw_requ.php
www/pw_resu.php
Log:
Big changes to homepage and gene lists as well as other things. TAPS_click(), TAPS_estimate(), TAPS_region()
Modified: .git/COMMIT_EDITMSG
===================================================================
--- .git/COMMIT_EDITMSG 2014-01-14 15:20:19 UTC (rev 203)
+++ .git/COMMIT_EDITMSG 2014-03-20 09:35:35 UTC (rev 204)
@@ -1 +1 @@
-small fix xlsx
+Big upgrades to homepage and genelist
Modified: .git/index
===================================================================
(Binary files differ)
Modified: .git/logs/HEAD
===================================================================
--- .git/logs/HEAD 2014-01-14 15:20:19 UTC (rev 203)
+++ .git/logs/HEAD 2014-03-20 09:35:35 UTC (rev 204)
@@ -80,3 +80,9 @@
1c3b0b54598dac7863316a79427364b873bec3f3 78c55c83e03724a24900fbaf881864197d23ab4d Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1389602903 +0100 pull: Merge made by the 'recursive' strategy.
78c55c83e03724a24900fbaf881864197d23ab4d 218c5c8fe2ba9eb21c155ba9c5696b8eebfb90e8 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1389712335 +0100 commit: update to xlsx handling (barely needed anymore)
218c5c8fe2ba9eb21c155ba9c5696b8eebfb90e8 a4cb05b888a8eef33784f0ee1fb4f2de5a8c3c61 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1389712434 +0100 commit: small fix xlsx
+a4cb05b888a8eef33784f0ee1fb4f2de5a8c3c61 88f638af68fd6e9c505d9cbe0b9f164d6386f681 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1389712866 +0100 commit: update to homepage regarding TAPS installation
+88f638af68fd6e9c505d9cbe0b9f164d6386f681 686925e27a9b454ceae8db67f1c5ff2351c0dedd Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1393509665 +0100 pull: Fast-forward
+686925e27a9b454ceae8db67f1c5ff2351c0dedd d6dbac6a1a12bfaf8dccbe161c2f509dcc53b73d Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1393853775 +0100 commit: updates to TAPS functions and homepage
+d6dbac6a1a12bfaf8dccbe161c2f509dcc53b73d 0fc6463401c39565b48199c6f4adfe077510620e Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1393856755 +0100 commit: last commit before destroying everything as i try to update kickstart
+0fc6463401c39565b48199c6f4adfe077510620e 53a860b490c7ac5501d8bb4f6b95e700491e12d4 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1395307680 +0100 commit: Big upgrades to homepage and genelist
+53a860b490c7ac5501d8bb4f6b95e700491e12d4 3e3f8aafc4dd6607de55b08e676598fcdd1ac8c4 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1395307764 +0100 pull: Merge made by the 'recursive' strategy.
Modified: .git/logs/refs/heads/master
===================================================================
--- .git/logs/refs/heads/master 2014-01-14 15:20:19 UTC (rev 203)
+++ .git/logs/refs/heads/master 2014-03-20 09:35:35 UTC (rev 204)
@@ -80,3 +80,9 @@
1c3b0b54598dac7863316a79427364b873bec3f3 78c55c83e03724a24900fbaf881864197d23ab4d Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1389602903 +0100 pull: Merge made by the 'recursive' strategy.
78c55c83e03724a24900fbaf881864197d23ab4d 218c5c8fe2ba9eb21c155ba9c5696b8eebfb90e8 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1389712335 +0100 commit: update to xlsx handling (barely needed anymore)
218c5c8fe2ba9eb21c155ba9c5696b8eebfb90e8 a4cb05b888a8eef33784f0ee1fb4f2de5a8c3c61 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1389712434 +0100 commit: small fix xlsx
+a4cb05b888a8eef33784f0ee1fb4f2de5a8c3c61 88f638af68fd6e9c505d9cbe0b9f164d6386f681 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1389712866 +0100 commit: update to homepage regarding TAPS installation
+88f638af68fd6e9c505d9cbe0b9f164d6386f681 686925e27a9b454ceae8db67f1c5ff2351c0dedd Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1393509665 +0100 pull: Fast-forward
+686925e27a9b454ceae8db67f1c5ff2351c0dedd d6dbac6a1a12bfaf8dccbe161c2f509dcc53b73d Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1393853775 +0100 commit: updates to TAPS functions and homepage
+d6dbac6a1a12bfaf8dccbe161c2f509dcc53b73d 0fc6463401c39565b48199c6f4adfe077510620e Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1393856755 +0100 commit: last commit before destroying everything as i try to update kickstart
+0fc6463401c39565b48199c6f4adfe077510620e 53a860b490c7ac5501d8bb4f6b95e700491e12d4 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1395307680 +0100 commit: Big upgrades to homepage and genelist
+53a860b490c7ac5501d8bb4f6b95e700491e12d4 3e3f8aafc4dd6607de55b08e676598fcdd1ac8c4 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1395307764 +0100 pull: Merge made by the 'recursive' strategy.
Modified: .git/logs/refs/remotes/origin/master
===================================================================
--- .git/logs/refs/remotes/origin/master 2014-01-14 15:20:19 UTC (rev 203)
+++ .git/logs/refs/remotes/origin/master 2014-03-20 09:35:35 UTC (rev 204)
@@ -77,3 +77,9 @@
65601241ae236432ef8a7f3aab8d7af3068d4344 78c55c83e03724a24900fbaf881864197d23ab4d Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1389602990 +0100 update by push
78c55c83e03724a24900fbaf881864197d23ab4d 218c5c8fe2ba9eb21c155ba9c5696b8eebfb90e8 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1389712343 +0100 update by push
218c5c8fe2ba9eb21c155ba9c5696b8eebfb90e8 a4cb05b888a8eef33784f0ee1fb4f2de5a8c3c61 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1389712440 +0100 update by push
+a4cb05b888a8eef33784f0ee1fb4f2de5a8c3c61 88f638af68fd6e9c505d9cbe0b9f164d6386f681 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1389712872 +0100 update by push
+88f638af68fd6e9c505d9cbe0b9f164d6386f681 686925e27a9b454ceae8db67f1c5ff2351c0dedd Sebastian DiLorenzo <S_D at array-47-13.medsci.uu.se> 1393509665 +0100 pull: fast-forward
+686925e27a9b454ceae8db67f1c5ff2351c0dedd d6dbac6a1a12bfaf8dccbe161c2f509dcc53b73d Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1393853806 +0100 update by push
+d6dbac6a1a12bfaf8dccbe161c2f509dcc53b73d 0fc6463401c39565b48199c6f4adfe077510620e Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1393856763 +0100 update by push
+0fc6463401c39565b48199c6f4adfe077510620e bf30fc707ee93417a8fad1f8ab1d8dc7426e128a Sebastian DiLorenzo <S_D at array-47-13.medsci.uu.se> 1395307764 +0100 pull: fast-forward
+bf30fc707ee93417a8fad1f8ab1d8dc7426e128a 3e3f8aafc4dd6607de55b08e676598fcdd1ac8c4 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1395307814 +0100 update by push
Modified: .git/refs/heads/master
===================================================================
--- .git/refs/heads/master 2014-01-14 15:20:19 UTC (rev 203)
+++ .git/refs/heads/master 2014-03-20 09:35:35 UTC (rev 204)
@@ -1 +1 @@
-a4cb05b888a8eef33784f0ee1fb4f2de5a8c3c61
+3e3f8aafc4dd6607de55b08e676598fcdd1ac8c4
Modified: .git/refs/remotes/origin/master
===================================================================
--- .git/refs/remotes/origin/master 2014-01-14 15:20:19 UTC (rev 203)
+++ .git/refs/remotes/origin/master 2014-03-20 09:35:35 UTC (rev 204)
@@ -1 +1 @@
-a4cb05b888a8eef33784f0ee1fb4f2de5a8c3c61
+3e3f8aafc4dd6607de55b08e676598fcdd1ac8c4
Modified: pkg/TAPS/NAMESPACE
===================================================================
--- pkg/TAPS/NAMESPACE 2014-01-14 15:20:19 UTC (rev 203)
+++ pkg/TAPS/NAMESPACE 2014-03-20 09:35:35 UTC (rev 204)
@@ -3,7 +3,8 @@
TAPS_region,
TAPS_freq,
TAPS_compare,
- TAPS_estimates)
+ TAPS_estimates,
+ TAPS_click)
Modified: pkg/TAPS/R/TAPS.r
===================================================================
--- pkg/TAPS/R/TAPS.r 2014-01-14 15:20:19 UTC (rev 203)
+++ pkg/TAPS/R/TAPS.r 2014-03-20 09:35:35 UTC (rev 204)
@@ -1,5 +1,3 @@
-## Det här är nya versionen.
-
## load('~/patchwork/pkg/TAPS/R/sysdata.rda')
## load('shortRegions.Rdata')
## load('allRegions.Rdata')
@@ -16,7 +14,7 @@
TAPS_plot <- function(#samples='all',
directory=NULL,autoEstimate=FALSE,
- bin=400,cores=1) {
+ bin=400,cores=1,matched=FALSE,allelePeaks=FALSE) {
#Automatically check, and if needed install, packages stats and fields
#Load stats. It should be in all, at least semi-new, R distributions so we dont need to install.package it or
@@ -25,7 +23,7 @@
suppressPackageStartupMessages(library(stats))
suppressPackageStartupMessages(library(DNAcopy))
suppressPackageStartupMessages(library(fields))
- #suppressPackageStartupMessages(library(xlsx))
+ suppressPackageStartupMessages(library(xlsx))
suppressPackageStartupMessages(library(foreach))
suppressPackageStartupMessages(library(doMC))
suppressPackageStartupMessages(registerDoMC(cores=cores))
@@ -106,7 +104,6 @@
Chromosome[i]=paste('chr', tempChr[i], sep="")
}
-
Log2=as.data.frame(cbind(Chromosome, Start, End, Value))
colnames(Log2)=c("Chromosome","Start","End","Value")
@@ -153,33 +150,33 @@
Log2=Log2[!is.nan(Log2$Value),]
Log2=Log2[!is.na(Log2$Value),]
+ Log2 <- Log2[which(Log2$Value != -Inf & Log2$Value != +Inf ),]
alf=alf[!is.nan(alf$Value),]
alf=alf[!is.na(alf$Value),]
alf$Value[alf$Value<0]=0; alf$Value[alf$Value>1]=1
+ alf <- alf[which(alf$Value != -Inf & alf$Value != +Inf ),]
segments <- readSegments() ## segments if available (CBS recommended)
-
- #Remove NA values that some samples give.
- segments <- segments[!is.nan(segments$Value),]
- segments <- segments[!is.na(segments$Value),]
-
-
+
# cat(' ..processing')
if (is.null(segments)) { ## segmentation using DNA-copy if needed (must then be installed)
segments <- segment_DNAcopy(Log2)
save.txt(segments,'_segments.txt')
}
+
+ #Remove NA values that some samples give.
+ segments <- segments[!is.nan(segments$Value),]
+ segments <- segments[!is.na(segments$Value),]
+ segments$Value <- segments$Value-median(Log2$Value,na.rm=T) ## Median-centering
+ Log2$Value <- Log2$Value-median(Log2$Value,na.rm=T) ## Median-centering
- segments$Value <- segments$Value-mean(Log2$Value) ## Median-centering
- Log2$Value <- Log2$Value-mean(Log2$Value) ## Median-centering
-
- allRegions=NULL; if ('allRegions.Rdata' %in% dir()) load('allRegions.Rdata')
- if (is.null(allRegions)) allRegions <- makeRegions(Log2, alf, segments) ## Calculates necessary data for segments (all functions are in this file)
+ allRegions=NULL; #if ('allRegions.Rdata' %in% dir()) load('allRegions.Rdata')
+ if (is.null(allRegions)) allRegions <- makeRegions(Log2, alf, segments,matched=matched,allelePeaks=allelePeaks) ## Calculates necessary data for segments (all functions are in this file)
save(allRegions,file='allRegions.Rdata')
- regs=NULL; if ('shortRegions.Rdata' %in% dir()) load('shortRegions.Rdata')
+ regs=NULL;# if ('shortRegions.Rdata' %in% dir()) load('shortRegions.Rdata')
if (is.null(regs)) {
- regs <- regsFromSegs(Log2,alf,segments,bin=bin,min=5) ## Calculates the same data for shortened segments
+ regs <- regsFromSegs(Log2,alf,segments,bin=bin,min=30,matched=matched,allelePeaks=allelePeaks) ## Calculates the same data for shortened segments
save(regs,file='shortRegions.Rdata')
}
@@ -207,14 +204,11 @@
#Test if hg18 or hg19 should be used. length of (hg18 chr19) > (hg19 chr19)
hgtest=regs[regs$chr=="chr19",]
- if(hgtest$end[length(hgtest$chr)] > 60000000)
- {
+ if(hgtest$end[length(hgtest$chr)] > 60000000) {
hg18=T
- }
- else
- {
+ } else {
hg18=F
- }
+ }
# cat('..plotting.\n')
OverviewPlot(regs$chr,regs$start,regs$end,regs$logs,regs$scores,hg18=hg18,
@@ -399,11 +393,11 @@
1
}
###
-regsFromSegs <- function (Log2,alf, segments, bin=200,min=1) {
+regsFromSegs <- function (Log2,alf, segments, bin=200,min=1,matched=F,allelePeaks=FALSE) {
## This function builds short segments and calcualtes their average Log-R and Allelic Imbalance Ratio.
rownames(Log2)=1:nrows(Log2)
rownames(alf)=1:nrows(alf)
- regs=list('chr'=NULL,'start'=NULL,'end'=NULL,'logs'=NULL,'scores'=NULL,'het'=NULL,'hom'=NULL,'probes'=NULL,'snps'=NULL)
+ regs=list('chr'=NULL,'start'=NULL,'end'=NULL,'logs'=NULL,'scores'=NULL,'probes'=NULL,'snps'=NULL)
#,'key1'=rep(NA,nrow(Log2)),'key2'=rep(NA,nrow(alf)))
n=nrow(segments)
s_check=NULL
@@ -434,40 +428,72 @@
#regs$or_seg=c(regs$or_seg,c)
regs$start=c(regs$start,s_) ## store start and end positions
regs$end=c(regs$end,e_)
-
- if (nrow(thisalf)>min) { ## Time to calculate Allelic Imbalance Ratio (if enough SNPs)
- t1=sort( abs(thisalf$Value-0.5) ) ## distance from middle (het) in the allele freq pattern, ascending
- if (length(unique(t1))>3) { ## do not attempt clustering with too few snps
- xx=NULL
- try(xx <- kmeans(t1, 2),silent=T) ## Attempt k-means (Hartigan-Wong: has proven very stable)
- if (!is.null(xx)) if (min(xx$size) > 0.05*max(xx$size)) { ## On some occations data quality is poor, requiring 5%+ heterozygous SNPs avoids most such cases.
- xx=xx$centers
- } else xx=NA
- } else xx=NA
- } else xx=NA
- #try (if (is.na(xx)) xx=0:1, silent=T)
- try (if (length(xx)==0) xx=0:1, silent=T)
- regs$scores=c(regs$scores, min(xx)/max(xx) ) ## Allelic Imbalance Ratio = inner / outer cluster.
- regs$het=c(regs$het, min(xx)) ## $het and $hom are no longer in use.
- regs$hom=c(regs$hom, max(xx))
+ temp = NA
+ try(temp <- allelicImbalance(thisalf$Value,min,matched,allelePeaks),silent=T)
+ regs$scores =c(regs$scores,temp)
+ # if (nrow(thisalf)>min) { ## Time to calculate Allelic Imbalance Ratio (if enough SNPs)
+ # t1=sort( abs(thisalf$Value-0.5) ) ## distance from middle (het) in the allele freq pattern, ascending
+ # if (length(unique(t1))>3) { ## do not attempt clustering with too few snps
+ # xx=NULL
+ # try(xx <- kmeans(t1, 2),silent=T) ## Attempt k-means (Hartigan-Wong: has proven very stable)
+ # if (!is.null(xx)) if (min(xx$size) > 0.05*max(xx$size)) { ## On some occations data quality is poor, requiring 5%+ heterozygous SNPs avoids most such cases.
+ # xx=xx$centers
+ # } else xx=NA
+ # } else xx=NA
+ # } else xx=NA
+ ##try (if (is.na(xx)) xx=0:1, silent=T)
+ # try (if (length(xx)==0) xx=0:1, silent=T)
+ # regs$scores=c(regs$scores, min(xx)/max(xx) ) ## Allelic Imbalance Ratio = inner / outer cluster.
+ # regs$het=c(regs$het, min(xx)) ## $het and $hom are no longer in use.
+ # regs$hom=c(regs$hom, max(xx))
+ # if(matched == T | is.na(regs$scores[length(regs$scores)])) {
+ # regs$scores[length(regs$scores)] <- 2*median(abs(thisalf$Value-.5),na.rm=T)
+ # }
}
}
regs=as.data.frame(regs)
regs=regs[!is.na(regs$logs),] ### MODDAT MARKUS MAJ 2013
return (regs)
}
+allelicImbalance <- function (data,min,matched=F,allelePeaks=F) {
+ if(matched == T ) {
+ return(2*median(abs(data$Value-.5),na.rm=T))
+ }
+ if(allelePeaks == F) {
+ if (length(data)>min) { ## Time to calculate Allelic Imbalance Ratio (if enough SNPs)
+ t1=sort( abs(data-0.5) ) ## distance from middle (het) in the allele freq pattern, ascending
+ if (length(unique(t1))>3) { ## do not attempt clustering with too few snps
+ xx=NULL
+ try(xx <- kmeans(t1, 2),silent=T) ## Attempt k-means (Hartigan-Wong: has proven very stable)
+ if (!is.null(xx)) if (min(xx$size) > 0.05*max(xx$size)) { ## On some occations data quality is poor, requiring 5%+ heterozygous SNPs avoids most such cases.
+ xx=xx$centers
+ } else xx=NA
+ } else xx=NA
+ } else xx=NA
+ #try (if (is.na(xx)) xx=0:1, silent=T)
+ #try (if (length(xx)==0) xx=0:1, silent=T)
+ #regs$scores=c(regs$scores, min(xx)/max(xx) ) ## Allelic Imbalance Ratio = inner / outer cluster.
+ #regs$het=c(regs$het, min(xx)) ## $het and $hom are no longer in use.
+ #regs$hom=c(regs$hom, max(xx))
+ return( min(xx)/max(xx) )
+ }
+ #The new shiet
+ if(allelePeaks == T) {
+ return(1-max(kmeans(data,2)$centers))
+ }
+}
###
segment_DNAcopy <- function(Log2) {
## If segmentation is required, DNAcopy is a good choice. Must be installed.
#library(DNAcopy)
- cat('..Using DNAcopy to create segments:\n')
+ # cat('..Using DNAcopy to create segments:\n')
segs=NULL
chroms=c(as.character(1:22),'X','Y')
chroms=paste('chr',chroms,sep='')
for (c in 1:24) { # segment chromosome
tlog=Log2[Log2$Chromosome==chroms[c],] ## Log-R of this chromosome
if (nrow(tlog)>0) { ## (ChrY may be absent)
- cnaObject=segment(smooth.CNA(CNA(tlog$Value, rep(c,nrow(tlog)), tlog$Start, data.type='logratio',sampleid=paste('chr',c))), undo.splits='sdundo', undo.SD=1)
+ cnaObject=segment(smooth.CNA(CNA(tlog$Value, rep(c,nrow(tlog)), tlog$Start, data.type='logratio',sampleid=paste('chr',c))), undo.splits='sdundo', undo.SD=1,verbose=0)
segs=rbind(segs,cnaObject$output) ## Add result to data frame
}
}
@@ -478,13 +504,31 @@
###
readLog2 <- function() {
## This function reads Log-ratio from the file "probes.txt" which must be present in the current directory.
- Log2=NULL
- try( Log2 <- read.csv(file='probes.txt',header=T,sep='\t'), silent=T)
- # if (!is.null(Log2)) cat(' ..found probes.txt')
- if (is.null(Log2)) {
- try( Log2 <- read.csv(file='_probes.txt',header=T,sep='\t'), silent=T)
- if (!is.null(Log2)) cat(' ..found _probes.txt')
+ # Log2=NULL
+ # try( Log2 <- read.csv(file='probes.txt',header=T,sep='\t'), silent=T)
+ # if (is.null(Log2)) {
+ # try( Log2 <- read.csv(file='_probes.txt',header=T,sep='\t'), silent=T)
+ # if (!is.null(Log2)) cat(' ..found _probes.txt')
+ # }
+
+ if(file.exists('probes.txt')) {
+ Log2 <- read.csv(file='probes.txt',header=T,sep='\t')
+ } else if(file.exists('_probes.txt')) {
+ Log2 <- read.csv(file='_probes.txt',header=T,sep='\t')
+ } else if(file.exists('raw.txt')) {
+ Log2 <- read.csv(file='raw.txt',header=T,sep='\t')
+ colnames(Log2)[4:5] <- c('Start','Value')
+ Log2$Chromosome <- paste('chr',Log2$Chromosome,sep='')
+ Log2$Chromosome[Log2$Chromosome == 'chr24'] <- 'chrX'
+ Log2$Chromosome[Log2$Chromosome == 'chr25'] <- 'chrY'
+ } else {
+ print('No probes.txt found!')
}
+
+ Log2$Chromosome <- as.character(Log2$Chromosome)
+ #c("Chromosome", "Start", "End", "Value", "Array")
+
+
## This code was used if Log-R must be read from .CNCHP file (Affymetrix Genotyping Console or APT).
## NOT currently supported downstream as .CNCHP is lacks allele-specific information for Affy 250k/500k
# if (is.null(Log2)) {
@@ -517,14 +561,28 @@
###
readAlf <- function(localDir=NULL) {
## This funciton reads allele frequency [B/(A+B)] from the file 'snps.txt', which must be present in the current directory.
- alf=NULL
- try( alf <- read.csv(file='snps.txt',header=T,sep='\t'), silent=T)
- # if (!is.null(alf)) cat(' ..found snps.txt')
- if (is.null(alf)) {
- try( alf <- read.csv(file='_snps.txt',header=T,sep='\t'), silent=T)
- if (!is.null(alf)) cat(' ..found _snps.txt')
+ # alf=NULL
+ # try( alf <- read.csv(file='snps.txt',header=T,sep='\t'), silent=T)
+ # if (is.null(alf)) {
+ # try( alf <- read.csv(file='_snps.txt',header=T,sep='\t'), silent=T)
+ # if (!is.null(alf)) cat(' ..found _snps.txt')
+ # }
+
+ if(file.exists('snps.txt')) {
+ alf <- read.csv(file='snps.txt',header=T,sep='\t')
+ } else if(file.exists('_snps.txt')) {
+ alf <- read.csv(file='_snps.txt',header=T,sep='\t')
+ } else if(file.exists('raw_snps.txt')) {
+ alf <- read.csv(file='raw_snps.txt',header=T,sep='\t')
+ colnames(alf)[c(4,6)] <- c('Start','Value')
+ alf$Chromosome <- paste('chr',alf$Chromosome,sep='')
+ alf$Chromosome[alf$Chromosome == 'chr24'] <- 'chrX'
+ alf$Chromosome[alf$Chromosome == 'chr25'] <- 'chrY'
+ } else {
+ print('No snps.txt found!')
}
- #alf=alf[alf$Value!=0.5,]
+ #c("Chromosome", "Start", "End", "Value", "Array")
+
return (alf)
}
###
@@ -533,16 +591,22 @@
## The author recommends SNP-rank segmentation (NEXUS) or another CBS such as that in DNACopy.
## Using a HMM is not recommended unless you have a homogenous, diploid sample. (And then there is more user-friendly software anyway.)
segments=NULL
- try( segments <- read.csv(file='segments.txt',header=T,sep='\t'),silent=T)
+ if(file.exists('segments.txt')) {
+ segments <- read.csv(file='segments.txt',header=T,sep='\t')
+ }
+ # try( segments <- read.csv(file='segments.txt',header=T,sep='\t'),silent=T)
# if (!is.null(segments)) cat(' ..found segments.txt')
if (is.null(segments)) {
- try( segments <- read.csv(file='_segments.txt',header=T,sep='\t'),silent=T)
- if (!is.null(segments)) cat(' ..found _segments.txt')
+ # try( segments <- read.csv(file='_segments.txt',header=T,sep='\t'),silent=T)
+ # if (!is.null(segments)) cat(' ..found _segments.txt')
}
+ if(!is.null(segments)) {
+ segments$Chromosome <- as.character(segments$Chromosome)
+ }
return (segments)
}
###
-makeRegions <- function(Log2, alf, segments,dataType='Nexus') {
+makeRegions <- function(Log2, alf, segments,dataType='Nexus',matched=FALSE,allelePeaks=F) {
## makeRegions is similar to "regsfromsegs" except regions are not subdivided before calculation of mean Log-R and Allelic Imbalance Ratio.
regions=segments
regions$Chromosome=as.character(segments$Chromosome) ## Chromosome
@@ -552,6 +616,8 @@
#regions$log2=round(regions$Value,4)
regions$imba=NA ## Allelic Imbalance Ratio
regionIx=NULL ## Not currently used
+ regionIx$Log2 <- list()
+ regionIx$alf <- list()
for (i in 1:nrows(regions)) {
log2temp=which(equals(Log2$Chromosome,regions$Chromosome[i])) ## index of Log-R (current chrom)
alftemp=which(equals(alf$Chromosome,regions$Chromosome[i])) ## index of Allele frequency (current chrom)
@@ -565,18 +631,25 @@
log2temp=Log2$Value[log2temp]
regions$log2[i]=median(log2temp)
alftemp=alf$Value[alftemp]
- if (length(alftemp)>3) { ## Prepare to calculate Allelic Imbalance Ratio
- if (dataType=="Nexus") t1=sort( abs(alftemp-0.5) ) ## distance from middle (het), ascending
- if (dataType=="CNCHP") t1=sort( abs(alftemp)-0 ) ## This is not currently in use, was intended for analysis of SNP6 .CNCHP data.
- if (length(unique(t1))>5) { ## Avoid calculating Allelic Imbalance Ratio unless there are several different values to cluster
- xx=NA
- xx=try(kmeans(t1, 2), silent=T) ## This part is nearly identical to that of 'regsfromsegs()'
- try( if (min(xx$size) > 0.05*max(xx$size)) {
- xx=xx$centers
- } else xx=NA, silent=T)
- } else xx=NA
- try(regions$imba[i] <- round( min(xx)/max(xx) ,2), silent=T)
- }
+ temp = NA
+ try(temp <- allelicImbalance(alftemp,min,matched=matched,allelePeaks=allelePeaks),silent=T)
+ regions$imba[i] <- temp
+
+ # if (length(alftemp)>3) { ## Prepare to calculate Allelic Imbalance Ratio
+ # if (dataType=="Nexus") t1=sort( abs(alftemp-0.5) ) ## distance from middle (het), ascending
+ # if (dataType=="CNCHP") t1=sort( abs(alftemp)-0 ) ## This is not currently in use, was intended for analysis of SNP6 .CNCHP data.
+ # if (length(unique(t1))>5) { ## Avoid calculating Allelic Imbalance Ratio unless there are several different values to cluster
+ # xx=NA
+ # xx=try(kmeans(t1, 2), silent=T) ## This part is nearly identical to that of 'regsfromsegs()'
+ # try( if (min(xx$size) > 0.05*max(xx$size)) {
+ # xx=xx$centers
+ # } else xx=NA, silent=T)
+ # } else xx=NA
+ # try(regions$imba[i] <- round( min(xx)/max(xx) ,2), silent=T)
+ # }
+ # if(matched == T | is.na(regions$imba[i])) {
+ # regions$imba[i] <- 2*median(abs(alftemp-.5),na.rm=T)
+ # }
}
return(list('regions'=regions,'regionIx'=regionIx))
}
@@ -996,12 +1069,14 @@
### Calculate model based Cns
Cnx=Cn; for (cn in 0:maxCn) {
- Cnx[Cn==cn]=cn + (2^regions$log2[Cn==cn]-2^int[[paste('cn',cn,sep='')]])/model$k
+ ix <- Cn==cn
+ ix[is.na(ix)] <- F
+ Cnx[ix]=(cn + (2^regions$log2[ix]-2^int[[paste('cn',cn,sep='')]])/model$k)
}; Cnx[Cnx<0]=0
Cn=round(Cnx)
## Set minor CN
- for (i in 1:nrow(regions)) {
+ for (i in (1:nrow(regions))[!is.na(Cn)]) {
# set minor CN
distance <- Inf
if (Cn[i]<=1) {
@@ -1048,7 +1123,7 @@
}
}
- ### beräkna tumörcellshalt!!
+ ### beräkna tumorcellshalt!!
# Mha modellparameter
## empirical delta for whole copy: 0.55
model$k2=model$k*model$meanCn
@@ -1388,7 +1463,6 @@
ix=which(genes$chrom==as.character(data$Chromosome[i]) & genes$gtxStart<data$End[i] & genes$gtxEnd>data$Start[i])
if (length(ix)>0) data$genes[i]=paste(sort(unique(genes$name2[ix])),collapse=', ')
}
- #browser()
return(data)
}
@@ -1488,7 +1562,7 @@
### Total frequency plot
#quartz(file=paste(comparison,'freq_dif',name1,name2,'png',sep='.'),width=15,height=4.5,dpi=300,type='png')
- png(file=paste(comparison,'png',sep='.'),width=3000,height=800)
+ png(filename=paste(comparison,'png',sep='.'),width=3000,height=800)
ylim <- c( -15, 110)
plot(1,1,type='n',
bty='n', ann=T, mar=c(0,0,0,0), oma=c(0,0,0,0),
@@ -1669,7 +1743,7 @@
### Total and Differential frequency plot
#quartz(file=paste(comparison,'freq_dif',name1,name2,'png',sep='.'),width=15,height=4.5,dpi=300,type='png')
- png(file=paste(comparison,'allFreqs',name1,name2,'png',sep='.'),width=3000,height=800)
+ png(filename=paste(comparison,'allFreqs',name1,name2,'png',sep='.'),width=3000,height=800)
ylim <- c(-100, 110)
plot(1,1,type='n',
bty='n', ann=T, mar=c(0,0,0,0), oma=c(0,0,0,0),
@@ -1980,6 +2054,9 @@
par(mgp =c(1,0.5,0))
par(lend=1)
+ #Remove chrY,M,XY
+ mval[mchr=='chrY'|mchr=='chrXY'|mchr=='chrM']=NA
+
#Calculate previous distance of whole genome so the next chromsome is added
#at the correct coordinate
pre=rep(NA,23)
@@ -2134,6 +2211,9 @@
par(oma = c(0,0,0,0))
par(mgp =c(1,0.5,0))
+ #Remove chrY,M,XY
+ sval[mchr=='chrY'|mchr=='chrXY'|mchr=='chrM']=NA
+
#Index to avoid spos/sval values that are all the way at 0 or 1.
ix = !(sval %in% c(0,1))
@@ -3513,11 +3593,16 @@
}
sampledata <- do.call(rbind,datat)
setwd(root)
+ # if (file.exists('SampleData.csv')) {
+ # write.table(sampledata,paste('SampleData',format(Sys.time(), "_%F_%T.csv"),sep=''),sep='\t',row.names=F)
+ # } else {
+ # write.table(sampledata,'SampleData.csv',sep='\t',row.names=F)
+ # }
if (file.exists('SampleData.csv')) {
- write.table(sampledata,paste('SampleData',format(Sys.time(), "_%F_%T.csv"),sep=''),sep='\t',row.names=F)
- } else {
- write.table(sampledata,'SampleData.csv',sep='\t',row.names=F)
+ file.rename('SampleData.csv',paste('SampleData',format(Sys.time(), "_old_TAPS_estimates_%F_%T.csv"),sep=''))
}
+ write.table(sampledata,'SampleData.csv',sep='\t',row.names=F)
+
}
@@ -3587,3 +3672,315 @@
return(round(c(cn1,cn2,cn3,loh),2))
}
+
+
+# _ _ _
+# ___| (_) ___| | __
+# / __| | |/ __| |/ /
+#| (__| | | (__| <
+# \___|_|_|\___|_|\_\
+#
+#click
+
+drawImage <- function(file,dev=T,xlim,ylim) {
+ jpeg <- readJPEG(file)
+ if(dev==T) dev.new(height=11,width=15.5)
+ par(mar=c(2,3,1,1),las=1,xaxs='i',yaxs='i',lend=1)
+ plot(1, type="n", xlim=xlim, ylim=ylim,axes=F)
+ rasterImage(jpeg,xlim[1], ylim[1], xlim[2], ylim[2], interpolate =T)
+ # axis(1,at=seq(-3,3,0.25),lend=1)
+ # axis(2,at=seq(ylim[1],ylim[2],0.05),lend=1)
+}
+
+clickWrap <- function(n,xlim,ylim) {
+ y <- list()
+ opar <- par
+ for(i in 1:n) {
+ print(i)
+ col <- ifelse(i==1,'green','red')
+ y[[i]] <- clicker(col)
+ }
+ dev.new()
+ par(mfrow=c(n,1),mar=c(2,2.5,.5,.5),xaxs='i',yaxs='i')
+ lapply(y, FUN = function(y) {
+ x <- 1:length(y)
+ fit <- lm(y ~ x)
+ xlim <- c(0.5,length(x)+.5)
+ ylim <- c(min(y)-min(y)*.1,max(y)+max(y)*.1)
+ plot(x,x*fit$coefficients[2]+fit$coefficients[1],type='l',ylim=ylim,xlim=xlim,las=1,lend=1)
+ par(new=T)
+ plot(x,y,xlab='',ylab='',axes=F,ylim=ylim,xlim=xlim,pch=20,cex=1.5)
+ legend('topright',legend=paste('R^2: ',round(as.numeric(summary(fit)[8]),4),sep=''))
+ })
+ locator(1)
+ try(dev.off(),T)
+ par(opar)
+}
+
+clicker <- function(col='black') {
+ print('Click on your values. Right click to close (This will redraw the image).')
+ print(col)
+ lastLogR <- locator(1,type='p',pch=20,col=col,cex=1.8)$x
+ y <- 2^lastLogR
+ cat(paste('#','logR','R','delta-R','\n',sep='\t'))
+ cat(paste(1,round(lastLogR,2),round(2^lastLogR,2),0,'\n',sep="\t"))
+ j <- 2
+ while(T) {
+ logR <- locator(1,type='p',pch=20,col=col,cex=1.8)$x
+ if(is.null(logR)) break
+ R <- 2^logR
+ y <- append(y,R)
+ lastR <- 2^lastLogR
+ diff <- R - lastR
+ cat(paste(j,round(logR,3),round(R,3),round(diff,3),'\n',sep="\t"))
+ lastLogR <- logR
+ j = j + 1
+ }
+ return(y)
+}
+
+copyNumberClicker <- function(maxy,miny) {
+ coord <- list()
+ for(i in 1:4) {
+ if(i<4) {
+ print(paste("Left click on cn",i,". Right click if it doesn't exist",sep=''))
+ } else {
+ print('Please select LOH')
+ }
+ tmp <- locator(n=1,type='p',pch=20,col='green',cex=1.8)
+ if (is.null(tmp$x)) tmp$x <- tmp$y <- NA
+ text(tmp, labels=c('cn1','cn2','cn3','LOH')[i],adj=1.5)
+ text(tmp, labels=if(i < 4) round(tmp$x,2) else round((tmp$y - miny) / (maxy - miny),2) ,adj=c(1.3,+2.2))
+ coord$x[i] <- tmp$x
+ coord$y[i] <- tmp$y
+ }
+ return(coord)
+}
+
+
+returnSkippedValues <- function(sample,sampleDataOri,done,skipped){
+ index <- sampleDataOri$Sample == sample
+ return(list(sample,sampleDataOri[index,2],sampleDataOri[index,3],sampleDataOri[index,4],sampleDataOri[index,5],done,skipped))
+}
+
+TAPS_click <- function(path = getwd()) {
+
+ suppressPackageStartupMessages(library(tcltk))
+ suppressPackageStartupMessages(library(jpeg))
+ suppressPackageStartupMessages(library(foreach))
+ # require(xlsx)
+
+ root <- path
+ chr <- paste('Chromosome',c(1:22,'X'))
+ xlim <- c(-2.387, 2.16)
+ ylim <- c(0, 1)
+ maxy <- 0.9336337
+ miny <- 0.5170546
+
+ # sampleData <- read.csv('SampleData.csv',sep='\t',header=T,colClasses=c('character',rep('numeric',4)),stringsAsFactors=F)
[TRUNCATED]
To get the complete diff run:
svnlook diff /svnroot/patchwork -r 204
More information about the Patchwork-commits
mailing list