[Patchwork-commits] r175 - / .git .git/logs .git/logs/refs/heads .git/logs/refs/remotes/origin .git/refs/heads .git/refs/remotes/origin pkg/TAPS/R www www/css/img
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Tue Jun 18 16:42:46 CEST 2013
Author: sebastian_d
Date: 2013-06-18 16:42:45 +0200 (Tue, 18 Jun 2013)
New Revision: 175
Added:
www/AI_ALR.php
www/TAPS_exec.php
www/TAPS_inst.php
www/TAPS_requ.php
www/TAPS_resu.php
www/css/img/TAPS_AI_ALR.jpeg
www/css/img/TAPS_call_help.png
www/css/img/TAPS_check2.png
www/css/img/TAPS_chromosomal2.jpg
www/css/img/TAPS_chromosomalCN.jpg
www/css/img/TAPS_overview2.jpg
www/css/img/flowchartTAPS.jpg
Modified:
.git/COMMIT_EDITMSG
.git/index
.git/logs/HEAD
.git/logs/refs/heads/master
.git/logs/refs/remotes/origin/master
.git/refs/heads/master
.git/refs/remotes/origin/master
/
pkg/TAPS/R/TAPS.r
www/AI_Cov.php
www/changelog.php
www/index.php
www/intro.php
www/pw_exec.php
www/pw_inst.php
www/pw_resu.php
Log:
substantial changes to homepage including adding TAPS guide. Some changes to TAPS
Property changes on:
___________________________________________________________________
Modified: svn:ignore
- .git
+ .git/objects
Modified: .git/COMMIT_EDITMSG
===================================================================
--- .git/COMMIT_EDITMSG 2013-06-12 14:26:17 UTC (rev 174)
+++ .git/COMMIT_EDITMSG 2013-06-18 14:42:45 UTC (rev 175)
@@ -1 +1 @@
-updates to fix gooch bugg
+Substantial changes to homepage which now incudes TAPS tutorial as well as some changes to TAPS
Modified: .git/index
===================================================================
(Binary files differ)
Modified: .git/logs/HEAD
===================================================================
--- .git/logs/HEAD 2013-06-12 14:26:17 UTC (rev 174)
+++ .git/logs/HEAD 2013-06-18 14:42:45 UTC (rev 175)
@@ -43,3 +43,7 @@
ac162be51fdf83fec9291a9caa7d90fe8884cafd 6f84921e7c20baa037383bd5abbe2bbbd9ccf494 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1369817275 +0200 commit: Buggfix where patchwork.alleledata would return an emtpy array.
6f84921e7c20baa037383bd5abbe2bbbd9ccf494 eec89ae45e1cadef3ec00c7ad9f556b9c5c86789 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1370248828 +0200 commit: updating TAPS
eec89ae45e1cadef3ec00c7ad9f556b9c5c86789 cb78a64920a2a019a496e6ec35bd383e34b3dff7 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1370434914 +0200 commit: updates to fix gooch bugg
+cb78a64920a2a019a496e6ec35bd383e34b3dff7 5eeba51632467a2cf6300bb53b26c98e0a03d735 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1371209333 +0200 pull : Fast-forward
+5eeba51632467a2cf6300bb53b26c98e0a03d735 b6c91dc80295b120969877f8e582708fb89038cc Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1371462080 +0200 commit: some changes to TAPS
+b6c91dc80295b120969877f8e582708fb89038cc 532c860ab5cdbc3cbfd3537ef8fe60f35d4935de Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1371462084 +0200 pull : Merge made by recursive.
+532c860ab5cdbc3cbfd3537ef8fe60f35d4935de 8fa9d868a51e8b4bdee100d16c6219d2541df0a6 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1371566516 +0200 commit: Substantial changes to homepage which now incudes TAPS tutorial as well as some changes to TAPS
Modified: .git/logs/refs/heads/master
===================================================================
--- .git/logs/refs/heads/master 2013-06-12 14:26:17 UTC (rev 174)
+++ .git/logs/refs/heads/master 2013-06-18 14:42:45 UTC (rev 175)
@@ -43,3 +43,7 @@
ac162be51fdf83fec9291a9caa7d90fe8884cafd 6f84921e7c20baa037383bd5abbe2bbbd9ccf494 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1369817275 +0200 commit: Buggfix where patchwork.alleledata would return an emtpy array.
6f84921e7c20baa037383bd5abbe2bbbd9ccf494 eec89ae45e1cadef3ec00c7ad9f556b9c5c86789 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1370248828 +0200 commit: updating TAPS
eec89ae45e1cadef3ec00c7ad9f556b9c5c86789 cb78a64920a2a019a496e6ec35bd383e34b3dff7 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1370434914 +0200 commit: updates to fix gooch bugg
+cb78a64920a2a019a496e6ec35bd383e34b3dff7 5eeba51632467a2cf6300bb53b26c98e0a03d735 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1371209333 +0200 pull : Fast-forward
+5eeba51632467a2cf6300bb53b26c98e0a03d735 b6c91dc80295b120969877f8e582708fb89038cc Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1371462080 +0200 commit: some changes to TAPS
+b6c91dc80295b120969877f8e582708fb89038cc 532c860ab5cdbc3cbfd3537ef8fe60f35d4935de Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1371462084 +0200 pull : Merge made by recursive.
+532c860ab5cdbc3cbfd3537ef8fe60f35d4935de 8fa9d868a51e8b4bdee100d16c6219d2541df0a6 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1371566516 +0200 commit: Substantial changes to homepage which now incudes TAPS tutorial as well as some changes to TAPS
Modified: .git/logs/refs/remotes/origin/master
===================================================================
--- .git/logs/refs/remotes/origin/master 2013-06-12 14:26:17 UTC (rev 174)
+++ .git/logs/refs/remotes/origin/master 2013-06-18 14:42:45 UTC (rev 175)
@@ -42,3 +42,6 @@
e43d6d23579d2fc2b6590271f8db30a1afcd9a25 6f84921e7c20baa037383bd5abbe2bbbd9ccf494 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1369817296 +0200 update by push
6f84921e7c20baa037383bd5abbe2bbbd9ccf494 eec89ae45e1cadef3ec00c7ad9f556b9c5c86789 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1370248841 +0200 update by push
eec89ae45e1cadef3ec00c7ad9f556b9c5c86789 cb78a64920a2a019a496e6ec35bd383e34b3dff7 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1370434931 +0200 update by push
+cb78a64920a2a019a496e6ec35bd383e34b3dff7 5eeba51632467a2cf6300bb53b26c98e0a03d735 Sebastian DiLorenzo <S_D at imv096.medsci.uu.se> 1371209333 +0200 pull : fast-forward
+5eeba51632467a2cf6300bb53b26c98e0a03d735 6eb4b7b50b8a1e1e97b2abc30404fa09d78e029e Sebastian DiLorenzo <S_D at imv096.medsci.uu.se> 1371461678 +0200 pull : fast-forward
+6eb4b7b50b8a1e1e97b2abc30404fa09d78e029e 8fa9d868a51e8b4bdee100d16c6219d2541df0a6 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1371566540 +0200 update by push
Modified: .git/refs/heads/master
===================================================================
--- .git/refs/heads/master 2013-06-12 14:26:17 UTC (rev 174)
+++ .git/refs/heads/master 2013-06-18 14:42:45 UTC (rev 175)
@@ -1 +1 @@
-cb78a64920a2a019a496e6ec35bd383e34b3dff7
+8fa9d868a51e8b4bdee100d16c6219d2541df0a6
Modified: .git/refs/remotes/origin/master
===================================================================
--- .git/refs/remotes/origin/master 2013-06-12 14:26:17 UTC (rev 174)
+++ .git/refs/remotes/origin/master 2013-06-18 14:42:45 UTC (rev 175)
@@ -1 +1 @@
-cb78a64920a2a019a496e6ec35bd383e34b3dff7
+8fa9d868a51e8b4bdee100d16c6219d2541df0a6
Modified: pkg/TAPS/R/TAPS.r
===================================================================
--- pkg/TAPS/R/TAPS.r 2013-06-12 14:26:17 UTC (rev 174)
+++ pkg/TAPS/R/TAPS.r 2013-06-18 14:42:45 UTC (rev 175)
@@ -1,3180 +1,3274 @@
-TAPS_plot <- function(directory=NULL,#xlim=c(-1,2),ylim=c(0,1),
- bin=400) {
-#Automatically check, and if needed install, packages stats and fields
-
-#Load stats. It should be in all, at least semi-new, R distributions so we dont need to install.package it or
-#pre-install it
-library(stats)
-
-#list.of.packages <- c("stats", "fields")
-#new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
-#if(length(new.packages)) install.packages(new.packages)
-
-if (is.null(directory))
- {
- cat("No directory supplied, using working directory.")
- directory = "."
- #cat("You have not assigned a directory containing one or more folders of samples for TAPS_plot to execute. \n")
- #cat("Example: \"/user/mysamples/\" or, to run it in your current working directory, \".\" \n")
- #directory = readline("Please supply such a directory now: ")
- }
-
-## This function takes a directory as input, then builds short-segment TAPS scatter plots for each sample (subdirectory) in the directory.
- setwd(directory)
- subs <- getSubdirs()
- if (is.null(subs)) { ## check samples = subdirectories or a single sample = current directory
- subs=thisSubdir()
- setwd('..')
- }
-
- # create SampleData file if there is none.
- if (length(grep('SampleData.txt',dir()))==0) save.txt(data.frame(Sample=subs,cn2='',delta='',loh='',completed.analysis='no'),file='SampleData.txt')
-
- for (i in 1:length(subs)) try( {
- setwd(subs[i])
- name <- subs[i]
- #if (length(grep('SampleData.txt',dir()))==0) save.txt(data.frame(Sample=name,cn2='',delta='',loh='',completed.analysis='no'),file='SampleData.txt')
- cat(' ..loading', subs[i])
-
- if(length(grep("*.cyhd.cychp",dir()))==1) ##cyhd sample
- {
- ##read CYCHP file from ChAS with logratio info##
- ################################################
- library(affxparser)
-
- name=list.files(".",pattern="*.cychp")
- temp=readCcg(name)
- tempLog2=temp$dataGroups$ProbeSets$dataSets$CopyNumber$table[,1:4]
- #Value=NULL
- Start=tempLog2$Position
- End=tempLog2$Position
- Value=tempLog2$Log2Ratio
- tempChr=tempLog2$Chromosome
- nrows=dim(tempLog2)[1]
- Chromosome=rep(NA,nrows)
- for (i in 1:nrows) {
- Chromosome[i]=paste('chr', tempChr[i], sep="")
- }
-
-
- Log2=as.data.frame(cbind(Chromosome, Start, End, Value))
-
- colnames(Log2)=c("Chromosome","Start","End","Value")
-
- Log2$Chromosome = as.character(Log2$Chromosome)
- Log2$Chromosome[Log2$Chromosome=="chr24"] = "chrX"
- Log2$Chromosome[Log2$Chromosome=="chr25"] = "chrY"
- Log2$Chromosome = as.factor(Log2$Chromosome)
-
- Log2$Start = as.integer(as.character(Log2$Start))
- Log2$End = as.integer(as.character(Log2$End))
- Log2$Value = as.numeric(as.character(Log2$Value))
-
- ##read txt file from ChAS with snp info##
- #########################################
-
- name=list.files(".",pattern="*.cyhd.txt")
- tempalf=read.table(file=name, header=FALSE, sep="\t", skip=12)
- SignalA=tempalf$V4
- SignalB=tempalf$V5
- tempChr=tempalf$V8
- Start=tempalf$V9
- End=tempalf$V9
- Chromosome=Value=rep(NA,nrows)
- nrows=dim(tempalf)[1]
- for (i in 1:nrows) {
- Value[i]=SignalB[i]/(SignalB[i]+SignalA[i])
- Chromosome[i]=paste('chr', tempChr[i], sep="")
- }
-
- alf=as.data.frame(cbind(Chromosome, Start, End, Value))
-
- colnames(alf)=c("Chromosome","Start","End","Value")
-
- alf$Start = as.integer(as.character(alf$Start))
- alf$End = as.integer(as.character(alf$End))
- alf$Value = as.numeric(as.character(alf$Value))
- }
- else
- {
- Log2 <- readLog2() ## Log-R
- alf <- readAlf() ## Allele Frequency
- }
-
- Log2=Log2[!is.nan(Log2$Value),]
- Log2=Log2[!is.na(Log2$Value),]
- alf=alf[!is.nan(alf$Value),]
- alf=alf[!is.na(alf$Value),]
-
- segments <- readSegments() ## segments if available (CBS recommended)
-
- #Remove NA values that some samples give.
- segments <- segments[!is.nan(segments$Value),]
-
- cat(' ..processing')
- if (is.null(segments)) { ## segmentation using DNA-copy if needed (must then be installed)
- segments <- segment_DNAcopy(Log2)
- save.txt(segments,'_segments.txt')
- }
-
- segments$Value <- segments$Value-median(Log2$Value) ## Median-centering
- Log2$Value <- Log2$Value-median(Log2$Value) ## Median-centering
-
-
- allRegions <- makeRegions(Log2, alf, segments) ## Calculates necessary data for segments (all functions are in this file)
- save(allRegions,file='allRegions.Rdata')
- regs <- regsFromSegs(Log2,alf,segments,bin=bin,min=5) ## Calculates the same data for shortened segments, very useful for visualization
- save(regs,file='shortRegions.Rdata')
-
- #Save for TAPS_region()
- save(regs,Log2,alf,segments,file="TAPS_plot_output.Rdata")
-
- #Clear warnings generated previously so hopefully I can see what is actually causing the program to fail.
- #assign("last.warning", NULL, envir = baseenv())
-
- cat('..plotting.\n')
-
- OverviewPlot(regs$chr,regs$start,regs$end,regs$logs,regs$scores,ideogram=NULL,
- as.character(Log2$Chromosome),Log2$Start,Log2$Value,as.character(alf$Chromosome),alf$Start,alf$Value,
- name=name,xlim=c(-1,1.5),ylim=c(0,1))
-
- ## Chromosome-wise plots for manual analysis
- regions=allRegions$regions
-
- karyotype_chroms(regs$chr,regs$start,regs$end,regs$logs,regs$scores,ideogram=NULL,
- as.character(Log2$Chromosome),Log2$Start,Log2$Value,as.character(alf$Chromosome),alf$Start,
- alf$Value,name=name,xlim=c(-1,2),ylim=c(0,1))
-
- cat('..done\n')
- setwd('..')
- }, silent=T)
-}
-###
-
-###
-TAPS_call <- function(directory=NULL,#xlim=c(-1,1),ylim=c(0,1),
- minseg=1,maxCn=12) {
-## TAPS_call outputs the total and minor allele copy numbers of all segments as a text file, and as images for visual confirmation.
-## sampleInfo_TAPS.txt must be present in each sample folder. If TAPS_plot could not make a good guess of the Log-R of copy number 2
-## and the Log-R difference to a deletion, you must interpret the scatter plots and edit sampleInfo_TAPS.txt.
- if (is.null(directory))
- {
- cat("No directory supplied, using working directory.")
- directory = "."
- #cat("You have not assigned a directory containing one or more folders of samples for TAPS_call to execute. \n")
- #cat("Example: \"/user/mysamples/\" or, to run it in your current working directory, \".\" \n")
- #directory = readline("Please supply such a directory now: ")
- }
-
-
- setwd(directory)
- #subs <- getSubdirs()
-
- if (length(grep('SampleData.txt',dir()))==1)
- {
- sampleData <- load.txt('SampleData.txt')
- }
- else
- {
- sampleData <- load.txt('../SampleData.txt')
- }
- subs=as.character(sampleData$Sample)
- #save.txt(sampleData,file='sampleData_old.csv')
-
- if (is.null(subs)) {
- subs=thisSubdir()
- setwd('..')
- }
- for (i in 1:length(subs)) if (sampleData$completed.analysis[i]=='no') {
- setwd(subs[i])
- name <- subs[i]
- sampleInfo <- sampleData[sampleData$Sample==subs[i],]
- if (nrow(sampleInfo)==1) {
-
- cat(' ..loading', subs[i])
- Log2 <- readLog2()
- alf <- readAlf(localDir)
- segments <- readSegments()
-
- #Some samples throw NA values, we simply remove these.
- Log2=Log2[!is.nan(Log2$Value),]
- Log2=Log2[!is.na(Log2$Value),]
-
- alf=alf[!is.nan(alf$Value),]
- alf=alf[!is.na(alf$Value),]
-
- segments <- segments[!is.nan(segments$Value),]
-
- segments$Value <- segments$Value-median(Log2$Value)
- Log2$Value <- Log2$Value-median(Log2$Value)
-
- cat(' ..processing.\n')
-
- load('allRegions.Rdata') ## These were prepared in TAPS_plot
- #allRegions <- makeRegions(Log2, alf, segments)
-
- t <- findCNs(Log2,alf,allRegions,dmin=0.9,maxCn=maxCn,ceiling=1,sampleInfo=sampleInfo) ## estimates the Log-R and Allelic Imbalance Ration of all variants up to maxCn
-
- u <- setCNs(allRegions,t$int,t$ai,maxCn) ## Assigns copy number variant for all segments
- allRegions$regions <- u$regions
- save.txt(u$regions,file=paste(name,'_segmentCN.txt',sep='')) ## adjacent segments with idendical copy number are merged (except over centromere) and all are saved to a text file
- regions=allRegions$regions
- save(t,regions,file="regions_t.Rdata")
-
- karyotype_check(regions$Chromosome,regions$Start,regions$End,regions$log2,regions$imba,regions$Cn,regions$mCn,t,ideogram=NULL,name=name)
-
- karyotype_chromsCN(regions$Chromosome,regions$Start,regions$End,regions$log2,
- regions$imba,regions$Cn,regions$mCn,ideogram=NULL,
- Log2$Chromosome,Log2$Start,Log2$Value,alf$Chromosome,
- alf$Start,alf$Value,t,name=name,xlim=c(-1,1),ylim=c(0,1))
-
- cat('..done\n')
- sampleData$completed.analysis[i] <- ''
- } else cat('Skipped',name,'\n')
-
- setwd('..')
- }
- save.txt(sampleData,file='SampleData_new.csv')
-}
-###
-regsFromSegs <- function (Log2,alf, segments, bin=200,min=1) {
-## This function builds short segments and calcualtes their average Log-R and Allelic Imbalance Ratio.
- rownames(Log2)=1:nrows(Log2)
- rownames(alf)=1:nrows(alf)
- regs=list('chr'=NULL,'start'=NULL,'end'=NULL,'logs'=NULL,'scores'=NULL,'het'=NULL,'hom'=NULL,'probes'=NULL,'snps'=NULL,'key1'=rep(NA,nrow(Log2)),'key2'=rep(NA,nrow(alf)))
- n=nrow(segments)
- s_check=NULL
-for (c in 1:n) { ## for every segment
- tlog=Log2[Log2$Chromosome==segments$Chromosome[c],] ## Log-R on this chromosome
- talf=alf[alf$Chromosome==segments$Chromosome[c],] ## Allele Freq on this chromosome
- tlog=tlog[(tlog$Start>=segments$Start[c])&(tlog$Start<segments$End[c]),] ## Log-R on this segment
- talf=talf[(talf$Start>=segments$Start[c])&(talf$Start<segments$End[c]),] ## Allele Freq on this segment
-
- tsnps=nrow(talf) ## number of snps and probes in this segment
- tprobes=nrow(tlog)
- tnregs=max(trunc(tsnps/bin),1) ## Further split into how many (a minimum of 1) subsegments
- tcuts=segments$Start[c] ## The first start pos
- tlength=segments$End[c]-segments$Start[c] ## Length of this whole segment
- for (i in 1:tnregs) tcuts = c(tcuts, tcuts[1]+round(i/tnregs*tlength)) ## Break the segment at these positions
-
- for (r in 1:(tnregs)) { ## build the subsegments
- regs$chr=c(regs$chr,as.character(segments$Chromosome[c])) ## Chromosome
- s_=tcuts[r] ## Start
- e_=tcuts[r+1] ## End
- thisalf=talf[(talf$Start>=s_)&(talf$Start<=e_),] ## get the Log-R values
- thislog=tlog[(tlog$Start>=s_)&(tlog$Start<=e_),] ## and the allele frequency
- regs$key1[as.integer(rownames(thislog))]=length(regs$chr) ## store their positions for fast access during plotting
- regs$key2[as.integer(rownames(thisalf))]=length(regs$chr) ## --"--
- regs$logs=c( regs$logs, mean(thislog$Value) ) ## store average log ratio of this segment
- regs$probes=c(regs$probes,nrow(thislog)) ## store number of probes
- regs$snps=c(regs$snps,nrow(thisalf)) ## store number of bi-allelic probes (SNPs)
- #regs$or_seg=c(regs$or_seg,c)
- regs$start=c(regs$start,s_) ## store start and end positions
- regs$end=c(regs$end,e_)
-
- if (nrow(thisalf)>min) { ## Time to calculate Allelic Imbalance Ratio (if enough SNPs)
- t1=sort( abs(thisalf$Value-0.5) ) ## distance from middle (het) in the allele freq pattern, ascending
- if (length(unique(t1))>3) { ## do not attempt clustering with too few snps
- xx=NULL
- try(xx <- kmeans(t1, 2),silent=T) ## Attempt k-means (Hartigan-Wong: has proven very stable)
- if (!is.null(xx)) if (min(xx$size) > 0.05*max(xx$size)) { ## On some occations data quality is poor, requiring 5%+ heterozygous SNPs avoids most such cases.
- xx=xx$centers
- } else xx=NA
- } else xx=NA
- } else xx=NA
- #try (if (is.na(xx)) xx=0:1, silent=T)
- try (if (length(xx)==0) xx=0:1, silent=T)
- regs$scores=c(regs$scores, min(xx)/max(xx) ) ## Allelic Imbalance Ratio = inner / outer cluster.
- regs$het=c(regs$het, min(xx)) ## $het and $hom are no longer in use.
- regs$hom=c(regs$hom, max(xx))
- }
-}
-return (regs)
-}
-###
-segment_DNAcopy <- function(Log2) {
-## If segmentation is required, DNAcopy is a good choice. Must be installed.
- #library(DNAcopy)
- cat('..Using DNAcopy to create segments:\n')
- segs=NULL
- chroms=c(as.character(1:22),'X','Y')
- chroms=paste('chr',chroms,sep='')
- for (c in 1:24) { # segment chromosome
- tlog=Log2[Log2$Chromosome==chroms[c],] ## Log-R of this chromosome
- if (nrow(tlog)>0) { ## (ChrY may be absent)
- cnaObject=segment(smooth.CNA(CNA(tlog$Value, rep(c,nrow(tlog)), tlog$Start, data.type='logratio',sampleid=paste('chr',c))), undo.splits='sdundo', undo.SD=1) ## Segments this chromosome
- segs=rbind(segs,cnaObject$output) ## Add result to data frame
- }
- }
- colnames(segs)=c('x','Chromosome','Start','End','Markers','Value')
- segs$Chromosome=chrom_ucsc(segs$Chromosome) ## Adjust chromosome names to standard
- return(segs[,2:6])
-}
-###
-readLog2 <- function() {
-## This function reads Log-ratio from the file "probes.txt" which must be present in the current directory.
- Log2=NULL
- try( Log2 <- read.csv(file='probes.txt',header=T,sep='\t'), silent=T)
- if (!is.null(Log2)) cat(' ..found probes.txt')
- if (is.null(Log2)) {
- try( Log2 <- read.csv(file='_probes.txt',header=T,sep='\t'), silent=T)
- if (!is.null(Log2)) cat(' ..found _probes.txt')
- }
-## This code was used if Log-R must be read from .CNCHP file (Affymetrix Genotyping Console or APT). NOT currently supported downstream as .CNCHP is lacks allele-specific information for Affy 250k/500k
- # if (is.null(Log2)) {
- # dir=dir()
- # ix=grep('cnchp',dir,ignore.case=TRUE)
- # if (length(ix)==1)
- # cat(' ..found',dir[ix])
- # library(affxparser)
- # temp=readCcg(dir[ix])
- # temp=temp$dataGroups$MultiData$dataSets$CopyNumber$table
- # temp$Chromosome[temp$Chromosome==24]=23 # strange numbers in cnchp
- # temp$Chromosome[temp$Chromosome==25]=24
- # Log2 = data.frame('Chromosome'=chrom_ucsc(temp$Chromosome), 'Start'=temp$Position, 'End'=temp$Position, 'Value'=temp$Log2Ratio)
- # save.txt(Log2, file='_probes.txt')
- # cat(' ..wrote _probes.txt')
- # adif=NULL
- # try(
- # adif <- temp$Allele,
- # silent=T
- # )
- # if (!is.null(adif)) {
- # ix= !is.nan(adif)
- # alf=data.frame('Chromosome'=chrom_ucsc(temp$Chromosome[ix]), 'Start'=temp$Position[ix], 'End'=temp$Position[ix], 'Value'=0.5+adif[ix])
- # save.txt(alf, file='_snps.txt')
- # cat(' ..wrote _snps.txt')
- # }
- # }
- return (Log2)
-}
-###
-readAlf <- function(localDir=NULL) {
-## This funciton reads allele frequency [B/(A+B)] from the file 'snps.txt', which must be present in the current directory.
- alf=NULL
- try( alf <- read.csv(file='snps.txt',header=T,sep='\t'), silent=T)
- if (!is.null(alf)) cat(' ..found snps.txt')
- if (is.null(alf)) {
- try( alf <- read.csv(file='_snps.txt',header=T,sep='\t'), silent=T)
- if (!is.null(alf)) cat(' ..found _snps.txt')
- }
- #alf=alf[alf$Value!=0.5,]
- return (alf)
-}
-###
-readSegments <- function() {
-## This function reads segment information in 'segments.txt' which must be present in the current folder.
-## The author recommends SNP-rank segmentation (NEXUS) or another CBS such as that in DNACopy.
-## Using a HMM is not recommended unless you have a homogenous, diploid sample. (And then there is more user-friendly software anyway.)
- segments=NULL
- try( segments <- read.csv(file='segments.txt',header=T,sep='\t'),silent=T)
- if (!is.null(segments)) cat(' ..found segments.txt')
- if (is.null(segments)) {
- try( segments <- read.csv(file='_segments.txt',header=T,sep='\t'),silent=T)
- if (!is.null(segments)) cat(' ..found _segments.txt')
- }
- return (segments)
-}
-###
-makeRegions <- function(Log2, alf, segments,dataType='Nexus') {
-## makeRegions is similar to "regsfromsegs" except regions are not subdivided before calculation of mean Log-R and Allelic Imbalance Ratio.
-regions=segments
-regions$Chromosome=as.character(segments$Chromosome) ## Chromosome
-regions$lengthMB=round((regions$End-regions$Start)/1000000,3) ## length in megabases
-regions$probes=0 ## # of probes
-regions$snps=0 ## # of bi-allelic probes (SNPs)
-#regions$log2=round(regions$Value,4)
-regions$imba=NA ## Allelic Imbalance Ratio
-regionIx=NULL ## Not currently used
-for (i in 1:nrows(regions)) {
- log2temp=which(equals(Log2$Chromosome,regions$Chromosome[i])) ## index of Log-R (current chrom)
- alftemp=which(equals(alf$Chromosome,regions$Chromosome[i])) ## index of Allele frequency (current chrom)
-
- log2temp=log2temp [Log2$Start[log2temp]>=regions$Start[i] & Log2$Start[log2temp]<regions$End[i]] ## index of Log-R (current segment)
- alftemp=alftemp [alf$Start[alftemp]>=regions$Start[i] & alf$Start[alftemp]<regions$End[i]] ## index of Allele frequency (current segment)
- regions$probes[i]=length(log2temp)
- regions$snps[i]=length(alftemp)
- regionIx$Log2[[i]]=log2temp ## indexes of Log-R and Allele Frequency are saved for future use (plot color coding)
- regionIx$alf[[i]]=alftemp
- log2temp=Log2$Value[log2temp]
- regions$log2[i]=median(log2temp)
- alftemp=alf$Value[alftemp]
- if (length(alftemp)>3) { ## Prepare to calculate Allelic Imbalance Ratio
- if (dataType=="Nexus") t1=sort( abs(alftemp-0.5) ) ## distance from middle (het), ascending
- if (dataType=="CNCHP") t1=sort( abs(alftemp)-0 ) ## This is not currently in use, was intended for analysis of SNP6 .CNCHP data.
- if (length(unique(t1))>5) { ## Avoid calculating Allelic Imbalance Ratio unless there are several different values to cluster
- xx=NA
- xx=try(kmeans(t1, 2), silent=T) ## This part is nearly identical to that of 'regsfromsegs()'
- try( if (min(xx$size) > 0.05*max(xx$size)) {
- xx=xx$centers
- } else xx=NA, silent=T)
- } else xx=NA
- try(regions$imba[i] <- round( min(xx)/max(xx) ,2), silent=T)
- }
-}
-return(list('regions'=regions,'regionIx'=regionIx))
-}
-###
-equals <- function(a,b) {
-## a helper function in case factors are compared
-a <- as.character(a)
-b <- as.character(b)
-return (a==b)
-}
-###
-nrows <- function(data) dim(data)[1] ## don't ask me about this one.
-###
-load.txt <- function(file) { ## just to be rid of "sep"
- read.csv(file,sep='\t')
-}
-###
-save.txt <- function(data,file='temp', row.names=F, col.names=T, append=F) { ## simplified for convenience
- write.table(data,file=file,sep='\t',quote=F,row.names=row.names, col.names=col.names, append=append)
-}
-###
-deChrom_ucsc <- function(data) {
-## chroms in 1:24 and chr1:chrY format conversion
-if (length(data)==0) return (data)
-keep_index=NULL
-CHR=rep(0,length(data))
-existing=unique(data)
-for (i in 1:length(existing)) {
- temp=strsplit(as.character(existing[i]),'_')
- temp=strsplit(temp[[1]],'chr')[[1]][2]
- if (temp=='X') temp='23'
- if (temp=='_X') temp='23'
- if (temp=='Y') temp='24'
- CHR[data==existing[i]]=as.integer(temp)
-}
-return (CHR)
-}
-###
-chrom_ucsc <- function(data) {
-## same as above, backwards
- n=length(data)
- out=rep("",n)
- for (i in 1:n) {
- temp=as.character(data[i])
- if (temp=='23') temp='X'
- if (temp=='24') temp='Y'
- out[i]=paste('chr',temp, sep='')
- }
- return(out)
-}
-###
-subplot <- function(x, y) viewport(layout.pos.col=x, layout.pos.row=y) ## for subplots
-vplayout <- function(x, y) {
- grid.newpage()
- pushViewport(viewport(layout=grid.layout(y,x)))
-}
-###
-getSubdirs <- function() {
-## Reach for the subdirectories of current directory.
- dir=dir()
- here=getwd()
- n=length(dir)
- subdir=F
- dir_ix=rep(FALSE,n)
- for (i in 1:n) {
- try(setwd(dir[i]), silent=T)
- if (getwd()!=here) { # in another directory
- setwd('..')
- subdir=T
- dir_ix[i]=T
- }
- }
- if (subdir) return (dir[dir_ix])
- return (NULL)
-}
-###
-thisSubdir <- function() {
-## get the name of this subdirectory
- here=getwd()
- here=strsplit(here,'/')[[1]]
- here=here[length(here)]
- return(here)
-}
-###
-mySorter <- function(data) {
-## sort data on $Chromosome and $Start
- temp=data[order(data$Start),]
- t=deChrom_ucsc(temp$Chromosome)
- temp=temp[!is.na(t),]
- t=t[!is.na(t)]
- temp=temp[order(t),]
- return (temp)
-}
-###
-probesInRegions <- function(regions,probes) {
-## This function is not in use.
- rchr=as.character(regions$Chromosome)
- pchr=as.character(probes$Chr)
- nprobes=0
-
- for (i in 1:length(rchr)) {
- t=strsplit(rchr[i],'chr')[[1]][2]
- here=probes[(t==pchr),] #finding probe subset on this chr
- matching=(here$Position>=regions$Start[i]) & (here$Position<=regions$End[i])
- nprobes[i]=sum(matching)
- }
- return(nprobes)
-}
-###
-getOverlap <- function(s1,e1,s2,e2) { ## This is not currently used anywhere - it is destined for group studies and pileups.
- return(max((min(e1,e2)-max(s1,s2)),0))
-}
-###
-weightedMedian <- function(data,weights) {
- try( if (length(weights)==0) return (NULL), silent=T)
- try( return (median(rep(data,weights),na.rm=T)), silent=T)
- return (NULL)
-}
-weightedMean <- function(data,weights) {
- if (length(weights)==0) return (NULL)
- return (mean(rep(data,weights),na.rm=T))
-}
-###
-myMeans <- function(data,k) { ## Not currently used
- best <- 0
- clus <- list()
- for (i in 1:10) {
- clus[[1]] <- kmeans(data,k)
- var <- var(clus[[1]]$centers)
- if (var>best) {
- ix <- i
- best <- var
- }
- }
- return (clus[[ix]])
-}
-is.even <- function(data) {
- return (trunc(data/2)*2 == data)
-}
-is.odd <- function(data) {
- return (trunc(data/2)*2 != data)
-}
-neighbour <- function(data) {
- n <- length(data)
- dif <- data[2:n]-data[1:(n-1)]
- return (mean(dif))
-}
-is.autosome <- function(vector) {
- temp <- deChrom_ucsc(vector)
- return (temp<23)
-}
-
-##
-###
-findCNs <- function(Log2,alf,allRegions,name=thisSubdir(),dmin=0.9,maxCn=10,ceiling=1,sampleInfo=NULL) {
-## This function takes an estimate of the Log-R of copy number two (shift) and the difference in log-R between copy numbers 2 and 1 (delta)
-## (3 and 2 works too). Then, the Log-R and Allelic Imbalance Ratio of all possible copy number variants up to maxCn are estimated from
-## the Log-R and Allelic Imbalance Ratio of all the segments. This function will NOT be useful unless there is already a solid estimate
-## of 'shift' and 'delta'. See the previous function.
-
- shift=sampleInfo$cn2
- delta=sampleInfo$delta
-
- tix=NULL #temporary index
- int=NULL ## contains Log-R estimate of each (total) copy number
- ai=NULL ## contains Allelic Imbalance Ratio estimate of each copy number variant.
- regions <- allRegions$regions
- regions <- regions[(is.autosome(regions$Chromosome)®ions$lengthMB>1)&(!is.na(regions$imba)),] ## will use these regions
-
- ## likely cn2 regions sit within delta/3 from shift.
- expectedAt <- shift
- tix$cn2 <- abs(regions$log2 - expectedAt) < (delta/3) ## index of likely cn2 regions
- temp <- regions[tix$cn2,] ## cn2 regions
- med <- weightedMedian(temp$log2,temp$probes) ## improved value of Log-R at cn2 (returns NULL if theres nothing there)
- int$cn2 <- ifelse(!is.null(med),med,expectedAt) ## saved to int.
-
- ## likely cn1 regions sit at about cn2 - delta:
- d <- delta ## the (Log-R) distance to cn1
- expectedAt <- int$cn2-d ## cn1 is expected here
- tix$cn1 <- abs(regions$log2 - expectedAt) < (d/3) ## index of likely cn1 regions
- temp <- regions[tix$cn1,]
- med <- weightedMedian(temp$log2,temp$probes)
- int$cn1 <- ifelse(!is.null(med),med,expectedAt)
-
- ## likely cn0 regions sit below cn1 - delta:
- d <- int$cn2-int$cn1
- expectedAt <- int$cn1-d
- tix$cn0 <- regions$log2 < expectedAt
- temp <- regions[tix$cn0,]
- med <- weightedMedian(temp$log2,temp$probes)
- int$cn0 <- ifelse(!is.null(med),med,expectedAt)
-
- ## likely cn3 regions sit at about cn2+delta*dmin (dmin is about 0.9, the factor by which the distance between consecutive copy numbers diminish)
- d <- delta*dmin
- expectedAt <- int$cn2+d
- tix$cn3 <- abs(regions$log2 - expectedAt) < (d/3)
- temp <- regions[tix$cn3,]
- med <- weightedMedian(temp$log2,temp$probes)
- int$cn3 <- ifelse(!is.null(med),med,expectedAt)
-
- ## cn4 follows at ...
- d <- dmin*(int$cn3-int$cn2)
- expectedAt <- int$cn3+d
- tix$cn4 <- abs(regions$log2 - expectedAt) < (d/4)
- temp <- regions[tix$cn4,]
- med <- weightedMedian(temp$log2,temp$probes)
- int$cn4 <- ifelse(!is.null(med),med,expectedAt)
-
- ## generalized for higher cns
- for (cn in 5:maxCn) {
- thisCn <- paste('cn',cn,sep='')
- prevCn <- paste('cn',cn-1,sep='')
- pprevCn <- paste('cn',cn-2,sep='')
- d <- dmin*(int[prevCn][[1]]-int[pprevCn][[1]])
- expectedAt <- int[prevCn][[1]]+d
- tix[[thisCn]] <- abs(regions$log2 - expectedAt) < (d/5)
- temp <- regions[tix[thisCn][[1]],]
- med <- weightedMedian(temp$log2,temp$probes)
- int[thisCn] <- ifelse(!is.null(med),med,expectedAt)
- }
-
-
- ## at cn2, find the variant clusters (normal and CNNLOH)
- ix <- (abs(regions$log2 - int$cn2) < 0.2*(int$cn3-int$cn2) ) # taking only closely-matching segments
- data <- regions[ix,]
- data <- data[!is.na(data$imba),] # ...with a calculated allelic imbalance.
-
- ix <- (abs(regions$log2 - int$cn3) < 0.2*(int$cn4-int$cn3) )
- data3 <- regions[ix,]
- data3 <- data[!is.na(data$imba),]
- expectedAt <- 0.1
- ix <- data$imba<min(data3$imba)
- med <- weightedMedian(data$imba[ix],data$snps[ix]) ## Average allelic imbalance weighted on snp count
- ai$cn2m1 <- ifelse (!is.null(med),med,expectedAt)
-
- expectedAt <- sampleInfo$loh
- ix <- abs(data$imba-sampleInfo$loh) < 0.2* (sampleInfo$loh-ai$cn2m1)
- med <- weightedMedian(data$imba[ix],data$snps[ix])
- ai$cn2m0 <- ifelse (!is.null(med),med,expectedAt)
-
-
-
- ## for cn1 (and 0)
- ix <- (abs(regions$log2 - int$cn1) < 0.2*(int$cn2-int$cn1) )
- data <- regions[ix,]
- data <- data[!is.na(data$imba),]
- expectedAt <- (ai$cn2m0+ai$cn2m1)*3/5 ## Decent estimate.
[TRUNCATED]
To get the complete diff run:
svnlook diff /svnroot/patchwork -r 175
More information about the Patchwork-commits
mailing list