[Patchwork-commits] r175 - / .git .git/logs .git/logs/refs/heads .git/logs/refs/remotes/origin .git/refs/heads .git/refs/remotes/origin pkg/TAPS/R www www/css/img

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Tue Jun 18 16:42:46 CEST 2013


Author: sebastian_d
Date: 2013-06-18 16:42:45 +0200 (Tue, 18 Jun 2013)
New Revision: 175

Added:
   www/AI_ALR.php
   www/TAPS_exec.php
   www/TAPS_inst.php
   www/TAPS_requ.php
   www/TAPS_resu.php
   www/css/img/TAPS_AI_ALR.jpeg
   www/css/img/TAPS_call_help.png
   www/css/img/TAPS_check2.png
   www/css/img/TAPS_chromosomal2.jpg
   www/css/img/TAPS_chromosomalCN.jpg
   www/css/img/TAPS_overview2.jpg
   www/css/img/flowchartTAPS.jpg
Modified:
   .git/COMMIT_EDITMSG
   .git/index
   .git/logs/HEAD
   .git/logs/refs/heads/master
   .git/logs/refs/remotes/origin/master
   .git/refs/heads/master
   .git/refs/remotes/origin/master
   /
   pkg/TAPS/R/TAPS.r
   www/AI_Cov.php
   www/changelog.php
   www/index.php
   www/intro.php
   www/pw_exec.php
   www/pw_inst.php
   www/pw_resu.php
Log:
substantial changes to homepage including adding TAPS guide. Some changes to TAPS


Property changes on: 
___________________________________________________________________
Modified: svn:ignore
   - .git

   + .git/objects


Modified: .git/COMMIT_EDITMSG
===================================================================
--- .git/COMMIT_EDITMSG	2013-06-12 14:26:17 UTC (rev 174)
+++ .git/COMMIT_EDITMSG	2013-06-18 14:42:45 UTC (rev 175)
@@ -1 +1 @@
-updates to fix gooch bugg
+Substantial changes to homepage which now incudes TAPS tutorial as well as some changes to TAPS

Modified: .git/index
===================================================================
(Binary files differ)

Modified: .git/logs/HEAD
===================================================================
--- .git/logs/HEAD	2013-06-12 14:26:17 UTC (rev 174)
+++ .git/logs/HEAD	2013-06-18 14:42:45 UTC (rev 175)
@@ -43,3 +43,7 @@
 ac162be51fdf83fec9291a9caa7d90fe8884cafd 6f84921e7c20baa037383bd5abbe2bbbd9ccf494 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1369817275 +0200	commit: Buggfix where patchwork.alleledata would return an emtpy array.
 6f84921e7c20baa037383bd5abbe2bbbd9ccf494 eec89ae45e1cadef3ec00c7ad9f556b9c5c86789 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1370248828 +0200	commit: updating TAPS
 eec89ae45e1cadef3ec00c7ad9f556b9c5c86789 cb78a64920a2a019a496e6ec35bd383e34b3dff7 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1370434914 +0200	commit: updates to fix gooch bugg
+cb78a64920a2a019a496e6ec35bd383e34b3dff7 5eeba51632467a2cf6300bb53b26c98e0a03d735 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1371209333 +0200	pull : Fast-forward
+5eeba51632467a2cf6300bb53b26c98e0a03d735 b6c91dc80295b120969877f8e582708fb89038cc Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1371462080 +0200	commit: some changes to TAPS
+b6c91dc80295b120969877f8e582708fb89038cc 532c860ab5cdbc3cbfd3537ef8fe60f35d4935de Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1371462084 +0200	pull : Merge made by recursive.
+532c860ab5cdbc3cbfd3537ef8fe60f35d4935de 8fa9d868a51e8b4bdee100d16c6219d2541df0a6 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1371566516 +0200	commit: Substantial changes to homepage which now incudes TAPS tutorial as well as some changes to TAPS

Modified: .git/logs/refs/heads/master
===================================================================
--- .git/logs/refs/heads/master	2013-06-12 14:26:17 UTC (rev 174)
+++ .git/logs/refs/heads/master	2013-06-18 14:42:45 UTC (rev 175)
@@ -43,3 +43,7 @@
 ac162be51fdf83fec9291a9caa7d90fe8884cafd 6f84921e7c20baa037383bd5abbe2bbbd9ccf494 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1369817275 +0200	commit: Buggfix where patchwork.alleledata would return an emtpy array.
 6f84921e7c20baa037383bd5abbe2bbbd9ccf494 eec89ae45e1cadef3ec00c7ad9f556b9c5c86789 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1370248828 +0200	commit: updating TAPS
 eec89ae45e1cadef3ec00c7ad9f556b9c5c86789 cb78a64920a2a019a496e6ec35bd383e34b3dff7 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1370434914 +0200	commit: updates to fix gooch bugg
+cb78a64920a2a019a496e6ec35bd383e34b3dff7 5eeba51632467a2cf6300bb53b26c98e0a03d735 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1371209333 +0200	pull : Fast-forward
+5eeba51632467a2cf6300bb53b26c98e0a03d735 b6c91dc80295b120969877f8e582708fb89038cc Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1371462080 +0200	commit: some changes to TAPS
+b6c91dc80295b120969877f8e582708fb89038cc 532c860ab5cdbc3cbfd3537ef8fe60f35d4935de Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1371462084 +0200	pull : Merge made by recursive.
+532c860ab5cdbc3cbfd3537ef8fe60f35d4935de 8fa9d868a51e8b4bdee100d16c6219d2541df0a6 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1371566516 +0200	commit: Substantial changes to homepage which now incudes TAPS tutorial as well as some changes to TAPS

Modified: .git/logs/refs/remotes/origin/master
===================================================================
--- .git/logs/refs/remotes/origin/master	2013-06-12 14:26:17 UTC (rev 174)
+++ .git/logs/refs/remotes/origin/master	2013-06-18 14:42:45 UTC (rev 175)
@@ -42,3 +42,6 @@
 e43d6d23579d2fc2b6590271f8db30a1afcd9a25 6f84921e7c20baa037383bd5abbe2bbbd9ccf494 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1369817296 +0200	update by push
 6f84921e7c20baa037383bd5abbe2bbbd9ccf494 eec89ae45e1cadef3ec00c7ad9f556b9c5c86789 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1370248841 +0200	update by push
 eec89ae45e1cadef3ec00c7ad9f556b9c5c86789 cb78a64920a2a019a496e6ec35bd383e34b3dff7 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1370434931 +0200	update by push
+cb78a64920a2a019a496e6ec35bd383e34b3dff7 5eeba51632467a2cf6300bb53b26c98e0a03d735 Sebastian DiLorenzo <S_D at imv096.medsci.uu.se> 1371209333 +0200	pull : fast-forward
+5eeba51632467a2cf6300bb53b26c98e0a03d735 6eb4b7b50b8a1e1e97b2abc30404fa09d78e029e Sebastian DiLorenzo <S_D at imv096.medsci.uu.se> 1371461678 +0200	pull : fast-forward
+6eb4b7b50b8a1e1e97b2abc30404fa09d78e029e 8fa9d868a51e8b4bdee100d16c6219d2541df0a6 Sebastian DiLorenzo <dilorenzo.sebastian at gmail.com> 1371566540 +0200	update by push

Modified: .git/refs/heads/master
===================================================================
--- .git/refs/heads/master	2013-06-12 14:26:17 UTC (rev 174)
+++ .git/refs/heads/master	2013-06-18 14:42:45 UTC (rev 175)
@@ -1 +1 @@
-cb78a64920a2a019a496e6ec35bd383e34b3dff7
+8fa9d868a51e8b4bdee100d16c6219d2541df0a6

Modified: .git/refs/remotes/origin/master
===================================================================
--- .git/refs/remotes/origin/master	2013-06-12 14:26:17 UTC (rev 174)
+++ .git/refs/remotes/origin/master	2013-06-18 14:42:45 UTC (rev 175)
@@ -1 +1 @@
-cb78a64920a2a019a496e6ec35bd383e34b3dff7
+8fa9d868a51e8b4bdee100d16c6219d2541df0a6

Modified: pkg/TAPS/R/TAPS.r
===================================================================
--- pkg/TAPS/R/TAPS.r	2013-06-12 14:26:17 UTC (rev 174)
+++ pkg/TAPS/R/TAPS.r	2013-06-18 14:42:45 UTC (rev 175)
@@ -1,3180 +1,3274 @@
-TAPS_plot <- function(directory=NULL,#xlim=c(-1,2),ylim=c(0,1),
-  bin=400) {
-#Automatically check, and if needed install, packages stats and fields
-
-#Load stats. It should be in all, at least semi-new, R distributions so we dont need to install.package it or
-#pre-install it
-library(stats)
-
-#list.of.packages <- c("stats", "fields")
-#new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
-#if(length(new.packages)) install.packages(new.packages)
-  
-if (is.null(directory))
-  {
-  cat("No directory supplied, using working directory.")
-  directory = "."
-  #cat("You have not assigned a directory containing one or more folders of samples for TAPS_plot to execute. \n")
-  #cat("Example: \"/user/mysamples/\" or, to run it in your current working directory, \".\" \n")
-  #directory = readline("Please supply such a directory now: ")
-  }
-
-## This function takes a directory as input, then builds short-segment TAPS scatter plots for each sample (subdirectory) in the directory.
-  setwd(directory)
-  subs <- getSubdirs()
-  if (is.null(subs)) {                                         ## check samples = subdirectories or a single sample = current directory
-    subs=thisSubdir()
-    setwd('..')
-  }
-    
-  # create SampleData file if there is none.
-  if (length(grep('SampleData.txt',dir()))==0) save.txt(data.frame(Sample=subs,cn2='',delta='',loh='',completed.analysis='no'),file='SampleData.txt')
-  
-  for (i in 1:length(subs)) try( {
-    setwd(subs[i])
-    name <- subs[i]
-    #if (length(grep('SampleData.txt',dir()))==0) save.txt(data.frame(Sample=name,cn2='',delta='',loh='',completed.analysis='no'),file='SampleData.txt')
-    cat(' ..loading', subs[i])
-	
-	if(length(grep("*.cyhd.cychp",dir()))==1)				##cyhd sample
-		{
-		##read CYCHP file from ChAS with logratio info##
-		################################################
-		library(affxparser)
-
-		name=list.files(".",pattern="*.cychp")
-		temp=readCcg(name)
-		tempLog2=temp$dataGroups$ProbeSets$dataSets$CopyNumber$table[,1:4]
-		#Value=NULL
-		Start=tempLog2$Position
-		End=tempLog2$Position
-		Value=tempLog2$Log2Ratio
-		tempChr=tempLog2$Chromosome
-		nrows=dim(tempLog2)[1]
-		Chromosome=rep(NA,nrows)
-		for (i in 1:nrows) {
-			Chromosome[i]=paste('chr', tempChr[i], sep="")
-		  }
-
-
-		Log2=as.data.frame(cbind(Chromosome, Start, End, Value))
-
-		colnames(Log2)=c("Chromosome","Start","End","Value")
-
-    Log2$Chromosome = as.character(Log2$Chromosome)
-    Log2$Chromosome[Log2$Chromosome=="chr24"] = "chrX"
-    Log2$Chromosome[Log2$Chromosome=="chr25"] = "chrY"
-    Log2$Chromosome = as.factor(Log2$Chromosome)
-   
-    Log2$Start = as.integer(as.character(Log2$Start))
-    Log2$End = as.integer(as.character(Log2$End))
-    Log2$Value = as.numeric(as.character(Log2$Value))
-
-		##read txt file from ChAS with snp info##
-		#########################################
-
-		name=list.files(".",pattern="*.cyhd.txt")
-		tempalf=read.table(file=name, header=FALSE, sep="\t", skip=12)
-		SignalA=tempalf$V4
-		SignalB=tempalf$V5
-		tempChr=tempalf$V8
-		Start=tempalf$V9
-		End=tempalf$V9
-		Chromosome=Value=rep(NA,nrows)
-		nrows=dim(tempalf)[1]
-		for (i in 1:nrows) {
-			Value[i]=SignalB[i]/(SignalB[i]+SignalA[i])
-			Chromosome[i]=paste('chr', tempChr[i], sep="")
-		  }
-		  
-		alf=as.data.frame(cbind(Chromosome, Start, End, Value))
-
-		colnames(alf)=c("Chromosome","Start","End","Value")
-
-		alf$Start = as.integer(as.character(alf$Start))
-		alf$End = as.integer(as.character(alf$End))
-		alf$Value = as.numeric(as.character(alf$Value))
-		}
-	else
-		{
-		Log2 <- readLog2()                                     ## Log-R
-		alf <- readAlf()                                         ## Allele Frequency
-		}
-		
-    Log2=Log2[!is.nan(Log2$Value),]
-    Log2=Log2[!is.na(Log2$Value),]
-    alf=alf[!is.nan(alf$Value),]
-    alf=alf[!is.na(alf$Value),]
-    
-    segments <- readSegments()                                 ## segments if available (CBS recommended)
-    
-    #Remove NA values that some samples give.
-    segments <- segments[!is.nan(segments$Value),]
-
-    cat(' ..processing')
-    if (is.null(segments)) {                                 ## segmentation using DNA-copy if needed (must then be installed)
-    segments <- segment_DNAcopy(Log2)
-    save.txt(segments,'_segments.txt') 
-    }
-
-    segments$Value <- segments$Value-median(Log2$Value)     ## Median-centering
-    Log2$Value <- Log2$Value-median(Log2$Value)             ## Median-centering
-
-
-    allRegions <- makeRegions(Log2, alf, segments)            ## Calculates necessary data for segments (all functions are in this file)
-    save(allRegions,file='allRegions.Rdata')
-    regs <- regsFromSegs(Log2,alf,segments,bin=bin,min=5)    ## Calculates the same data for shortened segments, very useful for visualization
-    save(regs,file='shortRegions.Rdata')
-
-    #Save for TAPS_region()
-    save(regs,Log2,alf,segments,file="TAPS_plot_output.Rdata")
-
-    #Clear warnings generated previously so hopefully I can see what is actually causing the program to fail.
-    #assign("last.warning", NULL, envir = baseenv())
-
-    cat('..plotting.\n')
-
-    OverviewPlot(regs$chr,regs$start,regs$end,regs$logs,regs$scores,ideogram=NULL,
-    as.character(Log2$Chromosome),Log2$Start,Log2$Value,as.character(alf$Chromosome),alf$Start,alf$Value,
-    name=name,xlim=c(-1,1.5),ylim=c(0,1))                
-
-    ## Chromosome-wise plots for manual analysis
-    regions=allRegions$regions
-
-    karyotype_chroms(regs$chr,regs$start,regs$end,regs$logs,regs$scores,ideogram=NULL,
-      as.character(Log2$Chromosome),Log2$Start,Log2$Value,as.character(alf$Chromosome),alf$Start,
-      alf$Value,name=name,xlim=c(-1,2),ylim=c(0,1))
-
-    cat('..done\n')
-    setwd('..')
-  }, silent=T)
-}
-###
-
-###
-TAPS_call <- function(directory=NULL,#xlim=c(-1,1),ylim=c(0,1),
-  minseg=1,maxCn=12) {
-## TAPS_call outputs the total and minor allele copy numbers of all segments as a text file, and as images for visual confirmation.
-## sampleInfo_TAPS.txt must be present in each sample folder. If TAPS_plot could not make a good guess of the Log-R of copy number 2 
-## and the Log-R difference to a deletion, you must interpret the scatter plots and edit sampleInfo_TAPS.txt.
-  if (is.null(directory))
-  {
-  cat("No directory supplied, using working directory.")
-  directory = "."
-  #cat("You have not assigned a directory containing one or more folders of samples for TAPS_call to execute. \n")
-  #cat("Example: \"/user/mysamples/\" or, to run it in your current working directory, \".\" \n")
-  #directory = readline("Please supply such a directory now: ")
-  }
-
-
-  setwd(directory)
-  #subs <- getSubdirs()
-
-  if (length(grep('SampleData.txt',dir()))==1)
-    {
-    sampleData <- load.txt('SampleData.txt')
-    }
-  else
-    {
-    sampleData <- load.txt('../SampleData.txt')
-    }
-  subs=as.character(sampleData$Sample)
-  #save.txt(sampleData,file='sampleData_old.csv')
-
-  if (is.null(subs)) {
-    subs=thisSubdir()
-    setwd('..')
-  }
-  for (i in 1:length(subs)) if (sampleData$completed.analysis[i]=='no') {
-    setwd(subs[i])
-    name <- subs[i]
-    sampleInfo <- sampleData[sampleData$Sample==subs[i],]
-   if (nrow(sampleInfo)==1) {
-  
-    cat(' ..loading', subs[i])
-    Log2 <- readLog2()
-    alf <- readAlf(localDir)
-    segments <- readSegments()
-
-    #Some samples throw NA values, we simply remove these.
-    Log2=Log2[!is.nan(Log2$Value),]
-    Log2=Log2[!is.na(Log2$Value),]
-
-    alf=alf[!is.nan(alf$Value),]
-    alf=alf[!is.na(alf$Value),]
-    
-    segments <- segments[!is.nan(segments$Value),]
-
-    segments$Value <- segments$Value-median(Log2$Value) 
-    Log2$Value <- Log2$Value-median(Log2$Value)
-
-    cat(' ..processing.\n')
-  
-    load('allRegions.Rdata')                            ## These were prepared in TAPS_plot
-    #allRegions <- makeRegions(Log2, alf, segments)
-
-    t <- findCNs(Log2,alf,allRegions,dmin=0.9,maxCn=maxCn,ceiling=1,sampleInfo=sampleInfo) ## estimates the Log-R and Allelic Imbalance Ration of all variants up to maxCn
-
-    u <- setCNs(allRegions,t$int,t$ai,maxCn)            ## Assigns copy number variant for all segments
-    allRegions$regions <- u$regions
-    save.txt(u$regions,file=paste(name,'_segmentCN.txt',sep='')) ## adjacent segments with idendical copy number are merged (except over centromere) and all are saved to a text file
-    regions=allRegions$regions
-    save(t,regions,file="regions_t.Rdata")
-
-    karyotype_check(regions$Chromosome,regions$Start,regions$End,regions$log2,regions$imba,regions$Cn,regions$mCn,t,ideogram=NULL,name=name)
-
-    karyotype_chromsCN(regions$Chromosome,regions$Start,regions$End,regions$log2,
-      regions$imba,regions$Cn,regions$mCn,ideogram=NULL,
-      Log2$Chromosome,Log2$Start,Log2$Value,alf$Chromosome,
-      alf$Start,alf$Value,t,name=name,xlim=c(-1,1),ylim=c(0,1))
-    
-    cat('..done\n')
-    sampleData$completed.analysis[i] <- ''
-   } else cat('Skipped',name,'\n')
-    
-    setwd('..')
-  }
-  save.txt(sampleData,file='SampleData_new.csv')
-}
-###
-regsFromSegs <- function (Log2,alf, segments, bin=200,min=1) {
-## This function builds short segments and calcualtes their average Log-R and Allelic Imbalance Ratio.
-  rownames(Log2)=1:nrows(Log2)
-  rownames(alf)=1:nrows(alf)
-  regs=list('chr'=NULL,'start'=NULL,'end'=NULL,'logs'=NULL,'scores'=NULL,'het'=NULL,'hom'=NULL,'probes'=NULL,'snps'=NULL,'key1'=rep(NA,nrow(Log2)),'key2'=rep(NA,nrow(alf)))
-  n=nrow(segments)
-  s_check=NULL
-for (c in 1:n) { ## for every segment
-  tlog=Log2[Log2$Chromosome==segments$Chromosome[c],] ## Log-R on this chromosome
-  talf=alf[alf$Chromosome==segments$Chromosome[c],] ## Allele Freq on this chromosome
-  tlog=tlog[(tlog$Start>=segments$Start[c])&(tlog$Start<segments$End[c]),] ## Log-R on this segment
-  talf=talf[(talf$Start>=segments$Start[c])&(talf$Start<segments$End[c]),] ## Allele Freq on this segment
-
-  tsnps=nrow(talf)    ## number of snps and probes in this segment
-  tprobes=nrow(tlog)
-  tnregs=max(trunc(tsnps/bin),1) ## Further split into how many (a minimum of 1) subsegments
-  tcuts=segments$Start[c] ## The first start pos
-  tlength=segments$End[c]-segments$Start[c]    ## Length of this whole segment
-  for (i in 1:tnregs) tcuts = c(tcuts, tcuts[1]+round(i/tnregs*tlength)) ## Break the segment at these positions
-
-  for (r in 1:(tnregs)) {    ## build the subsegments
-    regs$chr=c(regs$chr,as.character(segments$Chromosome[c]))    ## Chromosome
-    s_=tcuts[r]                                                     ## Start
-    e_=tcuts[r+1]                                                 ## End
-    thisalf=talf[(talf$Start>=s_)&(talf$Start<=e_),]             ## get the Log-R values
-    thislog=tlog[(tlog$Start>=s_)&(tlog$Start<=e_),]             ## and the allele frequency
-    regs$key1[as.integer(rownames(thislog))]=length(regs$chr)    ## store their positions for fast access during plotting
-    regs$key2[as.integer(rownames(thisalf))]=length(regs$chr)    ## --"--
-    regs$logs=c( regs$logs, mean(thislog$Value) )                ## store average log ratio of this segment
-    regs$probes=c(regs$probes,nrow(thislog))                    ## store number of probes
-    regs$snps=c(regs$snps,nrow(thisalf))                        ## store number of bi-allelic probes (SNPs)
-    #regs$or_seg=c(regs$or_seg,c)    
-    regs$start=c(regs$start,s_)                                    ## store start and end positions
-    regs$end=c(regs$end,e_)
-
-    if (nrow(thisalf)>min) {                                    ## Time to calculate Allelic Imbalance Ratio (if enough SNPs)
-      t1=sort( abs(thisalf$Value-0.5) )                            ## distance from middle (het) in the allele freq pattern, ascending
-      if (length(unique(t1))>3) {                                ## do not attempt clustering with too few snps
-    xx=NULL
-    try(xx <- kmeans(t1, 2),silent=T)                            ## Attempt k-means (Hartigan-Wong: has proven very stable)
-    if (!is.null(xx)) if (min(xx$size) > 0.05*max(xx$size)) {    ## On some occations data quality is poor, requiring 5%+ heterozygous SNPs avoids most such cases.
-      xx=xx$centers
-    } else xx=NA
-      } else xx=NA      
-    } else xx=NA
-    #try (if (is.na(xx)) xx=0:1, silent=T)
-    try (if (length(xx)==0) xx=0:1, silent=T)
-    regs$scores=c(regs$scores, min(xx)/max(xx) )                ## Allelic Imbalance Ratio = inner / outer cluster.
-    regs$het=c(regs$het, min(xx))                                ## $het and $hom are no longer in use.
-    regs$hom=c(regs$hom, max(xx))
-  }
-}
-return (regs)
-}
-###
-segment_DNAcopy <- function(Log2) {
-## If segmentation is required, DNAcopy is a good choice. Must be installed. 
-  #library(DNAcopy)
-  cat('..Using DNAcopy to create segments:\n')
-  segs=NULL
-  chroms=c(as.character(1:22),'X','Y')
-  chroms=paste('chr',chroms,sep='')
-  for (c in 1:24) { # segment chromosome
-    tlog=Log2[Log2$Chromosome==chroms[c],]    ## Log-R of this chromosome
-    if (nrow(tlog)>0) {                        ## (ChrY may be absent)
-      cnaObject=segment(smooth.CNA(CNA(tlog$Value, rep(c,nrow(tlog)), tlog$Start, data.type='logratio',sampleid=paste('chr',c))), undo.splits='sdundo', undo.SD=1) ## Segments this chromosome
-      segs=rbind(segs,cnaObject$output)        ## Add result to data frame
-    }
-  }
-  colnames(segs)=c('x','Chromosome','Start','End','Markers','Value')
-  segs$Chromosome=chrom_ucsc(segs$Chromosome)    ## Adjust chromosome names to standard
-  return(segs[,2:6]) 
-}
-###
-readLog2 <- function() {
-## This function reads Log-ratio from the file "probes.txt" which must be present in the current directory.
-  Log2=NULL
-  try( Log2 <- read.csv(file='probes.txt',header=T,sep='\t'), silent=T)
-  if (!is.null(Log2)) cat(' ..found probes.txt')
-  if (is.null(Log2)) {
-    try( Log2 <- read.csv(file='_probes.txt',header=T,sep='\t'), silent=T)
-    if (!is.null(Log2)) cat(' ..found _probes.txt')
-  }
-## This code was used if Log-R must be read from .CNCHP file (Affymetrix Genotyping Console or APT). NOT currently supported downstream as .CNCHP is lacks allele-specific information for Affy 250k/500k
-  # if (is.null(Log2)) {   
-    # dir=dir()
-    # ix=grep('cnchp',dir,ignore.case=TRUE)
-    # if (length(ix)==1)
-      # cat(' ..found',dir[ix])
-      # library(affxparser)
-      # temp=readCcg(dir[ix])
-      # temp=temp$dataGroups$MultiData$dataSets$CopyNumber$table
-      # temp$Chromosome[temp$Chromosome==24]=23 # strange numbers in cnchp
-      # temp$Chromosome[temp$Chromosome==25]=24
-      # Log2 = data.frame('Chromosome'=chrom_ucsc(temp$Chromosome), 'Start'=temp$Position, 'End'=temp$Position, 'Value'=temp$Log2Ratio)
-      # save.txt(Log2, file='_probes.txt')
-      # cat(' ..wrote _probes.txt')
-      # adif=NULL
-      # try(
-    # adif <- temp$Allele,
-    # silent=T
-      # )      
-      # if (!is.null(adif)) {
-    # ix= !is.nan(adif)
-    # alf=data.frame('Chromosome'=chrom_ucsc(temp$Chromosome[ix]), 'Start'=temp$Position[ix], 'End'=temp$Position[ix], 'Value'=0.5+adif[ix])
-    # save.txt(alf, file='_snps.txt')
-    # cat(' ..wrote _snps.txt')
-      # }
-  # }
-  return (Log2) 
-}
-###
-readAlf <- function(localDir=NULL) {
-## This funciton reads allele frequency [B/(A+B)] from the file 'snps.txt', which must be present in the current directory.
-  alf=NULL
-  try( alf <- read.csv(file='snps.txt',header=T,sep='\t'), silent=T)
-  if (!is.null(alf)) cat(' ..found snps.txt')
-  if (is.null(alf)) {
-    try( alf <- read.csv(file='_snps.txt',header=T,sep='\t'), silent=T)
-    if (!is.null(alf)) cat(' ..found _snps.txt')
-  }
-  #alf=alf[alf$Value!=0.5,]
-  return (alf) 
-}
-###
-readSegments <- function() {
-## This function reads segment information in 'segments.txt' which must be present in the current folder.
-## The author recommends SNP-rank segmentation (NEXUS) or another CBS such as that in DNACopy.
-## Using a HMM is not recommended unless you have a homogenous, diploid sample. (And then there is more user-friendly software anyway.)
-  segments=NULL
-  try( segments <- read.csv(file='segments.txt',header=T,sep='\t'),silent=T)
-  if (!is.null(segments)) cat(' ..found segments.txt')
-  if (is.null(segments)) { 
-    try( segments <- read.csv(file='_segments.txt',header=T,sep='\t'),silent=T)
-    if (!is.null(segments)) cat(' ..found _segments.txt')
-  }
-  return (segments)
-}
-###
-makeRegions <- function(Log2, alf, segments,dataType='Nexus') {
-## makeRegions is similar to "regsfromsegs" except regions are not subdivided before calculation of mean Log-R and Allelic Imbalance Ratio.
-regions=segments
-regions$Chromosome=as.character(segments$Chromosome)            ## Chromosome
-regions$lengthMB=round((regions$End-regions$Start)/1000000,3)    ## length in megabases
-regions$probes=0                                                ## # of probes
-regions$snps=0                                                    ## # of bi-allelic probes (SNPs)
-#regions$log2=round(regions$Value,4)
-regions$imba=NA                                                 ## Allelic Imbalance Ratio
-regionIx=NULL                                                   ## Not currently used
-for (i in 1:nrows(regions)) {
-  log2temp=which(equals(Log2$Chromosome,regions$Chromosome[i])) ## index of Log-R (current chrom)
-  alftemp=which(equals(alf$Chromosome,regions$Chromosome[i]))    ## index of Allele frequency (current chrom)
-  
-  log2temp=log2temp [Log2$Start[log2temp]>=regions$Start[i] & Log2$Start[log2temp]<regions$End[i]]    ## index of Log-R (current segment)
-  alftemp=alftemp [alf$Start[alftemp]>=regions$Start[i] & alf$Start[alftemp]<regions$End[i]]        ## index of Allele frequency (current segment)
-  regions$probes[i]=length(log2temp)
-  regions$snps[i]=length(alftemp)
-  regionIx$Log2[[i]]=log2temp            ## indexes of Log-R and Allele Frequency are saved for future use (plot color coding)
-  regionIx$alf[[i]]=alftemp
-  log2temp=Log2$Value[log2temp]
-  regions$log2[i]=median(log2temp)
-  alftemp=alf$Value[alftemp]
-  if (length(alftemp)>3) {                ## Prepare to calculate Allelic Imbalance Ratio
-    if (dataType=="Nexus") t1=sort( abs(alftemp-0.5) ) ## distance from middle (het), ascending
-    if (dataType=="CNCHP") t1=sort( abs(alftemp)-0 ) ## This is not currently in use, was intended for analysis of SNP6 .CNCHP data.
-    if (length(unique(t1))>5) { ## Avoid calculating Allelic Imbalance Ratio unless there are several different values to cluster
-      xx=NA
-      xx=try(kmeans(t1, 2), silent=T)            ## This part is nearly identical to that of 'regsfromsegs()'
-      try( if (min(xx$size) > 0.05*max(xx$size)) {
-      xx=xx$centers
-      } else xx=NA, silent=T)    
-    } else xx=NA
-    try(regions$imba[i] <- round( min(xx)/max(xx) ,2), silent=T)
-  }
-}
-return(list('regions'=regions,'regionIx'=regionIx))
-}
-###
-equals <- function(a,b) {
-## a helper function in case factors are compared
-a <- as.character(a)
-b <- as.character(b)
-return (a==b)
-}
-###
-nrows <- function(data) dim(data)[1] ## don't ask me about this one.
-###
-load.txt <- function(file) { ## just to be rid of "sep"
-  read.csv(file,sep='\t') 
-}
-###
-save.txt <- function(data,file='temp', row.names=F, col.names=T, append=F) {   ## simplified for convenience
-  write.table(data,file=file,sep='\t',quote=F,row.names=row.names, col.names=col.names, append=append)
-}
-###
-deChrom_ucsc <- function(data) {
-##  chroms in 1:24 and chr1:chrY format conversion
-if (length(data)==0) return (data)
-keep_index=NULL
-CHR=rep(0,length(data))
-existing=unique(data)
-for (i in 1:length(existing))   {
-  temp=strsplit(as.character(existing[i]),'_')
-  temp=strsplit(temp[[1]],'chr')[[1]][2]
-  if (temp=='X') temp='23'
-  if (temp=='_X') temp='23'
-  if (temp=='Y') temp='24'
-  CHR[data==existing[i]]=as.integer(temp)
-}
-return (CHR)
-}
-###
-chrom_ucsc <- function(data) {
-## same as above, backwards
-  n=length(data)
-  out=rep("",n)
-  for (i in 1:n) {
-    temp=as.character(data[i])
-    if (temp=='23') temp='X'
-    if (temp=='24') temp='Y'
-    out[i]=paste('chr',temp, sep='')
-  }
-  return(out)
-}
-###
-subplot <- function(x, y) viewport(layout.pos.col=x, layout.pos.row=y) ## for subplots
-vplayout <- function(x, y) {        
-  grid.newpage()
-  pushViewport(viewport(layout=grid.layout(y,x)))
-}
-###
-getSubdirs <- function() {
-## Reach for the subdirectories of current directory.
-  dir=dir()
-  here=getwd()
-  n=length(dir)
-  subdir=F
-  dir_ix=rep(FALSE,n)
-  for (i in 1:n) {
-    try(setwd(dir[i]), silent=T)
-    if (getwd()!=here) { # in another directory
-      setwd('..')
-      subdir=T
-      dir_ix[i]=T
-    }
-  }
-  if (subdir) return (dir[dir_ix])
-  return (NULL)
-}
-###
-thisSubdir <- function() { 
-## get the name of this subdirectory
-  here=getwd()
-  here=strsplit(here,'/')[[1]]
-  here=here[length(here)]
-  return(here)
-}
-###
-mySorter <- function(data) {
-## sort data on $Chromosome and $Start
-  temp=data[order(data$Start),]
-  t=deChrom_ucsc(temp$Chromosome)
-  temp=temp[!is.na(t),]
-  t=t[!is.na(t)]
-  temp=temp[order(t),]
-  return (temp)
-}
-###
-probesInRegions <- function(regions,probes) {
-## This function is not in use.
-  rchr=as.character(regions$Chromosome)
-  pchr=as.character(probes$Chr)
-  nprobes=0
-
-  for (i in 1:length(rchr)) {
-    t=strsplit(rchr[i],'chr')[[1]][2]
-    here=probes[(t==pchr),] #finding probe subset on this chr
-    matching=(here$Position>=regions$Start[i]) & (here$Position<=regions$End[i])
-    nprobes[i]=sum(matching)
-  }
-  return(nprobes)
-}
-###
-getOverlap <- function(s1,e1,s2,e2) { ## This is not currently used anywhere - it is destined for group studies and pileups.
-  return(max((min(e1,e2)-max(s1,s2)),0))
-}
-###
-weightedMedian <- function(data,weights) {
-    try( if (length(weights)==0) return (NULL), silent=T)
-    try( return (median(rep(data,weights),na.rm=T)), silent=T)
-    return (NULL)
-}
-weightedMean <- function(data,weights) {
-    if (length(weights)==0) return (NULL)
-    return (mean(rep(data,weights),na.rm=T))
-}
-###
-myMeans <- function(data,k) { ## Not currently used
-    best <- 0
-    clus <- list()
-    for (i in 1:10) {
-    clus[[1]] <- kmeans(data,k)
-    var <- var(clus[[1]]$centers)
-    if (var>best) {
-        ix <- i
-        best <- var
-    }
-    }
-    return (clus[[ix]])
-}
-is.even <- function(data) {
-    return (trunc(data/2)*2 == data)
-}
-is.odd <- function(data) {
-    return (trunc(data/2)*2 != data)
-}
-neighbour <- function(data) {
-    n <- length(data)
-    dif <- data[2:n]-data[1:(n-1)]
-    return (mean(dif))
-}
-is.autosome <- function(vector) {
-    temp <- deChrom_ucsc(vector)
-    return (temp<23)
-}
-
-## 
-###
-findCNs <- function(Log2,alf,allRegions,name=thisSubdir(),dmin=0.9,maxCn=10,ceiling=1,sampleInfo=NULL) {
-## This function takes an estimate of the Log-R of copy number two (shift) and the difference in log-R between copy numbers 2 and 1 (delta)
-## (3 and 2 works too). Then, the Log-R and Allelic Imbalance Ratio of all possible copy number variants up to maxCn are estimated from
-## the Log-R and Allelic Imbalance Ratio of all the segments. This function will NOT be useful unless there is already a solid estimate 
-## of 'shift' and 'delta'. See the previous function.
-
-    shift=sampleInfo$cn2
-    delta=sampleInfo$delta
-
-    tix=NULL     #temporary index
-    int=NULL     ## contains Log-R estimate of each (total) copy number
-    ai=NULL         ## contains Allelic Imbalance Ratio estimate of each copy number variant.
-    regions <- allRegions$regions
-    regions <- regions[(is.autosome(regions$Chromosome)&regions$lengthMB>1)&(!is.na(regions$imba)),] ## will use these regions
-
-    ## likely cn2 regions sit within delta/3 from shift.
-    expectedAt <- shift
-    tix$cn2 <- abs(regions$log2 - expectedAt) < (delta/3)    ## index of likely cn2 regions
-    temp <- regions[tix$cn2,]                                ## cn2 regions
-    med <- weightedMedian(temp$log2,temp$probes)            ## improved value of Log-R at cn2 (returns NULL if theres nothing there)
-    int$cn2 <- ifelse(!is.null(med),med,expectedAt)            ## saved to int.
-
-    ## likely cn1 regions sit at about cn2 - delta:
-    d <- delta                                                 ## the (Log-R) distance to cn1
-    expectedAt <- int$cn2-d                                     ## cn1 is expected here
-    tix$cn1 <- abs(regions$log2 - expectedAt) < (d/3)        ## index of likely cn1 regions
-    temp <- regions[tix$cn1,]
-    med <- weightedMedian(temp$log2,temp$probes)
-    int$cn1 <- ifelse(!is.null(med),med,expectedAt)
-
-    ## likely cn0 regions sit below cn1 - delta:
-    d <- int$cn2-int$cn1
-    expectedAt <- int$cn1-d                                     
-    tix$cn0 <- regions$log2 < expectedAt
-    temp <- regions[tix$cn0,]
-    med <- weightedMedian(temp$log2,temp$probes)
-    int$cn0 <- ifelse(!is.null(med),med,expectedAt)
-
-    ## likely cn3 regions sit at about cn2+delta*dmin (dmin is about 0.9, the factor by which the distance between consecutive copy numbers diminish)
-    d <- delta*dmin
-    expectedAt <- int$cn2+d
-    tix$cn3 <- abs(regions$log2 - expectedAt) < (d/3)
-    temp <- regions[tix$cn3,]
-    med <- weightedMedian(temp$log2,temp$probes)
-    int$cn3 <- ifelse(!is.null(med),med,expectedAt)
-
-    ## cn4 follows at ...
-    d <- dmin*(int$cn3-int$cn2) 
-    expectedAt <- int$cn3+d
-    tix$cn4 <- abs(regions$log2 - expectedAt) < (d/4)
-    temp <- regions[tix$cn4,]
-    med <- weightedMedian(temp$log2,temp$probes)
-    int$cn4 <- ifelse(!is.null(med),med,expectedAt)
-
-    ## generalized for higher cns
-    for (cn in 5:maxCn) {
-    thisCn <- paste('cn',cn,sep='')
-    prevCn <- paste('cn',cn-1,sep='')
-    pprevCn <- paste('cn',cn-2,sep='')
-    d <- dmin*(int[prevCn][[1]]-int[pprevCn][[1]])
-    expectedAt <- int[prevCn][[1]]+d
-    tix[[thisCn]] <- abs(regions$log2 - expectedAt) < (d/5)
-    temp <- regions[tix[thisCn][[1]],]
-    med <- weightedMedian(temp$log2,temp$probes)
-    int[thisCn] <- ifelse(!is.null(med),med,expectedAt)
-    }
-
-
-    ## at cn2, find the variant clusters (normal and CNNLOH)
-    ix <- (abs(regions$log2 - int$cn2) < 0.2*(int$cn3-int$cn2) ) # taking only closely-matching segments
-    data <- regions[ix,]
-    data <- data[!is.na(data$imba),] # ...with a calculated allelic imbalance.
-
-    ix <- (abs(regions$log2 - int$cn3) < 0.2*(int$cn4-int$cn3) )
-    data3 <- regions[ix,]
-    data3 <- data[!is.na(data$imba),]
-    expectedAt <- 0.1
-    ix <- data$imba<min(data3$imba)
-    med <- weightedMedian(data$imba[ix],data$snps[ix]) ## Average allelic imbalance weighted on snp count
-    ai$cn2m1 <- ifelse (!is.null(med),med,expectedAt)
-
-    expectedAt <- sampleInfo$loh
-    ix <- abs(data$imba-sampleInfo$loh) < 0.2* (sampleInfo$loh-ai$cn2m1)
-    med <- weightedMedian(data$imba[ix],data$snps[ix]) 
-    ai$cn2m0 <- ifelse (!is.null(med),med,expectedAt)
-    
-
-  
-    ## for cn1 (and 0)
-    ix <- (abs(regions$log2 - int$cn1) < 0.2*(int$cn2-int$cn1) )
-    data <- regions[ix,]
-    data <- data[!is.na(data$imba),]
-    expectedAt <- (ai$cn2m0+ai$cn2m1)*3/5     ## Decent estimate.
[TRUNCATED]

To get the complete diff run:
    svnlook diff /svnroot/patchwork -r 175


More information about the Patchwork-commits mailing list