[Robast-commits] r301 - in pkg/RobLoxBioC: . inst/scripts man
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Wed Apr 29 12:13:12 CEST 2009
Author: stamats
Date: 2009-04-29 12:13:11 +0200 (Wed, 29 Apr 2009)
New Revision: 301
Modified:
pkg/RobLoxBioC/DESCRIPTION
pkg/RobLoxBioC/inst/scripts/IlluminaExample.R
pkg/RobLoxBioC/man/0RobLoxBioC-package.Rd
Log:
reduced the Illumina example code to relevant part ...
Modified: pkg/RobLoxBioC/DESCRIPTION
===================================================================
--- pkg/RobLoxBioC/DESCRIPTION 2009-04-25 09:42:50 UTC (rev 300)
+++ pkg/RobLoxBioC/DESCRIPTION 2009-04-29 10:13:11 UTC (rev 301)
@@ -1,6 +1,6 @@
Package: RobLoxBioC
Version: 0.5
-Date: 2009-04-25
+Date: 2009-04-29
Title: Infinitesimally robust estimators for preprocessing omics data
Description: Functions for the determination of optimally robust
influence curves and estimators for preprocessing omics data,
Modified: pkg/RobLoxBioC/inst/scripts/IlluminaExample.R
===================================================================
--- pkg/RobLoxBioC/inst/scripts/IlluminaExample.R 2009-04-25 09:42:50 UTC (rev 300)
+++ pkg/RobLoxBioC/inst/scripts/IlluminaExample.R 2009-04-29 10:13:11 UTC (rev 301)
@@ -272,210 +272,8 @@
###########################################################
-## Read targets information
-targets <- read.table("./SpikeInData/spike_targets.txt",header=TRUE)
-
-
-###########################################################
-## Read text and tif files from all BeadChips from spike experiment
-## sharpen images, no local background subtraction
-bld.sharpen.nobgc <- readIllumina(path = "./SpikeInData", textType=".csv",
- arrayNames=targets$ArrayNo,
- imageManipulation="sharpen",
- backgroundMethod="none")
-save(bld.sharpen.nobgc, file="bld.sharpen.nobgc.rda")
-rm(bld.sharpen.nobgc); gc()
-
-
-
-## Take the values found in the bead level text files as sharpened, subtracted intensities
-bld.sharpen.subtract <- readIllumina(path = "./SpikeInData", textType=".csv",
- arrayNames=targets$ArrayNo,
- useImages=FALSE,
- backgroundMethod="none")
-save(bld.sharpen.subtract, file="bld.sharpen.subtract.rda")
-rm(bld.sharpen.subtract); gc()
-
-## sharpen images, with local background subtraction and
-## normal-exponential convolution model (avoids negative background
-## corrected intensities)
-bld.sharpen.normexp <- readIllumina(path = "./SpikeInData", textType=".csv",
- arrayNames=targets$ArrayNo,
- useImages=FALSE,
- backgroundMethod="normexp")
-save(bld.sharpen.normexp, file="bld.sharpen.normexp.rda")
-rm(bld.sharpen.normexp); gc()
-
-
-## no sharpening, no local background subtraction
-bld.nosharpen.nobgc <- readIllumina(path = "./SpikeInData", textType=".csv",
- arrayNames=targets$ArrayNo,
- imageManipulation="none",
- backgroundMethod="none")
-save(bld.nosharpen.nobgc, file="bld.nosharpen.nobgc.rda")
-rm(bld.nosharpen.nobgc); gc()
-
-## no sharpening with local background subtraction
-bld.nosharpen.subtract <- readIllumina(path = "./SpikeInData", textType=".csv",
- arrayNames=targets$ArrayNo,
- imageManipulation="none",
- backgroundMethod="subtract")
-save(bld.nosharpen.subtract, file="bld.nosharpen.subtract.rda")
-rm(bld.nosharpen.subtract); gc()
-
-
-###########################################################
-## Figure 1 - plots of foreground and background intensities
-###########################################################
-## Read text and tif files from first array only (to save time and memory)
-## sharpen images, no local background subtraction
-B <- readIllumina(path = "./SpikeInData", textType=".csv",
- arrayNames=targets$ArrayNo[1:12],
- imageManipulation="sharpen",
- backgroundMethod="none")
-
-## sharpen images, with local background subtraction
-B.bc <- backgroundCorrect(B, method="subtract")
-
-## Setup colours
-pal <- brewer.pal(6, "Set1")
-
-ylim <- c(8.5,10.5)
-names <- c("Array1 Strip1", "Array1 Strip2", "Array2 Strip1", "Array2 Strip2",
- "Array3 Strip1", "Array3 Strip2", "Array4 Strip1", "Array4 Strip2",
- "Array5 Strip1", "Array5 Strip2", "Array6 Strip1", "Array6 Strip2")
-
-pdf("Figure1.pdf", width=12, height=8)
-par(mfrow=c(1,2), mar=c(6,3,1.5,0.15), oma=c(0,1,0,0))
-boxplotBeads(B, names=names, las=2, outline=FALSE, col=rep(pal, each=2),
- main="Raw Foreground", what="G", ylim=ylim, ylab="", medlwd=1)
-boxplotBeads(B, names=names, las=2, outline=FALSE, col=rep(pal, each=2),
- main="Raw Background", what="Gb", ylim=ylim, ylab="", medlwd=1)
-mtext("log2 intensity", side=2, outer=TRUE)
-dev.off()
-
-
-###########################################################
-## Derive low-level properties of Illumina data quoted in the text
-## Number of negative corrected intensities from first BeadChip
-negs <- numeric(12)
-for(i in 1:12)
- negs[i] <- sum(B.bc[[i]]$G<0)
-
-## Percentage negative from first BeadChip - sharpened
-round(negs/numBeads(B.bc)*100,2)
-# [1] 1.04 0.76 0.93 0.68 0.92 0.68 0.94 0.82 0.84 0.74 0.90 0.82
-# old figures [1] 7.87 7.53 7.32 6.95 6.94 6.29 6.18 5.91 5.54 5.25 4.80 4.92
-
-## quartiles
-quantile(round(negs/numBeads(B.bc)*100,2), c(0,0.25,0.5,0.75,1))
-# 0% 25% 50% 75% 100%
-# 0.6800 0.7550 0.8300 0.9225 1.0400
-
-## How many missing beads (contaminated left-hand edge - 0 intensities)
-artefact = NULL
-for(i in 1:12)
- artefact[i] = sum(B[[i]]$G==0)
-
-## Percentage negative from first BeadChip - sharpened
-round(artefact/numBeads(B)*100,2)
-# [1] 7.32 7.21 6.88 6.71 6.49 6.08 5.70 5.56 5.16 4.96 4.42 4.67
-
-## quartiles
-quantile(round(artefact/numBeads(B)*100,2), c(0,0.25,0.5,0.75,1))
-# 0% 25% 50% 75% 100%
-# 4.4200 5.1100 5.8900 6.7525 7.3200
-
-## Calculate median background level on both original and log2-scale
-bgorig = NULL
-bglog = NULL
-for(i in 1:12) {
- bgorig[[i]] = getArrayData(B, wh="Gb", log=FALSE, array=i)
- bglog[[i]] = getArrayData(B, wh="Gb", log=TRUE, array=i)
-}
-
-## log2-scale
-sapply(bglog, FUN="median", na.rm=TRUE)
-# [1] 9.308339 9.308339 9.308339 9.306062 9.306062 9.306062 9.308339 9.306062 9.308339 9.306062 9.308339 9.308339
-
-## original scale
-sapply(bgorig, FUN="median")
-# [1] 634 634 634 633 633 633 634 633 634 633 634 634
-
-
-###########################################################
-## Number of negative corrected intensities from full data set - sharpened
-load("bld.sharpen.subtract.rda")
-narrays <- length(arrayNames(bld.sharpen.subtract))
-
-negs <- numeric(12)
-for(i in 1:narrays)
- negs[i] = sum(bld.sharpen.subtract[[i]]$G<0)
-
-## Percentage negative from full data set
-round(negs/numBeads(bld.sharpen.subtract)*100,2)
-# [1] 1.04 0.76 0.93 0.68 0.92 0.68 0.94 0.82 0.84 0.74 0.90 0.82 0.29 0.13 0.54
-#[16] 0.15 0.41 0.22 0.29 0.15 0.38 0.14 0.41 0.13 0.61 0.19 0.46 0.13 0.52 0.18
-#[31] 0.38 0.21 0.38 0.17 0.40 0.16 0.29 0.10 0.48 0.11 0.28 0.12 0.32 0.15 0.39
-#[46] 0.15 0.32 0.14 0.50 0.17 0.46 0.26 0.64 0.22 0.57 0.16 0.52 0.34 0.49 0.22
-#[61] 0.61 0.14 0.63 0.29 0.61 0.20 0.54 0.14 0.39 0.20 0.48 0.19 0.50 0.28 0.79
-#[76] 0.32 0.75 0.30 0.87 0.21 0.63 0.32 0.81 0.30 0.22 0.18 0.41 0.22 0.57 0.14
-#[91] 0.46 0.23 0.22 0.12 0.55 0.13
-
-# quartiles
-quantile(round(negs/numBeads(bld.sharpen.subtract)*100,2), c(0,0.25,0.5,0.75,1))
-# 0% 25% 50% 75% 100%
-#0.100 0.190 0.320 0.555 1.040
-
-
-###########################################################
-## Create bead summary data for each pre-processing option using
-## the default Illumina method of removing outliers > 3 MADs
-## from the median before averaging the intensities
-
-## sharpen images, no local background subtraction
-load("bld.sharpen.nobgc.rda")
-bsd.sharpen.nobgc <- createBeadSummaryData(bld.sharpen.nobgc, log=TRUE,
- imagesPerArray=2, what="G",
- method="illumina", n=3)
-rm(bld.sharpen.nobgc)
-
-## sharpen images, with local background subtraction
-load("bld.sharpen.subtract.rda")
-bsd.sharpen.subtract <- createBeadSummaryData(bld.sharpen.subtract, log=TRUE,
- imagesPerArray=2, what="G",
- method="illumina", n=3)
-bsd.sharpen.subtract.raw <- createBeadSummaryData(bld.sharpen.subtract, log=FALSE,
- imagesPerArray=2, what="G",
- method="illumina", n=3)
-rm(bld.sharpen.subtract)
-
-
-## sharpen images, with local background subtraction and
-## normal-exponential convolution model (avoids negative background
-## corrected intensities)
-load("bld.sharpen.normexp.rda")
-bsd.sharpen.normexp <- createBeadSummaryData(bld.sharpen.normexp, log=TRUE,
- imagesPerArray=2, what="G",
- method="illumina", n=3)
-rm(bld.sharpen.normexp)
-
-## no sharpening, no local background subtraction
-load("bld.nosharpen.nobgc.rda")
-bsd.nosharpen.nobgc <- createBeadSummaryData(bld.nosharpen.nobgc, log=TRUE, imagesPerArray=2, what="G", method="illumina", n=3)
-rm(bld.nosharpen.nobgc)
-
-## no sharpening, with local background subtraction
-load("bld.nosharpen.subtract.rda")
-bsd.nosharpen.subtract = createBeadSummaryData(bld.nosharpen.subtract, log=TRUE, imagesPerArray=2, what="G", method="illumina", n=3)
-rm(bld.nosharpen.subtract)
-
-## Save summary data
-save(bsd.sharpen.nobgc, bsd.sharpen.subtract, bsd.sharpen.normexp, bsd.nosharpen.nobgc, bsd.nosharpen.subtract, file="bsd.log.ill.rda")
-
-
-###########################################################
-## Figure 2 - plot results from simulation study (varying number of outliers
+## Modified Figure 2 of Dunning et al (2008)
+## plot results from simulation study (varying number of outliers
## were introduced at the saturation level to assess how many outliers can be
## tolerated before serious bias is introduced in to the ## expression measures)
@@ -840,857 +638,3 @@
points(x, log2(avevar.rmx.log.array2)[sel], type="l", col=5, lwd=lwd)
mtext("% outliers simulated", side=1, outer=TRUE)
dev.off()
-
-###########################################################
-## Figure 3 - plot intensities and variances for spike probes only
-pal <- brewer.pal(6, "Set1")
-
-## Read in spike intensities (file supplied by Illumina) to retrieve IDs
-spikecsv <- read.csv("spikeins_profile.csv")
-spikeIDs <- as.character(spikecsv$ProbeID)
-
-nspikes <- length(spikeIDs)
-nprobes <- nrow(exprs(bsd.sharpen.subtract))
-
-spikeInd <- NULL
-for(i in 1:33) {
- spikeInd <- c(spikeInd, seq(1:nprobes)[rownames(exprs(bsd.sharpen.subtract))==spikeIDs[i]])
- }
-
-
-concs <- rep(c(1000,300,100,30,10,3),4)
-concs <- c(concs, rep(c(1,0.3,0.1,0.03,0.01,0),4))
-concs <- rep(concs,3)
-
-o <- order(concs, decreasing=TRUE)
-
-allExprs <- cbind(exprs(bsd.sharpen.nobgc), exprs(bsd.sharpen.subtract),exprs(bsd.sharpen.normexp))
-
-#se.exprs(bsd.nosharp.nobgc) = assayData(bsd.nosharp.nobgc)$BeadStDev
-#se.exprs(bsd.sharpen.subtract) = assayData(bsd.sharpen.subtract)$BeadStDev
-
-allVar <- cbind(NoBeads(bsd.sharpen.nobgc)*(se.exprs(bsd.sharpen.nobgc))^2,
- NoBeads(bsd.sharpen.subtract)*(se.exprs(bsd.sharpen.subtract))^2,
- NoBeads(bsd.sharpen.normexp)*(se.exprs(bsd.sharpen.normexp))^2)
-
-allVar <- log2(allVar)
-
-
-pdf("Figure3.pdf", width=12, height=16)
-
-par(mfrow=c(2,1), mar=c(2,4,1.5,0.15))
-boxplot(as.data.frame(allExprs[spikeInd,o]), col=c(rep(pal[1],4), rep(pal[2],4),rep(pal[3],4)),
- outline=FALSE, xlab="", ylab="Non-normalised log2 intensity", names=NULL, xaxt="n",
- ylim=c(4,16), main="A",font.main=2,medlwd=1)
-abline(v=c(13,25,37,49,61,73,85,97,109,121,133)-0.5, lty=2)
-legend("topright", c("No background","Subtract","Normexp"), fill=c(pal[1], pal[2], pal[3]), bg="white")
-axis(side=1,at=c(13,25,37,49,61,73,85,97,109,121,133,145)-6,
- labels=c("1000pM", "300pM", "100pM", "30pM", "10pM", "3pM", "1pM", "0.3pM", "0.1pM", "0.03pM", "0.01pM", "0pM"))
-boxplot(as.data.frame(allVar[spikeInd,o]), col=c(rep(pal[1],4), rep(pal[2],4),rep(pal[3],4)),
- outline=FALSE, xlab="", ylab="Non-normalised log2 variance",names=NULL, xaxt="n",
- main="B", font.main=2,medlwd=1)
-abline(v=c(13,25,37,49,61,73,85,97,109,121,133)-0.5, lty=2)
-axis(side=1,at=c(13,25,37,49,61,73,85,97,109,121,133,145)-6,
- labels=c("1000pM", "300pM", "100pM", "30pM", "10pM", "3pM", "1pM", "0.3pM", "0.1pM", "0.03pM", "0.01pM", "0pM"))
-dev.off()
-
-
-###########################################################
-## Figure 4 - plot intensities of the spike probes at each concentration
-## Average over replicate arrays, separate line for each probe
-## Use sharpened, background corrected intensities
-targets <- read.table("spike_targets.txt",header=TRUE)
-## set up colours
-cls <- rainbow(33)
-
-arraynms <- as.character(targets$ArrayNo)
-narrays <- length(arraynms)
-
-## set up design matrix for linear model
-design <- cbind(as.numeric(targets$SpikeConc==1000),
- as.numeric(targets$SpikeConc==300),
- as.numeric(targets$SpikeConc==100),
- as.numeric(targets$SpikeConc==30),
- as.numeric(targets$SpikeConc==10),
- as.numeric(targets$SpikeConc==3),
- as.numeric(targets$SpikeConc==1),
- as.numeric(targets$SpikeConc==0.3),
- as.numeric(targets$SpikeConc==0.1),
- as.numeric(targets$SpikeConc==0.03),
- as.numeric(targets$SpikeConc==0.01),
- as.numeric(targets$SpikeConc==0))[seq(1,narrays, by=2),]
-
-colnames(design) <- paste("pm", unique(targets$SpikeConc), sep="")
-
-## set up contrasts matrix for the most similar concentration differences
-conts <- makeContrasts(pm1000-pm300, pm300-pm100, pm100-pm30, pm30-pm10,
- pm10-pm3, pm3-pm1, pm1-pm0.3, pm0.3-pm0.1, pm0.3-pm0.1,
- pm0.1-pm0.03,pm0.03-pm0.01, levels=design)
-
-spikecsv <- read.csv("spikeins_profile.csv")
-spikeIDs <- as.character(spikecsv$ProbeID)
-targetnames <- as.character(spikecsv$TargetID)
-
-nspikes <- length(spikeIDs)
-nprobes <- nrow(exprs(bsd.sharpen.subtract))
-
-
-spikeInd <- NULL
-for(i in 1:nspikes) {
- spikeInd <- c(spikeInd, seq(1:nprobes)[rownames(exprs(bsd.sharpen.subtract))==spikeIDs[i]])
-}
-
-
-concs <- rep(c(1000,300,100,30,10,3),4)
-concs <- c(concs, rep(c(1,0.3,0.1,0.03,0.01,0),4))
-concs <- rep(c(1000,300,100,30,10,3),4)
-concs <- c(concs, rep(c(1,0.3,0.1,0.03,0.01,0),4))
-
-o <- order(concs, decreasing=TRUE)
-
-l1 <- lmFit(exprs(bsd.sharpen.subtract), design=design)
-
-pdf("Figure4.pdf", width=12, height=12)
-par(mar=c(4,4,1.5,0.15))
-plot(l1$coeff[spikeInd[1],], ylim=range(5.5,16), type="n", ylab="log2 intensity",
- xlab="Concentration",xaxt="n", cex=0.6, main="Non-normalised intensities of the spike genes")
-for(i in 1:33){
- text(labels=targetnames[i],x=1:12,y=as.numeric(l1$coeff[spikeInd[i],]),col=cls[i],pch=16,cex=0.8)
- plot.smooth.line(1:12, as.numeric(l1$coeff[spikeInd[i],]), col=cls[i])
-}
-axis(side=1, at=1:12, labels=c("1000pM", "300pM", "100pM", "30pM", "10pM", "3pM", "1pM",
- "0.3pM", "0.1pM", "0.03pM", "0.01pM", "0pM"))
-dev.off()
-
-
-###########################################################
-## Figure 5 - MA plots of data
-## without background adjustment, with background subtracted data
-## and background corrected and normalised data
-spikeInd <- NULL
-for(i in 1:nspikes) {
- spikeInd <- c(spikeInd, seq(1:nprobes)[rownames(exprs(bsd.sharpen.subtract))==spikeIDs[i]])
-}
-
-controlInfo <- read.table("SpikesAveragedControls.txt", sep="\t", header=T)
-
-controlInfo <- controlInfo[,grep("Signal", colnames(controlInfo))]
-
-# 7th row are theaverages of the negative control beads
-negControls <- controlInfo[7,]
-negControls <- as.numeric(negControls)
-negControlsMatrix <- matrix(rep(negControls, each=nprobes), nprobes, narrays/2)
-
-B <- exprs(bsd.sharpen.subtract.raw) - negControlsMatrix
-bsd.bgnorm <- assayDataElementReplace(bsd.sharpen.subtract.raw, "exprs",log2(B))
-
-MAplot <- function(bsd, a1, a2,...){
- M <- exprs(bsd)[,a1] - exprs(bsd)[,a2]
- A <- 0.5*(exprs(bsd)[,a1] + exprs(bsd)[,a2])
-
- smoothScatter(A, M, pch=16, cex=0.5,...)
- points(A[spikeInd], M[spikeInd], col="red", pch=16)
-
- abline(h=c(log2(3),0), lty=2)
-}
-
-xlim <- c(-1,16)
-ylim <- c(-3,3)
-
-
-pdf("Figure5.pdf", width=12, height=8)
-par(mfrow=c(1,3))
-par(oma=c(1.1,1.1,0,0))
-par(mar=c(2.2,2.2,1.5,0.15))
- MAplot(bsd.sharpen.nobgc, 24, 25,xlim=xlim, ylim=ylim, ylab="", xlab="")
- mtext("A", side=3, font=2)
-
- MAplot(bsd.sharpen.subtract,24, 25, xlim=xlim, ylim=ylim, ylab="", xlab="")
- mtext("B", side=3, font=2)
-
- MAplot(bsd.bgnorm, 24, 25, xlim=xlim, ylim=ylim, ylab="", xlab="", nbin=512)
- mtext("C", side=3, font=2)
-
- mtext("log-ratios", side=2, outer=TRUE)
- mtext("average expression", side=1, outer=TRUE)
-dev.off()
-
-
-## percentage negative values after background normalisation
-apply(B<0, 2, FUN="sum", na.rm=TRUE)/dim(B)[1]*100
-
-round(quantile(apply(B<0, 2, FUN="sum", na.rm=TRUE)/dim(B)[1]*100, c(0,0.25,0.5,0.75,1)),2)
-
-
-###########################################################
-## Fitting the linear model to separate background methods
-
-## Make design matrix with 12 columns, one for each concentration
-targets <- read.table("spike_targets.txt",header=TRUE)
-
-arraynms <- as.character(targets$ArrayNo)
-narrays <- length(arraynms)
-
-design <- cbind(as.numeric(targets$SpikeConc==1000),
- as.numeric(targets$SpikeConc==300),
- as.numeric(targets$SpikeConc==100),
- as.numeric(targets$SpikeConc==30),
- as.numeric(targets$SpikeConc==10),
- as.numeric(targets$SpikeConc==3),
- as.numeric(targets$SpikeConc==1),
- as.numeric(targets$SpikeConc==0.3),
- as.numeric(targets$SpikeConc==0.1),
- as.numeric(targets$SpikeConc==0.03),
- as.numeric(targets$SpikeConc==0.01),
- as.numeric(targets$SpikeConc==0))[seq(1,narrays, by=2),]
-
-colnames(design) <- paste("pm", unique(targets$SpikeConc), sep="")
-
-## Make pairwise contrasts between all concentrations
-contr.1000 <- makeContrasts(pm1000-pm300, pm1000-pm100, pm1000-pm30, pm1000-pm10, pm1000-pm3,
- pm1000-pm1, pm1000-pm0.3, pm1000-pm0.1, pm1000-pm0.03,
- pm1000-pm0.01, pm1000-pm0, levels=design)
-contr.300 <- makeContrasts(pm300-pm100, pm300-pm30, pm300-pm10, pm300-pm3, pm300-pm1,
- pm300-pm0.3, pm300-pm0.1, pm300-pm0.03, pm300-pm0.01, pm300-pm0,
- levels=design)
-contr.100 <- makeContrasts(pm100-pm30, pm100-pm10, pm100-pm3, pm100-pm1, pm100-pm0.3,
- pm100-pm0.1, pm100-pm0.03, pm100-pm0.01, pm100-pm0, levels=design)
-contr.30 <- makeContrasts(pm30-pm10, pm30-pm3, pm30-pm1, pm30-pm0.3, pm30-pm0.1, pm30-pm0.03,
- pm30-pm0.01, pm30-pm0, levels=design)
-contr.10 <- makeContrasts(pm10-pm3, pm10-pm1, pm10-pm0.3, pm10-pm0.1, pm10-pm0.03,levels=design)
-contr.3 <- makeContrasts(pm3-pm1, pm3-pm0.3, pm3-pm0.1, pm3-pm0.03, levels=design)
-contr.1 <- makeContrasts(pm1-pm0.3, pm1-pm0.1, pm1-pm0.03, levels=design)
-contr.0.3 <- makeContrasts(pm0.3-pm0.1, pm0.3-pm0.03, levels=design)
-contr.0.1 <- makeContrasts(pm0.1-pm0.03, levels=design)
-
-contr.all <- cbind(contr.1000,contr.300,contr.100,contr.30,contr.10, contr.3, contr.1, contr.0.3, contr.0.1)
-
-
-## Normalise and fit the model to sharpened subtracted data
-fit.sharpen.subtract <- lmFit(normalizeQuantiles(exprs(bsd.sharpen.subtract)), design=design)
-contr.sharpen.subtract <- contrasts.fit(fit.sharpen.subtract, contr.all)
-
-## Empirical bayes smoothing of variances
-contr.sharpen.subtract <- eBayes(contr.sharpen.subtract)
-
-## Calculate a weight for each bead type using the standard error
-## and number of replicate observations
-w.probes <- se.exprs(bsd.sharpen.subtract)^2*bsd.sharpen.subtract at assayData$NoBeads
-w.probes <- 1/w.probes
-
-##Re-fit the model using weights
-fit.pwts.subtract <- lmFit(normalizeQuantiles(exprs(bsd.sharpen.subtract)),
- design=design, weights=w.probes)
-contr.pwts <- contrasts.fit(fit.pwts.subtract, contr.all)
-contr.pwts.subtract <- eBayes(contr.pwts)
-
-fit.sharpen.nobgc <- lmFit(normalizeQuantiles(exprs(bsd.sharpen.nobgc)), design=design)
-contr.sharpen.nobgc <- contrasts.fit(fit.sharpen.nobgc, contr.all)
-contr.sharpen.nobgc <- eBayes(contr.sharpen.nobgc)
-w.probes <- se.exprs(bsd.sharpen.nobgc)^2*bsd.sharpen.nobgc at assayData$NoBeads
-
-w.probes <- 1/w.probes
-fit.pwts.nobgc <- lmFit(normalizeQuantiles(exprs(bsd.sharpen.nobgc)), design=design, weights=w.probes)
-
-contr.pwts <- contrasts.fit(fit.pwts.nobgc, contr.all)
-contr.pwts.nobgc <- eBayes(contr.pwts)
-
-fit.sharpen.normexp <- lmFit(normalizeQuantiles(exprs(bsd.sharpen.normexp)), design=design)
-contr.sharpen.normexp <- contrasts.fit(fit.sharpen.normexp, contr.all)
-contr.sharpen.normexp <- eBayes(contr.sharpen.normexp)
-w.probes <- se.exprs(bsd.sharpen.normexp)^2*bsd.sharpen.normexp at assayData$NoBeads
-
-fit.pwts.normexp <- lmFit(normalizeQuantiles(exprs(bsd.sharpen.normexp)), design=design, weights=w.probes)
-
-contr.pwts <- contrasts.fit(fit.pwts.normexp, contr.all)
-contr.pwts.normexp <- eBayes(contr.pwts)
-
-spikeInd <- match(spikeIDs, rownames(contr.sharpen.subtract$t))
-nonspikes <- setdiff(1:nrow(exprs(bsd.sharpen.subtract)), spikeInd)
-
-
-###########################################################
-## Figure 6 - log-odds and log-ratios for
-## contrast between 3pM and 1pM concentrations
-
-methods <- c("no bg", "no bg + weights", "subtract", "subtract + weights",
- "normexp", "normexp + weights")
-col <- c(pal[1],pal[1],pal[2],pal[2],pal[3],pal[3])
-
-transcol <- NULL
-for(i in 1:6){
- temp <- col2rgb(col[i])
- transcol[i] <- rgb(temp[1]/256, temp[2]/256,temp[3]/256, alpha=0.3)
-}
-
-
-pdf("Figure6.pdf",width=12, height=8)
-par(mar=c(8,4,1,0.15))
-par(mfrow=c(1,2))
-i <- 44
-cont <- cbind(contr.sharpen.nobgc$lods[,i],contr.pwts.nobgc$lods[,i],contr.sharpen.subtract$lods[,i], contr.pwts.subtract$lods[,i],contr.sharpen.normexp$lods[,i], contr.pwts.normexp$lods[,i])
-boxplot(as.data.frame(cont[spikeInd,]), main="A", ylim=c(-10,95), names=methods, las=2,
- ylab="log-odds", col=col,cex.lab=0.8)
-boxplot(as.data.frame(cont[nonspikes,]), main="", names=rep("",6), las=2, col=transcol,
- add=TRUE, pch="x")
-
-cont <- cbind(contr.sharpen.nobgc$coeff[,i], contr.pwts.nobgc$coeff[,i], contr.sharpen.subtract$coeff[,i],
- contr.pwts.subtract$coeff[,i], contr.sharpen.normexp$coeff[,i], contr.pwts.normexp$coeff[,i])
-boxplot(as.data.frame(cont[spikeInd,]), main="B", # log-ratios for contrast 3pM - 1pM",
- ylim=c(-0.2,2.5), names=methods, las=2, ylab="log-ratios", col=col,cex.lab=0.8)
-boxplot(as.data.frame(cont[nonspikes,]), main="", names=rep("",6), las=2, add=TRUE,pch="x",col=transcol)
-abline(h=log2(3),lty=2)
-dev.off()
-
-
-###########################################################
-## Figure 7 - plot intensities of negative control probes
-
-## Read in control probe summary profile (output by BeadStudio)
-controlInfo <- read.table("FullSpikeControls.txt", sep="\t",header=TRUE)
-
-## select negatives
-nControls <- which(controlInfo[,1] == "negative")
-controlInfo <- controlInfo[nControls,]
-
-sigCols <- grep("Signal", colnames(controlInfo))
-
-set.seed(13092007)
-s <- sample(1:nrow(controlInfo), 50)
-subset <- controlInfo[s,sigCols]
-
-require(Biobase)
-med <- rowMedians(subset.1)
-ordmed <- order(med)
-subset <- subset[ordmed,]
-s <- s[ordmed]
-
-pdf("Figure7.pdf",width=12, height=8)
-par(mar=c(5.2,4,1.5,0.15))
-boxplot(as.data.frame(log2(t(subset))), ylab="log2 intensity",
- las=2, xlab="", names=controlInfo[s,2], col=pal[1],
- main="Distribution of intensities for 50 negative controls across all arrays")
-dev.off()
-
-
-###########################################################
-## Figure 8 - Plotting array intensities in terms of base composition
-## quantile normalised, background corrected data is used
-
-## Read annotation file and process sequences (stored in column 8)
-
-## Plot for probes on strip 1 only
-M1 <- read.table("NewAnnotationM1.txt", sep="\t", header=T,fill=TRUE,quote="")
-
-M1probestring <- gsub(", ","",toString(as.character(M1[,8])))
-M1probestring <- (strsplit(M1probestring,split=NULL))[[1]]
-
-## Ade, Gdes, Cdes, Tdes are the A, G, C, T
-## matrices referred to in Method
-## ie 48,000 rows by 50 columns
-M1Ades <- as.numeric(M1probestring=="A")
-M1Ades <- matrix(M1Ades,ncol=50,byrow=T)
-M1Gdes <- as.numeric(M1probestring=="G")
-M1Gdes <- matrix(M1Gdes,ncol=50,byrow=T)
-M1Cdes <- as.numeric(M1probestring=="C")
-M1Cdes <- matrix(M1Cdes,ncol=50,byrow=T)
-M1Tdes <- as.numeric(M1probestring=="T")
-M1Tdes <- matrix(M1Tdes,ncol=50,byrow=T)
-rownames(M1Ades) <- rownames(M1Gdes) <- rownames(M1Cdes) <- rownames(M1Tdes) <- M1[,3]
-
-## Match the ids on the array to their position
-## in the annotation file
-m <- match(M1[,3],rownames(exprs(bsd.sharpen.subtract)))
-
-
-M1Acount <- rowSums((M1Ades)
-M1Gcount <- rowSums(M1Gdes)
-M1Ccount <- rowSums(M1Cdes)
-M1Tcount <- rowSums(M1Tdes)
-M1GCcount <- M1Gcount + M1Ccount
-M1GCdes <- M1Gdes + M1Cdes
-
-##Get probes found on the first strip only
-M1oddstrips <- which(nchar(M1[,3]) < 9)
-
-temp <- normalizeQuantiles(exprs(bsd.sharpen.subtract))[m,]
-
-var <- log2(NoBeads(bsd.sharpen.subtract)*se.exprs(bsd.sharpen.subtract)^2)[m,1]
-
-pdf("Figure8.pdf", width=12, height=14)
-par(mar=c(4,4,1,0.15))
-par(mfrow=c(4,2))
-boxplot(temp[M1oddstrips,1]~M1Acount[M1oddstrips], col=pal[1], outline=FALSE,
- ylim=c(4,11), main="Number of A bases", ylab="Normalised log2 intensity",
- varwidth=TRUE)
-abline(h=6.38)
-boxplot(temp[M1oddstrips,1]~M1Tcount[M1oddstrips], col=pal[1], outline=FALSE,
- ylim=c(4,11), main="Number of T bases", ylab="Normalised log2 intensity",
- varwidth=TRUE)
-abline(h=6.38)
-boxplot(temp[M1oddstrips,1]~M1Gcount[M1oddstrips], col=pal[1], outline=FALSE,
- ylim=c(4,11), main="Number of G bases", ylab="Normalised log2 intensity",
- varwidth=TRUE)
-abline(h=6.38)
-boxplot(temp[M1oddstrips,1]~M1Ccount[M1oddstrips], col=pal[1], outline=FALSE,
- ylim=c(4,11), main="Number of C bases", ylab="Normalised log2 intensity",
- varwidth=TRUE)
-abline(h=6.38)
-boxplot(temp[M1oddstrips,1]~M1GCcount[M1oddstrips], col=pal[1], outline=FALSE,
- ylim=c(4,11), main="GC count", ylab="Normalised log2 intensity",
- varwidth=TRUE)
-abline(h=6.38)
-boxplot(var[M1oddstrips]~M1GCcount[M1oddstrips], col=pal[1], outline=FALSE,
- main="GC Count", ylab="log2 variance", varwidth=TRUE)
-abline(h=-2.17)
-
-## Fit the model to estimate fhe relative effect of having
-## an A, C, G at each position
-l <- lm(temp[M1oddstrips,1]~M1Ades[M1oddstrips,]+M1Cdes[M1oddstrips,]+M1Gdes[M1oddstrips,])
-plot(1:50,l$coefficients[2:51], type="n", xlab="Base Position",
- main="Effect of base on log2 intensity relative to T", ylab="Effect Size",
- ylim=c(-0.4,0.4))
-text(1:50,l$coefficients[2:51], labels="A", col="red")
-text(1:50,l$coefficients[52:101], labels="C", col="blue")
-text(1:50,l$coefficients[102:151], labels="G", col="green")
-abline(h=0)
-
-l <- lm(var[M1oddstrips]~M1Ades[M1oddstrips,]+M1Cdes[M1oddstrips,]+M1Gdes[M1oddstrips,])
-plot(1:50,l$coefficients[2:51], type="n", xlab="Base Position",
- main="Effect of base on log2 variance relative to T", ylim=c(-0.4,0.4), ylab="Effect Size")
-text(1:50,l$coefficients[2:51], labels="A", col="red")
-text(1:50,l$coefficients[52:101], labels="C", col="blue")
-text(1:50,l$coefficients[102:151], labels="G", col="green")
-abline(h=0)
-
-dev.off()
-
-
-###########################################################
-## Plot the discrepancy in ranking of log-odds due to GC count
-pdf("Figure9.pdf", width=12, height=8)
-par(mar=c(4,4,1.5,0.15))
-
-## Choose the contrast 3pM - 1pM
-colnames(contr.sharpen.subtract$lods)[44]
-i <- 44
-rk <- rank(contr.sharpen.subtract$lods[,i])[m]
-boxplot(rk[M1oddstrips]~M1GCcount[M1oddstrips], col=pal[1],
- ylab="Ranking based on log-odds", xlab="GC count",
- main="3pM - 1pM", varwidth=TRUE, outline=TRUE, pch="x")
-dev.off()
-
-###############################################################################
-################## Annotation Details ##########################
-###############################################################################
-## Bead types on Strip 2 have 9 character probe IDs, everything else is on strip1
-
-M1oddstrips <- which(nchar(M1[,3]) < 9)
-M1evenstrips <- which(nchar(M1[,3]) == 9)
-
-
-## Column 21 contains comments about the mapping of the probe
-## -including intronic and intergenic matches
-intronicGenes <- grep("Intronic", M1[,21])
-intergenicGenes <- grep("Intergenic", M1[,21])
-
-probeCats <- matrix(nrow=3, ncol=2)
-colnames(probeCats) <- c("First Strip", "Second Strip")
-rownames(probeCats) <- c("Total","Intronic", "Intergenic")
-
-probeCats[1,1] <- length(M1oddstrips)
-probeCats[1,2] <- length(M1evenstrips)
-
-probeCats[2,1] <- length(intersect(intronicGenes, M1oddstrips))
-probeCats[2,2] <- length(intersect(intronicGenes, M1evenstrips))
-
-probeCats[3,1] <- length(intersect(intergenicGenes, M1oddstrips))
-probeCats[3,2] <- length(intersect(intergenicGenes, M1evenstrips))
-
-## Percentage of intronic probes on 1st strip
-probeCats[2,1]/probeCats[1,1]*100
-
-## Percentage of intergenic probes on 1st strip
-probeCats[3,1]/probeCats[1,1]*100
-
-## Percentage of intronic probes on 2nd strip
-probeCats[2,2]/probeCats[1,2]*100
-
-## Percentage of intergenic probes on 2nd strip
-probeCats[3,2]/probeCats[1,2]*100
-
-##Numbers of genes found on strip 1 and strip 2 and their intersection
-strip1Transcripts <- M1[M1oddstrips,5]
-strip2Transcripts <- M1[M1evenstrips,5]
-
-strip1Genes <- unique(M1[M1oddstrips,4])
-strip2Genes <- unique(M1[M1evenstrips,4])
-
-length(intersect(strip1Genes, strip2Genes))
-
-
-###############################################################################
-######Probe Thermodynamics calculations
-###############################################################################
-## thermodynamic properties calculated using Citation: Kibbe WA.
-## 'OligoCalc: an online oligonucleotide properties calculator'. (2007) Nucleic Acids Res. 35(webserver issue): May 25.
-
-## Load pre-calculated thermodynamic properties
-spikeTP <- read.csv("spikeThermoProperties.csv")
-
-## calculate thermodynamic properties
-deltaH <- c(-8,-5.6,-6.6,-8.2,-6.6,-8.8,-9.4,-11.8,-10.5,-10.9)
-deltaS <- c(-21.9,-15.2,-18.4,-21,-16.4,-23.5,-25.5,-29,-26.4,-28.4)
-deltaG <- c(-1.2,-0.9,-0.9,-1.7,-1.5,-1.5,-1.5,-2.8,-2.3,-2.1)
-
-deltaH <- deltaH[c(1,2,3,5,4,7,6,8,10,9)]
-deltaS <- deltaS[c(1,2,3,5,4,7,6,8,10,9)]
-deltaG <- deltaG[c(1,2,3,5,4,7,6,8,10,9)]
-
-listH <- rep(NA,17)
-listS <- rep(NA,17)
-listG <- rep(NA,17)
-
-for(k in 1:17){
- strvec <- (strsplit(as.character(spikeTP$Seq[k]),"")[[1]])
- dh <- 0
- ds <- 0
- dg <- 0
- for(i in 1:(length(strvec)-1)){
- if(strvec[i]=="G"){
- if(strvec[i+1]=="G"){
- ind <- 9
- }
- if(strvec[i+1]=="C"){
- ind <- 10
- }
- if(strvec[i+1]=="A"){
- ind <- 7
- }
- if(strvec[i+1]=="T"){
- ind<-6
- }
- }
- if(strvec[i]=="C"){
- if(strvec[i+1]=="G"){
- ind <- 8
- }
- if(strvec[i+1]=="C"){
- ind <- 9
- }
- if(strvec[i+1]=="A"){
- ind <- 5
- }
- if(strvec[i+1]=="T"){
- ind <- 4
- }
- }
- if(strvec[i]=="A"){
- if(strvec[i+1]=="G"){
- ind <- 4
- }
- if(strvec[i+1]=="C"){
- ind <- 6
- }
- if(strvec[i+1]=="A"){
- ind <- 1
- }
- if(strvec[i+1]=="T"){
- ind <- 2
- }
- }
- if(strvec[i]=="T"){
- if(strvec[i+1]=="G"){
- ind <- 5
- }
- if(strvec[i+1]=="C"){
- ind <- 7
- }
- if(strvec[i+1]=="A"){
- ind <- 3
- }
- if(strvec[i+1]=="T"){
- ind <- 1
- }
- }
- dh <- dh+deltaH[ind]
- dg <- dg+deltaG[ind]
- ds <- ds+deltaS[ind]
- }
-#dh<-dh+0.6
-#dg<-dg+3.4
-#ds<-ds-9
-
- listH[k] <- dh
- listS[k] <- ds
- listG[k] <- dg
-}
-
-listH <- -listH
-listG <- -listG
-listS <- -listS
-
-##check both sets agree
-listS - spiketherm[,8]
-listH - spiketherm[,7]
-listG - spiketherm[,6]
-
-
-###########################################################
-## Draw figure 10
-pdf("Figure10.pdf", width=10, height=10)
-par(mar=c(5.2,4,1.5,0.15))
-plot(cor(cbind(spikeTP[,2:13], spikeTP[,15:19]))[1:12,14], axes=F, xlab="concentration",
[TRUNCATED]
To get the complete diff run:
svnlook diff /svnroot/robast -r 301
More information about the Robast-commits
mailing list