[Genabel-commits] r1553 - pkg/MetABEL/R
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Tue Jan 21 16:00:30 CET 2014
Author: lckarssen
Date: 2014-01-21 16:00:30 +0100 (Tue, 21 Jan 2014)
New Revision: 1553
Modified:
pkg/MetABEL/R/metagwa.tables.R
Log:
Made one of the MetABEL R source files adhere more to our coding standards:
- remove spaces at end of line
- insert space after comma
- insert space between operators
- etc.
No functional changes.
Modified: pkg/MetABEL/R/metagwa.tables.R
===================================================================
--- pkg/MetABEL/R/metagwa.tables.R 2014-01-21 14:33:19 UTC (rev 1552)
+++ pkg/MetABEL/R/metagwa.tables.R 2014-01-21 15:00:30 UTC (rev 1553)
@@ -1,345 +1,457 @@
-"metagwa.tables" <-
-function(data.x,data.y,name.x="P1",name.y="P2",precorrect=TRUE,correct.pooled=FALSE) {
- required <- c("name","allele1","allele2","effallele","beta","sebeta")
- optional <- c("chromosome","position","n","effallelefreq")
- n.x <- names(data.x)
- essen <- match(required,n.x)
- if (any(is.na(essen))) {cat("Required fields missing in data.x:",required[is.na(essen)],"\n"); stop()}
- opt <- match(optional,n.x)
- if (any(is.na(essen))) {cat("Optional fields missing in data.x:",required[is.na(essen)],"\n");}
- n.y <- names(data.y)
- essen <- match(required,n.y)
- if (any(is.na(essen))) {cat("Required fields missing in data.y:",required[is.na(essen)],"\n"); stop()}
- opt <- match(optional,n.y)
- if (any(is.na(essen))) {cat("Optional fields missing in data.y:",required[is.na(essen)],"\n");}
-
- pos.x <- 1
- if (is.na(match("position",names(data.x)))) pos.x <- 0
- pos.y <- 1
- if (is.na(match("position",names(data.y)))) pos.y <- 0
- chr.x <- 1
- if (is.na(match("chromosome",names(data.x)))) chr.x <- 0
- chr.y <- 1
- if (is.na(match("chromosome",names(data.y)))) chr.y <- 0
+"metagwa.tables" <-
+ function(data.x, data.y, name.x="P1", name.y="P2",
+ precorrect=TRUERUE, correct.pooled=FALSE) {
- df <- merge(data.x,data.y,by="name",all.x=T,all.y=T)
+ required <- c("name", "allele1", "allele2", "effallele", "beta", "sebeta")
+ optional <- c("chromosome", "position", "n", "effallelefreq")
+ n.x <- names(data.x)
+ essen <- match(required, n.x)
-# do checks
- err1 <- which(df$allele1.x != df$effallele.x & df$allele2.x != df$effallele.x)
- if (length(err1)>0) {
- cat("Effective allele does not correspond to neither allele1 nor allele2 in",name.x,"for SNPs\n")
- print(df$name[err1])
- stop("Stopped")
- }
- err2 <- which(df$allele1.y != df$effallele.y & df$allele2.y != df$effallele.y)
- if (length(err1)>0) {
- cat("Effective allele does not correspond to neither allele1 nor allele2 in",name.x,"for SNPs\n")
- print(df$name[err1])
- stop("Stopped")
- }
- gc()
-# OK, now check that effective allele is the second one
- swall1 <- which(df$allele2.x != df$effallele.x)
- if (length(swall1)>0) {
- keep <- df$allele2.x[swall1]
- df$allele2.x[swall1] <- df$allele1.x[swall1]
- df$allele1.x[swall1] <- keep
- }
- swall2 <- which(df$allele2.y != df$effallele.y)
- if (length(swall2)>0) {
- keep <- df$allele2.y[swall2]
- df$allele2.y[swall2] <- df$allele1.y[swall2]
- df$allele1.y[swall2] <- keep
- }
- gc()
-# OK, now put TA->AT, GA->AG etc (A>T>G>C)
- recX <- which(df$allele1.x=="T" & df$allele2.x=="A")
- if (length(recX)>0) df <- swap.x(df,recX)
- recX <- which(df$allele1.x=="G" & df$allele2.x=="A")
- if (length(recX)>0) df <- swap.x(df,recX)
- recX <- which(df$allele1.x=="C" & df$allele2.x=="A")
- if (length(recX)>0) df <- swap.x(df,recX)
- recX <- which(df$allele1.x=="G" & df$allele2.x=="T")
- if (length(recX)>0) df <- swap.x(df,recX)
- recX <- which(df$allele1.x=="C" & df$allele2.x=="T")
- if (length(recX)>0) df <- swap.x(df,recX)
- recX <- which(df$allele1.x=="C" & df$allele2.x=="G")
- if (length(recX)>0) df <- swap.x(df,recX)
- recX <- which(df$allele1.y=="T" & df$allele2.y=="A")
- if (length(recX)>0) df <- swap.y(df,recX)
- recX <- which(df$allele1.y=="G" & df$allele2.y=="A")
- if (length(recX)>0) df <- swap.y(df,recX)
- recX <- which(df$allele1.y=="C" & df$allele2.y=="A")
- if (length(recX)>0) df <- swap.y(df,recX)
- recX <- which(df$allele1.y=="G" & df$allele2.y=="T")
- if (length(recX)>0) df <- swap.y(df,recX)
- recX <- which(df$allele1.y=="C" & df$allele2.y=="T")
- if (length(recX)>0) df <- swap.y(df,recX)
- recX <- which(df$allele1.y=="C" & df$allele2.y=="G")
- if (length(recX)>0) df <- swap.y(df,recX)
- gc()
-# OK, now try to unify coding between studies
- swcode <- which(df$effallele.x != df$effallele.y | df$allele1.x != df$allele1.y | df$allele2.x != df$allele2.y)
- if (length(swcode)>0) {
- df <- recode.y(df,swcode)
- recX <- which(df$allele1.y=="T" & df$allele2.y=="A")
- if (length(recX)>0) df <- swap.y(df,recX)
- recX <- which(df$allele1.y=="G" & df$allele2.y=="A")
- if (length(recX)>0) df <- swap.y(df,recX)
- recX <- which(df$allele1.y=="C" & df$allele2.y=="A")
- if (length(recX)>0) df <- swap.y(df,recX)
- recX <- which(df$allele1.y=="G" & df$allele2.y=="T")
- if (length(recX)>0) df <- swap.y(df,recX)
- recX <- which(df$allele1.y=="C" & df$allele2.y=="T")
- if (length(recX)>0) df <- swap.y(df,recX)
- recX <- which(df$allele1.y=="C" & df$allele2.y=="G")
- if (length(recX)>0) df <- swap.y(df,recX)
- }
- gc()
-# OK, now nullify ambigous coding with wrong strand info
- toerX <- which(df$allele1.x == "A" & df$allele2.x == "T" & (df$strand.x!="+" & df$strand.x!="-" & df$strand.x!="TOP" & df$strand.x!="BOT"))
- if (length(toerX)>0) {
- cat("AT coding without +/-/TOP/BOT strand info in",name.x,"\n")
- cat(length(toerX),"SNPs nullified\n")
- df$n.x[toerX] <- NA
- df$beta.x[toerX] <- NA
- }
- gc()
- toerX <- which(df$allele1.x == "G" & df$allele2.x == "C" & (df$strand.x!="+" & df$strand.x!="-" & df$strand.x!="TOP" & df$strand.x!="BOT"))
- if (length(toerX)>0) {
- cat("GC coding without +/-/TOP/BOT strand info in",name.x,"\n")
- cat(length(toerX),"SNPs nullified\n")
- df$n.x[toerX] <- NA
- df$beta.x[toerX] <- NA
- }
- gc()
- toerX <- which(df$allele1.y == "A" & df$allele2.y == "T" & (df$strand.y!="+" & df$strand.y!="-" & df$strand.y!="TOP" & df$strand.y!="BOT"))
- if (length(toerX)>0) {
- cat("AT coding without +/-/TOP/BOT strand info in",name.y,"\n")
- cat(length(toerX),"SNPs nullified\n")
- df$n.y[toerX] <- NA
- df$beta.y[toerX] <- NA
- }
- gc()
- toerX <- which(df$allele1.y == "G" & df$allele2.y == "C" & (df$strand.y!="+" & df$strand.y!="-" & df$strand.y!="TOP" & df$strand.y!="BOT"))
- if (length(toerX)>0) {
- cat("GC coding without +/-/TOP/BOT strand info in",name.y,"\n")
- cat(length(toerX),"SNPs nullified\n")
- df$n.y[toerX] <- NA
- df$beta.y[toerX] <- NA
- }
- gc()
- swcode <- which(df$effallele.x != df$effallele.y | df$allele1.x != df$allele1.y | df$allele2.x != df$allele2.y)
- if (length(swcode)>0) {
- cat("Different coding in two populaions\n")
- cat(length(swcode),"SNPs removed\n")
- tmp <- df[swcode,]
- write.csv(tmp,file="failedcode.csv",row.names=F)
- df <- df[!(c(1:(dim(df)[1])) %in% swcode),]
- }
- gc()
- swcode <- which(is.na(df$beta.x) & is.na(df$beta.y))
- if (length(swcode)>0) {
- cat("NA for betas in both populaions\n")
- cat(length(swcode),"SNPs removed\n")
- df <- df[!(c(1:(dim(df)[1])) %in% swcode),]
- }
- gc()
+ if (any(is.na(essen))) {
+ cat("Required fields missing in data.x:",
+ required[is.na(essen)], "\n")
+ stop()
+ }
-# OK
- cat("analysing ... \n")
+ opt <- match(optional, n.x)
+
+ if (any(is.na(essen))) {
+ cat("Optional fields missing in data.x:",
+ required[is.na(essen)], "\n")
+ }
+
+ n.y <- names(data.y)
+ essen <- match(required, n.y)
+
+ if (any(is.na(essen))) {
+ cat("Required fields missing in data.y:",
+ required[is.na(essen)], "\n");
+ stop()
+ }
+
+ opt <- match(optional, n.y)
+ if (any(is.na(essen))) {
+ cat("Optional fields missing in data.y:",
+ required[is.na(essen)], "\n");
+ }
+
+ pos.x <- 1
+ if (is.na(match("position", names(data.x)))) pos.x <- 0
+ pos.y <- 1
+ if (is.na(match("position", names(data.y)))) pos.y <- 0
+ chr.x <- 1
+ if (is.na(match("chromosome", names(data.x)))) chr.x <- 0
+ chr.y <- 1
+ if (is.na(match("chromosome", names(data.y)))) chr.y <- 0
+
+ df <- merge(data.x, data.y, by="name", all.x=TRUE, all.y=TRUE)
+
+ ## Do checks
+ err1 <- which(df$allele1.x != df$effallele.x &
+ df$allele2.x != df$effallele.x)
+ if (length(err1) > 0) {
+ cat("Effective allele does not correspond to neither allele1 nor allele2 in",
+ name.x, "for SNPs\n")
+ print(df$name[err1])
+ stop("Stopped")
+ }
+
+ err2 <- which(df$allele1.y != df$effallele.y &
+ df$allele2.y != df$effallele.y)
+ if (length(err1) > 0) {
+ cat("Effective allele does not correspond to neither allele1 nor allele2 in",
+ name.x, "for SNPs\n")
+ print(df$name[err1])
+ stop("Stopped")
+ }
+ gc()
+
+ ## OK, now check that effective allele is the second one
+ swall1 <- which(df$allele2.x != df$effallele.x)
+ if (length(swall1) > 0) {
+ keep <- df$allele2.x[swall1]
+ df$allele2.x[swall1] <- df$allele1.x[swall1]
+ df$allele1.x[swall1] <- keep
+ }
+
+ swall2 <- which(df$allele2.y != df$effallele.y)
+ if (length(swall2) > 0) {
+ keep <- df$allele2.y[swall2]
+ df$allele2.y[swall2] <- df$allele1.y[swall2]
+ df$allele1.y[swall2] <- keep
+ }
+ gc()
+
+ ## OK, now put TA->AT, GA->AG etc (A>T>G>C)
+ recX <- which(df$allele1.x=="T" & df$allele2.x=="A")
+ if (length(recX)>0) df <- swap.x(df, recX)
+ recX <- which(df$allele1.x=="G" & df$allele2.x=="A")
+ if (length(recX)>0) df <- swap.x(df, recX)
+ recX <- which(df$allele1.x=="C" & df$allele2.x=="A")
+ if (length(recX)>0) df <- swap.x(df, recX)
+ recX <- which(df$allele1.x=="G" & df$allele2.x=="T")
+ if (length(recX)>0) df <- swap.x(df, recX)
+ recX <- which(df$allele1.x=="C" & df$allele2.x=="T")
+ if (length(recX)>0) df <- swap.x(df, recX)
+ recX <- which(df$allele1.x=="C" & df$allele2.x=="G")
+ if (length(recX)>0) df <- swap.x(df, recX)
+ recX <- which(df$allele1.y=="T" & df$allele2.y=="A")
+ if (length(recX)>0) df <- swap.y(df, recX)
+ recX <- which(df$allele1.y=="G" & df$allele2.y=="A")
+ if (length(recX)>0) df <- swap.y(df, recX)
+ recX <- which(df$allele1.y=="C" & df$allele2.y=="A")
+ if (length(recX)>0) df <- swap.y(df, recX)
+ recX <- which(df$allele1.y=="G" & df$allele2.y=="T")
+ if (length(recX)>0) df <- swap.y(df, recX)
+ recX <- which(df$allele1.y=="C" & df$allele2.y=="T")
+ if (length(recX)>0) df <- swap.y(df, recX)
+ recX <- which(df$allele1.y=="C" & df$allele2.y=="G")
+ if (length(recX)>0) df <- swap.y(df, recX)
+ gc()
+
+ ## OK, now try to unify coding between studies
+ swcode <- which(df$effallele.x != df$effallele.y |
+ df$allele1.x != df$allele1.y |
+ df$allele2.x != df$allele2.y)
+ if (length(swcode) > 0) {
+ df <- recode.y(df, swcode)
+ recX <- which(df$allele1.y=="T" & df$allele2.y=="A")
+ if (length(recX)>0) df <- swap.y(df, recX)
+ recX <- which(df$allele1.y=="G" & df$allele2.y=="A")
+ if (length(recX)>0) df <- swap.y(df, recX)
+ recX <- which(df$allele1.y=="C" & df$allele2.y=="A")
+ if (length(recX)>0) df <- swap.y(df, recX)
+ recX <- which(df$allele1.y=="G" & df$allele2.y=="T")
+ if (length(recX)>0) df <- swap.y(df, recX)
+ recX <- which(df$allele1.y=="C" & df$allele2.y=="T")
+ if (length(recX)>0) df <- swap.y(df, recX)
+ recX <- which(df$allele1.y=="C" & df$allele2.y=="G")
+ if (length(recX)>0) df <- swap.y(df, recX)
+ }
+ gc()
+
+ ## OK, now nullify ambigous coding with wrong strand info
+ toerX <- which(df$allele1.x == "A" & df$allele2.x == "T" &
+ (df$strand.x!="+" & df$strand.x!="-" &
+ df$strand.x!="TOP" & df$strand.x!="BOT"))
+ if (length(toerX) > 0) {
+ cat("AT coding without +/-/TOP/BOT strand info in",
+ name.x, "\n")
+ cat(length(toerX), "SNPs nullified\n")
+ df$n.x[toerX] <- NA
+ df$beta.x[toerX] <- NA
+ }
+ gc()
+
+ toerX <- which(df$allele1.x == "G" & df$allele2.x == "C" &
+ (df$strand.x!="+" & df$strand.x!="-" &
+ df$strand.x!="TOP" & df$strand.x!="BOT"))
+ if (length(toerX) > 0) {
+ cat("GC coding without +/-/TOP/BOT strand info in",
+ name.x, "\n")
+ cat(length(toerX), "SNPs nullified\n")
+ df$n.x[toerX] <- NA
+ df$beta.x[toerX] <- NA
+ }
+ gc()
+
+ toerX <- which(df$allele1.y == "A" & df$allele2.y == "T" &
+ (df$strand.y!="+" & df$strand.y!="-" &
+ df$strand.y!="TOP" & df$strand.y!="BOT"))
+ if (length(toerX) > 0) {
+ cat("AT coding without +/-/TOP/BOT strand info in",
+ name.y, "\n")
+ cat(length(toerX), "SNPs nullified\n")
+ df$n.y[toerX] <- NA
+ df$beta.y[toerX] <- NA
+ }
+ gc()
+
+ toerX <- which(df$allele1.y == "G" & df$allele2.y == "C" &
+ (df$strand.y!="+" & df$strand.y!="-" &
+ df$strand.y!="TOP" & df$strand.y!="BOT"))
+ if (length(toerX) > 0) {
+ cat("GC coding without +/-/TOP/BOT strand info in",
+ name.y, "\n")
+ cat(length(toerX), "SNPs nullified\n")
+ df$n.y[toerX] <- NA
+ df$beta.y[toerX] <- NA
+ }
+ gc()
+
+ swcode <- which(df$effallele.x != df$effallele.y |
+ df$allele1.x != df$allele1.y |
+ df$allele2.x != df$allele2.y)
+ if (length(swcode) > 0) {
+ cat("Different coding in two populaions\n")
+ cat(length(swcode), "SNPs removed\n")
+ tmp <- df[swcode, ]
+ write.csv(tmp, file="failedcode.csv", row.names=FALSE)
+ df <- df[!(c(1:(dim(df)[1])) %in% swcode), ]
+ }
+ gc()
+
+ swcode <- which(is.na(df$beta.x) & is.na(df$beta.y))
+ if (length(swcode)>0) {
+ cat("NA for betas in both populaions\n")
+ cat(length(swcode), "SNPs removed\n")
+ df <- df[!(c(1:(dim(df)[1])) %in% swcode), ]
+ }
+ gc()
+
+ ## OK
+ cat("analysing ... \n")
# if (pop==pops[2]) name.x <- pops[1] else name.x <- "POOLED"
- metaout <- dometa(df,name.x=name.x,name.y=name.y,precorrect=precorrect,correct.pooled=correct.pooled,pos.x,pos.y,chr.x,chr.y)
- cat("... DONE\n")
- gc()
- metaout
+ metaout <- dometa(df, name.x=name.x, name.y=name.y,
+ precorrect=precorrect,
+ correct.pooled=correct.pooled,
+ pos.x, pos.y, chr.x, chr.y)
+ cat("... DONE\n")
+ gc()
+ metaout
}
-dometa <- function(data,name.x,name.y,precorrect=FALSE,correct.pooled=FALSE,pos.x,pos.y,chr.x,chr.y) {
+
+dometa <- function(data, name.x, name.y, precorrect=FALSE,
+ correct.pooled=FALSE, pos.x, pos.y, chr.x, chr.y) {
# prop <- 0.98
- chi2med <- qchisq(.5,1)
- b.x <- (data$beta.x)
- b.y <- (data$beta.y)
- se.x <- abs(data$sebeta.x)
- se.y <- abs(data$sebeta.y)
- if (name.x == "POOLED" & !correct.pooled) lam.x <- 1.0 else lam.x <- median(b.x*b.x/(se.x*se.x),na.rm=T)/chi2med
- lam.y <- median(b.y*b.y/(se.y*se.y),na.rm=T)/chi2med
- if (name.x != "POOLED")
- cat("Lambda",name.x,"=",lam.x,"\n")
- else if (correct.pooled) cat("Lambda POOLED previous =",lam.x,"\n")
- cat("Lambda",name.y,"=",lam.y,"\n")
- if (lam.x < 1) {warning(paste("Lambda",name.x,"<1; constrained to 1"),immediate.=T); lam.x <- 1}
- if (lam.y < 1) {warning(paste("Lambda",name.y,"<1; constrained to 1"),immediate.=T); lam.y <- 1}
- if (precorrect & name.x!="POOLED") {
- se.x <- sqrt(se.x*se.x*lam.x)
- }
- if (precorrect) {
- se.y <- sqrt(se.y*se.y*lam.y)
- }
- lam.x <- median(b.x*b.x/(se.x*se.x),na.rm=T)/chi2med
- lam.y <- median(b.y*b.y/(se.y*se.y),na.rm=T)/chi2med
- if (name.x != "POOLED")
- cat("Corrected Lambda",name.x,"=",lam.x,"\n")
- else if (correct.pooled) cat("Corrected Lambda POOLED previous =",lam.x,"\n")
- cat("Corrected Lambda",name.y,"=",lam.y,"\n")
- w2.x <- 1./(se.x*se.x)
- w2.y <- 1./(se.y*se.y)
- invsumw2 <- 1./(w2.x+w2.y)
- mbeta <- (b.x*w2.x + b.y*w2.y)*invsumw2
- mse <- sqrt(invsumw2)
- mp <- pchisq(mbeta*mbeta/(mse*mse),1,lower=F)
- if (name.x != "POOLED")
- npops <- 1*(!is.na(data$beta.x)) + 1*(!is.na(data$beta.y))
- else {
- npops <- data$npops + 1*(!is.na(data$beta.y))
- if (any(is.na(npops))) npops[is.na(npops)] <- 1*(!is.na(data$beta.x[is.na(npops)])) + 1*(!is.na(data$beta.y[is.na(npops)]))
- }
- na.x <- which(is.na(b.x) & !is.na(b.y))
- na.y <- which(is.na(b.y) & !is.na(b.x))
+ chi2med <- qchisq(.5, 1)
+ b.x <- (data$beta.x)
+ b.y <- (data$beta.y)
+ se.x <- abs(data$sebeta.x)
+ se.y <- abs(data$sebeta.y)
+
+ if (name.x == "POOLED" & !correct.pooled) {
+ lam.x <- 1.0
+ } else {
+ lam.x <- median(b.x * b.x / (se.x * se.x), na.rm=TRUE) / chi2med
+ }
+
+ lam.y <- median(b.y * b.y / (se.y * se.y), na.rm=TRUE) / chi2med
+
+ if (name.x != "POOLED")
+ cat("Lambda", name.x, "=", lam.x, "\n")
+ else if (correct.pooled)
+ cat("Lambda POOLED previous =", lam.x, "\n")
+
+ cat("Lambda", name.y, "=", lam.y, "\n")
+ if (lam.x < 1) {
+ warning(paste("Lambda", name.x, "<1; constrained to 1"),
+ immediate.=TRUE); lam.x <- 1
+ }
+ if (lam.y < 1) {
+ warning(paste("Lambda", name.y, "<1; constrained to 1"),
+ immediate.=TRUE); lam.y <- 1
+ }
+ if (precorrect & name.x!="POOLED") {
+ se.x <- sqrt(se.x*se.x*lam.x)
+ }
+ if (precorrect) {
+ se.y <- sqrt(se.y*se.y*lam.y)
+ }
+
+ lam.x <- median(b.x * b.x / (se.x * se.x), na.rm=TRUE)/chi2med
+ lam.y <- median(b.y * b.y / (se.y * se.y), na.rm=TRUE)/chi2med
+
+ if (name.x != "POOLED")
+ cat("Corrected Lambda", name.x, "=", lam.x, "\n")
+ else if (correct.pooled)
+ cat("Corrected Lambda POOLED previous =", lam.x, "\n")
+
+ cat("Corrected Lambda", name.y, "=", lam.y, "\n")
+ w2.x <- 1./(se.x * se.x)
+ w2.y <- 1./(se.y * se.y)
+ invsumw2 <- 1./(w2.x + w2.y)
+ mbeta <- (b.x*w2.x + b.y*w2.y)*invsumw2
+ mse <- sqrt(invsumw2)
+ mp <- pchisq(mbeta*mbeta/(mse*mse), 1, lower=FALSE)
+
+ if (name.x != "POOLED") {
+ npops <- 1*(!is.na(data$beta.x)) + 1*(!is.na(data$beta.y))
+ } else {
+ npops <- data$npops + 1*(!is.na(data$beta.y))
+ if (any(is.na(npops))) {
+ npops[is.na(npops)] <-
+ 1 * (!is.na(data$beta.x[is.na(npops)])) +
+ 1 * (!is.na(data$beta.y[is.na(npops)]))
+ }
+ }
+ na.x <- which(is.na(b.x) & !is.na(b.y))
+ na.y <- which(is.na(b.y) & !is.na(b.x))
# na.xy <- which(is.na(b.y) & is.na(b.x))
# if (length(na.xy)>0) stop("Number of subjects missing in both data sets...")
- out <- data.frame(name=data$name,stringsAsFactors=F)
- out$strand <- data$strand.x
- out$allele1 <- data$allele1.x
- out$allele2 <- data$allele2.x
- out$effallele <- data$effallele.x
- if (chr.x & chr.y) {
- out$chromosome <- data$chromosome.x
- out$chromosome[na.x] <- data$chromosome.y[na.x]
- } else if (chr.x) {
- out$chromosome <- data$chromosome.x
- } else if (chr.y) {
- out$chromosome <- data$chromosome.y
- }
-# print(c(pos.x,pos.y))
- if (pos.x & pos.y) {
- out$position <- data$position.x
- out$position[na.x] <- data$position.y[na.x]
- } else if (pos.x | pos.y) {
- out$position <- data$position
- }
- out$strand[na.x] <- data$strand.y[na.x]
- out$allele1[na.x] <- data$allele1.y[na.x]
- out$allele2[na.x] <- data$allele2.y[na.x]
- out$effallele[na.x] <- data$effallele.y[na.x]
- out$n <- data$n.x + data$n.y
- out$n[na.x] <- data$n.y[na.x]
- out$n[na.y] <- data$n.x[na.y]
- out$npops <- npops
- out$beta <- mbeta
- out$sebeta <- mse
- out$beta[na.x] <- b.y[na.x]
- out$sebeta[na.x] <- se.y[na.x]
- out$beta[na.y] <- b.x[na.y]
- out$sebeta[na.y] <- se.x[na.y]
+ out <- data.frame(name=data$name, stringsAsFactors=FALSE)
+ out$strand <- data$strand.x
+ out$allele1 <- data$allele1.x
+ out$allele2 <- data$allele2.x
+ out$effallele <- data$effallele.x
- na.x <- which(is.na(data$effallelefreq.x))
- na.y <- which(is.na(data$effallelefreq.y))
- nNAb <- which(!is.na(data$effallelefreq.x) & !is.na(data$effallelefreq.y))
- out$effallelefreq <- rep(NA,dim(data)[1])
- out$effallelefreq[nNAb] <- (data$effallelefreq.x[nNAb]*data$n.x[nNAb]+data$effallelefreq.y[nNAb]*data$n.y[nNAb])/(data$n.x[nNAb]+data$n.y[nNAb])
- out$effallelefreq[na.x] <- data$effallelefreq.y[na.x]
- out$effallelefreq[na.y] <- data$effallelefreq.x[na.y]
+ if (chr.x & chr.y) {
+ out$chromosome <- data$chromosome.x
+ out$chromosome[na.x] <- data$chromosome.y[na.x]
+ } else if (chr.x) {
+ out$chromosome <- data$chromosome.x
+ } else if (chr.y) {
+ out$chromosome <- data$chromosome.y
+ }
+# print(c(pos.x, pos.y))
+
+ if (pos.x & pos.y) {
+ out$position <- data$position.x
+ out$position[na.x] <- data$position.y[na.x]
+ } else if (pos.x | pos.y) {
+ out$position <- data$position
+ }
+
+ out$strand[na.x] <- data$strand.y[na.x]
+ out$allele1[na.x] <- data$allele1.y[na.x]
+ out$allele2[na.x] <- data$allele2.y[na.x]
+ out$effallele[na.x] <- data$effallele.y[na.x]
+ out$n <- data$n.x + data$n.y
+ out$n[na.x] <- data$n.y[na.x]
+ out$n[na.y] <- data$n.x[na.y]
+ out$npops <- npops
+ out$beta <- mbeta
+ out$sebeta <- mse
+ out$beta[na.x] <- b.y[na.x]
+ out$sebeta[na.x] <- se.y[na.x]
+ out$beta[na.y] <- b.x[na.y]
+ out$sebeta[na.y] <- se.x[na.y]
+
+ na.x <- which(is.na(data$effallelefreq.x))
+ na.y <- which(is.na(data$effallelefreq.y))
+ nNAb <- which(!is.na(data$effallelefreq.x) &
+ !is.na(data$effallelefreq.y))
+
+ out$effallelefreq <- rep(NA, dim(data)[1])
+ out$effallelefreq[nNAb] <- (data$effallelefreq.x[nNAb] *
+ data$n.x[nNAb] +
+ data$effallelefreq.y[nNAb] *
+ data$n.y[nNAb]) / (data$n.x[nNAb]
+ +
+ data$n.y[nNAb])
+ out$effallelefreq[na.x] <- data$effallelefreq.y[na.x]
+ out$effallelefreq[na.y] <- data$effallelefreq.x[na.y]
# print(data$effallelefreq.x)
# print(data$effallelefreq.y)
# print(out$effallelefreq)
- na.x <- which(is.na(data$call.x))
- na.y <- which(is.na(data$call.y))
- nNAb <- which(!is.na(data$call.x) & !is.na(data$call.y))
- out$call <- rep(NA,dim(data)[1])
+ na.x <- which(is.na(data$call.x))
+ na.y <- which(is.na(data$call.y))
+ nNAb <- which(!is.na(data$call.x) & !is.na(data$call.y))
+ out$call <- rep(NA, dim(data)[1])
# print(na.x)
# print(nNAb)
- out$call[nNAb] <- (data$call.x[nNAb]*data$n.x[nNAb]+data$call.y[nNAb]*data$n.y[nNAb])/(data$n.x[nNAb]+data$n.y[nNAb])
- out$call[na.x] <- data$call.y[na.x]
- out$call[na.y] <- data$call.x[na.y]
+ out$call[nNAb] <- (data$call.x[nNAb] * data$n.x[nNAb] +
+ data$call.y[nNAb] * data$n.y[nNAb]) /
+ (data$n.x[nNAb] + data$n.y[nNAb])
+ out$call[na.x] <- data$call.y[na.x]
+ out$call[na.y] <- data$call.x[na.y]
# print(data$pexhwe.y)
- if (all(data$pexhwe.x <= 1.0,na.rm=T)) {data$pexhwe.x <- qchisq(1.-data$pexhwe.x,1)}
- if (all(data$pexhwe.y <= 1.0,na.rm=T)) {data$pexhwe.y <- qchisq(1.-data$pexhwe.y,1)}
+ if (all(data$pexhwe.x <= 1.0, na.rm=TRUE)) {
+ data$pexhwe.x <- qchisq(1. - data$pexhwe.x, 1)
+ }
+ if (all(data$pexhwe.y <= 1.0, na.rm=TRUE)) {
+ data$pexhwe.y <- qchisq(1. - data$pexhwe.y, 1)
+ }
# print(data$pexhwe.y)
- na.x <- which(is.na(data$pexhwe.x))
- na.y <- which(is.na(data$pexhwe.y))
- nNAb <- which(!is.na(data$pexhwe.x) & !is.na(data$pexhwe.y))
- out$pexhwe <- rep(NA,dim(data)[1])
- out$pexhwe[nNAb] <- data$pexhwe.x[nNAb]+data$pexhwe.y[nNAb]
- out$pexhwe[na.x] <- data$pexhwe.y[na.x]
- out$pexhwe[na.y] <- data$pexhwe.x[na.y]
+ na.x <- which(is.na(data$pexhwe.x))
+ na.y <- which(is.na(data$pexhwe.y))
+ nNAb <- which(!is.na(data$pexhwe.x) & !is.na(data$pexhwe.y))
+ out$pexhwe <- rep(NA, dim(data)[1])
+ out$pexhwe[nNAb] <- data$pexhwe.x[nNAb]+data$pexhwe.y[nNAb]
+ out$pexhwe[na.x] <- data$pexhwe.y[na.x]
+ out$pexhwe[na.y] <- data$pexhwe.x[na.y]
- s <- match(substr(names(data),1,5),"obeta")
- if (any(!is.na(s))) s <- which(!is.na(s))
- if (any(!is.na(s))) out <- cbind(out,data[,names(data)[s]])
- if (name.x != "POOLED") out[,paste("obeta",name.x,sep="")] <- b.x
- out[,paste("obeta",name.y,sep="")] <- b.y
- s <- match(substr(names(data),1,3),"ose")
- if (any(!is.na(s))) s <- which(!is.na(s))
- if (any(!is.na(s))) out <- cbind(out,data[,names(data)[s]])
- if (name.x != "POOLED") out[,paste("ose",name.x,sep="")] <- se.x
- out[,paste("ose",name.y,sep="")] <- se.y
- cat("Lambda POOLED data =",median((out$beta/out$sebeta)^2,na.rm=T)/chi2med,"\n")
- out$chi2 <- out$beta*out$beta/(out$sebeta*out$sebeta)
- out$p <- pchisq(out$chi2,1,lower=F)
- out
-}
+ s <- match(substr(names(data), 1, 5), "obeta")
+ if (any(!is.na(s))) s <- which(!is.na(s))
+ if (any(!is.na(s))) out <- cbind(out, data[, names(data)[s]])
+ if (name.x != "POOLED") out[, paste("obeta", name.x,sep="")] <- b.x
-swap.y <- function(df,swcode) {
- df$beta.y[swcode] <- (-1.)*df$beta.y[swcode]
- df$effallelefreq.y[swcode] <- (1. - df$effallelefreq.y[swcode])
- a1.save <- df$allele1.y[swcode]
- a2.save <- df$allele2.y[swcode]
- df$effallele.y[swcode] <- a1.save
- df$allele1.y[swcode] <- a2.save
- df$allele2.y[swcode] <- a1.save
- df
+ out[, paste("obeta", name.y,sep="")] <- b.y
+ s <- match(substr(names(data), 1, 3), "ose")
+
+ if (any(!is.na(s))) s <- which(!is.na(s))
+ if (any(!is.na(s))) out <- cbind(out, data[, names(data)[s]])
+ if (name.x != "POOLED") out[, paste("ose", name.x,sep="")] <- se.x
+
+ out[, paste("ose", name.y,sep="")] <- se.y
+ cat("Lambda POOLED data =", median((out$beta / out$sebeta)^2,
+ na.rm=TRUE) / chi2med, "\n")
+ out$chi2 <- out$beta * out$beta / (out$sebeta * out$sebeta)
+ out$p <- pchisq(out$chi2, 1, lower=FALSE)
+ out
}
-swap.x <- function(df,swcode) {
- df$beta.x[swcode] <- (-1.)*df$beta.x[swcode]
- df$effallelefreq.x[swcode] <- (1. - df$effallelefreq.x[swcode])
- a1.save <- df$allele1.x[swcode]
- a2.save <- df$allele2.x[swcode]
- df$effallele.x[swcode] <- a1.save
- df$allele1.x[swcode] <- a2.save
- df$allele2.x[swcode] <- a1.save
- df
+
+swap.y <- function(df, swcode) {
+ df$beta.y[swcode] <- (-1.) * df$beta.y[swcode]
+ df$effallelefreq.y[swcode] <- (1. - df$effallelefreq.y[swcode])
+ a1.save <- df$allele1.y[swcode]
+ a2.save <- df$allele2.y[swcode]
+ df$effallele.y[swcode] <- a1.save
+ df$allele1.y[swcode] <- a2.save
+ df$allele2.y[swcode] <- a1.save
+ df
}
-recode.y <- function(df,swcode) {
- a2t <- swcode[df$effallele.y[swcode]=="A"]
- t2a <- swcode[df$effallele.y[swcode]=="T"]
- g2c <- swcode[df$effallele.y[swcode]=="G"]
- c2g <- swcode[df$effallele.y[swcode]=="C"]
- df$effallele.y[a2t] <-"T"
- df$effallele.y[t2a] <-"A"
- df$effallele.y[g2c] <-"C"
- df$effallele.y[c2g] <-"G"
- a2t <- swcode[df$allele1.y[swcode]=="A"]
- t2a <- swcode[df$allele1.y[swcode]=="T"]
- g2c <- swcode[df$allele1.y[swcode]=="G"]
- c2g <- swcode[df$allele1.y[swcode]=="C"]
- df$allele1.y[a2t] <-"T"
- df$allele1.y[t2a] <-"A"
- df$allele1.y[g2c] <-"C"
- df$allele1.y[c2g] <-"G"
- a2t <- swcode[df$allele2.y[swcode]=="A"]
- t2a <- swcode[df$allele2.y[swcode]=="T"]
- g2c <- swcode[df$allele2.y[swcode]=="G"]
- c2g <- swcode[df$allele2.y[swcode]=="C"]
- df$allele2.y[a2t] <-"T"
- df$allele2.y[t2a] <-"A"
- df$allele2.y[g2c] <-"C"
- df$allele2.y[c2g] <-"G"
- f2r <- swcode[df$strand.y[swcode]=="+"]
- r2f <- swcode[df$strand.y[swcode]=="-"]
- t2b <- swcode[df$strand.y[swcode]=="TOP"]
- b2t <- swcode[df$strand.y[swcode]=="BOT"]
- df$strand.y[f2r] <- "-"
- df$strand.y[r2f] <- "+"
- df$strand.y[t2b] <- "BOT"
- df$strand.y[b2t] <- "TOP"
- if (any(is.na(df$strand.y[swcode]))) warning("NAs in strand!")
- if (any(df$strand.y[swcode]!="+" & df$strand.y[swcode]!="-" & df$strand.y[swcode]!="TOP" & df$strand.y[swcode]!="BOT")) warning("Unrecognised coding in strand!")
- df
+
+swap.x <- function(df, swcode) {
+ df$beta.x[swcode] <- (-1.) * df$beta.x[swcode]
+ df$effallelefreq.x[swcode] <- (1. - df$effallelefreq.x[swcode])
+ a1.save <- df$allele1.x[swcode]
+ a2.save <- df$allele2.x[swcode]
+ df$effallele.x[swcode] <- a1.save
+ df$allele1.x[swcode] <- a2.save
+ df$allele2.x[swcode] <- a1.save
+ df
}
+recode.y <- function(df, swcode) {
+ a2t <- swcode[df$effallele.y[swcode]=="A"]
+ t2a <- swcode[df$effallele.y[swcode]=="T"]
+ g2c <- swcode[df$effallele.y[swcode]=="G"]
+ c2g <- swcode[df$effallele.y[swcode]=="C"]
+ df$effallele.y[a2t] <-"T"
+ df$effallele.y[t2a] <-"A"
+ df$effallele.y[g2c] <-"C"
+ df$effallele.y[c2g] <-"G"
+ a2t <- swcode[df$allele1.y[swcode]=="A"]
+ t2a <- swcode[df$allele1.y[swcode]=="T"]
+ g2c <- swcode[df$allele1.y[swcode]=="G"]
+ c2g <- swcode[df$allele1.y[swcode]=="C"]
+ df$allele1.y[a2t] <-"T"
+ df$allele1.y[t2a] <-"A"
+ df$allele1.y[g2c] <-"C"
+ df$allele1.y[c2g] <-"G"
+ a2t <- swcode[df$allele2.y[swcode]=="A"]
+ t2a <- swcode[df$allele2.y[swcode]=="T"]
+ g2c <- swcode[df$allele2.y[swcode]=="G"]
+ c2g <- swcode[df$allele2.y[swcode]=="C"]
+ df$allele2.y[a2t] <-"T"
+ df$allele2.y[t2a] <-"A"
+ df$allele2.y[g2c] <-"C"
+ df$allele2.y[c2g] <-"G"
+ f2r <- swcode[df$strand.y[swcode]=="+"]
+ r2f <- swcode[df$strand.y[swcode]=="-"]
+ t2b <- swcode[df$strand.y[swcode]=="TOP"]
+ b2t <- swcode[df$strand.y[swcode]=="BOT"]
+ df$strand.y[f2r] <- "-"
+ df$strand.y[r2f] <- "+"
+ df$strand.y[t2b] <- "BOT"
+ df$strand.y[b2t] <- "TOP"
+
+ if (any(is.na(df$strand.y[swcode]))) {
+ warning("NAs in strand!")
+ }
+
+ if (any(df$strand.y[swcode]!="+" &
+ df$strand.y[swcode]!="-" &
+ df$strand.y[swcode]!="TOP" &
+ df$strand.y[swcode]!="BOT")) {
+ warning("Unrecognised coding in strand!")
+ }
+ df
+}
More information about the Genabel-commits
mailing list