[Genabel-commits] r1553 - pkg/MetABEL/R

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Tue Jan 21 16:00:30 CET 2014


Author: lckarssen
Date: 2014-01-21 16:00:30 +0100 (Tue, 21 Jan 2014)
New Revision: 1553

Modified:
   pkg/MetABEL/R/metagwa.tables.R
Log:
Made one of the MetABEL R source files adhere more to our coding standards:
- remove spaces at end of line
- insert space after comma
- insert space between operators
- etc.

No functional changes.


Modified: pkg/MetABEL/R/metagwa.tables.R
===================================================================
--- pkg/MetABEL/R/metagwa.tables.R	2014-01-21 14:33:19 UTC (rev 1552)
+++ pkg/MetABEL/R/metagwa.tables.R	2014-01-21 15:00:30 UTC (rev 1553)
@@ -1,345 +1,457 @@
-"metagwa.tables" <- 
-function(data.x,data.y,name.x="P1",name.y="P2",precorrect=TRUE,correct.pooled=FALSE) {
-	required <- c("name","allele1","allele2","effallele","beta","sebeta")
-	optional <- c("chromosome","position","n","effallelefreq")
-	n.x <- names(data.x)
-	essen <- match(required,n.x)
-	if (any(is.na(essen))) {cat("Required fields missing in data.x:",required[is.na(essen)],"\n"); stop()}
-	opt <- match(optional,n.x)
-	if (any(is.na(essen))) {cat("Optional fields missing in data.x:",required[is.na(essen)],"\n");}
-	n.y <- names(data.y)
-	essen <- match(required,n.y)
-	if (any(is.na(essen))) {cat("Required fields missing in data.y:",required[is.na(essen)],"\n"); stop()}
-	opt <- match(optional,n.y)
-	if (any(is.na(essen))) {cat("Optional fields missing in data.y:",required[is.na(essen)],"\n");}
-	
-	pos.x <- 1
-	if (is.na(match("position",names(data.x)))) pos.x <- 0
-	pos.y <- 1
-	if (is.na(match("position",names(data.y)))) pos.y <- 0
-	chr.x <- 1
-	if (is.na(match("chromosome",names(data.x)))) chr.x <- 0
-	chr.y <- 1
-	if (is.na(match("chromosome",names(data.y)))) chr.y <- 0
+"metagwa.tables" <-
+    function(data.x, data.y, name.x="P1", name.y="P2",
+             precorrect=TRUERUE, correct.pooled=FALSE) {
 
-	df <- merge(data.x,data.y,by="name",all.x=T,all.y=T)
+        required <- c("name", "allele1", "allele2", "effallele", "beta", "sebeta")
+        optional <- c("chromosome", "position", "n", "effallelefreq")
+        n.x <- names(data.x)
+        essen <- match(required, n.x)
 
-# do checks
-	err1 <- which(df$allele1.x != df$effallele.x & df$allele2.x != df$effallele.x)
-	if (length(err1)>0) {
-		cat("Effective allele does not correspond to neither allele1 nor allele2 in",name.x,"for SNPs\n")
-		print(df$name[err1])
-		stop("Stopped")
-	}
-	err2 <- which(df$allele1.y != df$effallele.y & df$allele2.y != df$effallele.y)
-	if (length(err1)>0) {
-		cat("Effective allele does not correspond to neither allele1 nor allele2 in",name.x,"for SNPs\n")
-		print(df$name[err1])
-		stop("Stopped")
-	}
-	gc()
-# OK, now check that effective allele is the second one
-	swall1 <- which(df$allele2.x != df$effallele.x)
-	if (length(swall1)>0) {
-		keep <- df$allele2.x[swall1]
-		df$allele2.x[swall1] <- df$allele1.x[swall1]
-		df$allele1.x[swall1] <- keep
-	}
-	swall2 <- which(df$allele2.y != df$effallele.y)
-	if (length(swall2)>0) {
-		keep <- df$allele2.y[swall2]
-		df$allele2.y[swall2] <- df$allele1.y[swall2]
-		df$allele1.y[swall2] <- keep
-	}
-	gc()
-# OK, now put TA->AT, GA->AG etc (A>T>G>C)
-	recX <- which(df$allele1.x=="T" & df$allele2.x=="A")
-	if (length(recX)>0) df <- swap.x(df,recX)
-	recX <- which(df$allele1.x=="G" & df$allele2.x=="A")
-	if (length(recX)>0) df <- swap.x(df,recX)
-	recX <- which(df$allele1.x=="C" & df$allele2.x=="A")
-	if (length(recX)>0) df <- swap.x(df,recX)
-	recX <- which(df$allele1.x=="G" & df$allele2.x=="T")
-	if (length(recX)>0) df <- swap.x(df,recX)
-	recX <- which(df$allele1.x=="C" & df$allele2.x=="T")
-	if (length(recX)>0) df <- swap.x(df,recX)
-	recX <- which(df$allele1.x=="C" & df$allele2.x=="G")
-	if (length(recX)>0) df <- swap.x(df,recX)
-	recX <- which(df$allele1.y=="T" & df$allele2.y=="A")
-	if (length(recX)>0) df <- swap.y(df,recX)
-	recX <- which(df$allele1.y=="G" & df$allele2.y=="A")
-	if (length(recX)>0) df <- swap.y(df,recX)
-	recX <- which(df$allele1.y=="C" & df$allele2.y=="A")
-	if (length(recX)>0) df <- swap.y(df,recX)
-	recX <- which(df$allele1.y=="G" & df$allele2.y=="T")
-	if (length(recX)>0) df <- swap.y(df,recX)
-	recX <- which(df$allele1.y=="C" & df$allele2.y=="T")
-	if (length(recX)>0) df <- swap.y(df,recX)
-	recX <- which(df$allele1.y=="C" & df$allele2.y=="G")
-	if (length(recX)>0) df <- swap.y(df,recX)
-	gc()
-# OK, now try to unify coding between studies
-	swcode <- which(df$effallele.x != df$effallele.y | df$allele1.x != df$allele1.y | df$allele2.x != df$allele2.y)
-	if (length(swcode)>0) {
-		df <- recode.y(df,swcode)
-		recX <- which(df$allele1.y=="T" & df$allele2.y=="A")
-		if (length(recX)>0) df <- swap.y(df,recX)
-		recX <- which(df$allele1.y=="G" & df$allele2.y=="A")
-		if (length(recX)>0) df <- swap.y(df,recX)
-		recX <- which(df$allele1.y=="C" & df$allele2.y=="A")
-		if (length(recX)>0) df <- swap.y(df,recX)
-		recX <- which(df$allele1.y=="G" & df$allele2.y=="T")
-		if (length(recX)>0) df <- swap.y(df,recX)
-		recX <- which(df$allele1.y=="C" & df$allele2.y=="T")
-		if (length(recX)>0) df <- swap.y(df,recX)
-		recX <- which(df$allele1.y=="C" & df$allele2.y=="G")
-		if (length(recX)>0) df <- swap.y(df,recX)
-	}
-	gc()
-# OK, now nullify ambigous coding with wrong strand info
-	toerX <- which(df$allele1.x == "A" & df$allele2.x == "T" & (df$strand.x!="+" & df$strand.x!="-" & df$strand.x!="TOP" & df$strand.x!="BOT"))
-	if (length(toerX)>0) {
-		cat("AT coding without +/-/TOP/BOT strand info in",name.x,"\n")
-		cat(length(toerX),"SNPs nullified\n")
-		df$n.x[toerX] <- NA
-		df$beta.x[toerX] <- NA
-	}
-	gc()
-	toerX <- which(df$allele1.x == "G" & df$allele2.x == "C" & (df$strand.x!="+" & df$strand.x!="-" & df$strand.x!="TOP" & df$strand.x!="BOT"))
-	if (length(toerX)>0) {
-		cat("GC coding without +/-/TOP/BOT strand info in",name.x,"\n")
-		cat(length(toerX),"SNPs nullified\n")
-		df$n.x[toerX] <- NA
-		df$beta.x[toerX] <- NA
-	}
-	gc()
-	toerX <- which(df$allele1.y == "A" & df$allele2.y == "T" & (df$strand.y!="+" & df$strand.y!="-" & df$strand.y!="TOP" & df$strand.y!="BOT"))
-	if (length(toerX)>0) {
-		cat("AT coding without +/-/TOP/BOT strand info in",name.y,"\n")
-		cat(length(toerX),"SNPs nullified\n")
-		df$n.y[toerX] <- NA
-		df$beta.y[toerX] <- NA
-	}
-	gc()
-	toerX <- which(df$allele1.y == "G" & df$allele2.y == "C" & (df$strand.y!="+" & df$strand.y!="-" & df$strand.y!="TOP" & df$strand.y!="BOT"))
-	if (length(toerX)>0) {
-		cat("GC coding without +/-/TOP/BOT strand info in",name.y,"\n")
-		cat(length(toerX),"SNPs nullified\n")
-		df$n.y[toerX] <- NA
-		df$beta.y[toerX] <- NA
-	}
-	gc()
-	swcode <- which(df$effallele.x != df$effallele.y | df$allele1.x != df$allele1.y | df$allele2.x != df$allele2.y)
-	if (length(swcode)>0) {
-		cat("Different coding in two populaions\n")
-		cat(length(swcode),"SNPs removed\n")
-		tmp <- df[swcode,]
-		write.csv(tmp,file="failedcode.csv",row.names=F)
-		df <- df[!(c(1:(dim(df)[1])) %in% swcode),]
-	}
-	gc()
-	swcode <- which(is.na(df$beta.x) & is.na(df$beta.y))
-	if (length(swcode)>0) {
-		cat("NA for betas in both populaions\n")
-		cat(length(swcode),"SNPs removed\n")
-		df <- df[!(c(1:(dim(df)[1])) %in% swcode),]
-	}
-	gc()
+        if (any(is.na(essen))) {
+            cat("Required fields missing in data.x:",
+                required[is.na(essen)], "\n")
+            stop()
+        }
 
-# OK
-	cat("analysing ... \n")
+        opt <- match(optional, n.x)
+
+        if (any(is.na(essen))) {
+            cat("Optional fields missing in data.x:",
+                required[is.na(essen)], "\n")
+        }
+
+        n.y <- names(data.y)
+        essen <- match(required, n.y)
+
+        if (any(is.na(essen))) {
+            cat("Required fields missing in data.y:",
+                required[is.na(essen)], "\n");
+            stop()
+        }
+
+        opt <- match(optional, n.y)
+        if (any(is.na(essen))) {
+            cat("Optional fields missing in data.y:",
+                required[is.na(essen)], "\n");
+        }
+
+        pos.x <- 1
+        if (is.na(match("position", names(data.x)))) pos.x <- 0
+        pos.y <- 1
+        if (is.na(match("position", names(data.y)))) pos.y <- 0
+        chr.x <- 1
+        if (is.na(match("chromosome", names(data.x)))) chr.x <- 0
+        chr.y <- 1
+        if (is.na(match("chromosome", names(data.y)))) chr.y <- 0
+
+        df <- merge(data.x, data.y, by="name", all.x=TRUE, all.y=TRUE)
+
+        ## Do checks
+        err1 <- which(df$allele1.x != df$effallele.x &
+                      df$allele2.x != df$effallele.x)
+        if (length(err1) > 0) {
+                cat("Effective allele does not correspond to neither allele1 nor allele2 in",
+                    name.x, "for SNPs\n")
+                print(df$name[err1])
+                stop("Stopped")
+        }
+
+        err2 <- which(df$allele1.y != df$effallele.y &
+                      df$allele2.y != df$effallele.y)
+        if (length(err1) > 0) {
+                cat("Effective allele does not correspond to neither allele1 nor allele2 in",
+                    name.x, "for SNPs\n")
+                print(df$name[err1])
+                stop("Stopped")
+        }
+        gc()
+
+        ## OK, now check that effective allele is the second one
+        swall1 <- which(df$allele2.x != df$effallele.x)
+        if (length(swall1) > 0) {
+                keep <- df$allele2.x[swall1]
+                df$allele2.x[swall1] <- df$allele1.x[swall1]
+                df$allele1.x[swall1] <- keep
+        }
+
+        swall2 <- which(df$allele2.y != df$effallele.y)
+        if (length(swall2) > 0) {
+                keep <- df$allele2.y[swall2]
+                df$allele2.y[swall2] <- df$allele1.y[swall2]
+                df$allele1.y[swall2] <- keep
+        }
+        gc()
+
+        ## OK, now put TA->AT, GA->AG etc (A>T>G>C)
+        recX <- which(df$allele1.x=="T" & df$allele2.x=="A")
+        if (length(recX)>0) df <- swap.x(df, recX)
+        recX <- which(df$allele1.x=="G" & df$allele2.x=="A")
+        if (length(recX)>0) df <- swap.x(df, recX)
+        recX <- which(df$allele1.x=="C" & df$allele2.x=="A")
+        if (length(recX)>0) df <- swap.x(df, recX)
+        recX <- which(df$allele1.x=="G" & df$allele2.x=="T")
+        if (length(recX)>0) df <- swap.x(df, recX)
+        recX <- which(df$allele1.x=="C" & df$allele2.x=="T")
+        if (length(recX)>0) df <- swap.x(df, recX)
+        recX <- which(df$allele1.x=="C" & df$allele2.x=="G")
+        if (length(recX)>0) df <- swap.x(df, recX)
+        recX <- which(df$allele1.y=="T" & df$allele2.y=="A")
+        if (length(recX)>0) df <- swap.y(df, recX)
+        recX <- which(df$allele1.y=="G" & df$allele2.y=="A")
+        if (length(recX)>0) df <- swap.y(df, recX)
+        recX <- which(df$allele1.y=="C" & df$allele2.y=="A")
+        if (length(recX)>0) df <- swap.y(df, recX)
+        recX <- which(df$allele1.y=="G" & df$allele2.y=="T")
+        if (length(recX)>0) df <- swap.y(df, recX)
+        recX <- which(df$allele1.y=="C" & df$allele2.y=="T")
+        if (length(recX)>0) df <- swap.y(df, recX)
+        recX <- which(df$allele1.y=="C" & df$allele2.y=="G")
+        if (length(recX)>0) df <- swap.y(df, recX)
+        gc()
+
+        ## OK, now try to unify coding between studies
+        swcode <- which(df$effallele.x != df$effallele.y |
+                        df$allele1.x != df$allele1.y |
+                        df$allele2.x != df$allele2.y)
+        if (length(swcode) > 0) {
+                df <- recode.y(df, swcode)
+                recX <- which(df$allele1.y=="T" & df$allele2.y=="A")
+                if (length(recX)>0) df <- swap.y(df, recX)
+                recX <- which(df$allele1.y=="G" & df$allele2.y=="A")
+                if (length(recX)>0) df <- swap.y(df, recX)
+                recX <- which(df$allele1.y=="C" & df$allele2.y=="A")
+                if (length(recX)>0) df <- swap.y(df, recX)
+                recX <- which(df$allele1.y=="G" & df$allele2.y=="T")
+                if (length(recX)>0) df <- swap.y(df, recX)
+                recX <- which(df$allele1.y=="C" & df$allele2.y=="T")
+                if (length(recX)>0) df <- swap.y(df, recX)
+                recX <- which(df$allele1.y=="C" & df$allele2.y=="G")
+                if (length(recX)>0) df <- swap.y(df, recX)
+        }
+        gc()
+
+        ## OK, now nullify ambigous coding with wrong strand info
+        toerX <- which(df$allele1.x == "A" & df$allele2.x == "T" &
+                       (df$strand.x!="+" & df$strand.x!="-" &
+                        df$strand.x!="TOP" & df$strand.x!="BOT"))
+        if (length(toerX) > 0) {
+                cat("AT coding without +/-/TOP/BOT strand info in",
+                    name.x, "\n")
+                cat(length(toerX), "SNPs nullified\n")
+                df$n.x[toerX] <- NA
+                df$beta.x[toerX] <- NA
+        }
+        gc()
+
+        toerX <- which(df$allele1.x == "G" & df$allele2.x == "C" &
+                       (df$strand.x!="+" & df$strand.x!="-" &
+                        df$strand.x!="TOP" & df$strand.x!="BOT"))
+        if (length(toerX) > 0) {
+                cat("GC coding without +/-/TOP/BOT strand info in",
+                    name.x, "\n")
+                cat(length(toerX), "SNPs nullified\n")
+                df$n.x[toerX] <- NA
+                df$beta.x[toerX] <- NA
+        }
+        gc()
+
+        toerX <- which(df$allele1.y == "A" & df$allele2.y == "T" &
+                       (df$strand.y!="+" & df$strand.y!="-" &
+                        df$strand.y!="TOP" & df$strand.y!="BOT"))
+        if (length(toerX) > 0) {
+                cat("AT coding without +/-/TOP/BOT strand info in",
+                    name.y, "\n")
+                cat(length(toerX), "SNPs nullified\n")
+                df$n.y[toerX] <- NA
+                df$beta.y[toerX] <- NA
+        }
+        gc()
+
+        toerX <- which(df$allele1.y == "G" & df$allele2.y == "C" &
+                       (df$strand.y!="+" & df$strand.y!="-" &
+                        df$strand.y!="TOP" & df$strand.y!="BOT"))
+        if (length(toerX) > 0) {
+                cat("GC coding without +/-/TOP/BOT strand info in",
+                    name.y, "\n")
+                cat(length(toerX), "SNPs nullified\n")
+                df$n.y[toerX] <- NA
+                df$beta.y[toerX] <- NA
+        }
+        gc()
+
+        swcode <- which(df$effallele.x != df$effallele.y |
+                        df$allele1.x != df$allele1.y |
+                        df$allele2.x != df$allele2.y)
+        if (length(swcode) > 0) {
+                cat("Different coding in two populaions\n")
+                cat(length(swcode), "SNPs removed\n")
+                tmp <- df[swcode, ]
+                write.csv(tmp, file="failedcode.csv", row.names=FALSE)
+                df <- df[!(c(1:(dim(df)[1])) %in% swcode), ]
+        }
+        gc()
+
+        swcode <- which(is.na(df$beta.x) & is.na(df$beta.y))
+        if (length(swcode)>0) {
+                cat("NA for betas in both populaions\n")
+                cat(length(swcode), "SNPs removed\n")
+                df <- df[!(c(1:(dim(df)[1])) %in% swcode), ]
+        }
+        gc()
+
+        ## OK
+        cat("analysing ... \n")
 #	if (pop==pops[2]) name.x <- pops[1] else name.x <- "POOLED"
-	metaout <- dometa(df,name.x=name.x,name.y=name.y,precorrect=precorrect,correct.pooled=correct.pooled,pos.x,pos.y,chr.x,chr.y)
-	cat("... DONE\n")
-	gc()
-	metaout
+        metaout <- dometa(df, name.x=name.x, name.y=name.y,
+                          precorrect=precorrect,
+                          correct.pooled=correct.pooled,
+                          pos.x, pos.y, chr.x, chr.y)
+        cat("... DONE\n")
+        gc()
+        metaout
 }
 
-dometa <- function(data,name.x,name.y,precorrect=FALSE,correct.pooled=FALSE,pos.x,pos.y,chr.x,chr.y) {
+
+dometa <- function(data, name.x, name.y, precorrect=FALSE,
+                   correct.pooled=FALSE, pos.x, pos.y, chr.x, chr.y) {
 #	prop <- 0.98
-	chi2med <- qchisq(.5,1)
-	b.x <- (data$beta.x)
-	b.y <- (data$beta.y)
-	se.x <- abs(data$sebeta.x)
-	se.y <- abs(data$sebeta.y)
-	if (name.x == "POOLED" & !correct.pooled) lam.x <- 1.0 else lam.x <- median(b.x*b.x/(se.x*se.x),na.rm=T)/chi2med
-	lam.y <- median(b.y*b.y/(se.y*se.y),na.rm=T)/chi2med
-	if (name.x != "POOLED") 
-		cat("Lambda",name.x,"=",lam.x,"\n")
-	else if (correct.pooled) cat("Lambda POOLED previous =",lam.x,"\n")
-	cat("Lambda",name.y,"=",lam.y,"\n")
-	if (lam.x < 1) {warning(paste("Lambda",name.x,"<1; constrained to 1"),immediate.=T); lam.x <- 1}
-	if (lam.y < 1) {warning(paste("Lambda",name.y,"<1; constrained to 1"),immediate.=T); lam.y <- 1}
-	if (precorrect & name.x!="POOLED") {
-		se.x <- sqrt(se.x*se.x*lam.x)
-	}
-	if (precorrect) {
-		se.y <- sqrt(se.y*se.y*lam.y)
-	}
-	lam.x <- median(b.x*b.x/(se.x*se.x),na.rm=T)/chi2med
-	lam.y <- median(b.y*b.y/(se.y*se.y),na.rm=T)/chi2med
-	if (name.x != "POOLED") 
-		cat("Corrected Lambda",name.x,"=",lam.x,"\n")
-	else if (correct.pooled) cat("Corrected Lambda POOLED previous =",lam.x,"\n")
-	cat("Corrected Lambda",name.y,"=",lam.y,"\n")
-	w2.x <- 1./(se.x*se.x)
-	w2.y <- 1./(se.y*se.y)
-	invsumw2 <- 1./(w2.x+w2.y)
-	mbeta <- (b.x*w2.x + b.y*w2.y)*invsumw2
-	mse <- sqrt(invsumw2)
-	mp <- pchisq(mbeta*mbeta/(mse*mse),1,lower=F)
-	if (name.x != "POOLED") 
-		npops <- 1*(!is.na(data$beta.x)) + 1*(!is.na(data$beta.y))
-	else {
-		npops <- data$npops + 1*(!is.na(data$beta.y))
-		if (any(is.na(npops))) npops[is.na(npops)] <- 1*(!is.na(data$beta.x[is.na(npops)])) + 1*(!is.na(data$beta.y[is.na(npops)]))
-	}
-	na.x <- which(is.na(b.x) & !is.na(b.y))
-	na.y <- which(is.na(b.y) & !is.na(b.x))
+        chi2med <- qchisq(.5, 1)
+        b.x <- (data$beta.x)
+        b.y <- (data$beta.y)
+        se.x <- abs(data$sebeta.x)
+        se.y <- abs(data$sebeta.y)
+
+        if (name.x == "POOLED" & !correct.pooled) {
+            lam.x <- 1.0
+        } else {
+            lam.x <- median(b.x * b.x / (se.x * se.x), na.rm=TRUE) / chi2med
+        }
+
+        lam.y <- median(b.y * b.y / (se.y * se.y), na.rm=TRUE) / chi2med
+
+        if (name.x != "POOLED")
+            cat("Lambda", name.x, "=", lam.x, "\n")
+        else if (correct.pooled)
+            cat("Lambda POOLED previous =", lam.x, "\n")
+
+        cat("Lambda", name.y, "=", lam.y, "\n")
+        if (lam.x < 1) {
+            warning(paste("Lambda", name.x, "<1; constrained to 1"),
+                    immediate.=TRUE); lam.x <- 1
+        }
+        if (lam.y < 1) {
+            warning(paste("Lambda", name.y, "<1; constrained to 1"),
+                    immediate.=TRUE); lam.y <- 1
+        }
+        if (precorrect & name.x!="POOLED") {
+            se.x <- sqrt(se.x*se.x*lam.x)
+        }
+        if (precorrect) {
+            se.y <- sqrt(se.y*se.y*lam.y)
+        }
+
+        lam.x <- median(b.x * b.x / (se.x * se.x), na.rm=TRUE)/chi2med
+        lam.y <- median(b.y * b.y / (se.y * se.y), na.rm=TRUE)/chi2med
+
+        if (name.x != "POOLED")
+            cat("Corrected Lambda", name.x, "=", lam.x, "\n")
+        else if (correct.pooled)
+            cat("Corrected Lambda POOLED previous =", lam.x, "\n")
+
+        cat("Corrected Lambda", name.y, "=", lam.y, "\n")
+        w2.x     <- 1./(se.x * se.x)
+        w2.y     <- 1./(se.y * se.y)
+        invsumw2 <- 1./(w2.x + w2.y)
+        mbeta    <- (b.x*w2.x + b.y*w2.y)*invsumw2
+        mse      <- sqrt(invsumw2)
+        mp       <- pchisq(mbeta*mbeta/(mse*mse), 1, lower=FALSE)
+
+        if (name.x != "POOLED") {
+            npops <- 1*(!is.na(data$beta.x)) + 1*(!is.na(data$beta.y))
+        } else {
+            npops <- data$npops + 1*(!is.na(data$beta.y))
+            if (any(is.na(npops))) {
+                npops[is.na(npops)] <-
+                    1 * (!is.na(data$beta.x[is.na(npops)])) +
+                        1 * (!is.na(data$beta.y[is.na(npops)]))
+            }
+        }
+        na.x <- which(is.na(b.x) & !is.na(b.y))
+        na.y <- which(is.na(b.y) & !is.na(b.x))
 #	na.xy <- which(is.na(b.y) & is.na(b.x))
 #	if (length(na.xy)>0) stop("Number of subjects missing in both data sets...")
-	out <- data.frame(name=data$name,stringsAsFactors=F)
-	out$strand <- data$strand.x
-	out$allele1 <- data$allele1.x
-	out$allele2 <- data$allele2.x
-	out$effallele <- data$effallele.x
-	if (chr.x & chr.y) {
-		out$chromosome <- data$chromosome.x
-		out$chromosome[na.x] <- data$chromosome.y[na.x]
-	} else if (chr.x) {
-		out$chromosome <- data$chromosome.x
-	} else if (chr.y) {
-		out$chromosome <- data$chromosome.y
-	}
-#	print(c(pos.x,pos.y))
-	if (pos.x & pos.y) {
-		out$position <- data$position.x
-		out$position[na.x] <- data$position.y[na.x]
-	} else if (pos.x | pos.y) {
-		out$position <- data$position
-	}
-	out$strand[na.x] <- data$strand.y[na.x]
-	out$allele1[na.x] <- data$allele1.y[na.x]
-	out$allele2[na.x] <- data$allele2.y[na.x]
-	out$effallele[na.x] <- data$effallele.y[na.x]
-	out$n <- data$n.x + data$n.y
-	out$n[na.x] <- data$n.y[na.x]
-	out$n[na.y] <- data$n.x[na.y]
-	out$npops <- npops
-	out$beta <- mbeta
-	out$sebeta <- mse
-	out$beta[na.x] <- b.y[na.x]
-	out$sebeta[na.x] <- se.y[na.x]
-	out$beta[na.y] <- b.x[na.y]
-	out$sebeta[na.y] <- se.x[na.y]
+        out           <- data.frame(name=data$name, stringsAsFactors=FALSE)
+        out$strand    <- data$strand.x
+        out$allele1   <- data$allele1.x
+        out$allele2   <- data$allele2.x
+        out$effallele <- data$effallele.x
 
-	na.x <- which(is.na(data$effallelefreq.x))
-	na.y <- which(is.na(data$effallelefreq.y))
-	nNAb <- which(!is.na(data$effallelefreq.x) & !is.na(data$effallelefreq.y))
-	out$effallelefreq <- rep(NA,dim(data)[1])
-	out$effallelefreq[nNAb] <- (data$effallelefreq.x[nNAb]*data$n.x[nNAb]+data$effallelefreq.y[nNAb]*data$n.y[nNAb])/(data$n.x[nNAb]+data$n.y[nNAb])
-	out$effallelefreq[na.x] <- data$effallelefreq.y[na.x]
-	out$effallelefreq[na.y] <- data$effallelefreq.x[na.y]
+        if (chr.x & chr.y) {
+            out$chromosome <- data$chromosome.x
+            out$chromosome[na.x] <- data$chromosome.y[na.x]
+        } else if (chr.x) {
+            out$chromosome <- data$chromosome.x
+        } else if (chr.y) {
+            out$chromosome <- data$chromosome.y
+        }
+#	print(c(pos.x, pos.y))
+
+        if (pos.x & pos.y) {
+            out$position <- data$position.x
+            out$position[na.x] <- data$position.y[na.x]
+        } else if (pos.x | pos.y) {
+            out$position <- data$position
+        }
+
+        out$strand[na.x] <- data$strand.y[na.x]
+        out$allele1[na.x] <- data$allele1.y[na.x]
+        out$allele2[na.x] <- data$allele2.y[na.x]
+        out$effallele[na.x] <- data$effallele.y[na.x]
+        out$n <- data$n.x + data$n.y
+        out$n[na.x] <- data$n.y[na.x]
+        out$n[na.y] <- data$n.x[na.y]
+        out$npops <- npops
+        out$beta <- mbeta
+        out$sebeta <- mse
+        out$beta[na.x] <- b.y[na.x]
+        out$sebeta[na.x] <- se.y[na.x]
+        out$beta[na.y] <- b.x[na.y]
+        out$sebeta[na.y] <- se.x[na.y]
+
+        na.x <- which(is.na(data$effallelefreq.x))
+        na.y <- which(is.na(data$effallelefreq.y))
+        nNAb <- which(!is.na(data$effallelefreq.x) &
+                      !is.na(data$effallelefreq.y))
+
+        out$effallelefreq <- rep(NA, dim(data)[1])
+        out$effallelefreq[nNAb] <- (data$effallelefreq.x[nNAb] *
+                                    data$n.x[nNAb] +
+                                    data$effallelefreq.y[nNAb] *
+                                    data$n.y[nNAb]) / (data$n.x[nNAb]
+                                                       +
+                                                       data$n.y[nNAb])
+        out$effallelefreq[na.x] <- data$effallelefreq.y[na.x]
+        out$effallelefreq[na.y] <- data$effallelefreq.x[na.y]
 #	print(data$effallelefreq.x)
 #	print(data$effallelefreq.y)
 #	print(out$effallelefreq)
 
-	na.x <- which(is.na(data$call.x))
-	na.y <- which(is.na(data$call.y))
-	nNAb <- which(!is.na(data$call.x) & !is.na(data$call.y))
-	out$call <- rep(NA,dim(data)[1])
+        na.x <- which(is.na(data$call.x))
+        na.y <- which(is.na(data$call.y))
+        nNAb <- which(!is.na(data$call.x) & !is.na(data$call.y))
+        out$call <- rep(NA, dim(data)[1])
 #	print(na.x)
 #	print(nNAb)
-	out$call[nNAb] <- (data$call.x[nNAb]*data$n.x[nNAb]+data$call.y[nNAb]*data$n.y[nNAb])/(data$n.x[nNAb]+data$n.y[nNAb])
-	out$call[na.x] <- data$call.y[na.x]
-	out$call[na.y] <- data$call.x[na.y]
+        out$call[nNAb] <- (data$call.x[nNAb] * data$n.x[nNAb] +
+                           data$call.y[nNAb] * data$n.y[nNAb]) /
+                               (data$n.x[nNAb] + data$n.y[nNAb])
+        out$call[na.x] <- data$call.y[na.x]
+        out$call[na.y] <- data$call.x[na.y]
 
 #	print(data$pexhwe.y)
-	if (all(data$pexhwe.x <= 1.0,na.rm=T)) {data$pexhwe.x <- qchisq(1.-data$pexhwe.x,1)}
-	if (all(data$pexhwe.y <= 1.0,na.rm=T)) {data$pexhwe.y <- qchisq(1.-data$pexhwe.y,1)}
+        if (all(data$pexhwe.x <= 1.0, na.rm=TRUE)) {
+            data$pexhwe.x <- qchisq(1. - data$pexhwe.x, 1)
+        }
+        if (all(data$pexhwe.y <= 1.0, na.rm=TRUE)) {
+            data$pexhwe.y <- qchisq(1. - data$pexhwe.y, 1)
+        }
 #	print(data$pexhwe.y)
-	na.x <- which(is.na(data$pexhwe.x))
-	na.y <- which(is.na(data$pexhwe.y))
-	nNAb <- which(!is.na(data$pexhwe.x) & !is.na(data$pexhwe.y))
-	out$pexhwe <-  rep(NA,dim(data)[1])
-	out$pexhwe[nNAb] <- data$pexhwe.x[nNAb]+data$pexhwe.y[nNAb]
-	out$pexhwe[na.x] <- data$pexhwe.y[na.x]
-	out$pexhwe[na.y] <- data$pexhwe.x[na.y]
+        na.x <- which(is.na(data$pexhwe.x))
+        na.y <- which(is.na(data$pexhwe.y))
+        nNAb <- which(!is.na(data$pexhwe.x) & !is.na(data$pexhwe.y))
+        out$pexhwe <-  rep(NA, dim(data)[1])
+        out$pexhwe[nNAb] <- data$pexhwe.x[nNAb]+data$pexhwe.y[nNAb]
+        out$pexhwe[na.x] <- data$pexhwe.y[na.x]
+        out$pexhwe[na.y] <- data$pexhwe.x[na.y]
 
-	s <- match(substr(names(data),1,5),"obeta")
-	if (any(!is.na(s))) s <- which(!is.na(s))
-	if (any(!is.na(s))) out <- cbind(out,data[,names(data)[s]])
-	if (name.x != "POOLED") out[,paste("obeta",name.x,sep="")] <- b.x 
-	out[,paste("obeta",name.y,sep="")] <- b.y 
-	s <- match(substr(names(data),1,3),"ose")
-	if (any(!is.na(s))) s <- which(!is.na(s))
-	if (any(!is.na(s))) out <- cbind(out,data[,names(data)[s]])
-	if (name.x != "POOLED") out[,paste("ose",name.x,sep="")] <- se.x 
-	out[,paste("ose",name.y,sep="")] <- se.y 
-	cat("Lambda POOLED data =",median((out$beta/out$sebeta)^2,na.rm=T)/chi2med,"\n")
-	out$chi2 <- out$beta*out$beta/(out$sebeta*out$sebeta)
-	out$p <- pchisq(out$chi2,1,lower=F)
-	out
-}
+        s <- match(substr(names(data), 1, 5), "obeta")
+        if (any(!is.na(s))) s <- which(!is.na(s))
+        if (any(!is.na(s))) out <- cbind(out, data[, names(data)[s]])
+        if (name.x != "POOLED") out[, paste("obeta", name.x,sep="")] <- b.x
 
-swap.y <- function(df,swcode) {
-	df$beta.y[swcode] <- (-1.)*df$beta.y[swcode]
-	df$effallelefreq.y[swcode] <- (1. - df$effallelefreq.y[swcode])
-	a1.save <- df$allele1.y[swcode]
-	a2.save <- df$allele2.y[swcode]
-	df$effallele.y[swcode] <- a1.save
-	df$allele1.y[swcode] <- a2.save 
-	df$allele2.y[swcode] <- a1.save
-	df
+        out[, paste("obeta", name.y,sep="")] <- b.y
+        s <- match(substr(names(data), 1, 3), "ose")
+
+        if (any(!is.na(s))) s <- which(!is.na(s))
+        if (any(!is.na(s))) out <- cbind(out, data[, names(data)[s]])
+        if (name.x != "POOLED") out[, paste("ose", name.x,sep="")] <- se.x
+
+        out[, paste("ose", name.y,sep="")] <- se.y
+        cat("Lambda POOLED data =", median((out$beta / out$sebeta)^2,
+                                           na.rm=TRUE) / chi2med, "\n")
+        out$chi2 <- out$beta * out$beta / (out$sebeta * out$sebeta)
+        out$p <- pchisq(out$chi2, 1, lower=FALSE)
+        out
 }
 
-swap.x <- function(df,swcode) {
-	df$beta.x[swcode] <- (-1.)*df$beta.x[swcode]
-	df$effallelefreq.x[swcode] <- (1. - df$effallelefreq.x[swcode])
-	a1.save <- df$allele1.x[swcode]
-	a2.save <- df$allele2.x[swcode]
-	df$effallele.x[swcode] <- a1.save
-	df$allele1.x[swcode] <- a2.save 
-	df$allele2.x[swcode] <- a1.save
-	df
+
+swap.y <- function(df, swcode) {
+        df$beta.y[swcode] <- (-1.) * df$beta.y[swcode]
+        df$effallelefreq.y[swcode] <- (1. - df$effallelefreq.y[swcode])
+        a1.save <- df$allele1.y[swcode]
+        a2.save <- df$allele2.y[swcode]
+        df$effallele.y[swcode] <- a1.save
+        df$allele1.y[swcode]   <- a2.save
+        df$allele2.y[swcode]   <- a1.save
+        df
 }
 
-recode.y <- function(df,swcode) {
-	a2t <- swcode[df$effallele.y[swcode]=="A"]
-	t2a <- swcode[df$effallele.y[swcode]=="T"]
-	g2c <- swcode[df$effallele.y[swcode]=="G"]
-	c2g <- swcode[df$effallele.y[swcode]=="C"]
-	df$effallele.y[a2t] <-"T"
-	df$effallele.y[t2a] <-"A"
-	df$effallele.y[g2c] <-"C"
-	df$effallele.y[c2g] <-"G"
-	a2t <- swcode[df$allele1.y[swcode]=="A"]
-	t2a <- swcode[df$allele1.y[swcode]=="T"]
-	g2c <- swcode[df$allele1.y[swcode]=="G"]
-	c2g <- swcode[df$allele1.y[swcode]=="C"]
-	df$allele1.y[a2t] <-"T"
-	df$allele1.y[t2a] <-"A"
-	df$allele1.y[g2c] <-"C"
-	df$allele1.y[c2g] <-"G"
-	a2t <- swcode[df$allele2.y[swcode]=="A"]
-	t2a <- swcode[df$allele2.y[swcode]=="T"]
-	g2c <- swcode[df$allele2.y[swcode]=="G"]
-	c2g <- swcode[df$allele2.y[swcode]=="C"]
-	df$allele2.y[a2t] <-"T"
-	df$allele2.y[t2a] <-"A"
-	df$allele2.y[g2c] <-"C"
-	df$allele2.y[c2g] <-"G"
-	f2r <- swcode[df$strand.y[swcode]=="+"]
-	r2f <- swcode[df$strand.y[swcode]=="-"]
-	t2b <- swcode[df$strand.y[swcode]=="TOP"]
-	b2t <- swcode[df$strand.y[swcode]=="BOT"]
-	df$strand.y[f2r] <- "-"
-	df$strand.y[r2f] <- "+"
-	df$strand.y[t2b] <- "BOT"
-	df$strand.y[b2t] <- "TOP"
-	if (any(is.na(df$strand.y[swcode]))) warning("NAs in strand!")
-	if (any(df$strand.y[swcode]!="+" & df$strand.y[swcode]!="-" & df$strand.y[swcode]!="TOP" & df$strand.y[swcode]!="BOT")) warning("Unrecognised coding in strand!")
-	df
+
+swap.x <- function(df, swcode) {
+        df$beta.x[swcode] <- (-1.) * df$beta.x[swcode]
+        df$effallelefreq.x[swcode] <- (1. - df$effallelefreq.x[swcode])
+        a1.save <- df$allele1.x[swcode]
+        a2.save <- df$allele2.x[swcode]
+        df$effallele.x[swcode] <- a1.save
+        df$allele1.x[swcode]   <- a2.save
+        df$allele2.x[swcode]   <- a1.save
+        df
 }
 
 
+recode.y <- function(df, swcode) {
+        a2t <- swcode[df$effallele.y[swcode]=="A"]
+        t2a <- swcode[df$effallele.y[swcode]=="T"]
+        g2c <- swcode[df$effallele.y[swcode]=="G"]
+        c2g <- swcode[df$effallele.y[swcode]=="C"]
+        df$effallele.y[a2t] <-"T"
+        df$effallele.y[t2a] <-"A"
+        df$effallele.y[g2c] <-"C"
+        df$effallele.y[c2g] <-"G"
+        a2t <- swcode[df$allele1.y[swcode]=="A"]
+        t2a <- swcode[df$allele1.y[swcode]=="T"]
+        g2c <- swcode[df$allele1.y[swcode]=="G"]
+        c2g <- swcode[df$allele1.y[swcode]=="C"]
+        df$allele1.y[a2t] <-"T"
+        df$allele1.y[t2a] <-"A"
+        df$allele1.y[g2c] <-"C"
+        df$allele1.y[c2g] <-"G"
+        a2t <- swcode[df$allele2.y[swcode]=="A"]
+        t2a <- swcode[df$allele2.y[swcode]=="T"]
+        g2c <- swcode[df$allele2.y[swcode]=="G"]
+        c2g <- swcode[df$allele2.y[swcode]=="C"]
+        df$allele2.y[a2t] <-"T"
+        df$allele2.y[t2a] <-"A"
+        df$allele2.y[g2c] <-"C"
+        df$allele2.y[c2g] <-"G"
+        f2r <- swcode[df$strand.y[swcode]=="+"]
+        r2f <- swcode[df$strand.y[swcode]=="-"]
+        t2b <- swcode[df$strand.y[swcode]=="TOP"]
+        b2t <- swcode[df$strand.y[swcode]=="BOT"]
+        df$strand.y[f2r] <- "-"
+        df$strand.y[r2f] <- "+"
+        df$strand.y[t2b] <- "BOT"
+        df$strand.y[b2t] <- "TOP"
+
+        if (any(is.na(df$strand.y[swcode]))) {
+            warning("NAs in strand!")
+        }
+
+        if (any(df$strand.y[swcode]!="+" &
+                df$strand.y[swcode]!="-" &
+                df$strand.y[swcode]!="TOP" &
+                df$strand.y[swcode]!="BOT")) {
+            warning("Unrecognised coding in strand!")
+        }
+        df
+}



More information about the Genabel-commits mailing list