[Genabel-commits] r1791 - pkg/MixABEL/R

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Sun Aug 10 17:22:52 CEST 2014


Author: lckarssen
Date: 2014-08-10 17:22:52 +0200 (Sun, 10 Aug 2014)
New Revision: 1791

Modified:
   pkg/MixABEL/R/fgls.R
Log:
Only whitespace changes (removal of white space at end of line and conversion of tabs to spaces).)


Modified: pkg/MixABEL/R/fgls.R
===================================================================
--- pkg/MixABEL/R/fgls.R	2014-08-10 14:35:31 UTC (rev 1790)
+++ pkg/MixABEL/R/fgls.R	2014-08-10 15:22:52 UTC (rev 1791)
@@ -1,181 +1,181 @@
 #' Feasible GLS
-#' 
+#'
 #' Feasible Generalised Least Squares
-#' 
+#'
 #' @param Y dependent variable
 #' @param X design matrix (including intercept, if necessary)
 #' @param test test to be applied, one of 'wald', 'score' or 'robust'
 #' @param whichtest which independent variables to be tested (set to 'TRUE')
-#' @param W for GLS, inverse variance-covariance matrix, as such returned by 
+#' @param W for GLS, inverse variance-covariance matrix, as such returned by
 #' GenABEL's polygenic(...)$InvSigma, or NULL for LS
-#' 
-#' @return List with elements 'beta' -- estimates fo the regression 
-#' coefficients; 'V' -- variance covariance matrix for parameters 
-#' estimates; 'T2' -- test statistics (distributed as Chi-squared under 
-#' the null) for the testing of whichtest parameters; 'df' -- the number of 
-#' degrees of freedom of the T2 test; 'tested' -- which parameters were 
+#'
+#' @return List with elements 'beta' -- estimates fo the regression
+#' coefficients; 'V' -- variance covariance matrix for parameters
+#' estimates; 'T2' -- test statistics (distributed as Chi-squared under
+#' the null) for the testing of whichtest parameters; 'df' -- the number of
+#' degrees of freedom of the T2 test; 'tested' -- which parameters were
 #' tested with T2; 'meanColX' -- mean value of variable in columns of X;
 #' 'n' -- length of Y (== height of X)
 #'
 #' @author Yurii Aulchenko
 #'
-FGLS <- function(Y,X,W=NULL,test="wald",whichtest = c(FALSE,rep(TRUE,dim(X)[2]-1))) 
+FGLS <- function(Y,X,W=NULL,test="wald",whichtest = c(FALSE,rep(TRUE,dim(X)[2]-1)))
 {
-	# do checks
-	if (!is(whichtest,"logical") || length(whichtest)!=dim(X)[2]) 
-		stop("argument whichtest should be logical of length dim(X)[2]")
-	if (dim(X)[1] != length(Y))
-		stop("dimensions of X and Y do not match")
-	if (any(is.na(Y)) || any(is.na(X))) {
-		warning("missing data points in Y or X, dropping")
-		IScomplete <- !is.na(Y) & complete.cases(X)
-		Y <- Y[IScomplete]
-		X <- X[IScomplete,]
-		if (!is.null(W)) W <- W[IScomplete,IScomplete]
-	}
-	
-	# precompute tXW
-	if (is.null(W)) {
-		tXW <- t(X)
-	} else {
-		tXW <- t(X) %*% W
-	}
-	#print(tXW)
-	
-	# estimate beta
-	XpWXm1 <- ginv(tXW %*% X)
-	XpWY <- tXW %*% Y
-	betaW <- XpWXm1 %*% XpWY
-	#print(XpWXm1)
-	
-	# estimate V
-	if (test=="wald") {
-		YmXB <- Y - X %*% betaW
-		if (is.null(W)) {
-			sigma2 <- as.vector(((t(YmXB) %*% YmXB)/(dim(X)[1] - dim(X)[2])))
-		} else {
-			sigma2 <- as.vector(((t(YmXB) %*% W %*% YmXB)/(dim(X)[1] - dim(X)[2])))
-		}
-		#print(sigma2)
-		V <- sigma2*XpWXm1
-	} else if (test=="score") {
-		Xm <- X[,!whichtest,drop=FALSE]
-		if (is.null(W)) {
-			bt <- ginv(t(Xm) %*% Xm) %*% (t(Xm) %*% Y)
-			YmXB <- Y - Xm %*% bt
-			sigma2 <- as.vector(((t(YmXB) %*% YmXB)/(dim(Xm)[1] - dim(Xm)[2])))
-		} else {
-			bt <- ginv(t(Xm) %*% W %*% Xm) %*% (t(Xm) %*% W %*% Y)
-			YmXB <- Y - Xm %*% bt
-			sigma2 <- as.vector(((t(YmXB) %*% W %*% YmXB)/(dim(Xm)[1] - dim(Xm)[2])))
-		}
-		V <- sigma2*XpWXm1
-	} else if (test=="robust") {
-		
-		YmXB <- Y - X %*% betaW
-		if (is.null(W)) {
-			sigma2vec <- as.vector(YmXB * YmXB)
-		} else {
-			sigma2vec <- as.vector(as.vector(t(YmXB) %*% W) * YmXB)
-		}
+        # do checks
+        if (!is(whichtest,"logical") || length(whichtest)!=dim(X)[2])
+                stop("argument whichtest should be logical of length dim(X)[2]")
+        if (dim(X)[1] != length(Y))
+                stop("dimensions of X and Y do not match")
+        if (any(is.na(Y)) || any(is.na(X))) {
+                warning("missing data points in Y or X, dropping")
+                IScomplete <- !is.na(Y) & complete.cases(X)
+                Y <- Y[IScomplete]
+                X <- X[IScomplete,]
+                if (!is.null(W)) W <- W[IScomplete,IScomplete]
+        }
+
+        # precompute tXW
+        if (is.null(W)) {
+                tXW <- t(X)
+        } else {
+                tXW <- t(X) %*% W
+        }
+        #print(tXW)
+
+        # estimate beta
+        XpWXm1 <- ginv(tXW %*% X)
+        XpWY <- tXW %*% Y
+        betaW <- XpWXm1 %*% XpWY
+        #print(XpWXm1)
+
+        # estimate V
+        if (test=="wald") {
+                YmXB <- Y - X %*% betaW
+                if (is.null(W)) {
+                        sigma2 <- as.vector(((t(YmXB) %*% YmXB)/(dim(X)[1] - dim(X)[2])))
+                } else {
+                        sigma2 <- as.vector(((t(YmXB) %*% W %*% YmXB)/(dim(X)[1] - dim(X)[2])))
+                }
+                #print(sigma2)
+                V <- sigma2*XpWXm1
+        } else if (test=="score") {
+                Xm <- X[,!whichtest,drop=FALSE]
+                if (is.null(W)) {
+                        bt <- ginv(t(Xm) %*% Xm) %*% (t(Xm) %*% Y)
+                        YmXB <- Y - Xm %*% bt
+                        sigma2 <- as.vector(((t(YmXB) %*% YmXB)/(dim(Xm)[1] - dim(Xm)[2])))
+                } else {
+                        bt <- ginv(t(Xm) %*% W %*% Xm) %*% (t(Xm) %*% W %*% Y)
+                        YmXB <- Y - Xm %*% bt
+                        sigma2 <- as.vector(((t(YmXB) %*% W %*% YmXB)/(dim(Xm)[1] - dim(Xm)[2])))
+                }
+                V <- sigma2*XpWXm1
+        } else if (test=="robust") {
+
+                YmXB <- Y - X %*% betaW
+                if (is.null(W)) {
+                        sigma2vec <- as.vector(YmXB * YmXB)
+                } else {
+                        sigma2vec <- as.vector(as.vector(t(YmXB) %*% W) * YmXB)
+                }
 #  V <- XpWXm1 %*% (t(X) %*% W %*% (diag(as(YmXB,"vector")^2) %*% ginv(W)) %*% W %*% X) %*% XpWXm1
 #  V <- XpWXm1 %*% (t(X) %*% W %*% diag(as(YmXB,"vector")^2) %*% X) %*% XpWXm1
 #  V <- XpWXm1 %*% (t(X) %*% diag(as(YmXB,"vector")^2) %*% X) %*% XpWXm1
 #  V <- ginv(t(X)%*%X) %*% (t(X) %*% diag(as(YmXB,"vector")^2) %*% X) %*% ginv(t(X)%*%X)
-		##	V <- XpWXm1 %*% (tXW %*% diag(as(YmXB,"vector")^2) %*% X) %*% XpWXm1
-		V <- XpWXm1 %*% (tXW %*% diag(sigma2vec) %*% X) %*% XpWXm1
+                ##	V <- XpWXm1 %*% (tXW %*% diag(as(YmXB,"vector")^2) %*% X) %*% XpWXm1
+                V <- XpWXm1 %*% (tXW %*% diag(sigma2vec) %*% X) %*% XpWXm1
 # print(sigma2vec)
 #		V <- XpWXm1 %*% (tXW %*% diag(sigma2vec) %*% t(tXW)) %*% XpWXm1
-	} else {
-		stop("test not recognised")
-	}
-	
-	# do the test
-	if (sum(whichtest)>1) {
-		Vinv <- ginv(V)
-		#print("V")
-		#print(V)
-		#print("Vinv")
-		#print(Vinv)
-		#print("det(V)")
-		#print(det(V))
-		#print(log(abs(det(V))))
-		Vbg <- Vinv[whichtest,whichtest]-Vinv[whichtest,!whichtest] %*% ginv(Vinv[!whichtest,!whichtest]) %*% Vinv[!whichtest,whichtest]
-		#Vbg <- Vinv[whichtest,whichtest]-Vinv[whichtest,!whichtest] %*% V[!whichtest,!whichtest,drop=F] %*% Vinv[!whichtest,whichtest]
-		T2 <- t(betaW[whichtest]) %*% Vbg %*% betaW[whichtest]
-	} else if (sum(whichtest) == 1) {
-		T2 <- betaW[whichtest]^2/diag(V)[whichtest]
-	} else {
-		stop("unreachable statement")
-	}
-	
-	rownames(betaW) <- colnames(X)
-	dimnames(V) <- list(colnames(X),colnames(X))
-	out <- list(beta=betaW,V=V,T2=T2,df=sum(whichtest),tested=whichtest,
-			meanColX = apply(X,FUN=mean,MAR=2), n = dim(X)[1])
-	class(out) <- "FGLS"
-	out
+        } else {
+                stop("test not recognised")
+        }
+
+        # do the test
+        if (sum(whichtest)>1) {
+                Vinv <- ginv(V)
+                #print("V")
+                #print(V)
+                #print("Vinv")
+                #print(Vinv)
+                #print("det(V)")
+                #print(det(V))
+                #print(log(abs(det(V))))
+                Vbg <- Vinv[whichtest,whichtest]-Vinv[whichtest,!whichtest] %*% ginv(Vinv[!whichtest,!whichtest]) %*% Vinv[!whichtest,whichtest]
+                #Vbg <- Vinv[whichtest,whichtest]-Vinv[whichtest,!whichtest] %*% V[!whichtest,!whichtest,drop=F] %*% Vinv[!whichtest,whichtest]
+                T2 <- t(betaW[whichtest]) %*% Vbg %*% betaW[whichtest]
+        } else if (sum(whichtest) == 1) {
+                T2 <- betaW[whichtest]^2/diag(V)[whichtest]
+        } else {
+                stop("unreachable statement")
+        }
+
+        rownames(betaW) <- colnames(X)
+        dimnames(V) <- list(colnames(X),colnames(X))
+        out <- list(beta=betaW,V=V,T2=T2,df=sum(whichtest),tested=whichtest,
+                        meanColX = apply(X,FUN=mean,MAR=2), n = dim(X)[1])
+        class(out) <- "FGLS"
+        out
 }
 #' summary for FGLS
-#' 
+#'
 #' summary for Feasible GLS
-#' 
+#'
 #' @param x object returned by FGLS function
-#' @param verbosity what to report; 0 -- only test stats, 
-#' 1 -- test stats and estimates concerning 'whichtest' parameters, 
+#' @param verbosity what to report; 0 -- only test stats,
+#' 1 -- test stats and estimates concerning 'whichtest' parameters,
 #' 2 -- test stats and estimates of all parameters
-#' @param varcov wheather var-cov matrix for estimated parameters 
+#' @param varcov wheather var-cov matrix for estimated parameters
 #' is to be reported
-#' @param include.means weather mean values of predictors are to 
+#' @param include.means weather mean values of predictors are to
 #' be reported
-#' 
+#'
 #' @return one-line summary of output from FGLS function
-#' 
+#'
 #' @author Yurii Aulchenko
 #'
 summaryFGLS <- function(x,verbosity=1,varcov=FALSE,include.means=FALSE) {
-	out <- c(x$n,x$T2,x$df,pchisq(x$T2,df=x$df,low=FALSE))
-	names(out) <- c("n","Chisq","df","P-value")
-	if (verbosity == 0) {
-		return(out)
-	} else if (verbosity == 1) {
-		toout <- x$tested
-	} else if (verbosity == 2) {
-		toout <- rep(TRUE,length(x$tested))
-	}
-	
-	if (is.null(colnames(x$V))) {
-		xnames <- paste("pred",1:dim(x$V)[2],sep="")
-	} else {
-		xnames <- colnames(x$V)[toout]
-	}
-	names.save <- names(out)
-	out <- c(out,x$beta[toout],sqrt(diag(x$V))[toout])
-	names(out) <- c(names.save,paste("beta_",xnames,sep=""),
-			paste("se_",xnames,sep=""))
-	
-	if (include.means) {
-		names.save <- names(out)
-		out <- c(out,x$meanColX[toout])
-		names(out) <- c(names.save,paste("mean_",xnames,sep=""))
-	}
-	if (varcov) {
-		Vout <- x$V[toout,toout]
-		if (length(Vout) > 1) {
-			names.Vout <- outer(rownames(Vout),colnames(Vout),FUN=paste,sep="_")
-			Vout <- Vout[lower.tri(Vout)]
-			names.Vout <- names.Vout[lower.tri(names.Vout)]
-			names.save <- names(out)
-			out <- c(out,Vout)
-			names(out) <- c(names.save,paste("cov_",names.Vout,sep=""))
-		}
-	}
-	return(out)
+        out <- c(x$n,x$T2,x$df,pchisq(x$T2,df=x$df,low=FALSE))
+        names(out) <- c("n","Chisq","df","P-value")
+        if (verbosity == 0) {
+                return(out)
+        } else if (verbosity == 1) {
+                toout <- x$tested
+        } else if (verbosity == 2) {
+                toout <- rep(TRUE,length(x$tested))
+        }
+
+        if (is.null(colnames(x$V))) {
+                xnames <- paste("pred",1:dim(x$V)[2],sep="")
+        } else {
+                xnames <- colnames(x$V)[toout]
+        }
+        names.save <- names(out)
+        out <- c(out,x$beta[toout],sqrt(diag(x$V))[toout])
+        names(out) <- c(names.save,paste("beta_",xnames,sep=""),
+                        paste("se_",xnames,sep=""))
+
+        if (include.means) {
+                names.save <- names(out)
+                out <- c(out,x$meanColX[toout])
+                names(out) <- c(names.save,paste("mean_",xnames,sep=""))
+        }
+        if (varcov) {
+                Vout <- x$V[toout,toout]
+                if (length(Vout) > 1) {
+                        names.Vout <- outer(rownames(Vout),colnames(Vout),FUN=paste,sep="_")
+                        Vout <- Vout[lower.tri(Vout)]
+                        names.Vout <- names.Vout[lower.tri(names.Vout)]
+                        names.save <- names(out)
+                        out <- c(out,Vout)
+                        names(out) <- c(names.save,paste("cov_",names.Vout,sep=""))
+                }
+        }
+        return(out)
 }
 #' Genome-wide FGLS
-#' 
+#'
 #' Genome-wide Feasible GLS
-#' 
+#'
 #' @param formula analysis formula; should contain special 'SNP' term
 #' @param data phenotypic data frame, or 'gwaa.data-class' object
 #' @param subset subset of data (individuals)
@@ -183,35 +183,35 @@
 #' @param na.action RESERVED FOR FURTURE USE
 #' @param contrasts RESERVED FOR FURTURE USE
 #' @param offset RESERVED FOR FURTURE USE
-#' @param W for GLS, (inverse of) variance-covariance matrix, as such returned by 
+#' @param W for GLS, (inverse of) variance-covariance matrix, as such returned by
 #' GenABEL's polygenic(...)\$InvSigma, or NULL for LS
-#' @param inverse wheather W is already inverted 
-#' @param na.SNP how to deal with missing SNP data; 'impute' -- substitute 
+#' @param inverse wheather W is already inverted
+#' @param na.SNP how to deal with missing SNP data; 'impute' -- substitute
 #' with mean, 'drop' -- drop rows with missing genotypes
 #' @param mincall minimall call rate for a SNP (if less, the SNP is dropped)
 #' @param residuals use residuals for analysis? (faster, but less precise)
 #' @param test test to be applied, one of 'wald', 'score' or 'robust'
-#' @param model.SNP SNP model to apply, one of 
+#' @param model.SNP SNP model to apply, one of
 #' c("additive","dominantB","recessiveB","overdominant","genotypic")
 #' @param genodata genotypic data; can be missing if data is of 'gwaa.data-class'.
 #' Otherwise can be regular matrix or 'databel' matrix
 #' @param gtcoding one of c("typed","dose","probability")
 #' 'typed' -- coded with NA, 0, 1, 2
-#' @param verbosity what to report; 0 -- only test stats, 
-#' 1 -- test stats and estimates concerning 'whichtest' parameters, 
+#' @param verbosity what to report; 0 -- only test stats,
+#' 1 -- test stats and estimates concerning 'whichtest' parameters,
 #' 2 -- test stats and estimates of all parameters
-#' @param varcov wheather var-cov matrix for estimated parameters 
+#' @param varcov wheather var-cov matrix for estimated parameters
 #' is to be reported
-#' @param include.means weather mean values of predictors are to 
+#' @param include.means weather mean values of predictors are to
 #' be reported
-#' @param singular what to do with linear dependencies in X (now only 
+#' @param singular what to do with linear dependencies in X (now only
 #' 'ignore' implemented)
-#' @param with.lm whether LM should be run along (only test purposes; always 
+#' @param with.lm whether LM should be run along (only test purposes; always
 #' falls back to 'old' R-only implementation)
-#' @param old if TRUE, old R-only code implementation is running (testing purposes, 
+#' @param old if TRUE, old R-only code implementation is running (testing purposes,
 #' slow)
-#' 
-#' @examples 
+#'
+#' @examples
 #' library(MASS)
 #' library(mvtnorm)
 #' library(GenABEL)
@@ -222,12 +222,12 @@
 #' maf <- pmin(s$Q.2,1-s$Q.2)
 #' df <- df[,(maf>0.05)]
 #' gkin <- ibs(df[,autosomal(df)],w="freq")
-#' 
+#'
 #' modelh2 <- 0.8
 #' covars <- c("sex","age")
 #' s2 <- 12^2
 #' betas_cov <- c(170,12,0.01)
-#' 
+#'
 #' X <- as.matrix(cbind(rep(1,NIDS),phdata(ge03d2.clean)[1:NIDS,covars]))
 #' grel <- gkin
 #' grel[upper.tri(grel)] <- t(grel)[upper.tri(grel)]
@@ -236,12 +236,12 @@
 #' length(Y)
 #' Y[2] <- NA
 #' df@@phdata$Y <- Y
-#' 
+#'
 #' mdl <- Y~age+sex
 #' h2 <- polygenic(mdl,data=df,kin=gkin)
 #' h2$h2an
 #' mdl <- Y~age+sex+SNP
-#' 
+#'
 #' gtOld <- df@@gtdata[,1:10]
 #' gtReal <- as.double(gtOld)
 #' gtNew <- as(gtReal,"databel")
@@ -254,22 +254,22 @@
 #' aIN
 #' aWN
 #' complex_model <- GWFGLS(Y~age+sex*SNP,data=phdata(df),W=h2$InvSigma,
-#' 	genodata = gtReal,verbosity=2,varcov=TRUE,include.means=TRUE,model="genotypic")
+#'      genodata = gtReal,verbosity=2,varcov=TRUE,include.means=TRUE,model="genotypic")
 #' complex_model
-#' 
+#'
 #' @author Yurii Aulchenko
-#' 
 #'
-GWFGLS <- 
-		function (formula, data, 
-				subset, weights, na.action, 
-				contrasts = NULL, offset, 
-				W = NULL, inverse = TRUE,
-				na.SNP = "impute", mincall = 0.95, residuals = FALSE, test = "wald", 
-				model.SNP = "additive",
-				genodata,gtcoding = "typed",verbosity=1, varcov = FALSE, 
-				include.means = TRUE, singular = "ignore", with.lm = FALSE,
-				old=FALSE)
+#'
+GWFGLS <-
+                function (formula, data,
+                                subset, weights, na.action,
+                                contrasts = NULL, offset,
+                                W = NULL, inverse = TRUE,
+                                na.SNP = "impute", mincall = 0.95, residuals = FALSE, test = "wald",
+                                model.SNP = "additive",
+                                genodata,gtcoding = "typed",verbosity=1, varcov = FALSE,
+                                include.means = TRUE, singular = "ignore", with.lm = FALSE,
+                                old=FALSE)
 # test in c("wald","score","robust")
 # model.SNP in c("additive","dominantB","recessiveB","overdominant","genotypic")
 # verbosity 0 : only chi2 and df, 1: only SNP-related; 2 -- all betas and se of betas
@@ -279,296 +279,296 @@
 # singular: what to do with linear dependencies in X? in c("ignore","drop")
 # mincall: do not analyze the SNP if call is less (<) then mincall
 {
-	
-	if (gtcoding == "dose" && model.SNP != "additive") stop("when gtcoding is 'dose', only 'additive' model allowed")
+
+        if (gtcoding == "dose" && model.SNP != "additive") stop("when gtcoding is 'dose', only 'additive' model allowed")
 # how to check consistency between genodata & gtcoding???
-	
-	allstattests <- c("wald","score","robust")
-	intstattests = pmatch(test, allstattests, nomatch = -1, duplicates.ok = FALSE)
-	if (intstattests < 0 || intstattests > length(allstattests)) 
-		stop(paste("stat test",test,"not in any of ",allstattests,"\n"))
-	
-	
+
+        allstattests <- c("wald","score","robust")
+        intstattests = pmatch(test, allstattests, nomatch = -1, duplicates.ok = FALSE)
+        if (intstattests < 0 || intstattests > length(allstattests))
+                stop(paste("stat test",test,"not in any of ",allstattests,"\n"))
+
+
 # start with checking consistency between phen- and geno-data
-	if (is(data,"gwaa.data")) {
-		if (!missing(genodata)) warning("data has gwaa.data-class; dropping data provided by genodata argument")
-		genodata <- gtdata(data)
-		data <- phdata(data)
-	} else {
-		if (missing(genodata)) stop("genodata argument missing with no default")
-	}
-	if (any(dim(data)[1] != dim(genodata)[1])) stop("dimensions of data and genodata do not match")
-	
-	
+        if (is(data,"gwaa.data")) {
+                if (!missing(genodata)) warning("data has gwaa.data-class; dropping data provided by genodata argument")
+                genodata <- gtdata(data)
+                data <- phdata(data)
+        } else {
+                if (missing(genodata)) stop("genodata argument missing with no default")
+        }
+        if (any(dim(data)[1] != dim(genodata)[1])) stop("dimensions of data and genodata do not match")
+
+
 # prepare pheno-data & filter geno-data
-	mydata <- 
-			deliver.data(formula = formula, data = data) #, subset = subset, weights = weights, na.action = na.action, 
-	#contrasts = contrasts, offset = offset) 
-	
-	genodata <- genodata[mydata$keep_IDs,,drop=FALSE]
-	
-	if (residuals) {
-		mydata$Y <- lm.fit(x=mydata$Xx[,mydata$elXx_NOTin_Xg,drop=FALSE],y=mydata$Y,offset=mydata$offset)$residuals
-		mydata$Xx <- mydata$Xx[,mydata$elXx_in_Xg,drop=FALSE]
-		mydata$Xx <- cbind(1,mydata$Xx)
-		dimnames(mydata$Xx)[[2]][1] <- "(Intercept)"
-	}
-	
-# figure out the genotypic data type 
-	charcoding <- c("typed","dose","probability")
-	if (length(gtcoding) != 1) {
-		cat("gtcoding argument (now",gtcoding,") should be of length one, one of",charcoding,"\n")
-		stop()
-	}
-	intcoding = pmatch(gtcoding, charcoding, nomatch = -1, duplicates.ok = FALSE)
-	if (intcoding < 0 || intcoding > length(charcoding)) stop(paste("coding",gtcoding,"not in any of ",charcoding,"\n"))
-	gtcoding <- charcoding[intcoding]
-	step <- 1
-	if (gtcoding == "probability") step <- 2
-	
+        mydata <-
+                        deliver.data(formula = formula, data = data) #, subset = subset, weights = weights, na.action = na.action,
+        #contrasts = contrasts, offset = offset)
+
+        genodata <- genodata[mydata$keep_IDs,,drop=FALSE]
+
+        if (residuals) {
+                mydata$Y <- lm.fit(x=mydata$Xx[,mydata$elXx_NOTin_Xg,drop=FALSE],y=mydata$Y,offset=mydata$offset)$residuals
+                mydata$Xx <- mydata$Xx[,mydata$elXx_in_Xg,drop=FALSE]
+                mydata$Xx <- cbind(1,mydata$Xx)
+                dimnames(mydata$Xx)[[2]][1] <- "(Intercept)"
+        }
+
+# figure out the genotypic data type
+        charcoding <- c("typed","dose","probability")
+        if (length(gtcoding) != 1) {
+                cat("gtcoding argument (now",gtcoding,") should be of length one, one of",charcoding,"\n")
+                stop()
+        }
+        intcoding = pmatch(gtcoding, charcoding, nomatch = -1, duplicates.ok = FALSE)
+        if (intcoding < 0 || intcoding > length(charcoding)) stop(paste("coding",gtcoding,"not in any of ",charcoding,"\n"))
+        gtcoding <- charcoding[intcoding]
+        step <- 1
+        if (gtcoding == "probability") step <- 2
+
 # figure out the model
-	genomodel <- c("additive","dominantB","recessiveB","overdominant","genotypic")
-	if (length(model.SNP) != 1) {
-		cat("model.SNP argument (now",model.SNP,") should be of length one, one of",genomodel,"\n")
-		stop()
-	}
-	intmodelcoding = pmatch(model.SNP, genomodel, nomatch = -1, duplicates.ok = FALSE)
-	if (intmodelcoding < 0 || intmodelcoding > length(genomodel)) stop(paste("coding",model.SNP,"not in any of ",genomodel,"\n"))
-	model.SNP <- genomodel[intmodelcoding]
-	nColG <- 1
-	if (model.SNP == "genotypic") nColG <- 2
-	
-	
+        genomodel <- c("additive","dominantB","recessiveB","overdominant","genotypic")
+        if (length(model.SNP) != 1) {
+                cat("model.SNP argument (now",model.SNP,") should be of length one, one of",genomodel,"\n")
+                stop()
+        }
+        intmodelcoding = pmatch(model.SNP, genomodel, nomatch = -1, duplicates.ok = FALSE)
+        if (intmodelcoding < 0 || intmodelcoding > length(genomodel)) stop(paste("coding",model.SNP,"not in any of ",genomodel,"\n"))
+        model.SNP <- genomodel[intmodelcoding]
+        nColG <- 1
+        if (model.SNP == "genotypic") nColG <- 2
+
+
 # prepare genotypic data
 #  if (!is.null(names(mydata$Y))) idnamesY <- names(mydata$Y)
 #  else if (!is.null(dimnames(mydata$Y)[[1]])) idnamesY <- dimnames(mydata$Y)[[1]]
 #  else idnamesY <- NULL
-	
+
 # Prepare final data
-	finaldata <- list()
-	finaldata$Y <- mydata$Y
-	if (nColG==1) {
-		finaldata$X <- cbind(mydata$Xx,mydata$Xg)
-		finaldata$snpinvolved <- c(rep(0,dim(mydata$Xx)[2]),rep(1,dim(mydata$Xg)[2]))
-	} else if (nColG == 2) {
-		names.save <- colnames(mydata$Xg)
-		colnames(mydata$Xg) <- paste(names.save,".het",sep="")
-		finaldata$X <- cbind(mydata$Xx,mydata$Xg)
-		colnames(mydata$Xg) <- paste(names.save,".hom",sep="")
-		finaldata$X <- cbind(finaldata$X,mydata$Xg)
-		finaldata$snpinvolved <- c(rep(0,dim(mydata$Xx)[2]),rep(1,dim(mydata$Xg)[2]),rep(2,dim(mydata$Xg)[2]))
-	} else {
-		stop("nColG != 1, != 2")
-	}
-	#print(finaldata)
-	
+        finaldata <- list()
+        finaldata$Y <- mydata$Y
+        if (nColG==1) {
+                finaldata$X <- cbind(mydata$Xx,mydata$Xg)
+                finaldata$snpinvolved <- c(rep(0,dim(mydata$Xx)[2]),rep(1,dim(mydata$Xg)[2]))
+        } else if (nColG == 2) {
+                names.save <- colnames(mydata$Xg)
+                colnames(mydata$Xg) <- paste(names.save,".het",sep="")
+                finaldata$X <- cbind(mydata$Xx,mydata$Xg)
+                colnames(mydata$Xg) <- paste(names.save,".hom",sep="")
+                finaldata$X <- cbind(finaldata$X,mydata$Xg)
+                finaldata$snpinvolved <- c(rep(0,dim(mydata$Xx)[2]),rep(1,dim(mydata$Xg)[2]),rep(2,dim(mydata$Xg)[2]))
+        } else {
+                stop("nColG != 1, != 2")
+        }
+        #print(finaldata)
+
 # pre-compute different stuff
-	
-	fixed_df <- sum(finaldata$snpinvolved>0)
-	
-	resid.score <- lm.fit(mydata$Xx,mydata$Y)$resid
-	if (class(mydata$Y) == "matrix") {
-		stop("matrix-Y not implemented")
-		#sigma2.score <- apply(resid.score^2,FUN=sum,MAR=2)/(dim(mydata$Xx)[1] - dim(mydata$Xx)[2])
-	} else {
-		Xm <- mydata$Xx
-		Y <- finaldata$Y
-		if (is.null(W)) {
-			bt <- ginv(t(Xm) %*% Xm) %*% (t(Xm) %*% finaldata$Y)
-			YmXB <- Y - Xm %*% bt
-			sigma2.score <- as.vector(((t(YmXB) %*% YmXB)/(dim(Xm)[1] - dim(Xm)[2])))
-		} else {
-			bt <- ginv(t(Xm) %*% W %*% Xm) %*% (t(Xm) %*% W %*% finaldata$Y)
-			YmXB <- Y - Xm %*% bt
-			sigma2.score <- as.vector(((t(YmXB) %*% W %*% YmXB)/(dim(Xm)[1] - dim(Xm)[2])))
-		}
-		rm(Xm,YmXB,bt,Y);gc()
-	}
-	#cat("sigma2.score =",sigma2.score,"\n")
-	
-	if (!is.null(W)) {
-		require(MASS)
-		if (!inverse) {
-			V <- W
-			W <- ginv(W)
-		} else {
-			V <- ginv(W)
-		}
-		tXW.fixed <- t(finaldata$X) %*% W
-	} else {
-		tXW.fixed <- t(finaldata$X)
-	}
-	
+
+        fixed_df <- sum(finaldata$snpinvolved>0)
+
+        resid.score <- lm.fit(mydata$Xx,mydata$Y)$resid
+        if (class(mydata$Y) == "matrix") {
+                stop("matrix-Y not implemented")
+                #sigma2.score <- apply(resid.score^2,FUN=sum,MAR=2)/(dim(mydata$Xx)[1] - dim(mydata$Xx)[2])
+        } else {
+                Xm <- mydata$Xx
+                Y <- finaldata$Y
+                if (is.null(W)) {
+                        bt <- ginv(t(Xm) %*% Xm) %*% (t(Xm) %*% finaldata$Y)
+                        YmXB <- Y - Xm %*% bt
+                        sigma2.score <- as.vector(((t(YmXB) %*% YmXB)/(dim(Xm)[1] - dim(Xm)[2])))
+                } else {
+                        bt <- ginv(t(Xm) %*% W %*% Xm) %*% (t(Xm) %*% W %*% finaldata$Y)
+                        YmXB <- Y - Xm %*% bt
+                        sigma2.score <- as.vector(((t(YmXB) %*% W %*% YmXB)/(dim(Xm)[1] - dim(Xm)[2])))
+                }
+                rm(Xm,YmXB,bt,Y);gc()
+        }
+        #cat("sigma2.score =",sigma2.score,"\n")
+
+        if (!is.null(W)) {
+                require(MASS)
+                if (!inverse) {
+                        V <- W
+                        W <- ginv(W)
+                } else {
+                        V <- ginv(W)
+                }
+                tXW.fixed <- t(finaldata$X) %*% W
+        } else {
+                tXW.fixed <- t(finaldata$X)
+        }
+
 # run 1 time to get output dimensions and names
-	mgls <- FGLS(Y=finaldata$Y,X=finaldata$X,whichtest=(finaldata$snpinvolved>0))
-	smr <- summaryFGLS(mgls,verbosity=verbosity,varcov=varcov,include.means=include.means)
-	#print(smr)
-	
+        mgls <- FGLS(Y=finaldata$Y,X=finaldata$X,whichtest=(finaldata$snpinvolved>0))
+        smr <- summaryFGLS(mgls,verbosity=verbosity,varcov=varcov,include.means=include.means)
+        #print(smr)
+
 # NOW ITERATE OVER SNPs
-	
-	
-	out <- list()
-	snpseq <- seq(1,dim(genodata)[2],step) 
-	outsnpnames <- c(dimnames(genodata)[[2]][snpseq])
-	if (is.null(outsnpnames)) outsnpnames <- paste("mrk",c(1:length(snpseq)),sep="")
-	if (with.lm) {
-		out$LM <- rep(NA,length(snpseq))
-		warning("with.lm specified; falling to 'old' R-only implementation (slow)");
-		old <- TRUE
-	}
-	
-	if (old) {
-		out$FGLS <- matrix(NA,ncol=length(smr),nrow=length(snpseq))
-		#out$FGLS <- matrix(NA,ncol=2*dim(finaldata$X)[2]+2,nrow=length(snpseq))
-		dimnames(out$FGLS) <- list(outsnpnames,names(smr))
-		#c(paste("beta_",colnames(finaldata$X),sep=""),paste("se_",colnames(finaldata$X),sep=""),"Chi2","P-value")
-		
-		#print("AAA")
-		
-		k <- 1
-		for (csnp in snpseq) {
-			
-			GGG <- get_snp_data(genodata,csnp,model.SNP,gtcoding)
-			nocalls <- sum(is.na(GGG))
-			callrate <- (length(GGG)-nocalls)/length(GGG)
-			
-			if ( callrate >= mincall) {
-				
-				if (nocalls > 0) {
-					if (na.SNP=="impute") {
-						mn <- apply(GGG,MAR=2,FUN=mean,na.rm=TRUE)
-						for (iii in 1:dim(GGG)[2]) GGG[is.na(GGG[,iii]),iii] <- mn[iii]
-					} else if (na.SNP == "drop") {
-						#stop("na.SNP != 'impute' and nocalls > 0; 'drop' not yet implemented")
-						nomiss <- complete.cases(GGG)
-						finaldata$Y <- finaldata$Y[nomiss]
-						finaldata$X <- finaldata$X[nomiss,]
-						GGG <- GGG[nomiss,]
-					} else {
-						stop(paste("na.SNP argument not recognised:",na.SNP))
-					}
-				}
-				XXX <- finaldata$X
-				for (i in which(finaldata$snpinvolved==1)) XXX[,i] <- XXX[,i]*GGG[,1]
-				for (i in which(finaldata$snpinvolved==2)) XXX[,i] <- XXX[,i]*GGG[,2]
-				if (singular == "drop") {
-					# identify collinear and mono columns
-					# probably use gsl_linalg_QRPT_decomp? or other QR with pivoting?
-				} else {
-					current_df <- fixed_df
-				} 
-				#print(c("NOW SNP",csnp))
-				#print(cbind(mydata$Y,XXX))
-				mgls <- FGLS(Y=mydata$Y,X=XXX,W=W,test=test,whichtest=(finaldata$snpinvolved>0))
-				out$FGLS[k,] <- summaryFGLS(mgls,verbosity=verbosity,varcov=varcov,include.means=include.means)
-				#out$FGLS[k,] <- c(mgls$beta,sqrt(diag(mgls$V)),mgls$T2,pchisq(mgls$T2,df=current_df,low=F))
-				#print(mgls$beta)
-				#print(pchisq(mgls$T2,df=sum(finaldata$snpinvolved>0),low=F))
-				#print(det(t(XXX)%*%XXX))
-				#print(rankMatrix(XXX))
-				#print(qr(XXX))
-				#print(svd(t(XXX)%*%XXX))
-				if (with.lm) {
-					lm0 <- lm(mydata$Y ~ mydata$Xx)
-					lm1 <- lm(mydata$Y ~ 0+XXX)
-					#print(summary(lm1))
-					#print(anova(lm0,lm1,test="Chisq"))
-					out$LM[k] <- anova(lm0,lm1,test="Chisq")$"P(>|Chi|)"[2]
-				}
-			} else {
-				out$FGLS[k,] <- rep(NA,dim(out$FGLS[2]))
-				if (with.lm) out$LM[k] <- NA
-				warning(paste("call < mincall; skipping SNP",csnp))
-			}
-			
-			k <- k + 1
-		}
-	} else {
-		# new C++ procedure
-		if (class(genodata) == "snp.data") {
-			gtNrow <- dim(genodata)[1]
-			gtNcol <- dim(genodata)[2]
-			genodata <- genodata at gtps
-		} else if (class(genodata) == "matrix") {
-			gtNrow <- dim(genodata)[1]
-			gtNcol <- dim(genodata)[2]
-			storage.mode(genodata) <- "double"
-		} else if (class(genodata)=="databel") {
-			gtNrow <- dim(genodata)[1]
-			gtNcol <- dim(genodata)[2]
-			genodata <- genodata at data
-		} else {
-			stop(paste("genodata class not recognised ('",class(genodata),"')",sep=""))
-		}
-		
-		# check NaNs and create gsl_matrix data
-		if (any(is.na(finaldata$Y))) stop("NANs not allowed in Y")
-		if (any(is.na(finaldata$X))) stop("NANs not allowed in X")
-		if (!is.null(tXW.fixed)) if (any(is.na(tXW.fixed))) stop("NANs not allowed in tXW.fixed")
-		if (!is.null(W)) if (any(is.na(W))) stop("NANs not allowed in W")
-		SEXP_ptr_to_gsl_Y <- .Call("gslVector_from_SEXP",finaldata$Y)
-		SEXP_ptr_to_gsl_X <- .Call("gslMatrix_from_SEXP",finaldata$X)
-		if (!is.null(tXW.fixed)) 
-		{SEXP_ptr_to_gsl_tXW_fixed <- .Call("gslMatrix_from_SEXP",tXW.fixed);} 
-		else {SEXP_ptr_to_gsl_tXW_fixed <- NULL}
-		if (!is.null(W)) {SEXP_ptr_to_gsl_W <- .Call("gslMatrix_from_SEXP",W);} else {SEXP_ptr_to_gsl_W <- NULL;}
-		
-		#mgls <- FGLS(Y=finaldata$Y,X=finaldata$X,whichtest=(finaldata$snpinvolved>0))
-		#print("before fake_iterator")
-		#print(finaldata$X)
-		resIteratorNew <- .Call("iterator", genodata, 
-				as.integer(gtNrow), as.integer(gtNcol),
-				as.character("fgls"),
-				"R", as.integer(2), 
-				as.integer(step), # STEP 
-				as.integer(10), 
-				SEXP_ptr_to_gsl_Y, SEXP_ptr_to_gsl_X,
-				SEXP_ptr_to_gsl_tXW_fixed, SEXP_ptr_to_gsl_W,
-				as.integer(intstattests),
-				as.integer(finaldata$snpinvolved),
-				as.integer(which(model.SNP==genomodel)),
-				as.integer(which(gtcoding==charcoding)),
-				as.double(mincall),
-				as.double(sigma2.score))
-		#if (!is(resIteratorNew,"matrix")) resIteratorNew <- matrix(resIteratorNew,nrow=1)
-		#print("after fake_iterator")
-		mgls <- FGLS(Y=finaldata$Y,X=finaldata$X,whichtest=(finaldata$snpinvolved>0))
-		smr <- summaryFGLS(mgls,verbosity=2,varcov=TRUE,include.means=TRUE)	
-		#print(smr)
-		#print(resIteratorNew)
-		outdf <- sum(finaldata$snpinvolved != 0)
-		outN <- dim(finaldata$X)[1]
-		nBeta <- dim(finaldata$X)[2]
-		tmpMtr <- matrix(NA,ncol=nBeta,nrow=nBeta)
-		tmpMtr[lower.tri(tmpMtr,diag=T)] <- 1:(nBeta*(nBeta+1)/2)
-		varSeq <- diag(tmpMtr)
-		rIsize <- dim(resIteratorNew)[2]
-		#print(rIsize)
-		vcoSeq <- as.vector(tmpMtr[lower.tri(tmpMtr)])
-		#print("before cbind")
-		out$FGLS <- cbind(outN,resIteratorNew[,1,drop=FALSE],
-				outdf,pchisq(resIteratorNew[,1,drop=FALSE],df=outdf,low=FALSE),
-				resIteratorNew[,c(2:(1+nBeta)),drop=FALSE],
-				sqrt(resIteratorNew[,c((1+nBeta)+varSeq),drop=FALSE]),
-				resIteratorNew[,c((rIsize-nBeta+1):rIsize),drop=FALSE],
-				resIteratorNew[,c(1+nBeta+vcoSeq),drop=FALSE]
-		)
-		#if (!is(out$FGLS,"matrix")) out$FGLS <- matrix(out$FGLS,nrow=1)
-		#print(dim(out$FGLS))
-		#print(out$FGLS)
-		#print(smr)
-		dimnames(out$FGLS)[[2]] <- names(smr)
-		smr <- summaryFGLS(mgls,verbosity=verbosity,varcov=varcov,include.means=include.means)
-		out$FGLS <- out$FGLS[,names(smr),drop=FALSE]
-		#print(out$FGLS)
-		dimnames(out$FGLS)[[1]] <- outsnpnames
-		out$FGLS[which(out$FGLS[,"Chisq"]<(-0.1)),"P-value"] <- NA
-		#print(out$FGLS)
-		#print("before gc")
-		gc();
-		#print("after gc")
-	}
-	
-	if (with.lm) return(out)
-	else return(out$FGLS)
-} 
+
+
+        out <- list()
+        snpseq <- seq(1,dim(genodata)[2],step)
+        outsnpnames <- c(dimnames(genodata)[[2]][snpseq])
[TRUNCATED]

To get the complete diff run:
    svnlook diff /svnroot/genabel -r 1791


More information about the Genabel-commits mailing list