[Genabel-commits] r2023 - in pkg: . RepeatABEL RepeatABEL/R RepeatABEL/data RepeatABEL/inst RepeatABEL/inst/doc RepeatABEL/man RepeatABEL/vignettes RepeatABEL/vignettes/marginnote
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Fri Nov 20 12:56:57 CET 2015
Author: larsronn
Date: 2015-11-20 12:56:57 +0100 (Fri, 20 Nov 2015)
New Revision: 2023
Added:
pkg/RepeatABEL/
pkg/RepeatABEL/DESCRIPTION
pkg/RepeatABEL/NAMESPACE
pkg/RepeatABEL/R/
pkg/RepeatABEL/R/Create_gwaa_scan.R
pkg/RepeatABEL/R/SmoothSNPmatrix.R
pkg/RepeatABEL/R/compute.GRM.R
pkg/RepeatABEL/R/constructV.R
pkg/RepeatABEL/R/preFitModel.R
pkg/RepeatABEL/R/rGLS.R
pkg/RepeatABEL/R/simulate_PhenData.R
pkg/RepeatABEL/data/
pkg/RepeatABEL/data/Phen.Data.rda
pkg/RepeatABEL/data/datalist
pkg/RepeatABEL/data/flycatchers.rda
pkg/RepeatABEL/data/gen.data.rda
pkg/RepeatABEL/data/thinned.flycatchers.rda
pkg/RepeatABEL/inst/
pkg/RepeatABEL/inst/CITATION
pkg/RepeatABEL/inst/doc/
pkg/RepeatABEL/inst/doc/RepeatABEL_vignette.pdf
pkg/RepeatABEL/man/
pkg/RepeatABEL/man/.Rapp.history
pkg/RepeatABEL/man/Create_gwaa_scan.Rd
pkg/RepeatABEL/man/Phen.Data.Rd
pkg/RepeatABEL/man/RepeatABEL-package.Rd
pkg/RepeatABEL/man/SmoothSNPmatrix.Rd
pkg/RepeatABEL/man/compute.GRM.Rd
pkg/RepeatABEL/man/constructV.Rd
pkg/RepeatABEL/man/flycatchers.Rd
pkg/RepeatABEL/man/gen.data.Rd
pkg/RepeatABEL/man/preFitModel.Rd
pkg/RepeatABEL/man/rGLS.Rd
pkg/RepeatABEL/man/simulate_PhenData.Rd
pkg/RepeatABEL/man/thinned.flycatchers.Rd
pkg/RepeatABEL/vignettes/
pkg/RepeatABEL/vignettes/.Rhistory
pkg/RepeatABEL/vignettes/Figure_Manhattan.pdf
pkg/RepeatABEL/vignettes/RepeatABEL_vignette.Rnw
pkg/RepeatABEL/vignettes/marginnote/
pkg/RepeatABEL/vignettes/marginnote/README
pkg/RepeatABEL/vignettes/marginnote/marginnote.dtx
pkg/RepeatABEL/vignettes/marginnote/marginnote.ins
pkg/RepeatABEL/vignettes/marginnote/marginnote.pdf
pkg/RepeatABEL/vignettes/marginnote/marginnote.sty
Log:
RepeatABEL package uploaded
Added: pkg/RepeatABEL/DESCRIPTION
===================================================================
--- pkg/RepeatABEL/DESCRIPTION (rev 0)
+++ pkg/RepeatABEL/DESCRIPTION 2015-11-20 11:56:57 UTC (rev 2023)
@@ -0,0 +1,11 @@
+Package: RepeatABEL
+Type: Package
+Title: GWAS for Multiple Observations on Related Individuals
+Version: 1.0
+Date: 2015-11-03
+Author: Lars Ronnegard
+Maintainer: Lars Ronnegard <lrn at du.se>
+Description: The RepeatABEL package is used to perform genome-wide association studies on individuals that are both related and have repeated measurements.
+License: GPL
+Depends: R (>= 2.10), hglm, GenABEL
+
Added: pkg/RepeatABEL/NAMESPACE
===================================================================
--- pkg/RepeatABEL/NAMESPACE (rev 0)
+++ pkg/RepeatABEL/NAMESPACE 2015-11-20 11:56:57 UTC (rev 2023)
@@ -0,0 +1,4 @@
+exportPattern("^[^\\.]")
+import("hglm","GenABEL")
+
+
Added: pkg/RepeatABEL/R/Create_gwaa_scan.R
===================================================================
--- pkg/RepeatABEL/R/Create_gwaa_scan.R (rev 0)
+++ pkg/RepeatABEL/R/Create_gwaa_scan.R 2015-11-20 11:56:57 UTC (rev 2023)
@@ -0,0 +1,22 @@
+#' @title Creates a scan.gwaa object
+#'
+#' @description
+#' Creates a scan.gwaa object from a GenABEL object and p-values.
+#'
+#' @param data A gwaa.data object
+#' @param P1df P-values computed from external analysis
+#' @param SNP.eff Estimated additive SNP effects
+#'
+#' @author Lars Ronnegard
+#'
+Create_gwaa_scan <- function(data, P1df, SNP.eff) {
+ chi2.1df <- qchisq(P1df, df=1, lower.tail=FALSE)
+ lambda <- try( estlambda(chi2.1df), silent=TRUE )
+ se_effB <- abs(SNP.eff)/qnorm(P1df/2, lower.tail=FALSE)
+ if ( class(lambda) == "try-error" ) lambda <- list( estimate = NA, se = NA )
+ results <- data.frame(effB = SNP.eff, se_effB = se_effB, chi2.1df = NA, P1df = P1df, Pc1df = NA, stringsAsFactors = FALSE)
+ rownames(results) = snpnames(data)
+ out <- new("scan.gwaa", results = results, annotation = annotation(data), lambda = lambda, idnames = idnames(data), call = as.call( list("rGLS") ), family = "gaussian")
+ return(out)
+}
+
Added: pkg/RepeatABEL/R/SmoothSNPmatrix.R
===================================================================
--- pkg/RepeatABEL/R/SmoothSNPmatrix.R (rev 0)
+++ pkg/RepeatABEL/R/SmoothSNPmatrix.R 2015-11-20 11:56:57 UTC (rev 2023)
@@ -0,0 +1,19 @@
+#' @title Imputes column means to missing genotypes
+#'
+#' @description
+#' Imputes column means to missing genotypes.
+#'
+#' @param SNP A matrix including SNP coding.
+#'
+#' @author Lars Ronnegard
+#'
+SmoothSNPmatrix <-
+function(SNP) {
+ #Imputes column means to missing values
+ m <- ncol(SNP)
+ miss <- is.na(colMeans(SNP))
+ for (i in (1:m)[miss]){
+ SNP[is.na(SNP[,i]), i] <- mean( SNP[!is.na(SNP[ , i]), i] )
+ }
+ return(SNP)
+}
Added: pkg/RepeatABEL/R/compute.GRM.R
===================================================================
--- pkg/RepeatABEL/R/compute.GRM.R (rev 0)
+++ pkg/RepeatABEL/R/compute.GRM.R 2015-11-20 11:56:57 UTC (rev 2023)
@@ -0,0 +1,28 @@
+#' @title Computes a Genetic Relationship Matrix from a GenABEL object
+#'
+#' @description
+#' Two alternative methods for GRM computations implemented.
+#'
+#' @param gen.data The GenABEL object.
+#' @param method Method to be used.
+#'
+#' @author Lars Ronnegard
+#'
+compute.GRM <-
+function(gen.data, method = "GenABEL") {
+ if (method == "GenABEL"){
+ gkins <- ibs(gen.data, weight = "freq")
+ GRM <- gkins
+ GRM[t(lower.tri(gkins))] <- 0
+ rm(gkins)
+ GRM <- 2*(GRM + t(GRM) - diag(diag(GRM)))
+ }
+ if (method == "ZZt"){
+ SNP.matrix <- scale(as.double(gen.data))
+ m <- ncol(SNP.matrix)
+ markers.to.fit <- (1:m)[!is.na(colMeans(SNP.matrix))]
+ GRM <- tcrossprod(SNP.matrix[,markers.to.fit])/length(markers.to.fit)
+ rm(SNP.matrix)
+ }
+ return(GRM)
+}
Added: pkg/RepeatABEL/R/constructV.R
===================================================================
--- pkg/RepeatABEL/R/constructV.R (rev 0)
+++ pkg/RepeatABEL/R/constructV.R 2015-11-20 11:56:57 UTC (rev 2023)
@@ -0,0 +1,22 @@
+#' @title Constructs the (co)variance matrix for y
+#'
+#' @description
+#' Constructs the (co)variance matrix for y.
+#'
+#' @param Z The incidence matrix for the random effects column binded with the Cholesky of the GRM
+#' @param RandC The number of columns in the two matrices combined in Z.
+#' @param ratio The ratios between random effect variances and the residual variance.
+#'
+#' @author Lars Ronnegard
+#'
+constructV <-
+function(Z, RandC, ratio) {
+ V <- diag(nrow(Z))
+ indx <- 1
+ for (i in 1:length(ratio)) {
+ Z.tmp <- Z[,indx:(indx + RandC[i] - 1)]
+ indx = indx + RandC[i]
+ V <- V + tcrossprod(Z.tmp) * ratio[i]
+ }
+ return(V)
+}
Added: pkg/RepeatABEL/R/preFitModel.R
===================================================================
--- pkg/RepeatABEL/R/preFitModel.R (rev 0)
+++ pkg/RepeatABEL/R/preFitModel.R 2015-11-20 11:56:57 UTC (rev 2023)
@@ -0,0 +1,175 @@
+#' @title Fits a linear mixed model (without fixed SNP effects) and computes the fitted variance-covariance matrix for later use in the rGLS function.
+#'
+#' @description Uses a GenABEL object and phenotype data as input. The model is fitted using the \code{hglm} function in the hglm package.
+#'
+#' @param fixed A formula including the response and fixed effects
+#' @param random A formula for the random effects
+#' @param id.name The column name of the IDs in phen.data
+#' @param genabel.data An GenABEL object including marker information. This object has one observation per individual.
+#' @param phenotype.data A data frame including the repeated observations and IDs.
+#' @param corStruc A list specifying the correlation structure for each random effect. The options are: \code{"Ind"} for iid random effects, \code{"GRM"} for a correlation structure given by a genetic relationship matrix, or \code{"CAR"} for a spatial correlation structure given by a Conditional Autoregressive model specified by a neighborhood matrix.
+#' @param GRM A genetic relationship matrix. If not specified whilst the \code{"GRM"} option is given for \code{corStruc} then the GRM is computed internally within the function.
+#' @param Neighbor.Matrix A neighborhood matrix having non-zero value for an element (i,j) where the observations i and j come from neighboring locations. The diagonal elements should be zero.
+#' @return Returns a list including the fitted hglm object \code{fitted.hglm}, the variance-covariance matrix \code{V} and the ratios between estimated variance components for the random effects divided by the residual variance, \code{ratio}.
+#' @author Lars Ronnegard
+#'
+#' @examples
+#' ####### FIRST EXAMPLE USING GRM #############
+#' data(Phen.Data) #Phenotype data with repeated observations
+#' data(gen.data) #GenABEL object including IDs and marker genotypes
+#' GWAS1 <- rGLS(y ~ age + sex, genabel.data = gen.data, phenotype.data = Phen.Data)
+#' plot(GWAS1, main="")
+#' summary(GWAS1)
+#' #Summary for variance component estimation without SNP effects
+#' summary(GWAS1@@call$hglm)
+#' #The same results can be computed using the preFitModel as follows
+#' fixed = y ~ age + sex
+#' Mod1 <- preFitModel(fixed, random=~1|id, genabel.data = gen.data,
+#' phenotype.data = Phen.Data, corStruc=list( id=list("GRM","Ind") ))
+#' GWAS1b <- rGLS(fixed, genabel.data = gen.data,
+#' phenotype.data = Phen.Data, V = Mod1$V)
+#' plot(GWAS1b, main="Results using the preFitModel function")
+#' ####### SECOND EXAMPLE USING CAR #############
+#' # Add a fake nest variable to the data just to run the example
+#' #In this example there are 6 nests and 60 observations per nest
+#' Phen.Data$nest <- rep(1:6, each=60)
+#' #A model including polygenic effects, permanent environmental effects,
+#' #and nest effect as random
+#' Mod2 <- preFitModel(fixed, random=~1|id + 1|nest, genabel.data = gen.data,
+#' phenotype.data = Phen.Data, corStruc=list( id=list("GRM","Ind"), nest=list("Ind")) )
+#' GWAS2 <- rGLS(fixed, genabel.data = gen.data, phenotype.data = Phen.Data, V = Mod2$V)
+#' plot(GWAS2)
+#' #Similar to previous plot because the nest effect variance component is almost 0.
+#' ###################
+#' #Construct a fake nighbourhood matrix
+#' D = matrix(0,6,6)
+#' D[1,2] = D[2,1] = 1
+#' D[5,6] = D[6,5] = 1
+#' D[2,4] = D[4,2] = 1
+#' D[3,5] = D[5,3] = 1
+#' D[1,6] = D[6,1] = 1
+#' D[3,4] = D[4,3] = 1
+#' #The matrix shows which pair of nests that can be considered as neighbours
+#' image(Matrix(D), main="Neighbourhood matrix")
+#' Mod3 <- preFitModel(fixed, random=~1|id + 1|nest, genabel.data = gen.data,
+#' phenotype.data = Phen.Data, corStruc=list( id=list("GRM","Ind"),
+#' nest=list("CAR")), Neighbor.Matrix=D )
+#' GWAS2b <- rGLS(fixed, genabel.data = gen.data,
+#' phenotype.data = Phen.Data, V = Mod3$V)
+#' plot(GWAS2b)
+#'
+preFitModel <- function(fixed = y~1, random = ~1|id, id.name = "id", genabel.data, phenotype.data, corStruc = NULL, GRM = NULL, Neighbor.Matrix = NULL) {
+ if (!inherits(fixed, "formula") || length(fixed) != 3) stop("\n Fixed-effects model must be a formula of the form \"resp ~ pred\"")
+ if (!inherits(random, "formula")) stop("\n Random part must be a one-sided formula of the form \" ~ effect|Cluster\"")
+ if (attr(terms(random), "response") != 0) stop("\n Random part must be a one-sided formula of the form \" ~ effect|Cluster\"")
+ if (all.names(random)[2] != "|") stop("The subjects/clusters in Random must be separated by \" |\"")
+ if (class(genabel.data)!="gwaa.data") stop("The input of genabel.data is not a GenABEL object")
+ if (is.null(genabel.data at phdata$id)) stop("IDs not given as id in the phdata list")
+ #Get trait name
+ trait <- all.vars(fixed)[1]
+ #Remove NAs from phenotypic data
+ y.all <- phenotype.data[,names(phenotype.data) %in% trait]
+ phenotype.data <- phenotype.data[!is.na(y.all),]
+ #Connect IDs in GenABEL data set with IDs in the phenotype file
+ id1 <- phenotype.data[,names(phenotype.data) %in% id.name] #ID for phenotype data
+ id2 <- genabel.data at phdata$id #ID for genotype data
+ test1 <- id1 %in% id2
+ test2 <- id2 %in% id1
+ genabel.data = genabel.data[test2, ] #Exclude individuals having no phenotype information
+ phenotype.data = phenotype.data[test1, ] #Exclude individuals having no genotype information
+ id1 <- phenotype.data[,names(phenotype.data) %in% id.name] #ID for phenotype data for cleaned data
+ id2 <- genabel.data at phdata$id #ID for genotype data for cleaned data
+ ### Create design matrix for the fixed effects ###
+ Y <- phenotype.data[,names(phenotype.data) %in% trait]
+ X <- model.matrix(fixed, data = phenotype.data)
+ if (nrow(X) != length(Y)) stop("Remove all lines with NA from the phenotype input data before running this function.")
+ row.names(X) <- NULL
+ ###############################################
+ ## Create design matrix for random effects ####
+ ###############################################
+ #Construct incidence matrix for repeated observations
+ N <- length(id2)
+ n <- length(id1)
+ indx <- numeric(n)
+ for (i in 1:N) {
+ indx <- indx + i * (id1 %in% id2[i])
+ }
+ ### Get SQRT of GRM first
+ if (is.null(GRM)) {
+ autosomalMarkers <- which(chromosome(genabel.data) != "X")
+ GRM <- compute.GRM(genabel.data[,snpnames(genabel.data)[autosomalMarkers]])
+ }
+ eig <- eigen(GRM)
+ if (max(diag(GRM))>1.6) print("There seems to be highly inbred individuals in your data")
+ if (min(eig$values) < -0.5 ) print("The genetic relationship matrix is far from positive definite")
+ non_zero.eigenvalues = eig$values>(1e-6) #Put numerically small eigenvalues to zero
+ eig$values[!non_zero.eigenvalues] <- 0
+ print("GRM ready")
+ Z.GRM <- ( eig$vectors %*% diag(sqrt(eig$values)) )[indx,]
+ ##Create Z
+ RanTermVec <- unlist(strsplit(attr(terms(random), "term.labels"), split = c("+"), fixed = TRUE))
+ k.rand <- length(RanTermVec)
+ Z <- NULL
+ RandC <- NULL
+ data <- phenotype.data
+ for (i in 1:k.rand) {
+ RanTerm <- unlist(strsplit(RanTermVec[i], split = "|", fixed = TRUE))
+ RanTerm[2] <- gsub(" ", "", RanTerm[2], fixed = TRUE)
+ ranf <- paste("~", "as.factor(", RanTerm[2], ")", "-1", sep = "")
+ ranf <- as.formula(ranf)
+ rmf <- model.frame(ranf, data)
+ z <- model.matrix(attr(rmf, "terms"), data = rmf)
+ if (nrow(z) != length(Y)) stop("Remove all lines with NA from the phenotype input data before running this function.")
+ row.names(z) <- NULL
+ if (is.null(corStruc)) {
+ RandC <- c(RandC, ncol(z))
+ Z <- cbind(Z, z)
+ }else{
+ cs <- unlist( corStruc[names(corStruc) %in% RanTerm[2]] )
+ for (j in 1:length(cs)) {
+ zL <- switch(cs[j],
+ "Ind" = z,
+ "GRM" = Z.GRM,
+ "CAR" = z,
+ stop("Input correlation structure not available")
+ )
+
+ RandC <- c(RandC, ncol(zL))
+ Z <- cbind(Z, zL)
+ j.rand <- length(RandC)
+ if (cs[j]=="CAR") {
+ if ( is.null(Neighbor.Matrix) ) stop("CAR correlation requires a Neighbor.Matrix to be specified")
+ if ( ncol(z) != ncol(Neighbor.Matrix) ) stop("Incorrect dimension of Neighbor.Matrix")
+ if ( j.rand == 1) {
+ rand.family = list( CAR(D=Neighbor.Matrix) )
+ } else {
+ rand.family[[j.rand]] = CAR(D=Neighbor.Matrix)
+ }
+ } else {
+ if (j.rand == 1) rand.family = list(gaussian())
+ if (j.rand > 1) rand.family[[j.rand]] = gaussian()
+ }
+ }
+ }
+ }
+ #####################################
+ ### Fit the model
+ if (length(RandC) > 1) mod1 <- hglm(y=Y, X=X, Z=Z, RandC=RandC, maxit=200, rand.family=rand.family)
+ if (length(RandC) == 1) mod1 <- hglm(y=Y, X=X, Z=Z, RandC=RandC, maxit=200, rand.family=rand.family[[1]])
+ ratio <- mod1$varRanef/mod1$varFix
+ #stop("The constructV function needs to be changed if CAR model is to be used")
+ #V <- constructV(Z, RandC, ratio)
+ V = diag(nrow(Z))
+ indx = 1
+ for (i in 1:length(ratio)) {
+ Z.tmp <- Z[, indx:(indx + RandC[i] - 1)]
+ indx = indx + RandC[i]
+ if (rand.family[[i]]$family != "CAR") {
+ V <- V + tcrossprod(Z.tmp) * ratio[i]
+ } else {
+ ratio[i] <- mod1$CAR.tau/mod1$varFix
+ V <- V + Z.tmp %*% solve( diag( ncol(Z.tmp) ) - mod1$CAR.rho * Neighbor.Matrix ) %*% t(Z.tmp) * ratio[i]
+ }
+ }
+ list( fitted.hglm = mod1, V = V, ratio = ratio)
+}
Added: pkg/RepeatABEL/R/rGLS.R
===================================================================
--- pkg/RepeatABEL/R/rGLS.R (rev 0)
+++ pkg/RepeatABEL/R/rGLS.R 2015-11-20 11:56:57 UTC (rev 2023)
@@ -0,0 +1,155 @@
+#' @title GWAS for Studies having Repeated Measurements on Related Individuals
+#'
+#' @description
+#' It is used to perform genome-wide association studies on individuals that are both related and have repeated measurements.
+#' The function computes score statistic based p-values for a linear mixed model including random polygenic effects and
+#' a random effect for repeated measurements. A p-value is computed for each marker and the null hypothesis tested is a
+#' zero additive marker effect.
+#'
+#' @param formula.FixedEffects Formula including the response variable and cofactors as fixed effects.
+#' @param genabel.data An GenABEL object including marker information. This object has one observtion per individuals.
+#' @param phenotype.data A data frame including the repeated observations and IDs.
+#' @param id.name The column name of the IDs in phen.data
+#' @param GRM An optional genetic relationship matrix (GRM) can be included as input. Otherwise the GRM is computed within the function.
+#' @param V An optional (co)variance matrix can be included as input. Otherwise it is computed using the hglm function.
+#' @param memory Used to optimize computations. The maximum number of elements in a matrix that can be stored efficiently.
+#' @details
+#' A generalized squares (GLS) is fitted for each marker given a (co)variance matrix V.
+#' The computations are made fast by transforming the GLS to
+#' an ordinary least-squares (OLS) problem using an eigen-decomposition of V.
+#' The OLS are computed using QR-factorization. If V is not specified then a model
+#' including random polygenic effects and permanent environmental effects is
+#' fitted (using the hglm package) to compute V. A GenABEL object (scan.gwaa class)
+#' is returned (including also the \code{hglm} results).
+#' Let e.g. GWAS1 be an object returned by the \code{rGLS} function.
+#' Then a Manhattan plot can be produced by calling \code{plot(GWAS1)} and
+#' the top SNPs using \code{summary(GWAS1)}. Both of these functions are
+#' generic GenABEL functions. \cr
+#' The results from the fitted linear mixed model without any SNP effect included
+#' are produced by calling \code{summary(GWAS1@@call$hglm)}.
+#'
+#' @author Lars Ronnegard
+#'
+#'
+#' @examples
+#' data(Phen.Data) #Phenotype data with repeated observations
+#' data(gen.data) #GenABEL object including IDs and marker genotypes
+#' GWAS1 <- rGLS(y ~ age + sex, genabel.data = gen.data, phenotype.data = Phen.Data)
+#' plot(GWAS1, main="")
+#' summary(GWAS1)
+#' #Summary for variance component estimation without SNP effects
+#' summary(GWAS1@@call$hglm)
+#'
+rGLS <-
+function(formula.FixedEffects = y ~ 1, genabel.data, phenotype.data, id.name = "id", GRM = NULL, V = NULL, memory=1e8) {
+ #Check input data
+ if (class(genabel.data) != "gwaa.data") stop("The input of genabel.data is not a GenABEL object")
+ if (is.null(genabel.data at phdata$id)) stop("IDs not given as id in the phdata list")
+ if (!is.null(GRM)) { if(!isSymmetric(GRM)) warning("The given GRM must be a symmetric matrix!") }
+ if (!is.null(V)) { if(!isSymmetric(V)) warning("The given V must be a symmetric matrix!") }
+ V.input <- V
+ #require(hglm)
+ #require(GenABEL)
+ #Get trait name
+ trait <- all.vars(formula.FixedEffects)[1]
+ #Remove NAs from phenotypic data
+ y.all <- phenotype.data[,names(phenotype.data)%in%trait]
+ phenotype.data <- phenotype.data[!is.na(y.all),]
+ #Connect IDs in GenABEL data set with IDs in the phenotype file
+ id1 <- phenotype.data[,names(phenotype.data) %in% id.name] #ID for phenotype data
+ id2 <- genabel.data at phdata$id #ID for genotype data
+ test1 <- id1 %in% id2
+ test2 <- id2 %in% id1
+ genabel.data <- genabel.data[test2,] #Exclude individuals having no phenotype information
+ phenotype.data <- phenotype.data[test1,] #Exclude individuals having no genotype information
+ id1 <- phenotype.data[,names(phenotype.data) %in% id.name] #ID for phenotype data for cleaned data
+ id2 <- genabel.data at phdata$id #ID for genotype data for cleaned data
+ #####################
+ #Construct incidence matrix for repeated observations
+ N=length(id2)
+ n=length(id1)
+ indx <- numeric(n)
+ for (i in 1:N) {
+ indx <- indx + i * (id1 %in% id2[i])
+ }
+ Z.indx <- diag(N)[indx,]
+ #Construct response and design matrix
+ y <- phenotype.data[ , names(phenotype.data) %in% trait] #Create the response variable
+ X <- model.matrix(formula.FixedEffects, data = phenotype.data) #Fixed effect design matrix
+ #####################
+ if (is.null(V)) {
+ #Construct GRM
+ if (is.null(GRM)) {
+ autosomalMarkers <- which(chromosome(genabel.data)!= "X")
+ GRM <- compute.GRM(genabel.data[ , snpnames(genabel.data)[autosomalMarkers]])
+ }
+ eig <- eigen(GRM)
+ if (max(diag(GRM)) > 1.6) print("There seems to be highly inbred individuals in your data")
+ if (min(eig$values < -0.5)) print("The genetic relationship matrix is far from positive definite")
+ non_zero.eigenvalues <- eig$values>(1e-6) #Put numerically small eigenvalues to zero
+ eig$values[ !non_zero.eigenvalues ] <- 0
+ print("GRM ready")
+ #####################
+ #Fit hglm
+ Z.GRM <- ( eig$vectors %*% diag(sqrt(eig$values)) )[indx, ]
+ Z <- (cbind(Z.GRM, Z.indx))
+ mod1 <- hglm(y=y, X=X, Z=Z, RandC = c(ncol(Z.GRM), ncol(Z.indx)), maxit = 200)
+ if (mod1$Converge != "converged") stop("The variance component estimation did not converge in 200 iterations. Try to estimate them separately and provide the estimated (co)variance matrix V as input. \n\n")
+ print("Variance component estimation ready")
+ #####################
+ #Construct rotation matrix
+ ratio <- mod1$varRanef/mod1$varFix
+ V <- constructV(Z=Z, RandC = c(ncol(Z.GRM), ncol(Z.indx)), ratio)
+ }
+ eig.V <- eigen(V)
+ transf.matrix <- diag(1/sqrt(eig.V$values)) %*% t(eig.V$vectors)
+ y.new <- transf.matrix %*% y
+ X.new <- transf.matrix %*% X
+ print("Rotation matrix ready")
+ #####################
+ #Fit a linear model for each SNP
+ SNP.matrix <- as.double(genabel.data)
+ if (sum(is.na(SNP.matrix)) > 0) {
+ SNP.matrix <- SmoothSNPmatrix(SNP.matrix)
+ }
+ m <- ncol(SNP.matrix)
+ p.val <- SNP.est <- rep(1, m)
+ colnames(X.new) <- as.character(1:ncol(X.new)) #To avoid columns having strange names
+ print("Rotate LMM started")
+ #Fit using QR factorization
+ #Null model
+ qr0 <- qr(X.new)
+ est0 <- qr.coef(qr0, y.new)
+ res <- y.new - X.new %*% est0
+ n <- length(y.new)
+ RSS.0 <- sum(res^2)/n
+ #Split computations into reasonably sized blocks
+ if (memory < n) memory <- n
+ step.size <- floor(memory/n)
+ steps <- ceiling(m/step.size)
+ jj=1
+ kk=0
+ for (step.i in 1:steps) {
+ if (step.i == steps) kk <- kk + m %% step.size else kk <- kk + step.size
+ markers.to.fit <- jj:kk
+ snp.new <- transf.matrix %*% SNP.matrix[indx, markers.to.fit]
+ mm <- 0
+ for (j in markers.to.fit) {
+ mm <- mm+1
+ X1 <- cbind(snp.new[, mm], X.new)
+ qr1 <- qr(X1)
+ est1 <- qr.coef(qr1, y.new)
+ res <- y.new - X1 %*% est1
+ RSS.1 <- sum(res^2)/n
+ SNP.est[j] <- est1[1]
+ LRT <- -n * (log(RSS.1) - log(RSS.0))
+ p.val[j] <- 1 - pchisq(LRT, df=1)
+ }
+ jj <- jj + step.size
+ }
+ print("Rotate LMM ready")
+ #####################
+ qt.results <- Create_gwaa_scan(genabel.data, p.val, SNP.est)
+ if (is.null(V.input)) qt.results at call$hglm <- mod1
+ return(qt.results)
+}
Added: pkg/RepeatABEL/R/simulate_PhenData.R
===================================================================
--- pkg/RepeatABEL/R/simulate_PhenData.R (rev 0)
+++ pkg/RepeatABEL/R/simulate_PhenData.R 2015-11-20 11:56:57 UTC (rev 2023)
@@ -0,0 +1,88 @@
+#' @title Simulation function for the RepeatABEL package.
+#'
+#' @description The function takes a GenABEL object as input and generates simulated phenotypic values for related individuals having repeated obserevations.
+#' @param formula.FixedEffects A formula including the name of the simulated variable as response, and cofactors as fixed effects.
+#' @param genabel.data A GenABEL object of class gwaa.data.
+#' @param n.obs A vector including the number of observations per individual. The length of n.obs must be equal to the number if individuals in genabel.data.
+#' @param SNP.eff The size of a simulated SNP.effect.
+#' @param SNP.nr The SNP genotype that the SNP effect is simulated on. SNP.nr=i is the i:th SNP.
+#' @param beta The simulated fixed effects. Must be equal to the number of cofactors simulated (including the intercept term).
+#' @param VC A vector of length 3 including the simulated variances of the polygenic effect, permanent environmental effect and residuals, respectively.
+#' @param GRM An optional input where the Genetic Relationship Matrix can be given. Otherwise it is computed using the GenABEL package.
+#' @param sim.gamma A logical parameter specifying whether the residuals shuld be simulated from a gamma distribution or not. If specified as TRUE then residuals are drawn from a gamma distribution with variance equal to the residual variance specified in \code{VC[3]}
+
+#' @return Returns a data frame including the simulated phenotypic values, cofactors and IDs.
+#' @author Lars Ronnegard
+
+#' @examples
+#' data(gen.data)
+#' #Simulate 4 observations per individual
+#' set.seed(1234)
+#' Phen.Sim <- simulate_PhenData(y ~ age, genabel.data=gen.data,
+#' n.obs=rep(4, nids(gen.data)), SNP.eff=1, SNP.nr=1000, VC=c(1,1,1))
+#' GWAS1 <- rGLS(y ~ age, genabel.data = gen.data, phenotype.data = Phen.Sim)
+#' plot(GWAS1, main="Simulated Data Results")
+#'
+simulate_PhenData <-
+function(formula.FixedEffects = y~1, genabel.data, n.obs, SNP.eff = NULL, SNP.nr = NULL, beta = NULL, VC = c(1,1,1), GRM = NULL, sim.gamma = FALSE){
+ #library(GenABEL)
+ if (length(SNP.eff) != length(SNP.nr)) stop("The number of elements in SNP.eff and SNP.nr must be the same")
+ if (is.null(SNP.eff)) {
+ SNP.eff <- 0
+ SNP.nr <- 1
+ }
+ SNP <- as.double(genabel.data at gtdata[,SNP.nr])
+ rownames(SNP) <- NULL
+ SNP <- SmoothSNPmatrix(SNP)
+ n <- nids(genabel.data)
+ if (length(n.obs) != n) stop("The number of individuals and the length of n.obs must be the same")
+ N <- sum(n.obs)
+ id1 <- rep(genabel.data at phdata$id, n.obs)
+ id2 <- genabel.data at phdata$id
+ #Construct incidence matrix for repeated observations
+ indx <- numeric(N)
+ for (i in 1:n) {
+ indx <- indx + i * (id1 %in% id2[i])
+ }
+ genabel.data at phdata$temp.dummy <- 0
+ n.names <- length(colnames(genabel.data at phdata))
+ colnames(genabel.data at phdata) <- c(colnames(genabel.data at phdata)[-n.names], all.vars(formula.FixedEffects)[1])
+ ###########
+ if (is.null(GRM)) {
+ autosomalMarkers <- which(chromosome(genabel.data) != "X")
+ GRM <- compute.GRM(genabel.data[ , snpnames(genabel.data)[autosomalMarkers]])
+ }
+ eig <- eigen(GRM)
+ if (max(diag(GRM)) > 1.6) print("There seems to be highly inbred individuals in your data")
+ if (min(eig$values < -0.5)) print("The genetic relationship matrix is far from positive definite")
+ non_zero.eigenvalues <- eig$values>(1e-6) #Put numerically small eigenvalues to zero
+ eig$values[!non_zero.eigenvalues] <- 0
+ Z.GRM <- ( eig$vectors %*% diag(sqrt(eig$values)) )[indx, ]
+ rownames(Z.GRM) <- NULL
+ ############
+ X0 <- model.matrix(formula.FixedEffects, data = genabel.data at phdata)
+ row.names(X0) <- NULL
+ if (is.null(beta)) beta <- rep(0, ncol(X0))
+ if (ncol(X0) != length(beta)) stop("The length of beta must be the same as the number columns in the cofactor design matrix")
+ X <- X0[indx, 1:ncol(X0)]
+ if (class(X)=="numeric") X <- matrix(X, length(X), 1)
+ sigma2a <- VC[1]
+ sigma2p <- VC[2]
+ sigma2e <- VC[3]
+ a <- rnorm(n, 0, sqrt(sigma2a))
+ p <- rnorm(n, 0, sqrt(sigma2p))
+ e <- rnorm(N, 0, sqrt(sigma2e))
+ if (sim.gamma) e <- rgamma(N, sigma2e) - sigma2e #Follows a gamma distribution of variance sigma2e and mean=0
+ y0 <- X %*% beta + Z.GRM %*% a + p[indx] + e
+ if (length(SNP.nr)==1) y <- y0 + SNP[indx,] * SNP.eff
+ if (length(SNP.nr)>1) y <- y0 + SNP[indx,] %*% SNP.eff
+ id <- rep(genabel.data at phdata$id, n.obs)
+ for (i in 1:length(all.vars(formula.FixedEffects))) {
+ var.name <- all.vars(formula.FixedEffects)[i]
+ if (i == 1) xx <- y
+ if (i > 1) xx <- cbind(xx, rep(genabel.data at phdata[ , names(genabel.data at phdata) %in% var.name], n.obs))
+ }
+ colnames(xx) <- all.vars(formula.FixedEffects)
+ Phen.Data <- data.frame(xx, id)
+ return(Phen.Data)
+}
Added: pkg/RepeatABEL/data/Phen.Data.rda
===================================================================
(Binary files differ)
Property changes on: pkg/RepeatABEL/data/Phen.Data.rda
___________________________________________________________________
Added: svn:mime-type
+ application/octet-stream
Added: pkg/RepeatABEL/data/datalist
===================================================================
--- pkg/RepeatABEL/data/datalist (rev 0)
+++ pkg/RepeatABEL/data/datalist 2015-11-20 11:56:57 UTC (rev 2023)
@@ -0,0 +1,4 @@
+Phen.Data
+flycatchers
+gen.data
+thinned.flycatchers
Added: pkg/RepeatABEL/data/flycatchers.rda
===================================================================
(Binary files differ)
Property changes on: pkg/RepeatABEL/data/flycatchers.rda
___________________________________________________________________
Added: svn:mime-type
+ application/octet-stream
Added: pkg/RepeatABEL/data/gen.data.rda
===================================================================
(Binary files differ)
Property changes on: pkg/RepeatABEL/data/gen.data.rda
___________________________________________________________________
Added: svn:mime-type
+ application/octet-stream
Added: pkg/RepeatABEL/data/thinned.flycatchers.rda
===================================================================
(Binary files differ)
Property changes on: pkg/RepeatABEL/data/thinned.flycatchers.rda
___________________________________________________________________
Added: svn:mime-type
+ application/octet-stream
Added: pkg/RepeatABEL/inst/CITATION
===================================================================
--- pkg/RepeatABEL/inst/CITATION (rev 0)
+++ pkg/RepeatABEL/inst/CITATION 2015-11-20 11:56:57 UTC (rev 2023)
@@ -0,0 +1,34 @@
+citHeader("To cite RepeatABEL in publications use:")
+
+citEntry(entry="Article",
+ title = "Increasing the power of genome wide association studies in natural populations using repeated measures: evaluation and implementation",
+ author = personList(as.person("Lars Ronnegard"),
+ as.person("S. Eryn McFarlane"),
+ as.person("Arild Husby"),
+ as.person("Takeshi Kawakami"),
+ as.person("Hans Ellegren"),
+ as.person("Anna Qvarnstrom")),
+ journal = "Methods in Ecology and Evolution",
+ year = "2016",
+ volume = "1",
+ number = "1",
+ pages = "1-1",
+
+ textVersion =
+ paste("Lars Ronnegard, S. Eryn McFarlane, Arild Husby, Takeshi Kawakami, Hans Ellegren and Anna Qvarnstrom (2016)",
+ "Increasing the power of genome wide association studies in natural populations using repeated measures: evaluation and implementation.",
+ "Methods in Ecology and Evolution, (pending revision).")
+)
+
+citEntry(entry="Manual",
+title = "GenABEL: genome-wide SNP association analysis",
+author = "GenABEL project developers",
+ year = "2013",
+ note = "R package version 1.8-0",
+ url = "http://CRAN.R-project.org/package=GenABEL",
+
+textVersion =
+ paste("GenABEL project developers (2013).",
+"GenABEL: genome-wide SNP association analysis.",
+"R package version 1.8-0.","http://CRAN.R-project.org/package=GenABEL")
+)
Added: pkg/RepeatABEL/inst/doc/RepeatABEL_vignette.pdf
===================================================================
--- pkg/RepeatABEL/inst/doc/RepeatABEL_vignette.pdf (rev 0)
+++ pkg/RepeatABEL/inst/doc/RepeatABEL_vignette.pdf 2015-11-20 11:56:57 UTC (rev 2023)
@@ -0,0 +1,6997 @@
+%PDF-1.5
+%ÐÔÅØ
+4 0 obj <<
+/Length 521
+>>
+stream
+concordance:RepeatABEL_vignette.tex:RepeatABEL_vignette.Rnw:1 101 1 1 5 37 1 1 2 1 0 1 2 1 0 1 2 4 0 1 2 2 1 1 3 12 0 1 2 5 1 1 2 29 0 1 2 4 1 1 2 1 0 1 3 10 0 1 2 3 1 1 2 1 0 1 4 12 0 1 2 3 1 1 4 9 0 1 2 8 1 1 3 2 0 1 4 8 0 1 2 8 0 1 1 28 0 1 2 6 1 1 3 2 0 1 6 4 0 1 2 4 0 1 2 4 1 1 2 1 0 6 1 3 0 1 2 2 1 1 5 4 0 1 2 4 0 1 2 5 1 1 2 1 0 1 1 3 0 1 2 2 1 1 5 7 0 1 2 2 1 1 3 5 0 1 2 2 1 1 4 6 0 1 2 2 1 1 3 2 0 1 1 3 0 1 2 8 1 1 4 3 0 1 2 1 0 2 1 1 5 4 0 1 7 6 0 1 3 2 0 1 2 1 0 1 4 6 0 1 2 4 1 1 4 3 0 2 1 1 2 4 0 1 2 3 1
+endstream
+endobj
+7 0 obj <<
+/Length 270
+/Filter /FlateDecode
+>>
+stream
+xÚ
=O1÷ûØI.¹J¥ªNÙá ×ø8tVþ:vST118vòÚ¯ØÁ¬÷O^æüÒgÀd c²lM¡
í"AÙÀ*Õ#Ǩw¨úëwm(ªqàr±XrZêX at 5<hJêYGRÒ°;ÍÚ4ÔÛ
O9¦ùIÆ^ô]¹b<³0Jѯ;@d"AÚB) ÞfßVȵÌ||°7µª/ðÁfô`
+)×ÆïÀÞ§§ÇÞ12-yuë(éñð±ÝÙêêÀøÎf"0mî°ú\O²úÏË^åvÏjÕ8UTþ¬fbÙq`ücû¥ùºb6
+endstream
+endobj
+16 0 obj <<
+/Length 3463
+/Filter /FlateDecode
+>>
+stream
+xÚÅZ[Û¶~÷¯Ðø¡CM- At§Î4Þq&ÍxìmÒIÒt¸V«Z·èâK}Ï
+ HA^eÚ&»@\¾óá j´©ÑÍ%¿_Þ>ùâeåFÚæ
Ìèö~¤Í2£ZU¹6Íèv6ú1{5ÖÙf\èìýv<)T6;MKÈn7ãÜ~ýÅK7jò¦2v¢F£¡ÇÍo Ö=@Kk³÷]làßûÄ¿9~,²ÅòýØ8È
êí¿,I
+øæ¥0u6ä þ¦Gés+1¿åö|ðØì
+v½ÖU6oñÛ/¿ÿßð×$Ûéº~7.«¬]ÌóñÄÖ%OBdÜa
+j pÎZçMYòe=4ºßâÿ5ͦÎn¾km²o9)1#YZ̸ì
+¶÷#ºnQY¥)O
+'9ãoÛ;Ô%µ|OwºXi5<¯´J^¿íêÐeųôbA¢RÙºEÍ}âÌÐ3$à«B
+ûÅPá»±+x.$ÓéÐ5hIíKxJ?)]̧O©fbL¨l;t>dÄ7g]³æ¶(ߧVÝ ,ÓÔÂÎQì¤ ¬Sa)0}6*ßf0CÏïQ
+Ôí
+k>Ñtݨ%wâ¶Ùª±¿=ôõ%$ÙÜH6EÍðøÏʸD¤f=] fl
`¼ 9£²Ãίé`¥ºÄ¢ÐbÛ¢É^p«5['0/>,E¤¨KeâF¹!ûÑ£Þ~ûZÆìÃÊ<éåYKsè0H´V}^È_ bÖçh\ÛZY'_²õcèÎkqô)BºÓÝ/ì°ÈÒÅ°#enPG¸ÆOªTÄ ´Ø<«*æTt²¾{ÚÌÇ¥ñ5âR³g¨3½%¢Å)ý^]«4ì_ØÜ6®¯.Ã( ©ªÈ°ûâá9X at Y B¬B.Ie4
Ø´ a@L{Ðf!-Ï{jMBclÍCb÷©¸Úmùo*ǹr½Õ2è¡a»Ð?CÁ
+³]Uôh[+&ÞÜ<ý8©ÎnzÊ«³¯øТ7dAPdª&øßI÷¼i÷qV»¼,
[TRUNCATED]
To get the complete diff run:
svnlook diff /svnroot/genabel -r 2023
More information about the Genabel-commits
mailing list