[Genabel-commits] r2023 - in pkg: . RepeatABEL RepeatABEL/R RepeatABEL/data RepeatABEL/inst RepeatABEL/inst/doc RepeatABEL/man RepeatABEL/vignettes RepeatABEL/vignettes/marginnote

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Fri Nov 20 12:56:57 CET 2015


Author: larsronn
Date: 2015-11-20 12:56:57 +0100 (Fri, 20 Nov 2015)
New Revision: 2023

Added:
   pkg/RepeatABEL/
   pkg/RepeatABEL/DESCRIPTION
   pkg/RepeatABEL/NAMESPACE
   pkg/RepeatABEL/R/
   pkg/RepeatABEL/R/Create_gwaa_scan.R
   pkg/RepeatABEL/R/SmoothSNPmatrix.R
   pkg/RepeatABEL/R/compute.GRM.R
   pkg/RepeatABEL/R/constructV.R
   pkg/RepeatABEL/R/preFitModel.R
   pkg/RepeatABEL/R/rGLS.R
   pkg/RepeatABEL/R/simulate_PhenData.R
   pkg/RepeatABEL/data/
   pkg/RepeatABEL/data/Phen.Data.rda
   pkg/RepeatABEL/data/datalist
   pkg/RepeatABEL/data/flycatchers.rda
   pkg/RepeatABEL/data/gen.data.rda
   pkg/RepeatABEL/data/thinned.flycatchers.rda
   pkg/RepeatABEL/inst/
   pkg/RepeatABEL/inst/CITATION
   pkg/RepeatABEL/inst/doc/
   pkg/RepeatABEL/inst/doc/RepeatABEL_vignette.pdf
   pkg/RepeatABEL/man/
   pkg/RepeatABEL/man/.Rapp.history
   pkg/RepeatABEL/man/Create_gwaa_scan.Rd
   pkg/RepeatABEL/man/Phen.Data.Rd
   pkg/RepeatABEL/man/RepeatABEL-package.Rd
   pkg/RepeatABEL/man/SmoothSNPmatrix.Rd
   pkg/RepeatABEL/man/compute.GRM.Rd
   pkg/RepeatABEL/man/constructV.Rd
   pkg/RepeatABEL/man/flycatchers.Rd
   pkg/RepeatABEL/man/gen.data.Rd
   pkg/RepeatABEL/man/preFitModel.Rd
   pkg/RepeatABEL/man/rGLS.Rd
   pkg/RepeatABEL/man/simulate_PhenData.Rd
   pkg/RepeatABEL/man/thinned.flycatchers.Rd
   pkg/RepeatABEL/vignettes/
   pkg/RepeatABEL/vignettes/.Rhistory
   pkg/RepeatABEL/vignettes/Figure_Manhattan.pdf
   pkg/RepeatABEL/vignettes/RepeatABEL_vignette.Rnw
   pkg/RepeatABEL/vignettes/marginnote/
   pkg/RepeatABEL/vignettes/marginnote/README
   pkg/RepeatABEL/vignettes/marginnote/marginnote.dtx
   pkg/RepeatABEL/vignettes/marginnote/marginnote.ins
   pkg/RepeatABEL/vignettes/marginnote/marginnote.pdf
   pkg/RepeatABEL/vignettes/marginnote/marginnote.sty
Log:
RepeatABEL package uploaded

Added: pkg/RepeatABEL/DESCRIPTION
===================================================================
--- pkg/RepeatABEL/DESCRIPTION	                        (rev 0)
+++ pkg/RepeatABEL/DESCRIPTION	2015-11-20 11:56:57 UTC (rev 2023)
@@ -0,0 +1,11 @@
+Package: RepeatABEL
+Type: Package
+Title: GWAS for Multiple Observations on Related Individuals
+Version: 1.0
+Date: 2015-11-03
+Author: Lars Ronnegard
+Maintainer: Lars Ronnegard <lrn at du.se>
+Description: The RepeatABEL package is used to perform genome-wide association studies on individuals that are both related and have repeated measurements.
+License: GPL
+Depends: R (>= 2.10), hglm, GenABEL
+

Added: pkg/RepeatABEL/NAMESPACE
===================================================================
--- pkg/RepeatABEL/NAMESPACE	                        (rev 0)
+++ pkg/RepeatABEL/NAMESPACE	2015-11-20 11:56:57 UTC (rev 2023)
@@ -0,0 +1,4 @@
+exportPattern("^[^\\.]")
+import("hglm","GenABEL")
+
+

Added: pkg/RepeatABEL/R/Create_gwaa_scan.R
===================================================================
--- pkg/RepeatABEL/R/Create_gwaa_scan.R	                        (rev 0)
+++ pkg/RepeatABEL/R/Create_gwaa_scan.R	2015-11-20 11:56:57 UTC (rev 2023)
@@ -0,0 +1,22 @@
+#' @title Creates a scan.gwaa object
+#'
+#' @description
+#' Creates a scan.gwaa object from a GenABEL object and p-values.
+#'
+#' @param data A gwaa.data object
+#' @param P1df P-values computed from external analysis
+#' @param SNP.eff Estimated additive SNP effects
+#' 
+#' @author Lars Ronnegard
+#' 
+Create_gwaa_scan <- function(data, P1df, SNP.eff) {
+	chi2.1df <- qchisq(P1df, df=1, lower.tail=FALSE)
+	lambda <- try( estlambda(chi2.1df), silent=TRUE )
+	se_effB <- abs(SNP.eff)/qnorm(P1df/2, lower.tail=FALSE)
+    if ( class(lambda) == "try-error" ) lambda <- list( estimate = NA, se = NA )
+	results <- data.frame(effB = SNP.eff,  se_effB = se_effB, chi2.1df = NA, P1df = P1df, Pc1df = NA, stringsAsFactors = FALSE)
+  rownames(results) = snpnames(data)
+	out <- new("scan.gwaa", results = results, annotation = annotation(data), lambda = lambda, idnames = idnames(data), call = as.call( list("rGLS") ), family = "gaussian")
+	return(out)
+}
+

Added: pkg/RepeatABEL/R/SmoothSNPmatrix.R
===================================================================
--- pkg/RepeatABEL/R/SmoothSNPmatrix.R	                        (rev 0)
+++ pkg/RepeatABEL/R/SmoothSNPmatrix.R	2015-11-20 11:56:57 UTC (rev 2023)
@@ -0,0 +1,19 @@
+#' @title Imputes column means to missing genotypes
+#'
+#' @description
+#' Imputes column means to missing genotypes.
+#'
+#' @param SNP A matrix including SNP coding.
+#' 
+#' @author Lars Ronnegard
+#' 
+SmoothSNPmatrix <-
+function(SNP) {
+	#Imputes column means to missing values
+	m <- ncol(SNP)
+    miss <- is.na(colMeans(SNP))
+	for (i in (1:m)[miss]){
+	SNP[is.na(SNP[,i]), i] <- mean( SNP[!is.na(SNP[ , i]), i] )
+	}
+	return(SNP)
+}

Added: pkg/RepeatABEL/R/compute.GRM.R
===================================================================
--- pkg/RepeatABEL/R/compute.GRM.R	                        (rev 0)
+++ pkg/RepeatABEL/R/compute.GRM.R	2015-11-20 11:56:57 UTC (rev 2023)
@@ -0,0 +1,28 @@
+#' @title Computes a Genetic Relationship Matrix from a GenABEL object
+#'
+#' @description
+#' Two alternative methods for GRM computations implemented.
+#'
+#' @param gen.data The GenABEL object.
+#' @param method Method to be used.
+#' 
+#' @author Lars Ronnegard
+#' 
+compute.GRM <-
+function(gen.data, method = "GenABEL") {
+  if (method == "GenABEL"){
+    gkins <- ibs(gen.data, weight = "freq")
+    GRM <- gkins
+    GRM[t(lower.tri(gkins))] <- 0
+    rm(gkins)
+    GRM <- 2*(GRM + t(GRM) - diag(diag(GRM)))
+  }
+  if (method == "ZZt"){
+    SNP.matrix <- scale(as.double(gen.data))
+    m <- ncol(SNP.matrix)
+    markers.to.fit <- (1:m)[!is.na(colMeans(SNP.matrix))] 
+    GRM <- tcrossprod(SNP.matrix[,markers.to.fit])/length(markers.to.fit)
+    rm(SNP.matrix)
+  }
+  return(GRM)
+}

Added: pkg/RepeatABEL/R/constructV.R
===================================================================
--- pkg/RepeatABEL/R/constructV.R	                        (rev 0)
+++ pkg/RepeatABEL/R/constructV.R	2015-11-20 11:56:57 UTC (rev 2023)
@@ -0,0 +1,22 @@
+#' @title Constructs the (co)variance matrix for y
+#'
+#' @description
+#' Constructs the (co)variance matrix for y.
+#'
+#' @param Z The incidence matrix for the random effects column binded with the Cholesky of the GRM 
+#' @param RandC The number of columns in the two matrices combined in Z. 
+#' @param ratio The ratios between random effect variances and the residual variance.
+#' 
+#' @author Lars Ronnegard
+#' 
+constructV <-
+function(Z, RandC, ratio) {
+  V <- diag(nrow(Z))
+  indx <- 1
+  for (i in 1:length(ratio)) {
+    Z.tmp <- Z[,indx:(indx + RandC[i] - 1)]
+    indx = indx + RandC[i]
+    V <- V + tcrossprod(Z.tmp) * ratio[i]
+ }
+ return(V)
+}

Added: pkg/RepeatABEL/R/preFitModel.R
===================================================================
--- pkg/RepeatABEL/R/preFitModel.R	                        (rev 0)
+++ pkg/RepeatABEL/R/preFitModel.R	2015-11-20 11:56:57 UTC (rev 2023)
@@ -0,0 +1,175 @@
+#' @title Fits a linear mixed model (without fixed SNP effects) and computes the fitted variance-covariance matrix for later use in the rGLS function.
+#'
+#' @description Uses a GenABEL object and phenotype data as input. The model is fitted using the \code{hglm} function in the hglm package.
+#'
+#' @param fixed A formula including the response and fixed effects
+#' @param random A formula for the random effects
+#' @param id.name The column name of the IDs in phen.data
+#' @param genabel.data An GenABEL object including marker information. This object has one observation per individual. 
+#' @param phenotype.data A data frame including the repeated observations and IDs. 
+#' @param corStruc A list specifying the correlation structure for each random effect. The options are: \code{"Ind"} for iid random effects, \code{"GRM"} for a correlation structure given by a genetic relationship matrix, or \code{"CAR"} for a spatial correlation structure given by a Conditional Autoregressive model specified by a neighborhood matrix. 
+#' @param GRM A genetic relationship matrix. If not specified whilst the \code{"GRM"} option is given for \code{corStruc} then the GRM is computed internally within the function. 
+#' @param Neighbor.Matrix A neighborhood matrix having non-zero value for an element (i,j) where the observations i and j come from neighboring locations. The diagonal elements should be zero. 
+#' @return Returns a list including the fitted hglm object \code{fitted.hglm}, the variance-covariance matrix \code{V} and the ratios between estimated variance components for the random effects divided by the residual variance, \code{ratio}. 
+#' @author Lars Ronnegard
+#' 
+#' @examples
+#'  ####### FIRST EXAMPLE USING GRM #############
+#'  data(Phen.Data) #Phenotype data with repeated observations
+#'  data(gen.data) #GenABEL object including IDs and marker genotypes
+#'  GWAS1 <- rGLS(y ~ age + sex, genabel.data = gen.data, phenotype.data = Phen.Data) 
+#'  plot(GWAS1, main="")
+#'  summary(GWAS1)
+#'  #Summary for variance component estimation without SNP effects
+#'  summary(GWAS1@@call$hglm) 
+#'  #The same results can be computed using the preFitModel as follows
+#'  fixed = y ~ age + sex
+#'  Mod1 <- preFitModel(fixed, random=~1|id, genabel.data = gen.data, 
+#'                      phenotype.data = Phen.Data, corStruc=list( id=list("GRM","Ind") )) 
+#'  GWAS1b <- rGLS(fixed, genabel.data = gen.data, 
+#'                 phenotype.data = Phen.Data, V = Mod1$V) 
+#'  plot(GWAS1b, main="Results using the preFitModel function")
+#'  ####### SECOND EXAMPLE USING CAR #############
+#'  # Add a fake nest variable to the data just to run the example
+#'  #In this example there are 6 nests and 60 observations per nest
+#'  Phen.Data$nest <- rep(1:6, each=60)
+#'  #A model including polygenic effects, permanent environmental effects, 
+#'  #and nest effect as random
+#'  Mod2 <- preFitModel(fixed, random=~1|id + 1|nest, genabel.data = gen.data, 
+#'           phenotype.data = Phen.Data, corStruc=list( id=list("GRM","Ind"), nest=list("Ind")) )
+#'  GWAS2 <- rGLS(fixed, genabel.data = gen.data, phenotype.data = Phen.Data, V = Mod2$V) 
+#'  plot(GWAS2) 
+#'  #Similar to previous plot because the nest effect variance component is almost 0.
+#'  ###################
+#'  #Construct a fake nighbourhood matrix
+#'  D = matrix(0,6,6)
+#'  D[1,2] = D[2,1] = 1
+#'  D[5,6] = D[6,5] = 1
+#'  D[2,4] = D[4,2] = 1
+#'  D[3,5] = D[5,3] = 1
+#'  D[1,6] = D[6,1] = 1
+#'  D[3,4] = D[4,3] = 1
+#'  #The matrix shows which pair of nests that can be considered as neighbours
+#'  image(Matrix(D), main="Neighbourhood matrix")
+#'  Mod3 <- preFitModel(fixed, random=~1|id + 1|nest, genabel.data = gen.data, 
+#'           phenotype.data = Phen.Data, corStruc=list( id=list("GRM","Ind"), 
+#'                                      nest=list("CAR")), Neighbor.Matrix=D )
+#'  GWAS2b <- rGLS(fixed, genabel.data = gen.data, 
+#'                 phenotype.data = Phen.Data, V = Mod3$V) 
+#'  plot(GWAS2b)
+#'
+preFitModel <- function(fixed = y~1, random = ~1|id, id.name = "id", genabel.data, phenotype.data, corStruc = NULL, GRM = NULL, Neighbor.Matrix = NULL) {
+  if (!inherits(fixed, "formula") || length(fixed) != 3) stop("\n Fixed-effects model must be a formula of the form \"resp ~ pred\"")
+  if (!inherits(random, "formula")) stop("\n Random part must be a one-sided formula of the form \" ~ effect|Cluster\"")
+  if (attr(terms(random), "response") != 0) stop("\n Random part must be a one-sided formula of the form \" ~ effect|Cluster\"")
+  if (all.names(random)[2] != "|") stop("The subjects/clusters in Random must be separated by \" |\"")
+  if (class(genabel.data)!="gwaa.data") stop("The input of genabel.data is not a GenABEL object")
+  if (is.null(genabel.data at phdata$id)) stop("IDs not given as id in the phdata list")
+  #Get trait name
+  trait <- all.vars(fixed)[1]
+  #Remove NAs from phenotypic data
+  y.all <- phenotype.data[,names(phenotype.data) %in% trait]
+  phenotype.data <- phenotype.data[!is.na(y.all),]
+  #Connect IDs in GenABEL data set with IDs in the phenotype file
+  id1 <- phenotype.data[,names(phenotype.data) %in% id.name] #ID for phenotype data
+  id2 <- genabel.data at phdata$id #ID for genotype data
+  test1 <- id1 %in% id2
+  test2 <- id2 %in% id1
+  genabel.data = genabel.data[test2, ] #Exclude individuals having no phenotype information
+  phenotype.data = phenotype.data[test1, ] #Exclude individuals having no genotype information
+  id1 <- phenotype.data[,names(phenotype.data) %in% id.name] #ID for phenotype data for cleaned data
+  id2 <- genabel.data at phdata$id #ID for genotype data for cleaned data
+  ### Create design matrix for the fixed effects ###
+  Y <- phenotype.data[,names(phenotype.data) %in% trait]
+  X <- model.matrix(fixed, data = phenotype.data)
+  if (nrow(X) != length(Y)) stop("Remove all lines with NA from the phenotype input data before running this function.")
+  row.names(X) <- NULL
+  ###############################################
+  ## Create design matrix for random effects ####
+  ###############################################
+  #Construct incidence matrix for repeated observations
+  N <- length(id2)
+  n <- length(id1)
+  indx <- numeric(n)
+  for (i in 1:N) {
+    indx <- indx + i * (id1 %in% id2[i])
+  }
+  ### Get SQRT of GRM first
+  if (is.null(GRM)) {
+    autosomalMarkers <- which(chromosome(genabel.data) != "X")
+    GRM <- compute.GRM(genabel.data[,snpnames(genabel.data)[autosomalMarkers]])  
+  }
+  eig <- eigen(GRM)
+  if (max(diag(GRM))>1.6) print("There seems to be highly inbred individuals in your data")
+  if (min(eig$values) < -0.5 ) print("The genetic relationship matrix is far from positive definite")
+  non_zero.eigenvalues = eig$values>(1e-6) #Put numerically small eigenvalues to zero
+  eig$values[!non_zero.eigenvalues] <- 0
+  print("GRM ready")
+  Z.GRM <- ( eig$vectors %*% diag(sqrt(eig$values)) )[indx,]
+  ##Create Z
+  RanTermVec <- unlist(strsplit(attr(terms(random), "term.labels"), split = c("+"), fixed = TRUE))
+  k.rand <- length(RanTermVec)
+  Z <- NULL
+  RandC <- NULL
+  data <- phenotype.data
+  for (i in 1:k.rand) {
+    RanTerm <- unlist(strsplit(RanTermVec[i], split = "|", fixed = TRUE))
+    RanTerm[2] <- gsub(" ", "", RanTerm[2], fixed = TRUE)
+    ranf <- paste("~", "as.factor(", RanTerm[2], ")", "-1", sep = "")
+    ranf <- as.formula(ranf)
+    rmf <- model.frame(ranf, data)
+    z <- model.matrix(attr(rmf, "terms"), data = rmf)
+    if (nrow(z) != length(Y)) stop("Remove all lines with NA from the phenotype input data before running this function.")
+    row.names(z) <- NULL
+    if (is.null(corStruc)) {
+      RandC <- c(RandC, ncol(z))
+      Z <- cbind(Z, z)
+    }else{
+      cs <- unlist( corStruc[names(corStruc) %in% RanTerm[2]] )
+      for (j in 1:length(cs)) {
+        zL <- switch(cs[j],
+                     "Ind" = z,
+                     "GRM" = Z.GRM,
+                     "CAR" = z,
+                     stop("Input correlation structure not available")
+        )
+        
+        RandC <- c(RandC, ncol(zL))
+        Z <- cbind(Z, zL)
+        j.rand <- length(RandC)
+        if (cs[j]=="CAR") {
+          if ( is.null(Neighbor.Matrix) ) stop("CAR correlation requires a Neighbor.Matrix to be specified")
+          if ( ncol(z) != ncol(Neighbor.Matrix) ) stop("Incorrect dimension of Neighbor.Matrix")
+          if ( j.rand == 1) {
+            rand.family = list( CAR(D=Neighbor.Matrix) )
+          } else {
+            rand.family[[j.rand]] = CAR(D=Neighbor.Matrix)
+          }
+        } else {
+          if (j.rand == 1) rand.family = list(gaussian())
+          if (j.rand > 1) rand.family[[j.rand]] = gaussian()
+        }
+      }
+    }
+  }
+  #####################################
+  ### Fit the model 
+  if (length(RandC) > 1) mod1 <- hglm(y=Y, X=X, Z=Z, RandC=RandC, maxit=200, rand.family=rand.family)
+  if (length(RandC) == 1) mod1 <- hglm(y=Y, X=X, Z=Z, RandC=RandC, maxit=200, rand.family=rand.family[[1]])
+  ratio <- mod1$varRanef/mod1$varFix
+  #stop("The constructV function needs to be changed if CAR model is to be used")
+  #V <- constructV(Z, RandC, ratio)
+   V = diag(nrow(Z))
+    indx = 1
+    for (i in 1:length(ratio)) {
+    	Z.tmp <- Z[, indx:(indx + RandC[i] - 1)]
+		indx = indx + RandC[i]
+    	if (rand.family[[i]]$family != "CAR") {
+			V <- V + tcrossprod(Z.tmp) * ratio[i]
+    	} else {
+    		ratio[i] <- mod1$CAR.tau/mod1$varFix
+    		V <- V + Z.tmp %*% solve( diag( ncol(Z.tmp) ) - mod1$CAR.rho * Neighbor.Matrix ) %*% t(Z.tmp) * ratio[i]
+    	}
+    }
+  list( fitted.hglm = mod1, V = V, ratio = ratio)
+}

Added: pkg/RepeatABEL/R/rGLS.R
===================================================================
--- pkg/RepeatABEL/R/rGLS.R	                        (rev 0)
+++ pkg/RepeatABEL/R/rGLS.R	2015-11-20 11:56:57 UTC (rev 2023)
@@ -0,0 +1,155 @@
+#' @title GWAS for Studies having Repeated Measurements on Related Individuals
+#'
+#' @description
+#'  It is used to perform genome-wide association studies on individuals that are both related and have repeated measurements. 
+#'  The function computes score statistic based p-values for a linear mixed model including random polygenic effects and 
+#'  a random effect for repeated measurements. A p-value is computed for each marker and the null hypothesis tested is a
+#'   zero additive marker effect. 
+#'
+#' @param formula.FixedEffects Formula including the response variable and cofactors as fixed effects.
+#' @param genabel.data An GenABEL object including marker information. This object has one observtion per individuals. 
+#' @param phenotype.data A data frame including the repeated observations and IDs. 
+#' @param id.name The column name of the IDs in phen.data
+#' @param GRM An optional genetic relationship matrix (GRM) can be included as input. Otherwise the GRM is computed within the function.
+#' @param V An optional (co)variance matrix can be included as input. Otherwise it is computed using the hglm function.
+#' @param memory Used to optimize computations. The maximum number of elements in a matrix that can be stored efficiently.
+#' @details 
+#' A generalized squares (GLS) is fitted for each marker given a (co)variance matrix V. 
+#' The computations are made fast by transforming the GLS to 
+#' an ordinary least-squares (OLS) problem using an eigen-decomposition of V. 
+#' The OLS are computed using QR-factorization. If V is not specified then a model 
+#' including random polygenic effects and permanent environmental effects is 
+#' fitted (using the hglm package) to compute V. A GenABEL object (scan.gwaa class) 
+#' is returned (including also the \code{hglm} results).
+#' Let e.g. GWAS1 be an object returned by the \code{rGLS} function. 
+#' Then a Manhattan plot can be produced by calling \code{plot(GWAS1)} and 
+#' the top SNPs using \code{summary(GWAS1)}. Both of these functions are 
+#' generic GenABEL functions. \cr
+#' The results from the fitted linear mixed model without any SNP effect included 
+#' are produced by calling \code{summary(GWAS1@@call$hglm)}.
+#' 
+#' @author Lars Ronnegard
+#' 
+#' 
+#' @examples
+#'  data(Phen.Data) #Phenotype data with repeated observations
+#'  data(gen.data) #GenABEL object including IDs and marker genotypes
+#'  GWAS1 <- rGLS(y ~ age + sex, genabel.data = gen.data, phenotype.data = Phen.Data) 
+#'  plot(GWAS1, main="")
+#'  summary(GWAS1)
+#'  #Summary for variance component estimation without SNP effects
+#'  summary(GWAS1@@call$hglm) 
+#'
+rGLS <-
+function(formula.FixedEffects = y ~ 1, genabel.data, phenotype.data, id.name = "id", GRM = NULL, V = NULL, memory=1e8) {
+	#Check input data
+	if (class(genabel.data) != "gwaa.data") stop("The input of genabel.data is not a GenABEL object")
+	if (is.null(genabel.data at phdata$id)) stop("IDs not given as id in the phdata list")
+    if (!is.null(GRM)) { if(!isSymmetric(GRM)) warning("The given GRM must be a symmetric matrix!") }
+    if (!is.null(V)) { if(!isSymmetric(V)) warning("The given V must be a symmetric matrix!") }
+    V.input  <- V
+    #require(hglm)
+    #require(GenABEL)
+    #Get trait name
+	trait <- all.vars(formula.FixedEffects)[1]
+    #Remove NAs from phenotypic data
+    y.all <- phenotype.data[,names(phenotype.data)%in%trait]
+    phenotype.data <- phenotype.data[!is.na(y.all),]
+	#Connect IDs in GenABEL data set with IDs in the phenotype file
+	id1 <- phenotype.data[,names(phenotype.data) %in% id.name] #ID for phenotype data
+	id2 <- genabel.data at phdata$id #ID for genotype data
+    test1 <- id1 %in% id2
+    test2 <- id2 %in% id1
+    genabel.data <- genabel.data[test2,] #Exclude individuals having no phenotype information
+    phenotype.data <- phenotype.data[test1,] #Exclude individuals having no genotype information
+    id1 <- phenotype.data[,names(phenotype.data) %in% id.name] #ID for phenotype data for cleaned data
+	id2 <- genabel.data at phdata$id #ID for genotype data for cleaned data
+	#####################
+	#Construct incidence matrix for repeated observations
+    N=length(id2)
+    n=length(id1)
+    indx <- numeric(n)
+    for (i in 1:N) {
+        indx <- indx + i * (id1 %in% id2[i])
+    }
+    Z.indx <- diag(N)[indx,]
+    #Construct response and design matrix
+	y <- phenotype.data[ , names(phenotype.data) %in% trait] #Create the response variable
+    X <- model.matrix(formula.FixedEffects, data = phenotype.data) #Fixed effect design matrix
+	#####################
+ if (is.null(V)) {
+    #Construct GRM
+    if (is.null(GRM)) {
+      autosomalMarkers <- which(chromosome(genabel.data)!= "X")
+      GRM <- compute.GRM(genabel.data[ , snpnames(genabel.data)[autosomalMarkers]])
+    }
+    eig <- eigen(GRM)
+    if (max(diag(GRM)) > 1.6) print("There seems to be highly inbred individuals in your data")
+    if (min(eig$values < -0.5)) print("The genetic relationship matrix is far from positive definite")
+    non_zero.eigenvalues <- eig$values>(1e-6) #Put numerically small eigenvalues to zero
+    eig$values[ !non_zero.eigenvalues ] <- 0
+    print("GRM ready")
+    #####################
+    #Fit hglm
+    Z.GRM <- ( eig$vectors %*% diag(sqrt(eig$values)) )[indx, ]
+    Z <- (cbind(Z.GRM, Z.indx))
+    mod1 <- hglm(y=y, X=X, Z=Z, RandC = c(ncol(Z.GRM), ncol(Z.indx)), maxit = 200)
+    if (mod1$Converge != "converged") stop("The variance component estimation did not converge in 200 iterations. Try to estimate them separately and provide the estimated (co)variance matrix V as input. \n\n")
+    print("Variance component estimation ready")
+    #####################
+    #Construct rotation matrix
+    ratio <- mod1$varRanef/mod1$varFix
+    V <- constructV(Z=Z, RandC = c(ncol(Z.GRM), ncol(Z.indx)), ratio)
+  }
+	eig.V <- eigen(V)
+	transf.matrix <- diag(1/sqrt(eig.V$values)) %*% t(eig.V$vectors)
+	y.new <- transf.matrix %*% y
+	X.new <- transf.matrix %*% X
+	print("Rotation matrix ready")
+	#####################
+	#Fit a linear model for each SNP
+	SNP.matrix <- as.double(genabel.data)
+    if (sum(is.na(SNP.matrix)) > 0) {
+        SNP.matrix <- SmoothSNPmatrix(SNP.matrix)
+    }
+	m <- ncol(SNP.matrix)
+	p.val <- SNP.est <- rep(1, m)
+	colnames(X.new) <- as.character(1:ncol(X.new)) #To avoid columns having strange names
+	print("Rotate LMM started")
+    #Fit using QR factorization
+    #Null model
+	qr0 <- qr(X.new)
+	est0 <- qr.coef(qr0, y.new)
+	res <- y.new - X.new %*% est0
+    n <- length(y.new)
+	RSS.0 <- sum(res^2)/n
+	#Split computations into reasonably sized blocks
+    if (memory < n) memory <- n
+	step.size <- floor(memory/n)
+    steps <- ceiling(m/step.size)
+	jj=1
+	kk=0
+	for (step.i in 1:steps) {
+		if (step.i == steps) kk <- kk + m %% step.size else kk <- kk + step.size
+		markers.to.fit <- jj:kk
+		snp.new <- transf.matrix %*% SNP.matrix[indx, markers.to.fit]
+		mm <- 0
+		for (j in markers.to.fit) {
+			mm <- mm+1
+			X1 <- cbind(snp.new[, mm], X.new)
+			qr1 <- qr(X1)
+			est1 <- qr.coef(qr1, y.new)
+			res <- y.new - X1 %*% est1
+			RSS.1 <- sum(res^2)/n
+			SNP.est[j] <- est1[1]
+			LRT <- -n * (log(RSS.1) - log(RSS.0))
+			p.val[j] <- 1 - pchisq(LRT, df=1)
+        }
+        jj <- jj + step.size
+	}
+	print("Rotate LMM ready")
+	#####################
+	qt.results <- Create_gwaa_scan(genabel.data, p.val, SNP.est)
+	if (is.null(V.input)) qt.results at call$hglm <- mod1
+	return(qt.results)
+}

Added: pkg/RepeatABEL/R/simulate_PhenData.R
===================================================================
--- pkg/RepeatABEL/R/simulate_PhenData.R	                        (rev 0)
+++ pkg/RepeatABEL/R/simulate_PhenData.R	2015-11-20 11:56:57 UTC (rev 2023)
@@ -0,0 +1,88 @@
+#' @title Simulation function for the RepeatABEL package.
+#'
+#' @description  The function takes a GenABEL object as input and generates simulated phenotypic values for related individuals having repeated obserevations.
+#' @param formula.FixedEffects A formula including the name of the simulated variable as response, and cofactors as fixed effects.
+#' @param genabel.data A GenABEL object of class gwaa.data.
+#' @param n.obs A vector including the number of observations per individual. The length of n.obs must be equal to the number if individuals in genabel.data.
+#' @param SNP.eff The size of a simulated SNP.effect.
+#' @param SNP.nr The SNP genotype that the SNP effect is simulated on. SNP.nr=i is the i:th SNP.
+#' @param beta The simulated fixed effects. Must be equal to the number of cofactors simulated (including the intercept term).
+#' @param VC A vector of length 3 including the simulated variances of the polygenic effect, permanent environmental effect and residuals, respectively.
+#' @param GRM  An optional input where the Genetic Relationship Matrix can be given. Otherwise it is computed using the GenABEL package.
+#' @param sim.gamma A logical parameter specifying whether the residuals shuld be simulated from a gamma distribution or not. If specified as TRUE then residuals are drawn from a gamma distribution with variance equal to the residual variance specified in \code{VC[3]} 
+
+#' @return Returns a data frame including the simulated phenotypic values, cofactors and IDs.
+#' @author Lars Ronnegard
+
+#' @examples
+#' data(gen.data)
+#'  #Simulate 4 observations per individual
+#'  set.seed(1234)
+#'  Phen.Sim <- simulate_PhenData(y ~ age, genabel.data=gen.data, 
+#'                  n.obs=rep(4, nids(gen.data)), SNP.eff=1, SNP.nr=1000, VC=c(1,1,1))
+#'  GWAS1 <- rGLS(y ~ age, genabel.data = gen.data, phenotype.data = Phen.Sim)
+#'  plot(GWAS1, main="Simulated Data Results")
+#'  
+simulate_PhenData <-
+function(formula.FixedEffects = y~1, genabel.data, n.obs, SNP.eff = NULL, SNP.nr = NULL, beta = NULL, VC = c(1,1,1), GRM = NULL, sim.gamma = FALSE){
+    #library(GenABEL)
+  if (length(SNP.eff) != length(SNP.nr)) stop("The number of elements in SNP.eff and SNP.nr must be the same")
+  if (is.null(SNP.eff)) {
+      SNP.eff <- 0
+      SNP.nr <- 1
+  }
+  SNP <- as.double(genabel.data at gtdata[,SNP.nr])
+  rownames(SNP) <- NULL
+  SNP <- SmoothSNPmatrix(SNP)
+  n <- nids(genabel.data)
+  if (length(n.obs) != n) stop("The number of individuals and the length of n.obs must be the same")
+  N <- sum(n.obs)
+  id1 <- rep(genabel.data at phdata$id, n.obs)
+  id2 <- genabel.data at phdata$id
+  #Construct incidence matrix for repeated observations
+  indx <- numeric(N)
+  for (i in 1:n) {
+      indx <- indx + i * (id1 %in% id2[i])
+  }
+  genabel.data at phdata$temp.dummy <- 0
+  n.names <- length(colnames(genabel.data at phdata))
+  colnames(genabel.data at phdata) <- c(colnames(genabel.data at phdata)[-n.names], all.vars(formula.FixedEffects)[1])
+  ###########
+  if (is.null(GRM)) {
+    autosomalMarkers <- which(chromosome(genabel.data) != "X")
+    GRM <- compute.GRM(genabel.data[ , snpnames(genabel.data)[autosomalMarkers]])
+  }
+  eig <- eigen(GRM)
+  if (max(diag(GRM)) > 1.6) print("There seems to be highly inbred individuals in your data")
+  if (min(eig$values < -0.5)) print("The genetic relationship matrix is far from positive definite")
+  non_zero.eigenvalues <- eig$values>(1e-6) #Put numerically small eigenvalues to zero
+  eig$values[!non_zero.eigenvalues] <- 0
+  Z.GRM <- ( eig$vectors %*% diag(sqrt(eig$values)) )[indx, ]
+  rownames(Z.GRM) <- NULL
+  ############
+  X0 <- model.matrix(formula.FixedEffects, data = genabel.data at phdata)
+  row.names(X0) <- NULL
+  if (is.null(beta)) beta <- rep(0, ncol(X0))
+  if (ncol(X0) != length(beta)) stop("The length of beta must be the same as the number columns in the cofactor design matrix")
+  X <- X0[indx, 1:ncol(X0)]
+  if (class(X)=="numeric") X <- matrix(X, length(X), 1)
+  sigma2a <- VC[1]
+  sigma2p <- VC[2]
+  sigma2e <- VC[3]
+  a <- rnorm(n, 0, sqrt(sigma2a))
+  p <- rnorm(n, 0, sqrt(sigma2p))
+  e <- rnorm(N, 0, sqrt(sigma2e))
+  if (sim.gamma) e <- rgamma(N, sigma2e) - sigma2e #Follows a gamma distribution of variance sigma2e and mean=0
+  y0 <- X %*% beta + Z.GRM %*% a + p[indx] + e
+  if (length(SNP.nr)==1) y <- y0 + SNP[indx,] * SNP.eff
+  if (length(SNP.nr)>1) y <- y0 + SNP[indx,] %*% SNP.eff
+  id <- rep(genabel.data at phdata$id, n.obs)
+  for (i in 1:length(all.vars(formula.FixedEffects))) {
+    var.name <- all.vars(formula.FixedEffects)[i]
+    if (i == 1) xx <- y
+    if (i > 1) xx <- cbind(xx, rep(genabel.data at phdata[ , names(genabel.data at phdata) %in% var.name], n.obs))
+  }
+  colnames(xx) <- all.vars(formula.FixedEffects)
+  Phen.Data <- data.frame(xx, id)
+  return(Phen.Data)
+}

Added: pkg/RepeatABEL/data/Phen.Data.rda
===================================================================
(Binary files differ)


Property changes on: pkg/RepeatABEL/data/Phen.Data.rda
___________________________________________________________________
Added: svn:mime-type
   + application/octet-stream

Added: pkg/RepeatABEL/data/datalist
===================================================================
--- pkg/RepeatABEL/data/datalist	                        (rev 0)
+++ pkg/RepeatABEL/data/datalist	2015-11-20 11:56:57 UTC (rev 2023)
@@ -0,0 +1,4 @@
+Phen.Data
+flycatchers
+gen.data
+thinned.flycatchers

Added: pkg/RepeatABEL/data/flycatchers.rda
===================================================================
(Binary files differ)


Property changes on: pkg/RepeatABEL/data/flycatchers.rda
___________________________________________________________________
Added: svn:mime-type
   + application/octet-stream

Added: pkg/RepeatABEL/data/gen.data.rda
===================================================================
(Binary files differ)


Property changes on: pkg/RepeatABEL/data/gen.data.rda
___________________________________________________________________
Added: svn:mime-type
   + application/octet-stream

Added: pkg/RepeatABEL/data/thinned.flycatchers.rda
===================================================================
(Binary files differ)


Property changes on: pkg/RepeatABEL/data/thinned.flycatchers.rda
___________________________________________________________________
Added: svn:mime-type
   + application/octet-stream

Added: pkg/RepeatABEL/inst/CITATION
===================================================================
--- pkg/RepeatABEL/inst/CITATION	                        (rev 0)
+++ pkg/RepeatABEL/inst/CITATION	2015-11-20 11:56:57 UTC (rev 2023)
@@ -0,0 +1,34 @@
+citHeader("To cite RepeatABEL in publications use:")
+
+citEntry(entry="Article",
+         title = "Increasing the power of genome wide association studies in natural populations using repeated measures: evaluation and implementation",
+         author = personList(as.person("Lars Ronnegard"),
+                             as.person("S. Eryn McFarlane"),
+                             as.person("Arild Husby"),
+                             as.person("Takeshi Kawakami"),
+                             as.person("Hans Ellegren"),
+                             as.person("Anna Qvarnstrom")),
+         journal      = "Methods in Ecology and Evolution",
+         year         = "2016",
+         volume       = "1",
+         number       = "1",
+         pages        = "1-1",
+         
+         textVersion = 
+         paste("Lars Ronnegard, S. Eryn McFarlane, Arild Husby, Takeshi Kawakami, Hans Ellegren and Anna Qvarnstrom (2016)", 
+               "Increasing the power of genome wide association studies in natural populations using repeated measures: evaluation and implementation.",
+               "Methods in Ecology and Evolution, (pending revision).")
+)
+
+citEntry(entry="Manual",
+title = "GenABEL: genome-wide SNP association analysis",
+author = "GenABEL project developers",
+    year = "2013",
+    note = "R package version 1.8-0",
+    url = "http://CRAN.R-project.org/package=GenABEL",
+
+textVersion = 
+         paste("GenABEL project developers (2013).",
+"GenABEL: genome-wide SNP association analysis.",
+"R package version 1.8-0.","http://CRAN.R-project.org/package=GenABEL")
+)

Added: pkg/RepeatABEL/inst/doc/RepeatABEL_vignette.pdf
===================================================================
--- pkg/RepeatABEL/inst/doc/RepeatABEL_vignette.pdf	                        (rev 0)
+++ pkg/RepeatABEL/inst/doc/RepeatABEL_vignette.pdf	2015-11-20 11:56:57 UTC (rev 2023)
@@ -0,0 +1,6997 @@
+%PDF-1.5
+%ÐÔÅØ
+4 0 obj <<
+/Length 521       
+>>
+stream
+concordance:RepeatABEL_vignette.tex:RepeatABEL_vignette.Rnw:1 101 1 1 5 37 1 1 2 1 0 1 2 1 0 1 2 4 0 1 2 2 1 1 3 12 0 1 2 5 1 1 2 29 0 1 2 4 1 1 2 1 0 1 3 10 0 1 2 3 1 1 2 1 0 1 4 12 0 1 2 3 1 1 4 9 0 1 2 8 1 1 3 2 0 1 4 8 0 1 2 8 0 1 1 28 0 1 2 6 1 1 3 2 0 1 6 4 0 1 2 4 0 1 2 4 1 1 2 1 0 6 1 3 0 1 2 2 1 1 5 4 0 1 2 4 0 1 2 5 1 1 2 1 0 1 1 3 0 1 2 2 1 1 5 7 0 1 2 2 1 1 3 5 0 1 2 2 1 1 4 6 0 1 2 2 1 1 3 2 0 1 1 3 0 1 2 8 1 1 4 3 0 1 2 1 0 2 1 1 5 4 0 1 7 6 0 1 3 2 0 1 2 1 0 1 4 6 0 1 2 4 1 1 4 3 0 2 1 1 2 4 0 1 2 3 1
+endstream
+endobj
+7 0 obj <<
+/Length 270       
+/Filter /FlateDecode
+>>
+stream
+xÚ…=O1†÷û‰„ØI.¹‘J¥ªNÙ€á ×‚ø8tVþ:vST118vòÚ¯žØÁ¬÷O^”æüÒgÀd	c€²lƒM¡…“í"AÙÀ*Õ#Ǩw¨ú‘ëwm(ªqàr±XrZ‹êX at 5<hJêYGRÒ°;ÍšÚ4ÔÛž…O9¦ùIÆ^ô]¹b<³0Jѯ˜;@d"AÚB‡)	ÞfßVȵÌ||°7µª/ðÁfô`œ
+)×ÆïƒÀÞ§§‰ÇÞŽ12-yuë(éñð±ÝÙêêÀøÎf"0”mî°ú\Oš²ú’ÏËŽ^åvÏ‹jÕ8UTþ¬fbÙq`ücû›—¥ùºb6
+endstream
+endobj
+16 0 obj <<
+/Length 3463      
+/Filter /FlateDecode
+>>
+stream
+xÚÅZ[“Û¶~÷¯Ðø¡CM- At§Î4Þq&ÍxìmÒIÒt¸’V«Z·èâK}Ï
+ HA^eÚ&»@\¾óá j´©ÑÍ%¿_Þ>ùâeåFÚæ…­Ìèö~¤Í2£ZU¹6Íèv6ú1{5ÖÙf\èì‰ýv<)T6;MKÈn7ãÜ~ýÅK7jò¦2v¢F£¡ÇÍo Ö=@Kk³÷’]làßûÄ¿9~,²ÅòýØ8È…ê톿,I
+øæ¥0u6ƒä	þ¦Gés+•1¿åö˜|ðØì
+v½ƒÖU6oñÛ‹/¿‚ÿßð×$Ûéº~7.«¬]ÌóñÄÖ%OBdÜa
+j pÎZçMYòœe€=4ºßâÿ5ͦÎn¾km²o9‹)1#YZ̸ì
+¶÷#ºnQY¥)O
+'9ãoÛ;Ô%µ|Owº™XiŽ5<“¯´J^¿íêЛeųôbA—“¢RÙºEÍ}âÌ­Ðœ3$àŠ«B
+ûÅ–Pá»±+x.$ÓéÐ5hI˜íKxJ?)]̧‚˜—O©fbL¨l;t>dÄ7g]³æ¶(ߧVÝ ,ÓÔÂÎQ쐤 ¬Sa)0}6*߉f0CÏïQ
+Ôí
+k>Ñt•Ý¨%wŠâ¶œÙ’ª±¿=ôõƒ%$Ù܃H6EÍðøŽÏʸDƒœ¤f=]	fl…`¼ 9£²Ãίé’`¥º‰”Ä¢ÐbÛ¢É^p«5['0/­>,E¤‡­¨Keâ”F¹!ûÑ£ŠÞ~ûZÆìÃÊ<é囆YKsè0˜“ŒH´V—}^‹•‚È_ bÖçhŠ\ÛZY'_²õc‚èÎkqô)B™ºÓÝ/ì°ÈÒÅ”°#en‰PG¸ÆOªTÄ ´Ø<«*æTt²¾—š{ÚÌÇ¥ñ5âR³g¨3›½%¢Å)›ý^Œ]«–4ì_ØÜ6®¯.‘Ã(­ ©ªÈ°ûâá9X at Y B¬B.Ie4‰…Ø´ a‚@L{Ðf!-Ï{jMBcl͐C€b÷©¸‘Ú†­mùo*ǹr½Õ2è¡•a»Ð?C—Á
+³†]Uôh[+&ÞÜ<ý8©”Înž†zƒÊ«³¯‰øТ7dAPdž“ª&øßI÷¼i÷qV»¼,
[TRUNCATED]

To get the complete diff run:
    svnlook diff /svnroot/genabel -r 2023


More information about the Genabel-commits mailing list