[Genabel-commits] r701 - in pkg/GenABEL: . R inst/unitTests man
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Tue Mar 22 02:17:57 CET 2011
Author: yurii
Date: 2011-03-22 02:17:57 +0100 (Tue, 22 Mar 2011)
New Revision: 701
Added:
pkg/GenABEL/R/blurGenotype.R
pkg/GenABEL/R/findRelatives.R
pkg/GenABEL/R/generateOffspring.R
pkg/GenABEL/R/getLogLikelihoodGivenRelation.R
pkg/GenABEL/R/makeTransitionMatrix.R
pkg/GenABEL/inst/unitTests/runit.findRelatives.R
pkg/GenABEL/man/blurGenotype.Rd
pkg/GenABEL/man/findRelatives.Rd
pkg/GenABEL/man/generateOffspring.Rd
pkg/GenABEL/man/getLogLikelihoodGivenRelation.Rd
pkg/GenABEL/man/makeTransitionMatrix.Rd
Modified:
pkg/GenABEL/CHANGES.LOG
pkg/GenABEL/NAMESPACE
pkg/GenABEL/generate_documentation.R
pkg/GenABEL/inst/unitTests/report.html
pkg/GenABEL/inst/unitTests/report.txt
pkg/GenABEL/inst/unitTests/reportSummary.txt
pkg/GenABEL/man/GenABEL-package.Rd
pkg/GenABEL/man/polygenic.Rd
pkg/GenABEL/man/polygenic_hglm.Rd
Log:
added 'findRelatives' and four supporting functions; also some accumulated changes
Modified: pkg/GenABEL/CHANGES.LOG
===================================================================
--- pkg/GenABEL/CHANGES.LOG 2011-03-14 09:15:43 UTC (rev 700)
+++ pkg/GenABEL/CHANGES.LOG 2011-03-22 01:17:57 UTC (rev 701)
@@ -1,13 +1,22 @@
-*** v. 1.6-6 (2011.03.06)
+*** v. 1.6-6 (2011.03.22)
-Changes convert.snp.mach documentation to reflect the fact that
+Added a number of functions facilitating relationship checks.
+The core function is 'findRelatives'. Compared
+to guessing relations from genomic kinship matrix, this
+procedure offers several enhancements: (1) by use of IBD/IBS
+3-state space, it allows to distinguish between some pairs,
+which have the same kinship (e.g. parent-offspring from
+brother-sister; uncle-nephew from grandparent-grandchild, etc.)
+(2) it reports likelihood, allowing for more rigorous inferences
+
+Changes in convert.snp.mach documentation to reflect the fact that
the map-file should have header; changes to convert.snp.illumina
documentation to reflect the nature of the data better; added
option 'mapHasHeaderLine' to convert.snp.ped and updated documentation
(resolving feature request #1317).
With suggestion and guidance from Xia Shen, added procedure 'polygenic_hglm'.
-At the moment very experimental, and needs to pass checks before can be released.
+At the moment experimental.
***** v. 1.6-5 (2011.02.07)
Modified: pkg/GenABEL/NAMESPACE
===================================================================
--- pkg/GenABEL/NAMESPACE 2011-03-14 09:15:43 UTC (rev 700)
+++ pkg/GenABEL/NAMESPACE 2011-03-22 01:17:57 UTC (rev 701)
@@ -22,6 +22,7 @@
as.hsgeno.gwaa.data,
as.hsgeno.snp.data,
autosomal,
+ blurGenotype,
catable,
ccfast,
check.marker,
@@ -50,8 +51,11 @@
export.plink,
extract.annotation.impute,
extract.annotation.mach,
+ findRelatives,
formetascore,
GASurv,
+ generateOffspring,
+ getLogLikelihoodGivenRelation,
grammar,
HWE.show,
hom,
@@ -62,6 +66,7 @@
impute2mach,
load.gwaa.data,
mach2databel,
+ makeTransitionMatrix,
merge.gwaa.data,
merge.snp.data,
mlreg,
Added: pkg/GenABEL/R/blurGenotype.R
===================================================================
--- pkg/GenABEL/R/blurGenotype.R (rev 0)
+++ pkg/GenABEL/R/blurGenotype.R 2011-03-22 01:17:57 UTC (rev 701)
@@ -0,0 +1,96 @@
+#' blur genotype calls into probabilites
+#'
+#' 'blurs' genotype calls into probabilities: translates
+#' single genotype g2, into probability distribution P(g1|g2),
+#' that is probability that true genotype is g1
+#' given g2 is the observed 'called' genotype and error rate is
+#' epsilon. Probability that 'true' genotype is called genotype
+#' is set to (1-epsilon)^2, the probability that true genotype
+#' differs at 1 allele is set to epsilon*(1-epsilon), and
+#' both allelel differ = epsilon^2.
+#'
+#' @param g vector of genotypes for a particular person
+#' (at locus 1, locus 2, etc., coded as 0, 1, 2 (corresponding
+#' to genotypes AA, AB, and BB, respectively) and NA.
+#' @param q (optional) vector of coded allele freqeuncies for
+#' locus 1, locus 2, etc.
+#' @param epsilon error rate
+#'
+#' @return matrix with columns corresponding to SNPs
+#' and rows corresponding to 'g0', 'g1', 'g2'. For
+#' a particular SNP, a vale in cell 'gK' is the
+#' probability that true genotype is 'K', given
+#' thw original call and error-rate.
+#'
+#' @author Yurii Aulchenko
+#'
+#' @examples
+#' data(srdta)
+#' # select 10 first SNPs
+#' df <- srdta[,1:10]
+#' # compute effect allele freq
+#' EAF <- summary(gtdata(df))$"Q.2"
+#' EAF
+#' # get genotypes of first 5 people
+#' g1 <- as.numeric(df[1:5,])
+#' g1
+#' # blur the genotype of person 1, snp 1
+#' blurGenotype(g1[1,1])
+#' # blur all genotypes of person 2; assume no info for missing
+#' blurGenotype(g1[2,])
+#' # blur all genotypes of person 2; use HWE to infer missing
+#' blurGenotype(g1[2,],q=EAF)
+#'
+#'
+blurGenotype <- function(g,q=NULL,epsilon=0.01) {
+# sanity checks
+ if (is.vector(g)) {
+ g1 <- matrix(g,nrow=1);
+ if (!is.null(names(g))) colnames(g1) <- names(g)
+ g <- g1
+ rm(g1)
+ }
+ if (is.matrix(g) && nrow(g)!=1)
+ stop("g should be a vector or an one-row matrix");
+ if (!is.null(q)) {
+ if (length(q) != dim(g)[2])
+ stop("length of q should be the same as length of g");
+ if (!is(q,"numeric"))
+ warning("q should be real (scalar or vector) between 0 and 1")
+ if (any(q<0) | any(q>1))
+ stop("(some) q<0 or q>1")
+ }
+# set P(g1|g2): probability that true genotype is g1
+# if g2 is observed and error rate is epsilon
+# as a function of IBS (0,1,2)
+ delta <- c((1-epsilon)^2,epsilon*(1-epsilon),epsilon^2)
+# function to convert calls to probabilities assuming
+# error epsilon
+ getNondegenerate <- function(glocal,delta) {
+ if (!is.na(glocal)) {
+ trma <- matrix(c(0,1,2,1,0,1,1,2,1,2,1,0),ncol=4)+1
+ out <- delta[trma[glocal+1,]]
+ out[2] <- out[2] + out[3]
+ out <- out[-3]
+ return(out)
+ } else {
+ return(c(1/4,1/2,1/4))
+ }
+ }
+# setting missing to HWE
+ setToHWE <- function(qlocal) {
+ plocal <- 1 - qlocal
+ out <- matrix(c(plocal*plocal,2*plocal*qlocal,qlocal*qlocal),ncol=3)
+ out <- t(out)
+ out
+ }
+# blur genotypes
+ gx3 <- apply(g,MAR=2,FUN=getNondegenerate,delta=delta)
+# apply HWE to missing
+ if (!is.null(q))
+ gx3[,is.na(g)] <- setToHWE(q[is.na(g)])
+# done
+ if (!is.null(colnames(g))) colnames(gx3) <- colnames(g)
+ rownames(gx3) <- c("g0","g1","g2")
+ return(gx3)
+}
Added: pkg/GenABEL/R/findRelatives.R
===================================================================
--- pkg/GenABEL/R/findRelatives.R (rev 0)
+++ pkg/GenABEL/R/findRelatives.R 2011-03-22 01:17:57 UTC (rev 701)
@@ -0,0 +1,275 @@
+#' guesses relations between individuals
+#'
+#' This function guesses relationships (expressed as estimated
+#' number meiotic connection(s)) using genomic data. Compared
+#' to guessing relations from genomic kinship matrix, this
+#' procedure offers several enhancements:
+#'
+#' (1) by use of IBD/IBS
+#' 3-state space, it allows to distinguish between some pairs,
+#' which have the same kinship (e.g. parent-offspring from
+#' brother-sister; uncle-nephew from grandparent-grandchild, etc.)
+#'
+#' (2) it reports likelihood, allowing for more rigorous inferences
+#'
+#' If 'gtdata' are provided as a matrix (or 'databel' matrix),
+#' genotypes should be coded as 0, 1, or 2; each SNP corresponds
+#' to a column and each ID is a row. 'q' corresponds to the frequency
+#' of 'effect' (aka 'coded') allele, which is also equivalent to the
+#' mean(SNP)/2.0 provided coding is correct.
+#'
+#' 'nmeivec' is a sequence of integers, e.g. c(1,2) will test for
+#' parent-offspring pairs and pairs separated by two meioses (sibs,
+#' grandparent-grandchild, etc.). If 'nmeivec' does not contain '0'
+#' as its first element, it will be automatically added (testing for
+#' twins). Also, nmeivec will be updated with
+#' c(nmeivec,max(nmeivec)+1,100) to allow for thesting of
+#' testing vs. 'null' (unrelated, 100) and 'most distant'
+#' specified by user (max(nmeivec)).
+#'
+#' While one may be interested to test only a sub-set of the data
+#' for relationships,
+#' it is recommended to provide 'q' estimated using all data
+#' available (see example).
+#'
+#' 'gkinCutOff' allows use of genomic kinship matrix
+#' (computed internally) to pre-screen pairs to be tested.
+#' Use of this option with value '-1' is recommended:
+#' in this case threshod is set to 0.5^(max(nmeivec)+2).
+#' If not NULL, only pairs passing gkinCutOff are tested with
+#' the likelihood procedure.
+#'
+#' After likelihood estimation, inference on relationship is made.
+#' Releationship (in terms of number of meioses) is 'guessed'
+#' if odds of likelihoods under the meiotic distance providing max likelihood
+#' and under the 'null' (maximal meiotic distance tested + 100)
+#' is greater than 'OddsVsNull' parameter AND odds max-lik vs.
+#' the next-best meiotic distance
+#' is greater than 'OddsVsNextBest' parameter.
+#'
+#' @param gtdata genotypic data, either 'gwaa.data' or 'snp.data'
+#' class, or matrix or 'databel' matrix (see details for format).
+#' @param nmeivec vector providing the degree of relationship
+#' to be tested (1: parent-offspring; 2: sibs, grandparent-grandchild;
+#' etc.).
+#' @param q vector of effect allele frequencis for the data.
+#' @param epsilon genotyping error rate
+#' @param quiet if TRUE, screen outputs supressed
+#' @param OddsVsNull threshold used in relationships inferences
+#' (see details)
+#' @param OddsVsNextBest threshold used in relationships inferences
+#' (see details)
+#' @param twoWayPenalty penalty on likelihoods resulting from models
+#' assuming two meiotic pathways
+#' @param doTwoWay or not
+#' @param vsIDs specific IDs to be tested vs others
+#' @param gkinCutOff if not null, sets a threshold used
+#' to pre-screen pairs before guessing relations. If value
+#' < 0 provided, procedure sets threshold automatically (recommended)
+#' @param kinshipMatrix (genomic) kinship matrix (used if gkinCutOff!=NULL)
+#'
+#' @return
+#' A list with elements
+#' call -- details of the call;
+#' profile -- table detailing likelihood for all pairs
+#' tested;
+#' estimatedNmeioses -- nids x nids matrix containing
+#' maximum likelihood estimate of
+#' meiotic distance for all pairs of individuals
+#' guess -- same as estimatedNmeioses, but all estimates not
+#' passing inference criteria (OddsVsNull, OddsVsNextBest) are NAed;
+#' compressedGuess -- same as above, but removing cows and cols
+#' with missing-only elemnts
+#'
+#' @keywords htest
+#'
+#' @examples
+#' data(ge03d2.clean)
+#' df <- ge03d2.clean[,autosomal(ge03d2.clean)]
+#' df <- df[,sort(sample(1:nsnps(df),1000))]
+#' eaf <- summary(gtdata(df))$"Q.2"
+#' relInfo <- findRelatives(df[27:30,],q=eaf)
+#' relInfo
+#' # look only for 1st and 2nd degree relatives
+#' relInfo1 <- findRelatives(df[27:30],q=eaf,gkinCutOff=-1,nmeivec=c(1,2,3))
+#' relInfo1
+#'
+findRelatives <- function(gtdata,nmeivec=c(1:2),q=NULL,epsilon=0.01,
+ quiet=FALSE,OddsVsNull=1000,OddsVsNextBest=100,
+ twoWayPenalty=log(10),doTwoWay=TRUE,vsIDs=NULL,
+ gkinCutOff=NULL,kinshipMatrix=NULL) {
+# sanity checks
+ nmeivec <- sort(nmeivec)
+ if (nmeivec[1]<0) stop("negative elements in nmeivec")
+ if (nmeivec[1] != 0) nmeivec <- c(0,nmeivec)
+ nmeivec <- c(nmeivec,nmeivec[length(nmeivec)]+1,nmeivec[length(nmeivec)]+100)
+ if (class(gtdata) != "matrix" && class(gtdata) != "gwaa.data" &&
+ class(gtdata) != "snp.data" && class(gtdata) != "databel")
+ stop("gtdata should be gwaa.data, snp.data, databel or a matrix")
+ if (class(gtdata) == "gwaa.data") gtdata <- gtdata(gtdata)
+# get ID names
+ if (!is.null(rownames(gtdata)))
+ idnam <- rownames(gtdata)
+ else
+ idnam <- as.character(1:dim(gtdata)[1])
+# get Q if not provided
+ if (is.null(q)) {
+ if (class(gtdata) =="snp.data") q <- summary(gtdata)$"Q.2"
+ else if (class(gtdata) == "matrix") q <- colMeans(gtdata,na.rm=T)/2.
+ else if (class(gtdata) == "databel") {
+ q <- rep(NA,dim(gtdata)[2])
+ for (i in 1:dim(gtdata)[2]) q[i] <- mean(gtdata[,1],na.rm=T)/2.
+ }
+ warning('"q" is not specified; inferring from data')
+ }
+# set vsIDs if used
+ if (!is.null(vsIDs)) if (is.character(vsIDs)) {
+ if (!all(vsIDs %in% idnam)) stop("can not find some of vsIDs")
+ vsIDs <- which(idnam %in% vsIDs)
+ if (length(vsIDs)>1) {ma <- max(vsIDs); mi <- min(vsIDs)}
+ else ma <- mi <- vsIDs
+ if (ma>dim(gtdata)[1] || mi<1) stop("vsIDs out of range")
+ }
+
+# get gkin if used
+ lengthOut <- dim(gtdata)[1]
+ if (is.null(vsIDs)) lengthOut <- lengthOut*(lengthOut-1)/2
+ else lengthOut <- (lengthOut-1)*length(vsIDs)
+ if (!is.null(gkinCutOff)) {
+ if (is.null(kinshipMatrix)) {
+ if (is(gtdata,"snp.data")) {
+ if (is.null(q)) {
+ kinshipMatrix <- ibs(gtdata,weight="freq")
+ } else {
+ kinshipMatrix <- ibs(gtdata,weight="freq",snpfreq=q)
+ }
+ }
+ else stop("gkinCutOff can only be used with gtdata of gwaa.data or snp.data class")
+ } else {
+ kinshipMatrix <- kinshipMatrix[idnam,idnam]
+ }
+ if (gkinCutOff<(-0.5)) {
+ gkinCutOff <- .5^(max(nmeivec[1:(length(nmeivec)-1)])+1)
+ if (!quiet) cat("*** gkinCutOff set to ",gkinCutOff,"\n")
+ }
+ if (is.null(vsIDs)) {
+ testUs <- as.vector(kinshipMatrix[lower.tri(kinshipMatrix)])
+ testUs <- (testUs>gkinCutOff)
+ } else {
+ testUs <- c()
+ for (jj in vsIDs) {
+ testUs <- c(testUs,as.vector((kinshipMatrix[jj,])[1:(jj-1)]),
+ as.vector((kinshipMatrix[,jj])[c((jj+1):dim(kinshipMatrix)[1])]))
+ }
+ #print(testUs)
+ testUs <- (testUs>gkinCutOff)
+ }
+ if (length(testUs) != lengthOut)
+ stop(paste("length(testUs) != lengthOut:",
+ length(testUs),";",lengthOut))
+ } else {
+ testUs <- rep(TRUE,lengthOut)
+ }
+ if (all(testUs==FALSE)) stop("no test to be performed according to specifid criteria")
+# prepare 2-way meiotic table to iterate over
+ meiTab <- list()
+ #print(nmeivec)
+ maxNMeiNotNullNotBoundary <- nmeivec[(length(nmeivec)-2)]
+ for (i in nmeivec[1:(length(nmeivec)-1)]) {
+ fromTo <- c(max(2,i):maxNMeiNotNullNotBoundary)
+ if (doTwoWay && i>0 && i <= maxNMeiNotNullNotBoundary) {
+ series <- matrix(c(rep(i,length(fromTo)),fromTo),ncol=2)
+ if (dim(series)[1]>=1)
+ for (j in 1:dim(series)[1]) {
+ #print(c(i,j))
+ cname <- paste(series[j,1],series[j,2],sep="+")
+ meiTab[[cname]] <- series[j,]
+ }
+ }
+ meiTab[[as.character(i)]] <- i
+ }
+ meiTab[[ as.character(nmeivec[length(nmeivec)]) ]] <- nmeivec[length(nmeivec)]
+ #print(meiTab)
+ #print(names(meiTab))
+ #stop()
+# iterate over IDs
+ outI <- 1
+ out <- matrix(ncol=(length(meiTab)),nrow=lengthOut)
+ outN1 <- matrix(ncol=1,nrow=lengthOut)
+ outN2 <- matrix(ncol=1,nrow=lengthOut)
+ if (!quiet) pb <- txtProgressBar(style = 3)
+ if (is.null(vsIDs)) {id1list <- c(1:(dim(gtdata)[1]-1))}
+ else { id1list <- vsIDs }
+ testsDone <- 0
+ for (id1 in id1list) {
+ name1 <- idnam[id1]
+ if (class(gtdata) == "snp.data")
+ gt1 <- as.numeric(gtdata[id1,])
+ else
+ gt1 <- gtdata[id1,]
+ bgt1 <- blurGenotype(gt1,q=q,epsilon=epsilon)
+ if (is.null(vsIDs)) id2list <- (id1+1):dim(gtdata)[1]
+ else id2list <- c(c(1:(id1-1)),c((id1+1):dim(gtdata)[1]))
+ for (id2 in id2list) {
+ name2 <- idnam[id2]
+ #print(c(outI,testUs))
+ if (testUs[outI]) {
+ if (class(gtdata) == "snp.data")
+ gt2 <- as.numeric(gtdata[id2,])
+ else
+ gt2 <- gtdata[id2,]
+ bgt2 <- blurGenotype(gt2,q=q,epsilon=epsilon)
+ acuL <- rep(NA,length(nmeivec));
+ k <- 1
+ for (mei in meiTab) {
+ # AAA
+ TM <- makeTransitionMatrix(q,nmeioses=mei)
+ acuL[k] <- getLogLikelihoodGivenRelation(bgt1,bgt2,TM)$logLik
+ if (length(mei)>1) acuL[k] <- acuL[k] - twoWayPenalty
+ k <- k+1
+ }
+ testsDone <- testsDone + 1
+ if (!quiet) setTxtProgressBar(pb, testsDone/sum(testUs))
+ } else {
+ acuL <- c(1:length(meiTab))
+ }
+ outN1[outI,] <- name1
+ outN2[outI,] <- name2
+ #cat("id1list",id1list,"\n")
+ #cat("id2list",id2list,"\n")
+ #cat("\n",acuL,"\n")
+ out[outI,] <- c(acuL[1:(length(acuL))]-acuL[length(acuL)])
+ #print(out[outI,])
+ outI <- outI+1
+ }
+ }
+ if (!quiet) cat("\n")
+ #print(out)
+ meiMtmp <- apply(out,MAR=1,FUN=function(x){return(which.max(x))})
+ #print(relMtmp)
+ meiM <- matrix(ncol=dim(gtdata)[1],nrow=dim(gtdata)[1])
+ diag(meiM) <- 0
+ #print(meiMtmp)
+ meiM[lower.tri(meiM)] <- names(meiTab)[meiMtmp]
+ meiM[upper.tri(meiM)] <- t(meiM)[upper.tri(meiM)]
+ colnames(meiM) <- rownames(meiM) <- idnam
+ firstBestLik <- apply(out,MAR=1,FUN=function(x){return(max(x))})
+ which_firstBestLik <- apply(out,MAR=1,FUN=function(x){return(which.max(x))})
+ secondBestLik <- apply(out,MAR=1,FUN=function(x){x[order(x,decreasing=TRUE)[2]]})
+ cndX <- !(exp(firstBestLik-secondBestLik)>OddsVsNextBest
+ & exp(firstBestLik)>OddsVsNull & (which_firstBestLik != (length(meiTab)-1)))
+ cnd <- diag(FALSE,dim(gtdata)[1],nrow=dim(gtdata)[1])
+ cnd[lower.tri(cnd)] <- cndX
+ cnd[upper.tri(cnd)] <- t(cnd)[upper.tri(cnd)]
+ guess <- meiM
+ guess[which(cnd==1)] <- NA
+ out <- data.frame(outN1,outN2,out,stringsAsFactors=FALSE)
+ names(out) <- c("id1","id2",names(meiTab))
+ tmp <- guess; diag(tmp) <- NA;
+ todrop <- apply(tmp,MAR=1,FUN=function(x){return(all(is.na(x)))})
+ compressedGuess <- guess[!todrop,!todrop]
+ finalOut <- list(call=match.call(),profile=out,estimatedNmeioses=meiM,
+ firstBestLik=firstBestLik,
+ secondBestLik=secondBestLik,guess=guess,compressedGuess=compressedGuess)
+ finalOut
+}
Added: pkg/GenABEL/R/generateOffspring.R
===================================================================
--- pkg/GenABEL/R/generateOffspring.R (rev 0)
+++ pkg/GenABEL/R/generateOffspring.R 2011-03-22 01:17:57 UTC (rev 701)
@@ -0,0 +1,64 @@
+#' simulates offspring's genotypes
+#'
+#' Simulat offspring's genotypes given genotypes of
+#' the parents. No LD is assumed.
+#'
+#' @param g1 vector of genotypes of parent 1, coded
+#' with 0, 1, 2 and NA
+#' @param g2 vector of genotypes of parent 2, coded
+#' with 0, 1, 2 and NA
+#' @param q vector of the frequencies of effects
+#' alleles (used to simulate missings in parental
+#' genotypes). If NULL, c(0.25,0.5,0.25) is used.
+#'
+#' @return a vector containing simulated genotypes
+#' of offspring
+#'
+#' @author Yurii Aulchenko
+#'
+#' @examples
+#' eaf <- runif(10)
+#' g1 <- rbinom(10,2,eaf)
+#' g2 <- rbinom(10,2,eaf)
+#' generateOffspring(g1,g2)
+#'
+generateOffspring <- function(g1,g2,q=NULL) {
+ tm <- array(dim=c(3,3,3))
+ tm[1,1,] <- c(1,1,1) #c(1,0,0)
+ tm[1,2,] <- tm[2,1,] <- c(.5,1,1) #c(.5,.5,0)
+ tm[1,3,] <- tm[3,1,] <- c(0,1,1) #c(0,1,0)
+ tm[2,2,] <- c(.25,.75,1) #c(.25,.5,.25)
+ tm[2,3,] <- tm[3,2,] <- c(0,.5,1) #c(0,.5,.5)
+ tm[3,3,] <- c(0,0,1) #c(0,0,1)
+ genSOG <- function(localg) {
+ vc <- tm[localg[1],localg[2],]
+ #print(vc)
+ rn <- runif(1)
+ #print(rn)
+ gt <- which(vc>=rn)[1]-1
+ #print(gt)
+ gt
+ }
+# infer missing genotypes, if any
+ if (any(is.na(c(g1,g2)))) {
+ warning("NA's in g1 and/or g2; inferring")
+ infer <- function(q) {
+ return(g <- rbinom(length(q),2,q))
+ }
+ if (!is.null(q)) {
+ miss <- which(is.na(g1))
+ g1[miss] <- rbinom(length(miss),2,q[miss])
+ miss <- which(is.na(g2))
+ g2[miss] <- rbinom(length(miss),2,q[miss])
+ } else {
+ miss <- which(is.na(g1))
+ g1[miss] <- rbinom(length(miss),2,rep(0.5,length(miss)))
+ miss <- which(is.na(g2))
+ g1[miss] <- rbinom(length(miss),2,rep(0.5,length(miss)))
+ }
+ }
+# generate offspring genotypes
+ g1g2 <- t(matrix(c(g1+1,g2+1),ncol=2))
+ res <- apply(g1g2,MAR=2,FUN=genSOG)
+ res
+}
Added: pkg/GenABEL/R/getLogLikelihoodGivenRelation.R
===================================================================
--- pkg/GenABEL/R/getLogLikelihoodGivenRelation.R (rev 0)
+++ pkg/GenABEL/R/getLogLikelihoodGivenRelation.R 2011-03-22 01:17:57 UTC (rev 701)
@@ -0,0 +1,74 @@
+#' computes logLik of two blurGenotypes
+#'
+#' Compute logLik of genotypes of person 1
+#' given genotypes of person 2 and assumed relation
+#' between the two persons (expressed with transition
+#' probability matrix; as returned with
+#' 'makeTransitionMatrix').
+#'
+#' @param bGenotype1 blurred genotype of person 1
+#' @param bGenotype2 blurred genotype of person 2
+#' @param TransitionMatrix transition probability matrix
+#' @param q vector of effect allele frequencies
+#'
+#' @author Yurii Aulchenko
+#'
+#' @examples
+#' data(srdta)
+#' # select 10 first SNPs
+#' df <- srdta[,1:10]
+#' # compute effect allele freq
+#' EAF <- summary(gtdata(df))$"Q.2"
+#' # get genotypes of first 2 people
+#' g1 <- as.numeric(df[1:2,])
+#' # blur all genotypes of person 1; use HWE to infer missing
+#' bg1 <- blurGenotype(g1[1,],q=EAF)
+#' # blur all genotypes of person 2; use HWE to infer missing
+#' bg2 <- blurGenotype(g1[2,],q=EAF)
+#' # generate sib-sib transision matrices
+#' trss <- makeTransitionMatrix(EAF,nmei=c(2,2))
+#' getLogLikelihoodGivenRelation(bg1,bg2,trss,EAF)
+#'
+getLogLikelihoodGivenRelation <- function(bGenotype1,bGenotype2,TransitionMatrix,q) {
+# sanity checks
+ if (class(bGenotype1) != "matrix")
+ stop("bGenotype1 must be a matrix with 3 rows")
+ if (class(bGenotype2) != "matrix")
+ stop("bGenotype2 must be a matrix with 3 rows")
+ if (any(dim(bGenotype1) != dim(bGenotype2)))
+ stop("dim(bGenotype1) != dim(bGenotype2)")
+ if (nrow(bGenotype1)!=3)
+ stop("bGenotype1 must be a matrix with 3 rows")
+ if (nrow(bGenotype2)!=3)
+ stop("bGenotype2 must be a matrix with 3 rows")
+ if (any(is.na(bGenotype1)))
+ stop("missing values not allowed in bGenotype1")
+ if (any(is.na(bGenotype2)))
+ stop("missing values not allowed in bGenotype2")
+ nloci <- dim(bGenotype1)[2]
+ if (nloci == 1) {
+ dmTM <- dim(TransitionMatrix)
+ if ( (length(dmTM)!=2 && length(dmTM)!=3) ||
+ (length(dmTM)==2 && any(dmTM!=c(3,3))) ||
+ (length(dmTM)==3 && any(dmTM!=c(3,3,1))) )
+ stop("for single locus, TransitionMatrix must be 3x3 matrix or 3x3x1 array")
+ }
+
+# compute likelihood
+# P(call1,call2|nmeioses) = SUM_{g1,g2} P(call1|g1) P(call2|g2)
+# P(g1|g2,nmeioses) P(g2)
+ mm <- matrix(ncol=nloci,nrow=3)
+ mm[1,] <- bGenotype1[1,] * TransitionMatrix[1,1,] +
+ bGenotype1[2,] * TransitionMatrix[2,1,] +
+ bGenotype1[3,] * TransitionMatrix[3,1,]
+ mm[2,] <- bGenotype1[1,] * TransitionMatrix[1,2,] +
+ bGenotype1[2,] * TransitionMatrix[2,2,] +
+ bGenotype1[3,] * TransitionMatrix[3,2,]
+ mm[3,] <- bGenotype1[1,] * TransitionMatrix[1,3,] +
+ bGenotype1[2,] * TransitionMatrix[2,3,] +
+ bGenotype1[3,] * TransitionMatrix[3,3,]
+ x <- mm * bGenotype2
+ x <- apply(x,FUN=sum,MAR=2)
+ out <- list(locusLik = x, logLik = sum(log(x)))
+ out
+}
Added: pkg/GenABEL/R/makeTransitionMatrix.R
===================================================================
--- pkg/GenABEL/R/makeTransitionMatrix.R (rev 0)
+++ pkg/GenABEL/R/makeTransitionMatrix.R 2011-03-22 01:17:57 UTC (rev 701)
@@ -0,0 +1,113 @@
+#' Genotype transition probabilities matrices
+#'
+#' Function to generate genotypic transition probablilites
+#' matrices, which represent conditional probabilities
+#' P(g1|g2,nmeioses), where g1 is henotype of person one
+#' (AA, AB or BB), g2 is genotype of person two, and
+#' nmeioses is the number of meioses separating these
+#' two individuals (0 for twins, 1 for parent-offspring,
+#' c(2,2) for sibs, 2 for grandparent-grandchild pairs, etc.)
+#'
+#' @param nmeioses number of meioses separating two
+#' individuals ((a vector) of non-negative integers).
+#' If a vector, it is assumed it lists all meiotic paths
+#' connecting the pair
+#' @param q (a vector of) the coded allele frequency(ies)
+#' (e.g. "Q.2" of GenABEL-package)
+#'
+#' @return If q is scalar, a 3x3 matrix is returned,
+#' where elements represent conditional transition
+#' probabilities P(g1|g2,nmeioses); rows correspond to
+#' the genotypes of g1, and columns correspond to the
+#' genotypes of g2. If coded allele is 'B', then
+#' e.g. element [1,2] gives the probability
+#' P(g1='AA'|g2='AB',nmeioses=nmeioses).
+#'
+#' If q is a vector, series of above-described matrices
+#' are returned as an 'array' object. A matrix constructed
+#' for certain element q[i] can be accessed via
+#' result[,,i].
+#'
+#' @examples
+#' # transition matrix for parent-offspring, for q=0.1
+#' makeTransitionMatrix(0.1,nmeioses=1)
+#' # for a set of q's
+#' makeTransitionMatrix(c(0.1,0.9),nmeioses=1)
+#' # for sibs
+#' makeTransitionMatrix(0.1,nmeioses=c(2,2))
+#' # for half-sibs (or grandparent-grandchild)
+#' makeTransitionMatrix(0.1,nmeioses=2)
+#' # for remote relatives
+#' makeTransitionMatrix(0.1,nmeioses=10)
+#' # for independent
+#' makeTransitionMatrix(0.1,nmeioses=1000)
+#'
+#' @author Yurii Aulchenko
+#'
+#'
+makeTransitionMatrix <- function(q,nmeioses=1000) {
+# do sanity checks
+ if (!is(q,"numeric"))
+ warning("q should be real (scalar or vector) between 0 and 1")
+ if (any(q<0) | any(q>1))
+ stop("(some) q<0 or q>1")
+ if (!is(nmeioses,"numeric") || min(nmeioses)<0)
+ stop("nmeioses should be a positive integer or 0")
+ if (length(nmeioses)>2)
+ stop("current implementation assumes 2 meiotic paths max")
+ # compute transition matrix: P(g1|g2) where g1 is first (rows) and g2 is second dimention (columns)
+ getTT <- function(q,pIBD) {
+ p <- 1-q
+ p2 <- p*p; pq <- p*q; pq2 <- 2*pq; q2 <- q*q;
+ if (length(pIBD)==2) {
+ J0 <- (1-pIBD[1])*(1-pIBD[2]);
+ J1 <- pIBD[1]*(1-pIBD[2]) + (1-pIBD[1])*pIBD[2];
+ J2 <- pIBD[1]*pIBD[2]
+ } else {
+ J0 <- (1-pIBD)
+ J1 <- pIBD[1]
+ J2 <- 0
+ }
+ #coe <- 0.5^(nmei-1); OMcoe <- 1. - coe
+ if (all(pIBD<=1)) {
+ #print(c(pIBD,J0,J1,J2))
+ out <- array(c(
+ J0*p2 + J1*p + J2, J0*pq2 + J1*q , J0*q2,
+ J0*p2 + J1*p/2 , J0*pq2 + J1*(p+q)/2 + J2, J0*q2 + J1*q/2,
+ J0*p2 , J0*pq2 + J1*p , J0*q2 + J1*q + J2
+ ),
+ dim=c(3,3))
+# out <- array(c(
+# J0*p2*p2 + J1*p2 + J2, J0*p2*pq2 + J1*p*q , J0*p2*q2,
+# J0*p2*pq2 + J1*p*q , J0*pq2*pq2 + J1*(p2+q2) + J2, J0*q2*pq2 + J1*p*q,
+# J0*p2*q2 , J0*q2*pq2 + J1*p*q , J0*q2*q2 + J1*q2 + J2
+# ),
+# dim=c(3,3))
+# out <- array(c(coe*p + OMcoe*(p2), coe*q + OMcoe*pq2, OMcoe*(q2),
+# coe*0.5*p + OMcoe*(p2), coe*0.5 + OMcoe*pq2, coe*0.5*q + OMcoe*(q2),
+# OMcoe*(p2), coe*p + OMcoe*pq2, coe*q + OMcoe*(q2)),
+# dim=c(3,3))
+# out <- array(c(coe*p + OMcoe*(p2), coe*q + OMcoe*pq2, OMcoe*(q2),
+# coe*0.5*p + OMcoe*(p2), coe*0.5 + OMcoe*pq2, coe*0.5*q + OMcoe*(q2),
+# OMcoe*(p2), coe*p + OMcoe*pq2, coe*q + OMcoe*(q2)),
+# dim=c(3,3))
+# out <- array(c(coe*p + OMcoe*(p2+pq*F), coe*q + OMcoe*pq2*(1-F), OMcoe*(q2+pq*F),
+# coe*0.5*p + OMcoe*(p2+pq*F), coe*0.5 + OMcoe*pq2*(1-F), coe*0.5*q + OMcoe*(q2+pq*F),
+# OMcoe*(p2+pq*F), coe*p + OMcoe*pq2*(1-F), coe*q + OMcoe*(q2+pq*F)),
+# dim=c(3,3))
+ } else {
+ out <- array(c(1, 0, 0,
+ 0, 1, 0,
+ 0, 0, 1),
+ dim=c(3,3))
+ }
+ return(out)
+ }
+ if (length(q)>1) {
+ q <- array(q)
+ out <- sapply(q,FUN=getTT,pIBD=.5^(nmeioses-1),simplify="array")
+ } else {
+ out <- getTT(q,pIBD=.5^(nmeioses-1))
+ }
+ return(out)
+}
Modified: pkg/GenABEL/generate_documentation.R
===================================================================
--- pkg/GenABEL/generate_documentation.R 2011-03-14 09:15:43 UTC (rev 700)
+++ pkg/GenABEL/generate_documentation.R 2011-03-22 01:17:57 UTC (rev 701)
@@ -2,14 +2,19 @@
"add.phdata.R",
#"annotation",
"arrange_probabel_phe.R",
+ "blurGenotype.R",
"del.phdata.R",
"export.plink.R",
"extract.annotation.impute.R",
"extract.annotation.mach.R",
+ "findRelatives.R",
"GenABEL-package.R",
+ "generateOffspring.R",
+ "getLogLikelihoodGivenRelation.R",
"impute2databel.R",
"impute2mach.R",
"mach2databel.R",
+ "makeTransitionMatrix.R",
#"phdata.R",
"polygenic.R",
"polygenic_hglm.R",
Modified: pkg/GenABEL/inst/unitTests/report.html
===================================================================
--- pkg/GenABEL/inst/unitTests/report.html 2011-03-14 09:15:43 UTC (rev 700)
+++ pkg/GenABEL/inst/unitTests/report.html 2011-03-22 01:17:57 UTC (rev 701)
@@ -1,9 +1,9 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
"http://www.w3.org/TR/html4/transitional.dtd">
-<html><head><title>RUNIT TEST PROTOCOL--Sat Sep 4 18:40:13 2010</title>
+<html><head><title>RUNIT TEST PROTOCOL--Tue Mar 22 01:41:46 2011</title>
</head>
-<body><h1 TRUE>RUNIT TEST PROTOCOL--Sat Sep 4 18:40:13 2010</h1>
-<p>Number of test functions: 5</p>
+<body><h1 TRUE>RUNIT TEST PROTOCOL--Tue Mar 22 01:41:46 2011</h1>
+<p>Number of test functions: 6</p>
<p>Number of errors: 0</p>
<p>Number of failures: 0</p>
<hr>
@@ -15,7 +15,7 @@
<th width="20%">Failures</th>
</tr>
<tr><td><a href="#GenABEL unit testing">GenABEL unit testing</a></td>
-<td>5</td>
+<td>6</td>
<td>0</td>
<td>0</td>
</tr>
@@ -23,7 +23,7 @@
<hr>
<h3 TRUE>Details</h3>
<p><a name="GenABEL unit testing"><h5 TRUE>Test Suite: GenABEL unit testing</h5>
-</a>Test function regexp: ^test.+<br/>Test file regexp: ^runit.+\.[rR]$<br/>Involved directory:<br/>/Users/yuriiaulchenko/eclipse_workspace/GenABEL/tests/../inst/unitTests<br/><ul><li><a href="/Users/yuriiaulchenko/eclipse_workspace/GenABEL/tests/../inst/unitTests/runit.impute2xxx.R">Test file: runit.impute2xxx.R</a><ul><li><a name="GenABEL unit testing__Users_yuriiaulchenko_eclipse_workspace_GenABEL_tests_.._inst_unitTests_runit.impute2xxx.R_test.impute2databel">test.impute2databel: (23 checks) ... OK (0.52 seconds)<br/></a></li></ul></li><li><a href="/Users/yuriiaulchenko/eclipse_workspace/GenABEL/tests/../inst/unitTests/runit.impute2xxx_large.R">Test file: runit.impute2xxx_large.R</a><ul><li><a name="GenABEL unit testing__Users_yuriiaulchenko_eclipse_workspace_GenABEL_tests_.._inst_unitTests_runit.impute2xxx_large.R_test.impute2xxx_large">test.impute2xxx_large: (5 checks) ... OK (93.94 seconds)<br/></a></li></ul></li><li><a href="/Users/yuriiaulchenko/eclipse_workspace/GenABEL/tests/../inst/unitTests/runit.iterator.R">Test file: runit.iterator.R</a><ul><li><a name="GenABEL unit testing__Users_yuriiaulchenko_eclipse_workspace_GenABEL_tests_.._inst_unitTests_runit.iterator.R_test.qtscore">test.qtscore: (0 checks) ... OK (0 seconds)<br/></a></li><li><a name="GenABEL unit testing__Users_yuriiaulchenko_eclipse_workspace_GenABEL_tests_.._inst_unitTests_runit.iterator.R_test.summary_snp_data">test.summary_snp_data: (3 checks) ... OK (7.84 seconds)<br/></a></li></ul></li><li><a href="/Users/yuriiaulchenko/eclipse_workspace/GenABEL/tests/../inst/unitTests/runit.mach2databel.R">Test file: runit.mach2databel.R</a><ul><li><a name="GenABEL unit testing__Users_yuriiaulchenko_eclipse_workspace_GenABEL_tests_.._inst_unitTests_runit.mach2databel.R_test.mach2databel">test.mach2databel: (8 checks) ... OK (0.25 seconds)<br/></a></li></ul></li></ul><hr>
+</a>Test function regexp: ^test.+<br/>Test file regexp: ^runit.+\.[rR]$<br/>Involved directory:<br/>/Users/yuriiaulchenko/eclipse_workspace/pkg/GenABEL/tests/../inst/unitTests<br/><ul><li><a href="/Users/yuriiaulchenko/eclipse_workspace/pkg/GenABEL/tests/../inst/unitTests/runit.findRelatives.R">Test file: runit.findRelatives.R</a><ul><li><a name="GenABEL unit testing__Users_yuriiaulchenko_eclipse_workspace_pkg_GenABEL_tests_.._inst_unitTests_runit.findRelatives.R_test.findRelatives">test.findRelatives: (7 checks) ... OK (68.51 seconds)<br/></a></li></ul></li><li><a href="/Users/yuriiaulchenko/eclipse_workspace/pkg/GenABEL/tests/../inst/unitTests/runit.impute2xxx.R">Test file: runit.impute2xxx.R</a><ul><li><a name="GenABEL unit testing__Users_yuriiaulchenko_eclipse_workspace_pkg_GenABEL_tests_.._inst_unitTests_runit.impute2xxx.R_test.impute2databel">test.impute2databel: (23 checks) ... OK (0.83 seconds)<br/></a></li></ul></li><li><a href="/Users/yuriiaulchenko/eclipse_workspace/pkg/GenABEL/tests/../inst/unitTests/runit.impute2xxx_large.R">Test file: runit.impute2xxx_large.R</a><ul><li><a name="GenABEL unit testing__Users_yuriiaulchenko_eclipse_workspace_pkg_GenABEL_tests_.._inst_unitTests_runit.impute2xxx_large.R_test.impute2xxx_large">test.impute2xxx_large: (0 checks) ... OK (0 seconds)<br/></a></li></ul></li><li><a href="/Users/yuriiaulchenko/eclipse_workspace/pkg/GenABEL/tests/../inst/unitTests/runit.iterator.R">Test file: runit.iterator.R</a><ul><li><a name="GenABEL unit testing__Users_yuriiaulchenko_eclipse_workspace_pkg_GenABEL_tests_.._inst_unitTests_runit.iterator.R_test.qtscore">test.qtscore: (0 checks) ... OK (0 seconds)<br/></a></li><li><a name="GenABEL unit testing__Users_yuriiaulchenko_eclipse_workspace_pkg_GenABEL_tests_.._inst_unitTests_runit.iterator.R_test.summary_snp_data">test.summary_snp_data: (3 checks) ... OK (12.39 seconds)<br/></a></li></ul></li><li><a href="/Users/yuriiaulchenko/eclipse_workspace/pkg/GenABEL/tests/../inst/unitTests/runit.mach2databel.R">Test file: runit.mach2databel.R</a><ul><li><a name="GenABEL unit testing__Users_yuriiaulchenko_eclipse_workspace_pkg_GenABEL_tests_.._inst_unitTests_runit.mach2databel.R_test.mach2databel">test.mach2databel: (8 checks) ... OK (0.53 seconds)<br/></a></li></ul></li></ul><hr>
<table border="0" width="80%" >
<tr><th>Name</th>
<th>Value</th>
@@ -47,25 +47,25 @@
<td>2</td>
</tr>
<tr><td>minor</td>
-<td>12.0</td>
+<td>13.0</td>
</tr>
<tr><td>year</td>
-<td>2010</td>
+<td>2011</td>
</tr>
<tr><td>month</td>
-<td>04</td>
+<td>01</td>
</tr>
<tr><td>day</td>
-<td>02</td>
+<td>26</td>
</tr>
<tr><td>svn rev</td>
-<td>51566</td>
+<td>54118</td>
</tr>
<tr><td>language</td>
<td>R</td>
</tr>
<tr><td>version.string</td>
-<td>R version 2.12.0 Under development (unstable) (2010-04-02 r51566)</td>
+<td>R version 2.13.0 Under development (unstable) (2011-01-26 r54118)</td>
</tr>
<tr><td>host</td>
<td>new-host.home</td>
Modified: pkg/GenABEL/inst/unitTests/report.txt
===================================================================
--- pkg/GenABEL/inst/unitTests/report.txt 2011-03-14 09:15:43 UTC (rev 700)
[TRUNCATED]
To get the complete diff run:
svnlook diff /svnroot/genabel -r 701
More information about the Genabel-commits
mailing list