[Genabel-commits] r1183 - in pkg/GenABEL: . R man

Thu Apr 4 11:02:17 CEST 2013

Author: yurii
Date: 2013-04-04 11:02:17 +0200 (Thu, 04 Apr 2013)
New Revision: 1183

Modified:
   pkg/GenABEL/CHANGES.LOG
   pkg/GenABEL/R/hom.R
   pkg/GenABEL/generate_documentation.R
   pkg/GenABEL/man/hom.Rd
Log:
converted hom documentation to roxygen format

Modified: pkg/GenABEL/CHANGES.LOG
===================================================================

--- pkg/GenABEL/CHANGES.LOG	2013-04-04 08:53:28 UTC (rev 1182)
+++ pkg/GenABEL/CHANGES.LOG	2013-04-04 09:02:17 UTC (rev 1183)
@@ -1,5 +1,8 @@
 ***  v. 1.7-5
 
+(2013.04.04)
+Updated documentation files
+ 
 (2013.04.03) 
 Added patch to bug [#1287] - example(ibs) producing strange clusters.  
 Thanks to Vladimir Naumov for submitting the patch!

Modified: pkg/GenABEL/R/hom.R
===================================================================
--- pkg/GenABEL/R/hom.R	2013-04-04 08:53:28 UTC (rev 1182)
+++ pkg/GenABEL/R/hom.R	2013-04-04 09:02:17 UTC (rev 1183)
@@ -1,3 +1,81 @@
+#' function to compute average homozygosity within a person
+#' 
+#' This function computes average homozygosity (inbreeding) for a set of
+#' people, across multiple markers. Can be used for Quality Control (e.g.
+#' contamination checks)
+#' 
+#' Homozygosity is measured as proportion of homozygous genotypes observed in a
+#' person.
+#' 
+#' Inbreeding for person \eqn{i} is estimated with
+#' 
+#' \deqn{
+#' f_i = \frac{(O_i - E_i)}{(L_i - E_i)}
+#' }{ f_i = ((O_i - E_i))/((L_i - E_i)) }
+#' 
+#' where \eqn{O_i} is observed homozygosity, \eqn{L_i} is the number of SNPs
+#' measured in individual \eqn{i} and
+#' 
+#' \deqn{
+#' E_i = \Sigma_{j=1}^{L_i} (1 - 2 p_j (1 - p_j) \frac{T_{Aj}}{T_{Aj}-1})
+#' }{
+#' E_i = Sigma_(j=1)^(L_i) (1 - 2 p_j (1 - p_j) (T_(Aj))/(T_(Aj)-1))
+#' }
+#' 
+#' where \eqn{T_{Aj}} is the number of measured genotypes at locus \eqn{j};
+#' \eqn{T_{Aj}} is either estimated from data or provided by "n.snpfreq"
+#' parameter (vector). Allelic frequencies are either estimated from data or
+#' provided by the "snpfreq" vector.
+#' 
+#' This measure is the same as used by PLINK (see reference).
+#' 
+#' The variance (Var) is estimated as
+#' 
+#' \deqn{ V_{i} = \frac{1}{N} \Sigma_k \frac{(x_{i,k} - p_k)^2}{(p_k * (1 -
+#' p_k))} }
+#' 
+#' where k changes from 1 to N = number of SNPs, \eqn{x_{i,k}} is a genotype of
+#' ith person at the kth SNP, coded as 0, 1/2, 1 and \eqn{p_k} is the frequency
+#' of the "+" allele.
+#' 
+#' Only polymorphic loci with number of measured genotypes >1 are used with
+#' this option.
+#' 
+#' This variance is used as diagonal of the genomic kinship matrix when using
+#' EIGENSTRAT method.
+#' 
+#' You should use as many people and markers as possible when estimating
+#' inbreeding/variance from marker data.
+#' 
+#' @param data Object of \link{gwaa.data-class} or \link{snp.data-class}
+#' @param snpsubset Subset of SNPs to be used
+#' @param idsubset People for whom average homozygosity is to be computed
+#' @param snpfreq when option weight="freq" used, you can provide fixed allele
+#' frequencies
+#' @param n.snpfreq when option weight="freq" used, you can provide a vector
+#' supplying the number of people used to estimate allele frequencies at the
+#' particular marker, or a fixed number
+#' @return A matrix with rows corresponding to the ID names and columns showing
+#' the number of SNPs measured in this person (NoMeasured), the number of
+#' measured polymorphic SNPs (NoPoly), homozygosity (Hom), expected
+#' homozygosity (E(Hom)), variance, and the estimate of inbreeding, F.
+#' @author Yurii Aulchenko, partly based on code by John Barnard
+#' @seealso \code{\link{ibs}}, \code{\link{gwaa.data-class}},
+#' \code{\link{snp.data-class}}
+#' @references Purcell S. et al, (2007) PLINK: a toolset for whole genome
+#' association and population-based linkage analyses. Am. J. Hum. Genet.
+#' @keywords htest
+#' @examples
+#' 
+#' data(ge03d2)
+#' h <- hom(ge03d2[,c(1:100)])
+#' h[1:5,]
+#' homsem <- h[,"Hom"]*(1-h[,"Hom"])/h[,"NoMeasured"]
+#' plot(h[,"Hom"],homsem)
+#' # wrong analysis: one should use all people (for right frequency) and markers (for right F) available!
+#' h <- hom(ge03d2[,c(1:10)])
+#' h
+#' 
 "hom" <- 
 		function(data,snpsubset,idsubset,snpfreq,n.snpfreq=1000) {
 	if (is(data,"gwaa.data")) {

Modified: pkg/GenABEL/generate_documentation.R
===================================================================
--- pkg/GenABEL/generate_documentation.R	2013-04-04 08:53:28 UTC (rev 1182)
+++ pkg/GenABEL/generate_documentation.R	2013-04-04 09:02:17 UTC (rev 1183)
@@ -17,6 +17,7 @@
 		"generateOffspring.R",
 		"getLogLikelihoodGivenRelation.R",
 		"grammar.R",
+		"hom.R",
 		"ibs.R",
 		"impute2databel.R",
 		"impute2mach.R",
@@ -28,10 +29,10 @@
 		"PGC.R",
 		"polygenic.R",
 		"polygenic_hglm.R",
+		"qtscore.R",
 		"recodeChromosome.R",
 		"reconstructNPs.R",
-		"sortmap.internal.R",
-		"qtscore.R"
+		"sortmap.internal.R"
 		#,
 		#"summary.scan.gwaa.R"
 )

Modified: pkg/GenABEL/man/hom.Rd
===================================================================
--- pkg/GenABEL/man/hom.Rd	2013-04-04 08:53:28 UTC (rev 1182)
+++ pkg/GenABEL/man/hom.Rd	2013-04-04 09:02:17 UTC (rev 1183)
@@ -1,92 +1,87 @@
 \name{hom}
 \alias{hom}
 \title{function to compute average homozygosity within a person}
-\description{
-This function computes average homozygosity (inbreeding) for a set of 
-people, across multiple markers. Can be used for Quality Control
-(e.g. contamination checks)
-}
 \usage{
-	hom(data, snpsubset, idsubset, snpfreq, n.snpfreq = 1000)
+  hom(data, snpsubset, idsubset, snpfreq, n.snpfreq = 1000)
 }
 \arguments{
-  \item{data}{Object of \link{gwaa.data-class} or \link{snp.data-class}}
+  \item{data}{Object of \link{gwaa.data-class} or
+  \link{snp.data-class}}
+
   \item{snpsubset}{Subset of SNPs to be used}
-  \item{idsubset}{People for whom average homozygosity is to be computed}
-  \item{snpfreq}{when option weight="freq" used, you can provide 
-		fixed allele frequencies}
-  \item{n.snpfreq}{when option weight="freq" used, you can provide 
-		a vector supplying the number of people used to estimate allele 
-		frequencies at the particular marker, or a fixed number}
+
+  \item{idsubset}{People for whom average homozygosity is
+  to be computed}
+
+  \item{snpfreq}{when option weight="freq" used, you can
+  provide fixed allele frequencies}
+
+  \item{n.snpfreq}{when option weight="freq" used, you can
+  provide a vector supplying the number of people used to
+  estimate allele frequencies at the particular marker, or
+  a fixed number}
 }
+\value{
+  A matrix with rows corresponding to the ID names and
+  columns showing the number of SNPs measured in this
+  person (NoMeasured), the number of measured polymorphic
+  SNPs (NoPoly), homozygosity (Hom), expected homozygosity
+  (E(Hom)), variance, and the estimate of inbreeding, F.
+}
+\description{
+  This function computes average homozygosity (inbreeding)
+  for a set of people, across multiple markers. Can be used
+  for Quality Control (e.g. contamination checks)
+}
 \details{
-	Homozygosity is measured as proportion of 
-	homozygous genotypes observed in a person.
+  Homozygosity is measured as proportion of homozygous
+  genotypes observed in a person.
 
-	Inbreeding for person \eqn{i} is estimated with 
+  Inbreeding for person \eqn{i} is estimated with
 
-	\deqn{
-	f_i = \frac{(O_i - E_i)}{(L_i - E_i)}
-	}{
-	f_i = ((O_i - E_i))/((L_i - E_i))
-	}
+  \deqn{
+  f_i = \frac{(O_i - E_i)}{(L_i - E_i)}
+  }{
+  f_i = ((O_i - E_i))/((L_i - E_i)) }
 
-	where \eqn{O_i} is observed homozygosity, \eqn{L_i} is the number of SNPs 
-	measured in individual \eqn{i} and 
+  where \eqn{O_i} is observed homozygosity, \eqn{L_i} is
+  the number of SNPs measured in individual \eqn{i} and
 
-	\deqn{
-	E_i = \Sigma_{j=1}^{L_i} (1 - 2 p_j (1 - p_j) \frac{T_{Aj}}{T_{Aj}-1})
-	}{
-	E_i = Sigma_(j=1)^(L_i) (1 - 2 p_j (1 - p_j) (T_(Aj))/(T_(Aj)-1))
-	}
+  \deqn{
+  E_i = \Sigma_{j=1}^{L_i} (1 - 2 p_j (1 - p_j)
+  \frac{T_{Aj}}{T_{Aj}-1})
+  }{
+  E_i = Sigma_(j=1)^(L_i) (1 - 2 p_j (1 - p_j) (T_(Aj))/(T_(Aj)-1))
+  }
 
-	where \eqn{T_{Aj}} is the number of measured genotypes at locus \eqn{j}; 
-	\eqn{T_{Aj}} is either estimated from data or provided by "n.snpfreq" 
-	parameter (vector). Allelic frequencies are either estimated from 
-	data or provided by the "snpfreq" vector.
+  where \eqn{T_{Aj}} is the number of measured genotypes at
+  locus \eqn{j}; \eqn{T_{Aj}} is either estimated from data
+  or provided by "n.snpfreq" parameter (vector). Allelic
+  frequencies are either estimated from data or provided by
+  the "snpfreq" vector.
 
-	This measure is the same as used by PLINK (see reference).
+  This measure is the same as used by PLINK (see
+  reference).
 
-	The variance (Var) is estimated as
-	
-	\deqn{
-	V_{i} = \frac(1)(N) \Sigma_k \frac{(x_{i,k} - p_k)^2}{(p_k * (1 - p_k))}
-	}
+  The variance (Var) is estimated as
 
-	where k changes from 1 to N = number of SNPs, \eqn{x_{i,k}} is 
-	a genotype of ith person at the kth SNP, coded as 0, 1/2, 1 and 
-	\eqn{p_k} is the frequency 
-	of the "+" allele.
+  \deqn{ V_{i} = \frac{1}{N} \Sigma_k \frac{(x_{i,k} -
+  p_k)^2}{(p_k * (1 - p_k))} }
 
-	Only polymorphic loci with number of measured genotypes >1 are used
-	with this option.
+  where k changes from 1 to N = number of SNPs,
+  \eqn{x_{i,k}} is a genotype of ith person at the kth SNP,
+  coded as 0, 1/2, 1 and \eqn{p_k} is the frequency of the
+  "+" allele.
 
-	This variance is used as diagonal of the genomic 
-	kinship matrix when using EIGENSTRAT method. 
+  Only polymorphic loci with number of measured genotypes
+  >1 are used with this option.
 
-	You should use as many people and markers as possible when estimating 
-	inbreeding/variance from marker data.
+  This variance is used as diagonal of the genomic kinship
+  matrix when using EIGENSTRAT method.
+
+  You should use as many people and markers as possible
+  when estimating inbreeding/variance from marker data.
 }
-\value{
-	A matrix with rows corresponding to the ID names and columns
-	showing the number of SNPs measured in this person (NoMeasured), 
-	the number of measured polymorphic SNPs (NoPoly),  
-	homozygosity (Hom),
-	expected homozygosity (E(Hom)), variance, and
-	the estimate of inbreeding, F. 
-}
-\references{
-	Purcell S. et al, (2007) PLINK: a toolset for whole genome association and population-based 
-	linkage analyses. Am. J. Hum. Genet.
-}
-\author{Yurii Aulchenko, partly based on code by John Barnard}
-%\note{
-%}
-\seealso{
-\code{\link{ibs}},
-\code{\link{gwaa.data-class}},
-\code{\link{snp.data-class}}
-}
 \examples{
 data(ge03d2)
 h <- hom(ge03d2[,c(1:100)])
@@ -97,4 +92,17 @@
 h <- hom(ge03d2[,c(1:10)])
 h
 }
-\keyword{htest}% at least one, from doc/KEYWORDS
+\author{
+  Yurii Aulchenko, partly based on code by John Barnard
+}
+\references{
+  Purcell S. et al, (2007) PLINK: a toolset for whole
+  genome association and population-based linkage analyses.
+  Am. J. Hum. Genet.
+}
+\seealso{
+  \code{\link{ibs}}, \code{\link{gwaa.data-class}},
+  \code{\link{snp.data-class}}
+}
+\keyword{htest}
+