[Gtdb-commits] r46 - in pkg/gt.db: R inst/doc man src
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Tue Mar 9 08:37:16 CET 2010
Author: dahinds
Date: 2010-03-09 08:37:16 +0100 (Tue, 09 Mar 2010)
New Revision: 46
Modified:
pkg/gt.db/R/hapmap.R
pkg/gt.db/R/sample.R
pkg/gt.db/inst/doc/gt.db.pdf
pkg/gt.db/man/init.gt.db.Rd
pkg/gt.db/man/load.hapmap.data.Rd
pkg/gt.db/man/ls.sample.Rd
pkg/gt.db/man/progress.bar.Rd
pkg/gt.db/src/encode.c
pkg/gt.db/src/raw.c
pkg/gt.db/src/str.c
Log:
- changed ls.sample() to sort results by position
- updated help page for load.hapmap.data()
- other minor cleanups
Modified: pkg/gt.db/R/hapmap.R
===================================================================
--- pkg/gt.db/R/hapmap.R 2010-03-04 01:23:42 UTC (rev 45)
+++ pkg/gt.db/R/hapmap.R 2010-03-09 07:37:16 UTC (rev 46)
@@ -103,7 +103,7 @@
pos <- unique(map[map$assay.name == rsid,]$position)
if (length(pos) > 1) {
warning(rsid, ": found at multiple map positions",
- call.=FALSE, immediate=TRUE)
+ call.=FALSE, immediate.=TRUE)
for (i in 1:length(data)) {
rn <- which(data[[i]]$assay.name == rsid)
data[[i]]$assay.name[rn] <-
Modified: pkg/gt.db/R/sample.R
===================================================================
--- pkg/gt.db/R/sample.R 2010-03-04 01:23:42 UTC (rev 45)
+++ pkg/gt.db/R/sample.R 2010-03-09 07:37:16 UTC (rev 46)
@@ -1,5 +1,6 @@
#
# Copyright (C) 2009, Perlegen Sciences, Inc.
+# Copyright (C) 2010, 23andMe, Inc.
#
# Written by David A. Hinds <dhinds at sonic.net>
#
@@ -38,7 +39,10 @@
from sample s, subject u
where s.dataset_id=:1 and s.subject_id=u.subject_id'
r <- sql.query(gt.db::.gt.db, sql, lookup.id('dataset', dataset.name))
- r$gender <- .fixup.gender(r$gender)
+ if (nrow(r)) {
+ r$gender <- .fixup.gender(r$gender)
+ r <- r[order(r$position),]
+ }
.filter.ids(data.frame(r, row.names=r$sample.name), show.ids)
}
Modified: pkg/gt.db/inst/doc/gt.db.pdf
===================================================================
(Binary files differ)
Modified: pkg/gt.db/man/init.gt.db.Rd
===================================================================
--- pkg/gt.db/man/init.gt.db.Rd 2010-03-04 01:23:42 UTC (rev 45)
+++ pkg/gt.db/man/init.gt.db.Rd 2010-03-09 07:37:16 UTC (rev 46)
@@ -1,5 +1,6 @@
%
% Copyright (C) 2009, Perlegen Sciences, Inc.
+% Copyright (C) 2010, 23andMe, Inc.
%
% Written by David A. Hinds <dhinds at sonic.net>
%
Modified: pkg/gt.db/man/load.hapmap.data.Rd
===================================================================
--- pkg/gt.db/man/load.hapmap.data.Rd 2010-03-04 01:23:42 UTC (rev 45)
+++ pkg/gt.db/man/load.hapmap.data.Rd 2010-03-09 07:37:16 UTC (rev 46)
@@ -24,22 +24,37 @@
International HapMap Project.
}
\usage{
-load.hapmap.data(files, project.name='HapMap', verbose=TRUE)
+load.hapmap.data(files, project.name='HapMap', map=TRUE, verbose=TRUE)
}
\arguments{
- \item{files}{a vector of HapMap genotype data files.}
+ \item{files}{a vector of phased or unphased HapMap genotype data
+ file names.}
\item{project.name}{the project to be associated with this data.}
+ \item{map}{logical: indicates whether to load assay and map
+ information into the database, or to assume it is already there.}
\item{verbose}{logical: indicates whether to report progress.}
}
\details{
This function supports loading recent HapMap Phase II and Phase III
- genotype files. If files include data from several population panels,
- then the genotype data is merged into a single dataset spanning all
- those panels. Only non-redundant forward-orientation files are
- supported.
+ genotype data. If files include data from several population panels,
+ then the genotype data is merged by rsID into a single dataset
+ spanning all those panels. Only non-redundant forward-orientation
+ files are supported.
It has been tested against Phase II unphased r22 and r24, Phase II
- phased r22, and unphased Phase III r2.
+ phased r22, and Phase III unphased r2.
+
+ In principle, it should be possible to define a \dQuote{Phase II}
+ platform once, and have multiple HapMap datasets refer to that
+ platform, with release-specific map information. In practice, this is
+ challenging because releases can mix and match data from multiple
+ assays of the same SNP (e.g. CEU on one assay, JPT+CHB and YRI on
+ another). We do not currently load the underlying assay information
+ and instead treat each rsID in a release as an \dQuote{assay}.
+
+ It is possible to load both phased and unphased datasets in the same
+ release so they refer to the same mapping. First load the unphased
+ dataset, then load the phased data with \code{map=FALSE}.
}
\seealso{
\code{\link{hapmap.subjects}}.
Modified: pkg/gt.db/man/ls.sample.Rd
===================================================================
--- pkg/gt.db/man/ls.sample.Rd 2010-03-04 01:23:42 UTC (rev 45)
+++ pkg/gt.db/man/ls.sample.Rd 2010-03-09 07:37:16 UTC (rev 46)
@@ -32,7 +32,8 @@
database keys.}
}
\value{
- A data frame with one row per dataset, and 4 or 6 columns:
+ A data frame with one row per sample, and 4 or 6 columns, sorted by
+ \code{position}:
\item{sample.id}{if \code{show.ids} is set: the unique integer key
for this sample.}
\item{sample.name}{a short, unique identifier for the sample.}
Modified: pkg/gt.db/man/progress.bar.Rd
===================================================================
--- pkg/gt.db/man/progress.bar.Rd 2010-03-04 01:23:42 UTC (rev 45)
+++ pkg/gt.db/man/progress.bar.Rd 2010-03-09 07:37:16 UTC (rev 46)
@@ -1,5 +1,6 @@
%
% Copyright (C) 2009, Perlegen Sciences, Inc.
+% Copyright (C) 2010, 23andMe, Inc.
%
% Written by David A. Hinds <dhinds at sonic.net>
%
Modified: pkg/gt.db/src/encode.c
===================================================================
--- pkg/gt.db/src/encode.c 2010-03-04 01:23:42 UTC (rev 45)
+++ pkg/gt.db/src/encode.c 2010-03-09 07:37:16 UTC (rev 46)
@@ -1,6 +1,6 @@
/*
- Copyright (C) 2009, 23andMe, Inc.
+ Copyright (C) 2010, 23andMe, Inc.
Written by David A. Hinds <dhinds at sonic.net>
Modified: pkg/gt.db/src/raw.c
===================================================================
--- pkg/gt.db/src/raw.c 2010-03-04 01:23:42 UTC (rev 45)
+++ pkg/gt.db/src/raw.c 2010-03-09 07:37:16 UTC (rev 46)
@@ -96,7 +96,7 @@
a = toupper(str[i]); b = toupper(str[i+1]);
a = (a >= 'A') ? a - 'A' + 10 : a - '0';
b = (b >= 'A') ? b - 'A' + 10 : b - '0';
- RAW(ans)[j*len + i>>1] = (a<<4)+b;
+ RAW(ans)[(j*len+i)>>1] = (a<<4)+b;
}
}
UNPROTECT(1);
Modified: pkg/gt.db/src/str.c
===================================================================
--- pkg/gt.db/src/str.c 2010-03-04 01:23:42 UTC (rev 45)
+++ pkg/gt.db/src/str.c 2010-03-09 07:37:16 UTC (rev 46)
@@ -1,6 +1,7 @@
/*
Copyright (C) 2009, Perlegen Sciences, Inc.
+ Copyright (C) 2010, 23andMe, Inc.
Written by David A. Hinds <dhinds at sonic.net>
More information about the Gtdb-commits
mailing list