[Gtdb-commits] r46 - in pkg/gt.db: R inst/doc man src

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Tue Mar 9 08:37:16 CET 2010


Author: dahinds
Date: 2010-03-09 08:37:16 +0100 (Tue, 09 Mar 2010)
New Revision: 46

Modified:
   pkg/gt.db/R/hapmap.R
   pkg/gt.db/R/sample.R
   pkg/gt.db/inst/doc/gt.db.pdf
   pkg/gt.db/man/init.gt.db.Rd
   pkg/gt.db/man/load.hapmap.data.Rd
   pkg/gt.db/man/ls.sample.Rd
   pkg/gt.db/man/progress.bar.Rd
   pkg/gt.db/src/encode.c
   pkg/gt.db/src/raw.c
   pkg/gt.db/src/str.c
Log:
- changed ls.sample() to sort results by position
- updated help page for load.hapmap.data()
- other minor cleanups



Modified: pkg/gt.db/R/hapmap.R
===================================================================
--- pkg/gt.db/R/hapmap.R	2010-03-04 01:23:42 UTC (rev 45)
+++ pkg/gt.db/R/hapmap.R	2010-03-09 07:37:16 UTC (rev 46)
@@ -103,7 +103,7 @@
         pos <- unique(map[map$assay.name == rsid,]$position)
         if (length(pos) > 1) {
             warning(rsid, ": found at multiple map positions",
-                    call.=FALSE, immediate=TRUE)
+                    call.=FALSE, immediate.=TRUE)
             for (i in 1:length(data)) {
                 rn <- which(data[[i]]$assay.name == rsid)
                 data[[i]]$assay.name[rn] <-

Modified: pkg/gt.db/R/sample.R
===================================================================
--- pkg/gt.db/R/sample.R	2010-03-04 01:23:42 UTC (rev 45)
+++ pkg/gt.db/R/sample.R	2010-03-09 07:37:16 UTC (rev 46)
@@ -1,5 +1,6 @@
 #
 # Copyright (C) 2009, Perlegen Sciences, Inc.
+# Copyright (C) 2010, 23andMe, Inc.
 #
 # Written by David A. Hinds <dhinds at sonic.net>
 #
@@ -38,7 +39,10 @@
       from sample s, subject u
       where s.dataset_id=:1 and s.subject_id=u.subject_id'
     r <- sql.query(gt.db::.gt.db, sql, lookup.id('dataset', dataset.name))
-    r$gender <- .fixup.gender(r$gender)
+    if (nrow(r)) {
+        r$gender <- .fixup.gender(r$gender)
+        r <- r[order(r$position),]
+    }
     .filter.ids(data.frame(r, row.names=r$sample.name), show.ids)
 }
 

Modified: pkg/gt.db/inst/doc/gt.db.pdf
===================================================================
(Binary files differ)

Modified: pkg/gt.db/man/init.gt.db.Rd
===================================================================
--- pkg/gt.db/man/init.gt.db.Rd	2010-03-04 01:23:42 UTC (rev 45)
+++ pkg/gt.db/man/init.gt.db.Rd	2010-03-09 07:37:16 UTC (rev 46)
@@ -1,5 +1,6 @@
 %
 % Copyright (C) 2009, Perlegen Sciences, Inc.
+% Copyright (C) 2010, 23andMe, Inc.
 % 
 % Written by David A. Hinds <dhinds at sonic.net>
 % 

Modified: pkg/gt.db/man/load.hapmap.data.Rd
===================================================================
--- pkg/gt.db/man/load.hapmap.data.Rd	2010-03-04 01:23:42 UTC (rev 45)
+++ pkg/gt.db/man/load.hapmap.data.Rd	2010-03-09 07:37:16 UTC (rev 46)
@@ -24,22 +24,37 @@
   International HapMap Project.
 }
 \usage{
-load.hapmap.data(files, project.name='HapMap', verbose=TRUE)
+load.hapmap.data(files, project.name='HapMap', map=TRUE, verbose=TRUE)
 }
 \arguments{
-  \item{files}{a vector of HapMap genotype data files.}
+  \item{files}{a vector of phased or unphased HapMap genotype data
+    file names.}
   \item{project.name}{the project to be associated with this data.}
+  \item{map}{logical: indicates whether to load assay and map
+    information into the database, or to assume it is already there.}
   \item{verbose}{logical: indicates whether to report progress.}
 }
 \details{
   This function supports loading recent HapMap Phase II and Phase III
-  genotype files.  If files include data from several population panels,
-  then the genotype data is merged into a single dataset spanning all
-  those panels.  Only non-redundant forward-orientation files are
-  supported.
+  genotype data.  If files include data from several population panels,
+  then the genotype data is merged by rsID into a single dataset
+  spanning all those panels.  Only non-redundant forward-orientation
+  files are supported.
 
   It has been tested against Phase II unphased r22 and r24, Phase II
-  phased r22, and unphased Phase III r2.
+  phased r22, and Phase III unphased r2.
+
+  In principle, it should be possible to define a \dQuote{Phase II}
+  platform once, and have multiple HapMap datasets refer to that
+  platform, with release-specific map information.  In practice, this is
+  challenging because releases can mix and match data from multiple
+  assays of the same SNP (e.g. CEU on one assay, JPT+CHB and YRI on
+  another).  We do not currently load the underlying assay information
+  and instead treat each rsID in a release as an \dQuote{assay}.
+
+  It is possible to load both phased and unphased datasets in the same
+  release so they refer to the same mapping.  First load the unphased
+  dataset, then load the phased data with \code{map=FALSE}.
 }
 \seealso{
   \code{\link{hapmap.subjects}}.

Modified: pkg/gt.db/man/ls.sample.Rd
===================================================================
--- pkg/gt.db/man/ls.sample.Rd	2010-03-04 01:23:42 UTC (rev 45)
+++ pkg/gt.db/man/ls.sample.Rd	2010-03-09 07:37:16 UTC (rev 46)
@@ -32,7 +32,8 @@
     database keys.}
 }
 \value{
-  A data frame with one row per dataset, and 4 or 6 columns:
+  A data frame with one row per sample, and 4 or 6 columns, sorted by
+  \code{position}:
   \item{sample.id}{if \code{show.ids} is set: the unique integer key
     for this sample.}
   \item{sample.name}{a short, unique identifier for the sample.}

Modified: pkg/gt.db/man/progress.bar.Rd
===================================================================
--- pkg/gt.db/man/progress.bar.Rd	2010-03-04 01:23:42 UTC (rev 45)
+++ pkg/gt.db/man/progress.bar.Rd	2010-03-09 07:37:16 UTC (rev 46)
@@ -1,5 +1,6 @@
 %
 % Copyright (C) 2009, Perlegen Sciences, Inc.
+% Copyright (C) 2010, 23andMe, Inc.
 % 
 % Written by David A. Hinds <dhinds at sonic.net>
 % 

Modified: pkg/gt.db/src/encode.c
===================================================================
--- pkg/gt.db/src/encode.c	2010-03-04 01:23:42 UTC (rev 45)
+++ pkg/gt.db/src/encode.c	2010-03-09 07:37:16 UTC (rev 46)
@@ -1,6 +1,6 @@
 /*
 
-  Copyright (C) 2009, 23andMe, Inc.
+  Copyright (C) 2010, 23andMe, Inc.
   
   Written by David A. Hinds <dhinds at sonic.net>
   

Modified: pkg/gt.db/src/raw.c
===================================================================
--- pkg/gt.db/src/raw.c	2010-03-04 01:23:42 UTC (rev 45)
+++ pkg/gt.db/src/raw.c	2010-03-09 07:37:16 UTC (rev 46)
@@ -96,7 +96,7 @@
 			a = toupper(str[i]); b = toupper(str[i+1]);
 			a = (a >= 'A') ? a - 'A' + 10 : a - '0';
 			b = (b >= 'A') ? b - 'A' + 10 : b - '0';
-			RAW(ans)[j*len + i>>1] = (a<<4)+b;
+			RAW(ans)[(j*len+i)>>1] = (a<<4)+b;
 		}
 	}
 	UNPROTECT(1);

Modified: pkg/gt.db/src/str.c
===================================================================
--- pkg/gt.db/src/str.c	2010-03-04 01:23:42 UTC (rev 45)
+++ pkg/gt.db/src/str.c	2010-03-09 07:37:16 UTC (rev 46)
@@ -1,6 +1,7 @@
 /*
 
   Copyright (C) 2009, Perlegen Sciences, Inc.
+  Copyright (C) 2010, 23andMe, Inc.
   
   Written by David A. Hinds <dhinds at sonic.net>
   



More information about the Gtdb-commits mailing list