[Gtdb-commits] r44 - in pkg/gt.db: R inst/schema man

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Wed Mar 3 02:25:50 CET 2010


Author: dahinds
Date: 2010-03-03 02:25:50 +0100 (Wed, 03 Mar 2010)
New Revision: 44

Modified:
   pkg/gt.db/R/assay.R
   pkg/gt.db/R/genotype.R
   pkg/gt.db/R/misc.R
   pkg/gt.db/R/sql.R
   pkg/gt.db/inst/schema/mk_mysql.sql
   pkg/gt.db/inst/schema/mk_oracle.sql
   pkg/gt.db/inst/schema/mk_sqlite.sql
   pkg/gt.db/inst/schema/rm_mysql.sql
   pkg/gt.db/inst/schema/rm_oracle.sql
   pkg/gt.db/inst/schema/rm_sqlite.sql
   pkg/gt.db/man/init.gt.db.Rd
   pkg/gt.db/man/load.hapmap.data.Rd
Log:
- Added support for compression of genotypes, quality scores, and raw data
- Added new 'gtdb_option' table to the schema



Modified: pkg/gt.db/R/assay.R
===================================================================
--- pkg/gt.db/R/assay.R	2010-03-03 01:21:10 UTC (rev 43)
+++ pkg/gt.db/R/assay.R	2010-03-03 01:25:50 UTC (rev 44)
@@ -161,19 +161,24 @@
 
     db.mode <- .gt.db.options('db.mode')
     tx.mode <- .gt.db.options('tx.mode')
-    if ((db.mode == tx.mode) ||
-        all(is.na(data$qscore) && is.na(data$raw.data))) {
+    if ((db.mode == tx.mode) || ((db.mode == 'raw') &&
+        all(is.na(data$qscore) & is.na(data$raw.data)))) {  # '&' tests every row
+        txt.fn <- ''
         raw.fn <- ''
     } else if (db.mode == 'raw' && tx.mode == 'hex') {
+        txt.fn <- ''
         raw.fn <- ':unhex:'
+    } else if (db.mode == 'zip' && tx.mode == 'hex') {
+        txt.fn <- ':zip:'          # applied to the genotype bind (:4)
+        raw.fn <- ':zip.unhex:'    # applied to the qscore and raw.data binds (:5, :6)
     } else {
         stop('unknown conversion!')
     }
 
     sql <-
      'insert into assay_data
-      values (null,:1,:2,:3,:4,%1$s(:5),%1$s(:6))'
-    sql <- sprintf(sql, raw.fn)
+      values (null,:1,:2,:3,%1$s(:4),%2$s(:5),%2$s(:6))'
+    sql <- sprintf(sql, txt.fn, raw.fn)
     cols <- c('assay.id','flags','genotype','qscore','raw.data')
     sql.exec(gt.db::.gt.db, sql, dset.id, data[cols], progress=progress)
 }

Modified: pkg/gt.db/R/genotype.R
===================================================================
--- pkg/gt.db/R/genotype.R	2010-03-03 01:21:10 UTC (rev 43)
+++ pkg/gt.db/R/genotype.R	2010-03-03 01:25:50 UTC (rev 44)
@@ -42,19 +42,24 @@
     db.mode <- .gt.db.options('db.mode')
     tx.mode <- .gt.db.options('tx.mode')
     if (db.mode == tx.mode) {
-        cvt.fn <- ':blob:'
+        txt.fn <- ''
+        raw.fn <- ':blob:'
     } else if (db.mode == 'raw' && tx.mode == 'hex') {
-        cvt.fn <- ':hex.blob:'
+        txt.fn <- ''
+        raw.fn <- ':hex.blob:'
+    } else if (db.mode == 'zip' && tx.mode == 'hex') {
+        txt.fn <- ':unzip:'          # placeholder form, so sql.R can translate it
+        raw.fn <- ':hex.unzip.blob:'
     } else {
         stop('unknown conversion!')
     }
 
     if (genotype)
-        sql <- paste(sql, ', :clob:(genotype)')
+        sql <- sprintf("%s, %s(genotype)", sql, txt.fn)
     if (qscore)
-        sql <- sprintf("%s, %s(qscore)", sql, cvt.fn)
+        sql <- sprintf("%s, %s(qscore)", sql, raw.fn)
     if (raw.data)
-        sql <- sprintf("%s, %s(raw_data)", sql, cvt.fn)
+        sql <- sprintf("%s, %s(raw_data)", sql, raw.fn)
 
     sql <- paste(sql,
      'from assay_data d, assay a, assay_position p

Modified: pkg/gt.db/R/misc.R
===================================================================
--- pkg/gt.db/R/misc.R	2010-03-03 01:21:10 UTC (rev 43)
+++ pkg/gt.db/R/misc.R	2010-03-03 01:25:50 UTC (rev 44)
@@ -70,10 +70,15 @@
 use.gt.db <- function(dbConnection)
 {
     assign('.gt.db', dbConnection, 'package:gt.db')
+    x <- try(sql.query(gt.db::.gt.db, 'select * from gtdb_option'), silent=TRUE)
+    if (is.data.frame(x) && nrow(x) > 0) {  # skip if the query failed or is empty
+        opts <- structure(as.list(as.character(x$value)), names=as.character(x$name))
+        do.call(.gt.db.options, opts)  # name=value form, as init.gt.db uses
+    }
     invisible()
 }
 
-init.gt.db <- function()
+init.gt.db <- function(db.mode='raw')
 {
     path <- library(help='gt.db')$path
     schema <- switch(class(gt.db::.gt.db),
@@ -85,6 +90,9 @@
     s <- s[-grep('^--',s)]
     s <- strsplit(paste(s, collapse='\n'), ';\n')[[1]]
     sapply(s, sql.exec, db=gt.db::.gt.db, USE.NAMES=FALSE)
+    .gt.db.options(db.mode=db.mode)  # set the in-memory option first
+    sql.exec(gt.db::.gt.db, 'insert into gtdb_option (name, value) values (:1,:2)',
+             'db.mode', db.mode)     # stored so use.gt.db can read it back
 }
 
 gt.demo.check <- function()
@@ -115,8 +123,7 @@
             db <- dbConnect(dbDriver('SQLite'), fn, loadable.extensions=TRUE)
             unlink(fn)
             use.gt.db(db)
-            init.gt.db()
-            .gt.db.options(db.mode='hex')
+            init.gt.db(db.mode='hex')
             demo('setup.gt.demo')
         } else {
             stop('No GT.DB database connection', call.=FALSE)

Modified: pkg/gt.db/R/sql.R
===================================================================
--- pkg/gt.db/R/sql.R	2010-03-03 01:21:10 UTC (rev 43)
+++ pkg/gt.db/R/sql.R	2010-03-03 01:25:50 UTC (rev 44)
@@ -116,6 +116,11 @@
         sql <- gsub(':blob:\\((\\w+)\\)', '\\1', sql)
         sql <- gsub(':hex.blob:\\((\\w+)\\)', 'hex(\\1) \\1', sql)
         sql <- gsub(':unhex:', 'unhex', sql)
+        sql <- gsub(':zip:', 'compress', sql)
+        sql <- gsub(':unzip:', 'uncompress', sql)
+        sql <- gsub(':zip.unhex:\\(([^)]+)\\)', 'compress(unhex(\\1))', sql)
+        sql <- gsub(':hex.unzip.blob:\\((\\w+)\\)',
+                    'hex(uncompress(\\1)) \\1', sql)
         sql <- gsub(':fromdual:', '', sql)
         sql <- gsub(':floor:', 'floor', sql)
         sql <- gsub(':mod:', 'mod', sql)

Modified: pkg/gt.db/inst/schema/mk_mysql.sql
===================================================================
--- pkg/gt.db/inst/schema/mk_mysql.sql	2010-03-03 01:21:10 UTC (rev 43)
+++ pkg/gt.db/inst/schema/mk_mysql.sql	2010-03-03 01:25:50 UTC (rev 44)
@@ -18,6 +18,12 @@
 -- along with this program.  If not, see <http://www.gnu.org/licenses/>
 -- 
 
+create table gtdb_option
+(
+    name varchar(64) primary key,
+    value varchar(255)
+);
+
 --
 -- Assay Definitions and Mapping Information
 --

Modified: pkg/gt.db/inst/schema/mk_oracle.sql
===================================================================
--- pkg/gt.db/inst/schema/mk_oracle.sql	2010-03-03 01:21:10 UTC (rev 43)
+++ pkg/gt.db/inst/schema/mk_oracle.sql	2010-03-03 01:25:50 UTC (rev 44)
@@ -18,6 +18,12 @@
 -- along with this program.  If not, see <http://www.gnu.org/licenses/>
 --
 
+create table gtdb_option
+(
+    name varchar(64) primary key,
+    value varchar(255)
+);
+
 --
 -- Assay Definitions and Mapping Information
 --

Modified: pkg/gt.db/inst/schema/mk_sqlite.sql
===================================================================
--- pkg/gt.db/inst/schema/mk_sqlite.sql	2010-03-03 01:21:10 UTC (rev 43)
+++ pkg/gt.db/inst/schema/mk_sqlite.sql	2010-03-03 01:25:50 UTC (rev 44)
@@ -18,6 +18,12 @@
 -- along with this program.  If not, see <http://www.gnu.org/licenses/>
 -- 
 
+create table gtdb_option
+(
+    name varchar(64) primary key,
+    value varchar(255)
+);
+
 --
 -- Assay Definitions and Mapping Information
 --

Modified: pkg/gt.db/inst/schema/rm_mysql.sql
===================================================================
--- pkg/gt.db/inst/schema/rm_mysql.sql	2010-03-03 01:21:10 UTC (rev 43)
+++ pkg/gt.db/inst/schema/rm_mysql.sql	2010-03-03 01:25:50 UTC (rev 44)
@@ -38,3 +38,5 @@
 drop table assay_flag;
 drop table assay;
 drop table platform;
+drop table gtdb_option;
+

Modified: pkg/gt.db/inst/schema/rm_oracle.sql
===================================================================
--- pkg/gt.db/inst/schema/rm_oracle.sql	2010-03-03 01:21:10 UTC (rev 43)
+++ pkg/gt.db/inst/schema/rm_oracle.sql	2010-03-03 01:25:50 UTC (rev 44)
@@ -63,3 +63,4 @@
 drop table assay_flag;
 drop table assay;
 drop table platform;
+drop table gtdb_option;

Modified: pkg/gt.db/inst/schema/rm_sqlite.sql
===================================================================
--- pkg/gt.db/inst/schema/rm_sqlite.sql	2010-03-03 01:21:10 UTC (rev 43)
+++ pkg/gt.db/inst/schema/rm_sqlite.sql	2010-03-03 01:25:50 UTC (rev 44)
@@ -38,3 +38,4 @@
 drop table assay_flag
 drop table assay;
 drop table platform;
+drop table gtdb_option;

Modified: pkg/gt.db/man/init.gt.db.Rd
===================================================================
--- pkg/gt.db/man/init.gt.db.Rd	2010-03-03 01:21:10 UTC (rev 43)
+++ pkg/gt.db/man/init.gt.db.Rd	2010-03-03 01:25:50 UTC (rev 44)
@@ -24,13 +24,28 @@
   empty database.
 }
 \usage{
-init.gt.db()
+init.gt.db(db.mode='raw')
 }
+\arguments{
+  \item{db.mode}{storage mode for packed objects in the database: either
+    \code{'hex'}, \code{'raw'}, or \code{'zip'}.  See details.}
+}
 \details{
   This should be called after connecting to a new database using
   \code{\link[DBI:dbConnect]{dbConnect}} and \code{\link{use.gt.db}}.
   Scripts for creating GT.DB tables and indexes are installed under
   \code{library(help='gt.db')$path} in the \file{schema} subdirectory.
+
+  The \code{db.mode} argument controls how genotypes, quality scores,
+  and raw data are stored in the database.  The default (\code{'raw'})
+  is to store genotypes as character data, and quality scores and
+  underlying data as binary blobs.  In \code{'hex'} mode, quality scores
+  and underlying data are stored as strings of hex digits.  In
+  \code{'zip'} mode, all are stored in compressed form.
+
+  Without appropriate plugins, the SQLite interface supports only
+  \code{'hex'} mode.  At the moment, \code{'zip'} mode is only supported
+  in MySQL.
 }
 \seealso{
   \code{\link[DBI:dbConnect]{dbConnect}},
@@ -44,7 +59,7 @@
 # unlink it so it will go away at the end of the session
 unlink(fn)
 use.gt.db(dbx)
-init.gt.db()
+init.gt.db(db.mode='hex')
 demo('setup.gt.demo')
 }}
 \keyword{database}

Modified: pkg/gt.db/man/load.hapmap.data.Rd
===================================================================
--- pkg/gt.db/man/load.hapmap.data.Rd	2010-03-03 01:21:10 UTC (rev 43)
+++ pkg/gt.db/man/load.hapmap.data.Rd	2010-03-03 01:25:50 UTC (rev 44)
@@ -37,6 +37,8 @@
   then the genotype data is merged into a single dataset spanning all
   those panels.  Only non-redundant forward-orientation files are
   supported.
+
+  It has been tested against Phase II r22 and r24, and Phase III r2.
 }
 \seealso{
   \code{\link{hapmap.subjects}}.



More information about the Gtdb-commits mailing list