[Gtdb-commits] r44 - in pkg/gt.db: R inst/schema man
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Wed Mar 3 02:25:50 CET 2010
Author: dahinds
Date: 2010-03-03 02:25:50 +0100 (Wed, 03 Mar 2010)
New Revision: 44
Modified:
pkg/gt.db/R/assay.R
pkg/gt.db/R/genotype.R
pkg/gt.db/R/misc.R
pkg/gt.db/R/sql.R
pkg/gt.db/inst/schema/mk_mysql.sql
pkg/gt.db/inst/schema/mk_oracle.sql
pkg/gt.db/inst/schema/mk_sqlite.sql
pkg/gt.db/inst/schema/rm_mysql.sql
pkg/gt.db/inst/schema/rm_oracle.sql
pkg/gt.db/inst/schema/rm_sqlite.sql
pkg/gt.db/man/init.gt.db.Rd
pkg/gt.db/man/load.hapmap.data.Rd
Log:
- Added support for compression of genotypes, quality scores, and raw data
- Added new 'gtdb_option' table to the schema
Modified: pkg/gt.db/R/assay.R
===================================================================
--- pkg/gt.db/R/assay.R 2010-03-03 01:21:10 UTC (rev 43)
+++ pkg/gt.db/R/assay.R 2010-03-03 01:25:50 UTC (rev 44)
@@ -161,19 +161,24 @@
db.mode <- .gt.db.options('db.mode')
tx.mode <- .gt.db.options('tx.mode')
- if ((db.mode == tx.mode) ||
+ if ((db.mode == tx.mode) || (db.mode == 'raw') &&
all(is.na(data$qscore) && is.na(data$raw.data))) {
+ txt.fn <- ''
raw.fn <- ''
} else if (db.mode == 'raw' && tx.mode == 'hex') {
+ txt.fn <- ''
raw.fn <- ':unhex:'
+ } else if (db.mode == 'zip' && tx.mode == 'hex') {
+ txt.fn <- ':zip:'
+ raw.fn <- ':zip.unhex:'
} else {
stop('unknown conversion!')
}
sql <-
'insert into assay_data
- values (null,:1,:2,:3,:4,%1$s(:5),%1$s(:6))'
- sql <- sprintf(sql, raw.fn)
+ values (null,:1,:2,:3,%1$s(:4),%2$s(:5),%2$s(:6))'
+ sql <- sprintf(sql, txt.fn, raw.fn)
cols <- c('assay.id','flags','genotype','qscore','raw.data')
sql.exec(gt.db::.gt.db, sql, dset.id, data[cols], progress=progress)
}
Modified: pkg/gt.db/R/genotype.R
===================================================================
--- pkg/gt.db/R/genotype.R 2010-03-03 01:21:10 UTC (rev 43)
+++ pkg/gt.db/R/genotype.R 2010-03-03 01:25:50 UTC (rev 44)
@@ -42,19 +42,24 @@
db.mode <- .gt.db.options('db.mode')
tx.mode <- .gt.db.options('tx.mode')
if (db.mode == tx.mode) {
- cvt.fn <- ':blob:'
+ txt.fn <- ''
+ raw.fn <- ':blob:'
} else if (db.mode == 'raw' && tx.mode == 'hex') {
- cvt.fn <- ':hex.blob:'
+ txt.fn <- ''
+ raw.fn <- ':hex.blob:'
+ } else if (db.mode == 'zip' && tx.mode == 'hex') {
+ txt.fn <- 'unzip'
+ raw.fn <- ':hex.unzip.blob:'
} else {
stop('unknown conversion!')
}
if (genotype)
- sql <- paste(sql, ', :clob:(genotype)')
+ sql <- sprintf("%s, %s(genotype)", sql, txt.fn)
if (qscore)
- sql <- sprintf("%s, %s(qscore)", sql, cvt.fn)
+ sql <- sprintf("%s, %s(qscore)", sql, raw.fn)
if (raw.data)
- sql <- sprintf("%s, %s(raw_data)", sql, cvt.fn)
+ sql <- sprintf("%s, %s(raw_data)", sql, raw.fn)
sql <- paste(sql,
'from assay_data d, assay a, assay_position p
Modified: pkg/gt.db/R/misc.R
===================================================================
--- pkg/gt.db/R/misc.R 2010-03-03 01:21:10 UTC (rev 43)
+++ pkg/gt.db/R/misc.R 2010-03-03 01:25:50 UTC (rev 44)
@@ -70,10 +70,15 @@
use.gt.db <- function(dbConnection)
{
assign('.gt.db', dbConnection, 'package:gt.db')
+ x <- try(sql.query(gt.db::.gt.db, 'select * from gtdb_option'),
+ silent=TRUE)
+ if (class(x) == 'data.frame') {
+ mapply(.gt.db.options, x$name, x$value)
+ }
invisible()
}
-init.gt.db <- function()
+init.gt.db <- function(db.mode='raw')
{
path <- library(help='gt.db')$path
schema <- switch(class(gt.db::.gt.db),
@@ -85,6 +90,9 @@
s <- s[-grep('^--',s)]
s <- strsplit(paste(s, collapse='\n'), ';\n')[[1]]
sapply(s, sql.exec, db=gt.db::.gt.db, USE.NAMES=FALSE)
+ .gt.db.options(db.mode=db.mode)
+ sql.exec(gt.db::.gt.db, 'insert into gtdb_option values (?,?)',
+ 'db.mode', db.mode)
}
gt.demo.check <- function()
@@ -115,8 +123,7 @@
db <- dbConnect(dbDriver('SQLite'), fn, loadable.extensions=TRUE)
unlink(fn)
use.gt.db(db)
- init.gt.db()
- .gt.db.options(db.mode='hex')
+ init.gt.db(db.mode='hex')
demo('setup.gt.demo')
} else {
stop('No GT.DB database connection', call.=FALSE)
Modified: pkg/gt.db/R/sql.R
===================================================================
--- pkg/gt.db/R/sql.R 2010-03-03 01:21:10 UTC (rev 43)
+++ pkg/gt.db/R/sql.R 2010-03-03 01:25:50 UTC (rev 44)
@@ -116,6 +116,11 @@
sql <- gsub(':blob:\\((\\w+)\\)', '\\1', sql)
sql <- gsub(':hex.blob:\\((\\w+)\\)', 'hex(\\1) \\1', sql)
sql <- gsub(':unhex:', 'unhex', sql)
+ sql <- gsub(':zip:', 'compress', sql)
+ sql <- gsub(':unzip:', 'uncompress', sql)
+ sql <- gsub(':zip.unhex:\\(([^)]+)\\)', 'compress(unhex(\\1))', sql)
+ sql <- gsub(':hex.unzip.blob:\\((\\w+)\\)',
+ 'hex(uncompress(\\1)) \\1', sql)
sql <- gsub(':fromdual:', '', sql)
sql <- gsub(':floor:', 'floor', sql)
sql <- gsub(':mod:', 'mod', sql)
Modified: pkg/gt.db/inst/schema/mk_mysql.sql
===================================================================
--- pkg/gt.db/inst/schema/mk_mysql.sql 2010-03-03 01:21:10 UTC (rev 43)
+++ pkg/gt.db/inst/schema/mk_mysql.sql 2010-03-03 01:25:50 UTC (rev 44)
@@ -18,6 +18,12 @@
-- along with this program. If not, see <http://www.gnu.org/licenses/>
--
+create table gtdb_option
+(
+ name varchar(64) primary key,
+ value varchar(255)
+);
+
--
-- Assay Definitions and Mapping Information
--
Modified: pkg/gt.db/inst/schema/mk_oracle.sql
===================================================================
--- pkg/gt.db/inst/schema/mk_oracle.sql 2010-03-03 01:21:10 UTC (rev 43)
+++ pkg/gt.db/inst/schema/mk_oracle.sql 2010-03-03 01:25:50 UTC (rev 44)
@@ -18,6 +18,12 @@
-- along with this program. If not, see <http://www.gnu.org/licenses/>
--
+create table gtdb_option
+(
+ name varchar(64) primary key,
+ value varchar(255)
+);
+
--
-- Assay Definitions and Mapping Information
--
Modified: pkg/gt.db/inst/schema/mk_sqlite.sql
===================================================================
--- pkg/gt.db/inst/schema/mk_sqlite.sql 2010-03-03 01:21:10 UTC (rev 43)
+++ pkg/gt.db/inst/schema/mk_sqlite.sql 2010-03-03 01:25:50 UTC (rev 44)
@@ -18,6 +18,12 @@
-- along with this program. If not, see <http://www.gnu.org/licenses/>
--
+create table gtdb_option
+(
+ name varchar(64) primary key,
+ value varchar(255)
+);
+
--
-- Assay Definitions and Mapping Information
--
Modified: pkg/gt.db/inst/schema/rm_mysql.sql
===================================================================
--- pkg/gt.db/inst/schema/rm_mysql.sql 2010-03-03 01:21:10 UTC (rev 43)
+++ pkg/gt.db/inst/schema/rm_mysql.sql 2010-03-03 01:25:50 UTC (rev 44)
@@ -38,3 +38,5 @@
drop table assay_flag;
drop table assay;
drop table platform;
+drop table gtdb_option;
+
Modified: pkg/gt.db/inst/schema/rm_oracle.sql
===================================================================
--- pkg/gt.db/inst/schema/rm_oracle.sql 2010-03-03 01:21:10 UTC (rev 43)
+++ pkg/gt.db/inst/schema/rm_oracle.sql 2010-03-03 01:25:50 UTC (rev 44)
@@ -63,3 +63,4 @@
drop table assay_flag;
drop table assay;
drop table platform;
+drop table gtdb_option;
Modified: pkg/gt.db/inst/schema/rm_sqlite.sql
===================================================================
--- pkg/gt.db/inst/schema/rm_sqlite.sql 2010-03-03 01:21:10 UTC (rev 43)
+++ pkg/gt.db/inst/schema/rm_sqlite.sql 2010-03-03 01:25:50 UTC (rev 44)
@@ -38,3 +38,4 @@
drop table assay_flag
drop table assay;
drop table platform;
+drop table gtdb_option;
Modified: pkg/gt.db/man/init.gt.db.Rd
===================================================================
--- pkg/gt.db/man/init.gt.db.Rd 2010-03-03 01:21:10 UTC (rev 43)
+++ pkg/gt.db/man/init.gt.db.Rd 2010-03-03 01:25:50 UTC (rev 44)
@@ -24,13 +24,28 @@
empty database.
}
\usage{
-init.gt.db()
+init.gt.db(db.mode='raw')
}
+\arguments{
+ \item{db.mode}{storage mode for packed objects in the database: one of
+ \code{'hex'}, \code{'raw'}, or \code{'zip'}. See details.}
+}
\details{
This should be called after connecting to a new database using
\code{\link[DBI:dbConnect]{dbConnect}} and \code{\link{use.gt.db}}.
Scripts for creating GT.DB tables and indexes are installed under
\code{library(help='gt.db')$path} in the \file{schema} subdirectory.
+
+ The \code{db.mode} argument controls how genotypes, quality scores,
+ and raw data are stored in the database. The default (\code{'raw'})
+ is to store genotypes as character data, and quality scores and
+ raw data as binary blobs. In \code{'hex'} mode, quality scores
+ and raw data are stored as strings of hex digits. In
+ \code{'zip'} mode, all three are stored in compressed form.
+
+ Without appropriate plugins, the SQLite interface supports only
+ \code{'hex'} mode. At the moment, \code{'zip'} mode is only supported
+ in MySQL.
}
\seealso{
\code{\link[DBI:dbConnect]{dbConnect}},
@@ -44,7 +59,7 @@
# unlink it so it will go away at the end of the session
unlink(fn)
use.gt.db(dbx)
-init.gt.db()
+init.gt.db(db.mode='hex')
demo('setup.gt.demo')
}}
\keyword{database}
Modified: pkg/gt.db/man/load.hapmap.data.Rd
===================================================================
--- pkg/gt.db/man/load.hapmap.data.Rd 2010-03-03 01:21:10 UTC (rev 43)
+++ pkg/gt.db/man/load.hapmap.data.Rd 2010-03-03 01:25:50 UTC (rev 44)
@@ -37,6 +37,8 @@
then the genotype data is merged into a single dataset spanning all
those panels. Only non-redundant forward-orientation files are
supported.
+
+ It has been tested against Phase II r22 and r24, and Phase III r2.
}
\seealso{
\code{\link{hapmap.subjects}}.
More information about the Gtdb-commits
mailing list