[Gtdb-commits] r28 - in pkg/gt.db: R man
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Fri Feb 12 08:58:41 CET 2010
Author: dahinds
Date: 2010-02-12 08:58:41 +0100 (Fri, 12 Feb 2010)
New Revision: 28
Modified:
pkg/gt.db/R/assay.R
pkg/gt.db/R/sql.R
pkg/gt.db/man/mk.assay.Rd
pkg/gt.db/man/mk.assay.data.Rd
pkg/gt.db/man/mk.assay.position.Rd
pkg/gt.db/man/sql.query.Rd
Log:
- Added 'progress' argument to sql.exec, mk.assay, mk.assay.position,
and mk.assay.data for viewing progress during long inserts.
- Fixed bug in my.sql.exec for insert with no bind data.
Modified: pkg/gt.db/R/assay.R
===================================================================
--- pkg/gt.db/R/assay.R 2009-11-06 08:33:51 UTC (rev 27)
+++ pkg/gt.db/R/assay.R 2010-02-12 07:58:41 UTC (rev 28)
@@ -1,5 +1,6 @@
#
# Copyright (C) 2009, Perlegen Sciences, Inc.
+# Copyright (C) 2010, 23andMe, Inc.
#
# Written by David A. Hinds <dhinds at sonic.net>
#
@@ -114,15 +115,19 @@
.filter.ids(data.frame(r, row.names=r$assay.name), show.ids)
}
-mk.assay <- function(platform.name, data)
+mk.assay <- function(platform.name, data, progress=FALSE)
{
plat.id <- lookup.id('platform', platform.name)
grp.id <- lookup.id('assay_group', unique(data$assay.group),
platform.id=plat.id)
.check.name(data$assay.name)
+ r <- (regexpr('^[a-zA-Z]*_[a-zA-Z]*$', data$probe.seq) < 0)
+ if (any(r,na.rm=TRUE))
+ stop("invalid probe sequence(s)", call.=FALSE)
sql <- 'insert into assay values (null,:1,:2,:3,:4,:5)'
sql.exec(gt.db::.gt.db, sql, plat.id, grp.id[data$assay.group],
- data[c('assay.name','alleles','probe.seq')])
+ data[c('assay.name','alleles','probe.seq')],
+ progress=progress)
}
#---------------------------------------------------------------------
@@ -149,7 +154,8 @@
.filter.ids(data.frame(r,row.names=r$assay.name), show.ids)
}
-mk.assay.position <- function(platform.name, mapping.name, data)
+mk.assay.position <-
+function(platform.name, mapping.name, data, progress=FALSE)
{
map.id <- lookup.mapping.id(platform.name, mapping.name)
if (is.null(data$assay.id)) {
@@ -166,12 +172,13 @@
data$ploidy <- .fixup.ploidy(data$ploidy)
sql.exec(gt.db::.gt.db, sql, map.id,
data[c('assay.id', 'scaffold', 'position', 'strand',
- 'ploidy', 'dbsnp.rsid', 'dbsnp.orient')])
+ 'ploidy', 'dbsnp.rsid', 'dbsnp.orient')],
+ progress=progress)
}
#---------------------------------------------------------------------
-mk.assay.data <- function(dataset.name, data)
+mk.assay.data <- function(dataset.name, data, progress=FALSE)
{
dset.id <- lookup.id('dataset', dataset.name)
if (is.null(data$flags)) data$flags <- 0
@@ -199,5 +206,5 @@
values (null,:1,:2,:3,:4,%1$s(:5),%1$s(:6))'
sql <- sprintf(sql, cvt.fn)
cols <- c('assay.id','flags','genotype','qscore','raw.data')
- sql.exec(gt.db::.gt.db, sql, dset.id, data[cols])
+ sql.exec(gt.db::.gt.db, sql, dset.id, data[cols], progress=progress)
}
Modified: pkg/gt.db/R/sql.R
===================================================================
--- pkg/gt.db/R/sql.R 2009-11-06 08:33:51 UTC (rev 27)
+++ pkg/gt.db/R/sql.R 2010-02-12 07:58:41 UTC (rev 28)
@@ -1,5 +1,6 @@
#
# Copyright (C) 2009, Perlegen Sciences, Inc.
+# Copyright (C) 2010, 23andMe, Inc.
#
# Written by David A. Hinds <dhinds at sonic.net>
#
@@ -188,7 +189,7 @@
max(1, floor(1024 * chunk.kb / rowsz))
}
-.ora.sql.exec <- function(db, sql, ..., chunk.kb=256)
+.ora.sql.exec <- function(db, sql, ..., chunk.kb=256, progress=FALSE)
{
efn <- function(e)
{
@@ -201,9 +202,14 @@
chunk.rows <- .chunk.rows(data, chunk.kb)
ps <- tryCatch(dbPrepareStatement(db, sql, data), error=efn)
- for (lo in seq(1,max(1,nrow(data)),chunk.rows)) {
- d <- data[lo:min(lo+chunk.rows-1, nrow(data)),,drop=FALSE]
+ nr <- 0
+ nd <- nrow(data)
+ if (progress) progress.bar(0, nd)
+ for (lo in seq(1,max(1,nd),chunk.rows)) {
+ d <- data[lo:min(lo+chunk.rows-1,nd),,drop=FALSE]
tryCatch(dbExecStatement(ps, d), error=efn)
+ nr <- nr + nrow(d)
+ if (progress) progress.bar(nr, nd)
}
nr <- dbGetRowsAffected(ps)
@@ -228,7 +234,7 @@
dbSendQuery(stmt$db, sql, ...)
}
-.my.sql.exec <- function(db, sql, ..., chunk.kb=256)
+.my.sql.exec <- function(db, sql, ..., chunk.kb=256, progress=FALSE)
{
efn <- function(e)
{
@@ -240,23 +246,27 @@
nr <- 0
ps <- tryCatch(.myPrepareStatement(db, sql, data), error=efn)
- if (regexpr('^insert.*\\svalues\\s*\\(', sql) > 0) {
+ nd <- nrow(data)
+ if (nd && (regexpr('^insert.*\\svalues\\s*\\(', sql) > 0)) {
+ if (progress) progress.bar(0, nd)
chunk.rows <- .chunk.rows(data, chunk.kb)
- for (lo in seq(1,nrow(data),chunk.rows)) {
- d <- data[lo:min(lo+chunk.rows-1, nrow(data)),,drop=FALSE]
+ for (lo in seq(1,nd,chunk.rows)) {
+ d <- data[lo:min(lo+chunk.rows-1,nd),,drop=FALSE]
rs <- tryCatch(.myInsertMultiple(ps, d), error=efn)
nr <- nr + dbGetRowsAffected(rs)
+ if (progress) progress.bar(nr, nd)
}
} else {
- for (i in 1:max(1,nrow(data))) {
- rs <- tryCatch(.myExecStatement(ps, data[i,,drop=FALSE]), error=efn)
+ for (i in 1:max(1,nd)) {
+ rs <- tryCatch(.myExecStatement(ps, data[i,,drop=FALSE]),
+ error=efn)
nr <- nr + dbGetRowsAffected(rs)
}
}
nr
}
-.lite.sql.exec <- function(db, sql, ..., chunk.kb=256)
+.lite.sql.exec <- function(db, sql, ..., chunk.kb=256, progress=FALSE)
{
efn <- function(e)
{
@@ -269,18 +279,21 @@
sql <- gsub(":[0-9]+", "?", sql)
nr <- 0
+ nd <- nrow(data)
chunk.rows <- .chunk.rows(data, chunk.kb)
dbBeginTransaction(db)
+ if (progress) progress.bar(0, nd)
for (lo in seq(1,max(1,nrow(data)),chunk.rows)) {
- d <- data[lo:min(lo+chunk.rows-1, nrow(data)),,drop=FALSE]
+ d <- data[lo:min(lo+chunk.rows-1,nd),,drop=FALSE]
rs <- tryCatch(dbSendPreparedQuery(db, sql, d), error=efn)
nr <- nr + dbGetRowsAffected(rs)
+ if (progress) progress.bar(nr, nd)
}
dbCommit(db)
nr
}
-sql.exec <- function(db, sql, ..., chunk.kb=256)
+sql.exec <- function(db, sql, ..., chunk.kb=256, progress=FALSE)
standardGeneric('sql.exec')
setGeneric('sql.exec', sql.exec)
Modified: pkg/gt.db/man/mk.assay.Rd
===================================================================
--- pkg/gt.db/man/mk.assay.Rd 2009-11-06 08:33:51 UTC (rev 27)
+++ pkg/gt.db/man/mk.assay.Rd 2010-02-12 07:58:41 UTC (rev 28)
@@ -1,5 +1,6 @@
%
% Copyright (C) 2009, Perlegen Sciences, Inc.
+% Copyright (C) 2010, 23andMe, Inc.
%
% Written by David A. Hinds <dhinds at sonic.net>
%
@@ -24,11 +25,13 @@
defined genotyping platform.
}
\usage{
-mk.assay(platform.name, data)
+mk.assay(platform.name, data, progress=FALSE)
}
\arguments{
\item{platform.name}{a short unique identifier for the platform.}
\item{data}{a data frame with one row per assay. See details.}
+ \item{progress}{logical: indicates whether to report progress while
+ the data is loaded.}
}
\details{
A data frame of assay information should provide four columns:
Modified: pkg/gt.db/man/mk.assay.data.Rd
===================================================================
--- pkg/gt.db/man/mk.assay.data.Rd 2009-11-06 08:33:51 UTC (rev 27)
+++ pkg/gt.db/man/mk.assay.data.Rd 2010-02-12 07:58:41 UTC (rev 28)
@@ -1,5 +1,6 @@
%
% Copyright (C) 2009, Perlegen Sciences, Inc.
+% Copyright (C) 2010, 23andMe, Inc.
%
% Written by David A. Hinds <dhinds at sonic.net>
%
@@ -25,11 +26,13 @@
defined genotype assays.
}
\usage{
-mk.assay.data(dataset.name, data)
+mk.assay.data(dataset.name, data, progress=FALSE)
}
\arguments{
\item{dataset.name}{a short unique identifier for the genotype dataset.}
\item{data}{a data frame with one row per assay. See details.}
+ \item{progress}{logical: indicates whether to report progress while
+ the data is loaded.}
}
\details{
A data frame of genotype data can provide up to five columns of
Modified: pkg/gt.db/man/mk.assay.position.Rd
===================================================================
--- pkg/gt.db/man/mk.assay.position.Rd 2009-11-06 08:33:51 UTC (rev 27)
+++ pkg/gt.db/man/mk.assay.position.Rd 2010-02-12 07:58:41 UTC (rev 28)
@@ -1,5 +1,6 @@
%
% Copyright (C) 2009, Perlegen Sciences, Inc.
+% Copyright (C) 2010, 23andMe, Inc.
%
% Written by David A. Hinds <dhinds at sonic.net>
%
@@ -25,12 +26,14 @@
genotyping assays.
}
\usage{
-mk.assay.position(platform.name, mapping.name, data)
+mk.assay.position(platform.name, mapping.name, data, progress=FALSE)
}
\arguments{
\item{platform.name}{a short unique identifier for the platform.}
\item{mapping.name}{the platform mapping to be populated.}
\item{data}{a data frame with one row per assay. See details.}
+ \item{progress}{logical: indicates whether to report progress while
+ the data is loaded.}
}
\details{
If \code{mapping.name} is missing, it will default to the current
Modified: pkg/gt.db/man/sql.query.Rd
===================================================================
--- pkg/gt.db/man/sql.query.Rd 2009-11-06 08:33:51 UTC (rev 27)
+++ pkg/gt.db/man/sql.query.Rd 2010-02-12 07:58:41 UTC (rev 28)
@@ -1,5 +1,6 @@
%
% Copyright (C) 2009, Perlegen Sciences, Inc.
+% Copyright (C) 2010, 23andMe, Inc.
%
% Written by David A. Hinds <dhinds at sonic.net>
%
@@ -32,7 +33,7 @@
}
\usage{
sql.query(db, sql, ...)
- sql.exec(db, sql, ..., chunk.kb=256)
+ sql.exec(db, sql, ..., chunk.kb=256, progress=FALSE)
}
\arguments{
\item{db}{a DBI connection object returned by
@@ -43,6 +44,8 @@
construct a data frame of bind variables.}
\item{chunk.kb}{when processing multiple rows of bind variables,
a rough limit on the amount of data to send per query.}
+ \item{progress}{logical: indicates whether to report progress during
+ long operations.}
}
\details{
To facilitate database agnostic code, several special elements can
More information about the Gtdb-commits
mailing list