[Gtdb-commits] r28 - in pkg/gt.db: R man

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Fri Feb 12 08:58:41 CET 2010


Author: dahinds
Date: 2010-02-12 08:58:41 +0100 (Fri, 12 Feb 2010)
New Revision: 28

Modified:
   pkg/gt.db/R/assay.R
   pkg/gt.db/R/sql.R
   pkg/gt.db/man/mk.assay.Rd
   pkg/gt.db/man/mk.assay.data.Rd
   pkg/gt.db/man/mk.assay.position.Rd
   pkg/gt.db/man/sql.query.Rd
Log:
- Added 'progress' argument to sql.exec, mk.assay, mk.assay.position,
  and mk.assay.data for viewing progress during long inserts.
- Fixed bug in my.sql.exec for insert with no bind data.



Modified: pkg/gt.db/R/assay.R
===================================================================
--- pkg/gt.db/R/assay.R	2009-11-06 08:33:51 UTC (rev 27)
+++ pkg/gt.db/R/assay.R	2010-02-12 07:58:41 UTC (rev 28)
@@ -1,5 +1,6 @@
 #
 # Copyright (C) 2009, Perlegen Sciences, Inc.
+# Copyright (C) 2010, 23andMe, Inc.
 #
 # Written by David A. Hinds <dhinds at sonic.net>
 #
@@ -114,15 +115,19 @@
     .filter.ids(data.frame(r, row.names=r$assay.name), show.ids)
 }
 
-mk.assay <- function(platform.name, data)
+mk.assay <- function(platform.name, data, progress=FALSE)
 {
     plat.id <- lookup.id('platform', platform.name)
     grp.id <- lookup.id('assay_group', unique(data$assay.group),
                         platform.id=plat.id)
     .check.name(data$assay.name)
+    r <- (regexpr('^[a-zA-Z]*_[a-zA-Z]*$', data$probe.seq) < 0)
+    if (any(r,na.rm=TRUE))
+        stop("invalid probe sequence(s)", call.=FALSE)
     sql <- 'insert into assay values (null,:1,:2,:3,:4,:5)'
     sql.exec(gt.db::.gt.db, sql, plat.id, grp.id[data$assay.group],
-             data[c('assay.name','alleles','probe.seq')])
+             data[c('assay.name','alleles','probe.seq')],
+             progress=progress)
 }
 
 #---------------------------------------------------------------------
@@ -149,7 +154,8 @@
     .filter.ids(data.frame(r,row.names=r$assay.name), show.ids)
 }
 
-mk.assay.position <- function(platform.name, mapping.name, data)
+mk.assay.position <-
+function(platform.name, mapping.name, data, progress=FALSE)
 {
     map.id <- lookup.mapping.id(platform.name, mapping.name)
     if (is.null(data$assay.id)) {
@@ -166,12 +172,13 @@
     data$ploidy <- .fixup.ploidy(data$ploidy)
     sql.exec(gt.db::.gt.db, sql, map.id,
              data[c('assay.id', 'scaffold', 'position', 'strand',
-                    'ploidy', 'dbsnp.rsid', 'dbsnp.orient')])
+                    'ploidy', 'dbsnp.rsid', 'dbsnp.orient')],
+             progress=progress)
 }
 
 #---------------------------------------------------------------------
 
-mk.assay.data <- function(dataset.name, data)
+mk.assay.data <- function(dataset.name, data, progress=FALSE)
 {
     dset.id <- lookup.id('dataset', dataset.name)
     if (is.null(data$flags)) data$flags <- 0
@@ -199,5 +206,5 @@
       values (null,:1,:2,:3,:4,%1$s(:5),%1$s(:6))'
     sql <- sprintf(sql, cvt.fn)
     cols <- c('assay.id','flags','genotype','qscore','raw.data')
-    sql.exec(gt.db::.gt.db, sql, dset.id, data[cols])
+    sql.exec(gt.db::.gt.db, sql, dset.id, data[cols], progress=progress)
 }

Modified: pkg/gt.db/R/sql.R
===================================================================
--- pkg/gt.db/R/sql.R	2009-11-06 08:33:51 UTC (rev 27)
+++ pkg/gt.db/R/sql.R	2010-02-12 07:58:41 UTC (rev 28)
@@ -1,5 +1,6 @@
 #
 # Copyright (C) 2009, Perlegen Sciences, Inc.
+# Copyright (C) 2010, 23andMe, Inc.
 #
 # Written by David A. Hinds <dhinds at sonic.net>
 #
@@ -188,7 +189,7 @@
     max(1, floor(1024 * chunk.kb / rowsz))
 }
 
-.ora.sql.exec <- function(db, sql, ..., chunk.kb=256)
+.ora.sql.exec <- function(db, sql, ..., chunk.kb=256, progress=FALSE)
 {
     efn <- function(e)
     {
@@ -201,9 +202,14 @@
 
     chunk.rows <- .chunk.rows(data, chunk.kb)
     ps <- tryCatch(dbPrepareStatement(db, sql, data), error=efn)
-    for (lo in seq(1,max(1,nrow(data)),chunk.rows)) {
-        d <- data[lo:min(lo+chunk.rows-1, nrow(data)),,drop=FALSE]
+    nr <- 0
+    nd <- nrow(data)
+    if (progress) progress.bar(0, nd)
+    for (lo in seq(1,max(1,nd),chunk.rows)) {
+        d <- data[lo:min(lo+chunk.rows-1,nd),,drop=FALSE]
         tryCatch(dbExecStatement(ps, d), error=efn)
+        nr <- nr + nrow(d)
+        if (progress) progress.bar(nr, nd)
     }
 
     nr <- dbGetRowsAffected(ps)
@@ -228,7 +234,7 @@
     dbSendQuery(stmt$db, sql, ...)
 }
 
-.my.sql.exec <- function(db, sql, ..., chunk.kb=256)
+.my.sql.exec <- function(db, sql, ..., chunk.kb=256, progress=FALSE)
 {
     efn <- function(e)
     {
@@ -240,23 +246,27 @@
     nr <- 0
 
     ps <- tryCatch(.myPrepareStatement(db, sql, data), error=efn)
-    if (regexpr('^insert.*\\svalues\\s*\\(', sql) > 0) {
+    nd <- nrow(data)
+    if (nd && (regexpr('^insert.*\\svalues\\s*\\(', sql) > 0)) {
+        if (progress) progress.bar(0, nd)
         chunk.rows <- .chunk.rows(data, chunk.kb)
-        for (lo in seq(1,nrow(data),chunk.rows)) {
-            d <- data[lo:min(lo+chunk.rows-1, nrow(data)),,drop=FALSE]
+        for (lo in seq(1,nd,chunk.rows)) {
+            d <- data[lo:min(lo+chunk.rows-1,nd),,drop=FALSE]
             rs <- tryCatch(.myInsertMultiple(ps, d), error=efn)
             nr <- nr + dbGetRowsAffected(rs)
+            if (progress) progress.bar(nr, nd)
         }
     } else {
-        for (i in 1:max(1,nrow(data))) {
-            rs <- tryCatch(.myExecStatement(ps, data[i,,drop=FALSE]), error=efn)
+        for (i in 1:max(1,nd)) {
+            rs <- tryCatch(.myExecStatement(ps, data[i,,drop=FALSE]),
+                           error=efn)
             nr <- nr + dbGetRowsAffected(rs)
         }
     }
     nr
 }
 
-.lite.sql.exec <- function(db, sql, ..., chunk.kb=256)
+.lite.sql.exec <- function(db, sql, ..., chunk.kb=256, progress=FALSE)
 {
     efn <- function(e)
     {
@@ -269,18 +279,21 @@
     sql <- gsub(":[0-9]+", "?", sql)
 
     nr <- 0
+    nd <- nrow(data)
     chunk.rows <- .chunk.rows(data, chunk.kb)
     dbBeginTransaction(db)
+    if (progress) progress.bar(0, nd)
     for (lo in seq(1,max(1,nrow(data)),chunk.rows)) {
-        d <- data[lo:min(lo+chunk.rows-1, nrow(data)),,drop=FALSE]
+        d <- data[lo:min(lo+chunk.rows-1,nd),,drop=FALSE]
         rs <- tryCatch(dbSendPreparedQuery(db, sql, d), error=efn)
         nr <- nr + dbGetRowsAffected(rs)
+        if (progress) progress.bar(nr, nd)
     }
     dbCommit(db)
     nr
 }
 
-sql.exec <- function(db, sql, ..., chunk.kb=256)
+sql.exec <- function(db, sql, ..., chunk.kb=256, progress=FALSE)
 standardGeneric('sql.exec')
 
 setGeneric('sql.exec', sql.exec)

Modified: pkg/gt.db/man/mk.assay.Rd
===================================================================
--- pkg/gt.db/man/mk.assay.Rd	2009-11-06 08:33:51 UTC (rev 27)
+++ pkg/gt.db/man/mk.assay.Rd	2010-02-12 07:58:41 UTC (rev 28)
@@ -1,5 +1,6 @@
 %
 % Copyright (C) 2009, Perlegen Sciences, Inc.
+% Copyright (C) 2010, 23andMe, Inc.
 % 
 % Written by David A. Hinds <dhinds at sonic.net>
 % 
@@ -24,11 +25,13 @@
   defined genotyping platform.
 }
 \usage{
-mk.assay(platform.name, data)
+mk.assay(platform.name, data, progress=FALSE)
 }
 \arguments{
   \item{platform.name}{a short unique identifier for the platform.}
   \item{data}{a data frame with one row per assay.  See details.}
+  \item{progress}{logical: indicates whether to report progress while
+    the data is loaded.}
 }
 \details{
   A data frame of assay information should provide four columns:

Modified: pkg/gt.db/man/mk.assay.data.Rd
===================================================================
--- pkg/gt.db/man/mk.assay.data.Rd	2009-11-06 08:33:51 UTC (rev 27)
+++ pkg/gt.db/man/mk.assay.data.Rd	2010-02-12 07:58:41 UTC (rev 28)
@@ -1,5 +1,6 @@
 %
 % Copyright (C) 2009, Perlegen Sciences, Inc.
+% Copyright (C) 2010, 23andMe, Inc.
 % 
 % Written by David A. Hinds <dhinds at sonic.net>
 % 
@@ -25,11 +26,13 @@
   defined genotype assays.
 }
 \usage{
-mk.assay.data(dataset.name, data)
+mk.assay.data(dataset.name, data, progress=FALSE)
 }
 \arguments{
   \item{dataset.name}{a short unique identifier for the genotype dataset.}
   \item{data}{a data frame with one row per assay.  See details.}
+  \item{progress}{logical: indicates whether to report progress while
+    the data is loaded.}
 }
 \details{
   A data frame of genotype data can provide up to five columns of

Modified: pkg/gt.db/man/mk.assay.position.Rd
===================================================================
--- pkg/gt.db/man/mk.assay.position.Rd	2009-11-06 08:33:51 UTC (rev 27)
+++ pkg/gt.db/man/mk.assay.position.Rd	2010-02-12 07:58:41 UTC (rev 28)
@@ -1,5 +1,6 @@
 %
 % Copyright (C) 2009, Perlegen Sciences, Inc.
+% Copyright (C) 2010, 23andMe, Inc.
 % 
 % Written by David A. Hinds <dhinds at sonic.net>
 % 
@@ -25,12 +26,14 @@
   genotyping assays.
 }
 \usage{
-mk.assay.position(platform.name, mapping.name, data)
+mk.assay.position(platform.name, mapping.name, data, progress=FALSE)
 }
 \arguments{
   \item{platform.name}{a short unique identifier for the platform.}
   \item{mapping.name}{the platform mapping to be populated.}
   \item{data}{a data frame with one row per assay.  See details.}
+  \item{progress}{logical: indicates whether to report progress while
+    the data is loaded.}
 }
 \details{
   If \code{mapping.name} is missing, it will default to the current

Modified: pkg/gt.db/man/sql.query.Rd
===================================================================
--- pkg/gt.db/man/sql.query.Rd	2009-11-06 08:33:51 UTC (rev 27)
+++ pkg/gt.db/man/sql.query.Rd	2010-02-12 07:58:41 UTC (rev 28)
@@ -1,5 +1,6 @@
 %
 % Copyright (C) 2009, Perlegen Sciences, Inc.
+% Copyright (C) 2010, 23andMe, Inc.
 % 
 % Written by David A. Hinds <dhinds at sonic.net>
 % 
@@ -32,7 +33,7 @@
 }
 \usage{
   sql.query(db, sql, ...)
-  sql.exec(db, sql, ..., chunk.kb=256)
+  sql.exec(db, sql, ..., chunk.kb=256, progress=FALSE)
 }
 \arguments{
   \item{db}{a DBI connection object returned by
@@ -43,6 +44,8 @@
     construct a data frame of bind variables.}
   \item{chunk.kb}{when processing multiple rows of bind variables,
     a rough limit on the amount of data to send per query.}
+  \item{progress}{logical: indicates whether to report progress during
+    long operations.}
 }
 \details{
   To facilitate database agnostic code, several special elements can



More information about the Gtdb-commits mailing list