[Gtdb-commits] r29 - in pkg/gt.db: . R man src
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Sat Feb 13 09:37:32 CET 2010
Author: dahinds
Date: 2010-02-13 09:37:31 +0100 (Sat, 13 Feb 2010)
New Revision: 29
Added:
pkg/gt.db/R/rawdata.R
Modified:
pkg/gt.db/INDEX
pkg/gt.db/R/admin.R
pkg/gt.db/R/cluster.R
pkg/gt.db/R/genotype.R
pkg/gt.db/R/misc.R
pkg/gt.db/R/sql.R
pkg/gt.db/man/mk.assay.data.Rd
pkg/gt.db/man/mk.dataset.Rd
pkg/gt.db/man/rawToHex.Rd
pkg/gt.db/man/reshape.gt.data.Rd
pkg/gt.db/src/raw.c
Log:
- reorganized code for working with "raw data"
- added a new raw data format 'chpdata' for Affymetrix CHP data
- minor updates to SQL code to accommodate latest RSQLite
- made rawToHex/hexToRaw more flexible
Modified: pkg/gt.db/INDEX
===================================================================
--- pkg/gt.db/INDEX 2010-02-12 07:58:41 UTC (rev 28)
+++ pkg/gt.db/INDEX 2010-02-13 08:37:31 UTC (rev 29)
@@ -1,131 +1,131 @@
-adjust.gt.calls Manually Assign Genotype Calls
-apply.gt.dataset Apply a Function to a Genotype Dataset
-apply.loadings Apply PCA SNP Loadings to a New Genotype Set
-as.mask Convert To/From Character Masks
-assay.dat.01 Genotype Data for 5000 SNPs on 270 HapMap
- Samples
-assay.dat.02 Genotype Data for 371 Chr21 SNPs on 275 HapMap
- Samples
-big.loadings Identify Large Sample or SNP Loadings from a
- PCA Analysis
-ch.table Character Based Contingency Table
-fetch.gt.data Load Genotype Data for a Genotyping Dataset
-fetch.prcomp Load Principal Components Results for a
- Genotyping Dataset
-fetch.pt.data Load Phenotype Data for a Genotype Dataset
-fetch.sample.data Load Sample Data for a Genotype Dataset
-fetch.subject.data Load Subject Data for a Genotyping Project
-fetch.test.scores Load Association Test Results for a Genotyping
- Dataset
-gplot Genome-Wide Level Plot
-gplot.prcomp Genome-Wide Level Plots of SNP Loadings from
- PCA
-gt.cluster.plot Plot Genotype Cluster Data
-gt.dataset Genotype Dataset Specification
-gt.demo.check Check for Presence of GT.DB Demo Datasets
-gt.dist Calculate Pairwise Genotype Distances
-gt.split Convert between Packed Genotype Strings and
- Genotype Vectors
-hapmap.subjects Subject Data from the International HapMap
- Project
-hexToRaw Convert between Raw Vectors and Hex Strings
-hwe.test Tests for Hardy Weinberg Equilibrium
-ibd.dataset Calculate Identity by Descent for a Genotype
- Dataset
-ibd.gt.data Estimate Pairwise Identity by Descent for
- Genotype Data
-ibd.plot Plot Identity by Descent Data
-ibd.summary Summarize Identity by Descent Analysis Results
-ibs.gt.data Calculate Pairwise Identity by State for
- Genotype Data
-if.na Conditional Element Selection for Missing
- Values
-init.gt.db Initialize GT.DB Database
-jt.test Jonckheere-Terpstra Nonparametric Test for
- Trend
-keep.attr Keep User Attributes
-ld.gt.data Compute Pairwise Linkage Disequilibrium
-ld.plot Pairwise Linkage Disequilibrium Plot
-ld.prune Prune SNP List to Limit Linkage Disequilibrium
-ls.assay List Assay Definitions
-ls.assay.group List Assay Groups
-ls.assay.position List Assay Positions
-ls.dataset List Genotype Datasets
-ls.mapping List Assay Mapping Sets
-ls.platform List Genotyping Platforms
-ls.prcomp List Principal Components Result Sets
-ls.project List Genotyping Projects
-ls.sample List Samples in a Genotype Dataset
-ls.subject List Subjects in a Genotyping Project
-ls.test List Association Test Result Sets
-mask.gt.data Mask Sample Genotypes
-mask.str Mask Character Strings
-match.gt.data Identify Equivalent Genotyping Assays
-mk.assay Create Genotyping Assay Definitions
-mk.assay.data Create Genotyping Assay Data
-mk.assay.group Create or Remove a Genotype Assay Group
-mk.assay.position Create Genotyping Assay Positions
-mk.attr Create, Remove, and List Attribute Definitions
-mk.dataset Create or Remove Genotype Datasets
-mk.mapping Create or Remove a Mapping for a Genotyping
- Platform
-mk.platform Create or Remove a Genotyping Platform
-mk.project Create or Remove a Genotyping Project
-mk.sample Create or Remove Dataset Samples
-mk.sample.attr Create, Remove, or List Sample Attribute
- Definitions
-mk.subject Create or Remove Project Subjects
-mk.subject.attr Create, Remove, or List Subject Attribute
- Definitions
-na.if Conditional Conversion to Missing Values
-nsubstr Count Substring Instances
-nw.align Needleman and Wunsch Sequence Alignment
-nw.orient.assay Orient Assay Sequences
-orient.gt.data Flip Assay Strands and/or Swap Alleles
-panel.cluster Panel Function for Drawing Elliptical Cluster
- Boundaries
-panel.qqpval Quantile-Quantile Plots for P Values: Panel
- Functions
-panel.qqthin Sparse Normal Quantile-Quantile Plots: Panel
- Function
-prcomp.gt.data Principal Components Analysis of Genotype Data
-progress.bar Console Text-Based Progress Bar
-qqprcomp Quantile-Quantile Plots of PCA Loadings
-qqpval Quantile-Quantile Plots for P Values
-qqthin Sparse Normal Quantile-Quantile Plots
-reshape.gt.data Reshape Genotype Data
-revcomp Reverse Complement DNA Sequences
-score.and.store Test SNPs for Association and Store Results
-score.chisq Chi-Squared Test for Genotypic Association
-score.chisq.2x2 Chi-Squared Test for Allelic Association
-score.fisher Fisher's Exact Test for Genotypic Association
-score.glm Test for Association using Logistic Regression
-score.glm.general Association Test with a Logistic Model and
- General Mode of Action
-score.glm.groups Test for Association using a Logistic Model
- with Subgroup Effects
-score.gt.data Test SNPs for Association
-score.jt Jonckheere-Terpstra Nonparametric Test for
- Association
-score.kruskal Kruskal-Wallis Nonparametric Test for
- Association
-score.lm Test for Association using a Simple Linear
- Model
-score.lm.general Test for Association using a Linear Model and
- General
-score.lm.groups Test for Association using a Linear Model with
- Subgroup Effects
-score.prcomp Test Phenotypic Association with Principal
- Components
-score.trend Cochran-Armitage Trend Test for Association
-set.hidden Update Hidden Status
-snp.loadings SNP Loadings from Principal Components Analyses
-sql.query Simplified SQL Statement Execution
-store.prcomp Store or Remove Principal Components Results
-store.sample.data Store Sample Data
-store.subject.data Store Subject Data
-store.test.scores Store or Remove Association Test Results
-summary.gt.data Genotype Data Summary
-unpack.gt.matrix Convert Packed Genotype Strings to a Genotype
- Matrix
-use.gt.db Define GT.DB Database Connection
+adjust.gt.calls Manually Assign Genotype Calls
+apply.gt.dataset Apply a Function to a Genotype Dataset
+apply.loadings Apply PCA SNP Loadings to a New Genotype Set
+as.mask Convert To/From Character Masks
+assay.dat.01 Genotype Data for 5000 SNPs on 270 HapMap
+ Samples
+assay.dat.02 Genotype Data for 371 Chr21 SNPs on 275 HapMap
+ Samples
+big.loadings Identify Large Sample or SNP Loadings from a
+ PCA Analysis
+ch.table Character Based Contingency Table
+fetch.gt.data Load Genotype Data for a Genotyping Dataset
+fetch.prcomp Load Principal Components Results for a
+ Genotyping Dataset
+fetch.pt.data Load Phenotype Data for a Genotype Dataset
+fetch.sample.data Load Sample Data for a Genotype Dataset
+fetch.subject.data Load Subject Data for a Genotyping Project
+fetch.test.scores Load Association Test Results for a Genotyping
+ Dataset
+gplot Genome-Wide Level Plot
+gplot.prcomp Genome-Wide Level Plots of SNP Loadings from
+ PCA
+gt.cluster.plot Plot Genotype Cluster Data
+gt.dataset Genotype Dataset Specification
+gt.demo.check Check for Presence of GT.DB Demo Datasets
+gt.dist Calculate Pairwise Genotype Distances
+gt.split Convert between Packed Genotype Strings and
+ Genotype Vectors
+hapmap.subjects Subject Data from the International HapMap
+ Project
+hexToRaw Convert between Raw Vectors and Hex Strings
+hwe.test Tests for Hardy Weinberg Equilibrium
+ibd.dataset Calculate Identity by Descent for a Genotype
+ Dataset
+ibd.gt.data Estimate Pairwise Identity by Descent for
+ Genotype Data
+ibd.plot Plot Identity by Descent Data
+ibd.summary Summarize Identity by Descent Analysis Results
+ibs.gt.data Calculate Pairwise Identity by State for
+ Genotype Data
+if.na Conditional Element Selection for Missing
+ Values
+init.gt.db Initialize GT.DB Database
+jt.test Jonckheere-Terpstra Nonparametric Test for
+ Trend
+keep.attr Keep User Attributes
+ld.gt.data Compute Pairwise Linkage Disequilibrium
+ld.plot Pairwise Linkage Disequilibrium Plot
+ld.prune Prune SNP List to Limit Linkage Disequilibrium
+ls.assay List Assay Definitions
+ls.assay.group List Assay Groups
+ls.assay.position List Assay Positions
+ls.dataset List Genotype Datasets
+ls.mapping List Assay Mapping Sets
+ls.platform List Genotyping Platforms
+ls.prcomp List Principal Components Result Sets
+ls.project List Genotyping Projects
+ls.sample List Samples in a Genotype Dataset
+ls.subject List Subjects in a Genotyping Project
+ls.test List Association Test Result Sets
+mask.gt.data Mask Sample Genotypes
+mask.str Mask Character Strings
+match.gt.data Identify Equivalent Genotyping Assays
+mk.assay Create Genotyping Assay Definitions
+mk.assay.data Create Genotyping Assay Data
+mk.assay.group Create or Remove a Genotype Assay Group
+mk.assay.position Create Genotyping Assay Positions
+mk.attr Create, Remove, and List Attribute Definitions
+mk.dataset Create or Remove Genotype Datasets
+mk.mapping Create or Remove a Mapping for a Genotyping
+ Platform
+mk.platform Create or Remove a Genotyping Platform
+mk.project Create or Remove a Genotyping Project
+mk.sample Create or Remove Dataset Samples
+mk.sample.attr Create, Remove, or List Sample Attribute
+ Definitions
+mk.subject Create or Remove Project Subjects
+mk.subject.attr Create, Remove, or List Subject Attribute
+ Definitions
+na.if Conditional Conversion to Missing Values
+nsubstr Count Substring Instances
+nw.align Needleman and Wunsch Sequence Alignment
+nw.orient.assay Orient Assay Sequences
+orient.gt.data Flip Assay Strands and/or Swap Alleles
+panel.cluster Panel Function for Drawing Elliptical Cluster
+ Boundaries
+panel.qqpval Quantile-Quantile Plots for P Values: Panel
+ Functions
+panel.qqthin Sparse Normal Quantile-Quantile Plots: Panel
+ Function
+prcomp.gt.data Principal Components Analysis of Genotype Data
+progress.bar Console Text-Based Progress Bar
+qqprcomp Quantile-Quantile Plots of PCA Loadings
+qqpval Quantile-Quantile Plots for P Values
+qqthin Sparse Normal Quantile-Quantile Plots
+reshape.gt.data Reshape Genotype Data
+revcomp Reverse Complement DNA Sequences
+score.and.store Test SNPs for Association and Store Results
+score.chisq Chi-Squared Test for Genotypic Association
+score.chisq.2x2 Chi-Squared Test for Allelic Association
+score.fisher Fisher's Exact Test for Genotypic Association
+score.glm Test for Association using Logistic Regression
+score.glm.general Association Test with a Logistic Model and
+ General Mode of Action
+score.glm.groups Test for Association using a Logistic Model
+ with Subgroup Effects
+score.gt.data Test SNPs for Association
+score.jt Jonckheere-Terpstra Nonparametric Test for
+ Association
+score.kruskal Kruskal-Wallis Nonparametric Test for
+ Association
+score.lm Test for Association using a Simple Linear
+ Model
+score.lm.general Test for Association using a Linear Model and
+ General
+score.lm.groups Test for Association using a Linear Model with
+ Subgroup Effects
+score.prcomp Test Phenotypic Association with Principal
+ Components
+score.trend Cochran-Armitage Trend Test for Association
+set.hidden Update Hidden Status
+snp.loadings SNP Loadings from Principal Components Analyses
+sql.query Simplified SQL Statement Execution
+store.prcomp Store or Remove Principal Components Results
+store.sample.data Store Sample Data
+store.subject.data Store Subject Data
+store.test.scores Store or Remove Association Test Results
+summary.gt.data Genotype Data Summary
+unpack.gt.matrix Convert Packed Genotype Strings to a Genotype
+ Matrix
+use.gt.db Define GT.DB Database Connection
Modified: pkg/gt.db/R/admin.R
===================================================================
--- pkg/gt.db/R/admin.R 2010-02-12 07:58:41 UTC (rev 28)
+++ pkg/gt.db/R/admin.R 2010-02-13 08:37:31 UTC (rev 29)
@@ -1,5 +1,6 @@
#
# Copyright (C) 2009, Perlegen Sciences, Inc.
+# Copyright (C) 2010, 23andMe, Inc.
#
# Written by David A. Hinds <dhinds at sonic.net>
#
@@ -113,7 +114,7 @@
mk.dataset <-
function(dataset.name, project.name, platform.name,
- description, raw.layout=c(NA,'signal','seqread'),
+ description, raw.layout=c(NA,'signal','seqread','chpdata'),
is.hidden=FALSE)
{
raw.layout <- match.arg(raw.layout)
Modified: pkg/gt.db/R/cluster.R
===================================================================
--- pkg/gt.db/R/cluster.R 2010-02-12 07:58:41 UTC (rev 28)
+++ pkg/gt.db/R/cluster.R 2010-02-13 08:37:31 UTC (rev 29)
@@ -1,5 +1,6 @@
#
# Copyright (C) 2009, Perlegen Sciences, Inc.
+# Copyright (C) 2010, 23andMe, Inc.
#
# Written by David A. Hinds <dhinds at sonic.net>
#
@@ -54,28 +55,56 @@
between=list(x=0.5,y=0.5), scales=list(alternating=0),
xlab=NULL, ylab=NULL, par.settings=.gt.settings, ...)
{
- prepanel.equal <- function(...)
- {
- p <- prepanel.default.xyplot(...)
- p$xlim <- c(min(p$xlim,p$ylim), max(p$xlim,p$ylim))
- p
- }
if ('signal.a' %in% names(data)) {
x <- data$signal.a
y <- data$signal.b
+ equal <- TRUE
} else if ('fwd.a' %in% names(data)) {
x <- data$fwd.a+data$rev.a
y <- data$fwd.b+data$rev.b
+ equal <- TRUE
+ } else if ('strength' %in% names(data)) {
+ x <- data$log.ratio
+ y <- data$strength
+ equal <- FALSE
} else {
stop("raw data not available")
}
+
+ if (equal) {
+ # Equal scaling of X and Y
+ prepanel <- function(...)
+ {
+ p <- prepanel.default.xyplot(...)
+ p$xlim <- p$ylim <- c(min(p$xlim,p$ylim), max(p$xlim,p$ylim))
+ p
+ }
+ } else {
+ # center X at 0
+ prepanel <- function(...)
+ {
+ p <- prepanel.default.xyplot(...)
+ p$xlim <- c(-1,1)*max(abs(p$xlim))
+ p
+ }
+ }
+
n <- factor(data$assay.name)
if (rescale) {
- q <- tapply(c(x,y), rep(n,2), range, na.rm=TRUE)
- q <- do.call('rbind', q)
- x <- 0.01 + 0.98*(x - q[n,1])/(q[n,2]-q[n,1])
- y <- 0.01 + 0.98*(y - q[n,1])/(q[n,2]-q[n,1])
+ if (equal) {
+ q <- tapply(c(x,y), rep(n,2), range, na.rm=TRUE)
+ q <- do.call('rbind', q)
+ x <- 0.01 + 0.98*(x - q[n,1])/(q[n,2]-q[n,1])
+ y <- 0.01 + 0.98*(y - q[n,1])/(q[n,2]-q[n,1])
+ } else {
+ q <- tapply(x, n, function(x) max(abs(x),na.rm=TRUE))
+ x <- x / q[n]
+ q <- tapply(y, n, range, na.rm=TRUE)
+ q <- do.call('rbind', q)
+ y <- 0.01 + 0.98*(y - q[n,1])/(q[n,2]-q[n,1])
+ }
}
+
gt <- data$genotype
if (is.numeric(gt)) {
gt <- factor(gt, levels=0:3)
@@ -88,7 +117,7 @@
gt[is.na(gt)] <- nn
}
p <- xyplot(y~x|n, groups=gt, bounds=bounds, min.points=min.points,
- scales=scales, prepanel=prepanel.equal, aspect=1,
+ scales=scales, prepanel=prepanel, aspect=1,
panel=panel.superpose, panel.groups=panel.cluster,
xlab=xlab, ylab=ylab, between=between,
par.settings=par.settings, ...)
Modified: pkg/gt.db/R/genotype.R
===================================================================
--- pkg/gt.db/R/genotype.R 2010-02-12 07:58:41 UTC (rev 28)
+++ pkg/gt.db/R/genotype.R 2010-02-13 08:37:31 UTC (rev 29)
@@ -1,5 +1,6 @@
#
# Copyright (C) 2009, Perlegen Sciences, Inc.
+# Copyright (C) 2010, 23andMe, Inc.
#
# Written by David A. Hinds <dhinds at sonic.net>
#
@@ -216,23 +217,7 @@
f <- attr(gt.data,'raw.layout')
if (is.null(f))
stop('raw data layout unavailable')
- nr <- nrow(d)
- if (f == 'signal') {
- i <- readBin(cvt.fn(gt.data$raw.data), what='int',
- n=2*nr, size=2, signed=FALSE, endian='little')
- i <- na.if(i, 65535)
- dim(i) <- c(2,nr)
- cbind(d, signal.a=i[1,], signal.b=i[2,])
- } else if (f == 'seqread') {
- i <- readBin(cvt.fn(gt.data$raw.data), what='int',
- n=4*nr, size=1, signed=FALSE, endian='little')
- i <- na.if(i, 255)
- dim(i) <- c(4,nr)
- cbind(d, fwd.a=i[1,], rev.a=i[2,], fwd.b=i[3,], rev.b=i[4,])
- } else {
- warning('unknown raw data layout')
- d
- }
+ cbind(d, unpack.raw.data(cvt.fn(gt.data$raw.data), f))
}
.mask.dat <- function(str, mask, squeeze=FALSE)
Modified: pkg/gt.db/R/misc.R
===================================================================
--- pkg/gt.db/R/misc.R 2010-02-12 07:58:41 UTC (rev 28)
+++ pkg/gt.db/R/misc.R 2010-02-13 08:37:31 UTC (rev 29)
@@ -1,5 +1,6 @@
#
# Copyright (C) 2009, Perlegen Sciences, Inc.
+# Copyright (C) 2010, 23andMe, Inc.
#
# Written by David A. Hinds <dhinds at sonic.net>
#
@@ -30,8 +31,14 @@
rawToHex <- function(raw)
.Call("raw_to_hex", raw, PACKAGE="gt.db")
-hexToRaw <- function(hex)
-.Call("hex_to_raw", hex, PACKAGE="gt.db")
+hexToRaw <- function(hex, drop=TRUE)
+{
+ x <- .Call("hex_to_raw", hex, PACKAGE="gt.db")
+ if (drop && (ncol(x) <= 1))
+ as.vector(x)
+ else
+ x
+}
#---------------------------------------------------------------------
Added: pkg/gt.db/R/rawdata.R
===================================================================
--- pkg/gt.db/R/rawdata.R (rev 0)
+++ pkg/gt.db/R/rawdata.R 2010-02-13 08:37:31 UTC (rev 29)
@@ -0,0 +1,111 @@
+#
+# Copyright (C) 2010, 23andMe, Inc.
+#
+# Written by David A. Hinds <dhinds at sonic.net>
+#
+# This is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the license, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>
+#
+
+#---------------------------------------------------------------------
+
+.unpack.signal <- function(data)
+{
+ nr <- length(data)/4
+ i <- readBin(data, what='int', n=2*nr, size=2,
+ signed=FALSE, endian='little')
+ i <- na.if(i, 65535)
+ dim(i) <- c(2,nr)
+ data.frame(signal.a=i[1,], signal.b=i[2,])
+}
+
+.pack.signal <- function(data)
+{
+ a <- writeBin(if.na(data$signal.a,65535),
+ raw(), size=2, endian='little')
+ b <- writeBin(if.na(data$signal.b,65535),
+ raw(), size=2, endian='little')
+ dim(a) <- dim(b) <- c(2,nrow(data))
+ as.vector(rbind(a,b))
+}
+
+#---------------------------------------------------------------------
+
+.unpack.seqread <- function(data)
+{
+ i <- na.if(as.integer(data), 255)
+ dim(i) <- c(4,length(i)/4)
+ data.frame(fwd.a=i[1,], rev.a=i[2,], fwd.b=i[3,], rev.b=i[4,])
+}
+
+.pack.seqread <- function(data)
+{
+ as.vector(rbind(as.raw(if.na(data$fwd.a,255)),
+ as.raw(if.na(data$rev.a,255)),
+ as.raw(if.na(data$fwd.b,255)),
+ as.raw(if.na(data$rev.b,255))))
+}
+
+#---------------------------------------------------------------------
+
+.unpack.chpdata <- function(data)
+{
+ nr <- length(data)/5
+ dim(data) <- c(5,nr)
+ i <- readBin(data[1:2,], what='int', n=nr, size=2,
+ signed=TRUE, endian='little')
+ j <- readBin(data[3:4,], what='int', n=nr, size=2,
+ signed=FALSE, endian='little')
+ k <- factor(as.integer(data[5,]), levels=1:4,
+ labels=c('a','h','b','n'))
+ data.frame(log.ratio=i/256, strength=j/256, forced.call=k)
+}
+
+.pack.chpdata <- function(data)
+{
+ x <- writeBin(as.integer(256*data$log.ratio),
+ raw(), size=2, endian='little')
+ y <- writeBin(as.integer(256*data$strength),
+ raw(), size=2, endian='little')
+ z <- as.raw(data$forced.call)
+ dim(x) <- dim(y) <- c(2,nrow(data))
+ as.vector(rbind(x,y,z))
+}
+
+#---------------------------------------------------------------------
+
+unpack.raw.data <- function(data, raw.layout)
+{
+ if (raw.layout == 'signal') {
+ .unpack.signal(data)
+ } else if (raw.layout == 'seqread') {
+ .unpack.seqread(data)
+ } else if (raw.layout == 'chpdata') {
+ .unpack.chpdata(data)
+ } else {
+ stop('unknown raw data layout')
+ }
+}
+
+pack.raw.data <- function(data, raw.layout)
+{
+ if (raw.layout == 'signal') {
+ .pack.signal(data)
+ } else if (raw.layout == 'seqread') {
+ .pack.seqread(data)
+ } else if (raw.layout == 'chpdata') {
+ .pack.chpdata(data)
+ } else {
+ stop('unknown raw data layout')
+ }
+}
Modified: pkg/gt.db/R/sql.R
===================================================================
--- pkg/gt.db/R/sql.R 2010-02-12 07:58:41 UTC (rev 28)
+++ pkg/gt.db/R/sql.R 2010-02-13 08:37:31 UTC (rev 29)
@@ -204,12 +204,12 @@
ps <- tryCatch(dbPrepareStatement(db, sql, data), error=efn)
nr <- 0
nd <- nrow(data)
- if (progress) progress.bar(0, nd)
+ if (nd && progress) progress.bar(0, nd)
for (lo in seq(1,max(1,nd),chunk.rows)) {
d <- data[lo:min(lo+chunk.rows-1,nd),,drop=FALSE]
tryCatch(dbExecStatement(ps, d), error=efn)
nr <- nr + nrow(d)
- if (progress) progress.bar(nr, nd)
+ if (nd && progress) progress.bar(nr, nd)
}
nr <- dbGetRowsAffected(ps)
@@ -247,8 +247,8 @@
ps <- tryCatch(.myPrepareStatement(db, sql, data), error=efn)
nd <- nrow(data)
+ if (nd && progress) progress.bar(0, nd)
if (nd && (regexpr('^insert.*\\svalues\\s*\\(', sql) > 0)) {
- if (progress) progress.bar(0, nd)
chunk.rows <- .chunk.rows(data, chunk.kb)
for (lo in seq(1,nd,chunk.rows)) {
d <- data[lo:min(lo+chunk.rows-1,nd),,drop=FALSE]
@@ -261,6 +261,7 @@
rs <- tryCatch(.myExecStatement(ps, data[i,,drop=FALSE]),
error=efn)
nr <- nr + dbGetRowsAffected(rs)
+ if (nd && progress) progress.bar(i, nd)
}
}
nr
@@ -274,20 +275,30 @@
.dbClearAll(db)
stop(e)
}
- data <- .sql.prep.data(...)
sql <- .fixup.sql(db, sql)
sql <- gsub(":[0-9]+", "?", sql)
- nr <- 0
+ if (!length(list(...))) {
+ dbBeginTransaction(db)
+ rs <- tryCatch(dbSendQuery(db, sql), error=efn)
+ nr <- dbGetRowsAffected(rs)
+ dbCommit(db)
+ return(nr)
+ }
+
+ data <- .sql.prep.data(...)
+ nu <- nr <- 0
nd <- nrow(data)
+ if (!nd) return(0)
chunk.rows <- .chunk.rows(data, chunk.kb)
dbBeginTransaction(db)
if (progress) progress.bar(0, nd)
- for (lo in seq(1,max(1,nrow(data)),chunk.rows)) {
+ for (lo in seq(1,nd,chunk.rows)) {
d <- data[lo:min(lo+chunk.rows-1,nd),,drop=FALSE]
rs <- tryCatch(dbSendPreparedQuery(db, sql, d), error=efn)
nr <- nr + dbGetRowsAffected(rs)
- if (progress) progress.bar(nr, nd)
+ nu <- nu + nrow(d)
+ if (progress) progress.bar(nu, nd)
}
dbCommit(db)
nr
Modified: pkg/gt.db/man/mk.assay.data.Rd
===================================================================
--- pkg/gt.db/man/mk.assay.data.Rd 2010-02-12 07:58:41 UTC (rev 28)
+++ pkg/gt.db/man/mk.assay.data.Rd 2010-02-13 08:37:31 UTC (rev 29)
@@ -48,11 +48,13 @@
An \code{assay.name} column can be supplied in place of
\code{assay.id}.
- Two raw data layouts are currently supported. The 'signal' layout
+ Three raw data layouts are currently supported. The 'signal' layout
consists of a 16-bit unsigned little endian value for each allele.
- The 'seqread' layout consists of 8-bit unsigned counts for forward
- and reverse orientations for each allele, representing read counts
- from a sequencing experiment.
+ The 'seqread' layout consists of 8-bit unsigned counts for forward and
+ reverse orientations for each allele, representing read counts from a
+ sequencing experiment. The 'chpdata' layout includes a 16-bit 8.8
+ fixed point log allele ratio, a 16-bit 8.8 log signal strength, and
+ a 'forced' genotype call.
}
\value{
If successful, the number of rows inserted into the assay data
Modified: pkg/gt.db/man/mk.dataset.Rd
===================================================================
--- pkg/gt.db/man/mk.dataset.Rd 2010-02-12 07:58:41 UTC (rev 28)
+++ pkg/gt.db/man/mk.dataset.Rd 2010-02-13 08:37:31 UTC (rev 29)
@@ -1,5 +1,6 @@
%
% Copyright (C) 2009, Perlegen Sciences, Inc.
+% Copyright (C) 2010, 23andMe, Inc.
%
% Written by David A. Hinds <dhinds at sonic.net>
%
@@ -26,7 +27,7 @@
}
\usage{
mk.dataset(dataset.name, project.name, platform.name,
- description, raw.layout=c(NA,'signal','seqread'),
+ description, raw.layout=c(NA,'signal','seqread','chpdata'),
is.hidden=FALSE)
rm.dataset(dataset.name)
}
Modified: pkg/gt.db/man/rawToHex.Rd
===================================================================
--- pkg/gt.db/man/rawToHex.Rd 2010-02-12 07:58:41 UTC (rev 28)
+++ pkg/gt.db/man/rawToHex.Rd 2010-02-13 08:37:31 UTC (rev 29)
@@ -1,5 +1,6 @@
%
% Copyright (C) 2009, Perlegen Sciences, Inc.
+% Copyright (C) 2010, 23andMe, Inc.
%
% Written by David A. Hinds <dhinds at sonic.net>
%
@@ -24,18 +25,25 @@
Convert between raw vectors and hex strings.
}
\usage{
-hexToRaw(hex)
+hexToRaw(hex, drop=TRUE)
rawToHex(raw)
}
\arguments{
- \item{hex}{a string of hex digits.}
- \item{raw}{a raw vector.}
+ \item{hex}{a character vector composed of strings of hex digits, all
+ the same length.}
+ \item{drop}{logical: indicates whether to convert a single-column
+ result to a vector.}
+ \item{raw}{a raw vector or matrix.}
}
\value{
- For \code{hexToRaw}, a raw vector constructed by converting each
- consecutive pair of hex digits to one byte. For \code{rawToHex}, a
- character string formed by converting each byte of the input vector
- to its two-digit hex representation.
+ For \code{hexToRaw}, a raw matrix with one column per string in
+ \code{hex}, constructed by converting each consecutive pair of hex
+ digits to one byte.
+
+ For \code{rawToHex}, a character string formed by converting each byte
+ of the input vector to its two-digit hex representation, or a vector
+ formed by converting each column of an input matrix to its hex
+ representation.
}
\seealso{
\code{\link{charToRaw}}, \code{\link{rawToChar}}.
@@ -43,5 +51,6 @@
\examples{
hexToRaw('1a3b1a3b')
rawToHex(as.raw(seq(0,100,10)))
+hexToRaw(c('1234','5678','9abc'))
}
\keyword{classes}
Modified: pkg/gt.db/man/reshape.gt.data.Rd
===================================================================
--- pkg/gt.db/man/reshape.gt.data.Rd 2010-02-12 07:58:41 UTC (rev 28)
+++ pkg/gt.db/man/reshape.gt.data.Rd 2010-02-13 08:37:31 UTC (rev 29)
@@ -1,5 +1,6 @@
%
% Copyright (C) 2009, Perlegen Sciences, Inc.
+% Copyright (C) 2010, 23andMe, Inc.
%
% Written by David A. Hinds <dhinds at sonic.net>
%
@@ -44,7 +45,9 @@
dataset. For datasets with signal intensities, there will be two
columns: \code{signal.a} and \code{signal.b}. For data with
sequencing read counts, there will be four columns: \code{fwd.a},
- \code{rev.a}, \code{fwd.b}, and \code{rev.b}.}
+ \code{rev.a}, \code{fwd.b}, and \code{rev.b}. For Affymetrix CHP
+ data, there will be three columns: \code{log.ratio},
+ \code{strength}, and \code{forced.call}.}
}
\seealso{
\code{\link{fetch.gt.data}}, \code{\link{unpack.gt.matrix}},
Modified: pkg/gt.db/src/raw.c
===================================================================
--- pkg/gt.db/src/raw.c 2010-02-12 07:58:41 UTC (rev 28)
+++ pkg/gt.db/src/raw.c 2010-02-13 08:37:31 UTC (rev 29)
@@ -1,6 +1,7 @@
/*
Copyright (C) 2009, Perlegen Sciences, Inc.
+ Copyright (C) 2010, 23andMe, Inc.
Written by David A. Hinds <dhinds at sonic.net>
@@ -42,16 +43,29 @@
SEXP raw_to_hex(SEXP a)
{
char *str;
- SEXP ans;
- int i, len = LENGTH(a);
+ SEXP ans, dim;
+ int nr, nc, i, j;
if (!isRaw(a))
error(_("argument should be a raw vector"));
- str = (char *)R_alloc(2*len+1, sizeof(char));
- for (i = 0; i < len; i++) {
- sprintf(str+i*2, "%02X", RAW(a)[i]);
+ PROTECT(dim = getAttrib(a, R_DimSymbol));
+ if (isMatrix(a)) {
+ nr = INTEGER(dim)[0];
+ nc = INTEGER(dim)[1];
+ } else {
+ nr = LENGTH(a);
+ nc = 1;
}
- ans = mkString(str);
+ PROTECT(ans = allocVector(STRSXP, nc));
+ str = (char *)R_alloc(2*nr+1, sizeof(char));
+ str[2*nr] = '\0';
+ for (j = 0; j < nc; j++) {
+ for (i = 0; i < nr; i++) {
+ sprintf(str+i*2, "%02X", RAW(a)[i+j*nr]);
+ }
+ SET_STRING_ELT(ans, j, mkChar(str));
+ }
+ UNPROTECT(2);
return ans;
}
@@ -59,20 +73,31 @@
{
SEXP ans;
const char *str;
- int i, a, b, len;
+ int i, j, a, b, len, nc;
- if (!isString(s) || (LENGTH(s) != 1))
- error(_("argument should be a character vector of length 1"));
- str = CHAR(STRING_ELT(s, 0));
- len = strlen(str);
+ if (!isString(s))
+ error(_("argument should be a character vector"));
+ nc = LENGTH(s);
+ if (nc) {
+ str = CHAR(STRING_ELT(s, 0));
+ len = strlen(str);
+ } else {
+ len = 0;
+ }
if (len & 1)
error(_("string length should be a multiple of 2"));
- PROTECT(ans = allocVector(RAWSXP, len>>1));
- for (i = 0; i < len; i += 2) {
- a = toupper(str[i]); b = toupper(str[i+1]);
- a = (a >= 'A') ? a - 'A' + 10 : a - '0';
- b = (b >= 'A') ? b - 'A' + 10 : b - '0';
- RAW(ans)[i>>1] = (a<<4)+b;
+ for (i = 1; i < nc; i++)
+ if (strlen(CHAR(STRING_ELT(s, i))) != len)
+ error(_("string length mismatch"));
+ PROTECT(ans = allocMatrix(RAWSXP, len>>1, nc));
+ for (j = 0; j < nc; j++) {
+ str = CHAR(STRING_ELT(s, j));
+ for (i = 0; i < len; i += 2) {
+ a = toupper(str[i]); b = toupper(str[i+1]);
+ a = (a >= 'A') ? a - 'A' + 10 : a - '0';
+ b = (b >= 'A') ? b - 'A' + 10 : b - '0';
+ RAW(ans)[j*len + i>>1] = (a<<4)+b;
+ }
}
[TRUNCATED]
To get the complete diff run:
svnlook diff /svnroot/gtdb -r 29
More information about the Gtdb-commits
mailing list