[Gtdb-commits] r29 - in pkg/gt.db: . R man src

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Sat Feb 13 09:37:32 CET 2010


Author: dahinds
Date: 2010-02-13 09:37:31 +0100 (Sat, 13 Feb 2010)
New Revision: 29

Added:
   pkg/gt.db/R/rawdata.R
Modified:
   pkg/gt.db/INDEX
   pkg/gt.db/R/admin.R
   pkg/gt.db/R/cluster.R
   pkg/gt.db/R/genotype.R
   pkg/gt.db/R/misc.R
   pkg/gt.db/R/sql.R
   pkg/gt.db/man/mk.assay.data.Rd
   pkg/gt.db/man/mk.dataset.Rd
   pkg/gt.db/man/rawToHex.Rd
   pkg/gt.db/man/reshape.gt.data.Rd
   pkg/gt.db/src/raw.c
Log:
- reorganized code for working with "raw data"
- added a new raw data format 'chpdata' for Affymetrix CHP data
- minor updates to SQL code to accommodate latest RSQLite
- made rawToHex/hexToRaw more flexible



Modified: pkg/gt.db/INDEX
===================================================================
--- pkg/gt.db/INDEX	2010-02-12 07:58:41 UTC (rev 28)
+++ pkg/gt.db/INDEX	2010-02-13 08:37:31 UTC (rev 29)
@@ -1,131 +1,131 @@
-adjust.gt.calls         Manually Assign Genotype Calls
-apply.gt.dataset        Apply a Function to a Genotype Dataset
-apply.loadings          Apply PCA SNP Loadings to a New Genotype Set
-as.mask                 Convert To/From Character Masks
-assay.dat.01            Genotype Data for 5000 SNPs on 270 HapMap
-                        Samples
-assay.dat.02            Genotype Data for 371 Chr21 SNPs on 275 HapMap
-                        Samples
-big.loadings            Identify Large Sample or SNP Loadings from a
-                        PCA Analysis
-ch.table                Character Based Contingency Table
-fetch.gt.data           Load Genotype Data for a Genotyping Dataset
-fetch.prcomp            Load Principal Components Results for a
-                        Genotyping Dataset
-fetch.pt.data           Load Phenotype Data for a Genotype Dataset
-fetch.sample.data       Load Sample Data for a Genotype Dataset
-fetch.subject.data      Load Subject Data for a Genotyping Project
-fetch.test.scores       Load Association Test Results for a Genotyping
-                        Dataset
-gplot                   Genome-Wide Level Plot
-gplot.prcomp            Genome-Wide Level Plots of SNP Loadings from
-                        PCA
-gt.cluster.plot         Plot Genotype Cluster Data
-gt.dataset              Genotype Dataset Specification
-gt.demo.check           Check for Presence of GT.DB Demo Datasets
-gt.dist                 Calculate Pairwise Genotype Distances
-gt.split                Convert between Packed Genotype Strings and
-                        Genotype Vectors
-hapmap.subjects         Subject Data from the International HapMap
-                        Project
-hexToRaw                Convert between Raw Vectors and Hex Strings
-hwe.test                Tests for Hardy Weinberg Equilibrium
-ibd.dataset             Calculate Identity by Descent for a Genotype
-                        Dataset
-ibd.gt.data             Estimate Pairwise Identity by Descent for
-                        Genotype Data
-ibd.plot                Plot Identity by Descent Data
-ibd.summary             Summarize Identity by Descent Analysis Results
-ibs.gt.data             Calculate Pairwise Identity by State for
-                        Genotype Data
-if.na                   Conditional Element Selection for Missing
-                        Values
-init.gt.db              Initialize GT.DB Database
-jt.test                 Jonckheere-Terpstra Nonparametric Test for
-                        Trend
-keep.attr               Keep User Attributes
-ld.gt.data              Compute Pairwise Linkage Disequilibrium
-ld.plot                 Pairwise Linkage Disequilibrium Plot
-ld.prune                Prune SNP List to Limit Linkage Disequilibrium
-ls.assay                List Assay Definitions
-ls.assay.group          List Assay Groups
-ls.assay.position       List Assay Positions
-ls.dataset              List Genotype Datasets
-ls.mapping              List Assay Mapping Sets
-ls.platform             List Genotyping Platforms
-ls.prcomp               List Principal Components Result Sets
-ls.project              List Genotyping Projects
-ls.sample               List Samples in a Genotype Dataset
-ls.subject              List Subjects in a Genotyping Project
-ls.test                 List Association Test Result Sets
-mask.gt.data            Mask Sample Genotypes
-mask.str                Mask Character Strings
-match.gt.data           Identify Equivalent Genotyping Assays
-mk.assay                Create Genotyping Assay Definitions
-mk.assay.data           Create Genotyping Assay Data
-mk.assay.group          Create or Remove a Genotype Assay Group
-mk.assay.position       Create Genotyping Assay Positions
-mk.attr                 Create, Remove, and List Attribute Definitions
-mk.dataset              Create or Remove Genotype Datasets
-mk.mapping              Create or Remove a Mapping for a Genotyping
-                        Platform
-mk.platform             Create or Remove a Genotyping Platform
-mk.project              Create or Remove a Genotyping Project
-mk.sample               Create or Remove Dataset Samples
-mk.sample.attr          Create, Remove, or List Sample Attribute
-                        Definitions
-mk.subject              Create or Remove Project Subjects
-mk.subject.attr         Create, Remove, or List Subject Attribute
-                        Definitions
-na.if                   Conditional Conversion to Missing Values
-nsubstr                 Count Substring Instances
-nw.align                Needleman and Wunsch Sequence Alignment
-nw.orient.assay         Orient Assay Sequences
-orient.gt.data          Flip Assay Strands and/or Swap Alleles
-panel.cluster           Panel Function for Drawing Elliptical Cluster
-                        Boundaries
-panel.qqpval            Quantile-Quantile Plots for P Values: Panel
-                        Functions
-panel.qqthin            Sparse Normal Quantile-Quantile Plots: Panel
-                        Function
-prcomp.gt.data          Principal Components Analysis of Genotype Data
-progress.bar            Console Text-Based Progress Bar
-qqprcomp                Quantile-Quantile Plots of PCA Loadings
-qqpval                  Quantile-Quantile Plots for P Values
-qqthin                  Sparse Normal Quantile-Quantile Plots
-reshape.gt.data         Reshape Genotype Data
-revcomp                 Reverse Complement DNA Sequences
-score.and.store         Test SNPs for Association and Store Results
-score.chisq             Chi-Squared Test for Genotypic Association
-score.chisq.2x2         Chi-Squared Test for Allelic Association
-score.fisher            Fisher's Exact Test for Genotypic Association
-score.glm               Test for Association using Logistic Regression
-score.glm.general       Association Test with a Logistic Model and
-                        General Mode of Action
-score.glm.groups        Test for Association using a Logistic Model
-                        with Subgroup Effects
-score.gt.data           Test SNPs for Association
-score.jt                Jonckheere-Terpstra Nonparametric Test for
-                        Association
-score.kruskal           Kruskal-Wallis Nonparametric Test for
-                        Association
-score.lm                Test for Association using a Simple Linear
-                        Model
-score.lm.general        Test for Association using a Linear Model and
-                        General
-score.lm.groups         Test for Association using a Linear Model with
-                        Subgroup Effects
-score.prcomp            Test Phenotypic Association with Principal
-                        Components
-score.trend             Cochran-Armitage Trend Test for Association
-set.hidden              Update Hidden Status
-snp.loadings            SNP Loadings from Principal Components Analyses
-sql.query               Simplified SQL Statement Execution
-store.prcomp            Store or Remove Principal Components Results
-store.sample.data       Store Sample Data
-store.subject.data      Store Subject Data
-store.test.scores       Store or Remove Association Test Results
-summary.gt.data         Genotype Data Summary
-unpack.gt.matrix        Convert Packed Genotype Strings to a Genotype
-                        Matrix
-use.gt.db               Define GT.DB Database Connection
+adjust.gt.calls         Manually Assign Genotype Calls
+apply.gt.dataset        Apply a Function to a Genotype Dataset
+apply.loadings          Apply PCA SNP Loadings to a New Genotype Set
+as.mask                 Convert To/From Character Masks
+assay.dat.01            Genotype Data for 5000 SNPs on 270 HapMap
+                        Samples
+assay.dat.02            Genotype Data for 371 Chr21 SNPs on 275 HapMap
+                        Samples
+big.loadings            Identify Large Sample or SNP Loadings from a
+                        PCA Analysis
+ch.table                Character Based Contingency Table
+fetch.gt.data           Load Genotype Data for a Genotyping Dataset
+fetch.prcomp            Load Principal Components Results for a
+                        Genotyping Dataset
+fetch.pt.data           Load Phenotype Data for a Genotype Dataset
+fetch.sample.data       Load Sample Data for a Genotype Dataset
+fetch.subject.data      Load Subject Data for a Genotyping Project
+fetch.test.scores       Load Association Test Results for a Genotyping
+                        Dataset
+gplot                   Genome-Wide Level Plot
+gplot.prcomp            Genome-Wide Level Plots of SNP Loadings from
+                        PCA
+gt.cluster.plot         Plot Genotype Cluster Data
+gt.dataset              Genotype Dataset Specification
+gt.demo.check           Check for Presence of GT.DB Demo Datasets
+gt.dist                 Calculate Pairwise Genotype Distances
+gt.split                Convert between Packed Genotype Strings and
+                        Genotype Vectors
+hapmap.subjects         Subject Data from the International HapMap
+                        Project
+hexToRaw                Convert between Raw Vectors and Hex Strings
+hwe.test                Tests for Hardy Weinberg Equilibrium
+ibd.dataset             Calculate Identity by Descent for a Genotype
+                        Dataset
+ibd.gt.data             Estimate Pairwise Identity by Descent for
+                        Genotype Data
+ibd.plot                Plot Identity by Descent Data
+ibd.summary             Summarize Identity by Descent Analysis Results
+ibs.gt.data             Calculate Pairwise Identity by State for
+                        Genotype Data
+if.na                   Conditional Element Selection for Missing
+                        Values
+init.gt.db              Initialize GT.DB Database
+jt.test                 Jonckheere-Terpstra Nonparametric Test for
+                        Trend
+keep.attr               Keep User Attributes
+ld.gt.data              Compute Pairwise Linkage Disequilibrium
+ld.plot                 Pairwise Linkage Disequilibrium Plot
+ld.prune                Prune SNP List to Limit Linkage Disequilibrium
+ls.assay                List Assay Definitions
+ls.assay.group          List Assay Groups
+ls.assay.position       List Assay Positions
+ls.dataset              List Genotype Datasets
+ls.mapping              List Assay Mapping Sets
+ls.platform             List Genotyping Platforms
+ls.prcomp               List Principal Components Result Sets
+ls.project              List Genotyping Projects
+ls.sample               List Samples in a Genotype Dataset
+ls.subject              List Subjects in a Genotyping Project
+ls.test                 List Association Test Result Sets
+mask.gt.data            Mask Sample Genotypes
+mask.str                Mask Character Strings
+match.gt.data           Identify Equivalent Genotyping Assays
+mk.assay                Create Genotyping Assay Definitions
+mk.assay.data           Create Genotyping Assay Data
+mk.assay.group          Create or Remove a Genotype Assay Group
+mk.assay.position       Create Genotyping Assay Positions
+mk.attr                 Create, Remove, and List Attribute Definitions
+mk.dataset              Create or Remove Genotype Datasets
+mk.mapping              Create or Remove a Mapping for a Genotyping
+                        Platform
+mk.platform             Create or Remove a Genotyping Platform
+mk.project              Create or Remove a Genotyping Project
+mk.sample               Create or Remove Dataset Samples
+mk.sample.attr          Create, Remove, or List Sample Attribute
+                        Definitions
+mk.subject              Create or Remove Project Subjects
+mk.subject.attr         Create, Remove, or List Subject Attribute
+                        Definitions
+na.if                   Conditional Conversion to Missing Values
+nsubstr                 Count Substring Instances
+nw.align                Needleman and Wunsch Sequence Alignment
+nw.orient.assay         Orient Assay Sequences
+orient.gt.data          Flip Assay Strands and/or Swap Alleles
+panel.cluster           Panel Function for Drawing Elliptical Cluster
+                        Boundaries
+panel.qqpval            Quantile-Quantile Plots for P Values: Panel
+                        Functions
+panel.qqthin            Sparse Normal Quantile-Quantile Plots: Panel
+                        Function
+prcomp.gt.data          Principal Components Analysis of Genotype Data
+progress.bar            Console Text-Based Progress Bar
+qqprcomp                Quantile-Quantile Plots of PCA Loadings
+qqpval                  Quantile-Quantile Plots for P Values
+qqthin                  Sparse Normal Quantile-Quantile Plots
+reshape.gt.data         Reshape Genotype Data
+revcomp                 Reverse Complement DNA Sequences
+score.and.store         Test SNPs for Association and Store Results
+score.chisq             Chi-Squared Test for Genotypic Association
+score.chisq.2x2         Chi-Squared Test for Allelic Association
+score.fisher            Fisher's Exact Test for Genotypic Association
+score.glm               Test for Association using Logistic Regression
+score.glm.general       Association Test with a Logistic Model and
+                        General Mode of Action
+score.glm.groups        Test for Association using a Logistic Model
+                        with Subgroup Effects
+score.gt.data           Test SNPs for Association
+score.jt                Jonckheere-Terpstra Nonparametric Test for
+                        Association
+score.kruskal           Kruskal-Wallis Nonparametric Test for
+                        Association
+score.lm                Test for Association using a Simple Linear
+                        Model
+score.lm.general        Test for Association using a Linear Model and
+                        General
+score.lm.groups         Test for Association using a Linear Model with
+                        Subgroup Effects
+score.prcomp            Test Phenotypic Association with Principal
+                        Components
+score.trend             Cochran-Armitage Trend Test for Association
+set.hidden              Update Hidden Status
+snp.loadings            SNP Loadings from Principal Components Analyses
+sql.query               Simplified SQL Statement Execution
+store.prcomp            Store or Remove Principal Components Results
+store.sample.data       Store Sample Data
+store.subject.data      Store Subject Data
+store.test.scores       Store or Remove Association Test Results
+summary.gt.data         Genotype Data Summary
+unpack.gt.matrix        Convert Packed Genotype Strings to a Genotype
+                        Matrix
+use.gt.db               Define GT.DB Database Connection

Modified: pkg/gt.db/R/admin.R
===================================================================
--- pkg/gt.db/R/admin.R	2010-02-12 07:58:41 UTC (rev 28)
+++ pkg/gt.db/R/admin.R	2010-02-13 08:37:31 UTC (rev 29)
@@ -1,5 +1,6 @@
 #
 # Copyright (C) 2009, Perlegen Sciences, Inc.
+# Copyright (C) 2010, 23andMe, Inc.
 #
 # Written by David A. Hinds <dhinds at sonic.net>
 #
@@ -113,7 +114,7 @@
 
 mk.dataset <-
 function(dataset.name, project.name, platform.name,
-         description, raw.layout=c(NA,'signal','seqread'),
+         description, raw.layout=c(NA,'signal','seqread','chpdata'),
          is.hidden=FALSE)
 {
     raw.layout <- match.arg(raw.layout)

Modified: pkg/gt.db/R/cluster.R
===================================================================
--- pkg/gt.db/R/cluster.R	2010-02-12 07:58:41 UTC (rev 28)
+++ pkg/gt.db/R/cluster.R	2010-02-13 08:37:31 UTC (rev 29)
@@ -1,5 +1,6 @@
 #
 # Copyright (C) 2009, Perlegen Sciences, Inc.
+# Copyright (C) 2010, 23andMe, Inc.
 #
 # Written by David A. Hinds <dhinds at sonic.net>
 #
@@ -54,28 +55,56 @@
          between=list(x=0.5,y=0.5), scales=list(alternating=0),
          xlab=NULL, ylab=NULL, par.settings=.gt.settings, ...)
 {
-    prepanel.equal <- function(...)
-    {
-        p <- prepanel.default.xyplot(...)
-        p$xlim <- c(min(p$xlim,p$ylim), max(p$xlim,p$ylim))
-        p
-    }
     if ('signal.a' %in% names(data)) {
         x <- data$signal.a
         y <- data$signal.b
+        equal <- TRUE
     } else if ('fwd.a' %in% names(data)) {
         x <- data$fwd.a+data$rev.a
         y <- data$fwd.b+data$rev.b
+        equal <- TRUE
+    } else if ('strength' %in% names(data)) {
+        x <- data$log.ratio
+        y <- data$strength
+        equal <- FALSE
     } else {
         stop("raw data not available")
     }
+
+    if (equal) {
+        # Equal scaling of X and Y
+        prepanel <- function(...)
+        {
+            p <- prepanel.default.xyplot(...)
+            p$xlim <- p$ylim <- c(min(p$xlim,p$ylim), max(p$xlim,p$ylim))
+            p
+        }
+    } else {
+        # center X at 0
+        prepanel <- function(...)
+        {
+            p <- prepanel.default.xyplot(...)
+            p$xlim <- c(-1,1)*max(abs(p$xlim))
+            p
+        }
+    }
+
     n <- factor(data$assay.name)
     if (rescale) {
-        q <- tapply(c(x,y), rep(n,2), range, na.rm=TRUE)
-        q <- do.call('rbind', q)
-        x <- 0.01 + 0.98*(x - q[n,1])/(q[n,2]-q[n,1])
-        y <- 0.01 + 0.98*(y - q[n,1])/(q[n,2]-q[n,1])
+        if (equal) {
+            q <- tapply(c(x,y), rep(n,2), range, na.rm=TRUE)
+            q <- do.call('rbind', q)
+            x <- 0.01 + 0.98*(x - q[n,1])/(q[n,2]-q[n,1])
+            y <- 0.01 + 0.98*(y - q[n,1])/(q[n,2]-q[n,1])
+        } else {
+            q <- tapply(x, n, function(x) max(abs(x),na.rm=TRUE))
+            x <- x / q[n]
+            q <- tapply(y, n, range, na.rm=TRUE)
+            q <- do.call('rbind', q)
+            y <- 0.01 + 0.98*(y - q[n,1])/(q[n,2]-q[n,1])
+        }
     }
+
     gt <- data$genotype
     if (is.numeric(gt)) {
         gt <- factor(gt, levels=0:3)
@@ -88,7 +117,7 @@
         gt[is.na(gt)] <- nn
     }
     p <- xyplot(y~x|n, groups=gt, bounds=bounds, min.points=min.points,
-                scales=scales, prepanel=prepanel.equal, aspect=1,
+                scales=scales, prepanel=prepanel, aspect=1,
                 panel=panel.superpose, panel.groups=panel.cluster,
                 xlab=xlab, ylab=ylab, between=between,
                 par.settings=par.settings, ...)

Modified: pkg/gt.db/R/genotype.R
===================================================================
--- pkg/gt.db/R/genotype.R	2010-02-12 07:58:41 UTC (rev 28)
+++ pkg/gt.db/R/genotype.R	2010-02-13 08:37:31 UTC (rev 29)
@@ -1,5 +1,6 @@
 #
 # Copyright (C) 2009, Perlegen Sciences, Inc.
+# Copyright (C) 2010, 23andMe, Inc.
 #
 # Written by David A. Hinds <dhinds at sonic.net>
 #
@@ -216,23 +217,7 @@
     f <- attr(gt.data,'raw.layout')
     if (is.null(f))
         stop('raw data layout unavailable')
-    nr <- nrow(d)
-    if (f == 'signal') {
-        i <- readBin(cvt.fn(gt.data$raw.data), what='int',
-                     n=2*nr, size=2, signed=FALSE, endian='little')
-        i <- na.if(i, 65535)
-        dim(i) <- c(2,nr)
-        cbind(d, signal.a=i[1,], signal.b=i[2,])
-    } else if (f == 'seqread') {
-        i <- readBin(cvt.fn(gt.data$raw.data), what='int',
-                     n=4*nr, size=1, signed=FALSE, endian='little')
-        i <- na.if(i, 255)
-        dim(i) <- c(4,nr)
-        cbind(d, fwd.a=i[1,], rev.a=i[2,], fwd.b=i[3,], rev.b=i[4,])
-    } else {
-        warning('unknown raw data layout')
-        d
-    }
+    cbind(d, unpack.raw.data(cvt.fn(gt.data$raw.data), f))
 }
 
 .mask.dat <- function(str, mask, squeeze=FALSE)

Modified: pkg/gt.db/R/misc.R
===================================================================
--- pkg/gt.db/R/misc.R	2010-02-12 07:58:41 UTC (rev 28)
+++ pkg/gt.db/R/misc.R	2010-02-13 08:37:31 UTC (rev 29)
@@ -1,5 +1,6 @@
 #
 # Copyright (C) 2009, Perlegen Sciences, Inc.
+# Copyright (C) 2010, 23andMe, Inc.
 #
 # Written by David A. Hinds <dhinds at sonic.net>
 #
@@ -30,8 +31,14 @@
 rawToHex <- function(raw)
 .Call("raw_to_hex", raw, PACKAGE="gt.db")
 
-hexToRaw <- function(hex)
-.Call("hex_to_raw", hex, PACKAGE="gt.db")
+hexToRaw <- function(hex, drop=TRUE)
+{
+    x <- .Call("hex_to_raw", hex, PACKAGE="gt.db")
+    if (drop && (ncol(x) <= 1))
+        as.vector(x)
+    else
+        x
+}
 
 #---------------------------------------------------------------------
 

Added: pkg/gt.db/R/rawdata.R
===================================================================
--- pkg/gt.db/R/rawdata.R	                        (rev 0)
+++ pkg/gt.db/R/rawdata.R	2010-02-13 08:37:31 UTC (rev 29)
@@ -0,0 +1,111 @@
+#
+# Copyright (C) 2010, 23andMe, Inc.
+#
+# Written by David A. Hinds <dhinds at sonic.net>
+#
+# This is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the license, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>
+#
+
+#---------------------------------------------------------------------
+
+.unpack.signal <- function(data)
+{
+    nr <- length(data)/4
+    i <- readBin(data, what='int', n=2*nr, size=2,
+                 signed=FALSE, endian='little')
+    i <- na.if(i, 65535)
+    dim(i) <- c(2,nr)
+    data.frame(signal.a=i[1,], signal.b=i[2,])
+}
+
+.pack.signal <- function(data)
+{
+    a <- writeBin(if.na(data$signal.a,65535),
+                  raw(), size=2, endian='little')
+    b <- writeBin(if.na(data$signal.b,65535),
+                  raw(), size=2, endian='little')
+    dim(a) <- dim(b) <- c(2,nrow(data))
+    as.vector(rbind(a,b))
+}
+
+#---------------------------------------------------------------------
+
+.unpack.seqread <- function(data)
+{
+    i <- na.if(as.integer(data), 255)
+    dim(i) <- c(4,length(i)/4)
+    data.frame(fwd.a=i[1,], rev.a=i[2,], fwd.b=i[3,], rev.b=i[4,])
+}
+
+.pack.seqread <- function(data)
+{
+    as.vector(rbind(as.raw(if.na(data$fwd.a,255)),
+                    as.raw(if.na(data$rev.a,255)),
+                    as.raw(if.na(data$fwd.b,255)),
+                    as.raw(if.na(data$rev.b,255))))
+}
+
+#---------------------------------------------------------------------
+
+.unpack.chpdata <- function(data)
+{
+    nr <- length(data)/5
+    dim(data) <- c(5,nr)
+    i <- readBin(data[1:2,], what='int', n=nr, size=2,
+                 signed=TRUE, endian='little')
+    j <- readBin(data[3:4,], what='int', n=nr, size=2,
+                 signed=FALSE, endian='little')
+    k <- factor(as.integer(data[5,]), levels=1:4,
+                labels=c('a','h','b','n'))
+    data.frame(log.ratio=i/256, strength=j/256, forced.call=k)
+}
+
+.pack.chpdata <- function(data)
+{
+    x <- writeBin(as.integer(256*data$log.ratio),
+                  raw(), size=2, endian='little')
+    y <- writeBin(as.integer(256*data$strength),
+                  raw(), size=2, endian='little')
+    z <- as.raw(data$forced.call)
+    dim(x) <- dim(y) <- c(2,nrow(data))
+    as.vector(rbind(x,y,z))
+}
+
+#---------------------------------------------------------------------
+
+unpack.raw.data <- function(data, raw.layout)
+{
+    if (raw.layout == 'signal') {
+        .unpack.signal(data)
+    } else if (raw.layout == 'seqread') {
+        .unpack.seqread(data)
+    } else if (raw.layout == 'chpdata') {
+        .unpack.chpdata(data)
+    } else {
+        stop('unknown raw data layout')
+    }
+}
+
+pack.raw.data <- function(data, raw.layout)
+{
+    if (raw.layout == 'signal') {
+        .pack.signal(data)
+    } else if (raw.layout == 'seqread') {
+        .pack.seqread(data)
+    } else if (raw.layout == 'chpdata') {
+        .pack.chpdata(data)
+    } else {
+        stop('unknown raw data layout')
+    }
+}

Modified: pkg/gt.db/R/sql.R
===================================================================
--- pkg/gt.db/R/sql.R	2010-02-12 07:58:41 UTC (rev 28)
+++ pkg/gt.db/R/sql.R	2010-02-13 08:37:31 UTC (rev 29)
@@ -204,12 +204,12 @@
     ps <- tryCatch(dbPrepareStatement(db, sql, data), error=efn)
     nr <- 0
     nd <- nrow(data)
-    if (progress) progress.bar(0, nd)
+    if (nd && progress) progress.bar(0, nd)
     for (lo in seq(1,max(1,nd),chunk.rows)) {
         d <- data[lo:min(lo+chunk.rows-1,nd),,drop=FALSE]
         tryCatch(dbExecStatement(ps, d), error=efn)
         nr <- nr + nrow(d)
-        if (progress) progress.bar(nr, nd)
+        if (nd && progress) progress.bar(nr, nd)
     }
 
     nr <- dbGetRowsAffected(ps)
@@ -247,8 +247,8 @@
 
     ps <- tryCatch(.myPrepareStatement(db, sql, data), error=efn)
     nd <- nrow(data)
+    if (nd && progress) progress.bar(0, nd)
     if (nd && (regexpr('^insert.*\\svalues\\s*\\(', sql) > 0)) {
-        if (progress) progress.bar(0, nd)
         chunk.rows <- .chunk.rows(data, chunk.kb)
         for (lo in seq(1,nd,chunk.rows)) {
             d <- data[lo:min(lo+chunk.rows-1,nd),,drop=FALSE]
@@ -261,6 +261,7 @@
             rs <- tryCatch(.myExecStatement(ps, data[i,,drop=FALSE]),
                            error=efn)
             nr <- nr + dbGetRowsAffected(rs)
+            if (nd && progress) progress.bar(i, nd)
         }
     }
     nr
@@ -274,20 +275,30 @@
         .dbClearAll(db)
         stop(e)
     }
-    data <- .sql.prep.data(...)
     sql <- .fixup.sql(db, sql)
     sql <- gsub(":[0-9]+", "?", sql)
 
-    nr <- 0
+    if (!length(list(...))) {
+        dbBeginTransaction(db)
+        rs <- tryCatch(dbSendQuery(db, sql), error=efn)
+        nr <- dbGetRowsAffected(rs)
+        dbCommit(db)
+        return(nr)
+    }
+
+    data <- .sql.prep.data(...)
+    nu <- nr <- 0
     nd <- nrow(data)
+    if (!nd) return(0)
     chunk.rows <- .chunk.rows(data, chunk.kb)
     dbBeginTransaction(db)
     if (progress) progress.bar(0, nd)
-    for (lo in seq(1,max(1,nrow(data)),chunk.rows)) {
+    for (lo in seq(1,nd,chunk.rows)) {
         d <- data[lo:min(lo+chunk.rows-1,nd),,drop=FALSE]
         rs <- tryCatch(dbSendPreparedQuery(db, sql, d), error=efn)
         nr <- nr + dbGetRowsAffected(rs)
-        if (progress) progress.bar(nr, nd)
+        nu <- nu + nrow(d)
+        if (progress) progress.bar(nu, nd)
     }
     dbCommit(db)
     nr

Modified: pkg/gt.db/man/mk.assay.data.Rd
===================================================================
--- pkg/gt.db/man/mk.assay.data.Rd	2010-02-12 07:58:41 UTC (rev 28)
+++ pkg/gt.db/man/mk.assay.data.Rd	2010-02-13 08:37:31 UTC (rev 29)
@@ -48,11 +48,13 @@
   An \code{assay.name} column can be supplied in place of
   \code{assay.id}.
 
-  Two raw data layouts are currently supported.  The 'signal' layout
+  Three raw data layouts are currently supported.  The 'signal' layout
   consists of a 16-bit unsigned little endian value for each allele.
-  The 'seqread' layout consists of 8-bit unsigned counts for forward
-  and reverse orientations for each allele, representing read counts
-  from a sequencing experiment.
+  The 'seqread' layout consists of 8-bit unsigned counts for forward and
+  reverse orientations for each allele, representing read counts from a
+  sequencing experiment.  The 'chpdata' layout includes a 16-bit 8.8
+  fixed point log allele ratio, a 16-bit 8.8 log signal strength, and 
+  a 'forced' genotype call.
 }
 \value{
   If successful, the number of rows inserted into the assay data

Modified: pkg/gt.db/man/mk.dataset.Rd
===================================================================
--- pkg/gt.db/man/mk.dataset.Rd	2010-02-12 07:58:41 UTC (rev 28)
+++ pkg/gt.db/man/mk.dataset.Rd	2010-02-13 08:37:31 UTC (rev 29)
@@ -1,5 +1,6 @@
 %
 % Copyright (C) 2009, Perlegen Sciences, Inc.
+% Copyright (C) 2010, 23andMe, Inc.
 % 
 % Written by David A. Hinds <dhinds at sonic.net>
 % 
@@ -26,7 +27,7 @@
 }
 \usage{
 mk.dataset(dataset.name, project.name, platform.name,
-           description, raw.layout=c(NA,'signal','seqread'),
+           description, raw.layout=c(NA,'signal','seqread','chpdata'),
            is.hidden=FALSE)
 rm.dataset(dataset.name)
 }

Modified: pkg/gt.db/man/rawToHex.Rd
===================================================================
--- pkg/gt.db/man/rawToHex.Rd	2010-02-12 07:58:41 UTC (rev 28)
+++ pkg/gt.db/man/rawToHex.Rd	2010-02-13 08:37:31 UTC (rev 29)
@@ -1,5 +1,6 @@
 %
 % Copyright (C) 2009, Perlegen Sciences, Inc.
+% Copyright (C) 2010, 23andMe, Inc.
 % 
 % Written by David A. Hinds <dhinds at sonic.net>
 % 
@@ -24,18 +25,25 @@
   Convert between raw vectors and hex strings.
 }
 \usage{
-hexToRaw(hex)
+hexToRaw(hex, drop=TRUE)
 rawToHex(raw)
 }
 \arguments{
-  \item{hex}{a string of hex digits.}
-  \item{raw}{a raw vector.}
+  \item{hex}{a character vector composed of strings of hex digits, all
+    the same length.}
+  \item{drop}{logical: indicates whether to convert a single-column
+    result to a vector.}
+  \item{raw}{a raw vector or matrix.}
 }
 \value{
-  For \code{hexToRaw}, a raw vector constructed by converting each
-  consecutive pair of hex digits to one byte.  For \code{rawToHex}, a
-  character string formed by converting each byte of the input vector
-  to its two-digit hex representation.
+  For \code{hexToRaw}, a raw matrix with one column per string in
+  \code{hex}, constructed by converting each consecutive pair of hex
+  digits to one byte.
+
+  For \code{rawToHex}, a character string formed by converting each byte
+  of the input vector to its two-digit hex representation, or a vector
+  formed by converting each column of an input matrix to its hex
+  representation.
 }
 \seealso{
   \code{\link{charToRaw}}, \code{\link{rawToChar}}.
@@ -43,5 +51,6 @@
 \examples{
 hexToRaw('1a3b1a3b')
 rawToHex(as.raw(seq(0,100,10)))
+hexToRaw(c('1234','5678','9abc'))
 }
 \keyword{classes}

Modified: pkg/gt.db/man/reshape.gt.data.Rd
===================================================================
--- pkg/gt.db/man/reshape.gt.data.Rd	2010-02-12 07:58:41 UTC (rev 28)
+++ pkg/gt.db/man/reshape.gt.data.Rd	2010-02-13 08:37:31 UTC (rev 29)
@@ -1,5 +1,6 @@
 %
 % Copyright (C) 2009, Perlegen Sciences, Inc.
+% Copyright (C) 2010, 23andMe, Inc.
 % 
 % Written by David A. Hinds <dhinds at sonic.net>
 % 
@@ -44,7 +45,9 @@
     dataset.  For datasets with signal intensities, there will be two
     columns: \code{signal.a} and \code{signal.b}.  For data with
     sequencing read counts, there will be four columns: \code{fwd.a},
-    \code{rev.a}, \code{fwd.b}, and \code{rev.b}.}
+    \code{rev.a}, \code{fwd.b}, and \code{rev.b}.  For Affymetrix CHP
+    data, there will be three columns: \code{log.ratio},
+    \code{strength}, and \code{forced.call}.}
 }
 \seealso{
   \code{\link{fetch.gt.data}}, \code{\link{unpack.gt.matrix}},

Modified: pkg/gt.db/src/raw.c
===================================================================
--- pkg/gt.db/src/raw.c	2010-02-12 07:58:41 UTC (rev 28)
+++ pkg/gt.db/src/raw.c	2010-02-13 08:37:31 UTC (rev 29)
@@ -1,6 +1,7 @@
 /*
 
   Copyright (C) 2009, Perlegen Sciences, Inc.
+  Copyright (C) 2010, 23andMe, Inc.
   
   Written by David A. Hinds <dhinds at sonic.net>
   
@@ -42,16 +43,29 @@
 SEXP raw_to_hex(SEXP a)
 {
 	char *str;
-	SEXP ans;
-	int i, len = LENGTH(a);
+	SEXP ans, dim;
+	int nr, nc, i, j;
 
 	if (!isRaw(a))
 		error(_("argument should be a raw vector"));
-	str = (char *)R_alloc(2*len+1, sizeof(char));
-	for (i = 0; i < len; i++) {
-		sprintf(str+i*2, "%02X", RAW(a)[i]);
+	PROTECT(dim = getAttrib(a, R_DimSymbol));
+	if (isMatrix(a)) {
+		nr = INTEGER(dim)[0];
+		nc = INTEGER(dim)[1];
+	} else {
+		nr = LENGTH(a);
+		nc = 1;
 	}
-	ans = mkString(str);
+	PROTECT(ans = allocVector(STRSXP, nc));
+	str = (char *)R_alloc(2*nr+1, sizeof(char));
+	str[2*nr] = '\0';
+	for (j = 0; j < nc; j++) {
+		for (i = 0; i < nr; i++) {
+			sprintf(str+i*2, "%02X", RAW(a)[i+j*nr]);
+		}
+		SET_STRING_ELT(ans, j, mkChar(str));
+	}
+	UNPROTECT(2);
 	return ans;
 }
 
@@ -59,20 +73,31 @@
 {
 	SEXP ans;
 	const char *str;
-	int i, a, b, len;
+	int i, j, a, b, len, nc;
 
-	if (!isString(s) || (LENGTH(s) != 1))
-		error(_("argument should be a character vector of length 1"));
-	str = CHAR(STRING_ELT(s, 0));
-	len = strlen(str);
+	if (!isString(s))
+		error(_("argument should be a character vector"));
+	nc = LENGTH(s);
+	if (nc) {
+		str = CHAR(STRING_ELT(s, 0));
+		len = strlen(str);
+	} else {
+		len = 0;
+	}
 	if (len & 1)
 		error(_("string length should be a multiple of 2"));
-	PROTECT(ans = allocVector(RAWSXP, len>>1));
-	for (i = 0; i < len; i += 2) {
-		a = toupper(str[i]); b = toupper(str[i+1]);
-		a = (a >= 'A') ? a - 'A' + 10 : a - '0';
-		b = (b >= 'A') ? b - 'A' + 10 : b - '0';
-		RAW(ans)[i>>1] = (a<<4)+b;
+	for (i = 1; i < nc; i++)
+		if (strlen(CHAR(STRING_ELT(s, i))) != len)
+			error(_("string length mismatch"));
+	PROTECT(ans = allocMatrix(RAWSXP, len>>1, nc));
+	for (j = 0; j < nc; j++) {
+		str = CHAR(STRING_ELT(s, j));
+		for (i = 0; i < len; i += 2) {
+			a = toupper(str[i]); b = toupper(str[i+1]);
+			a = (a >= 'A') ? a - 'A' + 10 : a - '0';
+			b = (b >= 'A') ? b - 'A' + 10 : b - '0';
+			RAW(ans)[j*len + i>>1] = (a<<4)+b;
+		}
 	}
[TRUNCATED]

To get the complete diff run:
    svnlook diff /svnroot/gtdb -r 29


More information about the Gtdb-commits mailing list