[Genabel-commits] r750 - in pkg/GenABEL: . R inst/unitTests src/GAlib
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Fri Jul 15 16:23:17 CEST 2011
Author: yurii
Date: 2011-07-15 16:23:17 +0200 (Fri, 15 Jul 2011)
New Revision: 750
Modified:
pkg/GenABEL/CHANGES.LOG
pkg/GenABEL/R/alleleID.R
pkg/GenABEL/R/convert.snp.mach.R
pkg/GenABEL/R/merge.snp.data.R
pkg/GenABEL/R/ss.R
pkg/GenABEL/R/zzz.R
pkg/GenABEL/inst/unitTests/report.html
pkg/GenABEL/inst/unitTests/report.txt
pkg/GenABEL/inst/unitTests/reportSummary.txt
pkg/GenABEL/inst/unitTests/runit.convert.snp.R
pkg/GenABEL/src/GAlib/convert_snp_illumina.cpp
pkg/GenABEL/src/GAlib/convert_snp_merlin.cpp
pkg/GenABEL/src/GAlib/convert_snp_merlin_wslash.cpp
pkg/GenABEL/src/GAlib/convert_snp_tped.cpp
Log:
Allowing monomorphic coding and meaningful merge of monomorphics with polymorphics
Modified: pkg/GenABEL/CHANGES.LOG
===================================================================
--- pkg/GenABEL/CHANGES.LOG 2011-07-08 18:09:30 UTC (rev 749)
+++ pkg/GenABEL/CHANGES.LOG 2011-07-15 14:23:17 UTC (rev 750)
@@ -1,8 +1,14 @@
-*** v. 1.6-8 (2011.07.06)
+*** v. 1.6-8 (2011.07.15)
+Updated the merge.snp.data procedure to allow meaningful
+merging of mono- and poly- codings. Take care -- this is
+new and not tested much yet! You best check few results
+manually after the merge!
+
Added coding<- method to gwaa.data and snp.data classes
-Added monomorphic classes to 'snp.data' and convert.snp.*
+Added I/D encoding and monomorphic classes (AA, TT, GG, CC,
+--, II, DD, ...) to 'snp.data' and convert.snp.*
Updated 'polygenic' with use of 'polylik_eigen' developed by
Gulnara Svischeva. Now 'polygenic' works MUCH faster. The
Modified: pkg/GenABEL/R/alleleID.R
===================================================================
--- pkg/GenABEL/R/alleleID.R 2011-07-08 18:09:30 UTC (rev 749)
+++ pkg/GenABEL/R/alleleID.R 2011-07-15 14:23:17 UTC (rev 750)
@@ -15,7 +15,11 @@
idx <- idx + 1
a[[idx]] <- c("B","A")
idx <- idx + 1
- allalleles <- c("1","2","B","A","T","G","C","-")
+ a[[idx]] <- c("I","D")
+ idx <- idx + 1
+ a[[idx]] <- c("D","I")
+ idx <- idx + 1
+ allalleles <- c("1","2","B","I","D","A","T","G","C","-")
for (jj in allalleles) {
a[[idx]] <- c(jj,jj)
idx <- idx + 1
@@ -37,8 +41,8 @@
a <- alleleID.alleles()
out <- c("OPPA")
idx <- 1
- alleles <- c("A","T","G","C","-")
- reval <- c("T","A","C","G","-")
+ alleles <- c("A","T","G","C","-","I","D")
+ reval <- c("T","A","C","G","-","I","D")
for (i in a) {
x <- match(alleles,i)
if (sum(!is.na(x)) == 2) {
@@ -113,4 +117,48 @@
out
}
-
+translate_mono_coding <- function(mono,poly,strandMono,strandPoly,forcestranduse) {
+ hasallele <- function(mono,poly) {
+ mono <- strsplit(mono,"")[[1]][1]
+ spl <- strsplit(poly,"")
+ #print(spl[[1]][1])
+ #print(spl[[1]][2])
+ #print(mono)
+ if (spl[[1]][1] == mono || spl[[1]][2] == mono) {
+ return(TRUE)
+ } else {
+ return(FALSE)
+ }
+ }
+ swapmonofirst <- function(mono,poly) {
+ mono <- strsplit(mono,"")[[1]][1]
+ spl <- strsplit(poly,"")
+ #print(spl[[1]][1])
+ #print(mono)
+ if (spl[[1]][1] == mono)
+ return(paste(spl[[1]][1],spl[[1]][2],sep=""))
+ else if (spl[[1]][2] == mono)
+ return(paste(spl[[1]][2],spl[[1]][1],sep=""))
+ else
+ stop("swapmonofirst: error")
+ }
+ polyN <- which(alleleID.codes() == poly)
+ poly.reverse <- alleleID.codes.reverse()[polyN]
+ if (!(poly %in% c("AT","TA","GC","CG")) & !forcestranduse) {
+ if (hasallele(mono,poly))
+ return(swapmonofirst(mono,poly))
+ else if (hasallele(mono,poly.reverse))
+ return(swapmonofirst(mono,poly.reverse))
+ } else if (forcestranduse) {
+ if (strandMono=="u" || strandPoly=="u") return(mono)
+ if (strandMono!=strandPoly) {
+ polyN <- which(alleleID.codes() == poly)
+ poly <- alleleID.codes.reverse()[polyN]
+ }
+ if (hasallele(mono,poly))
+ return(swapmonofirst(mono,poly))
+ else
+ return(mono)
+ }
+ return(newC)
+}
Modified: pkg/GenABEL/R/convert.snp.mach.R
===================================================================
--- pkg/GenABEL/R/convert.snp.mach.R 2011-07-08 18:09:30 UTC (rev 749)
+++ pkg/GenABEL/R/convert.snp.mach.R 2011-07-15 14:23:17 UTC (rev 750)
@@ -1,5 +1,11 @@
"convert.snp.mach" <- function(pedfile,mapfile,infofile,outfile,quality=0.9, column.quality = 7, strand="+", ... ) {
-
+
+ cat("****************************************\n")
+ cat("THIS PROCDURE IS NOT RECOMMENDED FOR USE\n")
+ cat("CONSIDER USE OF REGRESSION-BASED PROCEDURES\n")
+ cat("FOR ANALYSIS OF IMPUTED DATA!!!\n\n")
+ cat("****************************************\n")
+
cat("Converting data to raw format...\n")
convert.snp.ped(pedf=pedfile,mapf=mapfile,outf=outfile,format="mach", wslash=T, strand=strand, ... )
if (quality<=0) {
Modified: pkg/GenABEL/R/merge.snp.data.R
===================================================================
--- pkg/GenABEL/R/merge.snp.data.R 2011-07-08 18:09:30 UTC (rev 749)
+++ pkg/GenABEL/R/merge.snp.data.R 2011-07-15 14:23:17 UTC (rev 750)
@@ -127,7 +127,80 @@
which_snp_intersect_in_y <- match(x at snpnames, y at snpnames, nomatch = -1)
which_snp_intersect_in_y <- which_snp_intersect_in_y[which_snp_intersect_in_y > 0]
+##### taking care of monomorphics -- YA, 2011.07.11 #####
+#print(which_snp_intersect_in_x)
+#print(which_snp_intersect_in_y)
+
+monos <- function(data) {
+ ss <- strsplit(data,"")
+ ff <- function(a) {
+ if (a[1]==a[2]) return(TRUE) else return(FALSE)
+ }
+ return(unlist(lapply(ss,FUN=ff)))
+}
+#x_filtered_mono_x_not_mono_y <- which(monos(coding(x)[which_snp_intersect_in_x]) &
+# !monos(coding(y)[which_snp_intersect_in_y]))
+if_mono_x_not_mono_y <- monos(coding(x)[which_snp_intersect_in_x]) &
+ !monos(coding(y)[which_snp_intersect_in_y])
+which_mono_x_not_mono_y <- which_snp_intersect_in_x[if_mono_x_not_mono_y]
+#print(if_mono_x_not_mono_y)
+#print(which_mono_x_not_mono_y)
+if_mono_y_not_mono_x <- monos(coding(y)[which_snp_intersect_in_y]) &
+ !monos(coding(x)[which_snp_intersect_in_x])
+which_mono_y_not_mono_x <- which_snp_intersect_in_y[if_mono_y_not_mono_x]
+#print(if_mono_y_not_mono_x)
+#print(which_mono_y_not_mono_x)
+#print(which_snp_intersect_in_y[x_filtered_mono_x_not_mono_y])
+# here need to check legacy based on coding and strand data e.g.
+# AA(+) -> AG(+), TC(-) good; -> TC(+), AG(-), GC(+/-) no good
+#
+# take care of coding in x
+cdng_x <- coding(x)[which_snp_intersect_in_x[if_mono_x_not_mono_y]]
+cdng_y <- coding(y)[which_snp_intersect_in_y[if_mono_x_not_mono_y]]
+strnd_x <- strand(x)[which_snp_intersect_in_x[if_mono_x_not_mono_y]]
+strnd_y <- strand(y)[which_snp_intersect_in_y[if_mono_x_not_mono_y]]
+#print(cdng_x)
+#print(cdng_y)
+#print(strnd_x)
+#print(strnd_y)
+jjj <- 1
+for (iii in which_mono_x_not_mono_y) {
+# print(iii)
+ mono <- cdng_x[jjj]
+ poly <- cdng_y[jjj]
+ monoStrand <- strnd_x[jjj]
+ polyStrand <- strnd_y[jjj]
+# print(c(mono,poly,monoStrand,polyStrand,forcestranduse))
+ newC <- translate_mono_coding(mono,poly,monoStrand,polyStrand,forcestranduse=forcestranduse)
+ jjj <- jjj + 1
+ coding(x)[iii] <- newC
+# print(newC)
+}
+# take care of coding in y
+cdng_x <- coding(x)[which_snp_intersect_in_x[if_mono_y_not_mono_x]]
+cdng_y <- coding(y)[which_snp_intersect_in_y[if_mono_y_not_mono_x]]
+strnd_x <- strand(x)[which_snp_intersect_in_x[if_mono_y_not_mono_x]]
+strnd_y <- strand(y)[which_snp_intersect_in_y[if_mono_y_not_mono_x]]
+#print(cdng_x)
+#print(cdng_y)
+#print(strnd_x)
+#print(strnd_y)
+jjj <- 1
+for (iii in which_mono_y_not_mono_x) {
+# print(iii)
+ mono <- cdng_y[jjj]
+ poly <- cdng_x[jjj]
+ monoStrand <- strnd_y[jjj]
+ polyStrand <- strnd_x[jjj]
+# print(c(mono,poly,monoStrand,polyStrand,forcestranduse))
+ newC <- translate_mono_coding(mono,poly,monoStrand,polyStrand,forcestranduse=forcestranduse)
+ jjj <- jjj + 1
+ coding(y)[iii] <- newC
+# print(newC)
+}
+##### END taking care of monomorphics ###################
+
if(intersected_snps_only)
{
x_new <- x[,which_snp_intersect_in_x]
Modified: pkg/GenABEL/R/ss.R
===================================================================
--- pkg/GenABEL/R/ss.R 2011-07-08 18:09:30 UTC (rev 749)
+++ pkg/GenABEL/R/ss.R 2011-07-15 14:23:17 UTC (rev 750)
@@ -699,8 +699,8 @@
definition = function(x,value)
{
rawVal <- as.raw(alleleID.char2raw()[value])
+ names(rawVal) <- x at snpnames
x at coding <- new("snp.coding",rawVal)
- names(x at coding) <- x at snpnames
if (any(is.na(coding(x)))) {
cat("wrong coding value, should be one of ")
cat(names(alleleID.char2raw()),"\n")
Modified: pkg/GenABEL/R/zzz.R
===================================================================
--- pkg/GenABEL/R/zzz.R 2011-07-08 18:09:30 UTC (rev 749)
+++ pkg/GenABEL/R/zzz.R 2011-07-15 14:23:17 UTC (rev 750)
@@ -1,6 +1,6 @@
.onLoad <- function(lib, pkg) {
GenABEL.version <- "1.6-8"
- cat("GenABEL v.",GenABEL.version,"(May 18, 2011) loaded\n")
+ cat("GenABEL v.",GenABEL.version,"(July 15, 2011) loaded\n")
# check for updates and news
address <- c(
Modified: pkg/GenABEL/inst/unitTests/report.html
===================================================================
--- pkg/GenABEL/inst/unitTests/report.html 2011-07-08 18:09:30 UTC (rev 749)
+++ pkg/GenABEL/inst/unitTests/report.html 2011-07-15 14:23:17 UTC (rev 750)
@@ -1,8 +1,8 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
"http://www.w3.org/TR/html4/transitional.dtd">
-<html><head><title>RUNIT TEST PROTOCOL--Wed Jul 6 23:29:32 2011</title>
+<html><head><title>RUNIT TEST PROTOCOL--Fri Jul 15 16:20:40 2011</title>
</head>
-<body><h1 TRUE>RUNIT TEST PROTOCOL--Wed Jul 6 23:29:32 2011</h1>
+<body><h1 TRUE>RUNIT TEST PROTOCOL--Fri Jul 15 16:20:40 2011</h1>
<p>Number of test functions: 10</p>
<p>Number of errors: 0</p>
<p>Number of failures: 0</p>
@@ -23,7 +23,7 @@
<hr>
<h3 TRUE>Details</h3>
<p><a name="GenABEL unit testing"><h5 TRUE>Test Suite: GenABEL unit testing</h5>
-</a>Test function regexp: ^test.+<br/>Test file regexp: ^runit.+\.[rR]$<br/>Involved directory:<br/>/Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests<br/><ul><li><a href="/Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.convert.snp.ped.R">Test file: runit.convert.snp.ped.R</a><ul><li><a name="GenABEL unit testing__Users_yuryaulchenko_eclipse_workspace_genabel_pkg_GenABEL_tests_.._inst_unitTests_runit.convert.snp.ped.R_test.convert.snp.ped">test.convert.snp.ped: (0 checks) ... OK (0.07 seconds)<br/></a></li></ul></li><li><a href="/Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.convert.snp.tped.R">Test file: runit.convert.snp.tped.R</a><ul><li><a name="GenABEL unit testing__Users_yuryaulchenko_eclipse_workspace_genabel_pkg_GenABEL_tests_.._inst_unitTests_runit.convert.snp.tped.R_test.convert.snp">test.convert.snp: (0 checks) ... OK (0.71 seconds)<br/></a></li></ul></li><li><a href="/Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.descriptives.trait.R">Test file: runit.descriptives.trait.R</a><ul><li><a name="GenABEL unit testing__Users_yuryaulchenko_eclipse_workspace_genabel_pkg_GenABEL_tests_.._inst_unitTests_runit.descriptives.trait.R_test.descriptives.trait">test.descriptives.trait: (1 checks) ... OK (0.41 seconds)<br/></a></li></ul></li><li><a href="/Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.findRelatives.R">Test file: runit.findRelatives.R</a><ul><li><a name="GenABEL unit testing__Users_yuryaulchenko_eclipse_workspace_genabel_pkg_GenABEL_tests_.._inst_unitTests_runit.findRelatives.R_test.findRelatives">test.findRelatives: (10 checks) ... OK (55.68 seconds)<br/></a></li></ul></li><li><a href="/Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.impute2xxx.R">Test file: runit.impute2xxx.R</a><ul><li><a name="GenABEL unit testing__Users_yuryaulchenko_eclipse_workspace_genabel_pkg_GenABEL_tests_.._inst_unitTests_runit.impute2xxx.R_test.impute2databel">test.impute2databel: (23 checks) ... OK (0.82 seconds)<br/></a></li></ul></li><li><a href="/Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.impute2xxx_large.R">Test file: runit.impute2xxx_large.R</a><ul><li><a name="GenABEL unit testing__Users_yuryaulchenko_eclipse_workspace_genabel_pkg_GenABEL_tests_.._inst_unitTests_runit.impute2xxx_large.R_test.impute2xxx_large">test.impute2xxx_large: (0 checks) ... OK (0 seconds)<br/></a></li></ul></li><li><a href="/Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.iterator.R">Test file: runit.iterator.R</a><ul><li><a name="GenABEL unit testing__Users_yuryaulchenko_eclipse_workspace_genabel_pkg_GenABEL_tests_.._inst_unitTests_runit.iterator.R_test.qtscore">test.qtscore: (0 checks) ... OK (0 seconds)<br/></a></li><li><a name="GenABEL unit testing__Users_yuryaulchenko_eclipse_workspace_genabel_pkg_GenABEL_tests_.._inst_unitTests_runit.iterator.R_test.summary_snp_data">test.summary_snp_data: (3 checks) ... OK (5.19 seconds)<br/></a></li></ul></li><li><a href="/Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.mach2databel.R">Test file: runit.mach2databel.R</a><ul><li><a name="GenABEL unit testing__Users_yuryaulchenko_eclipse_workspace_genabel_pkg_GenABEL_tests_.._inst_unitTests_runit.mach2databel.R_test.mach2databel">test.mach2databel: (8 checks) ... OK (0.15 seconds)<br/></a></li></ul></li><li><a href="/Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.polylik.R">Test file: runit.polylik.R</a><ul><li><a name="GenABEL unit testing__Users_yuryaulchenko_eclipse_workspace_genabel_pkg_GenABEL_tests_.._inst_unitTests_runit.polylik.R_test.polylik">test.polylik: (6 checks) ... OK (7.97 seconds)<br/></a></li></ul></li></ul><hr>
+</a>Test function regexp: ^test.+<br/>Test file regexp: ^runit.+\.[rR]$<br/>Involved directory:<br/>/Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests<br/><ul><li><a href="/Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.convert.snp.R">Test file: runit.convert.snp.R</a><ul><li><a name="GenABEL unit testing__Users_yuryaulchenko_eclipse_workspace_genabel_pkg_GenABEL_tests_.._inst_unitTests_runit.convert.snp.R_test.convert.snp">test.convert.snp: (4 checks) ... OK (0.98 seconds)<br/></a></li></ul></li><li><a href="/Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.convert.snp.ped.R">Test file: runit.convert.snp.ped.R</a><ul><li><a name="GenABEL unit testing__Users_yuryaulchenko_eclipse_workspace_genabel_pkg_GenABEL_tests_.._inst_unitTests_runit.convert.snp.ped.R_test.convert.snp.ped">test.convert.snp.ped: (0 checks) ... OK (0.02 seconds)<br/></a></li></ul></li><li><a href="/Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.descriptives.trait.R">Test file: runit.descriptives.trait.R</a><ul><li><a name="GenABEL unit testing__Users_yuryaulchenko_eclipse_workspace_genabel_pkg_GenABEL_tests_.._inst_unitTests_runit.descriptives.trait.R_test.descriptives.trait">test.descriptives.trait: (1 checks) ... OK (0.38 seconds)<br/></a></li></ul></li><li><a href="/Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.findRelatives.R">Test file: runit.findRelatives.R</a><ul><li><a name="GenABEL unit testing__Users_yuryaulchenko_eclipse_workspace_genabel_pkg_GenABEL_tests_.._inst_unitTests_runit.findRelatives.R_test.findRelatives">test.findRelatives: (10 checks) ... OK (77.47 seconds)<br/></a></li></ul></li><li><a href="/Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.impute2xxx.R">Test file: runit.impute2xxx.R</a><ul><li><a name="GenABEL unit testing__Users_yuryaulchenko_eclipse_workspace_genabel_pkg_GenABEL_tests_.._inst_unitTests_runit.impute2xxx.R_test.impute2databel">test.impute2databel: (23 checks) ... OK (0.47 seconds)<br/></a></li></ul></li><li><a href="/Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.impute2xxx_large.R">Test file: runit.impute2xxx_large.R</a><ul><li><a name="GenABEL unit testing__Users_yuryaulchenko_eclipse_workspace_genabel_pkg_GenABEL_tests_.._inst_unitTests_runit.impute2xxx_large.R_test.impute2xxx_large">test.impute2xxx_large: (0 checks) ... OK (0 seconds)<br/></a></li></ul></li><li><a href="/Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.iterator.R">Test file: runit.iterator.R</a><ul><li><a name="GenABEL unit testing__Users_yuryaulchenko_eclipse_workspace_genabel_pkg_GenABEL_tests_.._inst_unitTests_runit.iterator.R_test.qtscore">test.qtscore: (0 checks) ... OK (0 seconds)<br/></a></li><li><a name="GenABEL unit testing__Users_yuryaulchenko_eclipse_workspace_genabel_pkg_GenABEL_tests_.._inst_unitTests_runit.iterator.R_test.summary_snp_data">test.summary_snp_data: (3 checks) ... OK (4.41 seconds)<br/></a></li></ul></li><li><a href="/Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.mach2databel.R">Test file: runit.mach2databel.R</a><ul><li><a name="GenABEL unit testing__Users_yuryaulchenko_eclipse_workspace_genabel_pkg_GenABEL_tests_.._inst_unitTests_runit.mach2databel.R_test.mach2databel">test.mach2databel: (8 checks) ... OK (0.12 seconds)<br/></a></li></ul></li><li><a href="/Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.polylik.R">Test file: runit.polylik.R</a><ul><li><a name="GenABEL unit testing__Users_yuryaulchenko_eclipse_workspace_genabel_pkg_GenABEL_tests_.._inst_unitTests_runit.polylik.R_test.polylik">test.polylik: (6 checks) ... OK (7.73 seconds)<br/></a></li></ul></li></ul><hr>
<table border="0" width="80%" >
<tr><th>Name</th>
<th>Value</th>
Modified: pkg/GenABEL/inst/unitTests/report.txt
===================================================================
--- pkg/GenABEL/inst/unitTests/report.txt 2011-07-08 18:09:30 UTC (rev 749)
+++ pkg/GenABEL/inst/unitTests/report.txt 2011-07-15 14:23:17 UTC (rev 750)
@@ -1,4 +1,4 @@
-RUNIT TEST PROTOCOL -- Wed Jul 6 23:29:31 2011
+RUNIT TEST PROTOCOL -- Fri Jul 15 16:20:40 2011
***********************************************
Number of test functions: 10
Number of errors: 0
@@ -18,30 +18,30 @@
Involved directory:
/Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests
---------------------------
+Test file: /Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.convert.snp.R
+test.convert.snp: (4 checks) ... OK (0.98 seconds)
+---------------------------
Test file: /Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.convert.snp.ped.R
-test.convert.snp.ped: (0 checks) ... OK (0.07 seconds)
+test.convert.snp.ped: (0 checks) ... OK (0.02 seconds)
---------------------------
-Test file: /Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.convert.snp.tped.R
-test.convert.snp: (0 checks) ... OK (0.71 seconds)
----------------------------
Test file: /Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.descriptives.trait.R
-test.descriptives.trait: (1 checks) ... OK (0.41 seconds)
+test.descriptives.trait: (1 checks) ... OK (0.38 seconds)
---------------------------
Test file: /Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.findRelatives.R
-test.findRelatives: (10 checks) ... OK (55.68 seconds)
+test.findRelatives: (10 checks) ... OK (77.47 seconds)
---------------------------
Test file: /Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.impute2xxx.R
-test.impute2databel: (23 checks) ... OK (0.82 seconds)
+test.impute2databel: (23 checks) ... OK (0.47 seconds)
---------------------------
Test file: /Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.impute2xxx_large.R
test.impute2xxx_large: (0 checks) ... OK (0 seconds)
---------------------------
Test file: /Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.iterator.R
test.qtscore: (0 checks) ... OK (0 seconds)
-test.summary_snp_data: (3 checks) ... OK (5.19 seconds)
+test.summary_snp_data: (3 checks) ... OK (4.41 seconds)
---------------------------
Test file: /Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.mach2databel.R
-test.mach2databel: (8 checks) ... OK (0.15 seconds)
+test.mach2databel: (8 checks) ... OK (0.12 seconds)
---------------------------
Test file: /Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.polylik.R
-test.polylik: (6 checks) ... OK (7.97 seconds)
+test.polylik: (6 checks) ... OK (7.73 seconds)
Modified: pkg/GenABEL/inst/unitTests/reportSummary.txt
===================================================================
--- pkg/GenABEL/inst/unitTests/reportSummary.txt 2011-07-08 18:09:30 UTC (rev 749)
+++ pkg/GenABEL/inst/unitTests/reportSummary.txt 2011-07-15 14:23:17 UTC (rev 750)
@@ -1,4 +1,4 @@
-RUNIT TEST PROTOCOL -- Wed Jul 6 23:29:31 2011
+RUNIT TEST PROTOCOL -- Fri Jul 15 16:20:40 2011
***********************************************
Number of test functions: 10
Number of errors: 0
Modified: pkg/GenABEL/inst/unitTests/runit.convert.snp.R
===================================================================
--- pkg/GenABEL/inst/unitTests/runit.convert.snp.R 2011-07-08 18:09:30 UTC (rev 749)
+++ pkg/GenABEL/inst/unitTests/runit.convert.snp.R 2011-07-15 14:23:17 UTC (rev 750)
@@ -22,37 +22,44 @@
test.convert.snp <- function()
{
- library(GenABEL)
- convert.snp.tped(tped="gatest.tped", tfam="gatest.tfam", out="gatest.raw", strand="+")
+ # library(GenABEL)
+ # check equivalence of convertion
+ dta <- read.table("gatest.tped",sep="\t",head=FALSE,strings=FALSE)
+ dta <- dta[,-c(1:4)]
+ dta <- t(dta)
+ dta <- sub(" ","/",dta)
+ dta <- sub("0/0",NA,dta)
+ dimnames(dta) <- NULL
+
+ convert.snp.tped(tped="gatest.tped", tfam="gatest.tfam", out="gatest.raw")
data <- load.gwaa.data(gen="gatest.raw", phe="gatest.phe")
- data
- coding(data)
- as.character(data)
- as.numeric(data)
+ dta1 <- as.character(data)
+ dimnames(dta1) <- NULL
+ checkEquals(dta,dta1)
+
+ convert.snp.ped(pedfile="gatest.ped", mapfile="gatest.map", out="gatest.raw",mapHasHeaderLine=FALSE)
+ data <- load.gwaa.data(gen="gatest.raw", phe="gatest.phe")
+ dta1 <- as.character(data)
+ dimnames(dta1) <- NULL
+ checkEquals(dta,dta1)
+
+ convert.snp.illumina(infile="gatest.illu", out="gatest.raw")
+ data <- load.gwaa.data(gen="gatest.raw", phe="gatest.phe")
+ dta1 <- as.character(data)
+ dimnames(dta1) <- NULL
+ checkEquals(dta,dta1)
+
+ # check possibility of merge
+
convert.snp.tped(tped="gatest1.tped", tfam="gatest1.tfam", out="gatest1.raw", strand="+")
data1 <- load.gwaa.data(gen="gatest1.raw", phe="gatest1.phe")
- data1
+ data1 <- data1[,sample(1:nsnps(data1),nsnps(data1))]
coding(data1)
- as.character(data1)
- as.numeric(data1)
mrg <- merge(gtdata(data),gtdata(data1))
- mrg
- coding(mrg$data)
- as.character(mrg$data)
- as.numeric(mrg$data)
+ mrg1 <- merge(data,data1)
+ checkEquals(as.character(mrg$data),as.character(gtdata(mrg1)))
- coding(data)
- class(data)
- coding(data) <- coding(data1)
- class(data)
- coding(data)
- coding(data1)
- as.character(data)
- as.character(data1)
- mrg <- merge(gtdata(data),gtdata(data1))
- mrg
- coding(mrg$data)
- as.character(mrg$data)
- as.numeric(mrg$data)
+ # here evaluate merged SNP one by one?
+
unlink("*.raw")
}
\ No newline at end of file
Modified: pkg/GenABEL/src/GAlib/convert_snp_illumina.cpp
===================================================================
--- pkg/GenABEL/src/GAlib/convert_snp_illumina.cpp 2011-07-08 18:09:30 UTC (rev 749)
+++ pkg/GenABEL/src/GAlib/convert_snp_illumina.cpp 2011-07-15 14:23:17 UTC (rev 750)
@@ -33,274 +33,274 @@
* strand == 2 -> minus ('-')
* strand == 3 -> four columns (name,chr,pos,strand) expected at the beginning
*
-**/
+ **/
extern "C" {
- void convert_snp_illumina (char** filename, char** outfilename, int* Strandid, int* Bcast, char **allele_codes, int* Ncodes) {
+void convert_snp_illumina (char** filename, char** outfilename, int* Strandid, int* Bcast, char **allele_codes, int* Ncodes) {
- short unsigned int ncodes = *Ncodes;
- short unsigned int strandid = *Strandid;
- long unsigned int bcast = *Bcast;
+ short unsigned int ncodes = *Ncodes;
+ short unsigned int strandid = *Strandid;
+ long unsigned int bcast = *Bcast;
- bool loud = TRUE;
- if (bcast <= 0) loud = FALSE;
+ bool loud = TRUE;
+ if (bcast <= 0) loud = FALSE;
- long unsigned int nsnps=0;
- long unsigned int nids=0;
- unsigned long int nbytes=0;
- unsigned long int byte;
+ long unsigned int nsnps=0;
+ long unsigned int nids=0;
+ unsigned long int nbytes=0;
+ unsigned long int byte;
- string data;
- string tempstr;
+ string data;
+ string tempstr;
- vector<string> iid;
- vector<string> chrom; string tmp_chrom;
- vector<string> snpnam; string tmp_snpnam;
- vector<unsigned long> map; unsigned long tmp_map;
- vector<unsigned char*> gtype; unsigned char* tmp_gtype;
- vector<string> coding; string tmp_coding,tmp_coding1;
- vector<unsigned short int> intcoding;
- vector<unsigned short int> strand; string tmp_strand;
- vector<string> codeset(ncodes);
- char tmp_chcoding [10];
+ vector<string> iid;
+ vector<string> chrom; string tmp_chrom;
+ vector<string> snpnam; string tmp_snpnam;
+ vector<unsigned long> map; unsigned long tmp_map;
+ vector<unsigned char*> gtype; unsigned char* tmp_gtype;
+ vector<string> coding; string tmp_coding,tmp_coding1;
+ vector<unsigned short int> intcoding;
+ vector<unsigned short int> strand; string tmp_strand;
+ vector<string> codeset(ncodes);
+ char tmp_chcoding [10];
- for (int i=0;i<ncodes;i++) codeset[i].assign(allele_codes[i]);
+ for (int i=0;i<ncodes;i++) codeset[i].assign(allele_codes[i]);
- ifstream illfile (filename[0]);
- if (illfile == NULL) {
- error ("could not open file '%s'!",filename[0]);
- }
+ ifstream illfile (filename[0]);
+ if (illfile == NULL) {
+ error ("could not open file '%s'!",filename[0]);
+ }
- if (loud) {
- Rprintf("Reading genotypes from file '%s' ...\n",filename[0]);
- }
+ if (loud) {
+ Rprintf("Reading genotypes from file '%s' ...\n",filename[0]);
+ }
- nsnps = 0;
- unsigned long int lasti = 1;
+ nsnps = 0;
+ unsigned long int lasti = 1;
- //
- // Processing header line
- //
+ //
+ // Processing header line
+ //
- if (getline(illfile,data)) {
- istringstream datas (data);
- datas >> tempstr >> tempstr >> tempstr;
- if (strandid == 3) datas >> tempstr;
- while (datas >> tempstr) {
- iid.push_back(tempstr);
- nids++;
- }
- } else {
- error("Can not read the first line from file '%s'!\n",filename[0]);
- }
+ if (getline(illfile,data)) {
+ istringstream datas (data);
+ datas >> tempstr >> tempstr >> tempstr;
+ if (strandid == 3) datas >> tempstr;
+ while (datas >> tempstr) {
+ iid.push_back(tempstr);
+ nids++;
+ }
+ } else {
+ error("Can not read the first line from file '%s'!\n",filename[0]);
+ }
- char* chgt = new char [nids*2];
- nbytes = (unsigned long int)ceil((double)nids/4.);
+ char* chgt = new char [nids*2];
+ nbytes = (unsigned long int)ceil((double)nids/4.);
- //
- //Processing rest of the file
- //
+ //
+ //Processing rest of the file
+ //
- while (getline(illfile,data)) {
- nsnps++;
+ while (getline(illfile,data)) {
+ nsnps++;
- istringstream datas (data);
+ istringstream datas (data);
- char gdata;
+ char gdata;
- if (!(datas >> tmp_snpnam >> tmp_chrom >> tmp_map))
- error("First three fields are missing in line %i!\n",(nsnps+1));
- else {
- chrom.push_back(tmp_chrom);
- snpnam.push_back(tmp_snpnam);
- map.push_back(tmp_map);
- if (strandid == 3) {
- if (!(datas >> tmp_strand)) error("Strand field is missing in line %i, SNP '%s'!\n",(nsnps+1),tmp_snpnam.c_str());
- if (tmp_strand == "u") strand.push_back(0);
- else if (tmp_strand == "+") strand.push_back(1);
- else if (tmp_strand == "-") strand.push_back(2);
- else error("Bad strand coding ('%s'), only '+', '-' or 'u' is accepted!\n",tmp_strand.c_str());
- } else strand.push_back(strandid);
+ if (!(datas >> tmp_snpnam >> tmp_chrom >> tmp_map))
+ error("First three fields are missing in line %i!\n",(nsnps+1));
+ else {
+ chrom.push_back(tmp_chrom);
+ snpnam.push_back(tmp_snpnam);
+ map.push_back(tmp_map);
+ if (strandid == 3) {
+ if (!(datas >> tmp_strand)) error("Strand field is missing in line %i, SNP '%s'!\n",(nsnps+1),tmp_snpnam.c_str());
+ if (tmp_strand == "u") strand.push_back(0);
+ else if (tmp_strand == "+") strand.push_back(1);
+ else if (tmp_strand == "-") strand.push_back(2);
+ else error("Bad strand coding ('%s'), only '+', '-' or 'u' is accepted!\n",tmp_strand.c_str());
+ } else strand.push_back(strandid);
- unsigned long int idx = 0;
+ unsigned long int idx = 0;
- idx = 0;
- for (unsigned long int i = 0;i<nids;i++) {
- if (!(datas >> tempstr)) error("Too few fields for SNP '%s', line %i (no. IDs = %li, no. fields = %li)!\n",snpnam[nsnps-1].c_str(),(nsnps+1),nids,(i+1));
- char sd[10];
- sprintf(sd,"%s",tempstr.c_str());
- chgt[idx++] = sd[0];
- chgt[idx++] = sd[1];
- }
+ idx = 0;
+ for (unsigned long int i = 0;i<nids;i++) {
+ if (!(datas >> tempstr)) error("Too few fields for SNP '%s', line %i (no. IDs = %li, no. fields = %li)!\n",snpnam[nsnps-1].c_str(),(nsnps+1),nids,(i+1));
+ char sd[10];
+ sprintf(sd,"%s",tempstr.c_str());
+ chgt[idx++] = sd[0];
+ chgt[idx++] = sd[1];
+ }
- //
- //Processing SNP string
- //
- unsigned short int* gnum = new unsigned short int [nids*2];
- unsigned short int ind;
- unsigned short int offset[4] = {6,4,2,0};
- char allele1 = 0;
- char allele2 = 0;
- unsigned long int ca1 = 0;
- unsigned long int ca2 = 0;
- for (unsigned long int idx = 0; idx < 2*nids; idx++) {
+ //
+ //Processing SNP string
+ //
+ unsigned short int* gnum = new unsigned short int [nids*2];
+ unsigned short int ind;
+ unsigned short int offset[4] = {6,4,2,0};
+ char allele1 = 0;
+ char allele2 = 0;
+ unsigned long int ca1 = 0;
+ unsigned long int ca2 = 0;
+ for (unsigned long int idx = 0; idx < 2*nids; idx++) {
- gdata = chgt[idx];
- if (gdata == allele1) {
- gnum[idx] = 1;
- ca1++;
- } else if (gdata == allele2) {
- gnum[idx] = 3;
- ca2++;
- } else if (gdata == '0' || gdata == '-') {
- gnum[idx] = 0;
- } else {
- if (allele1 == 0) {
- allele1 = gdata;
- gnum[idx] = 1;
- ca1++;
- } else if (allele2 == 0) {
- allele2 = gdata;
- gnum[idx] = 3;
- ca2++;
- } else {
- error ("illegal genotype (three alleles) for SNP '%s' (line %li)!",
- snpnam[nsnps-1].c_str(),(nsnps+1));
- }
- }
- }
+ gdata = chgt[idx];
+ if (gdata == allele1) {
+ gnum[idx] = 1;
+ ca1++;
+ } else if (gdata == allele2) {
+ gnum[idx] = 3;
+ ca2++;
+ } else if (gdata == '0' || gdata == '-') {
+ gnum[idx] = 0;
+ } else {
+ if (allele1 == 0) {
+ allele1 = gdata;
+ gnum[idx] = 1;
+ ca1++;
+ } else if (allele2 == 0) {
+ allele2 = gdata;
+ gnum[idx] = 3;
+ ca2++;
+ } else {
+ error ("illegal genotype (three alleles) for SNP '%s' (line %li)!",
+ snpnam[nsnps-1].c_str(),(nsnps+1));
+ }
+ }
+ }
- if (!allele1 && !allele2) tmp_coding="12"; // all genotypes missing
- else if (!allele1 && allele2) sprintf(tmp_chcoding,"%c%c",allele2,allele2); // only one allele present
- else if (allele1 && !allele2) sprintf(tmp_chcoding,"%c%c",allele1,allele1); // only one allele present
- else if (allele1 && allele2) {
- if (ca1 > ca2) sprintf(tmp_chcoding,"%c%c",allele1,allele2);
- else sprintf(tmp_chcoding,"%c%c",allele2,allele1);
- }
- Rprintf("%s\n",tmp_chcoding);
- tmp_coding.assign(tmp_chcoding);
- int ccd = -1;
- for (int i = 0; i < ncodes; i++) {
- if (codeset[i].compare(tmp_coding)==0) {
- ccd = i + 1;
- intcoding.push_back(ccd);
- }
- }
- if (ccd<0) error ("coding '%s' for SNP not recognised !\n",tmp_coding.c_str());
- try {
- tmp_gtype = new unsigned char [nbytes];
- }
- catch (bad_alloc) {
- error ("ran out of memory reading file '%s' line %li !");
- }
- if (tmp_gtype == NULL) {
- error ("ran out of memory reading file '%s' line %li !");
- }
+ if (!allele1 && !allele2) tmp_coding="12"; // all genotypes missing
+ else if (!allele1 && allele2) sprintf(tmp_chcoding,"%c%c",allele2,allele2); // only one allele present
+ else if (allele1 && !allele2) sprintf(tmp_chcoding,"%c%c",allele1,allele1); // only one allele present
+ else if (allele1 && allele2) {
+ if (ca1 > ca2) sprintf(tmp_chcoding,"%c%c",allele1,allele2);
+ else sprintf(tmp_chcoding,"%c%c",allele2,allele1);
+ }
+ //Rprintf("%s\n",tmp_chcoding);
+ tmp_coding.assign(tmp_chcoding);
+ int ccd = -1;
+ for (int i = 0; i < ncodes; i++) {
+ if (codeset[i].compare(tmp_coding)==0) {
+ ccd = i + 1;
+ intcoding.push_back(ccd);
+ }
+ }
+ if (ccd<0) error ("coding '%s' for SNP not recognised !\n",tmp_coding.c_str());
+ try {
+ tmp_gtype = new unsigned char [nbytes];
+ }
+ catch (bad_alloc) {
+ error ("ran out of memory reading file '%s' line %li !");
+ }
+ if (tmp_gtype == NULL) {
+ error ("ran out of memory reading file '%s' line %li !");
+ }
- idx = 0;
- for (byte = 0; byte < nbytes; ++byte) {
+ idx = 0;
+ for (byte = 0; byte < nbytes; ++byte) {
- tmp_gtype[byte] = 0;
- for (ind = 0; ind < 4; ++ind) {
+ tmp_gtype[byte] = 0;
+ for (ind = 0; ind < 4; ++ind) {
- switch (gnum[idx]+gnum[idx+1]) {
- case 2:
- if (ca1 > ca2)
- tmp_gtype[byte] = tmp_gtype[byte] | ((unsigned char)1 << offset[ind]);
- else
- tmp_gtype[byte] = tmp_gtype[byte] | ((unsigned char)3 << offset[ind]);
- break;
- case 4:
[TRUNCATED]
To get the complete diff run:
svnlook diff /svnroot/genabel -r 750
More information about the Genabel-commits
mailing list