[Genabel-commits] r750 - in pkg/GenABEL: . R inst/unitTests src/GAlib

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Fri Jul 15 16:23:17 CEST 2011


Author: yurii
Date: 2011-07-15 16:23:17 +0200 (Fri, 15 Jul 2011)
New Revision: 750

Modified:
   pkg/GenABEL/CHANGES.LOG
   pkg/GenABEL/R/alleleID.R
   pkg/GenABEL/R/convert.snp.mach.R
   pkg/GenABEL/R/merge.snp.data.R
   pkg/GenABEL/R/ss.R
   pkg/GenABEL/R/zzz.R
   pkg/GenABEL/inst/unitTests/report.html
   pkg/GenABEL/inst/unitTests/report.txt
   pkg/GenABEL/inst/unitTests/reportSummary.txt
   pkg/GenABEL/inst/unitTests/runit.convert.snp.R
   pkg/GenABEL/src/GAlib/convert_snp_illumina.cpp
   pkg/GenABEL/src/GAlib/convert_snp_merlin.cpp
   pkg/GenABEL/src/GAlib/convert_snp_merlin_wslash.cpp
   pkg/GenABEL/src/GAlib/convert_snp_tped.cpp
Log:
Allowing monomorphic coding and meaningful merge of monomorphics with polymorphics

Modified: pkg/GenABEL/CHANGES.LOG
===================================================================
--- pkg/GenABEL/CHANGES.LOG	2011-07-08 18:09:30 UTC (rev 749)
+++ pkg/GenABEL/CHANGES.LOG	2011-07-15 14:23:17 UTC (rev 750)
@@ -1,8 +1,14 @@
-*** v. 1.6-8 (2011.07.06)
+*** v. 1.6-8 (2011.07.15)
 
+Updated the merge.snp.data procedure to allow meaningful 
+merging of mono- and poly- codings. Take care -- this is 
+new and not tested much yet! You best check few results 
+manually after the merge!
+
 Added coding<- method to gwaa.data and snp.data classes
 
-Added monomorphic classes to 'snp.data' and convert.snp.*
+Added I/D encoding and monomorphic classes (AA, TT, GG, CC, 
+--, II, DD, ...) to 'snp.data' and convert.snp.*
 
 Updated 'polygenic' with use of 'polylik_eigen' developed by 
 Gulnara Svischeva. Now 'polygenic' works MUCH faster. The 

Modified: pkg/GenABEL/R/alleleID.R
===================================================================
--- pkg/GenABEL/R/alleleID.R	2011-07-08 18:09:30 UTC (rev 749)
+++ pkg/GenABEL/R/alleleID.R	2011-07-15 14:23:17 UTC (rev 750)
@@ -15,7 +15,11 @@
 	idx <- idx + 1
 	a[[idx]] <- c("B","A")
 	idx <- idx + 1
-	allalleles <- c("1","2","B","A","T","G","C","-")
+	a[[idx]] <- c("I","D")
+	idx <- idx + 1
+	a[[idx]] <- c("D","I")
+	idx <- idx + 1
+	allalleles <- c("1","2","B","I","D","A","T","G","C","-")
 	for (jj in allalleles) {
 		a[[idx]] <- c(jj,jj)
 		idx <- idx + 1
@@ -37,8 +41,8 @@
 	a <- alleleID.alleles()
 	out <- c("OPPA")
 	idx <- 1
-	alleles <- c("A","T","G","C","-")
-	reval <- c("T","A","C","G","-")
+	alleles <- c("A","T","G","C","-","I","D")
+	reval <- c("T","A","C","G","-","I","D")
 	for (i in a) {
 		x <- match(alleles,i)
 		if (sum(!is.na(x)) == 2) {
@@ -113,4 +117,48 @@
 	out
 }
 
-
+translate_mono_coding <- function(mono,poly,strandMono,strandPoly,forcestranduse) {
+	hasallele <- function(mono,poly) {
+		mono <- strsplit(mono,"")[[1]][1]
+		spl <- strsplit(poly,"")
+		#print(spl[[1]][1])
+		#print(spl[[1]][2])
+		#print(mono)
+		if (spl[[1]][1] == mono || spl[[1]][2] == mono) { 
+			return(TRUE)
+		} else {
+			return(FALSE)
+		}
+	}
+	swapmonofirst <- function(mono,poly) {
+		mono <- strsplit(mono,"")[[1]][1]
+		spl <- strsplit(poly,"")
+		#print(spl[[1]][1])
+		#print(mono)
+		if (spl[[1]][1] == mono)
+			return(paste(spl[[1]][1],spl[[1]][2],sep="")) 
+		else if (spl[[1]][2] == mono) 
+			return(paste(spl[[1]][2],spl[[1]][1],sep="")) 
+		else 
+			stop("swapmonofirst: error")
+	}
+	polyN <- which(alleleID.codes() == poly)
+	poly.reverse <- alleleID.codes.reverse()[polyN]
+	if (!(poly %in% c("AT","TA","GC","CG")) & !forcestranduse) {
+		if (hasallele(mono,poly)) 
+			return(swapmonofirst(mono,poly))
+		else if (hasallele(mono,poly.reverse))
+			return(swapmonofirst(mono,poly.reverse))
+	} else if (forcestranduse) {
+		if (strandMono=="u" || strandPoly=="u") return(mono)
+		if (strandMono!=strandPoly) {
+			polyN <- which(alleleID.codes() == poly)
+			poly <- alleleID.codes.reverse()[polyN]
+		}
+		if (hasallele(mono,poly)) 
+			return(swapmonofirst(mono,poly))
+		else 
+			return(mono)
+	}
+	return(newC)
+}

Modified: pkg/GenABEL/R/convert.snp.mach.R
===================================================================
--- pkg/GenABEL/R/convert.snp.mach.R	2011-07-08 18:09:30 UTC (rev 749)
+++ pkg/GenABEL/R/convert.snp.mach.R	2011-07-15 14:23:17 UTC (rev 750)
@@ -1,5 +1,11 @@
 "convert.snp.mach" <- function(pedfile,mapfile,infofile,outfile,quality=0.9, column.quality = 7, strand="+", ... ) {
-
+	
+	cat("****************************************\n")
+	cat("THIS PROCDURE IS NOT RECOMMENDED FOR USE\n")
+	cat("CONSIDER USE OF REGRESSION-BASED PROCEDURES\n")
+	cat("FOR ANALYSIS OF IMPUTED DATA!!!\n\n")
+	cat("****************************************\n")
+	
 	cat("Converting data to raw format...\n")
 	convert.snp.ped(pedf=pedfile,mapf=mapfile,outf=outfile,format="mach", wslash=T, strand=strand, ... )
 	if (quality<=0) {

Modified: pkg/GenABEL/R/merge.snp.data.R
===================================================================
--- pkg/GenABEL/R/merge.snp.data.R	2011-07-08 18:09:30 UTC (rev 749)
+++ pkg/GenABEL/R/merge.snp.data.R	2011-07-15 14:23:17 UTC (rev 750)
@@ -127,7 +127,80 @@
 which_snp_intersect_in_y <- match(x at snpnames, y at snpnames, nomatch = -1)
 which_snp_intersect_in_y <- which_snp_intersect_in_y[which_snp_intersect_in_y > 0]
 
+##### taking care of monomorphics -- YA, 2011.07.11 #####
 
+#print(which_snp_intersect_in_x)
+#print(which_snp_intersect_in_y)
+
+monos <- function(data) {
+	ss <- strsplit(data,"")
+	ff <- function(a) {
+		if (a[1]==a[2]) return(TRUE) else return(FALSE)
+	}
+	return(unlist(lapply(ss,FUN=ff)))
+}
+#x_filtered_mono_x_not_mono_y <- which(monos(coding(x)[which_snp_intersect_in_x]) & 
+#				!monos(coding(y)[which_snp_intersect_in_y]))
+if_mono_x_not_mono_y <- monos(coding(x)[which_snp_intersect_in_x]) & 
+				!monos(coding(y)[which_snp_intersect_in_y])
+which_mono_x_not_mono_y <- which_snp_intersect_in_x[if_mono_x_not_mono_y]
+#print(if_mono_x_not_mono_y)
+#print(which_mono_x_not_mono_y)
+if_mono_y_not_mono_x <- monos(coding(y)[which_snp_intersect_in_y]) & 
+				!monos(coding(x)[which_snp_intersect_in_x])
+which_mono_y_not_mono_x <- which_snp_intersect_in_y[if_mono_y_not_mono_x]
+#print(if_mono_y_not_mono_x)
+#print(which_mono_y_not_mono_x)
+#print(which_snp_intersect_in_y[x_filtered_mono_x_not_mono_y])
+# here need to check legacy based on coding and strand data e.g. 
+# AA(+) -> AG(+), TC(-) good; -> TC(+), AG(-), GC(+/-) no good
+#
+# take care of coding in x
+cdng_x <- coding(x)[which_snp_intersect_in_x[if_mono_x_not_mono_y]]
+cdng_y <- coding(y)[which_snp_intersect_in_y[if_mono_x_not_mono_y]]
+strnd_x <- strand(x)[which_snp_intersect_in_x[if_mono_x_not_mono_y]]
+strnd_y <- strand(y)[which_snp_intersect_in_y[if_mono_x_not_mono_y]]
+#print(cdng_x)
+#print(cdng_y)
+#print(strnd_x)
+#print(strnd_y)
+jjj <- 1
+for (iii in which_mono_x_not_mono_y) {
+#	print(iii)
+	mono <- cdng_x[jjj]
+	poly <- cdng_y[jjj]
+	monoStrand <- strnd_x[jjj]
+	polyStrand <- strnd_y[jjj]
+#	print(c(mono,poly,monoStrand,polyStrand,forcestranduse))
+	newC <- translate_mono_coding(mono,poly,monoStrand,polyStrand,forcestranduse=forcestranduse)
+	jjj <- jjj + 1 
+	coding(x)[iii] <- newC
+#	print(newC)
+}
+# take care of coding in y
+cdng_x <- coding(x)[which_snp_intersect_in_x[if_mono_y_not_mono_x]]
+cdng_y <- coding(y)[which_snp_intersect_in_y[if_mono_y_not_mono_x]]
+strnd_x <- strand(x)[which_snp_intersect_in_x[if_mono_y_not_mono_x]]
+strnd_y <- strand(y)[which_snp_intersect_in_y[if_mono_y_not_mono_x]]
+#print(cdng_x)
+#print(cdng_y)
+#print(strnd_x)
+#print(strnd_y)
+jjj <- 1
+for (iii in which_mono_y_not_mono_x) {
+#	print(iii)
+	mono <- cdng_y[jjj]
+	poly <- cdng_x[jjj]
+	monoStrand <- strnd_y[jjj]
+	polyStrand <- strnd_x[jjj]
+#	print(c(mono,poly,monoStrand,polyStrand,forcestranduse))
+	newC <- translate_mono_coding(mono,poly,monoStrand,polyStrand,forcestranduse=forcestranduse)
+	jjj <- jjj + 1 
+	coding(y)[iii] <- newC
+#	print(newC)
+}
+##### END taking care of monomorphics ###################
+
 if(intersected_snps_only) 
 	{
 	x_new <- x[,which_snp_intersect_in_x]

Modified: pkg/GenABEL/R/ss.R
===================================================================
--- pkg/GenABEL/R/ss.R	2011-07-08 18:09:30 UTC (rev 749)
+++ pkg/GenABEL/R/ss.R	2011-07-15 14:23:17 UTC (rev 750)
@@ -699,8 +699,8 @@
 		definition = function(x,value) 
 		{
 			rawVal <- as.raw(alleleID.char2raw()[value])
+			names(rawVal) <- x at snpnames
 			x at coding <- new("snp.coding",rawVal)
-			names(x at coding) <- x at snpnames
 			if (any(is.na(coding(x)))) {
 				cat("wrong coding value, should be one of ")
 				cat(names(alleleID.char2raw()),"\n")

Modified: pkg/GenABEL/R/zzz.R
===================================================================
--- pkg/GenABEL/R/zzz.R	2011-07-08 18:09:30 UTC (rev 749)
+++ pkg/GenABEL/R/zzz.R	2011-07-15 14:23:17 UTC (rev 750)
@@ -1,6 +1,6 @@
 .onLoad <- function(lib, pkg) {
 	GenABEL.version <- "1.6-8"
-	cat("GenABEL v.",GenABEL.version,"(May 18, 2011) loaded\n")
+	cat("GenABEL v.",GenABEL.version,"(July 15, 2011) loaded\n")
 	
 	# check for updates and news
 	address <- c(

Modified: pkg/GenABEL/inst/unitTests/report.html
===================================================================
--- pkg/GenABEL/inst/unitTests/report.html	2011-07-08 18:09:30 UTC (rev 749)
+++ pkg/GenABEL/inst/unitTests/report.html	2011-07-15 14:23:17 UTC (rev 750)
@@ -1,8 +1,8 @@
 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
 "http://www.w3.org/TR/html4/transitional.dtd">
-<html><head><title>RUNIT TEST PROTOCOL--Wed Jul  6 23:29:32 2011</title>
+<html><head><title>RUNIT TEST PROTOCOL--Fri Jul 15 16:20:40 2011</title>
 </head>
-<body><h1 TRUE>RUNIT TEST PROTOCOL--Wed Jul  6 23:29:32 2011</h1>
+<body><h1 TRUE>RUNIT TEST PROTOCOL--Fri Jul 15 16:20:40 2011</h1>
 <p>Number of test functions: 10</p>
 <p>Number of errors: 0</p>
 <p>Number of failures: 0</p>
@@ -23,7 +23,7 @@
 <hr>
 <h3 TRUE>Details</h3>
 <p><a name="GenABEL unit testing"><h5 TRUE>Test Suite: GenABEL unit testing</h5>
-</a>Test function regexp: ^test.+<br/>Test file regexp: ^runit.+\.[rR]$<br/>Involved directory:<br/>/Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests<br/><ul><li><a href="/Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.convert.snp.ped.R">Test file: runit.convert.snp.ped.R</a><ul><li><a name="GenABEL unit testing__Users_yuryaulchenko_eclipse_workspace_genabel_pkg_GenABEL_tests_.._inst_unitTests_runit.convert.snp.ped.R_test.convert.snp.ped">test.convert.snp.ped: (0 checks) ... OK (0.07 seconds)<br/></a></li></ul></li><li><a href="/Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.convert.snp.tped.R">Test file: runit.convert.snp.tped.R</a><ul><li><a name="GenABEL unit testing__Users_yuryaulchenko_eclipse_workspace_genabel_pkg_GenABEL_tests_.._inst_unitTests_runit.convert.snp.tped.R_test.convert.snp">test.convert.snp: (0 checks) ... OK (0.71 seconds)<br/></a></li></ul></li><li><a href="/Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.descriptives.trait.R">Test file: runit.descriptives.trait.R</a><ul><li><a name="GenABEL unit testing__Users_yuryaulchenko_eclipse_workspace_genabel_pkg_GenABEL_tests_.._inst_unitTests_runit.descriptives.trait.R_test.descriptives.trait">test.descriptives.trait: (1 checks) ... OK (0.41 seconds)<br/></a></li></ul></li><li><a href="/Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.findRelatives.R">Test file: runit.findRelatives.R</a><ul><li><a name="GenABEL unit testing__Users_yuryaulchenko_eclipse_workspace_genabel_pkg_GenABEL_tests_.._inst_unitTests_runit.findRelatives.R_test.findRelatives">test.findRelatives: (10 checks) ... OK (55.68 seconds)<br/></a></li></ul></li><li><a href="/Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.impute2xxx.R">Test file: runit.impute2xxx.R</a><ul><li><a name="GenABEL unit testing__Users_yuryaulchenko_eclipse_workspace_genabel_pkg_GenABEL_tests_.._inst_unitTests_runit.impute2xxx.R_test.impute2databel">test.impute2databel: (23 checks) ... OK (0.82 seconds)<br/></a></li></ul></li><li><a href="/Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.impute2xxx_large.R">Test file: runit.impute2xxx_large.R</a><ul><li><a name="GenABEL unit testing__Users_yuryaulchenko_eclipse_workspace_genabel_pkg_GenABEL_tests_.._inst_unitTests_runit.impute2xxx_large.R_test.impute2xxx_large">test.impute2xxx_large: (0 checks) ... OK (0 seconds)<br/></a></li></ul></li><li><a href="/Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.iterator.R">Test file: runit.iterator.R</a><ul><li><a name="GenABEL unit testing__Users_yuryaulchenko_eclipse_workspace_genabel_pkg_GenABEL_tests_.._inst_unitTests_runit.iterator.R_test.qtscore">test.qtscore: (0 checks) ... OK (0 seconds)<br/></a></li><li><a name="GenABEL unit testing__Users_yuryaulchenko_eclipse_workspace_genabel_pkg_GenABEL_tests_.._inst_unitTests_runit.iterator.R_test.summary_snp_data">test.summary_snp_data: (3 checks) ... OK (5.19 seconds)<br/></a></li></ul></li><li><a href="/Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.mach2databel.R">Test file: runit.mach2databel.R</a><ul><li><a name="GenABEL unit testing__Users_yuryaulchenko_eclipse_workspace_genabel_pkg_GenABEL_tests_.._inst_unitTests_runit.mach2databel.R_test.mach2databel">test.mach2databel: (8 checks) ... OK (0.15 seconds)<br/></a></li></ul></li><li><a href="/Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.polylik.R">Test file: runit.polylik.R</a><ul><li><a name="GenABEL unit testing__Users_yuryaulchenko_eclipse_workspace_genabel_pkg_GenABEL_tests_.._inst_unitTests_runit.polylik.R_test.polylik">test.polylik: (6 checks) ... OK (7.97 seconds)<br/></a></li></ul></li></ul><hr>
+</a>Test function regexp: ^test.+<br/>Test file regexp: ^runit.+\.[rR]$<br/>Involved directory:<br/>/Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests<br/><ul><li><a href="/Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.convert.snp.R">Test file: runit.convert.snp.R</a><ul><li><a name="GenABEL unit testing__Users_yuryaulchenko_eclipse_workspace_genabel_pkg_GenABEL_tests_.._inst_unitTests_runit.convert.snp.R_test.convert.snp">test.convert.snp: (4 checks) ... OK (0.98 seconds)<br/></a></li></ul></li><li><a href="/Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.convert.snp.ped.R">Test file: runit.convert.snp.ped.R</a><ul><li><a name="GenABEL unit testing__Users_yuryaulchenko_eclipse_workspace_genabel_pkg_GenABEL_tests_.._inst_unitTests_runit.convert.snp.ped.R_test.convert.snp.ped">test.convert.snp.ped: (0 checks) ... OK (0.02 seconds)<br/></a></li></ul></li><li><a href="/Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.descriptives.trait.R">Test file: runit.descriptives.trait.R</a><ul><li><a name="GenABEL unit testing__Users_yuryaulchenko_eclipse_workspace_genabel_pkg_GenABEL_tests_.._inst_unitTests_runit.descriptives.trait.R_test.descriptives.trait">test.descriptives.trait: (1 checks) ... OK (0.38 seconds)<br/></a></li></ul></li><li><a href="/Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.findRelatives.R">Test file: runit.findRelatives.R</a><ul><li><a name="GenABEL unit testing__Users_yuryaulchenko_eclipse_workspace_genabel_pkg_GenABEL_tests_.._inst_unitTests_runit.findRelatives.R_test.findRelatives">test.findRelatives: (10 checks) ... OK (77.47 seconds)<br/></a></li></ul></li><li><a href="/Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.impute2xxx.R">Test file: runit.impute2xxx.R</a><ul><li><a name="GenABEL unit testing__Users_yuryaulchenko_eclipse_workspace_genabel_pkg_GenABEL_tests_.._inst_unitTests_runit.impute2xxx.R_test.impute2databel">test.impute2databel: (23 checks) ... OK (0.47 seconds)<br/></a></li></ul></li><li><a href="/Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.impute2xxx_large.R">Test file: runit.impute2xxx_large.R</a><ul><li><a name="GenABEL unit testing__Users_yuryaulchenko_eclipse_workspace_genabel_pkg_GenABEL_tests_.._inst_unitTests_runit.impute2xxx_large.R_test.impute2xxx_large">test.impute2xxx_large: (0 checks) ... OK (0 seconds)<br/></a></li></ul></li><li><a href="/Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.iterator.R">Test file: runit.iterator.R</a><ul><li><a name="GenABEL unit testing__Users_yuryaulchenko_eclipse_workspace_genabel_pkg_GenABEL_tests_.._inst_unitTests_runit.iterator.R_test.qtscore">test.qtscore: (0 checks) ... OK (0 seconds)<br/></a></li><li><a name="GenABEL unit testing__Users_yuryaulchenko_eclipse_workspace_genabel_pkg_GenABEL_tests_.._inst_unitTests_runit.iterator.R_test.summary_snp_data">test.summary_snp_data: (3 checks) ... OK (4.41 seconds)<br/></a></li></ul></li><li><a href="/Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.mach2databel.R">Test file: runit.mach2databel.R</a><ul><li><a name="GenABEL unit testing__Users_yuryaulchenko_eclipse_workspace_genabel_pkg_GenABEL_tests_.._inst_unitTests_runit.mach2databel.R_test.mach2databel">test.mach2databel: (8 checks) ... OK (0.12 seconds)<br/></a></li></ul></li><li><a href="/Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.polylik.R">Test file: runit.polylik.R</a><ul><li><a name="GenABEL unit testing__Users_yuryaulchenko_eclipse_workspace_genabel_pkg_GenABEL_tests_.._inst_unitTests_runit.polylik.R_test.polylik">test.polylik: (6 checks) ... OK (7.73 seconds)<br/></a></li></ul></li></ul><hr>
 <table border="0" width="80%" >
 <tr><th>Name</th>
 <th>Value</th>

Modified: pkg/GenABEL/inst/unitTests/report.txt
===================================================================
--- pkg/GenABEL/inst/unitTests/report.txt	2011-07-08 18:09:30 UTC (rev 749)
+++ pkg/GenABEL/inst/unitTests/report.txt	2011-07-15 14:23:17 UTC (rev 750)
@@ -1,4 +1,4 @@
-RUNIT TEST PROTOCOL -- Wed Jul  6 23:29:31 2011 
+RUNIT TEST PROTOCOL -- Fri Jul 15 16:20:40 2011 
 *********************************************** 
 Number of test functions: 10 
 Number of errors: 0 
@@ -18,30 +18,30 @@
 Involved directory: 
 /Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests 
 --------------------------- 
+Test file: /Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.convert.snp.R 
+test.convert.snp: (4 checks) ... OK (0.98 seconds)
+--------------------------- 
 Test file: /Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.convert.snp.ped.R 
-test.convert.snp.ped: (0 checks) ... OK (0.07 seconds)
+test.convert.snp.ped: (0 checks) ... OK (0.02 seconds)
 --------------------------- 
-Test file: /Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.convert.snp.tped.R 
-test.convert.snp: (0 checks) ... OK (0.71 seconds)
---------------------------- 
 Test file: /Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.descriptives.trait.R 
-test.descriptives.trait: (1 checks) ... OK (0.41 seconds)
+test.descriptives.trait: (1 checks) ... OK (0.38 seconds)
 --------------------------- 
 Test file: /Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.findRelatives.R 
-test.findRelatives: (10 checks) ... OK (55.68 seconds)
+test.findRelatives: (10 checks) ... OK (77.47 seconds)
 --------------------------- 
 Test file: /Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.impute2xxx.R 
-test.impute2databel: (23 checks) ... OK (0.82 seconds)
+test.impute2databel: (23 checks) ... OK (0.47 seconds)
 --------------------------- 
 Test file: /Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.impute2xxx_large.R 
 test.impute2xxx_large: (0 checks) ... OK (0 seconds)
 --------------------------- 
 Test file: /Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.iterator.R 
 test.qtscore: (0 checks) ... OK (0 seconds)
-test.summary_snp_data: (3 checks) ... OK (5.19 seconds)
+test.summary_snp_data: (3 checks) ... OK (4.41 seconds)
 --------------------------- 
 Test file: /Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.mach2databel.R 
-test.mach2databel: (8 checks) ... OK (0.15 seconds)
+test.mach2databel: (8 checks) ... OK (0.12 seconds)
 --------------------------- 
 Test file: /Users/yuryaulchenko/eclipse_workspace/genabel/pkg/GenABEL/tests/../inst/unitTests/runit.polylik.R 
-test.polylik: (6 checks) ... OK (7.97 seconds)
+test.polylik: (6 checks) ... OK (7.73 seconds)

Modified: pkg/GenABEL/inst/unitTests/reportSummary.txt
===================================================================
--- pkg/GenABEL/inst/unitTests/reportSummary.txt	2011-07-08 18:09:30 UTC (rev 749)
+++ pkg/GenABEL/inst/unitTests/reportSummary.txt	2011-07-15 14:23:17 UTC (rev 750)
@@ -1,4 +1,4 @@
-RUNIT TEST PROTOCOL -- Wed Jul  6 23:29:31 2011 
+RUNIT TEST PROTOCOL -- Fri Jul 15 16:20:40 2011 
 *********************************************** 
 Number of test functions: 10 
 Number of errors: 0 

Modified: pkg/GenABEL/inst/unitTests/runit.convert.snp.R
===================================================================
--- pkg/GenABEL/inst/unitTests/runit.convert.snp.R	2011-07-08 18:09:30 UTC (rev 749)
+++ pkg/GenABEL/inst/unitTests/runit.convert.snp.R	2011-07-15 14:23:17 UTC (rev 750)
@@ -22,37 +22,44 @@
 
 test.convert.snp <- function()
 {
-	library(GenABEL)
-	convert.snp.tped(tped="gatest.tped", tfam="gatest.tfam", out="gatest.raw", strand="+")
+	# library(GenABEL)
+	# check equivalence of convertion
+	dta <- read.table("gatest.tped",sep="\t",head=FALSE,strings=FALSE)
+	dta <- dta[,-c(1:4)]
+	dta <- t(dta)
+	dta <- sub(" ","/",dta)
+	dta <- sub("0/0",NA,dta)
+	dimnames(dta) <- NULL
+
+	convert.snp.tped(tped="gatest.tped", tfam="gatest.tfam", out="gatest.raw")
 	data <- load.gwaa.data(gen="gatest.raw", phe="gatest.phe")
-	data
-	coding(data)
-	as.character(data)
-	as.numeric(data)
+	dta1 <- as.character(data)
+	dimnames(dta1) <- NULL
+	checkEquals(dta,dta1)
+	
+	convert.snp.ped(pedfile="gatest.ped", mapfile="gatest.map", out="gatest.raw",mapHasHeaderLine=FALSE)
+	data <- load.gwaa.data(gen="gatest.raw", phe="gatest.phe")
+	dta1 <- as.character(data)
+	dimnames(dta1) <- NULL
+	checkEquals(dta,dta1)
+	
+	convert.snp.illumina(infile="gatest.illu", out="gatest.raw")
+	data <- load.gwaa.data(gen="gatest.raw", phe="gatest.phe")
+	dta1 <- as.character(data)
+	dimnames(dta1) <- NULL
+	checkEquals(dta,dta1)
+	
+	# check possibility of merge
+	
 	convert.snp.tped(tped="gatest1.tped", tfam="gatest1.tfam", out="gatest1.raw", strand="+")
 	data1 <- load.gwaa.data(gen="gatest1.raw", phe="gatest1.phe")
-	data1
+	data1 <- data1[,sample(1:nsnps(data1),nsnps(data1))]
 	coding(data1)
-	as.character(data1)
-	as.numeric(data1)
 	mrg <- merge(gtdata(data),gtdata(data1))
-	mrg
-	coding(mrg$data)
-	as.character(mrg$data)
-	as.numeric(mrg$data)
+	mrg1 <- merge(data,data1)
+	checkEquals(as.character(mrg$data),as.character(gtdata(mrg1)))
 	
-	coding(data)
-	class(data)
-	coding(data) <- coding(data1)
-	class(data)
-	coding(data)
-	coding(data1)
-	as.character(data)
-	as.character(data1)
-	mrg <- merge(gtdata(data),gtdata(data1))
-	mrg
-	coding(mrg$data)
-	as.character(mrg$data)
-	as.numeric(mrg$data)
+	# here evaluate merged SNP one by one?
+
 	unlink("*.raw")
 }
\ No newline at end of file

Modified: pkg/GenABEL/src/GAlib/convert_snp_illumina.cpp
===================================================================
--- pkg/GenABEL/src/GAlib/convert_snp_illumina.cpp	2011-07-08 18:09:30 UTC (rev 749)
+++ pkg/GenABEL/src/GAlib/convert_snp_illumina.cpp	2011-07-15 14:23:17 UTC (rev 750)
@@ -33,274 +33,274 @@
  * strand == 2 -> minus ('-')
  * strand == 3 -> four columns (name,chr,pos,strand) expected at the beginning
  *
-**/
+ **/
 
 extern "C" {
-  void convert_snp_illumina (char** filename, char** outfilename, int* Strandid, int* Bcast, char **allele_codes, int* Ncodes) {
+void convert_snp_illumina (char** filename, char** outfilename, int* Strandid, int* Bcast, char **allele_codes, int* Ncodes) {
 
-  short unsigned int ncodes = *Ncodes;
-  short unsigned int strandid = *Strandid;
-  long unsigned int bcast = *Bcast;
+	short unsigned int ncodes = *Ncodes;
+	short unsigned int strandid = *Strandid;
+	long unsigned int bcast = *Bcast;
 
-  bool loud = TRUE;
-  if (bcast <= 0) loud = FALSE;
+	bool loud = TRUE;
+	if (bcast <= 0) loud = FALSE;
 
-  long unsigned int nsnps=0;
-  long unsigned int nids=0;
-  unsigned long int nbytes=0;
-  unsigned long int byte;
+	long unsigned int nsnps=0;
+	long unsigned int nids=0;
+	unsigned long int nbytes=0;
+	unsigned long int byte;
 
-  string data;
-  string tempstr;
+	string data;
+	string tempstr;
 
-  vector<string> iid;
-  vector<string> chrom; string tmp_chrom;
-  vector<string> snpnam; string tmp_snpnam;
-  vector<unsigned long> map; unsigned long tmp_map;
-  vector<unsigned char*> gtype; unsigned char* tmp_gtype;
-  vector<string> coding; string tmp_coding,tmp_coding1;
-  vector<unsigned short int> intcoding;
-  vector<unsigned short int> strand; string tmp_strand;
-  vector<string> codeset(ncodes);
-  char tmp_chcoding [10];
+	vector<string> iid;
+	vector<string> chrom; string tmp_chrom;
+	vector<string> snpnam; string tmp_snpnam;
+	vector<unsigned long> map; unsigned long tmp_map;
+	vector<unsigned char*> gtype; unsigned char* tmp_gtype;
+	vector<string> coding; string tmp_coding,tmp_coding1;
+	vector<unsigned short int> intcoding;
+	vector<unsigned short int> strand; string tmp_strand;
+	vector<string> codeset(ncodes);
+	char tmp_chcoding [10];
 
-  for (int i=0;i<ncodes;i++) codeset[i].assign(allele_codes[i]);
+	for (int i=0;i<ncodes;i++) codeset[i].assign(allele_codes[i]);
 
-  ifstream illfile (filename[0]);
-  if (illfile == NULL) {
-  error ("could not open file '%s'!",filename[0]);
-  }
+	ifstream illfile (filename[0]);
+	if (illfile == NULL) {
+		error ("could not open file '%s'!",filename[0]);
+	}
 
-  if (loud) {
-  Rprintf("Reading genotypes from file '%s' ...\n",filename[0]);
-  }
+	if (loud) {
+		Rprintf("Reading genotypes from file '%s' ...\n",filename[0]);
+	}
 
-  nsnps = 0;
-  unsigned long int lasti = 1;
+	nsnps = 0;
+	unsigned long int lasti = 1;
 
-  //
-  // Processing header line
-  //
+	//
+	// Processing header line
+	//
 
-  if (getline(illfile,data)) {
-    istringstream datas (data);
-    datas >> tempstr >> tempstr >> tempstr;
-    if (strandid == 3) datas >> tempstr;
-    while (datas >> tempstr) {
-      iid.push_back(tempstr);
-      nids++;
-    }
-  } else {
-    error("Can not read the first line from file '%s'!\n",filename[0]);
-  }
+	if (getline(illfile,data)) {
+		istringstream datas (data);
+		datas >> tempstr >> tempstr >> tempstr;
+		if (strandid == 3) datas >> tempstr;
+		while (datas >> tempstr) {
+			iid.push_back(tempstr);
+			nids++;
+		}
+	} else {
+		error("Can not read the first line from file '%s'!\n",filename[0]);
+	}
 
-  char* chgt = new char [nids*2];
-  nbytes = (unsigned long int)ceil((double)nids/4.);
+	char* chgt = new char [nids*2];
+	nbytes = (unsigned long int)ceil((double)nids/4.);
 
-  //
-  //Processing rest of the file
-  //
+	//
+	//Processing rest of the file
+	//
 
-  while (getline(illfile,data)) {
-  nsnps++;
+	while (getline(illfile,data)) {
+		nsnps++;
 
-  istringstream datas (data);
+		istringstream datas (data);
 
-  char gdata;
+		char gdata;
 
-  if (!(datas >> tmp_snpnam >> tmp_chrom >> tmp_map))
-    error("First three fields are missing in line %i!\n",(nsnps+1));
-  else {
-    chrom.push_back(tmp_chrom);
-    snpnam.push_back(tmp_snpnam);
-    map.push_back(tmp_map);
-    if (strandid == 3) {
-	    if (!(datas >> tmp_strand)) error("Strand field is missing in line %i, SNP '%s'!\n",(nsnps+1),tmp_snpnam.c_str());
-	    if (tmp_strand == "u") strand.push_back(0);
-	    else if (tmp_strand == "+") strand.push_back(1);
-	    else if (tmp_strand == "-") strand.push_back(2);
-	    else error("Bad strand coding ('%s'), only '+', '-' or 'u' is accepted!\n",tmp_strand.c_str());
-    } else strand.push_back(strandid);
+		if (!(datas >> tmp_snpnam >> tmp_chrom >> tmp_map))
+			error("First three fields are missing in line %i!\n",(nsnps+1));
+		else {
+			chrom.push_back(tmp_chrom);
+			snpnam.push_back(tmp_snpnam);
+			map.push_back(tmp_map);
+			if (strandid == 3) {
+				if (!(datas >> tmp_strand)) error("Strand field is missing in line %i, SNP '%s'!\n",(nsnps+1),tmp_snpnam.c_str());
+				if (tmp_strand == "u") strand.push_back(0);
+				else if (tmp_strand == "+") strand.push_back(1);
+				else if (tmp_strand == "-") strand.push_back(2);
+				else error("Bad strand coding ('%s'), only '+', '-' or 'u' is accepted!\n",tmp_strand.c_str());
+			} else strand.push_back(strandid);
 
-    unsigned long int idx = 0;
+			unsigned long int idx = 0;
 
-    idx = 0;
-    for (unsigned long int i = 0;i<nids;i++) {
-      if (!(datas >> tempstr)) error("Too few fields for SNP '%s', line %i (no. IDs = %li, no. fields = %li)!\n",snpnam[nsnps-1].c_str(),(nsnps+1),nids,(i+1));
-      char sd[10];
-      sprintf(sd,"%s",tempstr.c_str());
-      chgt[idx++] = sd[0];
-      chgt[idx++] = sd[1];
-    }
+			idx = 0;
+			for (unsigned long int i = 0;i<nids;i++) {
+				if (!(datas >> tempstr)) error("Too few fields for SNP '%s', line %i (no. IDs = %li, no. fields = %li)!\n",snpnam[nsnps-1].c_str(),(nsnps+1),nids,(i+1));
+				char sd[10];
+				sprintf(sd,"%s",tempstr.c_str());
+				chgt[idx++] = sd[0];
+				chgt[idx++] = sd[1];
+			}
 
-  //
-  //Processing SNP string
-  //
-  unsigned short int* gnum = new unsigned short int [nids*2];
-  unsigned short int ind;
-  unsigned short int offset[4] = {6,4,2,0};
-  char allele1 = 0;
-  char allele2 = 0;
-  unsigned long int ca1 = 0;
-  unsigned long int ca2 = 0;
-	for (unsigned long int idx = 0; idx < 2*nids; idx++) {
+			//
+			//Processing SNP string
+			//
+			unsigned short int* gnum = new unsigned short int [nids*2];
+			unsigned short int ind;
+			unsigned short int offset[4] = {6,4,2,0};
+			char allele1 = 0;
+			char allele2 = 0;
+			unsigned long int ca1 = 0;
+			unsigned long int ca2 = 0;
+			for (unsigned long int idx = 0; idx < 2*nids; idx++) {
 
-	    gdata = chgt[idx];
-	    if (gdata == allele1) {
-	      gnum[idx] = 1;
-		ca1++;
-	    } else if (gdata == allele2) {
-	      gnum[idx] = 3;
-		ca2++;
-	    } else if (gdata == '0' || gdata == '-') {
-	      gnum[idx] = 0;
-	    } else {
-	      if (allele1 == 0) {
-		allele1 = gdata;
-		gnum[idx] = 1;
-		ca1++;
-	      } else if (allele2 == 0) {
-		allele2 = gdata;
-		gnum[idx] = 3;
-		ca2++;
-	      } else {
-		error ("illegal genotype (three alleles) for SNP '%s' (line %li)!",
-		       snpnam[nsnps-1].c_str(),(nsnps+1));
-	      }
-	      }
-	    }
+				gdata = chgt[idx];
+				if (gdata == allele1) {
+					gnum[idx] = 1;
+					ca1++;
+				} else if (gdata == allele2) {
+					gnum[idx] = 3;
+					ca2++;
+				} else if (gdata == '0' || gdata == '-') {
+					gnum[idx] = 0;
+				} else {
+					if (allele1 == 0) {
+						allele1 = gdata;
+						gnum[idx] = 1;
+						ca1++;
+					} else if (allele2 == 0) {
+						allele2 = gdata;
+						gnum[idx] = 3;
+						ca2++;
+					} else {
+						error ("illegal genotype (three alleles) for SNP '%s' (line %li)!",
+								snpnam[nsnps-1].c_str(),(nsnps+1));
+					}
+				}
+			}
 
-	if (!allele1 && !allele2) tmp_coding="12"; // all genotypes missing
-	else if (!allele1 && allele2) sprintf(tmp_chcoding,"%c%c",allele2,allele2); // only one allele present
-	else if (allele1 && !allele2) sprintf(tmp_chcoding,"%c%c",allele1,allele1); // only one allele present
-	else if (allele1 && allele2) {
-		if (ca1 > ca2) sprintf(tmp_chcoding,"%c%c",allele1,allele2);
-		else sprintf(tmp_chcoding,"%c%c",allele2,allele1);
-	}
-	Rprintf("%s\n",tmp_chcoding);
-	tmp_coding.assign(tmp_chcoding);
-	int ccd = -1;
-	for (int i = 0; i < ncodes; i++) {
-		if (codeset[i].compare(tmp_coding)==0) {
-			ccd = i + 1;
-			intcoding.push_back(ccd);
-		}
-	}
-	if (ccd<0) error ("coding '%s' for SNP not recognised !\n",tmp_coding.c_str());
-	try {
-	  tmp_gtype = new unsigned char [nbytes];
-	}
-	catch (bad_alloc) {
-	  error ("ran out of memory reading file '%s' line %li !");
-	}
-	if (tmp_gtype == NULL) {
-	  error ("ran out of memory reading file '%s' line %li !");
-	}
+			if (!allele1 && !allele2) tmp_coding="12"; // all genotypes missing
+			else if (!allele1 && allele2) sprintf(tmp_chcoding,"%c%c",allele2,allele2); // only one allele present
+			else if (allele1 && !allele2) sprintf(tmp_chcoding,"%c%c",allele1,allele1); // only one allele present
+			else if (allele1 && allele2) {
+				if (ca1 > ca2) sprintf(tmp_chcoding,"%c%c",allele1,allele2);
+				else sprintf(tmp_chcoding,"%c%c",allele2,allele1);
+			}
+			//Rprintf("%s\n",tmp_chcoding);
+			tmp_coding.assign(tmp_chcoding);
+			int ccd = -1;
+			for (int i = 0; i < ncodes; i++) {
+				if (codeset[i].compare(tmp_coding)==0) {
+					ccd = i + 1;
+					intcoding.push_back(ccd);
+				}
+			}
+			if (ccd<0) error ("coding '%s' for SNP not recognised !\n",tmp_coding.c_str());
+			try {
+				tmp_gtype = new unsigned char [nbytes];
+			}
+			catch (bad_alloc) {
+				error ("ran out of memory reading file '%s' line %li !");
+			}
+			if (tmp_gtype == NULL) {
+				error ("ran out of memory reading file '%s' line %li !");
+			}
 
-	idx = 0;
-	for (byte = 0; byte < nbytes; ++byte) {
+			idx = 0;
+			for (byte = 0; byte < nbytes; ++byte) {
 
-	  tmp_gtype[byte] = 0;
-	  for (ind = 0; ind < 4; ++ind) {
+				tmp_gtype[byte] = 0;
+				for (ind = 0; ind < 4; ++ind) {
 
-	    switch (gnum[idx]+gnum[idx+1]) {
-	    case 2:
-	      if (ca1 > ca2)
-		      tmp_gtype[byte] = tmp_gtype[byte] | ((unsigned char)1 << offset[ind]);
-	      else
-		      tmp_gtype[byte] = tmp_gtype[byte] | ((unsigned char)3 << offset[ind]);
-	      break;
-	    case 4:
[TRUNCATED]

To get the complete diff run:
    svnlook diff /svnroot/genabel -r 750


More information about the Genabel-commits mailing list