[Genabel-commits] r1249 - pkg/GenABEL/R

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Fri Jun 14 20:30:42 CEST 2013


Author: lckarssen
Date: 2013-06-14 20:30:39 +0200 (Fri, 14 Jun 2013)
New Revision: 1249

Modified:
   pkg/GenABEL/R/impute2databel.R
Log:
In GenABEL's impute2databel() function: only code layout changes, no functional changes. This is to make impute2databel more according to the code style standards (and to fix a type in the documentation).


Modified: pkg/GenABEL/R/impute2databel.R
===================================================================
--- pkg/GenABEL/R/impute2databel.R	2013-06-13 18:44:51 UTC (rev 1248)
+++ pkg/GenABEL/R/impute2databel.R	2013-06-14 18:30:39 UTC (rev 1249)
@@ -1,168 +1,191 @@
 #'
 #' converts IMPUTE-imputed files to DatABEL (filevector) format
-#' 
+#'
 #' this function converts IMPUTE-imputed files to DatABEL (filevector) format
-#' containing estimated dosages. 
-#' After conversion, two files (outfile.fvi and outfile.fvd), corresponding 
-#' to a single filevector object, will appear on the disk; a 'databel-class' 
+#' containing estimated dosages.
+#' After conversion, two files (outfile.fvi and outfile.fvd), corresponding
+#' to a single filevector object, will appear on the disk; a 'databel-class'
 #' object connected to these files will be returned to R.
-#' 
+#'
 #' @param genofile IMPUTE genotype file name
 #' @param samplefile IMPUTE sample file name
 #' @param outfile output file name
-#' @param makeprob wheather probability-files are also to be arranged
-#' @param old for developers' use
-#' 
+#' @param makeprob whether probability-files are also to be arranged
+#' @param old for developers' use only
+#'
 #' @return 'databel-class' object
-#' 
-#' 
+#'
+#'
 
 
-impute2databel <- function(genofile,samplefile,outfile,makeprob=TRUE,old=FALSE) 
+impute2databel <- function(genofile, samplefile, outfile,
+                           makeprob=TRUE, old=FALSE)
 {
-	if (!require(DatABEL))
-		stop("this function requires DatABEL package to be installed")
-	if (missing(genofile))
-		stop("genofile file must be specified")
-	if (missing(outfile)) outfile <- genofile
-# extract snp names (varnames)
-	#   if (tmpname != "")
-	#       text2databel(infile=genofile,outfile=outfile,
-	#               colnames=tmpname,
-	#               rownames=2,skipcols=5,
-	#               #skiprows,
-	#               transpose=TRUE,R_matrix=FALSE,type="FLOAT")
-	#   else 
-	tmpname <- get_temporary_file_name()
-	tmp_fv <- text2databel(infile=genofile,outfile=tmpname,
-			rownames=2,skipcols=5,
-			#skiprows,
-			transpose=TRUE,R_matrix=FALSE,type="FLOAT")
-	if ((dim(tmp_fv)[1] %% 3) != 0) 
-		stop("strange data in IMPUTE geno file: number of columns - 5 not dividable by 3")
-	
-	makedose <- function(prob) {
-		dose <- 2*prob[c(F,F,T)]+prob[c(F,T,F)]
-		bp <- prob[c(T,F,F)]
-		miss <- which(abs(bp)<1e-16 & abs(dose)<1e-16)
-		if (length(miss) > 0 ) dose[miss] <- NA
-		return(dose)
-	}
-	
-	pfun <- function(a) return(a)
-	
-	#dosefile <- make_empty_fvf(paste(outfile,".dose",sep=""), 
-	#		nvariables = dim(tmp_fv)[2], nobservations = round(dim(tmp_fv)[1]/3),type = "FLOAT")
-	
-	#print(dimnames(tmp_fv)[[2]])
-	#print(dim(tmp_fv))
-	#print(class(tmp_fv))
-	saved_names <- get_dimnames(tmp_fv)
-	varnames <- saved_names[[2]]
-	#print("original names of tmp_fv")
-	#print(saved_names)
-	#print(class(tmp_fv))
-	
-	#tmplst <- list( as.character(1:dim(tmp_fv)[1]) , as.character(1:dim(tmp_fv)[2]) )
-	#print(class(tmp_fv))
-	#print(dimnames(tmp_fv)[[2]])
-	#set_dimnames(tmp_fv) <- tmplst
-	#print(class(tmp_fv))
-	#print(dimnames(tmp_fv)[[2]])
-	
-	#print("before apply2dfo")
-	#print("calling apply2dfo")
-	if (old) {
-#		stop("'old' ways are no longer there...")
-		dosefile <- apply2dfo(dfodata=tmp_fv, anFUN = "makedose", 
-				MAR = 2, procFUN = "pfun",prob=get("SNP"),
-				outclass="databel",
-				outfile=paste(outfile,".dose",sep=""),
-				type="FLOAT",transpose=FALSE)
-		if (makeprob) {
-			stop("makeprob is not possible with 'old' style")
-		}
-	} else {
-		res <- .Call("iterator", tmp_fv at data, as.integer(0), as.integer(0),
-				as.character("databel_impute_prob_2_databel_mach_dose"),
-				paste(outfile,".dose",sep=""), as.integer(2), as.integer(0))
-		dosefile <- databel(paste(outfile,".dose",sep=""),64,readonly=FALSE)
-		if (makeprob) {
-			res <- .Call("iterator", tmp_fv at data, as.integer(0), as.integer(0),
-					as.character("databel_impute_prob_2_databel_mach_prob"),
-					paste(outfile,".prob",sep=""), as.integer(2), as.integer(0))
-			probfile <- databel(paste(outfile,".prob",sep=""),64,readonly=FALSE)
-		}
-#		res <- .Call("databel_impute_prob_2_databel_mach_dose",
-#				tmp_fv at data, paste(outfile,".dose",sep=""), as.integer(64))
-#		dosefile <- databel_filtered_R(paste(outfile,".dose",sep=""),64)
-#		if (makeprob) {
-#			res <- .Call("databel_impute_prob_2_databel_mach_prob",
-#					tmp_fv at data, paste(outfile,".prob",sep=""), as.integer(64))
-#			#print("AAAA")
-#			probfile <- databel_filtered_R(paste(outfile,".prob",sep=""),64)
-#			#print(class(probfile))
-#			#print(dim(probfile))
-#			#print(dimnames(probfile))
-#			#print(get_dimnames(probfile))
-#		}
-	}
-	#!print("after iterator")
-	set_dimnames(dosefile) <- list(get_dimnames(dosefile)[[1]],varnames)
-	#print("dimnames dose:")
-	#print(get_dimnames(dosefile))
-	
-	
-	if (!missing(samplefile))
-	{
-		l1 <- scan(samplefile,what="character",nlines=1)
-		l2 <- scan(samplefile,what="character",nlines=1,skip=1)
-		temp <- scan(samplefile,what="character",nlines=1,skip=2)
+  if (!require(DatABEL))
+    stop("this function requires the DatABEL package to be installed")
 
-		if((length(l1)!=length(temp)) || (length(temp)!=length(l2))){
-            warning("Wrong number of words in sample file header.");
-		}		
+  if (missing(genofile))
+    stop("genofile file must be specified")
 
-		samnames <- scan(samplefile,what="character",skip=2)
-		samnames <- samnames[c(F,T,rep(F,(length(temp)-2)))]
-		if (length(samnames) == dim(dosefile)[1]) { 
-			#!print("before dose-file")
-			#print(samnames)
-			#print(varnames)
-			set_dimnames(dosefile) <- list(samnames,varnames)
-			#!print("after dose-file")
-			#print(get_dimnames(dosefile))
-		} else {
-			warning("number of IDs specified in sample file does not match to geno-dimension; dropping ID names")
-		}
-		if (makeprob) if (length(samnames) == dim(probfile)[1] && 2*length(varnames)==dim(probfile)[2]) {
-				dpnames <- rep(0,length(varnames)*2)
-				dpnames[c(T,F)] <- paste(varnames,"_11",sep="")
-				dpnames[c(F,T)] <- paste(varnames,"_01",sep="")
-				#!print("before prob-file")
-				#print(samnames)
-				#print(dpnames)
-				set_dimnames(probfile) <- list(samnames,dpnames) #get_dimnames(probfile)[[2]])
-				#!print("after prob-file")
-				#print(get_dimnames(probfile))
-			} else {
-				warning("number of IDs/SNPs specified in sample file does not match to geno-dimension; dropping IDs/SNPs names")
-			}
-	} else 
-		warning("sample file not specified, you will not be able to use ID names (only index)")
-	
-	rm(tmp_fv);gc();
-	unlink(paste(tmpname,"*",sep=""))
-	
-	#disconnect(dosefile)
-	#connect(dosefile)
-	#!print("at exit dosefile:")
-	#print(get_dimnames(dosefile))
-	if (makeprob) {
-		#print("at exit probfile:")
-		#print(get_dimnames(probfile))
-		disconnect(probfile)
-	}
-	#!print("at return")
-	return(dosefile)
+  if (missing(outfile))
+    outfile <- genofile
+
+  ## extract snp names (varnames)
+  ## if (tmpname != "")
+  ##     text2databel(infile=genofile,outfile=outfile,
+  ##            colnames=tmpname,
+  ##            rownames=2,skipcols=5,
+  ##            #skiprows,
+  ##            transpose=TRUE,R_matrix=FALSE,type="FLOAT")
+  ## else
+  tmpname <- get_temporary_file_name()
+  tmp_fv  <- text2databel(infile=genofile,
+                          outfile=tmpname,
+                          rownames=2,
+                          skipcols=5,
+                          ## skiprows,
+                          transpose=TRUE,
+                          R_matrix=FALSE,
+                          type="FLOAT")
+  if ((dim(tmp_fv)[1] %% 3) != 0)
+    stop("strange data in IMPUTE geno file: ",
+         "number of columns - 5 not dividable by 3")
+
+  makedose <- function(prob) {
+    dose <- 2 * prob[c(FALSE, FALSE, TRUE)] + prob[c(FALSE, TRUE, FALSE)]
+    bp   <- prob[c(TRUE, FALSE, FALSE)]
+    miss <- which(abs(bp) < 1e-16 & abs(dose) < 1e-16)
+    if (length(miss) > 0 ) dose[miss] <- NA
+    return(dose)
+  }
+
+  pfun <- function(a) return(a)
+
+  ## dosefile <- make_empty_fvf(paste(outfile, ".dose", sep=""),
+  ##                            nvariables = dim(tmp_fv)[2],
+  ##                            nobservations = round(dim(tmp_fv)[1]/3),
+  ##                            type = "FLOAT")
+
+  ## print(dimnames(tmp_fv)[[2]])
+  ## print(dim(tmp_fv))
+  ## print(class(tmp_fv))
+  saved_names <- get_dimnames(tmp_fv)
+  varnames    <- saved_names[[2]]
+  ## print("original names of tmp_fv")
+  ## print(saved_names)
+  ## print(class(tmp_fv))
+
+  ## tmplst <- list( as.character(1:dim(tmp_fv)[1]) , as.character(1:dim(tmp_fv)[2]) )
+  ## print(class(tmp_fv))
+  ## print(dimnames(tmp_fv)[[2]])
+  ## set_dimnames(tmp_fv) <- tmplst
+  ## print(class(tmp_fv))
+  ## print(dimnames(tmp_fv)[[2]])
+
+  ## print("before apply2dfo")
+  ## print("calling apply2dfo")
+  if (old) {                             #
+    ##          stop("'old' ways are no longer there...")
+    dosefile <- apply2dfo(dfodata=tmp_fv,
+                          anFUN = "makedose",
+                          MAR = 2,
+                          procFUN = "pfun",
+                          prob=get("SNP"),
+                          outclass="databel",
+                          outfile=paste(outfile, ".dose", sep=""),
+                          type="FLOAT",
+                          transpose=FALSE)
+    if (makeprob) {
+      stop("makeprob is not possible with 'old' style")
+    }
+  } else {
+    res <- .Call("iterator", tmp_fv at data, as.integer(0), as.integer(0),
+                 as.character("databel_impute_prob_2_databel_mach_dose"),
+                 paste(outfile, ".dose", sep=""), as.integer(2), as.integer(0))
+    dosefile <- databel(paste(outfile, ".dose", sep=""), 64, readonly=FALSE)
+    if (makeprob) {
+      res <- .Call("iterator", tmp_fv at data, as.integer(0), as.integer(0),
+                   as.character("databel_impute_prob_2_databel_mach_prob"),
+                   paste(outfile, ".prob", sep=""), as.integer(2), as.integer(0))
+      probfile <- databel(paste(outfile, ".prob", sep=""), 64, readonly=FALSE)
+    }
+
+    ## res <- .Call("databel_impute_prob_2_databel_mach_dose",
+    ## tmp_fv at data, paste(outfile,".dose",sep=""), as.integer(64))
+    ## dosefile <- databel_filtered_R(paste(outfile,".dose",sep=""),64)
+    ## if (makeprob) {
+    ##   res <- .Call("databel_impute_prob_2_databel_mach_prob",
+    ##   tmp_fv at data, paste(outfile,".prob",sep=""), as.integer(64))
+    ##   #print("AAAA")
+    ##   probfile <- databel_filtered_R(paste(outfile, ".prob", sep=""),64)
+    ##   #print(class(probfile))
+    ##   #print(dim(probfile))
+    ##   #print(dimnames(probfile))
+    ##   #print(get_dimnames(probfile))
+    ##}
+  }                                #
+  ##!print("after iterator")
+
+  set_dimnames(dosefile) <- list(get_dimnames(dosefile)[[1]], varnames)
+  ## print("dimnames dose:")
+  ## print(get_dimnames(dosefile))
+
+
+  if (!missing(samplefile)) {
+    l1 <- scan(samplefile, what="character", nlines=1)
+    l2 <- scan(samplefile, what="character", nlines=1, skip=1)
+    temp <- scan(samplefile, what="character", nlines=1, skip=2)
+
+    if( (length(l1)!=length(temp)) || (length(temp)!=length(l2)) ) {
+      warning("Wrong number of words in sample file header.");
+      }
+
+    samnames <- scan(samplefile, what="character", skip=2)
+    samnames <- samnames[c(FALSE, TRUE, rep(FALSE, (length(temp)-2)))]
+    if (length(samnames) == dim(dosefile)[1]) {
+      ##!print("before dose-file")
+      ##print(samnames)
+      ##print(varnames)
+      set_dimnames(dosefile) <- list(samnames, varnames)
+      ##!print("after dose-file")
+      ##print(get_dimnames(dosefile))
+    } else {
+      warning("number of IDs specified in sample file does not match to ",
+              "geno-dimension; dropping ID names")
+    }
+    if (makeprob) {
+      if (length(samnames) == dim(probfile)[1] &&
+          2*length(varnames)==dim(probfile)[2]) {
+        dpnames <- rep(0, length(varnames)*2)
+        dpnames[c(TRUE, FALSE)] <- paste(varnames, "_11", sep="")
+        dpnames[c(FALSE, TRUE)] <- paste(varnames, "_01", sep="")
+        ##!print("before prob-file")
+        ##print(samnames)
+        ##print(dpnames)
+        set_dimnames(probfile) <- list(samnames, dpnames) #get_dimnames(probfile)[[2]])
+        ##!print("after prob-file")
+        ##print(get_dimnames(probfile))
+      } else {
+        warning("number of IDs/SNPs specified in sample file does not match to ",
+                "geno-dimension; dropping IDs/SNPs names")
+      }
+    }
+  } else
+    warning("sample file not specified, you will not be able to use ID names ",
+            "(only index)")
+
+  rm(tmp_fv); gc();
+  unlink(paste(tmpname, "*", sep=""))
+
+  ##disconnect(dosefile)
+  ##connect(dosefile)
+  ##!print("at exit dosefile:")
+  ##print(get_dimnames(dosefile))
+  if (makeprob) {
+    ##print("at exit probfile:")
+    ##print(get_dimnames(probfile))
+    disconnect(probfile)
+  }
+  ##!print("at return")
+  return(dosefile)
 }



More information about the Genabel-commits mailing list