[Rcolony-commits] r83 - pkg/R

Mon Dec 10 19:15:01 CET 2012

Author: jonesor
Date: 2012-12-10 19:15:01 +0100 (Mon, 10 Dec 2012)
New Revision: 83

Modified:
   pkg/R/get.colony.data.R
Log:
Added code so that program will not fail if sibship files are not found. Also forced removal of whitespace at beginning of lines

Modified: pkg/R/get.colony.data.R
===================================================================

--- pkg/R/get.colony.data.R	2012-12-10 15:37:17 UTC (rev 82)
+++ pkg/R/get.colony.data.R	2012-12-10 18:15:01 UTC (rev 83)
@@ -1,44 +1,69 @@
 get.colony.data<-function(datadir, filename = list.files(datadir, pattern = ".DAT", ignore.case=TRUE)){
-    
+
+setwd("/Users/orj/Documents/Dropbox/Finished\ Projects/ColonyWork/Colony2MacVersion/Example4/")
+datadir= paste(getwd(),"/",sep="")
+filename = list.files(datadir, pattern = ".DAT", ignore.case=TRUE)
+
+
     #Test to see if colony has finished running.
     t1 <- list.files(path = datadir, pattern = "Maternity")[1]
     
-    if(is.na(t1[1])){
-    	stop("The required COLONY output files aren\'t there.\nCheck that colony has finished running.")
-    	}
+    #if(is.na(t1[1])){
+    #	stop("The required COLONY output files aren\'t there.\nCheck that colony has finished running.")
+    #	}
 
     #Test consistency of the database file. Throw warnings/errors as necessary
     colony.object <- NULL
-    x <- readLines(paste(datadir, filename, sep = "/"))
+    x <- readLines(paste(datadir, filename, sep = ""))
     
     #Strip out empty rows, if there are any.
-    if(length(which(x == "")) > 0){x = x[-which(x == "")]}
+    if(length(which(x == "")) > 0){x = x[-which(x == ""|x == " ")]}
 
     #Extract the number of offspring from the dat file. This information is used for error checking later on.
     n <- x[3]
     n <- sub("^[\t\n\f\r ]*", "", n) #remove leading whitespace
-    n <- as.numeric(gsub("([A-Za-z0-9]*)([!0-9A-Za-z ]*)", "\\1", n, perl = TRUE))
+    n <- as.numeric(gsub("([A-Za-z0-9]*)([!0-9A-Za-z,/= ]*)", "\\1", n, perl = TRUE))
     
+    #Extract the number of loci
+    nLoci <- x[4]
+    nLoci <- sub("^[\t\n\f\r ]*", "", nLoci) #remove leading whitespace
+    nLoci <- as.numeric(gsub("([A-Za-z0-9]*)([!0-9A-Za-z,/= ]*)", "\\1", nLoci, perl = TRUE))
+
+    #Check whether allele frequency is known    
+    AFKnown <- x[11]
+    AFKnown <- sub("^[\t\n\f\r ]*", "", AFKnown) #remove leading whitespace
+    AFKnown = as.numeric(gsub("([A-Za-z0-9]*)([!0-9A-Za-z,/= ]*)", "\\1", AFKnown, perl = TRUE)) == 1
+    if(AFKnown){
+    	#If allele frequency is known then there is an extra row defining the number of alleles per locus (e.g. "12 13 14 15 16  !Number of alleles per locus")
+    	#Then there are a number of rows equal to the number of loci
+    OFSStart = 2 * nLoci + 22
+    }else{    
+    	OFSStart = 23
+    }
+
     ###################################################
     #Lists of offspring, fathers and mothers
     ###################################################
     # Starts at line 24 - 24+(n-1)
-    offspring <- x[24:(24 + (n - 1))]
+    offspring <- x[OFSStart:(OFSStart + (n - 1))]
     
-    nFathers <- as.numeric(strsplit(x[25 + n], split = " +")[[1]][1])
-    nMothers <- as.numeric(strsplit(x[25 + n], split = " +")[[1]][2])
+    nParents = x[OFSStart + (n+1)]
+    nParents <- sub("^[\t\n\f\r ]*", "", nParents) #remove leading whitespace
 
-    fathers <- x[(24 + (n + 2)):(24 + (n + 1) + nFathers)]
-    mothers <- x[(24 + (n + 2) + nFathers):(24 + (n + 1) + nFathers + nMothers)]
+    nFathers <- as.numeric(strsplit(nParents, split = " +")[[1]][1])
+    nMothers <- as.numeric(strsplit(nParents, split = " +")[[1]][2])
 
+    fathers <- x[(OFSStart + (n + 2)):(OFSStart + (n + 1) + nFathers)]
+    mothers <- x[(OFSStart + (n + 2) + nFathers):(OFSStart + (n + 1) + nFathers + nMothers)]
+
     #Remove leading whitespace
     offspring <- sub("^[\t\n\f\r ]*", "", offspring) #remove leading whitespace
     fathers <- sub("^[\t\n\f\r ]*", "", fathers) #remove leading whitespace
     mothers <- sub("^[\t\n\f\r ]*", "", mothers) #remove leading whitespace
     
-    offspring <- as.vector(sapply(offspring, function(x){gsub("([A-Za-z0-9]*)([!0-9A-Za-z ]*)", "\\1", x, perl = TRUE)})) #extract names
-    fathers <- as.vector(sapply(fathers, function(x){gsub("([A-Za-z0-9]*)([!0-9A-Za-z ]*)", "\\1", x, perl = TRUE)})) #extract names
-    mothers <- as.vector(sapply(mothers, function(x){gsub("([A-Za-z0-9]*)([!0-9A-Za-z ]*)", "\\1", x, perl = TRUE)})) #extract names
+    offspring <- as.vector(sapply(offspring, function(x){gsub("([A-Za-z0-9]*)([!0-9A-Za-z,/= ]*)", "\\1", x, perl = TRUE)})) #extract names
+    fathers <- as.vector(sapply(fathers, function(x){gsub("([A-Za-z0-9]*)([!0-9A-Za-z,/= ]*)", "\\1", x, perl = TRUE)})) #extract names
+    mothers <- as.vector(sapply(mothers, function(x){gsub("([A-Za-z0-9]*)([!0-9A-Za-z,/= ]*)", "\\1", x, perl = TRUE)})) #extract names
     
     if(n != length(offspring)){stop("Wrong number of offspring. Check your files.")}else{}
     
@@ -62,18 +87,18 @@
     ###################################################
     #assigned parentage (nonpairwise)
     ###################################################
-    mfile <- list.files(path = datadir, pattern = ".Maternity")[1]
-    if(length(mfile) > 2){
+    mfile <- list.files(path = datadir, pattern = "\\.Maternity")[1]
+    if(length(mfile) > 1){
     	warning("There are too many \"\ *.Maternity\" files in your project directory. \nYou should check them.")
     }
     
-    maternity <- read.table(paste(datadir, mfile, sep=""), header = TRUE)
+    maternity <- read.table(paste(datadir, mfile, sep=""), header = TRUE, fill = TRUE, na.strings = "")
     
-    pfile <- list.files(path = datadir, pattern = ".Paternity")[2]
-    if(length(pfile) > 2){
+    pfile <- list.files(path = datadir, pattern = "\\.Paternity")[1]
+    if(length(pfile) > 1){
     	warning("There are too many \"\ *.Paternity\" files in your project directory. \nYou should check them.")
     }
-    paternity <- read.table(paste(datadir, pfile, sep=""), header = TRUE)
+    paternity <- read.table(paste(datadir, pfile, sep=""), header = TRUE, fill = TRUE, na.strings = "")
     
     colony.object$maternity <- maternity
     colony.object$paternity <- paternity
@@ -81,17 +106,17 @@
     ###################################################
     #assigned parentage (pairwise)
     ###################################################
-    mfile <- list.files(path = datadir, pattern = ".PairwiseMaternity")[1]
+    mfile <- list.files(path = datadir, pattern = "\\.PairwiseMaternity")[1]
     if(length(mfile) > 1){
     	warning("There are too many \"\ *.PairwiseMaternity\" files in your project directory. \nYou should check them. \nUsing the first one.")
     }
-    pairwise.maternity <- read.table(paste(datadir, mfile, sep = ""), header = TRUE)
+    pairwise.maternity <- read.table(paste(datadir, mfile, sep = ""), header = TRUE, sep = ",")
     
-    pfile <- list.files(path = datadir, pattern = ".PairwisePaternity")[1]
+    pfile <- list.files(path = datadir, pattern = "\\.PairwisePaternity")[1]
     if(length(mfile) > 1){
     	warning("There are too many \"\ *.PairwisePaternity\" files in your project directory. \nYou should check them. \nUsing the first one.")
     }
-    pairwise.paternity <- read.table(paste(datadir, pfile, sep=""), header = TRUE)
+    pairwise.paternity <- read.table(paste(datadir, pfile, sep=""), header = TRUE, sep = ",")
     
     colony.object$pairwise.maternity <- pairwise.maternity
     colony.object$pairwise.paternity <- pairwise.paternity
@@ -101,14 +126,24 @@
     #Sibships (nonpairwise)
     ###################################################
     #Get full and half sibship data.
-    full.file <- list.files(path = datadir, pattern = ".FullSibDyad")[1]
-    half.file <- list.files(path = datadir, pattern = ".HalfSibDyad")[1]
+    full.file <- list.files(path = datadir, pattern = "\\.FullSibDyad")[1]
+    half.file <- list.files(path = datadir, pattern = "\\.HalfSibDyad")[1]
     
-    fullsibs <- read.table(paste(datadir, full.file, sep = ""), header = TRUE)
-    halfsibs <- read.table(paste(datadir, half.file, sep = ""), header = TRUE)
-    fullsibs$type <- rep("Full", dim(fullsibs)[1])
-    halfsibs$type <- rep("Half", dim(halfsibs)[1])
-    sibs <- rbind(fullsibs, halfsibs)
+    if(!is.na(full.file)){
+    	fullsibs <- read.table(paste(datadir, full.file, sep = ""), header = TRUE, sep = ",")
+    	fullsibs$type <- rep("Full", dim(fullsibs)[1])
+    	}else{
+    		fullsibs = NA
+    	}
+
+    if(!is.na(half.file)){
+    	halfsibs <- read.table(paste(datadir, half.file, sep = ""), header = TRUE, sep = ",")
+        halfsibs$type <- rep("Half", dim(halfsibs)[1])
+    	}else{
+    		halfsibs = NA
+    	}
+    
+    sibs <- na.omit(rbind(fullsibs, halfsibs))
     sibs$type <- as.factor(sibs$type)
     
     colony.object$sibs <- sibs
@@ -118,17 +153,27 @@
     ###################################################
     
     #Get full and half sibship data.
-    full.file <- list.files(path = datadir, pattern = ".PairwiseFullSibDyad")[1]
-    half.file <- list.files(path = datadir, pattern = ".PairwiseHalfSibDyad")[1]
+    full.file <- list.files(path = datadir, pattern = "\\.PairwiseFullSibDyad")[1]
+    half.file <- list.files(path = datadir, pattern = "\\.PairwiseHalfSibDyad")[1]
     
-    fullsibs <- read.table(paste(datadir, full.file, sep = ""), header = TRUE)
-    halfsibs <- read.table(paste(datadir, half.file, sep = ""), header = TRUE)
-    fullsibs$type <- rep("Full", dim(fullsibs)[1])
-    halfsibs$type <- rep("Half", dim(halfsibs)[1])
-    sibs <- rbind(fullsibs, halfsibs)
+    if(!is.na(full.file)){
+    	fullsibs <- read.table(paste(datadir, full.file, sep = ""), header = TRUE, sep = ",")
+    	fullsibs$type <- rep("Full", dim(fullsibs)[1])
+    	}else{
+    		fullsibs = NA
+    	}
+
+    if(!is.na(half.file)){
+    	halfsibs <- read.table(paste(datadir, half.file, sep = ""), header = TRUE, sep = ",")
+        halfsibs$type <- rep("Half", dim(halfsibs)[1])
+    	}else{
+    		halfsibs = NA
+    	}
+    
+    sibs <- na.omit(rbind(fullsibs, halfsibs))
     sibs$type <- as.factor(sibs$type)
     
-    colony.object$pairwise.sibs <- sibs
+    colony.object$sibs <- sibs
     
     return(colony.object)
     }