[Phylobase-commits] r322 - pkg/R

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Fri Dec 19 08:07:28 CET 2008


Author: bcomeara
Date: 2008-12-19 08:07:28 +0100 (Fri, 19 Dec 2008)
New Revision: 322

Modified:
   pkg/R/ReadWithNCL.R
Log:
New more general function for reading from nexus files

Modified: pkg/R/ReadWithNCL.R
===================================================================
--- pkg/R/ReadWithNCL.R	2008-12-19 05:23:16 UTC (rev 321)
+++ pkg/R/ReadWithNCL.R	2008-12-19 07:07:28 UTC (rev 322)
@@ -1,89 +1,89 @@
-
-NexusToPhylo4 <- function(fileToRead,multi=FALSE) {
-#This returns a single phylo4 object if the nexus file has one tree; 
-#it returns a list of phylo4 objects if the nexus file has more than one or multi==TRUE
-#Note that a list of phylo4 objects is not the same as a multiphylo4d object
-	trees<-c("Failure");
-	filename<-fileToRead;
-	params <- list(filename=fileToRead)
-	
+readNexus <- function (file, simplify=TRUE, which=c("all","tree","data"), char.all=FALSE, polymorphic.convert=TRUE, levels.uniform=TRUE) {
+#file = input nexus file
+#simplify = 
+#which = specify whether to return trees+data as phylo4d object ("all") if 
+#        both are found, returning a data.frame or phylo4 object if only one
+#        is found, "tree": return a phylo4 object only, regardless of 
+#        whether there are data, "data": return a data.frame (no tree), even
+#        if a tree is present
+#char.all = if TRUE, includes even excluded chars in the nexus file
+#polymorphic.convert = if TRUE, convert polymorphic characters to missing 
+#                      characters
+#levels.uniform = if TRUE, categorical data are loaded with the same levels, 
+#             even if one character is missing a state
+	output<-c("Failure")
+	if (which=="all" || which=="data") {
+        params <- list(filename=file, allchar=char.all, polymorphictomissing=polymorphic.convert, levelsall=levels.uniform)
+        
 # Check that params is properly formatted.
-	if(!is.list(params) || length(params) == 0) {
-		stop("The params parameter must be a non-empty list");
-	}
-	
+        if(!is.list(params) || length(params) == 0) {
+            stop("The params parameter must be a non-empty list")
+        }
+        
+        incharsstring <- .Call("ReadCharsWithNCL",params,
+                               PACKAGE="phylobase")
+#print(incharsstring)
+        tipdata<-eval(parse(text=incharsstring))	
+    }
+    if (which=="all" || which=="tree") {
+        trees<-c("Failure");
+        params <- list(filename=file)
+        
+# Check that params is properly formatted.
+        if(!is.list(params) || length(params) == 0) {
+            stop("The params parameter must be a non-empty list");
+        }
+        
 # Finally ready to make the call...
-    intreesstring <- .Call("ReadTreesWithNCL", params,
-						   PACKAGE="phylobase")
-	
-	intreesphylolist <- read.nexustreestring(intreesstring);
-	if (length(intreesphylolist)>1 || multi) {
-		trees<-list()
-		for (i in 1:length(intreesphylolist)) {
-			trees[[i]]<-as(intreesphylolist[[i]], "phylo4");
-		}
-	}
-	else {
-		trees<-as(intreesphylolist[[1]], "phylo4");
-	}
-	trees
-}
-
-NexusToDataFrame <- function(fileToRead,allchar=FALSE, polymorphictomissing=TRUE, levelsall=TRUE) {
-#This returns a single phylo4D object if the nexus file has one tree; 
-#it returns a list of phylo4D objects if the nexus file has more than one or multi==TRUE
-#Note that a list of phylo4D objects is not the same as a multiphylo4d object
-	output<-c("Failure")
-	filename<-fileToRead
-	params <- list(filename=fileToRead, allchar=allchar, polymorphictomissing=polymorphictomissing, levelsall=levelsall)
-	
-# Check that params is properly formatted.
-	if(!is.list(params) || length(params) == 0) {
-		stop("The params parameter must be a non-empty list")
-	}
-	
-	incharsstring <- .Call("ReadCharsWithNCL",params,
-						   PACKAGE="phylobase")
-	print(incharsstring)
-	tipdata<-eval(parse(text=incharsstring))
-	tipdata
-}
-
-
-NexusToPhylo4D <- function(fileToRead,multi=FALSE,allchar=FALSE, polymorphictomissing=TRUE, levelsall=TRUE) {
-#This returns a single phylo4D object if the nexus file has one tree; 
-#it returns a list of phylo4D objects if the nexus file has more than one or multi==TRUE
-#Note that a list of phylo4D objects is not the same as a multiphylo4d object
-	output<-c("Failure")
-	tipdata<-NexusToDataFrame(fileToRead,allchar, polymorphictomissing, levelsall);
-	intreesphylolist<-NexusToPhylo4(fileToRead,multi=TRUE);
-	if (length(intreesphylolist)>1 || multi) {
-		output<-list()
-		for (i in 1:length(intreesphylolist)) {
-			output[[i]]<-phylo4d(as(intreesphylolist[[i]], "phylo4"), tip.data = tipdata)
-		}
-	}
-	else {
-		output<-phylo4d(as(intreesphylolist[[1]], "phylo4"), tip.data = tipdata)
-	}
+        intreesstring <- .Call("ReadTreesWithNCL", params,
+                               PACKAGE="phylobase")
+        
+        intreesphylolist <- read.nexustreestring(intreesstring);
+        if (length(intreesphylolist)>1 || !simplify) {
+            trees<-list()
+            for (i in 1:length(intreesphylolist)) {
+                trees[[i]]<-as(intreesphylolist[[i]], "phylo4");
+            }
+        }
+        else {
+            trees<-as(intreesphylolist[[1]], "phylo4");
+        }
+    }
+    if (which=="tree" || length(tipdata) == 0 ) { 
+        output<-trees;
+    }
+    else if (which=="data") {
+        output<-tipdata
+    }
+    else {
+        if (length(intreesphylolist)>1 || !simplify) {
+            output<-list()
+            for (i in 1:length(intreesphylolist)) {
+                output[[i]]<-phylo4d(as(intreesphylolist[[i]], "phylo4"), tip.data = tipdata)
+            }
+        }
+        else {
+            output<-phylo4d(as(intreesphylolist[[1]], "phylo4"), tip.data = tipdata)
+        }        
+    }
+    
 	output
 }
 
-
 read.nexustreestring <- function(X)
 {
 #Returns list of phylo objects (not multi.phylo, and always a list, even if there is only one element
 #X is a character vector, each element is one line from a treefile
 #This is based almost entirely on read.nexus from APE (Emmanuel Paradis).
 	
-		X<-unlist(strsplit(unlist(X),c("\n")))
-
+    X<-unlist(strsplit(unlist(X),c("\n")))
+    
 ## first remove all the comments
-
+    
 ## BCO took out the "speedier removal of comments" code -- it keeps [&R] as a node label, replaced it with original APE code
 ## speedier removal of comments pc 13 April 2008
 ##X <- lapply(X, gsub, pattern = "\\[[^\\]]*\\]", replacement = "")
-
+    
     LEFT <- grep("\\[", X)
     RIGHT <- grep("\\]", X)
     if (length(LEFT)) { # in case there are no comments at all
@@ -91,16 +91,16 @@
         if (any(w)) { # in case all comments use at least 2 lines
             s <- LEFT[w]
             X[s] <- gsub("\\[[^]]*\\]", "", X[s])
-            ## The above regexp was quite tough to find: it makes
-            ## possible to delete series of comments on the same line:
-            ##       ...[...]xxx[...]...
-            ## without deleting the "xxx". This regexp is in three parts:
-            ##       \\[      [^]]*       \\]
-            ## where [^]]* means "any character, except "]", repeated zero
-            ## or more times" (note that the ']' is not escaped here).
-            ## The previous version was:
-            ##       X[s] <- gsub("\\[.*\\]", "", X[s])
-            ## which deleted the "xxx". (EP  2008-06-24)
+## The above regexp was quite tough to find: it makes
+## possible to delete series of comments on the same line:
+##       ...[...]xxx[...]...
+## without deleting the "xxx". This regexp is in three parts:
+##       \\[      [^]]*       \\]
+## where [^]]* means "any character, except "]", repeated zero
+## or more times" (note that the ']' is not escaped here).
+## The previous version was:
+##       X[s] <- gsub("\\[.*\\]", "", X[s])
+## which deleted the "xxx". (EP  2008-06-24)
         }
         w <- !w
         if (any(w)) {
@@ -109,7 +109,7 @@
             sb <- RIGHT[w]
             X[sb] <- gsub(".*\\]", "", X[sb])
             if (any(s < sb - 1))
-                X <- X[-unlist(mapply(":", (s + 1), (sb - 1)))]
+            X <- X[-unlist(mapply(":", (s + 1), (sb - 1)))]
         }
     }
     endblock <- grep("END;|ENDBLOCK;", X, ignore.case = TRUE)
@@ -128,32 +128,32 @@
         n <- dim(TRANS)[1]
     }
     start <-
-        if (translation) semico[semico > i2][1] + 1
-        else semico[semico > i1][1]
+    if (translation) semico[semico > i2][1] + 1
+    else semico[semico > i1][1]
     end <- endblock[endblock > i1][1] - 1
     tree <- X[start:end]
     rm(X)
     tree <- gsub("^.*= *", "", tree)
     semico <- grep(";", tree)
     Ntree <- length(semico)
-    ## are some trees on several lines?
+## are some trees on several lines?
     if (any(diff(semico) != 1)) {
         STRING <- character(Ntree)
         s <- c(1, semico[-Ntree] + 1)
         j <- mapply(":", s, semico)
         for (i in 1:Ntree)
-            STRING[i] <- paste(tree[j[, i]], collapse = "")
+        STRING[i] <- paste(tree[j[, i]], collapse = "")
     } else STRING <- tree
     rm(tree)
     STRING <- gsub(" ", "", STRING)
     colon <- grep(":", STRING)
     if (!length(colon)) {
-    	#TODO: recode clado.build & .treeBuildWithTokens from ape to phylobase
+#TODO: recode clado.build & .treeBuildWithTokens from ape to phylobase
         trees <- lapply(STRING, clado.build)
     } else if (length(colon) == Ntree) {
         trees <-
-            if (translation) lapply(STRING, .treeBuildWithTokens)
-            else lapply(STRING, tree.build)
+        if (translation) lapply(STRING, .treeBuildWithTokens)
+        else lapply(STRING, tree.build)
     } else {
         trees <- vector("list", Ntree)
         trees[colon] <- lapply(STRING[colon], tree.build)
@@ -179,9 +179,9 @@
     }
     for (i in 1:Ntree) {
         tr <- trees[[i]]
-        ## Check here that the root edge is not incorrectly represented
-        ## in the object of class "phylo" by simply checking that there
-        ## is a bifurcation at the root
+## Check here that the root edge is not incorrectly represented
+## in the object of class "phylo" by simply checking that there
+## is a bifurcation at the root
         if (!translation) n <- length(tr$tip.label)
         ROOT <- n + 1
         if (sum(tr$edge[, 1] == ROOT) == 1 && dim(tr$edge)[1] > 1) {



More information about the Phylobase-commits mailing list