[Phylobase-commits] r354 - pkg/R

Sat Dec 20 00:43:19 CET 2008

Author: pdc
Date: 2008-12-20 00:43:19 +0100 (Sat, 20 Dec 2008)
New Revision: 354

Added:
   pkg/R/readNexus.R
Removed:
   pkg/R/ReadWithNCL.R
Log:
rename to match the changes in function name and to satisfy my ocd

Deleted: pkg/R/ReadWithNCL.R
===================================================================

--- pkg/R/ReadWithNCL.R	2008-12-19 23:35:38 UTC (rev 353)
+++ pkg/R/ReadWithNCL.R	2008-12-19 23:43:19 UTC (rev 354)
@@ -1,192 +0,0 @@
-readNexus <- function (file, simplify=TRUE, which=c("all","tree","data"), char.all=FALSE, polymorphic.convert=TRUE, levels.uniform=TRUE) {
-#file = input nexus file
-#simplify = 
-#which = specify whether to return trees+data as phylo4d object ("all") if 
-#        both are found, returning a data.frame or phylo4 object if only one
-#        is found, "tree": return a phylo4 object only, regardless of 
-#        whether there are data, "data": return a data.frame (no tree), even
-#        if a tree is present
-#char.all = if TRUE, includes even excluded chars in the nexus file
-#polymorphic.convert = if TRUE, convert polymorphic characters to missing 
-#                      characters
-#levels.uniform = if TRUE, categorical data are loaded with the same levels, 
-#             even if one character is missing a state
-	output<-c("Failure")
-	if (which=="all" || which=="data") {
-        params <- list(filename=file, allchar=char.all, polymorphictomissing=polymorphic.convert, levelsall=levels.uniform)
-        
-# Check that params is properly formatted.
-        if(!is.list(params) || length(params) == 0) {
-            stop("The params parameter must be a non-empty list")
-        }
-        
-        incharsstring <- .Call("ReadCharsWithNCL",params,
-                               PACKAGE="phylobase")
-#print(incharsstring)
-        tipdata<-eval(parse(text=incharsstring))	
-    }
-    if (which=="all" || which=="tree") {
-        trees<-c("Failure");
-        params <- list(filename=file)
-        
-# Check that params is properly formatted.
-        if(!is.list(params) || length(params) == 0) {
-            stop("The params parameter must be a non-empty list");
-        }
-        
-# Finally ready to make the call...
-        intreesstring <- .Call("ReadTreesWithNCL", params,
-                               PACKAGE="phylobase")
-        
-        intreesphylolist <- read.nexustreestring(intreesstring);
-        if (length(intreesphylolist)>1 || !simplify) {
-            trees<-list()
-            for (i in 1:length(intreesphylolist)) {
-                trees[[i]]<-as(intreesphylolist[[i]], "phylo4");
-            }
-        }
-        else {
-            trees<-as(intreesphylolist[[1]], "phylo4");
-        }
-    }
-    if (which=="tree" || length(tipdata) == 0 ) { 
-        output<-trees;
-    }
-    else if (which=="data") {
-        output<-tipdata
-    }
-    else {
-        if (length(intreesphylolist)>1 || !simplify) {
-            output<-list()
-            for (i in 1:length(intreesphylolist)) {
-                output[[i]]<-phylo4d(as(intreesphylolist[[i]], "phylo4"), tip.data = tipdata)
-            }
-        }
-        else {
-            output<-phylo4d(as(intreesphylolist[[1]], "phylo4"), tip.data = tipdata)
-        }        
-    }
-    
-	output
-}
-
-read.nexustreestring <- function(X)
-{
-#Returns list of phylo objects (not multi.phylo, and always a list, even if there is only one element
-#X is a character vector, each element is one line from a treefile
-#This is based almost entirely on read.nexus from APE (Emmanuel Paradis).
-	
-    X<-unlist(strsplit(unlist(X),c("\n")))
-    
-## first remove all the comments
-    
-## BCO took out the "speedier removal of comments" code -- it keeps [&R] as a node label, replaced it with original APE code
-## speedier removal of comments pc 13 April 2008
-##X <- lapply(X, gsub, pattern = "\\[[^\\]]*\\]", replacement = "")
-    
-    LEFT <- grep("\\[", X)
-    RIGHT <- grep("\\]", X)
-    if (length(LEFT)) { # in case there are no comments at all
-        w <- LEFT == RIGHT
-        if (any(w)) { # in case all comments use at least 2 lines
-            s <- LEFT[w]
-            X[s] <- gsub("\\[[^]]*\\]", "", X[s])
-## The above regexp was quite tough to find: it makes
-## possible to delete series of comments on the same line:
-##       ...[...]xxx[...]...
-## without deleting the "xxx". This regexp is in three parts:
-##       \\[      [^]]*       \\]
-## where [^]]* means "any character, except "]", repeated zero
-## or more times" (note that the ']' is not escaped here).
-## The previous version was:
-##       X[s] <- gsub("\\[.*\\]", "", X[s])
-## which deleted the "xxx". (EP  2008-06-24)
-        }
-        w <- !w
-        if (any(w)) {
-            s <- LEFT[w]
-            X[s] <- gsub("\\[.*", "", X[s])
-            sb <- RIGHT[w]
-            X[sb] <- gsub(".*\\]", "", X[sb])
-            if (any(s < sb - 1))
-            X <- X[-unlist(mapply(":", (s + 1), (sb - 1)))]
-        }
-    }
-    endblock <- grep("END;|ENDBLOCK;", X, ignore.case = TRUE)
-    semico <- grep(";", X)
-    i1 <- grep("BEGIN TREES;", X, ignore.case = TRUE)
-    i2 <- grep("TRANSLATE", X, ignore.case = TRUE)
-    translation <- if (length(i2) == 1 && i2 > i1) TRUE else FALSE
-    if (translation) {
-        end <- semico[semico > i2][1]
-        x <- X[(i2 + 1):end] # assumes there's a 'new line' after "TRANSLATE"
-        ## x <- gsub("TRANSLATE", "", x, ignore.case = TRUE)
-        x <- unlist(strsplit(x, "[,; \t]"))
-        x <- x[nzchar(x)]
-        TRANS <- matrix(x, ncol = 2, byrow = TRUE)
-        TRANS[, 2] <- gsub("['\"]", "", TRANS[, 2])
-        n <- dim(TRANS)[1]
-    }
-    start <-
-    if (translation) semico[semico > i2][1] + 1
-    else semico[semico > i1][1]
-    end <- endblock[endblock > i1][1] - 1
-    tree <- X[start:end]
-    rm(X)
-    tree <- gsub("^.*= *", "", tree)
-    semico <- grep(";", tree)
-    Ntree <- length(semico)
-## are some trees on several lines?
-    if (any(diff(semico) != 1)) {
-        STRING <- character(Ntree)
-        s <- c(1, semico[-Ntree] + 1)
-        j <- mapply(":", s, semico)
-        for (i in 1:Ntree)
-        STRING[i] <- paste(tree[j[, i]], collapse = "")
-    } else STRING <- tree
-    rm(tree)
-    STRING <- gsub(" ", "", STRING)
-    colon <- grep(":", STRING)
-    if (!length(colon)) {
-#TODO: recode clado.build & .treeBuildWithTokens from ape to phylobase
-        trees <- lapply(STRING, clado.build)
-    } else if (length(colon) == Ntree) {
-        trees <-
-        if (translation) lapply(STRING, .treeBuildWithTokens)
-        else lapply(STRING, tree.build)
-    } else {
-        trees <- vector("list", Ntree)
-        trees[colon] <- lapply(STRING[colon], tree.build)
-        nocolon <- (1:Ntree)[!1:Ntree %in% colon]
-        trees[nocolon] <- lapply(STRING[nocolon], clado.build)
-        if (translation) {
-            for (i in 1:Ntree) {
-                tr <- trees[[i]]
-                for (j in 1:n) {
-                    ind <- which(tr$tip.label[j] == TRANS[, 1])
-                    tr$tip.label[j] <- TRANS[ind, 2]
-                }
-                if (!is.null(tr$node.label)) {
-                    for (j in 1:length(tr$node.label)) {
-                        ind <- which(tr$node.label[j] == TRANS[, 1])
-                        tr$node.label[j] <- TRANS[ind, 2]
-                    }
-                }
-                trees[[i]] <- tr
-            }
-            translation <- FALSE
-        }
-    }
-    for (i in 1:Ntree) {
-        tr <- trees[[i]]
-## Check here that the root edge is not incorrectly represented
-## in the object of class "phylo" by simply checking that there
-## is a bifurcation at the root
-        if (!translation) n <- length(tr$tip.label)
-        ROOT <- n + 1
-        if (sum(tr$edge[, 1] == ROOT) == 1 && dim(tr$edge)[1] > 1) {
-            stop(paste("There is apparently two root edges in your file: cannot read tree file.\n  Reading NEXUS file aborted at tree no.", i, sep = ""))
-        }
-    }
-    trees
-}
\ No newline at end of file

Copied: pkg/R/readNexus.R (from rev 353, pkg/R/ReadWithNCL.R)
===================================================================
--- pkg/R/readNexus.R	                        (rev 0)
+++ pkg/R/readNexus.R	2008-12-19 23:43:19 UTC (rev 354)
@@ -0,0 +1,192 @@
+readNexus <- function (file, simplify=TRUE, which=c("all","tree","data"), char.all=FALSE, polymorphic.convert=TRUE, levels.uniform=TRUE) {
+#file = input nexus file
+#simplify = 
+#which = specify whether to return trees+data as phylo4d object ("all") if 
+#        both are found, returning a data.frame or phylo4 object if only one
+#        is found, "tree": return a phylo4 object only, regardless of 
+#        whether there are data, "data": return a data.frame (no tree), even
+#        if a tree is present
+#char.all = if TRUE, includes even excluded chars in the nexus file
+#polymorphic.convert = if TRUE, convert polymorphic characters to missing 
+#                      characters
+#levels.uniform = if TRUE, categorical data are loaded with the same levels, 
+#             even if one character is missing a state
+	output<-c("Failure")
+	if (which=="all" || which=="data") {
+        params <- list(filename=file, allchar=char.all, polymorphictomissing=polymorphic.convert, levelsall=levels.uniform)
+        
+# Check that params is properly formatted.
+        if(!is.list(params) || length(params) == 0) {
+            stop("The params parameter must be a non-empty list")
+        }
+        
+        incharsstring <- .Call("ReadCharsWithNCL",params,
+                               PACKAGE="phylobase")
+#print(incharsstring)
+        tipdata<-eval(parse(text=incharsstring))	
+    }
+    if (which=="all" || which=="tree") {
+        trees<-c("Failure");
+        params <- list(filename=file)
+        
+# Check that params is properly formatted.
+        if(!is.list(params) || length(params) == 0) {
+            stop("The params parameter must be a non-empty list");
+        }
+        
+# Finally ready to make the call...
+        intreesstring <- .Call("ReadTreesWithNCL", params,
+                               PACKAGE="phylobase")
+        
+        intreesphylolist <- read.nexustreestring(intreesstring);
+        if (length(intreesphylolist)>1 || !simplify) {
+            trees<-list()
+            for (i in 1:length(intreesphylolist)) {
+                trees[[i]]<-as(intreesphylolist[[i]], "phylo4");
+            }
+        }
+        else {
+            trees<-as(intreesphylolist[[1]], "phylo4");
+        }
+    }
+    if (which=="tree" || length(tipdata) == 0 ) { 
+        output<-trees;
+    }
+    else if (which=="data") {
+        output<-tipdata
+    }
+    else {
+        if (length(intreesphylolist)>1 || !simplify) {
+            output<-list()
+            for (i in 1:length(intreesphylolist)) {
+                output[[i]]<-phylo4d(as(intreesphylolist[[i]], "phylo4"), tip.data = tipdata)
+            }
+        }
+        else {
+            output<-phylo4d(as(intreesphylolist[[1]], "phylo4"), tip.data = tipdata)
+        }        
+    }
+    
+	output
+}
+
+read.nexustreestring <- function(X)
+{
+#Returns list of phylo objects (not multi.phylo, and always a list, even if there is only one element
+#X is a character vector, each element is one line from a treefile
+#This is based almost entirely on read.nexus from APE (Emmanuel Paradis).
+	
+    X<-unlist(strsplit(unlist(X),c("\n")))
+    
+## first remove all the comments
+    
+## BCO took out the "speedier removal of comments" code -- it keeps [&R] as a node label, replaced it with original APE code
+## speedier removal of comments pc 13 April 2008
+##X <- lapply(X, gsub, pattern = "\\[[^\\]]*\\]", replacement = "")
+    
+    LEFT <- grep("\\[", X)
+    RIGHT <- grep("\\]", X)
+    if (length(LEFT)) { # in case there are no comments at all
+        w <- LEFT == RIGHT
+        if (any(w)) { # in case all comments use at least 2 lines
+            s <- LEFT[w]
+            X[s] <- gsub("\\[[^]]*\\]", "", X[s])
+## The above regexp was quite tough to find: it makes
+## possible to delete series of comments on the same line:
+##       ...[...]xxx[...]...
+## without deleting the "xxx". This regexp is in three parts:
+##       \\[      [^]]*       \\]
+## where [^]]* means "any character, except "]", repeated zero
+## or more times" (note that the ']' is not escaped here).
+## The previous version was:
+##       X[s] <- gsub("\\[.*\\]", "", X[s])
+## which deleted the "xxx". (EP  2008-06-24)
+        }
+        w <- !w
+        if (any(w)) {
+            s <- LEFT[w]
+            X[s] <- gsub("\\[.*", "", X[s])
+            sb <- RIGHT[w]
+            X[sb] <- gsub(".*\\]", "", X[sb])
+            if (any(s < sb - 1))
+            X <- X[-unlist(mapply(":", (s + 1), (sb - 1)))]
+        }
+    }
+    endblock <- grep("END;|ENDBLOCK;", X, ignore.case = TRUE)
+    semico <- grep(";", X)
+    i1 <- grep("BEGIN TREES;", X, ignore.case = TRUE)
+    i2 <- grep("TRANSLATE", X, ignore.case = TRUE)
+    translation <- if (length(i2) == 1 && i2 > i1) TRUE else FALSE
+    if (translation) {
+        end <- semico[semico > i2][1]
+        x <- X[(i2 + 1):end] # assumes there's a 'new line' after "TRANSLATE"
+        ## x <- gsub("TRANSLATE", "", x, ignore.case = TRUE)
+        x <- unlist(strsplit(x, "[,; \t]"))
+        x <- x[nzchar(x)]
+        TRANS <- matrix(x, ncol = 2, byrow = TRUE)
+        TRANS[, 2] <- gsub("['\"]", "", TRANS[, 2])
+        n <- dim(TRANS)[1]
+    }
+    start <-
+    if (translation) semico[semico > i2][1] + 1
+    else semico[semico > i1][1]
+    end <- endblock[endblock > i1][1] - 1
+    tree <- X[start:end]
+    rm(X)
+    tree <- gsub("^.*= *", "", tree)
+    semico <- grep(";", tree)
+    Ntree <- length(semico)
+## are some trees on several lines?
+    if (any(diff(semico) != 1)) {
+        STRING <- character(Ntree)
+        s <- c(1, semico[-Ntree] + 1)
+        j <- mapply(":", s, semico)
+        for (i in 1:Ntree)
+        STRING[i] <- paste(tree[j[, i]], collapse = "")
+    } else STRING <- tree
+    rm(tree)
+    STRING <- gsub(" ", "", STRING)
+    colon <- grep(":", STRING)
+    if (!length(colon)) {
+#TODO: recode clado.build & .treeBuildWithTokens from ape to phylobase
+        trees <- lapply(STRING, clado.build)
+    } else if (length(colon) == Ntree) {
+        trees <-
+        if (translation) lapply(STRING, .treeBuildWithTokens)
+        else lapply(STRING, tree.build)
+    } else {
+        trees <- vector("list", Ntree)
+        trees[colon] <- lapply(STRING[colon], tree.build)
+        nocolon <- (1:Ntree)[!1:Ntree %in% colon]
+        trees[nocolon] <- lapply(STRING[nocolon], clado.build)
+        if (translation) {
+            for (i in 1:Ntree) {
+                tr <- trees[[i]]
+                for (j in 1:n) {
+                    ind <- which(tr$tip.label[j] == TRANS[, 1])
+                    tr$tip.label[j] <- TRANS[ind, 2]
+                }
+                if (!is.null(tr$node.label)) {
+                    for (j in 1:length(tr$node.label)) {
+                        ind <- which(tr$node.label[j] == TRANS[, 1])
+                        tr$node.label[j] <- TRANS[ind, 2]
+                    }
+                }
+                trees[[i]] <- tr
+            }
+            translation <- FALSE
+        }
+    }
+    for (i in 1:Ntree) {
+        tr <- trees[[i]]
+## Check here that the root edge is not incorrectly represented
+## in the object of class "phylo" by simply checking that there
+## is a bifurcation at the root
+        if (!translation) n <- length(tr$tip.label)
+        ROOT <- n + 1
+        if (sum(tr$edge[, 1] == ROOT) == 1 && dim(tr$edge)[1] > 1) {
+            stop(paste("There is apparently two root edges in your file: cannot read tree file.\n  Reading NEXUS file aborted at tree no.", i, sep = ""))
+        }
+    }
+    trees
+}
\ No newline at end of file