[Phylobase-commits] r798 - in pkg: . R inst/nexusfiles man src

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Thu Jul 15 10:40:05 CEST 2010


Author: francois
Date: 2010-07-15 10:40:04 +0200 (Thu, 15 Jul 2010)
New Revision: 798

Added:
   pkg/R/readNCL.R
   pkg/src/GetNCL.cpp
Removed:
   pkg/man/read.nexustreestring.Rd
   pkg/src/NCLInterface.cpp
   pkg/src/NCLInterface.h
   pkg/src/ReadWithNCL.cpp
Modified:
   pkg/DESCRIPTION
   pkg/NAMESPACE
   pkg/R/readNexus.R
   pkg/inst/nexusfiles/treeWithPolyExcludedData.nex
   pkg/man/readNexus.Rd
   pkg/man/subset-methods.Rd
Log:
Change the interface between NCL and phylobase. The interface (GetNCL.cpp) now returns a list that contains the information of the NEXUS file instead of a string of characters.

Modified: pkg/DESCRIPTION
===================================================================
--- pkg/DESCRIPTION	2010-07-15 08:37:24 UTC (rev 797)
+++ pkg/DESCRIPTION	2010-07-15 08:40:04 UTC (rev 798)
@@ -1,13 +1,13 @@
 Package: phylobase
 Type: Package
 Title: Base package for phylogenetic structures and comparative data
-Version: 0.5.11
-Date: 2010-04-12
+Version: 0.6.0
+Date: 2010-07-15
 Depends: methods, grid, ape(>= 2.1), Rcpp (>= 0.7.4)
 Suggests: ade4, MASS
 Author: R Hackathon et al. (alphabetically: Ben Bolker, Marguerite Butler, Peter Cowan,  Damien de Vienne, Dirk Eddelbuettel, Mark Holder, Thibaut Jombart, Steve Kembel, Francois Michonneau, David Orme, Brian O'Meara, Emmanuel Paradis, Jim Regetz, Derrick Zwickl)
 Maintainer:  Ben Bolker <bolker at ufl.edu>
 Description: Provides a base S4 class for comparative methods, incorporating one or more trees and trait data
 License: GPL (>= 2)
-Collate: phylo4.R checkdata.R formatData.R class-multiphylo4.R class-oldclasses.R class-phylo4.R class-phylo4d.R  class-phylomats.R methods-multiphylo4.R methods-oldclasses.R methods-phylo4.R methods-phylo4d.R setAs-Methods.R pdata.R subset.R phylobase.options.R prune.R treePlot.R treestruc.R treewalk.R readNexus.R tbind.R zzz.R
+Collate: phylo4.R checkdata.R formatData.R class-multiphylo4.R class-oldclasses.R class-phylo4.R class-phylo4d.R  class-phylomats.R methods-multiphylo4.R methods-oldclasses.R methods-phylo4.R methods-phylo4d.R setAs-Methods.R pdata.R subset.R phylobase.options.R prune.R treePlot.R treestruc.R treewalk.R readNexus.R readNCL.R tbind.R zzz.R
 URL: http://phylobase.R-forge.R-project.org

Modified: pkg/NAMESPACE
===================================================================
--- pkg/NAMESPACE	2010-07-15 08:37:24 UTC (rev 797)
+++ pkg/NAMESPACE	2010-07-15 08:40:04 UTC (rev 798)
@@ -60,7 +60,8 @@
     MRCA, shortestPath, getEdge)
 
 # Nexus functions
-export(readNexus, read.nexustreestring)
+export(readNexus)
+export(readNCL)
 
 # pdata functions
 export(pdata, check_pdata)

Added: pkg/R/readNCL.R
===================================================================
--- pkg/R/readNCL.R	                        (rev 0)
+++ pkg/R/readNCL.R	2010-07-15 08:40:04 UTC (rev 798)
@@ -0,0 +1,179 @@
+readNCL <- function(file, simplify=FALSE, type=c("all", "tree", "data"),
+                    char.all=FALSE, polymorphic.convert=TRUE,
+                    levels.uniform=TRUE, quiet=TRUE,
+                    check.node.labels=c("keep", "drop", "asdata"),
+                    return.labels=TRUE, ...) {
+
+
+ type <- match.arg(type)
+ check.node.labels <- match.arg(check.node.labels)
+
+
+ 
+ if (type == "all" || type == "data") {
+   returnData <- TRUE
+ }
+ else {
+   returnData <- FALSE
+ }
+ if (type == "all" || type == "tree") {
+   returnTrees <- TRUE
+ }
+ else {
+   returnTrees <- FALSE
+ }
+
+ fileName <- list(fileName=file)
+ parameters <- c(char.all, polymorphic.convert, levels.uniform, returnTrees, returnData)
+
+ ## GetNCL returns a list containing:
+ ##  $taxaNames: names of the taxa (from taxa block, implied or declared)
+ ##  $treeNames: the names of the trees
+ ##  $trees: a vector of (untranslated) Newick strings
+ ##  $dataTypes: data type for each character block of the nexus file (length = number of chr blocks)
+ ##  $nbCharacters: number of characters in each block (length = number of chr blocks)
+ ##  $charLabels: the labels for the characters, i.e. the headers of the data frame to be returned
+ ##    (length = number of chr blocks * sum of number of characters in each block)
+ ##  $nbStates: the number of states of each character (equals 0 for non-standard types, length = number
+ ##    of characters)
+ ##  $stateLabels: the labels for the states of the characters, i.e. the levels of the factors to be returned
+ ##  $dataChr: string that contains the data to be returned
+ ncl <- .Call("GetNCL", fileName, parameters, PACKAGE="phylobase")
+ 
+ if (!quiet) print(ncl)
+
+ ## Disclaimer
+ if (!length(grep("\\{", ncl$dataChr)) && return.labels && !polymorphic.convert) {
+   stop("At this stage, it's not possible to use the combination: ",
+        "return.labels=TRUE and polymorphic.convert=FALSE for datasets ",
+        "that contain polymorphic characters.")
+ }
+ 
+ if (returnData && length(ncl$dataChr)) {
+   tipData <- vector("list", length(ncl$dataChr))
+   for (iBlock in 1:length(ncl$dataTypes)) {
+     chrCounter <- ifelse(iBlock == 1, 0, sum(ncl$nbCharacters[1:(iBlock-1)]))
+     if (ncl$dataTypes[iBlock] == "Continuous") {       
+       for (iChar in 1:ncl$nbCharacters[iBlock]) {
+         i <- chrCounter + iChar
+         tipData[[i]] <- eval(parse(text=ncl$dataChr[i]))
+         names(tipData)[i] <- ncl$charLabels[i]
+       }
+     }
+     else {
+ 
+       if (ncl$dataTypes[iBlock] == "Standard") {
+         iForBlock <- integer(0)
+         for (iChar in 1:ncl$nbCharacters[iBlock]) {      
+           i <- chrCounter + iChar
+           iForBlock <- c(iForBlock, i)
+           lblCounterMin <- ifelse(i == 1, 1, sum(ncl$nbStates[1:(i-1)]) + 1)
+           lblCounter <- seq(lblCounterMin, length.out=ncl$nbStates[i])
+           tipData[[i]] <- eval(parse(text=ncl$dataChr[i]))
+           names(tipData)[i] <- ncl$charLabels[i]
+           tipData[[i]] <- as.factor(tipData[[i]])
+           if (return.labels) {
+             levels(tipData[[i]]) <- ncl$stateLabels[lblCounter]
+           }          
+         }
+         if (levels.uniform) {
+           allLevels <- character(0)
+           for (j in iForBlock) {
+             allLevels <- union(allLevels, levels(tipData[[j]]))
+           }
+           for (j in iForBlock) {
+             levels(tipData[[j]]) <- allLevels
+           }
+         }
+       }
+       else {
+         warning("This datatype is not currently supported by phylobase")
+         next
+         ## FIXME: different datatypes in a same file isn't going to work
+       }
+     }
+   }
+   tipData <- data.frame(tipData)
+   if (length(ncl$taxaNames) == nrow(tipData)) {
+     rownames(tipData) <- ncl$taxaNames
+   }
+   else stop("phylobase doesn't deal with multiple taxa block at this time.")
+ }
+ else {
+   tipData <- NULL
+ }
+
+ if (returnTrees && length(ncl$trees) > 0) {
+   listTrees <- vector("list", length(ncl$trees))
+   for (i in 1:length(ncl$trees)) {
+     if (length(grep(":", ncl$trees[i]))) {
+       listTrees[[i]] <- tree.build(ncl$trees[i])
+     }
+     else {
+       listTrees[[i]] <- clado.build(ncl$trees[i])
+     }
+   }
+   listTrees <- lapply(listTrees, function(tr) {       
+     if (length(ncl$taxaNames) == nTips(tr)) {
+       tr$tip.label <- ncl$taxaNames[as.numeric(tr$tip.label)]     
+     }
+     else stop("phylobase doesn't deal with multiple taxa block at this time.")
+     if (is.null(tr$node.label)) {
+       if (check.node.labels == "asdata") {
+         warning("Could not use value \"asdata\" for ",
+                 "check.node.labels because there are no ",
+                 "labels associated with the tree")
+         check.node.labels <- "drop"
+       }
+       tr <- phylo4(tr, check.node.labels=check.node.labels, ...)       
+     }
+     else {
+       tr <- phylo4d(tr, check.node.labels=check.node.labels, ...)
+     }
+   })
+   if (length(listTrees) == 1 || simplify)
+     listTrees <- listTrees[[1]]
+ }
+ else {
+   listTrees <- NULL
+ }
+ 
+ ###
+ switch(type,
+        "data" = {
+          if (is.null(tipData)) {
+            toRet <- NULL
+          }
+          else {
+            toRet <- tipData
+          }
+        },
+        "tree" = {
+          if (is.null(listTrees)) {
+            toRet <- NULL
+          }
+          else {           
+            toRet <- listTrees                                          
+          }
+        },
+        "all" = {
+          if (is.null(tipData) && is.null(listTrees)) {
+            toRet <- NULL
+          }
+          else if (is.null(tipData)) {              
+            toRet <- listTrees
+          }
+          else if (is.null(listTrees)) {
+            toRet <- tipData
+          }
+          else {
+            if (length(listTrees) > 1) {              
+              toRet <- lapply(listTrees, function(tr)
+                              addData(tr, tip.data=tipData, ...))
+            }
+            else toRet <- addData(listTrees, tip.data=tipData, ...)
+          }
+        })
+ toRet
+}
+ 

Modified: pkg/R/readNexus.R
===================================================================
--- pkg/R/readNexus.R	2010-07-15 08:37:24 UTC (rev 797)
+++ pkg/R/readNexus.R	2010-07-15 08:40:04 UTC (rev 798)
@@ -4,352 +4,8 @@
                        check.node.labels=c("keep", "drop", "asdata"),
                        return.labels=TRUE, ...) {
 
-    ## file = input nexus file
-    ## simplify = if TRUE only keeps the first tree, if several trees are found in
-    ##            the Nexus file
-    ## type = specify whether to return trees+data as phylo4d object ("all") if
-    ##        both are found, returning a data.frame or phylo4 object if only one
-    ##        is found, "tree": return a phylo4 object only, regardless of
-    ##        whether there are data, "data": return a data.frame (no tree), even
-    ##        if a tree is present
-    ## char.all = if TRUE, includes even excluded chars in the nexus file
-    ## polymorphic.convert = if TRUE, convert polymorphic characters to missing
-    ##                       characters
-    ## levels.uniform = if TRUE, categorical data are loaded with the same levels,
-    ##                  even if one character is missing a state
-    ## return.labels = if TRUE, returns the names of the states instead of the
-    ##                 the internal codes
-    ## quiet = if TRUE, returns the object without printing tree strings (printing
-    ##         makes readNexus very slow in the cases of very big trees)
-    ## check.node.labels = how to deal with node labels, to be passed to phylo4d
-    ##                     constructor
-
-    type <- match.arg(type)
-    check.node.labels <- match.arg(check.node.labels)
-
-    output <- c("Failure")
-    if (type == "all" || type == "data") {
-        params <- list(filename=file, allchar=char.all,
-                       polymorphictomissing=polymorphic.convert,
-                       levelsall=levels.uniform,
-                       returnlabels=return.labels)
-
-        ## Check that params is properly formatted.
-        if(!is.list(params) || length(params) == 0) {
-            stop("The params parameter must be a non-empty list")
-        }
-        incharsstring <- .Call("ReadCharsWithNCL", params,
-                               PACKAGE="phylobase")
-        if (length(incharsstring) > 0) {
-            incharsstring <- unlist(strsplit(incharsstring$charstring, "\\|"))
-            incharsstring <- incharsstring[nzchar(incharsstring)]
-
-            if (!quiet) print(incharsstring)   # display character string if quiet is FALSE
-
-            iDtType <- seq(from=1, to=length(incharsstring), by=2)
-            iCharStrg <- seq(from=2, to=length(incharsstring), by=2)
-
-            datatype <- incharsstring[iDtType]
-            charString <- incharsstring[iCharStrg]
-
-            tipdata <- list()
-            for (i in 1:length(charString)) {
-                if (datatype[i] == "Standard") {
-                    ## Remove empty labels for factors
-                    charString[i] <- gsub("\\\"\\\"", "", charString[i])
-                    charString[i] <- gsub(",+)", ")", charString[i])
-
-                    ## For now, we can't deal with polymorphic characters and their labels
-                    if (length(grep("\\{", charString[i])) > 0 &&
-                        return.labels) {
-                        stop("At this stage, it's not possible to use the combination: ",
-                             "return.labels=TRUE for datasets that contain polymorphic ",
-                             "characters.")
-                    }
-
-                    ## Convert the string to data frame
-                    tipdata[[i]] <- eval(parse(text=charString[i]))
-
-                    ## if levels.uniform=TRUE apply the same levels to all characters
-                    if (levels.uniform && length(tipdata[[i]]) > 0) {
-                        allLevels <- character(0)
-                        for (j in 1:ncol(tipdata[[i]])) {
-                            allLevels <- union(allLevels, levels(tipdata[[i]][,j]))
-                        }
-                        for (j in 1:ncol(tipdata[[i]])) {
-                            levels(tipdata[[i]][,j]) <- allLevels
-                        }
-                    }
-                }
-                else {
-                    ## Just convert string to data frame for other datatype
-                    tipdata[[i]] <- eval(parse(text=charString[i]))
-                }
-            }
-            finalTipdata <- tipdata[[1]]
-            if (length(tipdata) > 1) {
-                for(td in tipdata[-1]) {
-                    finalTipdata <- cbind(finalTipdata, td)
-                }
-            }
-            tipdata <- finalTipdata
-        }
-        else {
-            tipdata <- NULL
-        }
-    }
-    if (type == "all" || type == "tree") {
-        trees <- c("Failure");
-        params <- list(filename=file)
-
-        ## Check that params is properly formatted.
-        if(!is.list(params) || length(params) == 0) {
-            stop("The params parameter must be a non-empty list");
-        }
-
-        ## Finally ready to make the call...
-        intreesstring <- .Call("ReadTreesWithNCL", params,
-                               PACKAGE="phylobase")
-        ## Display the string returned by NCL if quiet=FALSE
-        if(!quiet) print(intreesstring)
-		if(length(intreesstring) > 0){
-	        intreesphylolist <- read.nexustreestring(intreesstring)
-	        if (length(intreesphylolist)>1 && !simplify) {
-	            trees <- list()
-	            for (i in 1:length(intreesphylolist)) {
-	                if(identical(check.node.labels, "asdata")) {
-	                    if(is.null(intreesphylolist[[i]]$node.label)) {
-	                        warning("Could not use value \"asdata\" for ",
-	                                "check.node.labels because there are no ",
-	                                "labels associated with the tree ", i)
-	                        check.node.labels <- "drop"
-	                    }
-	                    trees[[i]] <- phylo4d(intreesphylolist[[i]],
-	                                          check.node.labels=check.node.labels,
-	                                          ...)
-	                }
-	                else {
-	                    trees[[i]] <- phylo4(intreesphylolist[[i]],
-	                                         check.node.labels=check.node.labels,
-	                                         ...)
-	                }
-	            }
-	        }
-	        else {
-	            if (identical(check.node.labels, "asdata")) {
-	                if (is.null(intreesphylolist[[1]]$node.label)) {
-	                    warning("Could not use value \"asdata\" for ",
-	                            "check.node.labels because there are no ",
-	                            "labels associated with the tree ", i)
-	                    check.node.labels <- "drop"
-	                }
-	                trees <- phylo4d(intreesphylolist[[1]],
-	                                 check.node.labels=check.node.labels,
-	                                 ...)
-	            }
-	            else {
-	                trees <- phylo4(intreesphylolist[[1]],
-	                                check.node.labels=check.node.labels,
-	                                ...)
-	            }
-	        }
-		}
-		else {
-			trees <- NULL
-		}
-    }
-
-	## scheme of what you get back, given what you asked
-	## for and whether data or tree blocks are actually in
-	## the file
-	##                         
-	## in nexus file        type argument
-	## data     tree        all   data  trees
-	## TRUE     FALSE       df    df    NULL
-	## FALSE    TRUE        p4    NULL  p4
-	## TRUE     TRUE        p4d   df    p4
-	## FALSE    FALSE       NULL  NULL  NULL
-	
-	switch(type,
-		'data' = {
-			if(is.null(tipdata)){
-				output <- NULL
-			}
-			else {
-				output <- tipdata
-			}
-		},
-		'tree' = {
-			if(is.null(trees)){
-				output <- NULL
-			}
-			else {
-				output <- trees
-			}
-		},
-		'all' = {
-			if(is.null(tipdata) & is.null(trees)){
-				output <- NULL
-			}
-			else if (is.null(tipdata)){
-				output <- trees
-			}
-			else if (is.null(trees)){
-				output <- tipdata
-			}
-			else {
-	            if (length(intreesphylolist) > 1 && !simplify) {
-	                output <- list()
-	                for (i in 1:length(intreesphylolist)) {
-	                    output[[i]] <- phylo4d(intreesphylolist[[i]],
-	                                           tip.data = tipdata,
-	                                           check.node.labels=check.node.labels,
-	                                           ...)
-	                }
-	            }
-	            else {
-	                output <- phylo4d(intreesphylolist[[1]],
-	                                  tip.data=tipdata,
-	                                  check.node.labels=check.node.labels,
-	                                  ...)
-	            }
-			}		
-		})
-		
-    output
+  return(readNCL(file=file, simplify=simplify, type=type, char.all=char.all,
+          polymorphic.convert=polymorphic.convert, levels.uniform=levels.uniform,
+          quiet=quiet, check.node.labels=check.node.labels,
+          return.labels=return.labels, ...))
 }
-
-read.nexustreestring <- function(X) {
-    ## Returns list of phylo objects (not multi.phylo, and always a list, even if
-    ## there is only one element X is a character vector, each element is one line
-    ## from a treefile
-    ## This is based almost entirely on read.nexus from APE (Emmanuel Paradis).
-
-    X<-unlist(strsplit(unlist(X),c("\n")))
-
-    ## first remove all the comments
-
-    ## BCO took out the "speedier removal of comments" code -- it keeps [&R]
-    ## as a node label, replaced it with original APE code
-    ## speedier removal of comments pc 13 April 2008
-    ##X <- lapply(X, gsub, pattern = "\\[[^\\]]*\\]", replacement = "")
-
-    LEFT <- grep("\\[", X)
-    RIGHT <- grep("\\]", X)
-    if (length(LEFT)) { # in case there are no comments at all
-        w <- LEFT == RIGHT
-        if (any(w)) { # in case all comments use at least 2 lines
-            s <- LEFT[w]
-            X[s] <- gsub("\\[[^]]*\\]", "", X[s])
-            ## The above regexp was quite tough to find: it makes
-            ## possible to delete series of comments on the same line:
-            ##       ...[...]xxx[...]...
-            ## without deleting the "xxx". This regexp is in three parts:
-            ##       \\[      [^]]*       \\]
-            ## where [^]]* means "any character, except "]", repeated zero
-            ## or more times" (note that the ']' is not escaped here).
-            ## The previous version was:
-            ##       X[s] <- gsub("\\[.*\\]", "", X[s])
-            ## which deleted the "xxx". (EP  2008-06-24)
-        }
-        w <- !w
-        if (any(w)) {
-            s <- LEFT[w]
-            X[s] <- gsub("\\[.*", "", X[s])
-            sb <- RIGHT[w]
-            X[sb] <- gsub(".*\\]", "", X[sb])
-            if (any(s < sb - 1))
-            X <- X[-unlist(mapply(":", (s + 1), (sb - 1)))]
-        }
-    }
-    endblock <- grep("END;|ENDBLOCK;", X, ignore.case = TRUE)
-    semico <- grep(";", X)
-    i1 <- grep("BEGIN TREES;", X, ignore.case = TRUE)
-    i2 <- grep("TRANSLATE", X, ignore.case = TRUE)
-    translation <- if (length(i2) == 1 && i2 > i1) TRUE else FALSE
-    if (translation) {
-        end <- semico[semico > i2][1]
-        x <- X[(i2 + 1):end] # assumes there's a 'new line' after "TRANSLATE"
-        ## x <- gsub("TRANSLATE", "", x, ignore.case = TRUE)
-        x <- unlist(strsplit(x, "[,; \t]"))
-        x <- x[nzchar(x)]
-        TRANS <- matrix(x, ncol = 2, byrow = TRUE)
-        TRANS[, 2] <- gsub("['\"]", "", TRANS[, 2])
-        n <- dim(TRANS)[1]
-    }
-    start <-
-    if (translation)
-        semico[semico > i2][1] + 1
-    else
-        semico[semico > i1][1]
-    end <- endblock[endblock > i1][1] - 1
-    tree <- X[start:end]
-    rm(X)
-    tree <- gsub("^.*= *", "", tree)
-    semico <- grep(";", tree)
-    Ntree <- length(semico)
-
-    ## are some trees on several lines?
-    if (any(diff(semico) != 1)) {
-        STRING <- character(Ntree)
-        s <- c(1, semico[-Ntree] + 1)
-        j <- mapply(":", s, semico)
-        for (i in 1:Ntree)
-            STRING[i] <- paste(tree[j[, i]], collapse = "")
-    }
-    else
-        STRING <- tree
-    rm(tree)
-    STRING <- gsub(" ", "", STRING)
-    colon <- grep(":", STRING)
-    if (!length(colon)) {
-        ## TODO: recode clado.build, tree.build & .treeBuildWithTokens from ape to phylobase
-        trees <- lapply(STRING, clado.build)
-    }
-    else {
-        if (length(colon) == Ntree) {
-            if (translation)
-                trees <- lapply(STRING, .treeBuildWithTokens)
-            else
-               trees <- lapply(STRING, tree.build)
-
-        }
-        else {
-            trees <- vector("list", Ntree)
-            trees[colon] <- lapply(STRING[colon], tree.build)
-            nocolon <- (1:Ntree)[!1:Ntree %in% colon]
-            trees[nocolon] <- lapply(STRING[nocolon], clado.build)
-            if (translation) {
-                for (i in 1:Ntree) {
-                    tr <- trees[[i]]
-                    for (j in 1:n) {
-                        ind <- which(tr$tip.label[j] == TRANS[, 1])
-                        tr$tip.label[j] <- TRANS[ind, 2]
-                    }
-                    if (!is.null(tr$node.label)) {
-                        for (j in 1:length(tr$node.label)) {
-                            ind <- which(tr$node.label[j] == TRANS[, 1])
-                            tr$node.label[j] <- TRANS[ind, 2]
-                        }
-                    }
-                    trees[[i]] <- tr
-                }
-                translation <- FALSE
-            }
-        }
-    }
-    for (i in 1:Ntree) {
-        tr <- trees[[i]]
-        ## Check here that the root edge is not incorrectly represented
-        ## in the object of class "phylo" by simply checking that there
-        ## is a bifurcation at the root
-        if (!translation) n <- length(tr$tip.label)
-        ROOT <- n + 1
-        if (sum(tr$edge[, 1] == ROOT) == 1 && dim(tr$edge)[1] > 1) {
-            stop(paste("There is apparently two root edges in your file: ",
-                       "cannot read tree file.\n  Reading NEXUS file aborted ",
-                       "at tree no.", i, sep = ""))
-            }
-        }
-    trees
-}

Modified: pkg/inst/nexusfiles/treeWithPolyExcludedData.nex
===================================================================
--- pkg/inst/nexusfiles/treeWithPolyExcludedData.nex	2010-07-15 08:37:24 UTC (rev 797)
+++ pkg/inst/nexusfiles/treeWithPolyExcludedData.nex	2010-07-15 08:40:04 UTC (rev 798)
@@ -14,9 +14,9 @@
 BEGIN CHARACTERS;
 	TITLE  testIncomplete;
 	DIMENSIONS  NCHAR=3;
-	FORMAT DATATYPE = STANDARD GAP = - MISSING = ? SYMBOLS = "  0 1 2";
+	FORMAT DATATYPE = STANDARD GAP = - MISSING = ? SYMBOLS = "0 1 2";
 	CHARSTATELABELS 
-		1 Test1 /  test1A test1B, 2 Test2 /  test2A test2B, 3 Test3 /  test3A test3B test3C ; 
+		1 Test1 /test1A test1B, 2 Test2 /test2A test2B, 3 Test3 /test3A test3B test3C; 
 	MATRIX
 	Myrmecocystuscfnavajo    1(0 1)(0 1 2)
 	Myrmecocystuscreightoni  ?(0 1)(0 1)

Deleted: pkg/man/read.nexustreestring.Rd
===================================================================
--- pkg/man/read.nexustreestring.Rd	2010-07-15 08:37:24 UTC (rev 797)
+++ pkg/man/read.nexustreestring.Rd	2010-07-15 08:40:04 UTC (rev 798)
@@ -1,17 +0,0 @@
-\name{read.nexustreestring}
-\alias{read.nexustreestring}
-\title{Read Nexus tree string}
-\description{
-  This is a stub!
-}
-\usage{
-read.nexustreestring(X)
-}
-\arguments{
-  \item{X}{a string containing a Nexus tree definition}
-}
-\value{
-  Some kind of tree list?
-}
-\author{Brian O'Meara?}
-\keyword{misc}

Modified: pkg/man/readNexus.Rd
===================================================================
--- pkg/man/readNexus.Rd	2010-07-15 08:37:24 UTC (rev 797)
+++ pkg/man/readNexus.Rd	2010-07-15 08:40:04 UTC (rev 798)
@@ -1,3 +1,4 @@
+\alias{readNCL}
 \name{readNexus}
 \docType{methods}
 \alias{readNexus}

Modified: pkg/man/subset-methods.Rd
===================================================================
--- pkg/man/subset-methods.Rd	2010-07-15 08:37:24 UTC (rev 797)
+++ pkg/man/subset-methods.Rd	2010-07-15 08:40:04 UTC (rev 798)
@@ -145,7 +145,7 @@
 edgeLength(geotree)['0-15'] <- 0.1
 geotree2 <- geotree[1:2]
 ## in subset tree, edge of new root extends back to the original root
-edgeLength(geotree2)['NA-3']
+edgeLength(geotree2)['0-3']
 ## edge length immediately ancestral to this node in the original tree
 edgeLength(geotree, MRCA(geotree, tipLabels(geotree2)))
 

Added: pkg/src/GetNCL.cpp
===================================================================
--- pkg/src/GetNCL.cpp	                        (rev 0)
+++ pkg/src/GetNCL.cpp	2010-07-15 08:40:04 UTC (rev 798)
@@ -0,0 +1,239 @@
+#include <Rcpp.h>
+#include "ncl/nxsmultiformat.h"
+
+NxsString contData(NxsCharactersBlock& charBlock, NxsString& charString, 
+		   const int& eachChar, const int& nTax) {
+    for (int taxon=0; taxon < nTax; ++taxon) {
+	double state=charBlock.GetSimpleContinuousValue(taxon,eachChar);			
+	if (state==DBL_MAX) {
+	    charString+="NA";
+	}
+	else {					    
+	    char buffer[100];
+	    sprintf(buffer, "%.10f", state);
+	    charString+=buffer; 
+	}
+	
+	if (taxon+1 < nTax) {
+	    charString+=',';
+	}
+    }
+    return charString;
+}
+
+
+NxsString stdData(NxsCharactersBlock& charBlock, NxsString& charString, const int& eachChar,
+		  const int& nTax, bool polyconvert) {
+    for (int taxon=0; taxon<nTax; ++taxon) {
+
+	int stateNumber=charBlock.GetInternalRepresentation(taxon, eachChar, 0);
+
+	if(charBlock.IsMissingState(taxon, eachChar)) {
+	    charString+="NA";
+	}
+	else if (charBlock.GetNumStates(taxon, eachChar)>1) {
+	    if(polyconvert) {
+		charString+="NA";
+	    }
+	    else {
+		charString+='"';
+		charString+='{';
+		for (unsigned int k=0; k < charBlock.GetNumStates(taxon, eachChar); ++k) {
+		    charString += charBlock.GetInternalRepresentation(taxon, eachChar, k);	
+		    if (k+1 < charBlock.GetNumStates(taxon, eachChar)) {
+			charString+=',';
+		    }
+		}
+		charString+='}';
+		charString+='"';
+	    }
+	}
+	else {
+	    charString+='"';
+	    charString+=stateNumber;
+	    charString+='"';
+	}
+	if (taxon+1 < nTax) {
+	    charString+=',';
+	}
+    }
+    return charString;
+}
+
+
+extern "C" SEXP GetNCL(SEXP params, SEXP paramsVecR) {
+
+    Rcpp::List list(params);
+    Rcpp::LogicalVector paramsVec(paramsVecR);
+
+    bool charall = paramsVec[0];
+    bool polyconvert = paramsVec[1];
+    bool levelsUnif = paramsVec[2];
+    bool returnTrees = paramsVec[3];
+    bool returnData = paramsVec[4];
+
+    int nCharToReturn = 0;
+
+    std::vector<std::string> dataTypes;      //vector of datatypes for each character block
+    std::vector<int> nbCharacters;           //number of characters for each character block
+    std::vector<std::string> dataChr;        //characters
+    std::vector<std::string> charLabels;     //labels for the characters
+    std::vector<std::string> stateLabels;    //labels for the states
+    std::vector<int> nbStates;               //number of states for each character (for Standard datatype)
+    std::vector<std::string> trees;          //vector of Newick strings holding the names
+    std::vector<std::string> treeNames;      //vector of tree names
+    std::vector<std::string> taxaNames;      //vector of taxa names
+
+    std::vector<bool> test(3);
+    test[0] = charall;
+    test[1] = polyconvert;
+    test[2] = levelsUnif;
+
+#   if defined(FILENAME_AS_NEXUS)
+    std::string filename = "'" + list["fileName"] + "'";
+#   else
+    std::string filename = list["fileName"];
+#   endif
+
+    MultiFormatReader nexusReader(-1, NxsReader::WARNINGS_TO_STDERR);
+
+    /* make NCL less strict */
+    NxsTreesBlock * treesB = nexusReader.GetTreesBlockTemplate();
+    treesB->SetAllowImplicitNames(true);
+    nexusReader.cullIdenticalTaxaBlocks(true);
+    /* End of making NCL less strict */
+    
+    nexusReader.ReadFilepath(const_cast < char* > (filename.c_str()), MultiFormatReader::NEXUS_FORMAT);  
+
+    const unsigned nTaxaBlocks = nexusReader.GetNumTaxaBlocks();
+    for (unsigned t = 0; t < nTaxaBlocks; ++t) {
+	/* Get blocks */
+	const NxsTaxaBlock * taxaBlock = nexusReader.GetTaxaBlock(t);
+	const unsigned nTreesBlocks = nexusReader.GetNumTreesBlocks(taxaBlock);
+	const unsigned nCharBlocks = nexusReader.GetNumCharactersBlocks(taxaBlock);
+
+	int nTax = taxaBlock->GetNumTaxonLabels();
+	
+	/* Get taxa names */
+	for (int j=0; j < nTax; ++j) {	
+	    taxaNames.push_back (taxaBlock->GetTaxonLabel(j));
+	}
+
+	/* Get trees */
+	if (returnTrees) {
+	    if (nTreesBlocks == 0) {
+		continue;
+	    }
+	    for (unsigned i = 0; i < nTreesBlocks; ++i) {
+		NxsTreesBlock* treeBlock = nexusReader.GetTreesBlock(taxaBlock, i);
+		const unsigned nTrees = treeBlock->GetNumTrees();
+		if (nTrees > 0) {
+		    for (unsigned k = 0; k < nTrees; k++) {
+			NxsString ts = treeBlock->GetTreeDescription(k);
+			NxsString trNm = treeBlock->GetTreeName(k);
+			treeNames.push_back(trNm);
+			trees.push_back (ts);
+		    }
+		}
+		else {
+		    continue;
+		}
+	    }
+	}
+	
+	/* Get data */
+	if (returnData) {
+	    for (unsigned k = 0; k < nCharBlocks; ++k) {
+		NxsCharactersBlock * charBlock = nexusReader.GetCharactersBlock(taxaBlock, k);
+		
+		if (nCharBlocks == 0) {
+		    continue;
+		}
+		else {
+		    NxsString dtType = charBlock->GetNameOfDatatype(charBlock->GetDataType());
+		    dataTypes.push_back(dtType);
+		
+		    if (charall) {
+			nCharToReturn=charBlock->GetNCharTotal();
+		    }
+		    else {
+			nCharToReturn=charBlock->GetNumIncludedChars();
+		    }
+		    nbCharacters.push_back (nCharToReturn);
+		    for (int eachChar=0; eachChar < nCharToReturn; ++eachChar) { //We only pass the non-eliminated chars
+			NxsString charLabel=charBlock->GetCharLabel(eachChar);
+			if (charLabel.length()>1) {
+			    charLabels.push_back (charLabel);
+			}
+			else {
+			    charLabels.push_back ("standard_char"); //FIXME: needs to fixed for sequence data
+			}
+			
+			NxsString tmpCharString;
+			if (std::string("Continuous") == dtType) {
+			    tmpCharString = contData(*charBlock, tmpCharString, eachChar, nTax);
+			    nbStates.push_back (0);			    
+			}
+			else {
+			    if (std::string("Standard") == dtType) {			    
+				tmpCharString = stdData(*charBlock, tmpCharString, eachChar, nTax,
+							polyconvert);
+				unsigned int nCharStates = charBlock->GetNumObsStates(eachChar, false);
+				nbStates.push_back (nCharStates);
+				for (unsigned int l=0; l < nCharStates; ++l) {
+				    NxsString label = charBlock->GetStateLabel(eachChar, l);
+				    stateLabels.push_back (label);
+				}
+			    }
+			    else {
+				if (std::string("DNA") == dtType) {
+				    for (int taxon=0; taxon < nTax; ++taxon) {
+					for (int eachChar=0; eachChar < nCharToReturn; ++eachChar) {
+					    unsigned int nCharStates = charBlock->GetNumStates(taxon, eachChar);
+					    if (charBlock->IsGapState(taxon, eachChar)) {
+						tmpCharString += "-";
+					    }
+					    else {
+						if (charBlock->IsMissingState(taxon, eachChar)) {
+						    tmpCharString += "?";
+						}
+						else {
+						    if (nCharStates == 1) {
+							tmpCharString += charBlock->GetState(taxon, eachChar, 0);
+						    }
+						    else {
+							tmpCharString += "?"; //FIXME
+						    }			    
+						}
+					    }
+					}
+				    }
+				}
+				else { // other type of data not yet supported
+				    tmpCharString = "";
+				    nbStates.push_back (0);
+				    stateLabels.push_back (std::string(""));
+				}
+			    }
+			}
+			std::string charString = "c(" + tmpCharString + ");";
+			dataChr.push_back (charString);
+		    }				
+		}  
+	    }
+	}
+    }
+
+    /* Prepare list to return */
+    Rcpp::List res = Rcpp::List::create(Rcpp::Named("taxaNames") = taxaNames,
+					Rcpp::Named("treeNames") = treeNames,
+					Rcpp::Named("trees") = trees,
+					Rcpp::Named("dataTypes") = dataTypes,
+					Rcpp::Named("nbCharacters") = nbCharacters,
+					Rcpp::Named("charLabels") = charLabels,
+					Rcpp::Named("nbStates") = nbStates,
+					Rcpp::Named("stateLabels") = stateLabels,
+					Rcpp::Named("dataChr") = dataChr,
+					Rcpp::Named("Test") = test);
+    return res;				
+}

Deleted: pkg/src/NCLInterface.cpp
===================================================================
--- pkg/src/NCLInterface.cpp	2010-07-15 08:37:24 UTC (rev 797)
+++ pkg/src/NCLInterface.cpp	2010-07-15 08:40:04 UTC (rev 798)
@@ -1,2219 +0,0 @@
-//  Copyright (C) 2007-2008 Brian O'Meara & Derrick Zwickl
-//  A modification of the BasicCommandLine file of the NCL (see below)
-//  to use for loading trees and data from Nexus into R. Licensing as below.
-
-//	Copyright (C) 1999-2002 Paul O. Lewis
-//
-//	This file is part of NCL (Nexus Class Library).
-//
-//	NCL is free software; you can redistribute it and/or modify
[TRUNCATED]

To get the complete diff run:
    svnlook diff /svnroot/phylobase -r 798


More information about the Phylobase-commits mailing list