[Phylobase-commits] r798 - in pkg: . R inst/nexusfiles man src
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Thu Jul 15 10:40:05 CEST 2010
Author: francois
Date: 2010-07-15 10:40:04 +0200 (Thu, 15 Jul 2010)
New Revision: 798
Added:
pkg/R/readNCL.R
pkg/src/GetNCL.cpp
Removed:
pkg/man/read.nexustreestring.Rd
pkg/src/NCLInterface.cpp
pkg/src/NCLInterface.h
pkg/src/ReadWithNCL.cpp
Modified:
pkg/DESCRIPTION
pkg/NAMESPACE
pkg/R/readNexus.R
pkg/inst/nexusfiles/treeWithPolyExcludedData.nex
pkg/man/readNexus.Rd
pkg/man/subset-methods.Rd
Log:
Change the interface between NCL and phylobase. The interface (GetNCL.cpp) now returns a list that contains the information of the NEXUS file instead of a string of characters.
Modified: pkg/DESCRIPTION
===================================================================
--- pkg/DESCRIPTION 2010-07-15 08:37:24 UTC (rev 797)
+++ pkg/DESCRIPTION 2010-07-15 08:40:04 UTC (rev 798)
@@ -1,13 +1,13 @@
Package: phylobase
Type: Package
Title: Base package for phylogenetic structures and comparative data
-Version: 0.5.11
-Date: 2010-04-12
+Version: 0.6.0
+Date: 2010-07-15
Depends: methods, grid, ape(>= 2.1), Rcpp (>= 0.7.4)
Suggests: ade4, MASS
Author: R Hackathon et al. (alphabetically: Ben Bolker, Marguerite Butler, Peter Cowan, Damien de Vienne, Dirk Eddelbuettel, Mark Holder, Thibaut Jombart, Steve Kembel, Francois Michonneau, David Orme, Brian O'Meara, Emmanuel Paradis, Jim Regetz, Derrick Zwickl)
Maintainer: Ben Bolker <bolker at ufl.edu>
Description: Provides a base S4 class for comparative methods, incorporating one or more trees and trait data
License: GPL (>= 2)
-Collate: phylo4.R checkdata.R formatData.R class-multiphylo4.R class-oldclasses.R class-phylo4.R class-phylo4d.R class-phylomats.R methods-multiphylo4.R methods-oldclasses.R methods-phylo4.R methods-phylo4d.R setAs-Methods.R pdata.R subset.R phylobase.options.R prune.R treePlot.R treestruc.R treewalk.R readNexus.R tbind.R zzz.R
+Collate: phylo4.R checkdata.R formatData.R class-multiphylo4.R class-oldclasses.R class-phylo4.R class-phylo4d.R class-phylomats.R methods-multiphylo4.R methods-oldclasses.R methods-phylo4.R methods-phylo4d.R setAs-Methods.R pdata.R subset.R phylobase.options.R prune.R treePlot.R treestruc.R treewalk.R readNexus.R readNCL.R tbind.R zzz.R
URL: http://phylobase.R-forge.R-project.org
Modified: pkg/NAMESPACE
===================================================================
--- pkg/NAMESPACE 2010-07-15 08:37:24 UTC (rev 797)
+++ pkg/NAMESPACE 2010-07-15 08:40:04 UTC (rev 798)
@@ -60,7 +60,8 @@
MRCA, shortestPath, getEdge)
# Nexus functions
-export(readNexus, read.nexustreestring)
+export(readNexus)
+export(readNCL)
# pdata functions
export(pdata, check_pdata)
Added: pkg/R/readNCL.R
===================================================================
--- pkg/R/readNCL.R (rev 0)
+++ pkg/R/readNCL.R 2010-07-15 08:40:04 UTC (rev 798)
@@ -0,0 +1,179 @@
+readNCL <- function(file, simplify=FALSE, type=c("all", "tree", "data"),
+ char.all=FALSE, polymorphic.convert=TRUE,
+ levels.uniform=TRUE, quiet=TRUE,
+ check.node.labels=c("keep", "drop", "asdata"),
+ return.labels=TRUE, ...) {
+
+
+ type <- match.arg(type)
+ check.node.labels <- match.arg(check.node.labels)
+
+
+
+ if (type == "all" || type == "data") {
+ returnData <- TRUE
+ }
+ else {
+ returnData <- FALSE
+ }
+ if (type == "all" || type == "tree") {
+ returnTrees <- TRUE
+ }
+ else {
+ returnTrees <- FALSE
+ }
+
+ fileName <- list(fileName=file)
+ parameters <- c(char.all, polymorphic.convert, levels.uniform, returnTrees, returnData)
+
+ ## GetNCL returns a list containing:
+ ## $taxaNames: names of the taxa (from taxa block, implied or declared)
+ ## $treeNames: the names of the trees
+ ## $trees: a vector of (untranslated) Newick strings
+ ## $dataTypes: data type for each character block of the nexus file (length = number of chr blocks)
+ ## $nbCharacters: number of characters in each block (length = number of chr blocks)
+ ## $charLabels: the labels for the characters, i.e. the headers of the data frame to be returned
+ ## (length = number of chr blocks * sum of number of characters in each block)
+ ## $nbStates: the number of states of each character (equals 0 for non-standard types, length = number
+ ## of characters)
+ ## $stateLabels: the labels for the states of the characters, i.e. the levels of the factors to be returned
+ ## $dataChr: string that contains the data to be returned
+ ncl <- .Call("GetNCL", fileName, parameters, PACKAGE="phylobase")
+
+ if (!quiet) print(ncl)
+
+ ## Disclaimer
+ if (!length(grep("\\{", ncl$dataChr)) && return.labels && !polymorphic.convert) {
+ stop("At this stage, it's not possible to use the combination: ",
+ "return.labels=TRUE and polymorphic.convert=FALSE for datasets ",
+ "that contain polymorphic characters.")
+ }
+
+ if (returnData && length(ncl$dataChr)) {
+ tipData <- vector("list", length(ncl$dataChr))
+ for (iBlock in 1:length(ncl$dataTypes)) {
+ chrCounter <- ifelse(iBlock == 1, 0, sum(ncl$nbCharacters[1:(iBlock-1)]))
+ if (ncl$dataTypes[iBlock] == "Continuous") {
+ for (iChar in 1:ncl$nbCharacters[iBlock]) {
+ i <- chrCounter + iChar
+ tipData[[i]] <- eval(parse(text=ncl$dataChr[i]))
+ names(tipData)[i] <- ncl$charLabels[i]
+ }
+ }
+ else {
+
+ if (ncl$dataTypes[iBlock] == "Standard") {
+ iForBlock <- integer(0)
+ for (iChar in 1:ncl$nbCharacters[iBlock]) {
+ i <- chrCounter + iChar
+ iForBlock <- c(iForBlock, i)
+ lblCounterMin <- ifelse(i == 1, 1, sum(ncl$nbStates[1:(i-1)]) + 1)
+ lblCounter <- seq(lblCounterMin, length.out=ncl$nbStates[i])
+ tipData[[i]] <- eval(parse(text=ncl$dataChr[i]))
+ names(tipData)[i] <- ncl$charLabels[i]
+ tipData[[i]] <- as.factor(tipData[[i]])
+ if (return.labels) {
+ levels(tipData[[i]]) <- ncl$stateLabels[lblCounter]
+ }
+ }
+ if (levels.uniform) {
+ allLevels <- character(0)
+ for (j in iForBlock) {
+ allLevels <- union(allLevels, levels(tipData[[j]]))
+ }
+ for (j in iForBlock) {
+ levels(tipData[[j]]) <- allLevels
+ }
+ }
+ }
+ else {
+ warning("This datatype is not currently supported by phylobase")
+ next
+ ## FIXME: different datatypes in a same file isn't going to work
+ }
+ }
+ }
+ tipData <- data.frame(tipData)
+ if (length(ncl$taxaNames) == nrow(tipData)) {
+ rownames(tipData) <- ncl$taxaNames
+ }
+ else stop("phylobase doesn't deal with multiple taxa block at this time.")
+ }
+ else {
+ tipData <- NULL
+ }
+
+ if (returnTrees && length(ncl$trees) > 0) {
+ listTrees <- vector("list", length(ncl$trees))
+ for (i in 1:length(ncl$trees)) {
+ if (length(grep(":", ncl$trees[i]))) {
+ listTrees[[i]] <- tree.build(ncl$trees[i])
+ }
+ else {
+ listTrees[[i]] <- clado.build(ncl$trees[i])
+ }
+ }
+ listTrees <- lapply(listTrees, function(tr) {
+ if (length(ncl$taxaNames) == nTips(tr)) {
+ tr$tip.label <- ncl$taxaNames[as.numeric(tr$tip.label)]
+ }
+ else stop("phylobase doesn't deal with multiple taxa block at this time.")
+ if (is.null(tr$node.label)) {
+ if (check.node.labels == "asdata") {
+ warning("Could not use value \"asdata\" for ",
+ "check.node.labels because there are no ",
+ "labels associated with the tree")
+ check.node.labels <- "drop"
+ }
+ tr <- phylo4(tr, check.node.labels=check.node.labels, ...)
+ }
+ else {
+ tr <- phylo4d(tr, check.node.labels=check.node.labels, ...)
+ }
+ })
+ if (length(listTrees) == 1 || simplify)
+ listTrees <- listTrees[[1]]
+ }
+ else {
+ listTrees <- NULL
+ }
+
+ ###
+ switch(type,
+ "data" = {
+ if (is.null(tipData)) {
+ toRet <- NULL
+ }
+ else {
+ toRet <- tipData
+ }
+ },
+ "tree" = {
+ if (is.null(listTrees)) {
+ toRet <- NULL
+ }
+ else {
+ toRet <- listTrees
+ }
+ },
+ "all" = {
+ if (is.null(tipData) && is.null(listTrees)) {
+ toRet <- NULL
+ }
+ else if (is.null(tipData)) {
+ toRet <- listTrees
+ }
+ else if (is.null(listTrees)) {
+ toRet <- tipData
+ }
+ else {
+ if (length(listTrees) > 1) {
+ toRet <- lapply(listTrees, function(tr)
+ addData(tr, tip.data=tipData, ...))
+ }
+ else toRet <- addData(listTrees, tip.data=tipData, ...)
+ }
+ })
+ toRet
+}
+
Modified: pkg/R/readNexus.R
===================================================================
--- pkg/R/readNexus.R 2010-07-15 08:37:24 UTC (rev 797)
+++ pkg/R/readNexus.R 2010-07-15 08:40:04 UTC (rev 798)
@@ -4,352 +4,8 @@
check.node.labels=c("keep", "drop", "asdata"),
return.labels=TRUE, ...) {
- ## file = input nexus file
- ## simplify = if TRUE only keeps the first tree, if several trees are found in
- ## the Nexus file
- ## type = specify whether to return trees+data as phylo4d object ("all") if
- ## both are found, returning a data.frame or phylo4 object if only one
- ## is found, "tree": return a phylo4 object only, regardless of
- ## whether there are data, "data": return a data.frame (no tree), even
- ## if a tree is present
- ## char.all = if TRUE, includes even excluded chars in the nexus file
- ## polymorphic.convert = if TRUE, convert polymorphic characters to missing
- ## characters
- ## levels.uniform = if TRUE, categorical data are loaded with the same levels,
- ## even if one character is missing a state
- ## return.labels = if TRUE, returns the names of the states instead of the
- ## the internal codes
- ## quiet = if TRUE, returns the object without printing tree strings (printing
- ## makes readNexus very slow in the cases of very big trees)
- ## check.node.labels = how to deal with node labels, to be passed to phylo4d
- ## constructor
-
- type <- match.arg(type)
- check.node.labels <- match.arg(check.node.labels)
-
- output <- c("Failure")
- if (type == "all" || type == "data") {
- params <- list(filename=file, allchar=char.all,
- polymorphictomissing=polymorphic.convert,
- levelsall=levels.uniform,
- returnlabels=return.labels)
-
- ## Check that params is properly formatted.
- if(!is.list(params) || length(params) == 0) {
- stop("The params parameter must be a non-empty list")
- }
- incharsstring <- .Call("ReadCharsWithNCL", params,
- PACKAGE="phylobase")
- if (length(incharsstring) > 0) {
- incharsstring <- unlist(strsplit(incharsstring$charstring, "\\|"))
- incharsstring <- incharsstring[nzchar(incharsstring)]
-
- if (!quiet) print(incharsstring) # display character string if quiet is FALSE
-
- iDtType <- seq(from=1, to=length(incharsstring), by=2)
- iCharStrg <- seq(from=2, to=length(incharsstring), by=2)
-
- datatype <- incharsstring[iDtType]
- charString <- incharsstring[iCharStrg]
-
- tipdata <- list()
- for (i in 1:length(charString)) {
- if (datatype[i] == "Standard") {
- ## Remove empty labels for factors
- charString[i] <- gsub("\\\"\\\"", "", charString[i])
- charString[i] <- gsub(",+)", ")", charString[i])
-
- ## For now, we can't deal with polymorphic characters and their labels
- if (length(grep("\\{", charString[i])) > 0 &&
- return.labels) {
- stop("At this stage, it's not possible to use the combination: ",
- "return.labels=TRUE for datasets that contain polymorphic ",
- "characters.")
- }
-
- ## Convert the string to data frame
- tipdata[[i]] <- eval(parse(text=charString[i]))
-
- ## if levels.uniform=TRUE apply the same levels to all characters
- if (levels.uniform && length(tipdata[[i]]) > 0) {
- allLevels <- character(0)
- for (j in 1:ncol(tipdata[[i]])) {
- allLevels <- union(allLevels, levels(tipdata[[i]][,j]))
- }
- for (j in 1:ncol(tipdata[[i]])) {
- levels(tipdata[[i]][,j]) <- allLevels
- }
- }
- }
- else {
- ## Just convert string to data frame for other datatype
- tipdata[[i]] <- eval(parse(text=charString[i]))
- }
- }
- finalTipdata <- tipdata[[1]]
- if (length(tipdata) > 1) {
- for(td in tipdata[-1]) {
- finalTipdata <- cbind(finalTipdata, td)
- }
- }
- tipdata <- finalTipdata
- }
- else {
- tipdata <- NULL
- }
- }
- if (type == "all" || type == "tree") {
- trees <- c("Failure");
- params <- list(filename=file)
-
- ## Check that params is properly formatted.
- if(!is.list(params) || length(params) == 0) {
- stop("The params parameter must be a non-empty list");
- }
-
- ## Finally ready to make the call...
- intreesstring <- .Call("ReadTreesWithNCL", params,
- PACKAGE="phylobase")
- ## Display the string returned by NCL if quiet=FALSE
- if(!quiet) print(intreesstring)
- if(length(intreesstring) > 0){
- intreesphylolist <- read.nexustreestring(intreesstring)
- if (length(intreesphylolist)>1 && !simplify) {
- trees <- list()
- for (i in 1:length(intreesphylolist)) {
- if(identical(check.node.labels, "asdata")) {
- if(is.null(intreesphylolist[[i]]$node.label)) {
- warning("Could not use value \"asdata\" for ",
- "check.node.labels because there are no ",
- "labels associated with the tree ", i)
- check.node.labels <- "drop"
- }
- trees[[i]] <- phylo4d(intreesphylolist[[i]],
- check.node.labels=check.node.labels,
- ...)
- }
- else {
- trees[[i]] <- phylo4(intreesphylolist[[i]],
- check.node.labels=check.node.labels,
- ...)
- }
- }
- }
- else {
- if (identical(check.node.labels, "asdata")) {
- if (is.null(intreesphylolist[[1]]$node.label)) {
- warning("Could not use value \"asdata\" for ",
- "check.node.labels because there are no ",
- "labels associated with the tree ", i)
- check.node.labels <- "drop"
- }
- trees <- phylo4d(intreesphylolist[[1]],
- check.node.labels=check.node.labels,
- ...)
- }
- else {
- trees <- phylo4(intreesphylolist[[1]],
- check.node.labels=check.node.labels,
- ...)
- }
- }
- }
- else {
- trees <- NULL
- }
- }
-
- ## scheme of what you get back, given what you asked
- ## for and whether data or tree blocks are actually in
- ## the file
- ##
- ## in nexus file type argument
- ## data tree all data trees
- ## TRUE FALSE df df NULL
- ## FALSE TRUE p4 NULL p4
- ## TRUE TRUE p4d df p4
- ## FALSE FALSE NULL NULL NULL
-
- switch(type,
- 'data' = {
- if(is.null(tipdata)){
- output <- NULL
- }
- else {
- output <- tipdata
- }
- },
- 'tree' = {
- if(is.null(trees)){
- output <- NULL
- }
- else {
- output <- trees
- }
- },
- 'all' = {
- if(is.null(tipdata) & is.null(trees)){
- output <- NULL
- }
- else if (is.null(tipdata)){
- output <- trees
- }
- else if (is.null(trees)){
- output <- tipdata
- }
- else {
- if (length(intreesphylolist) > 1 && !simplify) {
- output <- list()
- for (i in 1:length(intreesphylolist)) {
- output[[i]] <- phylo4d(intreesphylolist[[i]],
- tip.data = tipdata,
- check.node.labels=check.node.labels,
- ...)
- }
- }
- else {
- output <- phylo4d(intreesphylolist[[1]],
- tip.data=tipdata,
- check.node.labels=check.node.labels,
- ...)
- }
- }
- })
-
- output
+ return(readNCL(file=file, simplify=simplify, type=type, char.all=char.all,
+ polymorphic.convert=polymorphic.convert, levels.uniform=levels.uniform,
+ quiet=quiet, check.node.labels=check.node.labels,
+ return.labels=return.labels, ...))
}
-
-read.nexustreestring <- function(X) {
- ## Returns list of phylo objects (not multi.phylo, and always a list, even if
- ## there is only one element X is a character vector, each element is one line
- ## from a treefile
- ## This is based almost entirely on read.nexus from APE (Emmanuel Paradis).
-
- X<-unlist(strsplit(unlist(X),c("\n")))
-
- ## first remove all the comments
-
- ## BCO took out the "speedier removal of comments" code -- it keeps [&R]
- ## as a node label, replaced it with original APE code
- ## speedier removal of comments pc 13 April 2008
- ##X <- lapply(X, gsub, pattern = "\\[[^\\]]*\\]", replacement = "")
-
- LEFT <- grep("\\[", X)
- RIGHT <- grep("\\]", X)
- if (length(LEFT)) { # in case there are no comments at all
- w <- LEFT == RIGHT
- if (any(w)) { # in case all comments use at least 2 lines
- s <- LEFT[w]
- X[s] <- gsub("\\[[^]]*\\]", "", X[s])
- ## The above regexp was quite tough to find: it makes
- ## possible to delete series of comments on the same line:
- ## ...[...]xxx[...]...
- ## without deleting the "xxx". This regexp is in three parts:
- ## \\[ [^]]* \\]
- ## where [^]]* means "any character, except "]", repeated zero
- ## or more times" (note that the ']' is not escaped here).
- ## The previous version was:
- ## X[s] <- gsub("\\[.*\\]", "", X[s])
- ## which deleted the "xxx". (EP 2008-06-24)
- }
- w <- !w
- if (any(w)) {
- s <- LEFT[w]
- X[s] <- gsub("\\[.*", "", X[s])
- sb <- RIGHT[w]
- X[sb] <- gsub(".*\\]", "", X[sb])
- if (any(s < sb - 1))
- X <- X[-unlist(mapply(":", (s + 1), (sb - 1)))]
- }
- }
- endblock <- grep("END;|ENDBLOCK;", X, ignore.case = TRUE)
- semico <- grep(";", X)
- i1 <- grep("BEGIN TREES;", X, ignore.case = TRUE)
- i2 <- grep("TRANSLATE", X, ignore.case = TRUE)
- translation <- if (length(i2) == 1 && i2 > i1) TRUE else FALSE
- if (translation) {
- end <- semico[semico > i2][1]
- x <- X[(i2 + 1):end] # assumes there's a 'new line' after "TRANSLATE"
- ## x <- gsub("TRANSLATE", "", x, ignore.case = TRUE)
- x <- unlist(strsplit(x, "[,; \t]"))
- x <- x[nzchar(x)]
- TRANS <- matrix(x, ncol = 2, byrow = TRUE)
- TRANS[, 2] <- gsub("['\"]", "", TRANS[, 2])
- n <- dim(TRANS)[1]
- }
- start <-
- if (translation)
- semico[semico > i2][1] + 1
- else
- semico[semico > i1][1]
- end <- endblock[endblock > i1][1] - 1
- tree <- X[start:end]
- rm(X)
- tree <- gsub("^.*= *", "", tree)
- semico <- grep(";", tree)
- Ntree <- length(semico)
-
- ## are some trees on several lines?
- if (any(diff(semico) != 1)) {
- STRING <- character(Ntree)
- s <- c(1, semico[-Ntree] + 1)
- j <- mapply(":", s, semico)
- for (i in 1:Ntree)
- STRING[i] <- paste(tree[j[, i]], collapse = "")
- }
- else
- STRING <- tree
- rm(tree)
- STRING <- gsub(" ", "", STRING)
- colon <- grep(":", STRING)
- if (!length(colon)) {
- ## TODO: recode clado.build, tree.build & .treeBuildWithTokens from ape to phylobase
- trees <- lapply(STRING, clado.build)
- }
- else {
- if (length(colon) == Ntree) {
- if (translation)
- trees <- lapply(STRING, .treeBuildWithTokens)
- else
- trees <- lapply(STRING, tree.build)
-
- }
- else {
- trees <- vector("list", Ntree)
- trees[colon] <- lapply(STRING[colon], tree.build)
- nocolon <- (1:Ntree)[!1:Ntree %in% colon]
- trees[nocolon] <- lapply(STRING[nocolon], clado.build)
- if (translation) {
- for (i in 1:Ntree) {
- tr <- trees[[i]]
- for (j in 1:n) {
- ind <- which(tr$tip.label[j] == TRANS[, 1])
- tr$tip.label[j] <- TRANS[ind, 2]
- }
- if (!is.null(tr$node.label)) {
- for (j in 1:length(tr$node.label)) {
- ind <- which(tr$node.label[j] == TRANS[, 1])
- tr$node.label[j] <- TRANS[ind, 2]
- }
- }
- trees[[i]] <- tr
- }
- translation <- FALSE
- }
- }
- }
- for (i in 1:Ntree) {
- tr <- trees[[i]]
- ## Check here that the root edge is not incorrectly represented
- ## in the object of class "phylo" by simply checking that there
- ## is a bifurcation at the root
- if (!translation) n <- length(tr$tip.label)
- ROOT <- n + 1
- if (sum(tr$edge[, 1] == ROOT) == 1 && dim(tr$edge)[1] > 1) {
- stop(paste("There is apparently two root edges in your file: ",
- "cannot read tree file.\n Reading NEXUS file aborted ",
- "at tree no.", i, sep = ""))
- }
- }
- trees
-}
Modified: pkg/inst/nexusfiles/treeWithPolyExcludedData.nex
===================================================================
--- pkg/inst/nexusfiles/treeWithPolyExcludedData.nex 2010-07-15 08:37:24 UTC (rev 797)
+++ pkg/inst/nexusfiles/treeWithPolyExcludedData.nex 2010-07-15 08:40:04 UTC (rev 798)
@@ -14,9 +14,9 @@
BEGIN CHARACTERS;
TITLE testIncomplete;
DIMENSIONS NCHAR=3;
- FORMAT DATATYPE = STANDARD GAP = - MISSING = ? SYMBOLS = " 0 1 2";
+ FORMAT DATATYPE = STANDARD GAP = - MISSING = ? SYMBOLS = "0 1 2";
CHARSTATELABELS
- 1 Test1 / test1A test1B, 2 Test2 / test2A test2B, 3 Test3 / test3A test3B test3C ;
+ 1 Test1 /test1A test1B, 2 Test2 /test2A test2B, 3 Test3 /test3A test3B test3C;
MATRIX
Myrmecocystuscfnavajo 1(0 1)(0 1 2)
Myrmecocystuscreightoni ?(0 1)(0 1)
Deleted: pkg/man/read.nexustreestring.Rd
===================================================================
--- pkg/man/read.nexustreestring.Rd 2010-07-15 08:37:24 UTC (rev 797)
+++ pkg/man/read.nexustreestring.Rd 2010-07-15 08:40:04 UTC (rev 798)
@@ -1,17 +0,0 @@
-\name{read.nexustreestring}
-\alias{read.nexustreestring}
-\title{Read Nexus tree string}
-\description{
- This is a stub!
-}
-\usage{
-read.nexustreestring(X)
-}
-\arguments{
- \item{X}{a string containing a Nexus tree definition}
-}
-\value{
- Some kind of tree list?
-}
-\author{Brian O'Meara?}
-\keyword{misc}
Modified: pkg/man/readNexus.Rd
===================================================================
--- pkg/man/readNexus.Rd 2010-07-15 08:37:24 UTC (rev 797)
+++ pkg/man/readNexus.Rd 2010-07-15 08:40:04 UTC (rev 798)
@@ -1,3 +1,4 @@
+\alias{readNCL}
\name{readNexus}
\docType{methods}
\alias{readNexus}
Modified: pkg/man/subset-methods.Rd
===================================================================
--- pkg/man/subset-methods.Rd 2010-07-15 08:37:24 UTC (rev 797)
+++ pkg/man/subset-methods.Rd 2010-07-15 08:40:04 UTC (rev 798)
@@ -145,7 +145,7 @@
edgeLength(geotree)['0-15'] <- 0.1
geotree2 <- geotree[1:2]
## in subset tree, edge of new root extends back to the original root
-edgeLength(geotree2)['NA-3']
+edgeLength(geotree2)['0-3']
## edge length immediately ancestral to this node in the original tree
edgeLength(geotree, MRCA(geotree, tipLabels(geotree2)))
Added: pkg/src/GetNCL.cpp
===================================================================
--- pkg/src/GetNCL.cpp (rev 0)
+++ pkg/src/GetNCL.cpp 2010-07-15 08:40:04 UTC (rev 798)
@@ -0,0 +1,239 @@
+#include <Rcpp.h>
+#include "ncl/nxsmultiformat.h"
+
+NxsString contData(NxsCharactersBlock& charBlock, NxsString& charString,
+ const int& eachChar, const int& nTax) {
+ for (int taxon=0; taxon < nTax; ++taxon) {
+ double state=charBlock.GetSimpleContinuousValue(taxon,eachChar);
+ if (state==DBL_MAX) {
+ charString+="NA";
+ }
+ else {
+ char buffer[100];
+ sprintf(buffer, "%.10f", state);
+ charString+=buffer;
+ }
+
+ if (taxon+1 < nTax) {
+ charString+=',';
+ }
+ }
+ return charString;
+}
+
+
+NxsString stdData(NxsCharactersBlock& charBlock, NxsString& charString, const int& eachChar,
+ const int& nTax, bool polyconvert) {
+ for (int taxon=0; taxon<nTax; ++taxon) {
+
+ int stateNumber=charBlock.GetInternalRepresentation(taxon, eachChar, 0);
+
+ if(charBlock.IsMissingState(taxon, eachChar)) {
+ charString+="NA";
+ }
+ else if (charBlock.GetNumStates(taxon, eachChar)>1) {
+ if(polyconvert) {
+ charString+="NA";
+ }
+ else {
+ charString+='"';
+ charString+='{';
+ for (unsigned int k=0; k < charBlock.GetNumStates(taxon, eachChar); ++k) {
+ charString += charBlock.GetInternalRepresentation(taxon, eachChar, k);
+ if (k+1 < charBlock.GetNumStates(taxon, eachChar)) {
+ charString+=',';
+ }
+ }
+ charString+='}';
+ charString+='"';
+ }
+ }
+ else {
+ charString+='"';
+ charString+=stateNumber;
+ charString+='"';
+ }
+ if (taxon+1 < nTax) {
+ charString+=',';
+ }
+ }
+ return charString;
+}
+
+
+extern "C" SEXP GetNCL(SEXP params, SEXP paramsVecR) {
+
+ Rcpp::List list(params);
+ Rcpp::LogicalVector paramsVec(paramsVecR);
+
+ bool charall = paramsVec[0];
+ bool polyconvert = paramsVec[1];
+ bool levelsUnif = paramsVec[2];
+ bool returnTrees = paramsVec[3];
+ bool returnData = paramsVec[4];
+
+ int nCharToReturn = 0;
+
+ std::vector<std::string> dataTypes; //vector of datatypes for each character block
+ std::vector<int> nbCharacters; //number of characters for each character block
+ std::vector<std::string> dataChr; //characters
+ std::vector<std::string> charLabels; //labels for the characters
+ std::vector<std::string> stateLabels; //labels for the states
+ std::vector<int> nbStates; //number of states for each character (for Standard datatype)
+ std::vector<std::string> trees; //vector of Newick strings holding the names
+ std::vector<std::string> treeNames; //vector of tree names
+ std::vector<std::string> taxaNames; //vector of taxa names
+
+ std::vector<bool> test(3);
+ test[0] = charall;
+ test[1] = polyconvert;
+ test[2] = levelsUnif;
+
+# if defined(FILENAME_AS_NEXUS)
+ std::string filename = "'" + list["fileName"] + "'";
+# else
+ std::string filename = list["fileName"];
+# endif
+
+ MultiFormatReader nexusReader(-1, NxsReader::WARNINGS_TO_STDERR);
+
+ /* make NCL less strict */
+ NxsTreesBlock * treesB = nexusReader.GetTreesBlockTemplate();
+ treesB->SetAllowImplicitNames(true);
+ nexusReader.cullIdenticalTaxaBlocks(true);
+ /* End of making NCL less strict */
+
+ nexusReader.ReadFilepath(const_cast < char* > (filename.c_str()), MultiFormatReader::NEXUS_FORMAT);
+
+ const unsigned nTaxaBlocks = nexusReader.GetNumTaxaBlocks();
+ for (unsigned t = 0; t < nTaxaBlocks; ++t) {
+ /* Get blocks */
+ const NxsTaxaBlock * taxaBlock = nexusReader.GetTaxaBlock(t);
+ const unsigned nTreesBlocks = nexusReader.GetNumTreesBlocks(taxaBlock);
+ const unsigned nCharBlocks = nexusReader.GetNumCharactersBlocks(taxaBlock);
+
+ int nTax = taxaBlock->GetNumTaxonLabels();
+
+ /* Get taxa names */
+ for (int j=0; j < nTax; ++j) {
+ taxaNames.push_back (taxaBlock->GetTaxonLabel(j));
+ }
+
+ /* Get trees */
+ if (returnTrees) {
+ if (nTreesBlocks == 0) {
+ continue;
+ }
+ for (unsigned i = 0; i < nTreesBlocks; ++i) {
+ NxsTreesBlock* treeBlock = nexusReader.GetTreesBlock(taxaBlock, i);
+ const unsigned nTrees = treeBlock->GetNumTrees();
+ if (nTrees > 0) {
+ for (unsigned k = 0; k < nTrees; k++) {
+ NxsString ts = treeBlock->GetTreeDescription(k);
+ NxsString trNm = treeBlock->GetTreeName(k);
+ treeNames.push_back(trNm);
+ trees.push_back (ts);
+ }
+ }
+ else {
+ continue;
+ }
+ }
+ }
+
+ /* Get data */
+ if (returnData) {
+ for (unsigned k = 0; k < nCharBlocks; ++k) {
+ NxsCharactersBlock * charBlock = nexusReader.GetCharactersBlock(taxaBlock, k);
+
+ if (nCharBlocks == 0) {
+ continue;
+ }
+ else {
+ NxsString dtType = charBlock->GetNameOfDatatype(charBlock->GetDataType());
+ dataTypes.push_back(dtType);
+
+ if (charall) {
+ nCharToReturn=charBlock->GetNCharTotal();
+ }
+ else {
+ nCharToReturn=charBlock->GetNumIncludedChars();
+ }
+ nbCharacters.push_back (nCharToReturn);
+ for (int eachChar=0; eachChar < nCharToReturn; ++eachChar) { //We only pass the non-eliminated chars
+ NxsString charLabel=charBlock->GetCharLabel(eachChar);
+ if (charLabel.length()>1) {
+ charLabels.push_back (charLabel);
+ }
+ else {
+ charLabels.push_back ("standard_char"); //FIXME: needs to fixed for sequence data
+ }
+
+ NxsString tmpCharString;
+ if (std::string("Continuous") == dtType) {
+ tmpCharString = contData(*charBlock, tmpCharString, eachChar, nTax);
+ nbStates.push_back (0);
+ }
+ else {
+ if (std::string("Standard") == dtType) {
+ tmpCharString = stdData(*charBlock, tmpCharString, eachChar, nTax,
+ polyconvert);
+ unsigned int nCharStates = charBlock->GetNumObsStates(eachChar, false);
+ nbStates.push_back (nCharStates);
+ for (unsigned int l=0; l < nCharStates; ++l) {
+ NxsString label = charBlock->GetStateLabel(eachChar, l);
+ stateLabels.push_back (label);
+ }
+ }
+ else {
+ if (std::string("DNA") == dtType) {
+ for (int taxon=0; taxon < nTax; ++taxon) {
+ for (int eachChar=0; eachChar < nCharToReturn; ++eachChar) {
+ unsigned int nCharStates = charBlock->GetNumStates(taxon, eachChar);
+ if (charBlock->IsGapState(taxon, eachChar)) {
+ tmpCharString += "-";
+ }
+ else {
+ if (charBlock->IsMissingState(taxon, eachChar)) {
+ tmpCharString += "?";
+ }
+ else {
+ if (nCharStates == 1) {
+ tmpCharString += charBlock->GetState(taxon, eachChar, 0);
+ }
+ else {
+ tmpCharString += "?"; //FIXME
+ }
+ }
+ }
+ }
+ }
+ }
+ else { // other type of data not yet supported
+ tmpCharString = "";
+ nbStates.push_back (0);
+ stateLabels.push_back (std::string(""));
+ }
+ }
+ }
+ std::string charString = "c(" + tmpCharString + ");";
+ dataChr.push_back (charString);
+ }
+ }
+ }
+ }
+ }
+
+ /* Prepare list to return */
+ Rcpp::List res = Rcpp::List::create(Rcpp::Named("taxaNames") = taxaNames,
+ Rcpp::Named("treeNames") = treeNames,
+ Rcpp::Named("trees") = trees,
+ Rcpp::Named("dataTypes") = dataTypes,
+ Rcpp::Named("nbCharacters") = nbCharacters,
+ Rcpp::Named("charLabels") = charLabels,
+ Rcpp::Named("nbStates") = nbStates,
+ Rcpp::Named("stateLabels") = stateLabels,
+ Rcpp::Named("dataChr") = dataChr,
+ Rcpp::Named("Test") = test);
+ return res;
+}
Deleted: pkg/src/NCLInterface.cpp
===================================================================
--- pkg/src/NCLInterface.cpp 2010-07-15 08:37:24 UTC (rev 797)
+++ pkg/src/NCLInterface.cpp 2010-07-15 08:40:04 UTC (rev 798)
@@ -1,2219 +0,0 @@
-// Copyright (C) 2007-2008 Brian O'Meara & Derrick Zwickl
-// A modification of the BasicCommandLine file of the NCL (see below)
-// to use for loading trees and data from Nexus into R. Licensing as below.
-
-// Copyright (C) 1999-2002 Paul O. Lewis
-//
-// This file is part of NCL (Nexus Class Library).
-//
-// NCL is free software; you can redistribute it and/or modify
[TRUNCATED]
To get the complete diff run:
svnlook diff /svnroot/phylobase -r 798
More information about the Phylobase-commits
mailing list