[R-gregmisc-commits] r2039 - in pkg/gtools: R man
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Wed May 27 03:37:41 CEST 2015
Author: warnes
Date: 2015-05-27 03:37:39 +0200 (Wed, 27 May 2015)
New Revision: 2039
Modified:
pkg/gtools/R/mixedsort.R
pkg/gtools/man/mixedsort.Rd
Log:
Add roman numeral support to mixedorder() and mixedsort().
Modified: pkg/gtools/R/mixedsort.R
===================================================================
--- pkg/gtools/R/mixedsort.R 2015-05-27 00:28:22 UTC (rev 2038)
+++ pkg/gtools/R/mixedsort.R 2015-05-27 01:37:39 UTC (rev 2039)
@@ -1,17 +1,37 @@
-mixedsort <- function(x, decreasing=FALSE, na.last=TRUE, blank.last=FALSE)
+mixedsort <- function(x,
+ decreasing=FALSE,
+ na.last=TRUE,
+ blank.last=FALSE,
+ numeric.type=c("decimal", "roman"),
+ roman.case=c("upper","lower","both")
+ )
{
- ord <- mixedorder(x, decreasing=decreasing, na.last=na.last,
- blank.last=blank.last)
+ ord <- mixedorder(x,
+ decreasing=decreasing,
+ na.last=na.last,
+ blank.last=blank.last,
+ numeric.type=numeric.type,
+ roman.case=roman.case
+ )
x[ord]
}
-mixedorder <- function(x, decreasing=FALSE, na.last=TRUE, blank.last=FALSE)
+mixedorder <- function(x,
+ decreasing=FALSE,
+ na.last=TRUE,
+ blank.last=FALSE,
+ numeric.type=c("decimal", "roman"),
+ roman.case=c("upper","lower","both")
+ )
{
# - Split each character string into a vector of strings and
# numbers
# - Separately rank numbers and strings
# - Combine orders so that strings follow numbers
+ numeric.type <- match.arg(numeric.type)
+ roman.case <- match.arg(roman.case)
+
if(length(x)<1)
return(NULL)
else if(length(x)==1)
@@ -22,14 +42,26 @@
delim="\\$\\@\\$"
- numeric <- function(x)
+ if(numeric.type=="decimal")
{
- as.numeric(x)
+ regex <- "((?:(?i)(?:[-+]?)(?:(?=[.]?[0123456789])(?:[0123456789]*)(?:(?:[.])(?:[0123456789]{0,}))?)(?:(?:[eE])(?:(?:[-+]?)(?:[0123456789]+))|)))" # uses PERL syntax
+ numeric <- function(x) as.numeric(x)
}
+ else if (numeric.type=="roman")
+ {
+ regex <- switch(roman.case,
+ "both" = "([IVXCLDMivxcldm]+)",
+ "upper" = "([IVXCLDM]+)",
+ "lower" = "([ivxcldm]+)"
+ )
+ numeric <- function(x) roman2int(x)
+ }
+ else
+ stop("Unknown value for numeric.type: ", numeric.type)
nonnumeric <- function(x)
{
- ifelse(is.na(as.numeric(x)), toupper(x), NA)
+ ifelse(is.na(numeric(x)), toupper(x), NA)
}
x <- as.character(x)
@@ -43,8 +75,10 @@
####
# find and mark numbers in the form of +1.23e+45.67
- delimited <- gsub("([+-]{0,1}[0-9]+\\.{0,1}[0-9]*([eE][\\+\\-]{0,1}[0-9]+\\.{0,1}[0-9]*){0,1})",
- paste(delim,"\\1",delim,sep=""), x)
+ delimited <- gsub(regex,
+ paste(delim,"\\1",delim,sep=""),
+ x,
+ perl=TRUE)
# separate out numbers
step1 <- strsplit(delimited, delim)
Modified: pkg/gtools/man/mixedsort.Rd
===================================================================
--- pkg/gtools/man/mixedsort.Rd 2015-05-27 00:28:22 UTC (rev 2038)
+++ pkg/gtools/man/mixedsort.Rd 2015-05-27 01:37:39 UTC (rev 2039)
@@ -11,8 +11,13 @@
that "a", will come before "B" and "C".
}
\usage{
-mixedsort(x, decreasing=FALSE, na.last=TRUE, blank.last=FALSE)
-mixedorder(x, decreasing=FALSE, na.last=TRUE, blank.last=FALSE)
+mixedsort(x, decreasing=FALSE, na.last=TRUE, blank.last=FALSE,
+ numeric.type=c("decimal", "roman"),
+ roman.case=c("upper","lower","both")
+)
+mixedorder(x, decreasing=FALSE, na.last=TRUE, blank.last=FALSE,
+ numeric.type=c("decimal", "roman"),
+ roman.case=c("upper","lower","both") )
}
\arguments{
\item{x}{Vector to be sorted.}
@@ -25,6 +30,12 @@
\item{blank.last}{for controlling the treatment of blank values. If \code{TRUE}, blank
values in the data are put last; if \code{FALSE}, they are put
first; if \code{NA}, they are removed.}
+ \item{numeric.type}{either "decimal" (default) or "roman". Are numeric values represented as
+ decimal numbers (\code{numeric.type="decimal"}) or as Roman numerals
+ (\code{numeric.type="roman"})? }
+ \item{roman.case}{one of "upper", "lower", or "both". Are Roman
+ numerals represented using only capital letters ('IX'), only
+ lower-case letters ('ix'), or both?}
}
\details{
I often have character vectors (e.g. factor labels), such as compound
@@ -42,6 +53,11 @@
and \code{NA} values to the end. Setting \code{decreasing=TRUE}
changes the sort order to descending and reverses the meanings of
\code{na.last} and \code{blank.last}.
+
+ Parsing looks for decimal numbers unless \code{numeric.type="roman"},
+ in which case parsing looks for Roman numerals, with character case
+ specified by \code{roman.case}.
+
}
\value{
\code{mixedorder} returns a vector giving the sort order of the input
@@ -82,11 +98,21 @@
mixedsort(x, decreasing=FALSE) # default
mixedsort(x, decreasing=TRUE) # reverse sort order
+
+## Roman numerals
+chapters <- c("V. Non Sequiturs", "II. More Nonsense",
+ "I. Nonsense", "IV. Nonesensical Citations",
+ "III. Utter Nonsense")
+mixedsort(chapters, numeric.type="roman" )
+
+## Lower-case Roman numerals
+vals <- c("xix", "xii", "mcv", "iii", "iv", "dcclxxii", "cdxcii",
+ "dcxcviii", "dcvi", "cci")
+(ordered <- mixedsort(vals, numeric.type="roman", roman.case="lower"))
+roman2int(ordered)
}
\keyword{univar}
\keyword{manip}
-
-
\concept{natural sort}
\concept{dictionary sort}
More information about the R-gregmisc-commits
mailing list