[Dplr-commits] r921 - in pkg/dplR: . R inst/unitTests man
noreply at r-forge.r-project.org
Fri Dec 12 19:55:17 CET 2014
Author: mvkorpel
Date: 2014-12-12 19:55:16 +0100 (Fri, 12 Dec 2014)
New Revision: 921
Modified:
pkg/dplR/DESCRIPTION
pkg/dplR/NAMESPACE
pkg/dplR/R/latexify.R
pkg/dplR/inst/unitTests/runit.utils.R
pkg/dplR/man/latexify.Rd
Log:
latexify() supports a more comprehensive set of characters
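As a quick illustration (not part of the commit itself), the sketch below calls latexify() on strings containing some of the newly handled characters; it assumes dplR built from this revision, and the example strings are arbitrary.

library(dplR)

## ASCII specials were already escaped before this change:
latexify("50% #1 & _x_", doublebackslash = FALSE)

## r921 adds accented letters, ligatures, currencies, angle brackets,
## the vertical bar, and many textcomp symbols:
latexify("vis-\u00e0-vis, \u00bd \u20ac, <a|b>, Erd\u0151s",
         doublebackslash = FALSE)

## The new 'dashdash' argument controls whether "-" is wrapped in
## \mbox{} to suppress the -- and --- ligatures:
latexify("pages 10--20", dashdash = FALSE, doublebackslash = FALSE)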
Modified: pkg/dplR/DESCRIPTION
===================================================================
--- pkg/dplR/DESCRIPTION 2014-12-11 09:39:52 UTC (rev 920)
+++ pkg/dplR/DESCRIPTION 2014-12-12 18:55:16 UTC (rev 921)
@@ -3,7 +3,7 @@
Type: Package
Title: Dendrochronology Program Library in R
Version: 1.6.1
-Date: 2014-12-11
+Date: 2014-12-12
Authors@R: c(person("Andy", "Bunn", role = c("aut", "cph",
"cre", "trl"), email = "andy.bunn@wwu.edu"), person("Mikko",
"Korpela", role = c("aut", "trl")), person("Franco", "Biondi",
Modified: pkg/dplR/NAMESPACE
===================================================================
--- pkg/dplR/NAMESPACE 2014-12-11 09:39:52 UTC (rev 920)
+++ pkg/dplR/NAMESPACE 2014-12-12 18:55:16 UTC (rev 921)
@@ -22,7 +22,7 @@
importFrom(png, readPNG)
-importFrom(stringi, stri_trans_nfc)
+importFrom(stringi, stri_trans_nfc, stri_trans_nfd)
importFrom(stringr, str_pad, str_trim)
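The added stri_trans_nfd import supports the accent handling introduced in latexify.R below: input is decomposed so that a single rule per combining mark covers both precomposed (NFC) and decomposed (NFD) spellings. A minimal sketch of the idea, not part of the commit, using only stringi:

library(stringi)
nfc <- "\u00f6"    # precomposed o with diaeresis
nfd <- "o\u0308"   # "o" followed by a combining diaeresis
identical(stri_trans_nfd(nfc), stri_trans_nfd(nfd))  # TRUE
## After decomposition, one substitution rewrites the combining mark
## as a LaTeX accent command (simplified from the rules added below):
gsub("(.)\u0308", "\\\\\"{\\1}", stri_trans_nfd(nfc), perl = TRUE)
## returns the string \"{o}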
Modified: pkg/dplR/R/latexify.R
===================================================================
--- pkg/dplR/R/latexify.R 2014-12-11 09:39:52 UTC (rev 920)
+++ pkg/dplR/R/latexify.R 2014-12-12 18:55:16 UTC (rev 921)
@@ -13,15 +13,9 @@
## Usage: \Sexpr{latexify(string_produced_by_R_code)}
##
-## Make arbitrary character() vector compatible with LaTeX by escaping
-## special characters, then convert to UTF-8 encoding. Any formatting
-## (newlines, tabs, etc.) will be lost. Note that the set of
-## characters actually supported depends on the font, LaTeX engine and
-## set of packages used.
-##
## It seems that Sweave needs doublebackslash = TRUE
## but knitr needs doublebackslash = FALSE.
-latexify <- function(x, doublebackslash = TRUE,
+latexify <- function(x, doublebackslash = TRUE, dashdash = TRUE,
quotes = c("straight", "curved"),
packages = c("fontenc", "textcomp")) {
y <- as.character(x)
@@ -39,8 +33,11 @@
cat(y[encBytes], sep = "\n")
y[encBytes] <- foo
}
+ l10n <- l10n_info()
+ Letters <- paste0(c(LETTERS, letters), collapse="")
fontenc <- "fontenc" %in% packages
textcomp <- "textcomp" %in% packages
+ eurosym <- "eurosym" %in% packages
straightQuotes <- match.arg(quotes) == "straight"
## Remove control characters (not spaces!)
y <- gsub("(?![[:space:]])[[:cntrl:]]", "", y, perl=TRUE)
@@ -48,10 +45,14 @@
## substitution must be done before control characters because
## newline belongs to both groups.
y <- gsub("[[:space:]]+", " ", y)
- ## Escape LaTeX special characters.
+
+ ## Handle LaTeX special characters in the ASCII range.
## Some substitutions are mandatory, others affect matters such as
- ## the rendering of the character in question (\textquote...) or
- ## line breaks (\slash).
+ ## the rendering of the character in question (e.g. \textquote...)
+ ## or line breaks (\slash). Some substitutions are unnecessary if
+ ## using a font encoding other than OT1 (\textbar, ...), but are
+ ## performed every time nonetheless. The dash (-) is given
+ ## special treatment to disable the em and en dash ligatures.
## Source: Scott Pakin (2009) The Comprehensive LaTeX Symbol List.
## Accessible through "texdoc symbols".
## Particularly section 8.6 "ASCII and Latin 1 quick reference".
@@ -61,46 +62,290 @@
## Then, \ is replaced with \textbackslash{},
## but not if followed by { or }.
## After that, the order does not matter.
+ ##
+ ## This starts the 'substitutions' list, which is finally
+ ## processed close to the end of the function.
substitutions <-
list(c("([{}])", "\\\\\\1"),
- c("\\\\(?!\\{|\\})", "\\\\textbackslash{}"),
+ c("\\\\(?![{}])", "\\\\textbackslash{}"),
c("\\^", "\\\\textasciicircum{}"),
c("~", "\\\\textasciitilde{}"),
+ c("<", "\\\\textless{}"),
+ c(">", "\\\\textgreater{}"),
+ c("\\|", "\\\\textbar{}"),
c("([#$%&_])", "\\\\\\1"),
+ if (isTRUE(dashdash)) {
+ c("-", "\\\\mbox{-}")
+ },
if (textcomp && straightQuotes) {
c("'", "\\\\textquotesingle{}")
},
+ if (textcomp && straightQuotes) {
+ c("`", "\\\\textasciigrave{}")
+ },
c('"', if (fontenc && straightQuotes) {
"\\\\textquotedbl{}"
} else {
"\\\\textquotedblright{}"
}),
c("/", "\\\\slash{}"))
- if (isTRUE(l10n_info()[["MBCS"]])) {
- ## The output of sQuote() and dQuote() may contain
- ## non-ASCII quoting characters. If the input is ASCII,
- ## it may be a surprise to the user that an UTF-8 input
- ## encoding is then needed in LaTeX. Converting the
- ## quotes to commands solves this problem.
- substitutions <-
- c(substitutions,
- list(c("\u2018", "\\\\textquoteleft{}"),
- c("\u2019", "\\\\textquoteright{}"),
- c("\u201c", "\\\\textquotedblleft{}"),
- c("\u201d", "\\\\textquotedblright{}")))
- }
- ## Remove empty group after command when followed by a backslash
- Letters <- paste(c(LETTERS, letters), collapse="")
- substitutions <- c(substitutions,
- list(c(sprintf("(\\\\[%s]+)\\{\\}(?=\\\\)",
- Letters), "\\1")))
+ substitutions <- substitutions[!vapply(substitutions, is.null, logical(1))]
- for (subst in substitutions[!vapply(substitutions, is.null, logical(1))]) {
+ ## Treatment of non-ASCII characters follows.
+
+ ## Digraphs and ligatures broken into their parts, except the
+ ## Dutch digraphs IJ and ij which have their own commands.
+ substitutions <-
+ c(substitutions,
+ list(c("\u0132", "\\\\IJ{}"),
+ c("\u0133", "\\\\ij{}"),
+ c("\u01f1", "DZ"),
+ c("\u01f2", "Dz"),
+ c("\u01f3", "dz"),
+ c("\u01c4", "DZ\u030c"),
+ c("\u01c5", "Dz\u030c"),
+ c("\u01c6", "dz\u030c"),
+ c("\u01c7", "LJ"),
+ c("\u01c8", "Lj"),
+ c("\u01c9", "lj"),
+ c("\u01ca", "NJ"),
+ c("\u01cb", "Nj"),
+ c("\u01cc", "nj"),
+ c("\ufb00", "ff"),
+ c("\ufb01", "fi"),
+ c("\ufb02", "fl"),
+ c("\ufb03", "ffi"),
+ c("\ufb04", "ffl"),
+ c("\ufb05", "\u017ft"),
+ c("\ufb06", "st")))
+
+ ## Accents (diacritics) above the letter (drop i and j dots)
+ above <- list(diaeresis = c("\u0308", "\""),
+ acute = c("\u0301", "'"),
+ dotabove = c("\u0307", "."),
+ macron = c("\u0304", "="),
+ circumflex = c("\u0302", "^"),
+ grave = c("\u0300", "`"),
+ tilde = c("\u0303", "~"),
+ doubleacute = c("\u030b", "H"),
+ ringabove = c("\u030a", "r"),
+ breve = c("\u0306", "u"),
+ caron = c("\u030c", "v"),
+ invbreve = c("\u0311", "newtie")) # textcomp
+ ## Accents that co-exist with i and j dots (mainly below the
+ ## letter, but also the ligature tie)
+ below <- list(macronbelow = c("\u0331", "b"),
+ cedilla = c("\u0327", "c"),
+ dotbelow = c("\u0323", "d"),
+ tie = c("\u0361", "t"),
+ ogonek = c("\u0328", "k")) # not in OT1 fontenc
+ accents <- c(above, below)
+ command <- paste0("\\\\[", Letters, "]+|\\\\.")
+ combining <- paste0(vapply(accents, "[", character(1), 1),
+ collapse="")
+ accPre <- paste0("(", command, "|.)({})?(?<![", combining, "])")
+ accPost <- paste0("(?![", combining, "])")
+
+ ## Accent above the letter
+ aboveInCode <- vapply(above, "[", character(1), 1)
+ ijPattern <- paste0("([ij])", aboveInCode, accPost)
+ otherPattern <- paste0(accPre, aboveInCode, accPost)
+ aboveOutCode <- vapply(above, "[", character(1), 2)
+ ijReplacement <- paste0("\\\\", aboveOutCode, "{\\\\\\1}")
+ otherReplacement <- paste0("\\\\", aboveOutCode, "{\\1}")
+ ## for (accent in above) {
+ ## code <- accent[1]
+ ## replacement <- accent[2]
+ ## y <- gsub(paste0("([ij])", code, accPost),
+ ## paste0("\\\\", replacement, "{\\\\\\1}"), y, perl = TRUE)
+ ## y <- gsub(paste0(accPre, code, accPost),
+ ## paste0("\\\\", replacement, "{\\1}"), y, perl = TRUE)
+ ## }
+
+ ## Accent below the letter, and ligature tie
+ belowInCode <- vapply(below, "[", character(1), 1)
+ belowPattern <- paste0(accPre, belowInCode, accPost)
+ belowOutCode <- vapply(below, "[", character(1), 2)
+ belowReplacement <- paste0("\\\\", belowOutCode, "{\\1}")
+ ## for (accent in below) {
+ ## code <- accent[1]
+ ## replacement <- accent[2]
+ ## y <- gsub(paste0(accPre, code, accPost),
+ ## paste0("\\\\", replacement, "{\\1}"), y, perl = TRUE)
+ ## }
+
+ ## Combining an enclosing circle with an accent seems to work
+ circPre <- paste0("(", command, "({([^}]|\\\\})+})?|.)({})?")
+ circPattern <- paste0(circPre, "\u20dd", accPost)
+ circReplacement <- "\\\\textcircled{\\1}"
+
+ substitutions <-
+ c(substitutions,
+ lapply(lapply(mapply(list, list(as.name("c")),
+ c(ijPattern, otherPattern,
+ belowPattern, circPattern),
+ c(ijReplacement, otherReplacement,
+ belowReplacement, circReplacement),
+ SIMPLIFY=FALSE), as.call), eval))
+
+ ## The output of sQuote() and dQuote() may contain non-ASCII
+ ## quoting characters. If the input is ASCII, it may be a
+ ## surprise to the user that a UTF-8 input encoding is then
+ ## needed in LaTeX. Converting the quotes to commands solves
+ ## this problem. The substitution list also contains
+ ## non-ASCII letters and other unicode characters.
+ substitutions <-
+ c(substitutions,
+ list(c("\u00a1", "\\\\textexclamdown{}"),
+ c("\u00a3", "\\\\pounds{}"),
+ c("\u00a7", "\\\\S{}"),
+ c("\u00a9", "\\\\copyright{}"),
+ c("\u00aa", "\\\\textordfeminine{}"),
+ c("\u00ae", "\\\\textregistered{}"),
+ c("\u00b6", "\\\\P{}"),
+ c("\u00b7", "\\\\textperiodcentered{}"),
+ c("\u00ba", "\\\\textordmasculine{}"),
+ c("\u00bf", "\\\\textquestiondown{}"),
+ c("\u2013", "\\\\textendash{}"),
+ c("\u2014", "\\\\textemdash{}"),
+ c("\u2018", "\\\\textquoteleft{}"),
+ c("\u2019", "\\\\textquoteright{}"),
+ c("\u201c", "\\\\textquotedblleft{}"),
+ c("\u201d", "\\\\textquotedblright{}"),
+ c("\u2020", "\\\\dag{}"),
+ c("\u2021", "\\\\ddag{}"),
+ c("\u2022", "\\\\textbullet{}"),
+ c("\u2026", "\\\\dots{}"),
+ c("\u2122", "\\\\texttrademark{}"),
+ c("\u2423", "\\\\textvisiblespace{}"),
+ c("\u00c6", "\\\\AE{}"),
+ c("\u00e6", "\\\\ae{}"),
+ c("\u0152", "\\\\OE{}"),
+ c("\u0153", "\\\\oe{}"),
+ c("\u00d8", "\\\\O{}"),
+ c("\u00f8", "\\\\o{}"),
+ c("\u0141", "\\\\L{}"),
+ c("\u0142", "\\\\l{}"),
+ ## U+1E9E Latin capital letter sharp s. Works with
+ ## XeTeX and LuaTeX, provided that the character is
+ ## present in the font. Otherwise \SS, which usually
+ ## produces "SS".
+ c("\u1e9e", "\\\\ifdefined\\\\XeTeXrevision\\\\iffontchar\\\\font\"1E9E\\\\symbol{\"1E9E}\\\\else\\\\SS\\\\fi\\\\else\\\\ifdefined\\\\directlua\\\\iffontchar\\\\font\"1E9E\\\\symbol{\"1E9E}\\\\else\\\\SS\\\\fi\\\\else\\\\SS\\\\fi\\\\fi{}"),
+ c("\u00df", "\\\\ss{}"),
+ ## U+017F Latin small letter long s
+ c("\u017f", "\\\\ifdefined\\\\XeTeXrevision\\\\symbol{\"017F}\\\\else\\\\ifdefined\\\\directlua\\\\symbol{\"017F}\\\\else{\\\\fontencoding{TS1}\\\\selectfont s}\\\\fi\\\\fi{}")))
+ ## Other non-ASCII letters, punctuation marks.
+ ## These don't work with the OT1 font encoding.
+ substitutions <-
+ c(substitutions,
+ list(c("\u00d0", "\\\\DH{}"),
+ c("\u00f0", "\\\\dh{}"),
+ c("\u0110", "\\\\DJ{}"),
+ c("\u0111", "\\\\dj{}"),
+ c("\u014a", "\\\\NG{}"),
+ c("\u014b", "\\\\ng{}"),
+ c("\u00de", "\\\\TH{}"),
+ c("\u00fe", "\\\\th{}"),
+ c("\u00ab", "\\\\guillemotleft{}"),
+ c("\u00bb", "\\\\guillemotright{}"),
+ c("\u201a", "\\\\quotesinglbase{}"),
+ c("\u201e", "\\\\quotedblbase{}"),
+ c("\u2039", "\\\\guilsinglleft{}"),
+ c("\u203a", "\\\\guilsinglright{}")))
+ ## Miscellaneous, arrows, delimiters, legal symbols, science
+ ## and engineering, currencies, diacritics, text mode math.
+ ## These require textcomp.
+ substitutions <-
+ c(substitutions,
+ list(c("\u00a0", "~"),# no-break space (NBSP)
+ c("\u00ad", "\\\\-"),# soft hyphen (SHY)
+ c("\u200b", "\\\\hspace{0pt}"),# zero width space (ZWSP)
+ c("\u2217", "\\\\textasteriskcentered{}"),
+ c("\u2016", "\\\\textbardbl{}"),
+ c("\u25ef", "\\\\textbigcircle{}"),
+ c("\u2422", "\\\\textblank{}"),
+ c("\u00a6", "\\\\textbrokenbar{}"),
+ c("\u2052", "\\\\textdiscount{}"),
+ c("\u212e", "\\\\textestimated{}"),
+ c("\u203d", "\\\\textinterrobang{}"),
+ c("\u2e18", "\\\\textinterrobangdown{}"),
+ c("\u2116", "\\\\textnumero{}"),
+ c("\u25e6", "\\\\textopenbullet{}"),
+ c("\u2030", "\\\\textperthousand{}"),
+ c("\u2031", "\\\\textpertenthousand{}"),
+ c("\u211e", "\\\\textrecipe{}"),
+ c("\u203b", "\\\\textreferencemark{}"),
+ c("\u02f7", "\\\\texttildelow{}"),
+ c("\u2190", "\\\\textleftarrow{}"),
+ c("\u2191", "\\\\textuparrow{}"),
+ c("\u2192", "\\\\textrightarrow{}"),
+ c("\u2193", "\\\\textdownarrow{}"),
+ c("\u3008", "\\\\textlangle{}"),
+ c("\u3009", "\\\\textrangle{}"),
+ c("\u301a", "\\\\textlbrackdbl{}"),
+ c("\u301b", "\\\\textrbrackdbl{}"),
+ c("\u2045", "\\\\textlquill{}"),
+ c("\u2046", "\\\\textrquill{}"),
+ c("\u2117", "\\\\textcircledP{}"),
+ c("\u2120", "\\\\textservicemark{}"),
+ c("\u2103", "\\\\textcelsius{}"),
+ c("\u2127", "\\\\textmho{}"),
+ c("\u00b5", "\\\\textmu{}"),
+ c("\u03a9", "\\\\textohm{}"),
+ c("\u0e3f", "\\\\textbaht{}"),
+ c("\u00a2", "\\\\textcent{}"),
+ c("\u20a1", "\\\\textcolonmonetary{}"),
+ c("\u00a4", "\\\\textcurrency{}"),
+ c("\u20ab", "\\\\textdong{}"),
+ c("\u20ac", if (eurosym) "\\\\euro{}" else "\\\\texteuro{}"),
+ c("\u20b2", "\\\\textguarani{}"),
+ c("\u20a4", "\\\\textlira{}"),
+ c("\u20a6", "\\\\textnaira{}"),
+ c("\u20b1", "\\\\textpeso{}"),
+ c("\u20a9", "\\\\textwon{}"),
+ c("\u00a5", "\\\\textyen{}"),
+ c("\u02dd", "\\\\textacutedbl{}"),
+ c("\u00b4", "\\\\textasciiacute{}"),
+ c("\u00b8", "\\\\c{}"),
+ c("\u02d8", "\\\\textasciibreve{}"),
+ c("\u02c7", "\\\\textasciicaron{}"),
+ c("\u00a8", "\\\\textasciidieresis{}"),
+ c("\u00af", "\\\\textasciimacron{}"),
+ c("\u00b0", "\\\\textdegree{}"),
+ c("\u00f7", "\\\\textdiv{}"),
+ c("\u00bc", "\\\\textonequarter{}"),
+ c("\u00bd", "\\\\textonehalf{}"),
+ c("\u00be", "\\\\textthreequarters{}"),
+ c("\u00d7", "\\\\texttimes{}"),
+ c("\u00b1", "\\\\textpm{}"),
+ c("\u00b9", "\\\\textonesuperior{}"),
+ c("\u00b2", "\\\\texttwosuperior{}"),
+ c("\u00b3", "\\\\textthreesuperior{}"),
+ c("\u2044", "\\\\textfractionsolidus{}"),
+ c("\u221a", "\\\\textsurd{}"),
+ c("\u00ac", "\\\\textlnot{}"),
+ c("\u2212", "\\\\textminus{}")))
+
+ ## After a command, remove empty group (when at the end of the
+ ## string or some suitable character follows) or replace it with a
+ ## space (when a space does not follow).
+ tmp <- paste0("(\\\\[", Letters, "]+){}")
+ substitutions <-
+ c(substitutions,
+ list(c(paste0(tmp, "(?=$|[[:digit:],.?!;:\\\\}+*/-])"), "\\1"),
+ c(paste0(tmp, "(?! )"), "\\1 ")))
+
+ ## Convert strings to UTF-8 encoding, NFD (decomposed) form, for
+ ## processing of accented characters.
+ y <- stri_trans_nfd(y)
+ ## Apply the substitutions in the list
+ for (subst in substitutions) {
y <- gsub(subst[1], subst[2], y, perl = TRUE)
}
if (isTRUE(doublebackslash)) {
y <- gsub("\\", "\\\\", y, fixed=TRUE)
}
- ## Convert result to UTF-8 NFC encoding
+ ## Convert result to UTF-8 NFC encoding, although most non-ASCII
+ ## content has probably been converted to LaTeX commands.
stri_trans_nfc(y)
}
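Stripped of the long substitution tables, the revised function boils down to: decompose to NFD, apply an ordered list of (pattern, replacement) pairs with gsub(..., perl = TRUE), then recompose with NFC. The toy version below, not part of the commit, shows only that pipeline; the name toy_latexify and its two rules are invented for illustration.

library(stringi)
toy_latexify <- function(x) {
    y <- stri_trans_nfd(as.character(x))        # decompose accents
    substitutions <-
        list(c("([#$%&_])", "\\\\\\1"),          # escape some ASCII specials
             c("(.)\u0301", "\\\\'{\\1}"))       # combining acute accent
    for (subst in substitutions) {
        y <- gsub(subst[1], subst[2], y, perl = TRUE)
    }
    stri_trans_nfc(y)                            # recompose whatever is left
}
toy_latexify("caf\u00e9 & 100%")   # returns the string caf\'{e} \& 100\%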
Modified: pkg/dplR/inst/unitTests/runit.utils.R
===================================================================
--- pkg/dplR/inst/unitTests/runit.utils.R 2014-12-11 09:39:52 UTC (rev 920)
+++ pkg/dplR/inst/unitTests/runit.utils.R 2014-12-12 18:55:16 UTC (rev 921)
@@ -28,7 +28,12 @@
msg="IDs have a restricted character (4/16 choices) in one position")
}
-test.latexify <- function(testDocument=FALSE) {
+## If 'testDocument' is TRUE, produces a test document to 'con', which may
+## be a connection or a filename.
+test.latexify <-
+ function(testDocument=FALSE,
+ con=tempfile(pattern = "latexify", fileext = ".tex"))
+{
## Number of test strings
## (including one "extra difficult" case and one empty string)
SAMP.SIZE <- 50
@@ -39,16 +44,22 @@
## All ASCII characters except NUL (0)
characters <- rawToChar(as.raw(1:127), multiple = TRUE)
## LaTeX special characters. Some of these must be converted to
- ## commands. Others (single and double quote) are converted to
- ## commands or other characters for improved compatibility with
- ## other packages or to get a particular glyph (upright quote)
- ## instead of the default (curved quote).
+ ## commands. Others (e.g. single and double quote, *) are
+ ## converted to commands or other characters for improved
+ ## compatibility with other packages or to get a particular glyph
+ ## (upright quote, centered asterisk) instead of the default
+ ## (curved quote, asterisk located higher than the center of the
+ ## line). Some characters would work fine in font encodings other
+ ## than OT1 (e.g. <, >, |), but are converted to commands anyway.
+ ## The dash is included to prevent -- and --- turning into an n
+ ## dash and an m dash, respectively.
##
## NOTE that the handling (what kind of treatment if any) of some
- ## characters (currently single quote) depends on the (default)
+ ## characters (currently quotes) depends on the (default)
## arguments of latexify().
specialChars <-
- c("{", "}", "\\", "#", "$", "%", "^", "&", "_", "~", "\"", "/", "'")
+ c("{", "}", "\\", "#", "$", "%", "^", "&", "_", "~", "\"", "/", "'",
+ "<", ">", "|", "`", "-")
specialStr <- paste(specialChars, collapse="")
## latexify() is designed to convert any sequence of space
## characters to a single regular space
@@ -96,12 +107,14 @@
checkTrue(!any(grepl("\\\\", ltxSingle, fixed=TRUE)),
msg="No line breaks (double backslash)")
Letters <- paste(c(LETTERS, letters), collapse="")
- textCommand <- sprintf("\\\\[%s]+", Letters)
- commandAndGroup <- paste(textCommand, "(\\{\\}|(?=\\\\))", sep="")
- commandsAt <- gregexpr(commandAndGroup, ltxSingle, perl=TRUE)
+ textCommand <- sprintf("\\\\[%s]+(\\{([^}]|\\\\})+})?", Letters)
+ commandTerminated <-
+ paste(textCommand,
+ "((?<=})|\\{\\}| +|(?=$|[[:digit:],.?!;:\\\\}+*/-]))", sep="")
+ commandsAt <- gregexpr(commandTerminated, ltxSingle, perl=TRUE)
checkEquals(lapply(gregexpr(textCommand, ltxSingle), as.vector),
lapply(commandsAt, as.vector),
- msg="Command name is followed by empty group or backslash")
+ msg="Command name is terminated properly")
escape <- sprintf("\\\\[^%s](\\{\\})?", Letters)
escapesAt <- gregexpr(escape, ltxSingle)
@@ -153,8 +166,8 @@
## a space between words
strStart <- which(diff(c(0, charIdx)) > 0)
strStop <- c(strStart[-1] - 1, nChars)
- ## Strip off empty group following a command
- ltxChars <- sub(sprintf("([%s])\\{\\}$", Letters), "\\1",
+ ## Strip off empty group or spaces following a command
+ ltxChars <- sub(sprintf("([%s])(\\{\\}| +)$", Letters), "\\1",
substring(ltxSingle[i], strStart, strStop))
} else {
ltxChars <- character(0)
@@ -194,16 +207,16 @@
tolerance=0)
checkTrue(all(specialChars %in% specialMap[, 1]),
msg="Each special character has a mapping")
- ## A test for encoding conversion
+ ## A test for handling of different encodings in the input
latin1String <- "clich\xe9 ma\xf1ana"
Encoding(latin1String) <- "latin1"
- utf8fy <- latexify(latin1String)
- checkEquals("UTF-8", Encoding(utf8fy),
- msg="Declared encoding is UTF-8")
- checkEquals(as.raw(c(0x63, 0x6c, 0x69, 0x63, 0x68, 0xc3, 0xa9, 0x20,
- 0x6d, 0x61, 0xc3, 0xb1, 0x61, 0x6e, 0x61)),
- charToRaw(utf8fy),
- msg="Conversion to UTF-8 NFC succeeded")
+ latinConverted <- latexify(latin1String, doublebackslash=FALSE)
+ checkEquals("clich\\'{e} ma\\~{n}ana",
+ latinConverted,
+ msg="Conversion of latin1 string succeeded")
+ checkEquals(latinConverted,
+ latexify(enc2utf8(latin1String), doublebackslash=FALSE),
+ msg="Encoding of the input does not matter")
## A test for other than default quoting options
quoteString <- "\"It's five o'clock\", he said."
res1 <- latexify(quoteString, doublebackslash=FALSE)
@@ -211,48 +224,123 @@
res3 <- latexify(quoteString, packages=character(0), doublebackslash=FALSE)
res4 <- latexify(quoteString, packages="fontenc", doublebackslash=FALSE)
res5 <- latexify(quoteString, packages="textcomp", doublebackslash=FALSE)
- exp2 <- gsub("\"", "\\\\textquotedblright{}", quoteString)
- exp4 <- gsub("\"", "\\\\textquotedbl{}", quoteString)
- exp5 <- gsub("'", "\\\\textquotesingle{}", exp2)
- exp1 <- gsub("'", "\\\\textquotesingle{}", exp4)
+ exp2 <- sub("\"", "\\\\textquotedblright ", quoteString)
+ exp2 <- sub("\"", "\\\\textquotedblright", exp2)
+ exp4 <- sub("\"", "\\\\textquotedbl ", quoteString)
+ exp4 <- sub("\"", "\\\\textquotedbl", exp4)
+ exp5 <- gsub("'", "\\\\textquotesingle ", exp2)
+ exp1 <- gsub("'", "\\\\textquotesingle ", exp4)
checkEquals(exp1, res1, msg="Default straight quotes")
checkEquals(exp2, res2, msg="Curved quotes")
checkEquals(res2, res3, msg="Fallback to curved quotes")
checkEquals(exp4, res4, msg="Fallback to curved single quotes")
- retVal <- checkEquals(exp5, res5, msg="Fallback to curved double quotes")
+ checkEquals(exp5, res5, msg="Fallback to curved double quotes")
## Check that non-ASCII quotes used by dQuote() and sQuote() are
## converted to LaTeX commands
- if (isTRUE(l10n_info()[["MBCS"]])) {
- nestQuotes <- paste0("You said, \u201cHe said, ",
- "\u2018Have a nice day.\u2019\u201d")
- nq <- latexify(nestQuotes, doublebackslash=FALSE)
- retVal <-
- checkEquals(gsub("\\{\\}(?=\\\\)", "",
- gsub("\u2018", "\\\\textquoteleft{}",
- gsub("\u2019", "\\\\textquoteright{}",
- gsub("\u201c", "\\\\textquotedblleft{}",
- gsub("\u201d",
- "\\\\textquotedblright{}",
- nestQuotes)))),
- perl=TRUE),
- nq)
- }
+ nestQuotes <- paste0("You said, \u201cHe said, ",
+ "\u2018Have a nice day.\u2019\u201d")
+ nq <- latexify(nestQuotes, doublebackslash=FALSE)
+ checkEquals(gsub("\\{\\}(?=\\\\)", "",
+ gsub("\u2018", "\\\\textquoteleft ",
+ gsub("\u2019", "\\\\textquoteright",
+ gsub("\u201c", "\\\\textquotedblleft ",
+ gsub("\u201d",
+ "\\\\textquotedblright",
+ nestQuotes)))),
+ perl=TRUE),
+ nq, msg="dQuote() and sQuote() are safe")
+ diaeresisD <- "o\u0308ljysa\u0308ilio\u0308"
+ diasD <- latexify(diaeresisD, doublebackslash=FALSE)
+ diaeresisC <- "\u00f6ljys\u00e4ili\u00f6"
+ diasC <- latexify(diaeresisC, doublebackslash=FALSE)
+ checkEquals(diasD, diasC, msg="Unicode NFD and NFC")
+ ## Strings containing practically every non-ASCII character that
+ ## will be converted by latexify()
+ breakWords <- rep(c("space", "vacation", "movie", "line", "break",
+ "rope", "period"), 2)
+ allChars <-
+ c("\u0132sselmeer is a lake, Berl\u0133n is Dutch for Berlin.",
+ paste0("Other digraphs and ligatures: \u01f1, \u01f2, \u01f3, ",
+ "\u01c4, \u01c5, \u01c6, \u01c7, \u01c8, \u01c9, \u01ca, ",
+ "\u01cb, \u01cc, \ufb00, \ufb01, \ufb02, \ufb03, \ufb04, ",
+ "\ufb05, \ufb06"),
+ paste0("\u017c\u00f3\u0142ty, p\u0113c, f\u00eate, ",
+ "vis-\u00e0-vis, pi\u00f1ata, Erd\u0151s, ",
+ "\u00c5ngstr\u00f6m, m\u0103r, t\u0159i, \u0203, t\u0331, ",
+ "fa\u00e7ade, \u1e0c, ",
+ "k\u0361p (there should be a ligature tie), t\u0105sa"),
+ "Circled: a\u20dd, \u00f1\u20dd",
+ "\u00a1Hola! \u00bfQu\u00e9 pasa? \u2e18Verdad\u203d",
+ paste0("Money: \u00a3, \u20ac, \u00a2, \u00a5, \u00a4, \u0e3f, ",
+ "\u20a1, \u20ab, \u20b2, \u20a4, \u20a6, \u20b1, \u20a9"),
+ paste0("No-Break\u00a0", breakWords, collapse=" "),
+ paste0("Do-Break ", breakWords, collapse=" "),
+ "visible\u2423space",
+ paste0(rep("Zero\u200bWidth\u200bSpace\u200b", 10), collapse=""),
+ paste0(rep(paste0("S\u00ado\u00adf\u00adt\u00ad",
+ "H\u00ady\u00adp\u00adh\u00ade\u00adn\u00ad",
+ "E\u00adv\u00ade\u00adr\u00ady\u00ad",
+ "w\u00adh\u00ade\u00adr\u00ade"), 8),
+ collapse="\u00ad"),
+ "Legal \u00a7, \u00a9, \u00ae, \u00b6, \u2117, \u2120, \u2122",
+ paste0("Letters \u00c6, \u00e6, \u0152, \u0153, \u00d8, \u00f8, ",
+ "\u0141, \u0142, \u1e9e, \u00df, \u017f, \u00d0, \u00f0, ",
+ "\u0110, \u0111, \u014a, \u014b, \u00de, \u00fe"),
+ paste0("Quotes: \u00ab | \u00bb | \u201a | \u201e | \u2039 | \u203a",
+ " | \u2018 | \u2019 | \u201c | \u201d"),
+ paste0("mho \u2127, ohm \u03a9, micro \u00b5, Celsius \u2103, ",
+ "degree \u00b0"),
+ paste0("Division \u00f7, times \u00d7, fractions \u00bc, \u00bd, ",
+ "\u00be, plusminus \u00b1, root \u221a, minus \u2212, ",
+ "asterisk \u2217, not \u00ac, fraction solidus \u2044, ",
+ "superscript \u00b9, \u00b2, \u00b3"),
+ paste0("Discount \u2052, estimated \u212e, \u2116 5, ",
+ "per mille \u2030, parts per ten thousand \u2031"),
+ "Bullets: \u2022 Closed, \u25e6 open",
+ paste0("Spacing diacritics: double acute \u02dd, acute \u00b4, ",
+ "cedilla \u00b8, breve \u02d8, ",
+ "caron \u02c7, diaeresis \u00a8, macron \u00af"),
+ "Arrows: left \u2190, up \u2191, right \u2192, down \u2193",
+ "Delimiters: \u3008, \u3009, \u301a, \u301b, \u2045, \u2046",
+ paste0("Miscellaneous: feminine ordinal \u00aa, ",
+ "masculine ordinal \u00ba, middle dot \u00b7, ",
+ "daggers \u2020, \u2021, ellipsis \u2026, ",
+ "double vertical line \u2016, large circle \u25ef, ",
+ "blank symbol \u2422, broken bar \u00a6, recipe \u211e, ",
+ "reference mark \u203b, low tilde \u02f7, ",
+ "en dash \u2013, em dash \u2014"))
+ ac <- latexify(allChars, doublebackslash=FALSE)
+ retVal <- checkTrue(all(Encoding(ac) == "unknown"),
+ msg="No non-ASCII characters left")
## When used independently outside the test suite, the function
- ## can create a test document
+ ## can create a test document, but only in a UTF-8 locale.
if (isTRUE(testDocument) && isTRUE(l10n_info()[["UTF-8"]])) {
preamble <- c("\\documentclass[a4paper]{article}",
+ "\\usepackage{etoolbox}",
+ "\\usepackage{ifluatex}",
+ "\\usepackage{ifxetex}",
+ "\\usepackage{parskip}",
+ "\\usepackage{textcomp}",
+ "\\ifbool{luatex}{",
+ "\\usepackage{fontspec}",
+ "}{",
+ "\\ifbool{xetex}{",
+ "\\usepackage{fontspec}",
+ "}{",
"\\usepackage[T1]{fontenc}",
- "\\usepackage{textcomp}",
- "\\usepackage{parskip}",
- "\\usepackage[utf8]{inputenx}",
- "\\input{ix-utf8enc.dfu}")
- id <- c(testStrings, latin1String, rep(quoteString, 5), nestQuotes)
- extraInfo <- c(rep("", length(testStrings) + 1),
+ "\\usepackage{lmodern}",
+ "}}")
+ id <- c(testStrings, latin1String, rep(quoteString, 5), nestQuotes,
+ diaeresisD, diaeresisC, allChars)
+ extraInfo <- c(rep("", length(testStrings) + length(latin1String)),
paste0(" (", c("default", "curved", "no packages",
"only fontenc", "only textcomp"), ")"),
- "")
+ rep("", length(nestQuotes)),
+ rep(" (Unicode NFD)", length(diaeresisD)),
+ rep(" (Unicode NFC)", length(diaeresisC)),
+ rep("", length(allChars)))
- ## Record how R prints inputDescription
+ ## Record how R prints the elements in 'id'
inputDescription <- character(length(id)) # dummy line
tc <- textConnection("inputDescription", "w", local = TRUE)
sink(tc)
@@ -265,9 +353,18 @@
close(tc)
on.exit()
- allOutput <- c(ltxSingle, utf8fy, res1, res2, res3, res4, res5, nq)
- filename <- tempfile(pattern = "latexify", fileext = ".tex")
- co <- file(filename, open = "wt", encoding = "UTF-8")
+ allOutput <- c(ltxSingle, latinConverted, res1, res2, res3, res4,
+ res5, nq, diasD, diasC, ac)
+ if (is.character(con)) {
+ co <- file(con, open = "wt", encoding = "UTF-8")
+ on.exit(close(co))
+ } else {
+ co <- con
+ if (!isOpen(co)) {
+ open(co, open = "wt")
+ on.exit(close(co))
+ }
+ }
writeLines(preamble, co)
writeLines("\\begin{document}", co)
writeLines("\\begin{enumerate}", co)
@@ -277,8 +374,7 @@
co, sep = "\n\n")
writeLines("\\end{enumerate}", co)
writeLines("\\end{document}", co)
- close(co)
- filename
+ con
} else {
retVal
}
[TRUNCATED]
To get the complete diff run:
svnlook diff /svnroot/dplr -r 921
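For anyone who wants to eyeball the rendered output, the revised test.latexify() shown above can also write a standalone LaTeX test document (UTF-8 locale required). A usage sketch, not part of the commit, assuming an installed dplR that ships the unit test file and an installed RUnit; the output path is only an example:

library(dplR)
library(RUnit)
source(system.file("unitTests", "runit.utils.R", package = "dplR"))
texfile <- test.latexify(testDocument = TRUE,
                         con = file.path(tempdir(), "latexify-test.tex"))
## Compile 'texfile' with pdflatex, xelatex or lualatex to check how
## each test string is rendered.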