[Dplr-commits] r918 - in pkg/dplR: . R inst/doc inst/unitTests man vignettes

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Thu Nov 20 15:26:08 CET 2014


Author: mvkorpel
Date: 2014-11-20 15:26:07 +0100 (Thu, 20 Nov 2014)
New Revision: 918

Modified:
   pkg/dplR/DESCRIPTION
   pkg/dplR/R/latexify.R
   pkg/dplR/inst/doc/math-dplR.Rnw.txt
   pkg/dplR/inst/doc/math-dplR.pdf
   pkg/dplR/inst/unitTests/runit.utils.R
   pkg/dplR/man/latexify.Rd
   pkg/dplR/vignettes/dplR.sty
Log:
* Improved latexify() and its test function, particularly
  w.r.t. handling of quotes.  It is now possible to produce a test
  LaTeX document by manually running the test function
  test.latexify(testDocument=TRUE).

* Because latexify() can now produce straight single quotes when the
  'textcomp' LaTeX package is loaded (which is the default assumption
  in latexify()), the package is now included in dplR.sty and the
  pseudo vignette "math-dplR".


Modified: pkg/dplR/DESCRIPTION
===================================================================
--- pkg/dplR/DESCRIPTION	2014-11-19 18:35:43 UTC (rev 917)
+++ pkg/dplR/DESCRIPTION	2014-11-20 14:26:07 UTC (rev 918)
@@ -3,7 +3,7 @@
 Type: Package
 Title: Dendrochronology Program Library in R
 Version: 1.6.1
-Date: 2014-11-17
+Date: 2014-11-20
 Authors at R: c(person("Andy", "Bunn", role = c("aut", "cph",
         "cre", "trl"), email = "andy.bunn at wwu.edu"), person("Mikko",
         "Korpela", role = c("aut", "trl")), person("Franco", "Biondi",

Modified: pkg/dplR/R/latexify.R
===================================================================
--- pkg/dplR/R/latexify.R	2014-11-19 18:35:43 UTC (rev 917)
+++ pkg/dplR/R/latexify.R	2014-11-20 14:26:07 UTC (rev 918)
@@ -21,7 +21,9 @@
 ##
 ## It seems that Sweave needs doublebackslash = TRUE
 ## but knitr needs doublebackslash = FALSE.
-latexify <- function(x, doublebackslash=TRUE) {
+latexify <- function(x, doublebackslash = TRUE,
+                     quotes = c("straight", "curved"),
+                     packages = c("fontenc", "textcomp")) {
     y <- as.character(x)
     ## Kludge for converting from "byte" to the current encoding
     ## in a way which preserves the hex notation.
@@ -37,6 +39,9 @@
         cat(y[encBytes], sep = "\n")
         y[encBytes] <- foo
     }
+    fontenc <- "fontenc" %in% packages
+    textcomp <- "textcomp" %in% packages
+    straightQuotes <- match.arg(quotes) == "straight"
     ## Remove control characters (not spaces!)
     y <- gsub("(?![[:space:]])[[:cntrl:]]", "", y, perl=TRUE)
     ## Convert any sequence of whitespace to a single space.  This
@@ -56,17 +61,43 @@
     substitutions <-
         list(c("\\{", "\\\\{"),
              c("\\}", "\\\\}"),
-             c("\\\\(?!(\\{|\\}))", "\\\\textbackslash{}"),
+             c("\\\\(?!\\{|\\})", "\\\\textbackslash{}"),
              c("\\#", "\\\\#"),
              c("\\$", "\\\\$"),
              c("%", "\\\\%"),
-             c("\\^", "\\\\^{}"),
+             c("\\^", "\\\\textasciicircum{}"),
              c("&", "\\\\&"),
              c("_", "\\\\_"),
-             c("~", "\\\\~{}"),
-             c('"', "\\\\textquotedbl{}"),
+             c("~", "\\\\textasciitilde{}"),
+             if (textcomp && straightQuotes) {
+                 c("'", "\\\\textquotesingle{}")
+             },
+             c('"', if (fontenc && straightQuotes) {
+                 "\\\\textquotedbl{}"
+             } else {
+                 "\\\\textquotedblright{}"
+             }),
              c("/", "\\\\slash{}"))
-    for (subst in substitutions) {
+    if (isTRUE(l10n_info()[["MBCS"]])) {
+        ## The output of sQuote() and dQuote() may contain
+        ## non-ASCII quoting characters.  If the input is ASCII,
+        ## it may be a surprise to the user that a UTF-8 input
+        ## encoding is then needed in LaTeX.  Converting the
+        ## quotes to commands solves this problem.
+        substitutions <-
+            c(substitutions,
+              list(c("\u2018", "\\\\textquoteleft{}"),
+                   c("\u2019", "\\\\textquoteright{}"),
+                   c("\u201c", "\\\\textquotedblleft{}"),
+                   c("\u201d", "\\\\textquotedblright{}")))
+    }
+    ## Remove empty group after command when followed by a backslash
+    Letters <- paste(c(LETTERS, letters), collapse="")
+    substitutions <- c(substitutions,
+                       list(c(sprintf("(\\\\[%s]+)\\{\\}(?=\\\\)",
+                                      Letters), "\\1")))
+
+    for (subst in substitutions[!vapply(substitutions, is.null, logical(1))]) {
         y <- gsub(subst[1], subst[2], y, perl = TRUE)
     }
     if (isTRUE(doublebackslash)) {

Modified: pkg/dplR/inst/doc/math-dplR.Rnw.txt
===================================================================
--- pkg/dplR/inst/doc/math-dplR.Rnw.txt	2014-11-19 18:35:43 UTC (rev 917)
+++ pkg/dplR/inst/doc/math-dplR.Rnw.txt	2014-11-20 14:26:07 UTC (rev 918)
@@ -65,6 +65,7 @@
 @ 
 
 \usepackage[T1]{fontenc}
+\usepackage{textcomp}
 \usepackage[utf8]{inputenx}
 \usepackage[english]{babel}
 \usepackage{amsmath}

Modified: pkg/dplR/inst/doc/math-dplR.pdf
===================================================================
(Binary files differ)

Modified: pkg/dplR/inst/unitTests/runit.utils.R
===================================================================
--- pkg/dplR/inst/unitTests/runit.utils.R	2014-11-19 18:35:43 UTC (rev 917)
+++ pkg/dplR/inst/unitTests/runit.utils.R	2014-11-20 14:26:07 UTC (rev 918)
@@ -28,18 +28,27 @@
               msg="IDs have a restricted character (4/16 choices) in one position")
 }
 
-test.latexify <- function() {
+test.latexify <- function(testDocument=FALSE) {
     ## Number of test strings
     ## (including one "extra difficult" case and one empty string)
     SAMP.SIZE <- 50
     stopifnot(SAMP.SIZE >= 2)
     MIN.LENGTH <- 1
     MAX.LENGTH <- 1000
+    MAX.NCHAR <- 20 # maximum length of a "word"
     ## All ASCII characters except NUL (0)
     characters <- rawToChar(as.raw(1:127), multiple = TRUE)
-    ## LaTeX special characters must be converted to commands
+    ## LaTeX special characters.  Some of these must be converted to
+    ## commands.  Others (single and double quote) are converted to
+    ## commands or other characters for improved compatibility with
+    ## other packages or to get a particular glyph (upright quote)
+    ## instead of the default (curved quote).
+    ##
+    ## NOTE that the handling (what kind of treatment if any) of some
+    ## characters (currently single quote) depends on the (default)
+    ## arguments of latexify().
     specialChars <-
-        c("{", "}", "\\", "#", "$", "%", "^", "&", "_", "~", "\"", "/")
+        c("{", "}", "\\", "#", "$", "%", "^", "&", "_", "~", "\"", "/", "'")
     specialStr <- paste(specialChars, collapse="")
     ## latexify() is designed to convert any sequence of space
     ## characters to a single regular space
@@ -57,12 +66,15 @@
     ## * The other elements consist of a random sample of characters.
     strStop <- cumsum(stringLengths)
     strStart <- strStop - (stringLengths - 1)
+    nSpaces <- round(0.2 * nTotal) # In addition to spaces in 'characters'
     testStrings <-
-        c(substring(paste(sample(rep(characters, length.out = nTotal)),
+        c(substring(paste(sample(c(rep(characters,
+                                       length.out = nTotal - nSpaces),
+                                   rep(" ", nSpaces))),
                           collapse=""), strStart, strStop),
-          paste(c(specialChars,
-                  rev(specialChars),
-                  rep(specialChars, each=3),
+          paste(c(specialChars, " ",
+                  rev(specialChars), " ",
+                  unlist(lapply(lapply(specialChars,rep,3), c, " ")),
                   paste(specialChars, " \t")),
                 collapse=""),
           "")
@@ -85,11 +97,11 @@
               msg="No line breaks (double backslash)")
     Letters <- paste(c(LETTERS, letters), collapse="")
     textCommand <- sprintf("\\\\[%s]+", Letters)
-    commandAndGroup <- paste(textCommand, "\\{\\}", sep="")
-    commandsAt <- gregexpr(commandAndGroup, ltxSingle)
+    commandAndGroup <- paste(textCommand, "(\\{\\}|(?=\\\\))", sep="")
+    commandsAt <- gregexpr(commandAndGroup, ltxSingle, perl=TRUE)
     checkEquals(lapply(gregexpr(textCommand, ltxSingle), as.vector),
                 lapply(commandsAt, as.vector),
-                msg="Command name is terminated with empty group")
+                msg="Command name is followed by empty group or backslash")
     escape <- sprintf("\\\\[^%s](\\{\\})?", Letters)
     escapesAt <- gregexpr(escape, ltxSingle)
 
@@ -141,7 +153,9 @@
             ## a space between words
             strStart <- which(diff(c(0, charIdx)) > 0)
             strStop <- c(strStart[-1] - 1, nChars)
-            ltxChars <- substring(ltxSingle[i], strStart, strStop)
+            ## Strip off empty group following a command
+            ltxChars <- sub(sprintf("([%s])\\{\\}$", Letters), "\\1",
+                            substring(ltxSingle[i], strStart, strStop))
         } else {
             ltxChars <- character(0)
         }
@@ -180,7 +194,7 @@
                        tolerance=0)
     checkTrue(all(specialChars %in% specialMap[, 1]),
               msg="Each special character has a mapping")
-    ## A separate test for encoding conversion
+    ## A test for encoding conversion
     latin1String <- "clich\xe9 ma\xf1ana"
     Encoding(latin1String) <- "latin1"
     utf8fy <- latexify(latin1String)
@@ -190,6 +204,84 @@
                          0x6d, 0x61, 0xc3, 0xb1, 0x61, 0x6e, 0x61)),
                 charToRaw(utf8fy),
                 msg="Conversion to UTF-8 NFC succeeded")
+    ## A test for other than default quoting options
+    quoteString <- "\"It's five o'clock\", he said."
+    res1 <- latexify(quoteString, doublebackslash=FALSE)
+    res2 <- latexify(quoteString, quotes="curved", doublebackslash=FALSE)
+    res3 <- latexify(quoteString, packages=character(0), doublebackslash=FALSE)
+    res4 <- latexify(quoteString, packages="fontenc", doublebackslash=FALSE)
+    res5 <- latexify(quoteString, packages="textcomp", doublebackslash=FALSE)
+    exp2 <- gsub("\"", "\\\\textquotedblright{}", quoteString)
+    exp4 <- gsub("\"", "\\\\textquotedbl{}", quoteString)
+    exp5 <- gsub("'", "\\\\textquotesingle{}", exp2)
+    exp1 <- gsub("'", "\\\\textquotesingle{}", exp4)
+    checkEquals(exp1, res1, msg="Default straight quotes")
+    checkEquals(exp2, res2, msg="Curved quotes")
+    checkEquals(res2, res3, msg="Fallback to curved quotes")
+    checkEquals(exp4, res4, msg="Fallback to curved single quotes")
+    retVal <- checkEquals(exp5, res5, msg="Fallback to curved double quotes")
+    ## Check that non-ASCII quotes used by dQuote() and sQuote() are
+    ## converted to LaTeX commands
+    if (isTRUE(l10n_info()[["MBCS"]])) {
+        nestQuotes <- paste0("You said, \u201cHe said, ",
+                             "\u2018Have a nice day.\u2019\u201d")
+        nq <- latexify(nestQuotes, doublebackslash=FALSE)
+        retVal <-
+            checkEquals(gsub("\\{\\}(?=\\\\)", "",
+                             gsub("\u2018", "\\\\textquoteleft{}",
+                                  gsub("\u2019", "\\\\textquoteright{}",
+                                       gsub("\u201c", "\\\\textquotedblleft{}",
+                                            gsub("\u201d",
+                                                 "\\\\textquotedblright{}",
+                                                 nestQuotes)))),
+                             perl=TRUE),
+                        nq)
+    }
+    ## When used independently outside the test suite, the function
+    ## can create a test document
+    if (isTRUE(testDocument) && isTRUE(l10n_info()[["UTF-8"]])) {
+        preamble <- c("\\documentclass[a4paper]{article}",
+                      "\\usepackage[T1]{fontenc}",
+                      "\\usepackage{textcomp}",
+                      "\\usepackage{parskip}",
+                      "\\usepackage[utf8]{inputenx}",
+                      "\\input{ix-utf8enc.dfu}")
+        id <- c(testStrings, latin1String, rep(quoteString, 5), nestQuotes)
+        extraInfo <- c(rep("", length(testStrings) + 1),
+                       paste0(" (", c("default", "curved", "no packages",
+                                      "only fontenc", "only textcomp"), ")"),
+                       "")
+
+        ## Record how R prints each element of id into inputDescription
+        inputDescription <- character(length(id)) # dummy line
+        tc <- textConnection("inputDescription", "w", local = TRUE)
+        sink(tc)
+        on.exit(sink())
+        on.exit(close(tc), add = TRUE)
+        for (i in seq_along(id)) {
+            print(id[i])
+        }
+        sink()
+        close(tc)
+        on.exit()
+
+        allOutput <- c(ltxSingle, utf8fy, res1, res2, res3, res4, res5, nq)
+        filename <- tempfile(pattern = "latexify", fileext = ".tex")
+        co <- file(filename, open = "wt", encoding = "UTF-8")
+        writeLines(preamble, co)
+        writeLines("\\begin{document}", co)
+        writeLines("\\begin{enumerate}", co)
+        writeLines(paste0("% ", inputDescription, extraInfo, "\n",
+                          "\\item\\relax ", allOutput,
+                          "% ", seq_along(allOutput)),
+                   co, sep = "\n\n")
+        writeLines("\\end{enumerate}", co)
+        writeLines("\\end{document}", co)
+        close(co)
+        filename
+    } else {
+        retVal
+    }
 }
 
 test.latexDate <- function() {

Modified: pkg/dplR/man/latexify.Rd
===================================================================
--- pkg/dplR/man/latexify.Rd	2014-11-19 18:35:43 UTC (rev 917)
+++ pkg/dplR/man/latexify.Rd	2014-11-20 14:26:07 UTC (rev 918)
@@ -10,13 +10,30 @@
   used together with \samp{\Sexpr} in vignettes.
 }
 \usage{
-latexify(x, doublebackslash = TRUE)
+latexify(x, doublebackslash = TRUE,
+         quotes = c("straight", "curved"),
+         packages = c("fontenc", "textcomp"))
 }
 \arguments{
   \item{x}{ a \code{character} vector }
   \item{doublebackslash}{ a \code{logical} flag.  If \code{TRUE},
     backslashes in the output are doubled.  It seems that Sweave needs
     \code{TRUE} and knitr \code{FALSE}. }
+  \item{quotes}{ a \code{character} string specifying how single and
+    double quotes (ASCII codes 39 and 34) in the input are
+    treated.  The default is to use straight quotes.  The other option
+    is to use curved right side (closing) quotes.  Straight double
+    quotes are not available in the default OT1 font encoding of LaTeX.
+    Straight single quotes require the \dQuote{textcomp} package.  See
+    \code{\var{packages}}. }
+  \item{packages}{ a \code{character} vector specifying the LaTeX
+    packages allowed.  The use of some symbols in LaTeX requires
+    commands or characters made available by an add-on package.  If a
+    package required for a given character is not marked as available, a
+    fallback solution is silently used.  For example, curved quotes may
+    be used instead of straight quotes.  Including \code{"fontenc"} in
+    the vector means that an encoding other than OT1 is going to be
+    used.  }
 }
 \details{
 
@@ -38,11 +55,18 @@
   This set includes tabs, newlines and control characters.  The
   substitutions are then applied to the intermediate result.
 
-  The result is converted to UTF-8 encoding, Normalization Form C
-  (NFC).
+  The quoting functions \code{\link{sQuote}} and \code{\link{dQuote}} may
+  use non-ASCII quote characters, depending on the locale.  These
+  quotes are also converted to LaTeX commands.  This means that the quoting
+  functions are safe to use with any LaTeX input encoding.
 
+  The result is converted to UTF-8 encoding, Normalization Form C (NFC).
+  Note that this function will not add any non-ASCII characters that
+  were not already present in the input.
+
   Assuming that \samp{pdflatex} is used for compilation, suggested
-  package loading commands in the document preamble are: \preformatted{\usepackage[T1]{fontenc}    \% required for "
+  package loading commands in the document preamble are: \preformatted{\usepackage[T1]{fontenc}    \% no '"' in OT1 font encoding
+\usepackage{textcomp}       \% some symbols e.g. straight single quote
 \usepackage[utf8]{inputenx} \% UTF-8 input encoding
 \input{ix-utf8enc.dfu}      \% more supported characters} 
   

Modified: pkg/dplR/vignettes/dplR.sty
===================================================================
--- pkg/dplR/vignettes/dplR.sty	2014-11-19 18:35:43 UTC (rev 917)
+++ pkg/dplR/vignettes/dplR.sty	2014-11-20 14:26:07 UTC (rev 918)
@@ -1,11 +1,12 @@
 % This file is part of dplR: Dendrochronology Program Library in R.
 % Written by Andy Bunn and Mikko Korpela.
 \NeedsTeXFormat{LaTeX2e}
-\ProvidesPackage{dplR}[2014/04/11 Package for dplR vignettes]
+\ProvidesPackage{dplR}[2014/11/20 Package for dplR vignettes]
 
 \RequirePackage{amsmath}
 \RequirePackage{amssymb}
 \RequirePackage[T1]{fontenc}
+\RequirePackage{textcomp}
 \RequirePackage[english]{babel}[2000/01/28]
 \RequirePackage{booktabs}
 \RequirePackage{Sweave}



More information about the Dplr-commits mailing list