[Rcpp-devel] Regular Expressions
Dirk Eddelbuettel
edd at debian.org
Sun Mar 3 03:21:26 CET 2013
Gabor,
Here is a completely new, second variant of the same example, this time
implemented using only the inline package with a custom plugin. This should
have what you need.
Code first:
-----------------------------------------------------------------------------
edd at max:/tmp$ cat boostregex.R
library(inline)
## C++ support code that is passed to cxxfunction() below as its include text.
## It defines three helpers built on Boost.Regex, plus the pattern and the two
## format strings the conversion helpers share:
##  - validate_card_format(): regex_match against four 4-digit groups, where
##    each of the first three groups must be followed by a '-' or a space.
##  - machine_readable_card_number() / human_readable_card_number():
##    regex_replace with sed-style formats that emit the captured digit groups
##    either run together or joined by '-'.
## NB double backslashes expanded to four backslashes because of quoting :-/
## (R's string parser halves them, so the C++ compiler sees the usual
## double-backslash escapes, as the generated source in verbose mode shows.)
inctxt <- '
#include <Rcpp.h>
#include <string>
#include <boost/regex.hpp>
bool validate_card_format(const std::string& s) {
static const boost::regex e("(\\\\d{4}[- ]){3}\\\\d{4}");
return boost::regex_match(s, e);
}
const boost::regex e("\\\\A(\\\\d{3,4})[- ]?(\\\\d{4})[- ]?(\\\\d{4})[- ]?(\\\\d{4})\\\\z");
const std::string machine_format("\\\\1\\\\2\\\\3\\\\4");
const std::string human_format("\\\\1-\\\\2-\\\\3-\\\\4");
std::string machine_readable_card_number(const std::string& s) {
return boost::regex_replace(s, e, machine_format, boost::match_default | boost::format_sed);
}
std::string human_readable_card_number(const std::string& s) {
return boost::regex_replace(s, e, human_format, boost::match_default | boost::format_sed);
}
'
## C++ body of the function that cxxfunction() generates; `sx` is its single
## argument. The body converts sx to a vector of strings, applies the three
## helpers from inctxt to every element, and returns a data frame with the
## columns input, valid, machine and human.
srctxt <- '
std::vector<std::string> s = Rcpp::as<std::vector<std::string> >(sx);
int n = s.size();
std::vector<bool> valid(n);
std::vector<std::string> machine(n);
std::vector<std::string> human(n);
for (int i=0; i<n; i++) {
valid[i] = validate_card_format(s[i]);
machine[i] = machine_readable_card_number(s[i]);
human[i] = human_readable_card_number(s[i]);
}
return Rcpp::DataFrame::create(Rcpp::Named("input") = s,
Rcpp::Named("valid") = valid,
Rcpp::Named("machine") = machine,
Rcpp::Named("human") = human);
'
## Custom inline plugin: places the Boost.Regex header ahead of Rcpp.h in the
## generated source and supplies the linker flags for Rcpp and libboost_regex.
## NOTE(review): the -L / -rpath entries are paths from the author's machine
## and will differ elsewhere. The PKG_LIBS line in the verbose output lists the
## Rcpp flags a second time, so the plugin maker appears to append them itself;
## the Rcpp entries here are presumably redundant -- confirm before trimming.
plug <- Rcpp:::Rcpp.plugin.maker(
  include.before = "#include <boost/regex.hpp>",
  libs = paste(
    c(
      "-L/usr/local/lib/R/site-library/Rcpp/lib -lRcpp",
      "-Wl,-rpath,/usr/local/lib/R/site-library/Rcpp/lib",
      "-L/usr/lib -lboost_regex -lm"
    ),
    collapse = " "
  )
)
## Make the plugin available to cxxfunction() under the name "boostDemo".
registerPlugin(name = "boostDemo", plugin = plug)
## Compile the C++ body in srctxt into an R function of one argument, sx,
## using the support code in inctxt and the "boostDemo" plugin registered
## above. verbose = TRUE echoes the generated source and the compiler calls.
regexDemo <- cxxfunction(signature(sx = "CharVec"),
                         body = srctxt,
                         includes = inctxt,  # full name; `incl` relied on partial matching
                         plugin = "boostDemo",
                         verbose = TRUE)

## Sample card numbers: unseparated, space-separated, dash-separated, and one
## whose first group has only three digits.
s <- c("0000111122223333", "0000 1111 2222 3333",
       "0000-1111-2222-3333", "000-1111-2222-3333")

## Top-level call: under Rscript the returned data frame is auto-printed.
regexDemo(s)
edd at max:/tmp$
-----------------------------------------------------------------------------
Output in verbose mode:
-----------------------------------------------------------------------------
edd at max:/tmp$
edd at max:/tmp$ Rscript boostregex.R
Loading required package: methods
>> setting environment variables:
PKG_LIBS = -L/usr/local/lib/R/site-library/Rcpp/lib -lRcpp -Wl,-rpath,/usr/local/lib/R/site-library/Rcpp/lib -L/usr/lib -lboost_regex -lm -L/usr/local/lib/R/site-library/Rcpp/lib -lRcpp -Wl,-rpath,/usr/local/lib/R/site-library/Rcpp/lib
>> LinkingTo : Rcpp
CLINK_CPPFLAGS = -I"/usr/local/lib/R/site-library/Rcpp/include"
>> Program source :
1 :
2 : // includes from the plugin
3 : #include <boost/regex.hpp>
4 : #include <Rcpp.h>
5 :
6 :
7 : #ifndef BEGIN_RCPP
8 : #define BEGIN_RCPP
9 : #endif
10 :
11 : #ifndef END_RCPP
12 : #define END_RCPP
13 : #endif
14 :
15 : using namespace Rcpp;
16 :
17 :
18 : // user includes
19 :
20 : #include <Rcpp.h>
21 : #include <string>
22 : #include <boost/regex.hpp>
23 :
24 : bool validate_card_format(const std::string& s) {
25 : static const boost::regex e("(\\d{4}[- ]){3}\\d{4}");
26 : return boost::regex_match(s, e);
27 : }
28 :
29 : const boost::regex e("\\A(\\d{3,4})[- ]?(\\d{4})[- ]?(\\d{4})[- ]?(\\d{4})\\z");
30 : const std::string machine_format("\\1\\2\\3\\4");
31 : const std::string human_format("\\1-\\2-\\3-\\4");
32 :
33 : std::string machine_readable_card_number(const std::string& s) {
34 : return boost::regex_replace(s, e, machine_format, boost::match_default | boost::format_sed);
35 : }
36 :
37 : std::string human_readable_card_number(const std::string& s) {
38 : return boost::regex_replace(s, e, human_format, boost::match_default | boost::format_sed);
39 : }
40 :
41 :
42 : // declarations
43 : extern "C" {
44 : SEXP file13316a634edf( SEXP sx) ;
45 : }
46 :
47 : // definition
48 :
49 : SEXP file13316a634edf( SEXP sx ){
50 : BEGIN_RCPP
51 :
52 : std::vector<std::string> s = Rcpp::as<std::vector<std::string> >(sx);
53 : int n = s.size();
54 :
55 : std::vector<bool> valid(n);
56 : std::vector<std::string> machine(n);
57 : std::vector<std::string> human(n);
58 :
59 : for (int i=0; i<n; i++) {
60 : valid[i] = validate_card_format(s[i]);
61 : machine[i] = machine_readable_card_number(s[i]);
62 : human[i] = human_readable_card_number(s[i]);
63 : }
64 : return Rcpp::DataFrame::create(Rcpp::Named("input") = s,
65 : Rcpp::Named("valid") = valid,
66 : Rcpp::Named("machine") = machine,
67 : Rcpp::Named("human") = human);
68 :
69 : END_RCPP
70 : }
71 :
72 :
Compilation argument:
/usr/lib/R/bin/R CMD SHLIB file13316a634edf.cpp 2> file13316a634edf.cpp.err.txt
ccache g++-4.7 -I/usr/share/R/include -DNDEBUG -I"/usr/local/lib/R/site-library/Rcpp/include" -fpic -g0 -O3 -Wall -pipe -Wno-variadic-macros -pedantic -c file13316a634edf.cpp -o file13316a634edf.o
g++-4.7 -shared -o file13316a634edf.so file13316a634edf.o -L/usr/local/lib/R/site-library/Rcpp/lib -lRcpp -Wl,-rpath,/usr/local/lib/R/site-library/Rcpp/lib -L/usr/lib -lboost_regex -lm -L/usr/local/lib/R/site-library/Rcpp/lib -lRcpp -Wl,-rpath,/usr/local/lib/R/site-library/Rcpp/lib -L/usr/lib/R/lib -lR
input valid machine human
1 0000111122223333 FALSE 0000111122223333 0000-1111-2222-3333
2 0000 1111 2222 3333 TRUE 0000111122223333 0000-1111-2222-3333
3 0000-1111-2222-3333 TRUE 0000111122223333 0000-1111-2222-3333
4 000-1111-2222-3333 FALSE 000111122223333 000-1111-2222-3333
edd at max:/tmp$
-----------------------------------------------------------------------------
You should be able to adapt this on Windows. Keeping my fingers crossed...
Dirk
--
Dirk Eddelbuettel | edd at debian.org | http://dirk.eddelbuettel.com
More information about the Rcpp-devel
mailing list