[Blotter-commits] r917 - pkg/FinancialInstrument/inst/parser
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Mon Jan 30 23:54:04 CET 2012
Author: gsee
Date: 2012-01-30 23:54:04 +0100 (Mon, 30 Jan 2012)
New Revision: 917
Modified:
pkg/FinancialInstrument/inst/parser/TRTH_BackFill.R
Log:
patch header creation; use of metadata is optional.
Modified: pkg/FinancialInstrument/inst/parser/TRTH_BackFill.R
===================================================================
--- pkg/FinancialInstrument/inst/parser/TRTH_BackFill.R 2012-01-23 17:31:14 UTC (rev 916)
+++ pkg/FinancialInstrument/inst/parser/TRTH_BackFill.R 2012-01-30 22:54:04 UTC (rev 917)
@@ -32,6 +32,8 @@
#default_currency = 'USD' # passed to instrument.auto if type cannot be inferred from RIC
#digits.sec = 6 # for options(digits.secs=digits.secs)
#width = 200 # for options(width=width)
+#use.instrument = FALSE # If TRUE, non-defined instruments will be defined, and
+ # negative prices will be removed from non-synthetics
#instrument_file = [searches path.output for filename containing 'instruments'] #name of instrument envir RData file
#job.name = "" # the Reuters TRTH job name (by default all files not on disk)
#no.cores = 4 # number of cores for foreach
@@ -50,6 +52,7 @@
# path.output = '~/TRTH/',
# width = 200,
# digits.secs = 6,
+# use.instrument = FALSE,
# instrument_file = '~/TRTH/instruments.RData',
# username = 'email at domain.com',
# password = 'password',
@@ -59,7 +62,7 @@
# overwrite = FALSE,
# tick.image = TRUE,
# sec.image = TRUE,
-# no.cores = 20
+# no.cores = 20,
#)
#
#download_reut(.TRTH) # Download big zipped CSV
@@ -145,11 +148,19 @@
.TRTH$sec.image <- pickArg('sec.image', TRUE)
.TRTH$no.cores <- pickArg('no.cores', 4)
- instr.file.bak <- tail(list.files(path.output)[grep("instruments", list.files(path.output))], 1)
- .TRTH$instrument_file <- pickArg('instrument_file', instr.file.bak)
- if (length(.TRTH$instrument_file) == 0 || is.na(.TRTH$instrument_file) || !file.exists(.TRTH$instrument_file))
- stop("Please specify a valid filepath for instrument_file or move a file with 'instruments' in its name to 'path.output'")
+ .TRTH$use.instrument <- pickArg('use.instrument', FALSE)
+ # if `use.instrument` is TRUE, then the code will remove negative prices from
+ # instruments that are not of type='synthetic'. An instrument_file that contains an .instrument is required
+ # and will be loaded. If the RIC cannot be found in the .instrument environment, it will be auto defined
+ # using instrument.auto.
+ # Using use.instrument=TRUE can take a while.
+ if (isTRUE(.TRTH$use.instrument)) {
+ instr.file.bak <- tail(list.files(path.output)[grep("instruments", list.files(path.output))], 1)
+ .TRTH$instrument_file <- pickArg('instrument_file', instr.file.bak)
+ if (length(.TRTH$instrument_file) == 0 || is.na(.TRTH$instrument_file) || !file.exists(.TRTH$instrument_file))
+ stop("Please specify a valid filepath for instrument_file or move a file with 'instruments' in its name to 'path.output'")
+ }
registerDoMC(.TRTH$no.cores)
# registerDoSEQ()
@@ -287,7 +298,7 @@
} else .TRTH$instrument_file <- instrument_file
}
- loadInstruments(.TRTH$instrument_file)
+ if (isTRUE(.TRTH$use.instrument)) loadInstruments(.TRTH$instrument_file)
registerDoMC(.TRTH$no.cores)
## unzip and split (new unzip method does not require rezip; keeps original gz file)
@@ -315,33 +326,36 @@
# awk string says to make a file and put this row in it if the RIC or date are different than the previous row's RIC/date
print(paste('Making headers from', filename.csv))
system(paste('awk -v f2="" -F "," '," '",'{f1 = $1"."$2".csv";if(f1 != f2) { print >> f1; close(f2); f2=f1; } }',"' ",filename.csv, sep=""))
- }
+
+ tmpfiles <- list.files(.TRTH$archive_dir)
+ files.header <- tmpfiles[grep("RIC",tmpfiles)]
- tmpfiles <- list.files(.TRTH$archive_dir)
- files.header <- tmpfiles[grep("RIC",tmpfiles)]
- big.files <- tmpfiles[grep("@", tmpfiles)] #Big zipped CSVs from Reuters have e-mail address in name
- #big.files <- tmpfiles[grep(job.name, tmpfiles)]
- # csv files will be everthing that is not in "ignore" below
- # all these things we're ignoring should actually be in path.output, not here
- ignore <- c(big.files, files.header, 'NA', "Report",
- tmpfiles[grep("Tick2Sec|TRTH_config_file", tmpfiles)],
- tmpfiles[grep("\\.rda", tmpfiles)],
- tmpfiles[grep("\\.RData", tmpfiles)],
- tmpfiles[grep("missing_instruments", tmpfiles)]
- )
+ big.files <- tmpfiles[grep("@", tmpfiles)] #Big zipped CSVs from Reuters have e-mail address in name
+ #big.files <- tmpfiles[grep(job.name, tmpfiles)]
+ # csv files will be everthing that is not in "ignore" below
+ # all these things we're ignoring should actually be in path.output, not here
+ ignore <- c(big.files, files.header, 'NA', "Report",
+ tmpfiles[grep("Tick2Sec|TRTH_config_file", tmpfiles)],
+ tmpfiles[grep("\\.rda", tmpfiles)],
+ tmpfiles[grep("\\.RData", tmpfiles)],
+ tmpfiles[grep("missing_instruments", tmpfiles)]
+ )
+ .TRTH$files.csv <- tmpfiles[!tmpfiles %in% ignore]
+
+ # files.header now has several identical rows. Delete all but first by extracting first row, and overwritting file with it
+ system(paste('tail -1 "', files.header, '" > header.csv', sep="")) # extract 1st line
+ # head -1 "RIC.Date[G].csv" > header.csv
+ system(paste('mv header.csv "', files.header, '"', sep="")) # replace files.header with only 1st line
+ # mv header.csv "RIC.Date[G].csv"
- .TRTH$files.csv <- tmpfiles[!tmpfiles %in% ignore]
- # files.header now has several identical rows. Delete all but first by extracting first row, and overwritting file with it
- system(paste('head -1 "', files.header, '" > header.csv', sep="")) # extract 1st line
- # head -1 "RIC.Date[G].csv" > header.csv
- system(paste('mv header.csv "', files.header, '"', sep="")) # replace files.header with only 1st line
- # mv header.csv "RIC.Date[G].csv"
+ for (fl in .TRTH$files.csv) {
+ system(paste('cp "', files.header, '" ', paste(.TRTH$archive_dir, fl, sep=""), sep=""))
+ #cp "#RIC.Date[G].csv" /home/garrett/TRTH/archive/GEM1-U1.01-APR-2008.csv
+ }
- for (fl in .TRTH$files.csv) {
- system(paste('cp "', files.header, '" ', paste(.TRTH$archive_dir, fl, sep=""), sep=""))
- #cp "#RIC.Date[G].csv" /home/garrett/TRTH/archive/GEM1-U1.01-APR-2008.csv
}
+
for (j in 1:length(.TRTH$files.gz))
{
filename.gz <- .TRTH$files.gz[j]
@@ -404,57 +418,59 @@
.TRTH$files.xts <- files.xts
assign('.TRTH', .TRTH, pos=.GlobalEnv)
-
- missing_i <- NULL
- instr_s <- unique(files.xts[,'name.new'])
- alldefined <- unique(c(ls_instruments(), ls_instruments_by("X.RIC", NULL, in.slot='identifiers')))
- #FIXME: we really need a list of X.RICs, not a list of instrument_names that have X.RICs
- print(paste('Creating files.xts. No more than',
- length(instr_s[!instr_s %in% alldefined]),
- 'missing instruments will have to be defined'))
- missing_list <- list() # list to hold auto-defined missing instruments
- for(i in 1:length(instr_s)){
- instr <- getInstrument(instr_s[i], silent=TRUE)
- iauto <- NULL
- if(is.instrument(instr)){
- files.xts[files.xts$name.new ==instr_s[i],]$type <- paste(instr$type, collapse=";")
- } else {
- #NOTE: If we skip all this define-on-the-fly stuff, it would be much faster.
- pid <- try(parse_id(instr_s[i]))
- tmpid <- if(!inherits(pid, 'try-error')
- && !"" %in% c(pid$root, pid$suffix)) {
- paste(pid$root, pid$suffix, sep="_")
- } else if (!inherits(pid, 'try-error')) {
- pid$root
- } else instr_s[i]
- iauto <- instrument.auto(tmpid, currency=.TRTH$default_currency,
- default_type=.TRTH$default_type, identifiers=list(X.RIC=instr_s[i]), assign_i=FALSE)
- if (!is.instrument(iauto)) {
- warning(paste("Could NOT create ", .TRTH$default_type, " from ",
- instr_s[i], ". Creating _unknown_ instrument instead.", sep=""))
- iauto <- try(suppressWarnings(instrument.auto(instr_s[i], currency=.TRTH$default_currency,
- default_type="unknown", assign_i=FALSE)))
+
+ if (isTRUE(.TRTH$use.instrument)) {
+ missing_i <- NULL
+ instr_s <- unique(files.xts[,'name.new'])
+ alldefined <- unique(c(ls_instruments(), ls_instruments_by("X.RIC", NULL, in.slot='identifiers')))
+ #FIXME: we really need a list of X.RICs, not a list of instrument_names that have X.RICs
+ print(paste('Creating files.xts. No more than',
+ length(instr_s[!instr_s %in% alldefined]),
+ 'missing instruments will have to be defined'))
+ missing_list <- list() # list to hold auto-defined missing instruments
+ for(i in 1:length(instr_s)){
+ instr <- getInstrument(instr_s[i], silent=TRUE)
+ iauto <- NULL
+ if(is.instrument(instr)){
+ files.xts[files.xts$name.new ==instr_s[i],]$type <- paste(instr$type, collapse=";")
+ } else {
+ #NOTE: If we skip all this define-on-the-fly stuff, it would be much faster.
+ pid <- try(parse_id(instr_s[i]))
+ tmpid <- if(!inherits(pid, 'try-error')
+ && !"" %in% c(pid$root, pid$suffix)) {
+ paste(pid$root, pid$suffix, sep="_")
+ } else if (!inherits(pid, 'try-error')) {
+ pid$root
+ } else instr_s[i]
+ iauto <- instrument.auto(tmpid, currency=.TRTH$default_currency,
+ default_type=.TRTH$default_type, identifiers=list(X.RIC=instr_s[i]), assign_i=FALSE)
+ if (!is.instrument(iauto)) {
+ warning(paste("Could NOT create ", .TRTH$default_type, " from ",
+ instr_s[i], ". Creating _unknown_ instrument instead.", sep=""))
+ iauto <- try(suppressWarnings(instrument.auto(instr_s[i], currency=.TRTH$default_currency,
+ default_type="unknown", assign_i=FALSE)))
+ }
+ missing_list[[iauto$primary_id]] <- iauto
+ #assign(iauto$primary_id, iauto, pos=missing_i_envir)
+ files.xts[files.xts$name.new==instr_s[i],]$type <- paste(iauto$type, collapse=";")
+ missing_i <- c(missing_i, instr_s[i])
}
- missing_list[[iauto$primary_id]] <- iauto
- #assign(iauto$primary_id, iauto, pos=missing_i_envir)
- files.xts[files.xts$name.new==instr_s[i],]$type <- paste(iauto$type, collapse=";")
- missing_i <- c(missing_i, instr_s[i])
}
- }
-
- if (length(missing_list) > 0) {
- # Remove everything from .instrument, put back the auto-defined missing instruments and save them
- print("saving RData file with auto-defined missing instruments.")
- try(rm_instruments(), silent=TRUE)
- lapply(missing_list, function(x) {
- assign(x$primary_id, x, pos=FinancialInstrument:::.instrument)
- })
- saveInstruments(paste("missing_instr", format(Sys.time(), "%Y.%m.%d_%H%M%S"), sep='_'), .TRTH$path.output)
- # now that we've saved only the newly defined instruments, we can load back our other instruments
- loadInstruments(.TRTH$instrument_file)
- if (!is.null(iauto)) {
- .TRTH$missing_i <- missing_i <- data.frame(symbol=missing_i,type=iauto$type[1]) #legacy
- write.csv(missing_i,file=paste(.TRTH$path.output,'missing_instruments.CSV',sep=''))
+
+ if (length(missing_list) > 0) {
+ # Remove everything from .instrument, put back the auto-defined missing instruments and save them
+ print("saving RData file with auto-defined missing instruments.")
+ try(rm_instruments(), silent=TRUE)
+ lapply(missing_list, function(x) {
+ assign(x$primary_id, x, pos=FinancialInstrument:::.instrument)
+ })
+ saveInstruments(paste("missing_instr", format(Sys.time(), "%Y.%m.%d_%H%M%S"), sep='_'), .TRTH$path.output)
+ # now that we've saved only the newly defined instruments, we can load back our other instruments
+ loadInstruments(.TRTH$instrument_file)
+ if (!is.null(iauto)) {
+ .TRTH$missing_i <- missing_i <- data.frame(symbol=missing_i,type=iauto$type[1]) #legacy
+ write.csv(missing_i,file=paste(.TRTH$path.output,'missing_instruments.CSV',sep=''))
+ }
}
}
.TRTH$files.xts <- files.xts
@@ -538,7 +554,12 @@
#only zipped file on disk. We'll have to unzip.
system(paste("gzip -d -f ", CSV.name, ".gz", sep=""))
}
- Data <- read.csv(CSV.name, stringsAsFactors=FALSE,header=TRUE)
+ Data <- try(read.csv(CSV.name, stringsAsFactors=FALSE, header=TRUE))
+ if (inherits(Data, 'try-error')) {
+ Data <- read.csv(CSV.name, stringsAsFactors=FALSE, header=FALSE)
+ colnames(Data) <- make.names(Data[1, ])
+ Data <- Data[-1, ]
+ }
# Now that we've read the CSV, zip it and delete original to conserve disk space
# print(paste("zipping ", CSV.name, sep=""))
system(paste("gzip -f ", CSV.name, sep=""))
@@ -568,18 +589,20 @@
Data <- xts(Data,order.by=index.new,tz="GMT")
- ## Turn bids/offers that are less than zero into NA for outrights
- type <- try(unlist(strsplit(type, ";")))
- if (inherits(type, 'try-error')) {
- warning('type is incorrect. Using "synthetic"')
- type <- 'synthetic'
- }
- if(!any(c("unknown", "synthetic") %in% type))
- { #outrights
- Data$Bid.Price[Data$Bid.Price < 0, ] <- NA
- Data$Ask.Price[Data$Ask.Price < 0, ] <- NA
- Data$Price[Data$Price < 0, ] <- NA
- }
+ if (isTRUE(.TRTH$use.instrument)) {
+ ## Turn bids/offers that are less than zero into NA for outrights
+ type <- try(unlist(strsplit(type, ";")))
+ if (inherits(type, 'try-error')) {
+ warning('type is incorrect. Using "synthetic"')
+ type <- 'synthetic'
+ }
+ if(!any(c("unknown", "synthetic") %in% type))
+ { #outrights
+ Data$Bid.Price[Data$Bid.Price < 0, ] <- NA
+ Data$Ask.Price[Data$Ask.Price < 0, ] <- NA
+ Data$Price[Data$Price < 0, ] <- NA
+ }
+ }
## If Bid.Price and Bid.Size are zero set both to NA
zero.replace <- which(Data$Bid.Price == 0 & Data$Bid.Size == 0)
More information about the Blotter-commits
mailing list