[Blotter-commits] r1000 - pkg/FinancialInstrument/inst/parser

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Mon Apr 9 15:54:16 CEST 2012


Author: gsee
Date: 2012-04-09 15:54:16 +0200 (Mon, 09 Apr 2012)
New Revision: 1000

Added:
   pkg/FinancialInstrument/inst/parser/download.TrueFX.R
Log:
 add script to download FREE historical bid/ask TICK data for 15 spot FX rates

Added: pkg/FinancialInstrument/inst/parser/download.TrueFX.R
===================================================================
--- pkg/FinancialInstrument/inst/parser/download.TrueFX.R	                        (rev 0)
+++ pkg/FinancialInstrument/inst/parser/download.TrueFX.R	2012-04-09 13:54:16 UTC (rev 1000)
@@ -0,0 +1,192 @@
+
+# Download Bid/Ask tick data for 15 FX pairs since May 2009 from TrueFX.com
+#
+# Garrett See 
+#
+# You probably _should_ sign up for a FREE account with TrueFX before 
+# downloading their data, although presently (2012-04), it works without an 
+# account.
+#
+# Data will be downloaded, unzipped, converted to xts, and saved by day, both
+# as tick data and as 1-second frequency data.
+# seealso ?saveSymbols.days
+#
+# 3 directories will be created underneath base_dir: "archive", "tick", "sec"
+#  - The archive directory will contain the original zip files that each contain
+#    one month of tick data for a single FX pair
+#  - The tick directory will contain a subdirectory for each Symbol.  Each 
+#    subdirectory will contain an RData file for each day containing tick data
+#    for that Symbol on that day.
+#  - The sec directory will look the same as the tick directory, except the
+#    data will be converted to 1 second snapshots before saving.
+#
+# There is a lot of data here; be patient.  
+#
+# compressed data from May 2009 through March 2012 will use 19G of disk space
+# See the very end of this script for a breakdown of disk space used.
+#
+## NOTE: This script requires R >= 2.15.0 for the paste0 function.
+## NOTE: Not intended to be used on Windows.
+
+require(FinancialInstrument)
+require(foreach)
+require(doMC)
+
+################################################################################
+# Begin User Parameters #
+#########################
+base_dir <- "~/truefx/"
+## cores should not be greater than the number of gigs of available memory.
+registerDoMC(cores=4) # Can replace with a different registerDo* function
+Symbols <- c("AUDJPY", "AUDNZD", "AUDUSD", "CADJPY", "CHFJPY", "EURCHF",
+             "EURGBP", "EURJPY", "EURUSD", "GBPJPY", "GBPUSD", "NZDUSD",
+             "USDCAD", "USDCHF", "USDJPY")
+
+## yyyymm should be a chr vector of years and months formatted YYYYMM
+## The data are stored by month at truefx.com
+
+#yyyymm <- c("201201", "201202", "201203")
+
+curr.year <- as.numeric(format(Sys.Date(), "%Y"))
+# completed months of this year: none in January; numeric %m is locale-safe
+curr.mm <- seq_len(as.integer(format(Sys.Date(), "%m")) - 1L)
+curr.yyyymm <- sprintf("%d%02d", as.integer(curr.year), curr.mm)
+# Now combine the months from this year with the older year/months available
+yyyymm <- sort(c(curr.yyyymm,
+                 outer(2010:(curr.year - 1), sprintf("%02d", 1:12), paste0),
+                 paste0("2009", sprintf("%02d", 5:12))), decreasing=TRUE)
+#######################
+# End User Parameters #
+################################################################################
+
+if (Sys.info()[["sysname"]] == "Windows") {
+  warning('This script has only been tested on linux and mac.')
+}
+
+# if base_dir doesn't end with a forward slash, add a forward slash at the end
+if (substr(base_dir, nchar(base_dir), nchar(base_dir)) != "/") {
+  base_dir <- paste0(base_dir, "/")
+}
+
+# Create base_dir (and any missing parent directories) if it doesn't already
+# exist, as well as the 3 subdirectories
+archive_dir <- paste0(base_dir, "archive/")
+tick_dir <- paste0(base_dir, "tick/")
+sec_dir <- paste0(base_dir, "sec/")
+for (d in c(base_dir, archive_dir, tick_dir, sec_dir)) {
+  dir.create(d, mode="0755", showWarnings=FALSE, recursive=TRUE)
+}
+
+# set some options
+if (is.null(getOption("digits.secs"))) options(digits.secs=6)
+oldTZ <- Sys.getenv("TZ")
+Sys.setenv(TZ='GMT') # data is stored in GMT
+oldwd <- setwd(archive_dir) # setwd() returns the previous working directory
+
+# convert "2011 09", "2011-09", or "2011/09" to "201109".  Note: "[ -/]" is a
+# range (space through "/"), so other punctuation such as "." is dropped too.
+yyyymm <- substr(gsub("[ -/]", "", as.character(yyyymm)), 1, 6)
+
+# For every month and every one of the 15 pairs: download, convert and save.
+foreach(ym = yyyymm) %:% foreach(Symbol = Symbols) %dopar% {
+  cat(Symbol, ym, "\n")
+  yr <- substr(ym, 1, 4)
+  mon <- substr(ym, 5, 6)
+  zip.name <- paste0(Symbol, "-", yr, "-", mon, ".zip") # monthly zip file name
+  src.url <- paste0("http://www.truefx.com/dev/data/", yr, "/",
+                    toupper(month.name[as.numeric(mon)]), "-", yr, "/", zip.name)
+  zip.path <- paste0(archive_dir, zip.name)
+  file.create(zip.path)
+  cat("downloading ", src.url, "\n")
+  download.file(src.url, destfile=zip.path)
+  cat("unzipping ", src.url, "\n")
+  csv.path <- unzip(zip.path) # extracted into the current working directory
+  cat("reading ", csv.path, '\n')
+  elapsed <- system.time(quotes <- read.csv(csv.path, header=FALSE,
+                                            stringsAsFactors=FALSE))
+  cat(elapsed, "\n")
+  unlink(csv.path) # the zip is kept; only the extracted csv is removed
+  id <- sub("/", "", quotes[1, 1]) # first field of row 1, minus its first "/"
+  cat("making index for ", id, "\n")
+  stamps <- as.POSIXct(quotes[, 2], format="%Y%m%d %H:%M:%OS", tz="GMT")
+  ticks <- xts(quotes[, 3:4], order.by=stamps)
+  colnames(ticks) <- c("Bid.Price", "Ask.Price")
+  store <- new.env() # environment handed to saveSymbols.days() through env=
+  store[[id]] <- ticks
+  cat("saving ", id, " tick\n")
+  saveSymbols.days(id, base_dir=tick_dir, extension="RData", env=store)
+  store[[id]] <- align.time(to.period(ticks, "seconds", name=id, OHLC=FALSE), 1)
+  cat("saving ", id, " sec\n\n")
+  saveSymbols.days(id, base_dir=sec_dir, extension="RData", env=store)
+  rm("store", "ticks", "quotes")
+}
+# Restore previous settings (an empty oldTZ means TZ was not set: unset it)
+if (nzchar(oldTZ)) Sys.setenv(TZ=oldTZ) else Sys.unsetenv("TZ")
+setwd(oldwd)
+
+## Now you should be able to
+# getSymbols(Symbols, src='FI', dir=sec_dir, extension='RData', 
+#            split_method='days', from='2009-05-01', days_to_omit="Saturday")
+# or 
+# getSymbols(Symbols, src='FI', dir=tick_dir, extension='RData', 
+#            split_method='days', from='2009-05-01', days_to_omit="Saturday")
+
+
+
+### Define instruments
+#source("~/svn/blotter/pkg/FinancialInstrument/inst/parser/ISO.currencies.wiki.R")
+#ccy <- unique(c(sapply(Symbols, substr, 1, 3), sapply(Symbols, substr, 4, 6)))
+#
+##rm_instruments()
+##rm_currencies()
+#define_currencies.wiki(ccy)
+#fx <- exchange_rate(Symbols)
+#lapply(fx, function(x) {
+#  add.identifier(x, local=paste(substr(x, 1, 3), substr(x, 4, 6), sep="."))
+#})
+#
+#lapply(fx, instrument_attr, "exchange", "TrueFX")
+#lapply(fx, instrument_attr, "indexTZ", "GMT")
+#lapply(fx, instrument_attr, "updated", Sys.time())
+#
+#saveInstruments("TrueFXinstruments.RData", dir=base_dir)
+  
+################################################################################
+## Disk usage after downloading data for all Symbols from 200905 through 201203
+#
+# $ du -h truefx
+# 8.7G    truefx/archive
+# 147M    truefx/sec/AUDJPY
+# 128M    truefx/sec/AUDNZD
+# 131M    truefx/sec/AUDUSD
+# 139M    truefx/sec/CADJPY
+# 140M    truefx/sec/CHFJPY
+# 120M    truefx/sec/EURCHF
+# 143M    truefx/sec/EURGBP
+# 150M    truefx/sec/EURJPY
+# 156M    truefx/sec/EURUSD
+# 148M    truefx/sec/GBPJPY
+# 144M    truefx/sec/GBPUSD
+# 77M     truefx/sec/NZDUSD
+# 99M     truefx/sec/USDCAD
+# 139M    truefx/sec/USDCHF
+# 98M     truefx/sec/USDJPY
+# 2.0G    truefx/sec
+# 670M    truefx/tick/AUDJPY
+# 522M    truefx/tick/AUDNZD
+# 538M    truefx/tick/AUDUSD
+# 626M    truefx/tick/CADJPY
+# 641M    truefx/tick/CHFJPY
+# 478M    truefx/tick/EURCHF
+# 626M    truefx/tick/EURGBP
+# 647M    truefx/tick/EURJPY
+# 866M    truefx/tick/EURUSD
+# 642M    truefx/tick/GBPJPY
+# 574M    truefx/tick/GBPUSD
+# 248M    truefx/tick/NZDUSD
+# 391M    truefx/tick/USDCAD
+# 609M    truefx/tick/USDCHF
+# 408M    truefx/tick/USDJPY
+# 8.3G    truefx/tick
+# 19G     truefx
+



More information about the Blotter-commits mailing list