[Blotter-commits] r1000 - pkg/FinancialInstrument/inst/parser
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Mon Apr 9 15:54:16 CEST 2012
Author: gsee
Date: 2012-04-09 15:54:16 +0200 (Mon, 09 Apr 2012)
New Revision: 1000
Added:
pkg/FinancialInstrument/inst/parser/download.TrueFX.R
Log:
add script to download FREE historical bid/ask TICK data for 15 spot FX rates
Added: pkg/FinancialInstrument/inst/parser/download.TrueFX.R
===================================================================
--- pkg/FinancialInstrument/inst/parser/download.TrueFX.R (rev 0)
+++ pkg/FinancialInstrument/inst/parser/download.TrueFX.R 2012-04-09 13:54:16 UTC (rev 1000)
@@ -0,0 +1,192 @@
+
+# Download Bid/Ask tick data for 15 FX pairs since May 2009 from TrueFX.com
+#
+# Garrett See
+#
+# You probably _should_ sign up for a FREE account with TrueFX before
+# downloading their data, although presently (2012-04), it works without an
+# account.
+#
+# Data will be downloaded, unzipped, converted to xts, and saved as both
+# tick data and 1 second frequency data. Then it is saved by day.
+# seealso ?saveSymbols.days
+#
+# 3 directories will be created underneath base_dir: "archive", "tick", "sec"
+# - The archive directory will contain the original zip files that each contain
+# one month of tick data for a single FX pair
+# - The tick directory will contain a subdirectory for each Symbol. Each
+# subdirectory will contain an RData file for each day containing tick data
+# for that Symbol on that day.
+# - The sec directory will look the same as the tick directory, except the
+# data will be converted to 1 second snapshots before saving.
+#
+# There is a lot of data here; be patient.
+#
+# compressed data from May 2009 through March 2012 will use 19G of disk space
+# See the very end of this script for a breakdown of disk space used.
+#
+## NOTE: This script requires R 2.15.0 for the paste0 function.
+## NOTE: Not intended to be used on Windows.
+
+require(FinancialInstrument)
+require(foreach)
+require(doMC)
+
+################################################################################
+# Begin User Parameters #
+#########################
+base_dir <- "~/truefx/"
+## cores should not be greater than the number of gigs of available memory.
+registerDoMC(cores=4) # Can replace with a different registerDo* function
+Symbols <- c("AUDJPY", "AUDNZD", "AUDUSD", "CADJPY", "CHFJPY", "EURCHF",
+ "EURGBP", "EURJPY", "EURUSD", "GBPJPY", "GBPUSD", "NZDUSD",
+ "USDCAD", "USDCHF", "USDJPY")
+
+## yyyymm should be a chr vector of years and months formatted YYYYMM
+## The data are stored by month at truefx.com
+
+#yyyymm <- c("201201", "201202", "201203")
+
+curr.year <- as.numeric(format(Sys.Date(), "%Y"))
+# get all the months in the current year that have already passed
+curr.yyyymm <- paste0(curr.year,
+ sprintf("%02d", 1:(match(months(Sys.Date()), month.name) - 1)))
+# Now combine the months from this year with the older year/months available
+yyyymm <- sort(c(curr.yyyymm,
+ outer(2010:(curr.year - 1), sprintf("%02d", 1:12), paste0),
+ paste0("2009", sprintf("%02d", 5:12))), decreasing=TRUE)
+#######################
+# End User Parameters #
+################################################################################
+
+if (Sys.info()[["sysname"]] == "Windows") {
+ warning(paste0('This script has only been tested on linux and mac.'))
+}
+
+# if base_dir doesn't end with a forward slash, add a forward slash at the end
+if (substr(base_dir, nchar(base_dir), nchar(base_dir)) != "/") {
+ base_dir <- paste0(base_dir, "/")
+}
+
+# Create base_dir if it doesn't already exist as well as 3 subdirectories
+dir.create(base_dir, mode="0755", showWarnings=FALSE)
+dir.create(archive_dir <- paste0(base_dir, "archive/"), mode="0755",
+ showWarnings=FALSE)
+dir.create(tick_dir <- paste0(base_dir, "tick/"), mode="0755",
+ showWarnings=FALSE)
+dir.create(sec_dir <- paste0(base_dir, "sec/"), mode="0755",
+ showWarnings=FALSE)
+
+# set some options
+if (is.null(getOption("digits.secs"))) options(digits.secs=6)
+oldTZ <- Sys.getenv("TZ")
+Sys.setenv(TZ='GMT') # data is stored in GMT
+oldwd <- getwd()
+setwd(archive_dir)
+
+# convert "2011 09", "2011-09", or "2011/09" to "201109"
+yyyymm <- substr(gsub("[ -/]", "", as.character(yyyymm)), 1, 6)
+
+# for each of the 15 pairs, download data for each of the yyyymm months.
+foreach(ym = yyyymm) %:% foreach(Symbol = Symbols) %dopar% {
+ cat(Symbol, ym, "\n")
+ yyyy <- substr(ym, 1, 4)
+ mm <- substr(ym, 5, 6)
+ zf.name <- paste0("http://www.truefx.com/dev/data/", yyyy, "/",
+ toupper(month.name[as.numeric(mm)]), "-", yyyy, "/",
+ Symbol, "-", yyyy, "-", mm, ".zip")
+ file.create(fl <- paste0(archive_dir, Symbol, "-", yyyy, "-", mm, ".zip"))
+ cat("downloading ", zf.name, "\n")
+ download.file(zf.name, destfile=fl)
+ cat("unzipping ", zf.name, "\n")
+ uzf <- unzip(fl)
+ cat("reading ", uzf, '\n')
+ cat(system.time(fr <- read.csv(uzf, header=FALSE, stringsAsFactors=FALSE)),
+ "\n")
+ unlink(uzf)
+ id <- sub("/", "", fr[1, 1])
+ cat("making index for ", id, "\n")
+ idx <- as.POSIXct(fr[, 2], format="%Y%m%d %H:%M:%OS", tz="GMT")
+ obj <- xts(fr[, 3:4], idx)
+ #colnames(obj) <- paste(id, c("Bid.Price", "Ask.Price"), sep=".")
+ colnames(obj) <- c("Bid.Price", "Ask.Price")
+ tmpenv <- new.env()
+ assign(id, obj, tmpenv)
+ cat("saving ", id, " tick\n")
+ saveSymbols.days(id, base_dir=tick_dir, extension="RData", env=tmpenv)
+ assign(id, align.time(to.period(obj, "seconds", name=id, OHLC=FALSE), 1),
+ tmpenv)
+ cat("saving ", id, " sec\n\n")
+ saveSymbols.days(id, base_dir=sec_dir, extension="RData", env=tmpenv)
+ rm("tmpenv", "obj", "fr")
+}
+# Restore previous settings
+Sys.setenv(TZ=oldTZ)
+setwd(oldwd)
+
+## Now you should be able to
+# getSymbols(Symbols, src='FI', dir=sec_dir, extension='RData',
+# split_method='days', from='2009-05-01', days_to_omit="Saturday")
+# or
+# getSymbols(Symbols, src='FI', dir=tick_dir, extension='RData',
+# split_method='days', from='2009-05-01', days_to_omit="Saturday")
+
+
+
+### Define instruments
+#source("~/svn/blotter/pkg/FinancialInstrument/inst/parser/ISO.currencies.wiki.R")
+#ccy <- unique(c(sapply(Symbols, substr, 1, 3), sapply(Symbols, substr, 4, 6)))
+#
+##rm_instruments()
+##rm_currencies()
+#define_currencies.wiki(ccy)
+#fx <- exchange_rate(Symbols)
+#lapply(fx, function(x) {
+# add.identifier(x, local=paste(substr(x, 1, 3), substr(x, 4, 6), sep="."))
+#})
+#
+#lapply(fx, instrument_attr, "exchange", "TrueFX")
+#lapply(fx, instrument_attr, "indexTZ", "GMT")
+#lapply(fx, instrument_attr, "updated", Sys.time())
+#
+#saveInstruments("TrueFXinstruments.RData", dir=base_dir)
+
+################################################################################
+## Disk usage after downloading data for all Symbols from 200905 through 201203
+#
+# $ du -h truefx
+# 8.7G truefx/archive
+# 147M truefx/sec/AUDJPY
+# 128M truefx/sec/AUDNZD
+# 131M truefx/sec/AUDUSD
+# 139M truefx/sec/CADJPY
+# 140M truefx/sec/CHFJPY
+# 120M truefx/sec/EURCHF
+# 143M truefx/sec/EURGBP
+# 150M truefx/sec/EURJPY
+# 156M truefx/sec/EURUSD
+# 148M truefx/sec/GBPJPY
+# 144M truefx/sec/GBPUSD
+# 77M truefx/sec/NZDUSD
+# 99M truefx/sec/USDCAD
+# 139M truefx/sec/USDCHF
+# 98M truefx/sec/USDJPY
+# 2.0G truefx/sec
+# 670M truefx/tick/AUDJPY
+# 522M truefx/tick/AUDNZD
+# 538M truefx/tick/AUDUSD
+# 626M truefx/tick/CADJPY
+# 641M truefx/tick/CHFJPY
+# 478M truefx/tick/EURCHF
+# 626M truefx/tick/EURGBP
+# 647M truefx/tick/EURJPY
+# 866M truefx/tick/EURUSD
+# 642M truefx/tick/GBPJPY
+# 574M truefx/tick/GBPUSD
+# 248M truefx/tick/NZDUSD
+# 391M truefx/tick/USDCAD
+# 609M truefx/tick/USDCHF
+# 408M truefx/tick/USDJPY
+# 8.3G truefx/tick
+# 19G truefx
+
More information about the Blotter-commits
mailing list