[Blotter-commits] r709 - pkg/FinancialInstrument/sandbox
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Thu Aug 4 19:19:57 CEST 2011
Author: braverock
Date: 2011-08-04 19:19:56 +0200 (Thu, 04 Aug 2011)
New Revision: 709
Added:
pkg/FinancialInstrument/sandbox/TRTH_BackFill.R
Log:
- TRTH backfill
Added: pkg/FinancialInstrument/sandbox/TRTH_BackFill.R
===================================================================
--- pkg/FinancialInstrument/sandbox/TRTH_BackFill.R (rev 0)
+++ pkg/FinancialInstrument/sandbox/TRTH_BackFill.R 2011-08-04 17:19:56 UTC (rev 709)
@@ -0,0 +1,382 @@
+##############################################################################
+# Reuters Backfill Configuration Parameters #
+##############################################################################
+config_file <- "/full/path/to/config.csv" # full path to the config file with instrument metadata in it
+archive_dir <- "/full/path/to/archives/" # where the split CSV, job reports, and gz files will be stored
+path.output <- "/full/path/to/output/" # root dir where the .rda files for the xts will be placed
+
+job.name <- "mybackfill" # the Reuters TRTH job name
+
+username <- "username" #TRTH user name, usually your email address
+password <- "password" #TRTH password
+image <- TRUE
+default_type='future_series'
+default_type='guaranteed_spread'
+
+email_to <- 'someuser at somehost.com'
+email_from <- 'someuser at somehost.com'
+
+no.cores <- 1 # for foreach
+##############################################################################
+
+
+Sys.umask("0002")
+
+options(width=200)
+Date <- Sys.Date()
+
+require(xts)
+require(quantmod)
+require(doMC)
+#require(sendmailR) # for email on failure
+#error.codes<-read.csv('curl.errors.csv',stringsAsFactors=FALSE,header=TRUE,row.names=1)
+
+registerDoMC(no.cores)
+# registerDoSEQ()
+
+Archive.output <- list.files(archive_dir)
+Archive.output <- Archive.output[grep(".gz",Archive.output)]
+Archive.output <- Archive.output[-c(grep("confirmation",Archive.output),grep("report",Archive.output))]
+
+listflag=FALSE
+while(!listflag)#try to download file list
+{
+ clear<-warnings() #currency loads from oanda alway generate warnings, clear them out
+ Reuters <- system(paste("curl ftp://tickhistory-ftp.thomsonreuters.com:15500/results/ -u ",username,":",password," --ftp-ssl -k -l",sep=""),intern=TRUE)
+ w<-''
+ w<-warnings()
+ if(!as.logical(length(Reuters)) || isTRUE(grep('curl',names(w))))
+ {
+ tmpmsg<-paste("curl returned error code", names(w),'\n','while attempting to download file list','\n','script will wait and retry in 30 min')
+ #sendmail(email_to,email_from,"error downloading Reuters file list",msg=tmpmsg)
+ Sys.sleep(1800)
+ } else listflag=TRUE
+}
+
+# or to start from the file list
+#
+
+
+# now we're past the while loop, so we have a file list
+Reuters.report <- Reuters[grep("report",Reuters)]
+Reuters.output <- Reuters[-c(grep("report",Reuters),grep("confirmation",Reuters))]
+Reuters.output <- Reuters.output[grep(job.name,Reuters.output)]
+
+Reuters.new <- Reuters.output[!(Reuters.output %in% Archive.output)]
+
+
+for(i in 1:length(Reuters.new))
+{
+ filename.gz <- Reuters.new[i]
+ filename.csv <- substr(filename.gz,1,(nchar(filename.gz)-3))
+
+ alias <- unlist(strsplit(filename.gz,"-"))[3]
+ alias <- unlist(strsplit(alias,".csv.gz"))
+
+ ## Download New Datasets
+ print(paste("Downloading ",filename.gz,sep=""))
+ fileflag=FALSE
+ while(!fileflag) #try to download individual files
+ {
+ Reuters2<-system(paste("curl -m 10800 --max-filesize 1610612736 ftp://tickhistory-ftp.thomsonreuters.com:15500/results/",filename.gz, " -u ",username,":",password," --ftp-ssl -k > ",archive_dir,"/",filename.gz,sep=""))
+ if(Reuters2!=0)
+ {
+ w2<-''
+ w2<-warnings()
+ tmpmsg<-paste("curl returned error code", Reuters2,"\n",w2,'\n','while attempting to download',filename.gz,'\n','will wait and retry in 10 min')
+ #sendmail(email_to,email_from,paste("error downloading Reuters file",filename.gz),msg=tmpmsg)
+ Sys.sleep(600)
+ } else fileflag=TRUE
+ }
+
+ ## Download Report s
+ system(paste("curl ftp://tickhistory-ftp.thomsonreuters.com:15500/results/",Reuters.report[grep(alias,Reuters.report)], " -u ",username,":",password," --ftp-ssl -k > ",archive_dir,"/",Reuters.report[grep(alias,Reuters.report)],sep=""))
+ system(paste("gzip -d -f ",archive_dir,"Report/",Reuters.report[grep(alias,Reuters.report)],sep=""))
+
+}
+
+## now unzip, split, rezip
+setwd(archive_dir)
+#foreach(j=1:length(Reuters.new)) %dopar%
+for(j in 1:length(Reuters.new))
+{
+
+ # system(paste("split --lines=10000000 -d ",filename.csv," split.",sep=""))
+ # files.split <- list.files("/home/mktdata/ReutersData/Archives", pattern="split.")
+
+ filename.gz <- Reuters.new[j]
+ filename.csv <- substr(filename.gz,1,(nchar(filename.gz)-3))
+
+ #unzip the file
+ print(paste("unzipping ",filename.gz, sep=""))
+ system(paste("gzip -d -f ",archive_dir,'/',filename.gz,sep=""))
+
+ alias <- unlist(strsplit(filename.gz,"-"))[3]
+ alias <- unlist(strsplit(alias,".csv.gz"))
+
+ ## Split the Files
+ ## command line awkstring would look like this:
+ #awk -F "," '{print >>$1"."$2".csv"}' sourcefile.csv
+ #
+ ## modified to deal with 'too many open files', thanks to Josh Ulrich
+ # awk -v f2="" -F "," '{
+ # f1 = $1"."$2".csv";
+ # print >> f1;
+ # if(f1 != f2) {
+ # close(f2);
+ # f2=f1;
+ # }
+ # }' sourcefile.csv
+ ## NOTE: if you get errors on 'too many open files' from awk, you'll need to adjust ulimit/nolimit
+ print(paste("Splitting ",filename.csv,sep=""))
+ #system(paste('awk -F "," ',"'{print >> $1", '"."$2".csv','"}',"' ",filename.csv, sep=""))
+ system(paste('awk -v f2="" -F "," '," '",'{
+ f1 = $1"."$2".csv";
+ print >> f1;
+ if(f1 != f2) {
+ close(f2);
+ f2=f1;
+ }
+ }',"' ",filename.csv, sep="")) #Improved awk w/ file close thanks to Josh Ulrich
+ ## Zip the File
+ print(paste("zipping ",filename.csv,sep=""))
+ system(paste("gzip -f ",archive_dir,'/',filename.csv,sep=""))
+
+}
+
+
+
+files.csv <- list.files(archive_dir)
+files.csv <- files.csv[-grep(".csv.gz",files.csv)]
+files.csv <- files.csv[grep(".csv",files.csv)]
+
+files.header <- files.csv[grep("RIC",files.csv)]
+files.csv <- files.csv[-grep("RIC",files.csv)]
+files.xts <- NULL
+
+for(k in 1:length(files.csv))
+{
+ #print(k)
+ name.csv <- files.csv[k]
+ name <- unlist(strsplit(name.csv,".",fixed=TRUE))[1]
+ RIC.date <- try(as.Date(unlist(strsplit(name.csv,".",fixed=TRUE))[2], format="%d-%b-%Y"))
+ date.format <- gsub("-",".",RIC.date)
+
+ if(weekdays(RIC.date)=="Saturday" | weekdays(RIC.date)=="Sunday")
+ {
+ file.remove(paste(archive_dir,name.csv,sep=""))
+ next
+
+ }
+
+ ## Handle leading digits and VIX and Cash
+ if(substr(name,1,1)==1){name.new <- substr(name,2,nchar(name));name.new <- make.names(name.new)}
+ else{name.new <- make.names(name)}
+
+ ## Does directory exist?
+ ex <- file.exists(paste(path.output,date.format,"/",sep=""))
+ if(ex != TRUE){dir.create(paste(path.output,date.format,"/",sep=""), mode="775")}
+
+ ## Move files to appropriate place
+ #system(paste("mv -f ", path.output,"Archives/",name.csv, " ", path.output,date.format,"/",date.format,".",name.new,".csv", sep=""))
+ system(paste("mv -f ", name.csv, " ", path.output,date.format,"/",date.format,".",name.new,".csv", sep=""))
+
+ print(paste(date.format, name.new, "moved", sep=" "))
+ files.xts <- rbind(files.xts,as.data.frame(cbind(name.new,date.format),stringsAsFactors=FALSE))
+
+}
+
+# now get instrument data
+files.xts$type<-rep(NA,nrow(files.xts))
+instr_s<-unique(files.xts[,'name.new'])
+for(i in 1:length(instr_s)){
+ instr<-getInstrument(instr_s[i])
+ if(is.instrument(instr)){
+ files.xts[grep(instr_s[i],files.xts[,'name.new']),'type']<-as.character(instr$type[1])
+ } else {
+ print(instr, 'does not appear to be an instrument, setting it to', default_type)
+ files.xts[grep(instr_s[i],files.xts[,'name.new']),'type']<-default_type
+ }
+}
+
+##If trying to fix a broken set:
+#files.csv<-'';for(dir in list.files(getwd(),pattern="20")) {files.csv<-c(files.csv,list.files(paste(getwd(),'/',dir,'/',sep=''),pattern=".csv"))}[-1]
+#files.xts<-NULL
+#for(l in 1:length(files.csv)) { rsplit<-as.vector(strsplit(files.csv[l],'.',fixed=TRUE)[[1]]); files.xts<-rbind(files.xts,cbind(rsplit[4],paste(rsplit[1],rsplit[2],rsplit[3],sep='.'))); print(files.xts[l,])}
+#colnames(files.xts)<-c('name.new','date.format')
+
+H <- read.csv(paste(path.output,"Archives/#RIC.Date[G].csv",sep=""),header=FALSE,stringsAsFactors=FALSE)
+H <- H[nrow(H),]
+H <- make.names(H)
+
+## Into xts###############################################################
+reut2xts <- function( data, datapath, image=TRUE, overwrite=FALSE )
+{
+ prod <- data[,'name.new']
+ date <- data[,'date.format']
+ type <- data[,'type']
+
+ print(paste(date, prod, "xts", sep=" "))
+
+ RIC.code <- prod
+
+ file.name <- paste(datapath,"xts/",RIC.code,"/",date,".",RIC.code,".RData",sep="")
+ if(!isTRUE(overwrite) && file.exists(file.name)){
+ return(paste(file.name,"already exists, not overwriting"))
+ }
+
+ Data <- read.csv(paste(datapath,date,'/',date,'.',prod,'.csv',sep=''),stringsAsFactors=FALSE,header=FALSE)
+ names(Data) <- H
+
+ OTC.remove <- grep("IRGCOND",Data$Qualifiers)
+ OTC.remove <- c(OTC.remove,grep("High[USER]",Data$Qualifiers,fixed=TRUE))
+ OTC.remove <- c(OTC.remove,grep("Low[USER]",Data$Qualifiers,fixed=TRUE))
+
+ if(substr(prod,1,(nchar(prod)-2))=="ICF"){OTC.remove <- NULL}
+ if(substr(prod,1,(nchar(prod)-2))=="DOL"){OTC.remove <- NULL}
+ if(dim(Data)[1]<=25){return(NULL)}
+
+ index.new <- as.POSIXct(paste(Data$Date.G.,Data$Time.G,sep=" "),format="%d-%b-%Y%H:%M:%OS",tz="GMT")
+
+ ## Force Everything to numerics
+ Data <- Data[,c("Price","Volume","Bid.Price","Bid.Size","Ask.Price","Ask.Size")]
+ Data$Price <- as.numeric(Data$Price)
+ Data$Volume <- as.numeric(Data$Volume)
+ Data$Bid.Price <- as.numeric(Data$Bid.Price)
+ Data$Bid.Size <- as.numeric(Data$Bid.Size)
+ Data$Ask.Price <- as.numeric(Data$Ask.Price)
+ Data$Ask.Size <- as.numeric(Data$Ask.Size)
+
+ Data <- xts(Data,order.by=index.new,tz="GMT")
+
+ ## Remove block trades
+ if(length(OTC.remove)){Data <- Data[-OTC.remove]}
+
+ ## Turn bids/offers that are zero or less into NA for outrights
+
+ if(type !="guaranteed_spread")
+ {
+ Data$Bid.Price[Data$Bid.Price<=0,] <- NA
+ Data$Ask.Price[Data$Ask.Price<=0,] <- NA
+ Data$Price[Data$Ask.Price<=0,] <- NA
+ }else{
+ Data$Bid.Price[Data$Bid.Price==0,] <- NA
+ Data$Ask.Price[Data$Ask.Price==0,] <- NA
+ Data$Price[Data$Ask.Price==0,] <- NA
+ }
+
+ ## Carry last bid/offer forward
+
+ Data$Bid.Price <- na.locf(Data$Bid.Price)
+ Data$Ask.Price <- na.locf(Data$Ask.Price)
+
+ Data$Bid.Size <- na.locf(Data$Bid.Size)
+ Data$Ask.Size <- na.locf(Data$Ask.Size)
+
+ ## Remove Trades with Price or Volume of zero
+
+ Price.remove <- which(Data$Price == 0)
+ Volume.remove <- which(Data$Volume == 0)
+
+ if(length(c(Price.remove,Volume.remove))!=0)
+ {
+ Data <- Data[-c(Price.remove,Volume.remove)]
+ }
+ if(dim(Data)[1]<=25){return(NULL)}
+
+
+ ## Remove Price w/ Volume of NA and
+ ## Volume w/ Price of NA
+ na.remove <- c(which(!is.na(Data$Price) & is.na(Data$Volume)),
+ which(is.na(Data$Price) & !is.na(Data$Volume)))
+ if (length(na.remove)!=0) { Data <- Data[-sort(na.remove)] }
+
+ ## not enough rows
+ if(dim(Data)[1]<=10){return(NULL)}
+
+ ## Remove leading NAs on Bid/Ask
+ bid.remove <- which(is.na(Data$Bid.Price))
+ ask.remove <- which(is.na(Data$Ask.Price))
+ union.remove <- c(bid.remove,ask.remove)
+ if(length(union.remove)>0){Data <- Data[-union.remove]}
+
+ ## not enough rows
+ if(dim(Data)[1]<=25){return(NULL)}
+
+ ## Rename Data to RIC code
+ assign(RIC.code,Data)
+
+ ## Does xts directory exist?
+ ex <- file.exists(paste(datapath,"xts/",prod,"/",sep=""))
+ if(!ex){dir.create(paste(datapath,"xts/",prod,"/",sep=""),mode="775")}
+
+
+ file.name <- paste(datapath,"xts/",RIC.code,"/",date,".",RIC.code,".RData",sep="")
+ if(!isTRUE(overwrite) && file.exists(file.name)){
+ return(paste(file.name,"already exists, not overwriting"))
+ } else {
+ file.text <- paste("save(",RIC.code," ,file='",file.name,"')",sep="")
+ if(!file.exists(paste(datapath,"xts/",RIC.code,sep=""))){dir.create(paste(datapath,"xts/",RIC.code,sep=""),mode="775")}
+ eval(parse(text=file.text))
+ }
+
+ datarange <- range(index(Data),na.rm = TRUE)
+ datarange.dif <- difftime(datarange[2],datarange[1],units="secs")
+
+ if(isTRUE(image) && datarange.dif>3600)
+ {
+ ## Bid
+ if(!file.exists(paste(datapath,"xts/",RIC.code,"/Bid.Image/",sep=""))){dir.create(paste(datapath,"xts/",RIC.code,"/Bid.Image/",sep=""),mode="775")}
+ png(filename=paste(datapath,"xts/",RIC.code,"/Bid.Image/",date,".",RIC.code,".png",sep=""),width=1500,height=1000)
+ try(chartSeries(to.minutes(Data$Bid.Price,1),type="bar"),silent=TRUE)
+ dev.off()
+
+ file.copy(paste(datapath,"xts/",RIC.code,"/Bid.Image/",date,".",RIC.code,".png",sep=""),paste(datapath,"Archives/Bid.Image.tmp/",date,".",RIC.code,".png",sep=""))
+
+ ## Ask
+ if(!file.exists(paste(datapath,"xts/",RIC.code,"/Ask.Image/",sep=""))){dir.create(paste(datapath,"xts/",RIC.code,"/Ask.Image/",sep=""),mode="775")}
+ png(paste(datapath,"xts/",RIC.code,"/Ask.Image/",date,".",RIC.code,".png",sep=""),width=1500,height=1000)
+ try(chartSeries(to.minutes(Data$Ask.Price,1),type="bar"),silent=TRUE)
+ dev.off()
+
+ file.copy(paste(datapath,"xts/",RIC.code,"/Ask.Image/",date,".",RIC.code,".png",sep=""),paste(datapath,"Archives/Ask.Image.tmp/",date,".",RIC.code,".png",sep=""))
+
+ ## Price
+ Data.1 <- Data[!is.na(Data$Price),]
+ if(dim(Data.1)[1]>50)
+ {
+ if(!file.exists(paste(datapath,"xts/",RIC.code,"/Price.Image/",sep=""))){dir.create(paste(datapath,"xts/",RIC.code,"/Price.Image/",sep=""),mode="775")}
+ png(paste(datapath,"xts/",RIC.code,"/Price.Image/",date,".",RIC.code,".png",sep=""),width=1500,height=1000)
+ try(chartSeries(to.minutes(na.omit(Data$Price),1),type="bar"),silent=TRUE)
+ dev.off()
+
+ file.copy(paste(datapath,"xts/",RIC.code,"/Price.Image/",date,".",RIC.code,".png",sep=""),paste(datapath,"Archives/Price.Image.tmp/",date,".",RIC.code,".png",sep=""))
+ }
+ }
+
+ rm(list = paste(RIC.code,sep=""))
+
+} ## End fn reut2xts
+
+Out <- foreach(ii=iter(1:nrow(files.xts)),.errorhandling='pass') %dopar% reut2xts(files.xts[ii,,drop=FALSE],datapath=path.output, image=image)
+
+# now clean up
+files.rm <- list.files(archive_dir)
+files.rm <- files.rm[-grep(".csv.gz",files.rm)]
+files.rm <- files.rm[grep(".csv",files.rm)]
+file.remove(files.rm)
+
+
+rm(Out)
+
+###############################################################################
+# Copyright (c) 2009-2011
+# Peter Carl, Brian G. Peterson, Lance Levenson
+#
+# This code is distributed under the terms of the GNU Public License (GPL)
+# for full details see the file COPYING
+#
+# $Id$
+#
+###############################################################################
+
Property changes on: pkg/FinancialInstrument/sandbox/TRTH_BackFill.R
___________________________________________________________________
Added: svn:keywords
+ Revision Id Date Author
More information about the Blotter-commits
mailing list