[Blotter-commits] r1018 - in pkg/RTAQ: R man

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Sat May 5 22:58:08 CEST 2012


Author: jonathan
Date: 2012-05-05 22:58:07 +0200 (Sat, 05 May 2012)
New Revision: 1018

Modified:
   pkg/RTAQ/R/aggregate.R
   pkg/RTAQ/R/convert_to_RData.R
   pkg/RTAQ/R/volatility.R
   pkg/RTAQ/man/convert.Rd
Log:
Updated convert function (conversion X to xts/Rdata): supports data from tickdata.com and allows input data from WRDS to be in one csv file.

Modified: pkg/RTAQ/R/aggregate.R
===================================================================
--- pkg/RTAQ/R/aggregate.R	2012-05-05 01:33:07 UTC (rev 1017)
+++ pkg/RTAQ/R/aggregate.R	2012-05-05 20:58:07 UTC (rev 1018)
@@ -48,24 +48,24 @@
             secs = k
         }
         a = .index(ts2) + (secs - .index(ts2)%%secs)
-        ts3 = .xts(ts2, a,tz="GMT")
+        ts3 = .xts(ts2, a,tzone="GMT")
     }
     if (on == "hours") {
         secs = 3600
         a = .index(ts2) + (secs - .index(ts2)%%secs)
-        ts3 = .xts(ts2, a,tz="GMT")
+        ts3 = .xts(ts2, a,tzone="GMT")
     }
     if (on == "days") {
         secs = 24 * 3600
         a = .index(ts2) + (secs - .index(ts2)%%secs) - (24 * 
             3600)
-        ts3 = .xts(ts2, a,tz="GMT")
+        ts3 = .xts(ts2, a,tzone="GMT")
     }
     if (on == "weeks") {
         secs = 24 * 3600 * 7
         a = (.index(ts2) + (secs - (.index(ts2) + (3L * 86400L))%%secs)) - 
             (24 * 3600)
-        ts3 = .xts(ts2, a,tz="GMT")
+        ts3 = .xts(ts2, a,tzone="GMT")
     }
 
     if (!dropna) {

Modified: pkg/RTAQ/R/convert_to_RData.R
===================================================================
--- pkg/RTAQ/R/convert_to_RData.R	2012-05-05 01:33:07 UTC (rev 1017)
+++ pkg/RTAQ/R/convert_to_RData.R	2012-05-05 20:58:07 UTC (rev 1018)
@@ -28,29 +28,7 @@
 return(data);
 }
 
-############################
-convert = function(from,to,datasource,datadestination,trades=TRUE,quotes=TRUE,ticker,dir=FALSE,extention="txt",header=FALSE,tradecolnames=NULL,quotecolnames=NULL,format="%Y%M%D %H:%M:%S"){
-  dates = timeSequence(from,to, format = "%Y-%m-%d", FinCenter = "GMT")
-  dates = dates[isBizday(dates, holidays = holidayNYSE(1950:2030))];
-  missingt = missingq = matrix(ncol=2,nrow=0);
 
-  if(dir)	{
-  dir.create(datadestination);
-  for(i in 1:length(dates))	{
-  dirname = paste(datadestination,"/",as.character(dates[i]),sep="")
-  dir.create(dirname);
- 					}			
-		}
-
-  for(i in 1:length(dates)){
-  datasourcex = paste(datasource,"/",dates[i],sep="");
-  datadestinationx = paste(datadestination,"/",dates[i],sep="");
-  if(trades==TRUE){convert_trades(datasourcex,datadestinationx,ticker,extention=extention,header=header,tradecolnames=tradecolnames,format=format)}
-  if(quotes==TRUE){convert_quotes(datasourcex,datadestinationx,ticker,extention=extention,header=header,quotecolnames=quotecolnames,format=format)}
-  }
-}
-
-
 convert_trades = function (datasource, datadestination, ticker, extention = "txt", 
     header = FALSE, tradecolnames = NULL, format = "%Y%M%D %H:%M:%S") 
 {  
@@ -173,4 +151,151 @@
     }
 }
 
+############## NEW CODE GSoC 2012 ###############################################
+makeXtsTrades = function(tdata,format=format){
+  adjtime = function(z) {
+    zz = unlist(strsplit(z, ":"))
+    if (nchar(zz[1]) != 2) {
+      return(paste(paste(0, zz[1], sep = ""), zz[2], zz[3], 
+                   sep = ":"))  } 
+    return(z) }
+  tradecolnames = colnames(tdata)
+  if (is.null(tradecolnames)){
+    tradecolnames = c("SYMBOL", "DATE", "EX", "TIME", 
+                      "PRICE", "SIZE", "COND", "CORR", "G127");
+    colnames(tdata) = tradecolnames; }  
+  
+  cond = tdata$COND[is.na(tdata$G127)];
+  cr = tdata$CORR[is.na(tdata$G127)];
+  tdata$COND[is.na(tdata$G127)] = 0;
+  tdata$CORR[is.na(tdata$G127)] = as.character(cond);
+  tdata$G127[is.na(tdata$G127)] = as.character(cr);
+  rm(cond, cr);
+  oldtime = as.matrix(as.vector(tdata$TIME));
+  newtime = apply(oldtime, 1, adjtime);
+  tdata$TIME = newtime;
+  rm(oldtime, newtime);
+  tdobject = timeDate:::timeDate(paste(as.vector(tdata$DATE),as.vector(tdata$TIME)), format = format, FinCenter = "GMT", zone = "GMT");
+  tdata  = xts(tdata, order.by = tdobject);
+  tdata  = tdata[, c("SYMBOL", "EX", "PRICE", "SIZE","COND", "CORR", "G127")];
+  rm(tdobject)
+  return(tdata)  
+}  
 
+####
+makeXtsQuotes = function( qdata, format = format){ 
+  adjtime = function(z) {    zz = unlist(strsplit(z, ":")); if (nchar(zz[1]) != 2) {return(paste(paste(0, zz[1], sep = ""), zz[2], zz[3], sep = ":"))}; return(z) }
+  quotecolnames = colnames(qdata);
+  
+  if (is.null(quotecolnames)) {
+    quotecolnames = c("SYMBOL", "DATE", "EX", "TIME", "BID", "BIDSIZ", "OFR", "OFRSIZ", "MODE")
+    colnames(qdata) = quotecolnames;
+  }else{ colnames(qdata) = quotecolnames }
+  
+  oldtime = as.matrix(as.vector(qdata$TIME));
+  newtime = apply(oldtime, 1, adjtime);
+  qdata$TIME = newtime;
+  rm(oldtime, newtime);
+  test = paste(as.vector(qdata$DATE), as.vector(qdata$TIME))
+  tdobject = timeDate:::timeDate(test, format = format, 
+                                 FinCenter = "GMT", zone = "GMT")
+  tdobject = timeDate:::timeDate(test, format = format, FinCenter = "GMT", zone = "GMT")
+  qdata = xts(qdata, order.by = tdobject)
+  qdata = qdata[, c("SYMBOL", "EX", "BID", "BIDSIZ","OFR", "OFRSIZ", "MODE")];
+  rm(tdobject);
+  return(qdata);
+}
+
+################ the real conversion starts here ;)
+
+convert = function(from, to, datasource, datadestination, trades = TRUE, 
+                   quotes = TRUE, ticker, dir = FALSE, extention = "txt", header = FALSE, 
+                   tradecolnames = NULL, quotecolnames = NULL, format = "%Y%m%d %H:%M:%S", onefile=FALSE){  
+  
+  #############  1.A the data is in the "RTAQ folder" structure ##############
+  if( onefile == FALSE ){
+    
+    # Create trading dates:
+    dates = timeSequence(from, to, format = "%Y-%m-%d", FinCenter = "GMT")
+    dates = dates[isBizday(dates, holidays = holidayNYSE(1950:2030))];
+    
+    # Create folder structure for saving:
+    if (dir) { dir.create(datadestination); for (i in 1:length(dates)) {dirname = paste(datadestination, "/", as.character(dates[i]), sep = ""); dir.create(dirname)    } }
+    for (i in 1:length(dates)){ #Loop over days  
+      #Get the day-specific path
+      datasourcex = paste(datasource, "/", dates[i], sep = "")
+      datadestinationx = paste(datadestination, "/", dates[i], sep = "")
+      
+      if(trades == TRUE){ 
+        if(extention=="txt"|extention=="csv"){ convert_trades(datasourcex, datadestinationx, ticker, extention = extention, header = header, tradecolnames = tradecolnames, format = format) }
+      }
+      
+      if (quotes == TRUE) { 
+        if(extention=="txt"|extention=="csv"){ convert_quotes(datasourcex, datadestinationx, ticker, extention = extention, header = header, quotecolnames = quotecolnames,format = format)}
+      } 
+    }#End loop over days
+  }#End "not onefile" if
+  
+  #############  1.B The data is in one file: ###########
+  if( onefile == TRUE ){
+    # Load the data: ############################ This depends on the data provider
+    if(trades == TRUE){ 
+      if( extention=="txt"){ dataname = paste(datasource,"/",ticker,"_trades",sep=""); RTAQ:::readdata(path = datasource, extention = "txt", header = FALSE, dims = 0); } 
+      if( extention=="csv"){ dataname = paste(datasource,"/",ticker,"_trades.csv",sep=""); data = read.csv(dataname);}
+      if( extention=="tickdatacom"){ 
+        dataname   = paste(datasource,"/",ticker,"_trades.asc",sep="");
+        colnames   = c("DATE","TIME","PRICE","SIZE","EX","COND","CORR","SEQN","SOURCE","TSTOP","G127","EXCL","FPRICE");
+        alldata    = read.delim(dataname, header=F, sep=",",dec=".",col.names=colnames); 
+        taqnames   = c("DATE","EX","TIME","PRICE","SIZE","COND","CORR","G127"); 
+        data = alldata[,taqnames]; 
+        data = cbind(rep(ticker,dim(data)[1]),data); colnames(data)[1] = "SYMBOL"; 
+        format = "%d/%m/%Y %H:%M:%S"; #tickdata always has this format
+      }
+      alldata = suppressWarnings(makeXtsTrades(tdata=data,format=format)); 
+    }
+    if (quotes == TRUE){ 
+      if( extention=="txt"){ dataname = paste(datasource,"/",ticker,"_quotes",sep=""); RTAQ:::readdata(path = datasource, extention = "txt", header = FALSE, dims = 0); } 
+      if( extention=="csv"){ dataname = paste(datasource,"/",ticker,"_quotes.csv",sep=""); data = read.csv(dataname);}
+      if( extention=="tickdatacom"){ 
+        dataname   = paste(datasource,"/",ticker,"_quotes.asc",sep=""); 
+        colnames   = c("DATE","TIME","EX","BID","OFR","BIDSIZ","OFRSIZ","MODE","MMID","SEQN","EXB", "EXO","NBBOID","NBBOID","CORR","QSO"); 
+        alldata    = read.delim(dataname, header=F, sep=",",dec=".",col.names=colnames); 
+        taqnames   = c("DATE","TIME","EX","BID","BIDSIZ","OFR","OFRSIZ","MODE"); 
+        data = alldata[,taqnames]; 
+        data = cbind(rep(ticker,dim(data)[1]),data); colnames(data)[1] = "SYMBOL"; 
+        format = "%d/%m/%Y %H:%M:%S"; # Tickdata always has this format
+      } 
+      alldata = suppressWarnings( makeXtsQuotes( qdata=data, format=format) );
+    }
+    
+    # Save the data: ############################ This is the same irrespective of the data provider
+    # Create trading dates: 
+    
+    dates = unique(as.Date(index(alldata)));
+    
+    # Create folder structure for saving : 
+    suppressWarnings( if (dir){ dir.create(datadestination); for (i in 1:length(dates)) {dirname = paste(datadestination, "/", as.character(dates[i]), sep = ""); dir.create(dirname) } })
+    
+    for(i in 1:length(dates) ){ # Loop over days
+      datadestinationx = paste(datadestination, "/", dates[i], sep = ""); 
+      
+      if( trades == TRUE ){ 
+        tdata        = alldata[as.character(dates[i])];
+        xts_name     = paste(ticker, "_trades.RData", sep = "")
+        destfullname = paste(datadestinationx,"/",xts_name,sep="");          
+        save(tdata, file = destfullname); # Save daily in right folder:
+      } 
+      
+      if( quotes == TRUE ){ 
+        qdata        = alldata[as.character(dates[i])]; 
+        xts_name     = paste(ticker, "_quotes.RData", sep = ""); 
+        destfullname = paste(datadestinationx,"/",xts_name,sep=""); 
+        save(qdata, file = destfullname); # Save daily in right folder: 
+      }#End quotes if
+    } #End save loop over days
+  } #End onefile
+} #End convert function
+
+
+
+

Modified: pkg/RTAQ/R/volatility.R
===================================================================
--- pkg/RTAQ/R/volatility.R	2012-05-05 01:33:07 UTC (rev 1017)
+++ pkg/RTAQ/R/volatility.R	2012-05-05 20:58:07 UTC (rev 1018)
@@ -1,5 +1,5 @@
-##UNIVARIATE:
-#Realized Volatility (RV)
+# UNIVARIATE: 
+# Realized Volatility (RV)
 RV = function(rdata,...){
   if(hasArg(data)){ rdata = data }
 returns=as.numeric(rdata);
@@ -298,7 +298,7 @@
 #  sdmatrix = sqrt(diag(diag(covariance)));
 #  rcor = solve(sdmatrix)%*%covariance%*%solve(sdmatrix);
 #  return(rcor);
-#}
+# }
 
 RTSRV = function (pdata, startIV = NULL, noisevar = NULL, K = 300, J = 1, 
     eta = 9){
@@ -453,7 +453,7 @@
 
     
     if (   is.null(noisevar1)   ) {
-        logprices1 = log(as.numeric(pdata1))
+        logprices1 = log(as.numeric(pdata1))     
         n_var1 = length(logprices1)
         nbarK_var1 = (n_var1 - K_var1 + 1)/(K_var1) ;
         nbarJ_var1 = (n_var1 - J_var1 + 1)/(J_var1)

Modified: pkg/RTAQ/man/convert.Rd
===================================================================
--- pkg/RTAQ/man/convert.Rd	2012-05-05 01:33:07 UTC (rev 1017)
+++ pkg/RTAQ/man/convert.Rd	2012-05-05 20:58:07 UTC (rev 1018)
@@ -1,21 +1,19 @@
 \name{convert}
 \Rdversion{1.1}
 \alias{convert}
-\title{
-Convert trade or quote data into xts object saved in the RData format}
+\title{Convert trade or quote data into xts object saved in the RData format}
 
 \description{
 Function converts both trade and quote data stored as "txt" or "csv" and
 structured as illustrated in the pdf documentation into xts-objects and 
 stores them in the "RData" format. 
-Subsequently, the data can be loaded into the R console by \code{\link{TAQLoad}}.
-}
+Subsequently, the data can be loaded into the R console by \code{\link{TAQLoad}}.}
 
 \usage{
 convert(from,to,datasource,datadestination,trades=TRUE,quotes=TRUE,
         ticker,dir=FALSE,extention="txt",header=FALSE,
         tradecolnames=NULL,quotecolnames=NULL,
-        format="\%Y\%M\%D \%H:\%M:\%S")
+        format="\%Y\%m\%d \%H:\%M:\%S",onefile=FALSE)        
 }
 
 \arguments{
@@ -27,7 +25,7 @@
 \item{quotes}{ boolean, determines whether quotes are converted.}
 \item{ticker}{ vector with tickers to be converted.}
 \item{dir}{ boolean, if TRUE the datadestination folder and subfolders will be created automatically.}
-\item{extention}{ character, indicating the data format of the original data. Can be either "txt" or "csv".}
+\item{extention}{character, indicating the data format of the original data. Can be either "txt" or "csv". In case your data comes from "www.tickdata.com", set extention="tickdatacom".}
 \item{header}{ boolean, indicating whether each data file contains a header.}
 \item{tradecolnames}{ vector containing column names of your trade data. By default, 
 the standard NYSE data format is taken, see pdf documentation for more details.}
@@ -35,6 +33,8 @@
 the standard NYSE data format is taken, see pdf documentation for more details.}
 \item{format}{ character, indicates in what format TIME and DATE are recorded in the original data.
 By default, "\%Y\%M\%D \%H:\%M:\%S" is taken, which means the date is denoted by e.g. "20080130" and the time by e.g. "09:30:00".}
+\item{onefile}{  
+indicates the way the source data is stored. By default FALSE, which means the function expects that the source data is saved in a folder structure with a folder for each date (see vignette for more info). In case the data for multiple days is stored in one file, set onefile=TRUE. The naming convention for files is, e.g. for ticker="AAPL", "AAPL_trades.extention" and "AAPL_quotes.extention" for trades and quotes respectively.}
 }
 
 \value{For each day an xts object is saved into the folder of that date, containing the converted data.
@@ -53,6 +53,8 @@
 to = "2008-01-03";
 \dontrun{datasource=datadestination="C:\\data"
 
+### txt files from NYSE:
+
 convert(from,to,datasource,datadestination,trades=TRUE,
           quotes=FALSE,ticker=c("AA","AAPL"),dir=FALSE,extention="txt",
           header=FALSE,tradecolnames=NULL,quotecolnames=NULL,
@@ -62,5 +64,24 @@
 #named 2008-01-02 and 2008-01-03 containing 
 #the files AAPL_trades.RData and AAPL_trades.RData containing the trades.
 #The data can now be loaded with the TAQLoad function.
+
+########## Csv file from WRDS
+#Suppose the datasource folder contains one csv file from WRDS 
+#with data on IBM for multiple days.
+#The file should be named "IBM_trades.csv" and can be easily converted into xts 
+#and then saved in RData format by:
+
+ convert(from=from, to=to, datasource=datasource, datadestination=datadestination, trades = T, 
+                       quotes = T, ticker="IBM", dir = FALSE, extention = "csv", header = TRUE, 
+                       tradecolnames = NULL, quotecolnames = NULL, format = format, onefile = TRUE )  
+
+####### ASC file from www.tickdata.com
+#Suppose the datasource folder contains asc files for trades and quotes 
+#from "www.tickdata.com" for GLP. 
+#The files "GLP_quotes.asc" and "GLP_trades.asc" should be saved in datasource folder.
+
+ convert(from=from, to=to, datasource=datasource, datadestination=datadestination, trades = T, 
+            quotes = T, ticker="GLP", dir = TRUE, extention = "tickdatacom", header = TRUE, 
+            tradecolnames = NULL, quotecolnames = NULL, format = format, onefile = TRUE );  
 }
 }



More information about the Blotter-commits mailing list