[Blotter-commits] r176 - in pkg: . RTAQ RTAQ/R RTAQ/man
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Mon Jan 18 16:47:57 CET 2010
Author: jonathan
Date: 2010-01-18 16:47:56 +0100 (Mon, 18 Jan 2010)
New Revision: 176
Added:
pkg/RTAQ/
pkg/RTAQ/DESCRIPTION
pkg/RTAQ/R/
pkg/RTAQ/R/aggregate.R
pkg/RTAQ/R/cleanupfunctions.R
pkg/RTAQ/R/convert_to_RData.R
pkg/RTAQ/R/manipulation.R
pkg/RTAQ/R/totalcleanup.R
pkg/RTAQ/R/volatility.R
pkg/RTAQ/man/
pkg/RTAQ/man/RTAQ-package.Rd
Log:
rtaq upload
Added: pkg/RTAQ/DESCRIPTION
===================================================================
--- pkg/RTAQ/DESCRIPTION (rev 0)
+++ pkg/RTAQ/DESCRIPTION 2010-01-18 15:47:56 UTC (rev 176)
@@ -0,0 +1,11 @@
+Package: RTAQ
+Type: Package
+Title: RTAQ: Tools for the analysis of trades and quotes in R
+Version: 1.0
+Date: 2010-01-18
+Author: Kris Boudt, Jonathan Cornelissen
+Maintainer: Jonathan Cornelissen <Jonathan.cornelissen at econ.kuleuven.be>
+Description: The Trades and Quotes data of the New York Stock Exchange is a popular input for the implementation of intraday trading strategies, the measurement of liquidity and volatility and investigation of the market microstructure, among others. This package contains a collection of R functions to carefully clean and match the trades and quotes data, calculate ex post liquidity and volatility measures and detect price jumps in the data.
+Depends: xts, timeDate
+License: GPL
+LazyLoad: yes
\ No newline at end of file
Added: pkg/RTAQ/R/aggregate.R
===================================================================
--- pkg/RTAQ/R/aggregate.R (rev 0)
+++ pkg/RTAQ/R/aggregate.R 2010-01-18 15:47:56 UTC (rev 176)
@@ -0,0 +1,150 @@
+previoustick = function(a){
+a=as.vector(a);
+b = a[length(a)];
+return(b)
+}
+
+weightedaverage = function(a){
+aa = as.vector(as.numeric(a[,1]));
+bb = as.vector(as.numeric(a[,2]));
+c = weighted.mean(aa,bb);
+return(c)
+}
+
+
+##AGGREGATION;
+aggregatets = function(ts, FUN=previoustick, on="minutes", k=1, weights=F){
+  ## Aggregates the xts object "ts" over calendar periods and re-stamps every
+  ## aggregate with a rounded period timestamp.
+  ##
+  ## Arguments:
+  ##  ts      xts object to aggregate.
+  ##  FUN     function applied to every period when no weights are supplied.
+  ##  on      period type handed to endpoints(). Supported values: secs,
+  ##          seconds, mins, minutes, hours, days, weeks.
+  ##  k       number of periods per interval, handed to endpoints().
+  ##  weights FALSE (default) for an unweighted aggregation; otherwise a series
+  ##          that is cbind-ed to "ts" and aggregated with weightedaverage().
+  ## Value: xts object whose index is converted with as.timeDate().
+
+  # Reject unknown periods up front. Previously the trailing "else" belonged to
+  # the "weeks" test only, so the complaint was printed for every valid period
+  # except weeks, and an unknown period failed later on an undefined ts3.
+  supported = c("secs","seconds","mins","minutes","hours","days","weeks");
+  if(!(on %in% supported)){
+    stop(paste("aggregatets: unsupported value for 'on':",on));
+  }
+
+  if(weights[1]==F){
+    #Without weights:
+    ep = endpoints(ts, on, k);
+    ts2 = period.apply(ts,ep,FUN);
+  } else {
+    #With weights:
+    tsb = cbind(ts,weights);
+    ep = endpoints(tsb, on, k);
+    ts2 = period.apply(tsb,ep,FUN=weightedaverage);
+  }
+
+  # Raw numeric index of the aggregated series; each branch below moves it to
+  # a multiple of "secs".
+  idx = .index(ts2);
+
+  if(on=="minutes"|on=="mins"){
+    secs = k*60;
+    a = idx + (secs - idx %% secs);
+  } else if(on=="secs"|on=="seconds"){
+    secs = k;
+    a = idx + (secs - idx %% secs);
+  } else if(on=="hours"){
+    secs = 3600;
+    a = idx + (secs - idx %% secs);
+  } else if(on=="days"){
+    secs = 24*3600;
+    a = idx + (secs - idx %% secs) - (24*3600);
+  } else {
+    #weeks (the only supported value left)
+    secs = 24*3600*7;
+    a = (idx + (secs-(idx + (3L * 86400L)) %% secs))-(24*3600);
+  }
+  ts3 = .xts(ts2,a);
+
+  #return to timeDate timestamps
+  index(ts3) = as.timeDate(index(ts3));
+
+  return(ts3);
+}
+
+#PRICE (specificity: opening price and previoustick)
+
+agg_price = function(ts,FUN = previoustick,on="minutes",k=5){
+##Return new timeseries as xts object where
+##first observation is always the opening price
+##subsequent observations are the closing prices over the interval with endpoint the timestamp of the result
+##on indicates the type of period to aggregate over
+##k indicates the number of periods to aggregate over
+ ts2 = aggregatets(ts, FUN=previoustick, on, k);
+
+ date = strsplit(as.character(index(ts))," ")[[1]][1]
+ realopen = "09:30:00";
+ a = as.timeDate(paste(date,realopen));
+ b = xts(ts[1],a);
+
+ ts3 = c(b,ts2);
+ return(ts3);
+}
+
+#VOLUME: (specificity: always sum)
+agg_volume = function(ts,FUN = sumN,on="minutes",k=5, includeopen=FALSE){
+
+ if(!includeopen){ts3 = aggregatets(ts, FUN=sumN, on, k)}
+
+ if(includeopen){
+ ts2 = aggregatets(ts, FUN=sumN, on, k);
+ date = strsplit(as.character(index(ts))," ")[[1]][1]
+ realopen = "09:30:00";
+ a = as.timeDate(paste(date,realopen));
+ b = xts(as.numeric(ts[1]),a);
+ ts3 = c(b,ts2);
+ }
+return(ts3)
+}
+
+
+###TRADES AGGREGATION:
+agg_trades = function(tdata,on="minutes",k=5){
+ ## Aggregates an entire trades xts object (tdata) over a "k"-minute interval.
+ ## Returned xts-object contains: SYMBOL,EX,PRICE,SIZE.
+ ## Variables COND, CR, G127 are dropped because aggregating them makes no sense.
+ ## NOTE: first observation (opening price) always included.
+
+ PRICE = agg_price(tdata$PRICE,on=on,k=k);
+ SIZE = agg_volume(tdata$SIZE,on=on,k=k,includeopen=TRUE);
+ EX = c(tdata$EX[1],aggregatets(tdata$EX, FUN=previoustick, on=on, k=k));
+ SYMBOL = rep(tdata$SYMBOL[1],length(PRICE));
+ all = data.frame(SYMBOL,EX,PRICE,SIZE);
+ colnames(all) =c("SYMBOL","EX","PRICE","SIZE");
+ ts = xts(all,index(SIZE));
+
+ return(ts);
+}
+
+
+###QUOTES AGGREGATION:
+agg_quotes = function(qdata,on="minutes",k=5){
+ ## Aggregates an entire quotes xts object (qdata) object over a "k"-minute interval.
+ ## Returned xts-object contains: SYMBOL,EX,BID,BIDSIZE,OFFER,OFFERSIZE.
+ ## Variable MODE is dropped because aggregation makes no sense.
+ ## "includeopen" determines whether to include the exact opening quotes.
+
+ BID = agg_price(qdata$BID,on=on,k=k);
+ OFFER = agg_price(qdata$OFFER,on=on,k=k);
+
+ BIDSIZE = agg_volume(qdata$BIDSIZE,on=on,k=k,includeopen=TRUE);
+ OFFERSIZE = agg_volume(qdata$OFFERSIZE,on=on,k=k,includeopen=TRUE);
+
+ EX = agg_price(qdata$EX,on=on,k=k)
+ SYMBOL = rep(qdata$SYMBOL[1],length(BIDSIZE));
+
+ all = data.frame(SYMBOL,EX,BID,BIDSIZE,OFFER,OFFERSIZE);
+ colnames(all) =c("SYMBOL","EX","BID","BIDSIZE","OFFER","OFFERSIZE");
+
+ ts = xts(all,index(BIDSIZE));
+
+ return(ts);
+}
+
+##LIQUIDITY AGGREGATION:
+##Just combine aggregation functions and spot liquidity functions!
+
Added: pkg/RTAQ/R/cleanupfunctions.R
===================================================================
--- pkg/RTAQ/R/cleanupfunctions.R (rev 0)
+++ pkg/RTAQ/R/cleanupfunctions.R 2010-01-18 15:47:56 UTC (rev 176)
@@ -0,0 +1,325 @@
+##### Help functions
+## help function to make all time notation consistent
+adjtime = function(z){
+ zz = unlist(strsplit(z,":"));
+ if(nchar(zz[1])!=2){
+ return(paste(paste(0,zz[1],sep=""),zz[2],zz[3],sep=":"))}
+ return(z);
+ }
+
+########## DATA CLEAN-UP: FOR ALL DATA #####################
+
+####FUNCTION TO FILTER EXCHANGE HOURS ONLY: ExchangeHoursOnly
+ExchangeHoursOnly = function(ts, daybegin = "09:30:00",dayend="16:00:00")
+{
+ # a function to excerpt data within exchange trading hours
+ # daybegin and dayend: two characters in the format of "HH:MM:SS",
+ # specifying the starting hour and minute and sec of an exhange
+ # trading day and the closing hour and minute and sec
+ # of the trading day repectively
+
+ if(!is(ts, "xts"))
+ stop("ts must be an xts object")
+
+ gettime = function(z){unlist(strsplit(as.character(z)," "))[2]};
+ times1 = as.matrix(as.vector(as.character(index(ts))));
+ times = apply(times1,1,gettime);
+ tdtimes = timeDate(times,format = "%H:%M:%S",FinCenter = "GMT",zone="GMT");
+
+ #create timeDate begin and end
+ tddaybegin = timeDate(daybegin,format = "%H:%M:%S",FinCenter = "GMT",zone="GMT");
+ tddayend = timeDate(dayend,format = "%H:%M:%S",FinCenter = "GMT",zone="GMT");
+
+ #select correct observations
+ filteredts = ts[tdtimes>=tddaybegin & tdtimes<=tddayend];
+ return(filteredts);
+}
+
+
+nozeroprices = function(ts){
+####FUNCTION TO DELETE ZERO PRICES: nozeroprices
+filteredts = ts[as.numeric(ts$PRICE)!= 0];
+return(filteredts);
+}
+
+
+selectexchange = function(ts,exch="N"){
+###FUNCTION TO SELECT THE OBSERVATIONS OF A SINGLE EXCHANGE: selectexchange
+filteredts = ts[ts$EX==exch];
+return(filteredts);
+}
+
+
+autoselectexchange = function(ts){
+## AUTOSELECT EXCHANGE WITH HIGHEST NUMBER OF SHARES TRADED (for trades):
+## Sums the variable "SIZE" per exchange code and returns ts restricted to the
+## observations of the exchange with the largest total.
+## The codes "Q" and "T" share the first slot (reported as NASDAQ): the larger
+## of their two totals competes, and the rows of the code behind that total
+## are the ones returned.
+## Prints the name of the selected exchange.
+
+  exchanges = c("Q","A","P","B","C","N","D","X","I","M","W","Z");
+  exchangenames = c("NASDAQ","AMEX","ARCA","Boston","NSX","NYSE","NASD ADF and TRF","Philadelphia","ISE","Chicago","CBOE","BATS");
+
+  # One total per entry of "exchanges" (preallocated instead of grown by cbind)
+  nobs = numeric(length(exchanges));
+
+  z1 = sum(as.numeric(selectexchange(ts,"Q")$SIZE));
+  z2 = sum(as.numeric(selectexchange(ts,"T")$SIZE));
+  nobs[1] = max(z1,z2);
+  # TRUE when the "T" total is the one stored in the first slot
+  watchout = nobs[1] == z2;
+
+  for(i in 2:length(exchanges)) {
+    nobs[i] = sum(as.numeric(selectexchange(ts,exchanges[i])$SIZE));
+  }
+
+  # which.max() always yields a single exchange; comparing against max(nobs)
+  # returned several codes when totals tied and broke the scalar test below.
+  exch = exchanges[which.max(nobs)];
+
+  namechosen = exchangenames[exch==exchanges];
+  print(paste("The information of the",namechosen,"exchange was collected"));
+
+  if(exch=="Q" && watchout){exch="T"}
+  filteredts = ts[ts$EX==exch];
+  # Explicit return, as in autoselectexchangeq; the bare assignment used to
+  # hand the result back invisibly.
+  return(filteredts);
+}
+
+
+##### TRADE DATA SPECIFIC FUNCTIONS: ###################################
+salescond = function(ts){
+###DELETE ENTRIES WITH AN ABONORMAL SALES CONDITION
+filteredts = ts[ts$COND == "0"|ts$COND == "E"|ts$COND == "F"];
+return(filteredts);
+}
+
+##Merge same timestamp:
+sumN = function(a){
+ a = sum(as.numeric(a));
+ return(a)
+}
+
+medianN = function(a){
+ a = median(as.numeric(a));
+ return(a)
+}
+
+maxvol = function(a){
+ p = as.numeric(a[,1]);
+ s = as.numeric(a[,2]);
+
+ b = median(p[s == max(s)]);
+ return(b);
+}
+
+waverage = function(a){
+ p = as.numeric(a[,1]);
+ s = as.numeric(a[,2]);
+
+ b = sum(p*s/sum(s));
+ return(b);
+}
+
+mergesametimestamp = function(ts,selection="median"){
+ #find end points:
+ ep = endpoints(ts,"secs");
+
+ #size per second:
+ size = period.apply(ts$SIZE,ep,sumN);
+
+ #price per second:
+ if(selection=="median"){price = period.apply(ts$PRICE,ep,medianN)}
+ if(selection=="maxvolume"){price = period.apply(cbind(ts$PRICE,ts$SIZE),ep,maxvol)}
+ if(selection=="weightedaverage"){price = period.apply(cbind(ts$PRICE,ts$SIZE),ep,waverage)}
+
+ ##merge everything:
+ selection = ep[2:length(ep)];
+ ts2 = ts[selection];
+ ts2$PRICE = price;
+ ts2$SIZE = size;
+
+return(ts2)
+}
+
+rmtradeoutliers = function(tdata,qdata){
+##Function to delete entries with prices that are above the ask plus the bid-ask
+##spread. Similar for entries with prices below the bid minus the bid-ask
+##spread.
+##  tdata  xts object with the trades
+##  qdata  xts object with the quotes; matched to the trades with matchtq()
+##Returns tdata restricted to the trades whose price lies strictly between
+##(bid - spread) and (offer + spread).
+  data = matchtq(tdata,qdata);
+  price = as.numeric(data$PRICE);
+  bid = as.numeric(data$BID);
+  offer = as.numeric(data$OFFER);
+  spread = offer - bid;
+
+  upper = offer+spread;
+  lower = bid-spread;
+
+  # The subset used to be computed and thrown away, so the function returned
+  # tdata unfiltered; return the filtered series instead.
+  keep = (price<upper) & (price>lower);
+  return(tdata[keep]);
+}
+
+
+################# QUOTE SPECIFIC FUNCTIONS: #################
+
+nozeroquotes = function(ts){
+####FUNCTION TO DELETE ZERO QUOTES: nozeroquotes
+filteredts = ts[as.numeric(ts$BID)!= 0& as.numeric(ts$OFFER)!= 0];
+return(filteredts);
+}
+
+
+autoselectexchangeq = function(ts){
+####Autoselect exchange with highest value for (bidsize+offersize)
+ nobs=c();
+ exchanges = c("Q","A","P","B","C","N","D","X","I","M","W","Z");
+ exchangenames = c("NASDAQ","AMEX","ARCA","Boston","NSX","NYSE","NASD ADF and TRF","Philadelphia","ISE","Chicago","CBOE","BATS");
+
+ selected1 = selectexchange(ts,"Q");
+ selected2 = selectexchange(ts,"T");
+ z1 = sum(as.numeric(selected1$BIDSIZE)+as.numeric(selected1$OFFERSIZE));
+ z2 = sum(as.numeric(selected2$BIDSIZE)+as.numeric(selected2$OFFERSIZE));
+ z = max(z1,z2);
+ watchout = z == z2;
+ nobs = cbind(nobs,z);
+
+ for(i in 2:length(exchanges)) {
+ selected = selectexchange(ts,exchanges[i]);
+ z = sum(as.numeric(selected$BIDSIZE)+as.numeric(selected$OFFERSIZE));
+ nobs = cbind(nobs,z);
+ }
+
+ exch=exchanges[max(nobs)==nobs];
+
+ namechosen = exchangenames[exch==exchanges];
+ print(paste("The information of the",namechosen,"exchange was collected"));
+
+ if(exch=="Q"&watchout){exch="T"}
+
+ filteredts = ts[ts$EX==exch];
+ return(filteredts);
+}
+
+
+mergequotessametimestamp = function(ts,selection="median"){ ##FAST
+ condition=selection=="median"|selection=="maxvolume"|selection=="weightedaverage";
+ if(!condition){print(paste("WARNING:The result will be corrupted. Check whether",selection,"is an existing option for the attribute selection."))}
+
+ #find end points:
+ ep = endpoints(ts,"secs");
+
+ #size per second:
+ bidsize = period.apply(ts$BIDSIZE,ep,sumN);
+ offersize = period.apply(ts$OFFERSIZE,ep,sumN);
+
+ #median per second:
+ if(selection=="median"){
+ bid = period.apply(ts$BID,ep,medianN);
+ offer = period.apply(ts$OFFER,ep,medianN);
+ }
+
+ #maxvolume per second:
+ if(selection=="maxvolume"){
+ bid = period.apply(cbind(ts$BID,ts$BIDSIZE),ep,maxvol);
+ offer = period.apply(cbind(ts$OFFER,ts$OFFERSIZE),ep,maxvol);
+ }
+
+ if(selection=="weightedaverage"){
+ bid = period.apply(cbind(ts$BID,ts$BIDSIZE),ep,waverage);
+ offer = period.apply(cbind(ts$OFFER,ts$OFFERSIZE),ep,waverage);
+ }
+
+ ##merge everything:
+ selection = ep[2:length(ep)];
+ ts2 = ts[selection];
+ ts2$BID = bid;
+ ts2$OFFER = offer;
+
+ ts2$BIDSIZE = bidsize;
+ ts2$OFFERSIZE = offersize;
+
+return(ts2)
+}
+
+
+rmnegspread = function(ts){
+##function to remove observations with negative spread
+ condition = as.numeric(ts$OFFER)>as.numeric(ts$BID);
+ ts[condition];
+}
+
+
+rmlargespread = function(ts,maxi=50){
+##function to remove observations with a spread larger than 50 times the median spread that day
+###WATCH OUT: works only correct if supplied input data consists of 1 day...
+ spread = as.numeric(ts$OFFER)-as.numeric(ts$BID);
+ condition = ((maxi*median(spread))>spread);
+ return(ts[condition])
+}
+
+
+rmoutliers = function(ts,maxi=10,window=50,type="advanced"){
+##function to remove entries for which the mid-quote deviates by more than "maxi" median absolute deviations
+##from a reference median.
+
+##if type="standard":
+##the reference is a rolling centered median (excluding the observation under consideration) of "window" observations.
+
+##if type="advanced":
+##the reference is the variable "mediani".
+##mediani is defined as the value closest to the midquote of these three options:
+##1. Rolling centered median (excluding the observation under consideration)
+##2. Rolling median of the following "window" observations
+##3. Rolling median of the previous "window" observations
+
+##In both cases the median absolute deviation is taken over the centered window.
+##Observations without a complete neighbourhood (start and end of the series) are always kept.
+
+##NOTE: Median Absolute deviation chosen contrary to Barndorff-Nielsen et al.
+  print("NOTE: This function is only useful for quotes NOT for trades");
+  nobs = dim(ts)[1];
+  halfwindow = round(window/2);
+  midquote = (as.numeric(ts$BID)+as.numeric(ts$OFFER))/2;
+
+  # One keep/drop flag per observation, written at the observation's own
+  # position. The old code stored flag i at i-halfwindow and padded afterwards,
+  # which in "advanced" mode (loop starting at window+1) left NAs at the front
+  # and produced a mask whose length differed from the series.
+  condition = rep(TRUE,nobs);
+
+  if(type=="standard"){
+    first = halfwindow+1;
+    last = nobs-halfwindow;
+    # Guard: first:last would count backwards on a series that is too short
+    if(first<=last){
+      for(i in first:last){
+        mid = midquote[i];
+        vec = c(midquote[(i-halfwindow):(i-1)],midquote[(i+1):(i+halfwindow)]);
+        dev = mad(vec);
+        maxcriterion = median(vec)+maxi*dev;
+        mincriterion = median(vec)-maxi*dev;
+        condition[i] = mincriterion < mid & mid< maxcriterion;
+      }
+    }
+  }
+
+  if(type=="advanced"){
+    first = window+1;
+    last = nobs-window;
+    if(first<=last){
+      for(i in first:last){
+        mid = midquote[i];
+
+        vec = c(midquote[(i-halfwindow):(i-1)],midquote[(i+1):(i+halfwindow)]);
+        vec2 = midquote[(i-window):(i-1)];
+        vec3 = midquote[(i+1):(i+window)];
+
+        medianv = c(median(vec),median(vec2),median(vec3));
+        # which.min() keeps mediani a scalar when two medians are equally close
+        mediani = medianv[which.min(abs(medianv-mid))];
+        dev = mad(vec);
+
+        maxcriterion = mediani+maxi*dev;
+        mincriterion = mediani-maxi*dev;
+
+        condition[i] = mincriterion < mid & mid< maxcriterion;
+      }
+    }
+  }
+
+  ts[condition];
+}
+
+
+########################## JUNK #############################################################
+#conv =function(z){
+# zz = unlist(strsplit(z,","));
+# return(as.numeric(paste(zz[1],zz[2],sep=".")))
+#}
+### make prices numeric ###
+#x = as.matrix(as.vector(test2$PRICE))
+#xx = apply(x,1,conv)
+#test2$PRICE=xx
+
+##appropriate days selection:
+#create list of all trading days
+#start = unlist(strsplit(as.character(start(myxts))," "))[1];
+#end = unlist(strsplit(as.character(end(myxts))," "))[1];
+#alldays = timeSequence(from = start, to = end, by = "day");
+#alldays = alldays[isWeekday(alldays)];
Added: pkg/RTAQ/R/convert_to_RData.R
===================================================================
--- pkg/RTAQ/R/convert_to_RData.R (rev 0)
+++ pkg/RTAQ/R/convert_to_RData.R 2010-01-18 15:47:56 UTC (rev 176)
@@ -0,0 +1,125 @@
+convert = function(from,to,datasource,datadestination,trades=TRUE,quotes=TRUE,ticker,dir=F){
+## Converts the raw text files of every business day between "from" and "to"
+## by calling convert_trades() and/or convert_quotes() once per day.
+##  from, to         first and last day, format "%Y-%m-%d"
+##  datasource       root folder holding one sub-folder per day with the raw files
+##  datadestination  root folder that receives one sub-folder per day
+##  trades, quotes   convert the trades / the quotes?
+##  ticker           ticker symbols, passed on to the conversion functions
+##  dir              if TRUE, create datadestination and its per-day sub-folders first
+## NOTE(review): the holiday calendar is hard-coded to holidayNYSE(2004:2010);
+## for dates outside those years holidays are not removed.
+  dates = timeSequence(from,to, format = "%Y-%m-%d", FinCenter = "GMT")
+  dates = dates[isBizday(dates, holidays = holidayNYSE(2004:2010))];
+
+  if(dir) {
+    dir.create(datadestination);
+    for(i in seq_along(dates)) {
+      dirname = paste(datadestination,"\\",as.character(dates[i]),sep="")
+      dir.create(dirname);
+    }
+  }
+
+  for(i in seq_along(dates)){
+    # Per-day paths are kept in their own variables. Overwriting datasource and
+    # datadestination made every later day inherit all earlier dates in its path.
+    daysource = paste(datasource,"\\",dates[i],sep="");
+    daydestination = paste(datadestination,"\\",dates[i],sep="");
+    if(trades==TRUE){convert_trades(daysource,daydestination,ticker)}
+    # The quotes conversion used to be switched by "trades" as well.
+    if(quotes==TRUE){convert_quotes(daysource,daydestination,ticker)}
+  }
+}
+
+
+convert_trades = function(datasource,datadestination,ticker){
+ setwd(datasource);
+ adjtime = function(z){
+ zz = unlist(strsplit(z,":"));
+ if(nchar(zz[1])!=2){
+ return(paste(paste(0,zz[1],sep=""),zz[2],zz[3],sep=":"))}
+ return(z);
+ }
+
+ for(i in 1:length(ticker)){
+ klassen = c(rep("character",4),"real","real","character",rep("numeric",2));
+ tfile_name = paste(ticker[i],"_trades.txt",sep="");
+ tdata = try(read.delim(tfile_name,sep="",header=F,dec=",",colClasses=klassen),silent=TRUE);
+ error = tdata[1]== "Error in read.table(file = file, header = header, sep = sep, quote = quote, : \n no lines available in input\n";
+
+ if(error[1])
+ {print(paste("no trades for stock",ticker[i]));
+ missingt = rbind(missingt,c(currentdate,ticker[i]));
+ }
+ if(error==FALSE){
+
+ #assign column names
+ tradecolnames=c("SYMBOL","DATE","EX","TIME","PRICE","SIZE","COND","CR","G127");
+
+ colnames(tdata)=tradecolnames
+
+ ### solve issue when there is no COND ###
+ cond=tdata$COND[is.na(tdata$G127)];
+ cr=tdata$CR[is.na(tdata$G127)];
+
+ tdata$COND[is.na(tdata$G127)]=0;
+ tdata$CR[is.na(tdata$G127)]= cond;
+ tdata$G127[is.na(tdata$G127)] = cr;
+ rm(cond,cr);
+
+ ## solve issue that time notation is inconsequent (no 09h but 9h)
+ oldtime = as.matrix(as.vector(tdata$TIME));
+ newtime = apply(oldtime,1,adjtime);
+ tdata$TIME = newtime;
+ rm(oldtime,newtime);
+
+ ##make xts object ##
+ tdobject=timeDate(paste(as.vector(tdata$DATE), as.vector(tdata$TIME)),format = "%m/%d/%Y %H:%M:%S",FinCenter = "GMT",zone="GMT");
+ tdata = xts(tdata,order.by=tdobject);
+ tdata = tdata[,c(1,3,5,6,7,8,9)];
+ rm(tdobject);
+ }
+
+ xts_name = paste(ticker[i],"_trades.RData",sep="");
+ setwd(datadestination);
+ save(tdata, file = xts_name);
+ }
+ }
+
+
+convert_quotes = function(datasource,datadestination,ticker){
+ setwd(datasource);
+ adjtime = function(z){
+ zz = unlist(strsplit(z,":"));
+ if(nchar(zz[1])!=2){
+ return(paste(paste(0,zz[1],sep=""),zz[2],zz[3],sep=":"))}
+ return(z);
+ }
+
+ for(i in 1:length(ticker)){
+ klassen = c(rep("character",4),rep("real",5));
+ qfile_name = paste(ticker[i],"_quotes.txt",sep="");
+
+ qdata=try(read.delim(qfile_name,sep="",header=F,dec=",",colClasses=klassen),silent=TRUE);
+
+ error = qdata[1]== "Error in read.table(file = file, header = header, sep = sep, quote = quote, : \n no lines available in input\n";
+ if(error[1])
+ {print(paste("no quotes for stock",ticker[i]));
+ missingq=rbind(missingq,c(currentdate,ticker[i]));
+ }
+ if((error==FALSE)[1]){
+ #assign column names
+ tradecolnames=c("SYMBOL","DATE","EX","TIME","BID","BIDSIZE","OFFER","OFFERSIZE","MODE");
+ colnames(qdata)=tradecolnames
+
+ ####important because of data mistakes,must become something like "ticker"
+ qdata = qdata[qdata$SYMBOL==ticker[i],]
+
+ ## solve issue that time notation is inconsequent (no 09h but 9h)
+ oldtime = as.matrix(as.vector(qdata$TIME));
+ newtime = apply(oldtime,1,adjtime); #check if function in this file
+ qdata$TIME = newtime;
+ rm(oldtime,newtime);
+
+ ##make xts object
+ test=paste(as.vector(qdata$DATE), as.vector(qdata$TIME))
+ tdobject=timeDate(test,format = "%m/%d/%Y %H:%M:%S",FinCenter = "GMT",zone="GMT");
+ qdata = xts(qdata,order.by=tdobject);
+ qdata = qdata[,c(1,3,5,6,7,8,9)];
+ }
+
+ xts_name = paste(ticker[i],"_quotes.RData",sep="");
+ setwd(datadestination);
+ save(qdata, file = xts_name);
+ }
+ }
+
+
+
Added: pkg/RTAQ/R/manipulation.R
===================================================================
--- pkg/RTAQ/R/manipulation.R (rev 0)
+++ pkg/RTAQ/R/manipulation.R 2010-01-18 15:47:56 UTC (rev 176)
@@ -0,0 +1,474 @@
+#MANIPULATION FUNCTIONS:
+
+TAQload = function(ticker,from,to,trades=TRUE,quotes=FALSE,datasource="V:\\Jobstudent\\TAQdata"){
+##Function to load the taq data from a certain stock
+#From&to (both included) should be in the format "%Y-%m-%d" e.g."2008-11-30"
+#  ticker          ticker symbol used in the file names
+#  trades, quotes  load the trades / the quotes?
+#  datasource      root folder holding one sub-folder per day with the .RData files
+#Returns the trades, the quotes, or list(trades=,quotes=) when both are requested.
+#Stops when a "missing" marker file exists for one of the days.
+  dates = timeSequence(from,to, format = "%Y-%m-%d", FinCenter = "GMT")
+  dates = dates[isBizday(dates, holidays = holidayNYSE(2004:2010))];
+
+  if(trades){
+    for(i in seq_along(dates)){
+      datasourcex = paste(datasource,"\\",dates[i],sep="");
+      filename = paste(datasourcex,"\\",ticker,"_trades.RData",sep="");
+
+      ifmissingname = paste(datasourcex,"\\missing_",ticker,".RData",sep="");
+      if(file.exists(ifmissingname)){stop(paste("no trades available on ",dates[i],sep=""))}
+
+      # load() creates the object "tdata" in this function's environment
+      load(filename);
+      # The first day initialises the result, later days are appended. The old
+      # test "i>=1" was also true for the first day and stored it twice.
+      if(i==1){totaldata=tdata} else {totaldata=rbind(totaldata,tdata)}
+      rm(tdata);
+    }
+  }
+
+  if(quotes){
+    for(i in seq_along(dates)){
+      datasourcex = paste(datasource,"\\",dates[i],sep="");
+      filename = paste(datasourcex,"\\",ticker,"_quotes.RData",sep="");
+      ifmissingname = paste(datasourcex,"\\missingquotes_",ticker,".RData",sep="");
+
+      if(file.exists(ifmissingname)){stop(paste("no quotes available on ",dates[i],sep=""))}
+
+      # load() creates the object "qdata" in this function's environment
+      load(filename);
+      if(i==1){totaldataq=qdata} else {totaldataq=rbind(totaldataq,qdata)}
+      rm(qdata);
+    }
+  }
+
+  if(trades && quotes){return(list(trades = totaldata,quotes=totaldataq))}
+  if(trades==TRUE & quotes==FALSE){return(totaldata)}
+  if(trades==FALSE & quotes==TRUE){return(totaldataq)}
+  }
+
+
+matchtq = function(tdata,qdata,adjustment=2){ ##FAST VERSION
+ tt = dim(tdata)[2];
+ index(qdata) = index(qdata) + adjustment;
+
+ #merge:
+ merged = merge(tdata,qdata);
+
+ ##fill NA's:
+ merged[,((tt+1):dim(merged)[2])] = na.locf(as.zoo(merged[,((tt+1):dim(merged)[2])]), na.rm=FALSE);
+
+ #Select trades:
+ index(tdata)=as.POSIXct(index(tdata));
+ index(merged)=as.POSIXct(index(merged));
+ merged = merged[index(tdata)];
+
+ #return useful parts:
+ merged = merged[,c((1:tt),((tt+3):(dim(merged)[2])))];
+
+ ##a bit rough but otherwise opening price disappears...
+ merged = as.xts(na.locf(as.zoo(merged),fromLast=TRUE));
+
+ index(merged) = as.timeDate(index(merged));
+ return(merged)
+}
+
+matchtq_old = function(tdata,qdata,adjustment=2){ ##FAST VERSION
+ tt = dim(tdata)[2];
+ index(qdata) = index(qdata) + adjustment;
+
+ #merge:
+ counter = xts(as.character(1:dim(qdata)[1]),order.by=index(qdata))#an integer for every quote
+ merged = cbind(qdata,counter);
+ merged = merge(tdata,merged);
+
+ ##fill NA's:
+ merged[,((tt+1):dim(merged)[2])] = na.locf(as.zoo(merged[,((tt+1):dim(merged)[2])]), na.rm=FALSE);
+
+ #Select trades:
+ merged = merged[index(tdata)];
+
+ #Remove duplicated quotes:
+ merged = merged[!duplicated(merged[,dim(merged)[2]])];
+
+ #return usefull parts:
+ merged = merged[,c((1:tt),((tt+3):(dim(merged)[2]-1)))];
+
+ return(merged)
+}
+
+
+
+gettradedir = function(data){
+##Function returns a vector with the inferred trade direction:
+##NOTE: the value of the first (and second) observation should be ignored if price=midpoint for the first (second) observation.
+ bid = as.numeric(data$BID);
+ offer = as.numeric(data$OFFER);
+ midpoints = (bid + offer)/2;
+ price = as.numeric(data$PRICE);
+
+ buy1 = price > midpoints; #definitely a buy
+ equal = price == midpoints;
+ dif1 = c(TRUE,0 < price[2:length(price)]-price[1:(length(price)-1)]);#for trades=midpoints: if uptick=>buy
+ equal1 = c(TRUE,0 == price[2:length(price)]-price[1:(length(price)-1)]);#for trades=midpoints: zero-uptick=>buy
+ dif2 = c(TRUE,TRUE,0 < price[3:length(price)]-price[1:(length(price)-2)]);
+
+ buy = buy1 | (dif1 & equal) | (equal1 & dif2 & equal);
+
+ buy[buy==TRUE]=1;
+ buy[buy==FALSE]=-1;
+
+ return(buy);
+}
+
+
+es = function(data){
+#returns the effective spread as xts object
+ bid = as.numeric(data$BID);
+ offer = as.numeric(data$OFFER);
+ midpoints = (bid + offer)/2;
+ price = as.numeric(data$PRICE);
+ d = gettradedir(data);
+
+ es=xts(2*d*(price-midpoints),order.by=index(data));
+ return(es);
+}
+
+
+rs = function(data,tdata,qdata){
+###Function returns the realized spread as an xts object
+#Please note that the returned object can contain less observations that the original "data"
+#because of the need to find quotes that match the trades 5 min ahead
+
+#arguments
+#data=> xts object containing matched trades and quotes
+#tdata and qdata, the xts object containing the trades and quotes respectively
+
+  ##First part solves the problem that unequal number of obs (in data and data2) is possible when computing the RS
+  # matchtq() takes (tdata, qdata, adjustment) only; the extra "maxit=50" that
+  # used to be passed here stopped every call with an unused-argument error.
+  data2 = matchtq(tdata,qdata,300);
+  if(dim(data2)[1]>dim(data)[1]){
+    condition = as.vector(as.character(index(data2)))%in%as.vector(as.character(index(data)));
+    data2 = subset(data2,condition,select=1:(dim(data)[2]));
+    data = subset(data,as.vector(as.character(index(data)))%in%as.vector(as.character(index(data2))),select=1:(dim(data2)[2]));
+  }
+
+  if(dim(data2)[1]<dim(data)[1]){
+    condition = as.vector(as.character(index(data)))%in%as.vector(as.character(index(data2)));
+    data = subset(data,condition,select=1:(dim(data2)[2]));
+    data2 = subset(data2,as.vector(as.character(index(data2)))%in%as.vector(as.character(index(data))),select=1:(dim(data)[2]));
+  }
+
+
+  # Midpoints come from the quotes matched with the 300 second adjustment
+  # (data2); prices and trade direction come from the original matched data.
+  bid = as.numeric(data2$BID);
+  offer = as.numeric(data2$OFFER);
+  midpoints = (bid + offer)/2;
+  price = as.numeric(data$PRICE);
+  d = gettradedir(data);
+  rs = 2*d*(price-midpoints);
+
+  rs_xts = xts(rs,order.by=index(data));
+  return(rs_xts);
+}
+
+value_trade = function(data){
+#returns the trade value as xts object
+ price = as.numeric(data$PRICE);
+ size = as.numeric(data$SIZE);
+
+ value = xts(price*size,order.by=index(data));
+ return(value);
+}
+
+signed_value_trade = function(data){
+#returns the signed trade value as xts object
+ price = as.numeric(data$PRICE);
+ size = as.numeric(data$SIZE);
+ d = gettradedir(data);
+
+ value = xts(d*price*size,order.by=index(data));
+ return(value);
+}
+
+
+signed_trade_size = function(data){
+#returns the signed size of the trade as xts object
+ size = as.numeric(data$SIZE);
+ d = gettradedir(data);
+
+ value = xts(d*size,order.by=index(data));
+ return(value);
+}
+
+di_diff = function(data){
+#returns the depth imbalance (as a difference) as xts object
+ bidsize = as.numeric(data$BIDSIZE);
+ offersize = as.numeric(data$OFFERSIZE);
+
+ d = gettradedir(data);
+ di = (d*(offersize-bidsize))/(offersize+bidsize);
+ di_xts = xts(di,order.by=index(data));
+ return(di_xts);
+}
+
+di_div = function(data){
+#returns the depth imbalance (as a ratio) as xts object
+ bidsize = as.numeric(data$BIDSIZE);
+ offersize = as.numeric(data$OFFERSIZE);
+ d = gettradedir(data);
+
+ di = (offersize/bidsize)^d;
+ di_xts = xts(di,order.by=index(data));
+ return(di_xts);
+}
+
+pes = function(data){
+#returns the Proportional Effective Spread as xts object
+ es = es(data);
+ bid = as.numeric(data$BID);
+ offer = as.numeric(data$OFFER);
+ midpoints = (bid + offer)/2;
+
+ pes = 100*es/midpoints
+ pes_xts = xts(pes,order.by=index(data));
+ return(pes_xts);
+}
+
+prs = function(data,tdata,qdata){
+#returns the Proportional Realized Spread as xts object
+#data         xts object containing the matched trades and quotes
+#tdata, qdata xts objects with the trades and the quotes. They are needed by
+#             rs(), which was called without them before and therefore failed.
+#NOTE(review): rs() can return fewer observations than "data"; in that case the
+#midpoints below no longer line up with it - confirm the intended alignment.
+  rs = rs(data,tdata,qdata);
+  bid = as.numeric(data$BID);
+  offer = as.numeric(data$OFFER);
+  midpoints = (bid + offer)/2;
+
+  prs = 100*rs/midpoints
+  prs_xts = xts(prs,order.by=index(data));
+  return(prs_xts);
+}
+
+price_impact = function(data,tdata,qdata){
+#returns the Price impact as xts object: (effective spread - realized spread)/2
+#data         xts object containing the matched trades and quotes
+#tdata, qdata xts objects with the trades and the quotes. They are needed by
+#             rs(), which was called without them before and therefore failed.
+#NOTE(review): rs() can return fewer observations than "data" - confirm that
+#the result still lines up with index(data) in that case.
+  rs = rs(data,tdata,qdata);
+  es = es(data);
+
+  pi = (es-rs)/2;
+  pi_xts = xts(pi,order.by=index(data));
+  return(pi_xts);
+}
+
+prop_price_impact = function(data,tdata,qdata){
+#returns the Proportional Price impact as xts object:
+#100*((effective spread - realized spread)/2)/midpoint
+#data         xts object containing the matched trades and quotes
+#tdata, qdata xts objects with the trades and the quotes. They are needed by
+#             rs(), which was called without them before and therefore failed.
+#NOTE(review): rs() can return fewer observations than "data" - confirm that
+#the result still lines up with index(data) in that case.
+  rs = rs(data,tdata,qdata);
+  es = es(data);
+  bid = as.numeric(data$BID);
+  offer = as.numeric(data$OFFER);
+  midpoints = (bid + offer)/2;
+
+  prop_pi = (100*(es-rs)/2)/midpoints;
+  prop_pi_xts = xts(prop_pi,order.by=index(data));
+  return(prop_pi_xts);
+}
+
+
+tspread = function(data){
+#returns the half traded spread as xts object
+ bid = as.numeric(data$BID);
+ offer = as.numeric(data$OFFER);
+ midpoints = (bid + offer)/2;
+ price = as.numeric(data$PRICE);
+ d = gettradedir(data);
+
+ ts = xts(d*(price-midpoints),order.by=index(data));
+ return(ts);
+}
+
+pts = function(data){
+#returns the proportional half traded spread as xts object:
+#trade direction * (price - midpoint) / midpoint
+#data  xts object containing the matched trades and quotes (PRICE, BID, OFFER)
+  bid = as.numeric(data$BID);
+  offer = as.numeric(data$OFFER);
+  midpoints = (bid + offer)/2;
+  price = as.numeric(data$PRICE);
+  d = gettradedir(data);
+  pts = (d*(price-midpoints))/midpoints;
+
+  pts_xts = xts(pts,order.by=index(data));
+  # Used to return "ts", which is not defined in this function, so the computed
+  # series never reached the caller.
+  return(pts_xts);
+}
+
+p_return_sqr = function(data){
+#returns the squared log return on Trade prices as xts object
+ price = as.numeric(data$PRICE);
+ return = c(0,log(price[2:length(price)])-log(price[1:length(price)-1]));
+ sqr_return = return^2;
+
+ sqr_return_xts = xts(sqr_return,order.by=index(data));
+ return(sqr_return_xts);
+}
+
+p_return_abs = function(data){
+#returns the absolute log return on Trade prices as xts object
+#data  xts object with a PRICE column
+#The first observation has no predecessor and gets a return of 0.
+  price = as.numeric(data$PRICE);
+  # diff() replaces price[2:n]-price[1:n-1], whose "1:n-1" parsed as (1:n)-1
+  # and only worked because index 0 is silently dropped.
+  logreturn = c(0,diff(log(price)));
+  abs_return = abs(logreturn);
+
+  abs_return_xts = xts(abs_return,order.by=index(data));
+  # Used to return "sqr_return_xts", which does not exist in this function.
+  return(abs_return_xts);
+}
+
+qs = function(data){
+#returns the quoted spread as xts object
+ bid = as.numeric(data$BID);
+ offer = as.numeric(data$OFFER);
+ qs = offer-bid;
+
+ qs_xts = xts(qs,order.by=index(data));
+ return(qs_xts);
+}
+
+pqs = function(data){
+#returns the proportional quoted spread as xts object
+ bid = as.numeric(data$BID);
+ offer = as.numeric(data$OFFER);
+ midpoints = (bid + offer)/2;
+ qs = offer-bid;
+ pqs = 100*qs/midpoints;
+
+ pqs_xts = xts(pqs,order.by=index(data));
+ return(pqs_xts);
+}
+
+logqs = function(data){
+#returns the logarithm of the quoted spread as xts object
+ bid = as.numeric(data$BID);
+ offer = as.numeric(data$OFFER);
+ logqs = log(offer/bid);
+
+ logqs_xts = xts(logqs,order.by=index(data));
+ return(logqs_xts);
+}
+
+logsize = function(data){
+#returns the log quoted size as xts object
+ bidsize = as.numeric(data$BIDSIZE);
+ offersize = as.numeric(data$OFFERSIZE);
+ logsize = log(bidsize)+log(offersize);
+
+ logsize_xts = xts(logsize,order.by=index(data));
+ return(logsize_xts);
+}
+
+qslope = function(data){
+#returns the quoted slope as xts object
+ logsize = logsize(data);
+ qs = qs(data);
+
+ qslope = qs/logsize;
+
+ qslope_xts = xts(qslope,order.by=index(data));
+ return(qslope_xts);
+}
+
+logqslope = function(data){
+#returns the log quoted slope as xts object
+ logqs = logqs(data);
+ logsize = logsize(data);
+
+ logqslope = logqs/logsize;
+
+ logqslope_xts = xts(logqslope,order.by=index(data));
+ return(logqslope_xts);
+}
+
+
+
+mq_return_sqr = function(data){
+#returns midquote squared returns slope as xts object
+ mq_return = mq_return(data);
+
+ mq_return_sqr = mq_return^2;
+
+ mq_return_sqr_xts = xts(mq_return_sqr,order.by=index(data));
+ return(mq_return_sqr_xts);
+}
+
+mq_return_abs = function(data){
+#returns absolute midquote returns slope as xts object
+ mq_return = mq_return(data);
+
+ mq_return_abs = abs(mq_return);
+
+ mq_return_abs_xts = xts(mq_return_abs,order.by=index(data));
+ return(mq_return_abs_xts);
+}
+
+
+liquidity = function(data,tdata,qdata){
+##Function computes many liquidity measures and returns an xts object containing them
+
+##First part solves the problem that unequal number of obs (in data and data2) is possible when computing the RS
+ data2 = matchtq(tdata,qdata,300,maxit=50);
+ if(dim(data2)[1]>dim(data)[1]){
+ condition = as.vector(as.character(index(data2)))%in%as.vector(as.character(index(data)));
+ data2 = subset(data2,condition,select=1:(dim(data)[2]));
+ data = subset(data,as.vector(as.character(index(data)))%in%as.vector(as.character(index(data2))),select=1:(dim(data2)[2]));
+ }
+ if(dim(data2)[1]<dim(data)[1]){
+ condition = as.vector(as.character(index(data)))%in%as.vector(as.character(index(data2)));
+ data = subset(data,condition,select=1:(dim(data2)[2]));
+ data2 = subset(data2,as.vector(as.character(index(data2)))%in%as.vector(as.character(index(data))),select=1:(dim(data)[2]));
+ }
+
+##Variables needed for the computation of the liquidity measures
+ bid = as.numeric(data$BID);
+ offer = as.numeric(data$OFFER);
+ midpoints = (bid + offer)/2;
+ price = as.numeric(data$PRICE);
+ size = as.numeric(data$SIZE);
+ d = gettradedir(data);
+ bidsize = as.numeric(data$BIDSIZE);
+ offersize = as.numeric(data$OFFERSIZE);
+ return = c(0,log(price[2:length(price)])-log(price[1:length(price)-1]));
+ mq_return = mq_return(data);
+ midpoints2 = (as.numeric(data2$BID)+as.numeric(data2$OFFER))/2;
+
[TRUNCATED]
To get the complete diff run:
svnlook diff /svnroot/blotter -r 176
More information about the Blotter-commits
mailing list