[Blotter-commits] r176 - in pkg: . RTAQ RTAQ/R RTAQ/man

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Mon Jan 18 16:47:57 CET 2010


Author: jonathan
Date: 2010-01-18 16:47:56 +0100 (Mon, 18 Jan 2010)
New Revision: 176

Added:
   pkg/RTAQ/
   pkg/RTAQ/DESCRIPTION
   pkg/RTAQ/R/
   pkg/RTAQ/R/aggregate.R
   pkg/RTAQ/R/cleanupfunctions.R
   pkg/RTAQ/R/convert_to_RData.R
   pkg/RTAQ/R/manipulation.R
   pkg/RTAQ/R/totalcleanup.R
   pkg/RTAQ/R/volatility.R
   pkg/RTAQ/man/
   pkg/RTAQ/man/RTAQ-package.Rd
Log:
rtaq upload

Added: pkg/RTAQ/DESCRIPTION
===================================================================
--- pkg/RTAQ/DESCRIPTION	                        (rev 0)
+++ pkg/RTAQ/DESCRIPTION	2010-01-18 15:47:56 UTC (rev 176)
@@ -0,0 +1,11 @@
+Package: RTAQ
+Type: Package
+Title: RTAQ: Tools for the analysis of trades and quotes in R
+Version: 1.0
+Date: 2010-01-18
+Author: Kris Boudt, Jonathan Cornelissen
+Maintainer: Jonathan Cornelissen <Jonathan.cornelissen at econ.kuleuven.be>
+Description: The Trades and Quotes data of the New York Stock Exchange is a popular input for the implementation of intraday trading strategies, the measurement of liquidity and volatility and investigation of the market microstructure, among others. This package contains a collection of R functions to carefully clean and match the trades and quotes data, calculate ex post liquidity and volatility measures and detect price jumps in the data.
+Depends: xts, timeDate
+License: GPL
+LazyLoad: yes
\ No newline at end of file

Added: pkg/RTAQ/R/aggregate.R
===================================================================
--- pkg/RTAQ/R/aggregate.R	                        (rev 0)
+++ pkg/RTAQ/R/aggregate.R	2010-01-18 15:47:56 UTC (rev 176)
@@ -0,0 +1,150 @@
+previoustick <- function(a) {
+  # Return the last element of `a` after flattening it to a plain vector,
+  # i.e. the most recent tick when `a` is ordered in time.
+  values <- as.vector(a)
+  values[length(values)]
+}
+
+weightedaverage <- function(a) {
+  # Weighted mean of the first column of `a`, using the second column
+  # of `a` as the weights.
+  values <- as.vector(as.numeric(a[, 1]))
+  wts <- as.vector(as.numeric(a[, 2]))
+  weighted.mean(values, wts)
+}
+
+
+##AGGREGATION;
+aggregatets = function(ts, FUN=previoustick, on="minutes", k=1, weights=FALSE){
+  ## Aggregate the xts object `ts` over blocks of `k` periods of type `on` and
+  ## relabel every aggregated observation with a regular timestamp.
+  ##
+  ## Arguments:
+  ##   ts      : xts object to aggregate.
+  ##   FUN     : function applied to every block (ignored when weights are given).
+  ##   on      : one of "secs", "seconds", "mins", "minutes", "hours", "days", "weeks".
+  ##   k       : number of periods per block.
+  ##   weights : FALSE (default) for an unweighted aggregation; otherwise a series
+  ##             that is cbind-ed to `ts` and aggregated with weightedaverage().
+  ## Returns an xts object whose index is converted back to timeDate.
+  ## Stops with an error when `on` is not one of the supported values.
+
+  supported = c("secs","seconds","mins","minutes","hours","days","weeks");
+  if(!(length(on)==1 && on %in% supported)){
+    stop("aggregatets: unsupported value for 'on': ", paste(on,collapse=", "));
+  }
+
+  # A numeric weight series may legitimately start with 0, so only a literal
+  # FALSE (or NULL) switches the weighting off.
+  use_weights = !(is.null(weights) || identical(weights, FALSE));
+
+  if(!use_weights){
+    ep = endpoints(ts, on, k);
+    ts2 = period.apply(ts,ep,FUN);
+  } else {
+    tsb = cbind(ts,weights);
+    ep = endpoints(tsb, on, k);
+    ts2 = period.apply(tsb,ep,FUN=weightedaverage);
+  }
+
+  idx = .index(ts2);
+
+  if(on %in% c("secs","seconds","mins","minutes","hours")){
+    # Round every timestamp up to the end of its k-period block. Hours now use
+    # k as well, in line with seconds and minutes (identical for k = 1).
+    secs = switch(on, secs=, seconds=k, mins=, minutes=k*60, hours=k*3600);
+    a = idx + (secs - idx %% secs);
+  } else if(on=="days"){
+    # Round up to the next day boundary, then step back one day.
+    secs = 24*3600;
+    a = idx + (secs - idx %% secs) - (24*3600);
+  } else {
+    # on == "weeks": same idea with a 3-day shift inside the modulo.
+    secs = 24*3600*7;
+    a = (idx + (secs - (idx + (3L * 86400L)) %% secs)) - (24*3600);
+  }
+  ts3 = .xts(ts2,a);
+
+  #return to timeDate timestamps
+  index(ts3) = as.timeDate(index(ts3));
+
+  return(ts3);
+}
+
+#PRICE (specificity: opening price and previoustick)
+
+agg_price <- function(ts, FUN = previoustick, on = "minutes", k = 5) {
+  ## Aggregate a price series and prepend the opening price.
+  ## The first observation of the result is ts[1], stamped 09:30:00 on the
+  ## date of the first timestamp of ts; the remaining observations are the
+  ## previous-tick values returned by aggregatets() for blocks of k periods
+  ## of type `on`.
+  ## NOTE: FUN is accepted but not used; the aggregation is always previoustick.
+  closes <- aggregatets(ts, FUN = previoustick, on, k)
+
+  # Date part (text before the first space) of the first timestamp.
+  stamp_parts <- strsplit(as.character(index(ts)), " ")
+  open_day <- stamp_parts[[1]][1]
+
+  open_stamp <- as.timeDate(paste(open_day, "09:30:00"))
+  opening <- xts(ts[1], open_stamp)
+
+  c(opening, closes)
+}
+
+#VOLUME: (specificity: always sum)
+agg_volume <- function(ts, FUN = sumN, on = "minutes", k = 5, includeopen = FALSE) {
+  ## Sum a volume series over blocks of k periods of type `on`.
+  ## With includeopen = TRUE the numeric value of ts[1] is prepended,
+  ## stamped 09:30:00 on the date of the first timestamp of ts.
+  ## NOTE: FUN is accepted but not used; the aggregation is always sumN.
+  totals <- aggregatets(ts, FUN = sumN, on, k)
+  if (!includeopen) {
+    return(totals)
+  }
+
+  stamp_parts <- strsplit(as.character(index(ts)), " ")
+  open_stamp <- as.timeDate(paste(stamp_parts[[1]][1], "09:30:00"))
+  opening <- xts(as.numeric(ts[1]), open_stamp)
+  c(opening, totals)
+}
+
+
+###TRADES AGGREGATION:
+agg_trades <- function(tdata, on = "minutes", k = 5) {
+  ## Aggregate a trades xts object over blocks of k periods of type `on`.
+  ## The result has the columns SYMBOL, EX, PRICE and SIZE; any other column
+  ## of tdata is not carried over.
+  ## The opening observation is always included as the first row.
+  price_agg <- agg_price(tdata$PRICE, on = on, k = k)
+  size_agg <- agg_volume(tdata$SIZE, on = on, k = k, includeopen = TRUE)
+  exch_agg <- c(tdata$EX[1],
+                aggregatets(tdata$EX, FUN = previoustick, on = on, k = k))
+  symbol <- rep(tdata$SYMBOL[1], length(price_agg))
+
+  combined <- data.frame(symbol, exch_agg, price_agg, size_agg)
+  colnames(combined) <- c("SYMBOL", "EX", "PRICE", "SIZE")
+
+  xts(combined, index(size_agg))
+}
+
+
+###QUOTES AGGREGATION:
+agg_quotes <- function(qdata, on = "minutes", k = 5) {
+  ## Aggregate a quotes xts object over blocks of k periods of type `on`.
+  ## The result has the columns SYMBOL, EX, BID, BIDSIZE, OFFER and OFFERSIZE;
+  ## any other column of qdata is not carried over.
+  ## The opening quote is always included: agg_price() prepends it and
+  ## agg_volume() is called with includeopen = TRUE.
+  bid_agg <- agg_price(qdata$BID, on = on, k = k)
+  offer_agg <- agg_price(qdata$OFFER, on = on, k = k)
+
+  bidsize_agg <- agg_volume(qdata$BIDSIZE, on = on, k = k, includeopen = TRUE)
+  offersize_agg <- agg_volume(qdata$OFFERSIZE, on = on, k = k, includeopen = TRUE)
+
+  exch_agg <- agg_price(qdata$EX, on = on, k = k)
+  symbol <- rep(qdata$SYMBOL[1], length(bidsize_agg))
+
+  combined <- data.frame(symbol, exch_agg, bid_agg, bidsize_agg,
+                         offer_agg, offersize_agg)
+  colnames(combined) <- c("SYMBOL", "EX", "BID", "BIDSIZE", "OFFER", "OFFERSIZE")
+
+  xts(combined, index(bidsize_agg))
+}
+
+##LIQUIDITY AGGREGATION:
+##Just combine aggregation functions and spot liquidity functions!
+

Added: pkg/RTAQ/R/cleanupfunctions.R
===================================================================
--- pkg/RTAQ/R/cleanupfunctions.R	                        (rev 0)
+++ pkg/RTAQ/R/cleanupfunctions.R	2010-01-18 15:47:56 UTC (rev 176)
@@ -0,0 +1,325 @@
+##### Help functions
+## help function to make all time notation consistent
+adjtime <- function(z) {
+  # Make the time notation consistent: when the hour part of "H:MM:SS" does
+  # not have exactly two characters, prefix it with a zero. Strings whose
+  # hour already has two characters are returned unchanged.
+  parts <- unlist(strsplit(z, ":"))
+  if (nchar(parts[1]) == 2) {
+    return(z)
+  }
+  padded_hour <- paste(0, parts[1], sep = "")
+  paste(padded_hour, parts[2], parts[3], sep = ":")
+}
+
+########## DATA CLEAN-UP: FOR ALL DATA #####################
+
+####FUNCTION TO FILTER EXCHANGE HOURS ONLY: ExchangeHoursOnly
+ExchangeHoursOnly <- function(ts, daybegin = "09:30:00", dayend = "16:00:00")
+{
+    # Keep only the observations of `ts` whose time of day lies between
+    # `daybegin` and `dayend` (both bounds included).
+    # ts               : xts object (anything else raises an error).
+    # daybegin, dayend : character strings in the format "HH:MM:SS".
+    if (!is(ts, "xts"))
+        stop("ts must be an xts object")
+
+    # Parse "HH:MM:SS" strings into GMT timeDate objects so they can be compared.
+    as_clock <- function(x) {
+        timeDate(x, format = "%H:%M:%S", FinCenter = "GMT", zone = "GMT")
+    }
+
+    # Time-of-day part (text after the first space) of every timestamp.
+    stamps <- as.matrix(as.vector(as.character(index(ts))))
+    clock <- apply(stamps, 1, function(z) unlist(strsplit(as.character(z), " "))[2])
+
+    obs_time <- as_clock(clock)
+    keep <- obs_time >= as_clock(daybegin) & obs_time <= as_clock(dayend)
+    ts[keep]
+}
+
+
+nozeroprices <- function(ts) {
+  ## Delete the observations whose PRICE is zero.
+  nonzero <- as.numeric(ts$PRICE) != 0
+  ts[nonzero]
+}
+
+
+selectexchange <- function(ts, exch = "N") {
+  ## Keep only the observations whose EX column equals `exch`.
+  on_exchange <- ts$EX == exch
+  ts[on_exchange]
+}
+
+
+autoselectexchange = function(ts){
+## Keep only the trades of the exchange with the highest number of shares
+## traded, i.e. the largest total of the variable "SIZE".
+## The first candidate is evaluated under both codes "Q" and "T"; the larger
+## of the two totals is used, and the "T" observations are returned when that
+## candidate wins with the "T" total.
+## On a tie between exchanges the first one in `exchanges` is chosen.
+## Prints the name of the chosen exchange and returns the filtered xts object.
+  exchanges = c("Q","A","P","B","C","N","D","X","I","M","W","Z");
+  exchangenames = c("NASDAQ","AMEX","ARCA","Boston","NSX","NYSE","NASD ADF and TRF","Philadelphia","ISE","Chicago","CBOE","BATS");
+
+  # total SIZE traded on one exchange code
+  volume_of = function(code){ sum(as.numeric(selectexchange(ts,code)$SIZE)) };
+
+  q_volume = volume_of("Q");
+  t_volume = volume_of("T");
+  use_t = t_volume >= q_volume;
+
+  volumes = c(max(q_volume,t_volume), vapply(exchanges[-1], volume_of, numeric(1)));
+
+  # which.max always yields a single winner, even when totals are tied
+  best = which.max(volumes);
+  exch = exchanges[best];
+  print(paste("The information of the",exchangenames[best],"exchange was collected"));
+
+  if(exch=="Q" && use_t){exch="T"}
+  return(ts[ts$EX==exch]);
+}
+
+
+##### TRADE DATA SPECIFIC FUNCTIONS: ###################################
+salescond <- function(ts) {
+  ### Delete entries with an abnormal sales condition: only observations
+  ### whose COND is "0", "E" or "F" are kept.
+  cond <- ts$COND
+  regular <- cond == "0" | cond == "E" | cond == "F"
+  ts[regular]
+}
+
+##Merge same timestamp:
+sumN <- function(a) {
+  # Sum of `a` after converting it to numeric.
+  sum(as.numeric(a))
+}
+
+medianN <- function(a) {
+  # Median of `a` after converting it to numeric.
+  median(as.numeric(a))
+}
+
+maxvol <- function(a) {
+  # Median of the values in column 1 of `a` taken over the rows where
+  # column 2 of `a` attains its maximum.
+  prices <- as.numeric(a[, 1])
+  sizes <- as.numeric(a[, 2])
+
+  at_max <- sizes == max(sizes)
+  median(prices[at_max])
+}
+
+waverage <- function(a) {
+  # Average of column 1 of `a`, weighted by column 2 of `a`.
+  prices <- as.numeric(a[, 1])
+  sizes <- as.numeric(a[, 2])
+
+  total <- sum(sizes)
+  sum(prices * sizes / total)
+}
+
+mergesametimestamp = function(ts,selection="median"){
+  ## Merge all trades that fall in the same second into one observation.
+  ##   ts        : xts object with (at least) the columns PRICE and SIZE.
+  ##   selection : how the price of a second is determined:
+  ##               "median"          - median of the prices,
+  ##               "maxvolume"       - (median) price at the largest size,
+  ##               "weightedaverage" - size-weighted average price.
+  ## SIZE is the sum of the sizes within the second; the remaining columns are
+  ## those of the last observation of every second.
+  ## Stops with an error for any other value of `selection`.
+  if(!(length(selection)==1 && selection %in% c("median","maxvolume","weightedaverage"))){
+    stop("mergesametimestamp: 'selection' must be \"median\", \"maxvolume\" or \"weightedaverage\"");
+  }
+
+  #find end points:
+  ep = endpoints(ts,"secs");
+
+  #size per second:
+  size = period.apply(ts$SIZE,ep,sumN);
+
+  #price per second:
+  if(selection=="median"){
+    price = period.apply(ts$PRICE,ep,medianN);
+  } else if(selection=="maxvolume"){
+    price = period.apply(cbind(ts$PRICE,ts$SIZE),ep,maxvol);
+  } else {
+    price = period.apply(cbind(ts$PRICE,ts$SIZE),ep,waverage);
+  }
+
+  ##merge everything:
+  # ep starts with 0; dropping it leaves the row number of the last
+  # observation of every second (ep[-1] also behaves when ep has length 1)
+  lastrows = ep[-1];
+  ts2 = ts[lastrows];
+  ts2$PRICE = price;
+  ts2$SIZE = size;
+
+return(ts2)
+}
+
+rmtradeoutliers = function(tdata,qdata){
+##Function to delete entries with prices that are above the ask plus the bid-ask
+##spread. Similar for entries with prices below the bid minus the bid-ask
+##spread.
+##  tdata : xts object with the trades.
+##  qdata : xts object with the quotes, matched to the trades with matchtq().
+##Returns the trades whose price lies strictly between (bid - spread) and
+##(offer + spread).
+##NOTE(review): assumes matchtq() returns exactly one row per row of tdata,
+##in the same order - confirm.
+  data = matchtq(tdata,qdata);
+  price = as.numeric(data$PRICE);
+  bid = as.numeric(data$BID);
+  offer = as.numeric(data$OFFER);
+  spread = offer - bid;
+
+  upper = offer+spread;
+  lower = bid-spread;
+
+  # The filtered subset used to be computed and then thrown away, so the
+  # function returned tdata unchanged.
+  keep = (price<upper) & (price>lower);
+  return(tdata[keep]);
+}
+
+
+#################       QUOTE SPECIFIC FUNCTIONS:       #################
+
+nozeroquotes <- function(ts) {
+  ## Delete the observations for which BID or OFFER is zero.
+  bid_ok <- as.numeric(ts$BID) != 0
+  offer_ok <- as.numeric(ts$OFFER) != 0
+  ts[bid_ok & offer_ok]
+}
+
+
+autoselectexchangeq = function(ts){
+####Autoselect exchange with highest value for (bidsize+offersize)
+## The first candidate is evaluated under both codes "Q" and "T"; the larger
+## of the two totals is used, and the "T" observations are returned when that
+## candidate wins with the "T" total.
+## On a tie between exchanges the first one in `exchanges` is chosen.
+## Prints the name of the chosen exchange and returns the filtered xts object.
+  exchanges = c("Q","A","P","B","C","N","D","X","I","M","W","Z");
+  exchangenames = c("NASDAQ","AMEX","ARCA","Boston","NSX","NYSE","NASD ADF and TRF","Philadelphia","ISE","Chicago","CBOE","BATS");
+
+  # total quoted depth (BIDSIZE + OFFERSIZE) for one exchange code
+  depth_of = function(code){
+    selected = selectexchange(ts,code);
+    sum(as.numeric(selected$BIDSIZE)+as.numeric(selected$OFFERSIZE));
+  };
+
+  q_depth = depth_of("Q");
+  t_depth = depth_of("T");
+  use_t = t_depth >= q_depth;
+
+  depths = c(max(q_depth,t_depth), vapply(exchanges[-1], depth_of, numeric(1)));
+
+  # which.max always yields a single winner, even when totals are tied
+  best = which.max(depths);
+  exch = exchanges[best];
+  print(paste("The information of the",exchangenames[best],"exchange was collected"));
+
+  if(exch=="Q" && use_t){exch="T"}
+
+  filteredts = ts[ts$EX==exch];
+  return(filteredts);
+}
+
+
+mergequotessametimestamp = function(ts,selection="median"){  ##FAST
+  ## Merge all quotes that fall in the same second into one observation.
+  ##   ts        : xts object with (at least) BID, BIDSIZE, OFFER, OFFERSIZE.
+  ##   selection : how bid and offer of a second are determined:
+  ##               "median"          - median quote,
+  ##               "maxvolume"       - (median) quote at the largest size,
+  ##               "weightedaverage" - size-weighted average quote.
+  ## BIDSIZE and OFFERSIZE are summed within the second; the remaining columns
+  ## are those of the last observation of every second.
+  ## Stops with an error for any other value of `selection` (before, a warning
+  ## text was printed and the function then failed on an undefined variable).
+  condition = length(selection)==1 && selection %in% c("median","maxvolume","weightedaverage");
+  if(!condition){
+    stop(paste("Check whether",paste(selection,collapse=", "),"is an existing option for the attribute selection."));
+  }
+
+  #find end points:
+  ep = endpoints(ts,"secs");
+
+  #size per second:
+  bidsize = period.apply(ts$BIDSIZE,ep,sumN);
+  offersize =  period.apply(ts$OFFERSIZE,ep,sumN);
+
+  if(selection=="median"){
+    #median per second:
+    bid = period.apply(ts$BID,ep,medianN);
+    offer = period.apply(ts$OFFER,ep,medianN);
+  } else if(selection=="maxvolume"){
+    #maxvolume per second:
+    bid = period.apply(cbind(ts$BID,ts$BIDSIZE),ep,maxvol);
+    offer = period.apply(cbind(ts$OFFER,ts$OFFERSIZE),ep,maxvol);
+  } else {
+    #weighted average per second:
+    bid = period.apply(cbind(ts$BID,ts$BIDSIZE),ep,waverage);
+    offer = period.apply(cbind(ts$OFFER,ts$OFFERSIZE),ep,waverage);
+  }
+
+  ##merge everything:
+  # ep starts with 0; dropping it leaves the row number of the last
+  # observation of every second (ep[-1] also behaves when ep has length 1)
+  lastrows = ep[-1];
+  ts2 = ts[lastrows];
+  ts2$BID = bid;
+  ts2$OFFER = offer;
+
+  ts2$BIDSIZE = bidsize;
+  ts2$OFFERSIZE = offersize;
+
+return(ts2)
+}
+
+
+rmnegspread <- function(ts) {
+  ## Remove observations with a negative spread: only rows whose OFFER is
+  ## strictly larger than the BID are kept (a zero spread is removed as well).
+  positive <- as.numeric(ts$OFFER) > as.numeric(ts$BID)
+  ts[positive]
+}
+
+
+rmlargespread <- function(ts, maxi = 50) {
+  ## Remove observations whose spread (OFFER - BID) is not smaller than
+  ## `maxi` times the median spread of the supplied data.
+  ### WATCH OUT: the median is taken over all of `ts`, so the result is only
+  ### a per-day filter when `ts` holds a single day.
+  spread <- as.numeric(ts$OFFER) - as.numeric(ts$BID)
+  threshold <- maxi * median(spread)
+  ts[threshold > spread]
+}
+
+
+rmoutliers = function(ts,maxi=10,window=50,type="advanced"){
+##Function to remove quotes whose mid-quote deviates by more than `maxi` median
+##absolute deviations from a reference median.
+##
+##type="standard": the reference is the rolling centered median of `window`
+##observations (excluding the observation under consideration).
+##
+##type="advanced": the reference is the value closest to the midquote of
+##these three options:
+##1. Rolling centered median (excluding the observation under consideration)
+##2. Rolling median of the following "window" observations
+##3. Rolling median of the previous "window" observations
+##
+##In both cases the median absolute deviation is that of the centered window.
+##Observations too close to the start or the end of `ts` to have complete
+##windows are always kept.
+##
+##NOTE: Median Absolute deviation chosen contrary to Barndorff-Nielsen et al.
+  print("NOTE: This function is only useful for quotes NOT for trades");
+  if(!(length(type)==1 && type %in% c("standard","advanced"))){
+    stop("rmoutliers: 'type' must be \"standard\" or \"advanced\"");
+  }
+  halfwindow = round(window/2);
+  if(halfwindow < 1){stop("rmoutliers: 'window' must be at least 2")}
+
+  n = dim(ts)[1];
+  midquote = (as.numeric(ts$BID)+as.numeric(ts$OFFER))/2;
+
+  # One flag per observation, indexed by the observation itself. The old
+  # offset bookkeeping left NA entries and a too short vector for "advanced".
+  condition = rep(TRUE,n);
+
+  # number of observations needed on either side of observation i
+  margin = if(type=="standard") halfwindow else window;
+
+  if(n >= 2*margin+1){
+  for(i in (margin+1):(n-margin)){
+    mid = midquote[i];
+    vec = c(midquote[(i-halfwindow):(i-1)],midquote[(i+1):(i+halfwindow)]);
+    center = median(vec);
+
+    if(type=="advanced"){
+      medianv = c(center,median(midquote[(i-window):(i-1)]),median(midquote[(i+1):(i+window)]));
+      # which.min picks a single candidate, also when several are equally close
+      center = medianv[which.min(abs(medianv-mid))];
+    }
+
+    band = maxi*mad(vec);
+    condition[i] = (center-band) < mid & mid < (center+band);
+  }
+  }
+
+  ts[condition];
+}
+
+
+##########################  JUNK  #############################################################
+#conv =function(z){ 
+#  zz = unlist(strsplit(z,",")); 
+#  return(as.numeric(paste(zz[1],zz[2],sep=".")))
+#}
+### make prices numeric ###
+#x = as.matrix(as.vector(test2$PRICE))
+#xx = apply(x,1,conv)
+#test2$PRICE=xx
+
+##appropriate days selection:
+#create list of all trading days
+#start = unlist(strsplit(as.character(start(myxts))," "))[1];
+#end = unlist(strsplit(as.character(end(myxts))," "))[1];
+#alldays = timeSequence(from = start, to = end, by = "day");
+#alldays = alldays[isWeekday(alldays)];

Added: pkg/RTAQ/R/convert_to_RData.R
===================================================================
--- pkg/RTAQ/R/convert_to_RData.R	                        (rev 0)
+++ pkg/RTAQ/R/convert_to_RData.R	2010-01-18 15:47:56 UTC (rev 176)
@@ -0,0 +1,125 @@
+convert = function(from,to,datasource,datadestination,trades=TRUE,quotes=TRUE,ticker,dir=FALSE){
+  ## Convert the raw trade/quote text files of every NYSE business day between
+  ## `from` and `to` (format "%Y-%m-%d") into .RData files.
+  ##   datasource      : directory containing one sub-directory per date.
+  ##   datadestination : directory receiving one sub-directory per date.
+  ##   trades, quotes  : convert the trades / the quotes?
+  ##   ticker          : ticker symbol(s), passed on to the converters.
+  ##   dir             : if TRUE, create `datadestination` and its per-date
+  ##                     sub-directories first.
+  ## NOTE: holidays are only known for 2004-2010 (holidayNYSE(2004:2010)).
+  dates = timeSequence(from,to, format = "%Y-%m-%d", FinCenter = "GMT")
+  dates = dates[isBizday(dates, holidays = holidayNYSE(2004:2010))];
+
+  if(dir){
+  dir.create(datadestination);
+  for(i in seq_len(length(dates))){
+  dirname = paste(datadestination,"\\",as.character(dates[i]),sep="")
+  dir.create(dirname);
+  }
+  }
+
+  for(i in seq_len(length(dates))){
+  # Build the per-date paths in fresh variables: overwriting the arguments
+  # made every date after the first one end up in a nested, wrong directory.
+  datasourcex = paste(datasource,"\\",dates[i],sep="");
+  datadestinationx = paste(datadestination,"\\",dates[i],sep="");
+  if(trades==TRUE){convert_trades(datasourcex,datadestinationx,ticker)}
+  # the quotes conversion was guarded by `trades` instead of `quotes`
+  if(quotes==TRUE){convert_quotes(datasourcex,datadestinationx,ticker)}
+  }
+}
+
+
+convert_trades = function(datasource,datadestination,ticker){
+  ## Convert the raw "<ticker>_trades.txt" files in `datasource` into xts
+  ## objects and save each of them as "<ticker>_trades.RData" in
+  ## `datadestination`.
+  ##   datasource      : existing directory holding the raw text files.
+  ##   datadestination : existing directory receiving the .RData files.
+  ##   ticker          : character vector of ticker symbols.
+  ##
+  ## Both directories are resolved to absolute paths and files are addressed
+  ## by their full path, so the working directory is left untouched. Before,
+  ## setwd(datadestination) inside the loop made every ticker after the first
+  ## one be looked up in the destination directory.
+  datasource = normalizePath(datasource, mustWork=TRUE);
+  datadestination = normalizePath(datadestination, mustWork=TRUE);
+
+  # make the time notation consistent ("9:30:00" -> "09:30:00")
+  adjtime = function(z){
+  zz = unlist(strsplit(z,":"));
+  if(nchar(zz[1])!=2){
+  return(paste(paste(0,zz[1],sep=""),zz[2],zz[3],sep=":"))}
+  return(z);
+  }
+
+  klassen = c(rep("character",4),"real","real","character",rep("numeric",2));
+  tradecolnames=c("SYMBOL","DATE","EX","TIME","PRICE","SIZE","COND","CR","G127");
+
+  for(i in seq_along(ticker)){
+  tfile_name = file.path(datasource,paste(ticker[i],"_trades.txt",sep=""));
+  tdata = try(read.delim(tfile_name,sep="",header=FALSE,dec=",",colClasses=klassen),silent=TRUE);
+
+  if(inherits(tdata,"try-error")){
+  # Only an empty input file means "no trades"; any other failure is
+  # reported with its own message.
+  if(!grepl("no lines available in input",tdata[1],fixed=TRUE)){stop(tdata[1])}
+  print(paste("no trades for stock",ticker[i]));
+  # NOTE(review): missingt and currentdate are not defined in this function;
+  # they have to exist in an enclosing environment - confirm.
+  missingt = rbind(missingt,c(currentdate,ticker[i]));
+  } else {
+
+  #assign column names
+  colnames(tdata)=tradecolnames
+
+  ### solve issue when there is no COND ###
+  # Rows without a G127 value have their fields shifted one column to the
+  # left: COND becomes 0, CR gets the old COND and G127 gets the old CR.
+  shifted = is.na(tdata$G127);
+  cond=tdata$COND[shifted];
+  cr=tdata$CR[shifted];
+
+  tdata$COND[shifted]=0;
+  tdata$CR[shifted]= cond;
+  tdata$G127[shifted] = cr;
+  rm(cond,cr);
+
+  ## solve issue that time notation is inconsistent (no 09h but 9h)
+  oldtime = as.matrix(as.vector(tdata$TIME));
+  newtime = apply(oldtime,1,adjtime);
+  tdata$TIME = newtime;
+  rm(oldtime,newtime);
+
+  ##make xts object ##
+  tdobject=timeDate(paste(as.vector(tdata$DATE), as.vector(tdata$TIME)),format = "%m/%d/%Y %H:%M:%S",FinCenter = "GMT",zone="GMT");
+  tdata = xts(tdata,order.by=tdobject);
+  # keep SYMBOL, EX, PRICE, SIZE, COND, CR, G127 (DATE and TIME are in the index)
+  tdata = tdata[,c(1,3,5,6,7,8,9)];
+  rm(tdobject);
+  }
+
+  xts_name = paste(ticker[i],"_trades.RData",sep="");
+  save(tdata, file = file.path(datadestination,xts_name));
+  }
+  }
+
+
+convert_quotes = function(datasource,datadestination,ticker){
+  ## Convert the raw "<ticker>_quotes.txt" files in `datasource` into xts
+  ## objects and save each of them as "<ticker>_quotes.RData" in
+  ## `datadestination`.
+  ##   datasource      : existing directory holding the raw text files.
+  ##   datadestination : existing directory receiving the .RData files.
+  ##   ticker          : character vector of ticker symbols.
+  ##
+  ## Both directories are resolved to absolute paths and files are addressed
+  ## by their full path, so the working directory is left untouched. Before,
+  ## setwd(datadestination) inside the loop made every ticker after the first
+  ## one be looked up in the destination directory.
+  datasource = normalizePath(datasource, mustWork=TRUE);
+  datadestination = normalizePath(datadestination, mustWork=TRUE);
+
+  # make the time notation consistent ("9:30:00" -> "09:30:00")
+  adjtime = function(z){
+  zz = unlist(strsplit(z,":"));
+  if(nchar(zz[1])!=2){
+  return(paste(paste(0,zz[1],sep=""),zz[2],zz[3],sep=":"))}
+  return(z);
+  }
+
+  klassen = c(rep("character",4),rep("real",5));
+  tradecolnames=c("SYMBOL","DATE","EX","TIME","BID","BIDSIZE","OFFER","OFFERSIZE","MODE");
+
+  for(i in seq_along(ticker)){
+  qfile_name = file.path(datasource,paste(ticker[i],"_quotes.txt",sep=""));
+
+  qdata=try(read.delim(qfile_name,sep="",header=FALSE,dec=",",colClasses=klassen),silent=TRUE);
+
+  if(inherits(qdata,"try-error")){
+  # Only an empty input file means "no quotes"; any other failure is
+  # reported with its own message.
+  if(!grepl("no lines available in input",qdata[1],fixed=TRUE)){stop(qdata[1])}
+  print(paste("no quotes for stock",ticker[i]));
+  # NOTE(review): missingq and currentdate are not defined in this function;
+  # they have to exist in an enclosing environment - confirm.
+  missingq=rbind(missingq,c(currentdate,ticker[i]));
+  } else {
+  #assign column names
+  colnames(qdata)=tradecolnames
+
+  ####important because of data mistakes: keep the rows of this ticker only
+  qdata = qdata[qdata$SYMBOL==ticker[i],]
+
+  ## solve issue that time notation is inconsistent (no 09h but 9h)
+  oldtime = as.matrix(as.vector(qdata$TIME));
+  newtime = apply(oldtime,1,adjtime);
+  qdata$TIME = newtime;
+  rm(oldtime,newtime);
+
+  ##make xts object
+  test=paste(as.vector(qdata$DATE), as.vector(qdata$TIME))
+  tdobject=timeDate(test,format = "%m/%d/%Y %H:%M:%S",FinCenter = "GMT",zone="GMT");
+  qdata = xts(qdata,order.by=tdobject);
+  # keep SYMBOL, EX, BID, BIDSIZE, OFFER, OFFERSIZE, MODE (DATE and TIME are in the index)
+  qdata = qdata[,c(1,3,5,6,7,8,9)];
+  }
+
+  xts_name = paste(ticker[i],"_quotes.RData",sep="");
+  save(qdata, file = file.path(datadestination,xts_name));
+  }
+  }
+
+
+

Added: pkg/RTAQ/R/manipulation.R
===================================================================
--- pkg/RTAQ/R/manipulation.R	                        (rev 0)
+++ pkg/RTAQ/R/manipulation.R	2010-01-18 15:47:56 UTC (rev 176)
@@ -0,0 +1,474 @@
+#MANIPULATION FUNCTIONS:
+
+TAQload = function(ticker,from,to,trades=TRUE,quotes=FALSE,datasource="V:\\Jobstudent\\TAQdata"){
+##Function to load the taq data from a certain stock
+#From&to (both included) should be in the format "%Y-%m-%d" e.g."2008-11-30"
+#  ticker     : ticker symbol of the stock.
+#  trades     : load the "<ticker>_trades.RData" files (object tdata)?
+#  quotes     : load the "<ticker>_quotes.RData" files (object qdata)?
+#  datasource : directory containing one sub-directory per date.
+#Returns the trades, the quotes, or list(trades=, quotes=) when both are
+#requested; NULL (invisibly) when neither is requested.
+#Stops when a "missing" marker file exists for one of the dates.
+#NOTE: holidays are only known for 2004-2010 (holidayNYSE(2004:2010)).
+  dates = timeSequence(from,to, format = "%Y-%m-%d", FinCenter = "GMT")
+  dates = dates[isBizday(dates, holidays = holidayNYSE(2004:2010))];
+
+  # Load object `objname` from the file of every date and stack the days.
+  # Every day is added exactly once; before, `if(i>=1)` also fired for the
+  # first day, so that day ended up in the result twice.
+  load_all = function(filesuffix,missingprefix,objname,label){
+  pieces = vector("list",length(dates));
+  for(i in seq_len(length(dates))){
+  datasourcex = paste(datasource,"\\",dates[i],sep="");
+  filename = paste(datasourcex,"\\",ticker,filesuffix,sep="");
+  ifmissingname = paste(datasourcex,"\\",missingprefix,ticker,".RData",sep="");
+  if(file.exists(ifmissingname)){stop(paste("no ",label," available on ",dates[i],sep=""))}
+  # load into a private environment so that nothing leaks between days
+  env = new.env();
+  load(filename,envir=env);
+  pieces[[i]] = get(objname,envir=env);
+  }
+  do.call(rbind,pieces);
+  }
+
+  if(trades){totaldata = load_all("_trades.RData","missing_","tdata","trades")}
+  if(quotes){totaldataq = load_all("_quotes.RData","missingquotes_","qdata","quotes")}
+
+  if(trades && quotes){return(list(trades = totaldata,quotes=totaldataq))}
+  if(trades){return(totaldata)}
+  if(quotes){return(totaldataq)}
+  invisible(NULL)
+  }
+
+
+matchtq = function(tdata,qdata,adjustment=2,...){ ##FAST VERSION
+  ## Match every trade with the most recent quote.
+  ##   tdata      : xts object with the trades.
+  ##   qdata      : xts object with the quotes.
+  ##   adjustment : number added to the quote timestamps before matching.
+  ##   ...        : accepted and ignored, so that the callers in this file
+  ##                that still pass `maxit` (rs, liquidity) keep working.
+  ## Returns an xts object with one row per trade timestamp: the trade columns
+  ## followed by the quote columns except the first two.
+  ## NOTE(review): the two dropped quote columns are presumably SYMBOL and EX - confirm.
+  tt = dim(tdata)[2];
+  index(qdata) = index(qdata) + adjustment;
+
+  #merge:
+  merged = merge(tdata,qdata);
+
+  ##fill NA's: carry the last quote forward over the quote columns
+  merged[,((tt+1):dim(merged)[2])] = na.locf(as.zoo(merged[,((tt+1):dim(merged)[2])]), na.rm=FALSE);
+
+  #Select trades:
+  index(tdata)=as.POSIXct(index(tdata));
+  index(merged)=as.POSIXct(index(merged));
+  merged = merged[index(tdata)];
+
+  #return useful parts: all trade columns, and the quote columns from the third on
+  merged = merged[,c((1:tt),((tt+3):(dim(merged)[2])))];
+
+  ##a bit rough but otherwise opening price disappears...
+  # remaining NA's are filled with the next observation
+  merged = as.xts(na.locf(as.zoo(merged),fromLast=TRUE));
+
+  index(merged) = as.timeDate(index(merged));
+  return(merged)
+}
+
+matchtq_old = function(tdata,qdata,adjustment=2){ ##FAST VERSION
+  ## Older variant of matchtq: match trades with the most recent quote, but
+  ## keep only the first trade that is matched to any given quote.
+  ##   tdata      : xts object with the trades.
+  ##   qdata      : xts object with the quotes.
+  ##   adjustment : number added to the quote timestamps before matching.
+  ## Returns the trade columns followed by the quote columns except the first two.
+  tt = dim(tdata)[2];  
+  index(qdata) = index(qdata) + adjustment;
+  
+  #merge:
+  # `counter` numbers the quotes, so that trades matched to the same quote can be recognised later
+  counter = xts(as.character(1:dim(qdata)[1]),order.by=index(qdata))#an integer for every quote
+  merged = cbind(qdata,counter);
+  merged = merge(tdata,merged);
+  
+  ##fill NA's: carry the last quote (and its counter) forward
+  merged[,((tt+1):dim(merged)[2])] = na.locf(as.zoo(merged[,((tt+1):dim(merged)[2])]), na.rm=FALSE);
+  
+  #Select trades:
+  merged = merged[index(tdata)];
+  
+  #Remove duplicated quotes: rows repeating a counter value (last column) are dropped
+  merged = merged[!duplicated(merged[,dim(merged)[2]])];
+
+  #return useful parts: trade columns plus the quote columns from the third on, without the counter
+  merged = merged[,c((1:tt),((tt+3):(dim(merged)[2]-1)))];
+
+  return(merged)
+}
+
+
+
+gettradedir = function(data){
+##Function returns a vector with the inferred trade direction:
+##1 for a buy and -1 for a sell, one value per row of `data`.
+##  data : matched trades and quotes with the columns PRICE, BID and OFFER.
+##Rules: a price above the midpoint is a buy; a price equal to the midpoint
+##is a buy after an uptick, or after a zero tick that follows an uptick over
+##two trades. Everything else is a sell.
+##NOTE: the value of the first (and second) observation should be ignored if price=midpoint for the first (second) observation.
+  bid = as.numeric(data$BID);
+  offer = as.numeric(data$OFFER);
+  midpoints = (bid + offer)/2;
+  price = as.numeric(data$PRICE);
+  n = length(price);
+
+  buy1 = price > midpoints; #definitely a buy
+  equal = price == midpoints;
+
+  # diff() returns an empty vector for short series, and the result is cut to
+  # n elements, so series with fewer than three trades are handled as well.
+  dif1 = c(TRUE,diff(price) > 0)[seq_len(n)];#for trades=midpoints: if uptick=>buy
+  equal1 = c(TRUE,diff(price) == 0)[seq_len(n)];#for trades=midpoints: zero-uptick=>buy
+  dif2 = c(TRUE,TRUE,diff(price,lag=2) > 0)[seq_len(n)];
+
+  buy = buy1 | (dif1 & equal) | (equal1 & dif2 & equal);
+
+  return(ifelse(buy,1,-1));
+}
+
+
+es <- function(data) {
+  # Effective spread as xts object: twice the trade direction times the
+  # distance between the trade price and the quote midpoint.
+  mid <- (as.numeric(data$BID) + as.numeric(data$OFFER)) / 2
+  trade_price <- as.numeric(data$PRICE)
+  direction <- gettradedir(data)
+
+  xts(2 * direction * (trade_price - mid), order.by = index(data))
+}
+
+
+rs = function(data,tdata,qdata){
+###Function returns the realized spread as an xts object:
+###twice the trade direction times the distance between the trade price and
+###the quote midpoint 5 minutes later.
+#Please note that the returned object can contain less observations than the original "data"
+#because of the need to find quotes that match the trades 5 min ahead
+
+#arguments
+#data=> xts object containing matched trades and quotes
+#tdata and qdata, the xts object containing the trades and quotes respectively
+
+  # Quotes matched with an adjustment of 300 seconds. The former extra
+  # argument maxit=50 is not part of matchtq's signature and made this call fail.
+  data2 = matchtq(tdata,qdata,300);
+
+  ##Next part solves the problem that unequal number of obs (in data and data2) is possible when computing the RS:
+  ##only the timestamps present in both objects are kept
+  if(dim(data2)[1]>dim(data)[1]){
+  condition = as.vector(as.character(index(data2)))%in%as.vector(as.character(index(data)));
+  data2 = subset(data2,condition,select=1:(dim(data)[2]));
+  data = subset(data,as.vector(as.character(index(data)))%in%as.vector(as.character(index(data2))),select=1:(dim(data2)[2]));
+  }
+
+  if(dim(data2)[1]<dim(data)[1]){
+  condition = as.vector(as.character(index(data)))%in%as.vector(as.character(index(data2)));
+  data = subset(data,condition,select=1:(dim(data2)[2]));
+  data2 = subset(data2,as.vector(as.character(index(data2)))%in%as.vector(as.character(index(data))),select=1:(dim(data)[2]));
+  }
+
+
+  # midpoints come from the later quotes (data2), prices and direction from data
+  bid = as.numeric(data2$BID);
+  offer = as.numeric(data2$OFFER);
+  midpoints = (bid + offer)/2;
+  price = as.numeric(data$PRICE);
+  d = gettradedir(data);
+  rs = 2*d*(price-midpoints);
+
+  rs_xts = xts(rs,order.by=index(data));
+  return(rs_xts);
+}
+
+value_trade <- function(data) {
+  # Trade value (PRICE times SIZE) as xts object.
+  trade_price <- as.numeric(data$PRICE)
+  trade_size <- as.numeric(data$SIZE)
+
+  xts(trade_price * trade_size, order.by = index(data))
+}
+
+signed_value_trade <- function(data) {
+  # Signed trade value as xts object: trade direction times PRICE times SIZE.
+  trade_price <- as.numeric(data$PRICE)
+  trade_size <- as.numeric(data$SIZE)
+  direction <- gettradedir(data)
+
+  xts(direction * trade_price * trade_size, order.by = index(data))
+}
+
+
+signed_trade_size <- function(data) {
+  # Signed size of the trade as xts object: trade direction times SIZE.
+  trade_size <- as.numeric(data$SIZE)
+  direction <- gettradedir(data)
+
+  xts(direction * trade_size, order.by = index(data))
+}
+
+di_diff <- function(data) {
+  # Depth imbalance (as a difference) as xts object:
+  # direction * (OFFERSIZE - BIDSIZE) / (OFFERSIZE + BIDSIZE).
+  depth_bid <- as.numeric(data$BIDSIZE)
+  depth_offer <- as.numeric(data$OFFERSIZE)
+
+  direction <- gettradedir(data)
+  imbalance <- (direction * (depth_offer - depth_bid)) / (depth_offer + depth_bid)
+  xts(imbalance, order.by = index(data))
+}
+
+di_div <- function(data) {
+  # Depth imbalance (as a ratio) as xts object:
+  # (OFFERSIZE / BIDSIZE) raised to the power of the trade direction.
+  depth_bid <- as.numeric(data$BIDSIZE)
+  depth_offer <- as.numeric(data$OFFERSIZE)
+  direction <- gettradedir(data)
+
+  imbalance <- (depth_offer / depth_bid)^direction
+  xts(imbalance, order.by = index(data))
+}
+
+pes <- function(data) {
+  # Proportional effective spread as xts object: 100 times the effective
+  # spread divided by the quote midpoint.
+  spread <- es(data)
+  mid <- (as.numeric(data$BID) + as.numeric(data$OFFER)) / 2
+
+  proportional <- 100 * spread / mid
+  xts(proportional, order.by = index(data))
+}
+
+prs = function(data,tdata,qdata){
+#returns the Proportional Realized Spread as xts object:
+#100 times the realized spread divided by the quote midpoint.
+#  data         : xts object containing matched trades and quotes.
+#  tdata, qdata : the trades and the quotes; rs() needs them to match the
+#                 trades with later quotes. (rs() was called with `data`
+#                 only, which always failed.)
+#The result can contain less observations than `data`, see rs().
+  realized = rs(data,tdata,qdata);
+  bid = as.numeric(data$BID);
+  offer = as.numeric(data$OFFER);
+  midpoints = (bid + offer)/2;
+
+  # keep the midpoints of the trades that are still present in the realized spread
+  kept = as.character(index(data)) %in% as.character(index(realized));
+
+  prs = 100*as.numeric(realized)/midpoints[kept];
+  prs_xts = xts(prs,order.by=index(realized));
+  return(prs_xts);
+}
+
+price_impact = function(data,tdata,qdata){
+#returns the Price impact as xts object:
+#half the difference between the effective and the realized spread.
+#  data         : xts object containing matched trades and quotes.
+#  tdata, qdata : the trades and the quotes; rs() needs them to match the
+#                 trades with later quotes. (rs() was called with `data`
+#                 only, which always failed.)
+#The result can contain less observations than `data`, see rs().
+  realized = rs(data,tdata,qdata);
+  effective = es(data);
+
+  # keep the effective spread of the trades that are still present in the realized spread
+  kept = as.character(index(data)) %in% as.character(index(realized));
+
+  impact = (as.numeric(effective)[kept]-as.numeric(realized))/2;
+  pi_xts = xts(impact,order.by=index(realized));
+  return(pi_xts);
+}
+
+prop_price_impact = function(data,tdata,qdata){
+#returns the Proportional Price impact as xts object:
+#100 times the price impact divided by the quote midpoint.
+#  data         : xts object containing matched trades and quotes.
+#  tdata, qdata : the trades and the quotes; rs() needs them to match the
+#                 trades with later quotes. (rs() was called with `data`
+#                 only, which always failed.)
+#The result can contain less observations than `data`, see rs().
+  realized = rs(data,tdata,qdata);
+  effective = es(data);
+  bid = as.numeric(data$BID);
+  offer = as.numeric(data$OFFER);
+  midpoints = (bid + offer)/2;
+
+  # keep the trades that are still present in the realized spread
+  kept = as.character(index(data)) %in% as.character(index(realized));
+
+  prop_pi = (100*(as.numeric(effective)[kept]-as.numeric(realized))/2)/midpoints[kept];
+  prop_pi_xts = xts(prop_pi,order.by=index(realized));
+  return(prop_pi_xts);
+}
+
+
+tspread <- function(data) {
+  # Half traded spread as xts object: trade direction times the distance
+  # between the trade price and the quote midpoint.
+  mid <- (as.numeric(data$BID) + as.numeric(data$OFFER)) / 2
+  trade_price <- as.numeric(data$PRICE)
+  direction <- gettradedir(data)
+
+  xts(direction * (trade_price - mid), order.by = index(data))
+}
+
+pts = function(data){
+#returns the proportional half traded spread as xts object:
+#trade direction times (price - midpoint), divided by the midpoint.
+#  data : xts object containing matched trades and quotes.
+  bid = as.numeric(data$BID);
+  offer = as.numeric(data$OFFER);
+  midpoints = (bid + offer)/2;
+  price = as.numeric(data$PRICE);
+  d = gettradedir(data);
+  pts = (d*(price-midpoints))/midpoints;
+
+  pts_xts = xts(pts,order.by=index(data));
+  # the result used to be dropped: `ts` (the stats function) was returned instead
+  return(pts_xts);
+}
+
+p_return_sqr <- function(data) {
+  # Squared log return on trade prices as xts object; the return of the
+  # first observation is set to 0.
+  trade_price <- as.numeric(data$PRICE)
+  log_return <- c(0, diff(log(trade_price)))
+
+  xts(log_return^2, order.by = index(data))
+}
+
+p_return_abs = function(data){
+#returns the absolute log return on Trade prices as xts object;
+#the return of the first observation is set to 0.
+#  data : xts object with the column PRICE.
+  price = as.numeric(data$PRICE);
+  logreturn = c(0,diff(log(price)));
+  abs_return = abs(logreturn);
+
+  abs_return_xts = xts(abs_return,order.by=index(data));
+  # the undefined object sqr_return_xts used to be returned here
+  return(abs_return_xts);
+}
+
+qs <- function(data) {
+  # Quoted spread (OFFER - BID) as xts object.
+  spread <- as.numeric(data$OFFER) - as.numeric(data$BID)
+
+  xts(spread, order.by = index(data))
+}
+
+pqs <- function(data) {
+  # Proportional quoted spread as xts object: 100 times the quoted spread
+  # divided by the quote midpoint.
+  best_bid <- as.numeric(data$BID)
+  best_offer <- as.numeric(data$OFFER)
+  mid <- (best_bid + best_offer) / 2
+  spread <- best_offer - best_bid
+
+  xts(100 * spread / mid, order.by = index(data))
+}
+
+logqs <- function(data) {
+  # Logarithm of the quoted spread as xts object: log(OFFER / BID).
+  best_bid <- as.numeric(data$BID)
+  best_offer <- as.numeric(data$OFFER)
+
+  xts(log(best_offer / best_bid), order.by = index(data))
+}
+
+logsize <- function(data) {
+  # Log quoted size as xts object: log(BIDSIZE) + log(OFFERSIZE).
+  depth_bid <- as.numeric(data$BIDSIZE)
+  depth_offer <- as.numeric(data$OFFERSIZE)
+
+  xts(log(depth_bid) + log(depth_offer), order.by = index(data))
+}
+
+qslope <- function(data) {
+  # Quoted slope as xts object: quoted spread divided by the log quoted size.
+  log_depth <- logsize(data)
+  spread <- qs(data)
+
+  slope <- spread / log_depth
+
+  xts(slope, order.by = index(data))
+}
+
+logqslope <- function(data) {
+  # Log quoted slope as xts object: log quoted spread divided by the log
+  # quoted size.
+  log_spread <- logqs(data)
+  log_depth <- logsize(data)
+
+  slope <- log_spread / log_depth
+
+  xts(slope, order.by = index(data))
+}
+
+
+
+mq_return_sqr <- function(data) {
+  # Squared midquote returns as xts object; the returns themselves come
+  # from mq_return(data).
+  returns <- mq_return(data)
+
+  xts(returns^2, order.by = index(data))
+}
+
+mq_return_abs <- function(data) {
+  # Absolute midquote returns as xts object; the returns themselves come
+  # from mq_return(data).
+  returns <- mq_return(data)
+
+  xts(abs(returns), order.by = index(data))
+}
+
+
+liquidity = function(data,tdata,qdata){
+##Function computes many liquidity measures and returns an xts object containing them
+
+##First part solves the problem that unequal number of obs (in data and data2) is possible when computing the RS
+  data2 = matchtq(tdata,qdata,300,maxit=50);
+  if(dim(data2)[1]>dim(data)[1]){
+  condition = as.vector(as.character(index(data2)))%in%as.vector(as.character(index(data)));
+  data2 = subset(data2,condition,select=1:(dim(data)[2]));
+  data = subset(data,as.vector(as.character(index(data)))%in%as.vector(as.character(index(data2))),select=1:(dim(data2)[2]));
+  }
+  if(dim(data2)[1]<dim(data)[1]){
+  condition = as.vector(as.character(index(data)))%in%as.vector(as.character(index(data2)));
+  data = subset(data,condition,select=1:(dim(data2)[2]));
+  data2 = subset(data2,as.vector(as.character(index(data2)))%in%as.vector(as.character(index(data))),select=1:(dim(data)[2]));
+  }
+
+##Variables needed for the computation of the liquidity measures
+  bid = as.numeric(data$BID);
+  offer = as.numeric(data$OFFER);
+  midpoints = (bid + offer)/2;
+  price = as.numeric(data$PRICE);
+  size = as.numeric(data$SIZE);
+  d = gettradedir(data);
+  bidsize = as.numeric(data$BIDSIZE);
+  offersize = as.numeric(data$OFFERSIZE);
+  return = c(0,log(price[2:length(price)])-log(price[1:length(price)-1]));
+  mq_return = mq_return(data);
+  midpoints2 = (as.numeric(data2$BID)+as.numeric(data2$OFFER))/2;
+
[TRUNCATED]

To get the complete diff run:
    svnlook diff /svnroot/blotter -r 176


More information about the Blotter-commits mailing list