[Blotter-commits] r461 - in pkg/RTAQ: . R man src

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Mon Nov 22 13:04:36 CET 2010


Author: jonathan
Date: 2010-11-22 13:04:35 +0100 (Mon, 22 Nov 2010)
New Revision: 461

Added:
   pkg/RTAQ/NAMESPACE
   pkg/RTAQ/R/newfunctions_names.R
   pkg/RTAQ/R/realized.R
   pkg/RTAQ/R/zzz.R
   pkg/RTAQ/man/RTSCov.rd
   pkg/RTAQ/man/TSCov.rd
   pkg/RTAQ/man/aggregatePrice.Rd
   pkg/RTAQ/man/aggregateQuotes.Rd
   pkg/RTAQ/man/aggregateTrades.Rd
   pkg/RTAQ/man/as.realizedObject.Rd
   pkg/RTAQ/man/autoSelectExchangeQuotes.Rd
   pkg/RTAQ/man/autoSelectExchangeTrades.Rd
   pkg/RTAQ/man/getTradeDirection.Rd
   pkg/RTAQ/man/makePsd.Rd
   pkg/RTAQ/man/makeReturns.Rd
   pkg/RTAQ/man/matchTradesQuotes.Rd
   pkg/RTAQ/man/mergeTradesSameTimestamp.Rd
   pkg/RTAQ/man/refreshTime.Rd
   pkg/RTAQ/man/rmNegativeSpread.Rd
   pkg/RTAQ/man/salesCondition.Rd
   pkg/RTAQ/man/sample_qdata.Rd
   pkg/RTAQ/man/sample_qdataraw.Rd
   pkg/RTAQ/man/sample_tdata.Rd
   pkg/RTAQ/man/sample_tdataraw.Rd
   pkg/RTAQ/man/spotVol.rd
   pkg/RTAQ/man/tqLiquidity.Rd
   pkg/RTAQ/man/tradesCleanupFinal.Rd
   pkg/RTAQ/src/
   pkg/RTAQ/src/rtaq.c
Modified:
   pkg/RTAQ/DESCRIPTION
   pkg/RTAQ/R/aggregate.R
   pkg/RTAQ/R/cleanupfunctions.R
   pkg/RTAQ/R/convert_to_RData.R
   pkg/RTAQ/R/manipulation.R
   pkg/RTAQ/R/periodicityTAQ.R
   pkg/RTAQ/R/totalcleanup.R
   pkg/RTAQ/R/volatility.R
   pkg/RTAQ/man/ExchangeHoursOnly.Rd
   pkg/RTAQ/man/MedRV.Rd
   pkg/RTAQ/man/MinRV.Rd
   pkg/RTAQ/man/RBPCov.Rd
   pkg/RTAQ/man/RCov.Rd
   pkg/RTAQ/man/ROWCov.Rd
   pkg/RTAQ/man/RTAQ-package.Rd
   pkg/RTAQ/man/TAQload.Rd
   pkg/RTAQ/man/aggregatets.Rd
   pkg/RTAQ/man/convert.Rd
   pkg/RTAQ/man/mergequotessametimestamp.Rd
   pkg/RTAQ/man/nozeroprices.Rd
   pkg/RTAQ/man/nozeroquotes.Rd
   pkg/RTAQ/man/quotescleanup.Rd
   pkg/RTAQ/man/rmlargespread.Rd
   pkg/RTAQ/man/rmoutliers.Rd
   pkg/RTAQ/man/rmtradeoutliers.Rd
   pkg/RTAQ/man/sample_5minprices.Rd
   pkg/RTAQ/man/sample_5minprices_jumps.Rd
   pkg/RTAQ/man/selectexchange.Rd
   pkg/RTAQ/man/thresholdcov.Rd
   pkg/RTAQ/man/tradescleanup.Rd
Log:
major update: refreshtime, tqliquidity, new realized covariance estimators, ... (tscov and rtscov need to be doublechecked)

Modified: pkg/RTAQ/DESCRIPTION
===================================================================
--- pkg/RTAQ/DESCRIPTION	2010-11-21 16:32:24 UTC (rev 460)
+++ pkg/RTAQ/DESCRIPTION	2010-11-22 12:04:35 UTC (rev 461)
@@ -7,5 +7,6 @@
 Maintainer: Jonathan Cornelissen <Jonathan.cornelissen at econ.kuleuven.be>
 Description: The Trades and Quotes data of the New York Stock Exchange is a popular input for the implementation of intraday trading strategies, the measurement of liquidity and volatility and investigation of the market microstructure, among others. This package contains a collection of R functions to carefully clean and match the trades and quotes data, calculate ex post liquidity and volatility measures and detect price jumps in the data.
 Depends: xts, timeDate
+Suggests: realized, robustbase, cubature, mvtnorm
 License: GPL
 LazyLoad: yes
\ No newline at end of file

Added: pkg/RTAQ/NAMESPACE
===================================================================
--- pkg/RTAQ/NAMESPACE	                        (rev 0)
+++ pkg/RTAQ/NAMESPACE	2010-11-22 12:04:35 UTC (rev 461)
@@ -0,0 +1,73 @@
+useDynLib(RTAQ, .registration = TRUE);
+
+#export(anova.trls, anovalist.trls, correlogram, expcov, gaucov, Kaver,
+#       Kenvl, Kfn, plot.trls, ppgetregion, ppinit, pplik, ppregion,
+#       predict.trls, prmat, Psim, semat, sphercov, SSI, Strauss,
+#       surf.gls, surf.ls, trls.influence, trmat, variogram)
+
+export(
+#cleaning
+autoSelectExchangeQuotes, 
+autoSelectExchangeTrades, 
+exchangeHoursOnly,
+mergeQuotesSameTimestamp, 
+mergeTradesSameTimestamp,
+noZeroPrices,
+noZeroQuotes,
+quotesCleanup,
+rmLargeSpread,
+rmNegativeSpread,
+rmOutliers,
+rmTradeOutliers,
+salesCondition,
+selectExchange, 
+tradesCleanup, 
+tradesCleanupFinal, 
+#Topic data manipulation
+aggregatePrice,
+aggregateQuotes,
+aggregateTrades,
+aggregatets,
+as.realizedObject,
+matchTradesQuotes,
+TAQLoad,
+refreshTime,
+#Topic datasets
+#sample_5minprices,
+#sample_5minprices_jumps,
+#sample_qdata, 
+#sample_qdataraw,
+#sample_tdata,
+#sample_tdataraw,
+#Topic liquidity
+getTradeDirection, 
+tqLiquidity,
+#Topic volatility
+MedRV, 
+MinRV,
+RBPCov,
+RCov,
+ROWCov,
+thresholdCov,
+TSCov,
+RTSCov,
+makePsd,
+#misc
+convert,
+makeReturns,
+#to be adjusted :
+#liquidity :
+#es, rs, value_trade, signed_value_trade,  
+#di_diff, di_div, pes, prs, price_impact, prop_price_impact,  
+#tspread, pts, p_return_sqr, p_return_abs, qs, pqs,  
+#logqs, logsize, qslope, logqslope, mq_return_sqr, mq_return_abs,
+#rest :
+spotVol, 
+sumN,                       #TODO with match.fun!!
+previoustick                #TODO with match.fun!!
+)
+
+#importFrom(mvtnorm, dmvnorm);
+#importFrom(cubature, adaptIntegrate);
+
+#S3method(summary,trls)

Modified: pkg/RTAQ/R/aggregate.R
===================================================================
--- pkg/RTAQ/R/aggregate.R	2010-11-21 16:32:24 UTC (rev 460)
+++ pkg/RTAQ/R/aggregate.R	2010-11-22 12:04:35 UTC (rev 461)
@@ -11,19 +11,35 @@
 return(c)
 }
 
-##AGGREGATION;
-aggregatets = function (ts, FUN = previoustick, on = "minutes", k = 1, weights = NULL,dropna=F)
+period.apply2 = function (x, INDEX, FUN2, ...)  # apply FUN2 to each stretch of x delimited by consecutive entries of INDEX; the argument is presumably named FUN2 so it does not clash with apply()'s own FUN when called through apply()
 {
-  #Valid values for the argument "on" include: “secs” (seconds), “seconds”, “mins” (minutes), “minutes”,“hours”, “days”, “weeks”.
+    x <- try.xts(x, error = FALSE)  # attempt conversion to xts; error = FALSE is presumably "do not stop if conversion fails" - confirm against xts docs
+    FUN <- match.fun(FUN2)  # FUN2 may be a function or the name of one
+    xx <- sapply(1:(length(INDEX) - 1), function(y) {  # one call per pair of consecutive INDEX entries
+        FUN(x[(INDEX[y] + 1):INDEX[y + 1]], ...)  # elements INDEX[y]+1 .. INDEX[y+1] of x; extra arguments are forwarded to FUN
+    })
+    reclass(xx, x[INDEX])  # hand the results back through reclass() with x[INDEX] as the template object
+}
 
+
+
+## AGGREGATION;
+aggregatets = function (ts, FUN = "previoustick", on = "minutes", k = 1, weights = NULL,dropna=FALSE) 
+{
+    makethispartbetter = ((!is.null(weights))| on=="days"|on=="weeks"| (FUN!="previoustick")|dropna);
+    if(makethispartbetter)  {
+
+    FUN = match.fun(FUN);
+    
     if (is.null(weights)) {
-        ep = endpoints(ts, on, k);
-        ts2 = period.apply(ts, ep, FUN);
+        ep = endpoints(ts, on, k)
+        if(dim(ts)[2]==1){ ts2 = period.apply(ts, ep, FUN) }
+        if(dim(ts)[2]>1){  ts2 = xts(apply(ts,2,FUN=period.apply2,FUN2=FUN,INDEX=ep),order.by=index(ts)[ep],)}
     }
     if (!is.null(weights)) {
         tsb = cbind(ts, weights)
         ep = endpoints(tsb, on, k)
-        ts2 = period.apply(tsb, ep, FUN = weightedaverage)
+        ts2 = period.apply(tsb, ep, FUN = match.fun(weightedaverage) )
     }
     if (on == "minutes" | on == "mins" | on == "secs" | on == 
         "seconds") {
@@ -34,143 +50,195 @@
             secs = k
         }
         a = .index(ts2) + (secs - .index(ts2)%%secs)
-        ts3 = .xts(ts2, a)
+        ts3 = .xts(ts2, a,tz="GMT")
     }
     if (on == "hours") {
         secs = 3600
         a = .index(ts2) + (secs - .index(ts2)%%secs)
-        ts3 = .xts(ts2, a)
+        ts3 = .xts(ts2, a,tz="GMT")
     }
     if (on == "days") {
         secs = 24 * 3600
         a = .index(ts2) + (secs - .index(ts2)%%secs) - (24 * 
             3600)
-        ts3 = .xts(ts2, a)
+        ts3 = .xts(ts2, a,tz="GMT")
     }
     if (on == "weeks") {
         secs = 24 * 3600 * 7
         a = (.index(ts2) + (secs - (.index(ts2) + (3L * 86400L))%%secs)) - 
             (24 * 3600)
-        ts3 = .xts(ts2, a)
+        ts3 = .xts(ts2, a,tz="GMT")
     }
 
-	if(!dropna){
-	if(on !="weeks"|on!="days"){
-	if(on=="secs"|on=="seconds"){tby = "s"}
-	if(on=="mins"|on=="minutes"){tby = "min"}
-      if (on == "hours"){tby = "h"}
-	by = paste(k,tby,sep=" ");
-	allindex = as.timeDate(seq(start(ts3),end(ts3),by=by));
-	xx = xts(rep("1",length(allindex)),order.by=allindex);
-	ts3 = merge(ts3,xx)[,1];
-
-	}#currently for weeks and days, na are still dropped
-	}#end dropna if
+    if (!dropna) {
+        if (on != "weeks" | on != "days") {
+            if (on == "secs" | on == "seconds") {
+                tby = "s"
+            }
+            if (on == "mins" | on == "minutes") {
+                tby = "min"
+            }
+            if (on == "hours") {
+                tby = "h"
+            }
+            by = paste(k, tby, sep = " ")
+            allindex = as.timeDate(seq(start(ts3), end(ts3), 
+                by = by))
+            xx = xts(rep("1", length(allindex)), order.by = allindex)
+            ts3 = merge(ts3, xx)[, (1:dim(ts)[2])]
+        }
+    }
+    
     index(ts3) = as.timeDate(index(ts3));
-
-    return(ts3)
+    return(ts3);
+    }
+    
+    if(!makethispartbetter){
+     if (on == "secs" | on == "seconds") { secs = k; tby = paste(k,"sec",sep=" ")}
+     if (on == "mins" | on == "minutes") { secs = 60*k; tby = paste(60*k,"sec",sep=" ")}
+     if (on == "hours") {secs = 3600*k; tby = paste(3600*k,"sec",sep=" ")}
+    
+    FUN = match.fun(FUN);
+    
+    g = seq(start(ts), end(ts), by = tby);
+    rawg = as.numeric(as.POSIXct(g,tz="GMT"));
+    newg = rawg + (secs - rawg%%secs);
+    g = as.timeDate(as.POSIXct(newg,origin="1970-01-01",tz="GMT"));
+    ts3 = na.locf(merge(ts, zoo(, g)))[as.POSIXct(g,tz="GMT")]; 
+    return(ts3) 
+    }
 }
 
 #PRICE (specificity: opening price and previoustick)
 
-agg_price = function(ts,FUN = previoustick,on="minutes",k=1){
-ts = dataformatc(ts);
-##Return new timeseries as xts object where
-##first observation is always the opening price
-##subsequent observations are the closing prices over the interval with endpoint the timestamp of the result
-##on indicates the type of period to aggregate over
-##k indicates the number of periods to aggregate over
-  ts2 = aggregatets(ts, FUN=previoustick, on, k);
+aggregatePrice = function (ts, FUN = "previoustick", on = "minutes", k = 1,marketopen="09:30:00",marketclose = "16:00:00") 
+{
+    ts = dataformatc(ts)
+    ts2 = aggregatets(ts, FUN = FUN, on, k)
+    date = strsplit(as.character(index(ts)), " ")[[1]][1]
 
-  #adjustmet correct opening price
-  date = strsplit(as.character(index(ts))," ")[[1]][1]
-  realopen = "09:30:00";
-  a = as.timeDate(paste(date,realopen));
-  b = xts(ts[1],a);
-  ts3 = c(b,ts2);
+	#open
+    a = as.timeDate(paste(date, marketopen))
+    b = as.xts(matrix(as.numeric(ts[1]),nrow=1), a)
+    ts3 = c(b, ts2)
 
-  ##adjustment for correct closing price:
-  realclose = "16:00:00";
-  aa = as.timeDate(paste(date,realclose));
-  condition = index(ts3) < aa;
-  ts3=ts3[condition];
-  bb = xts(ts[length(ts)],aa);
-  ts3 = c(ts3,bb);
+	#close
+    aa = as.timeDate(paste(date, marketclose))
+    condition = index(ts3) < aa
+    ts3 = ts3[condition]
+    bb = as.xts(matrix(as.numeric(last(ts)),nrow=1), aa)
+    ts3 = c(ts3, bb)
 
-  return(ts3);
+    return(ts3)
 }
 
 #VOLUME: (specificity: always sum)
-agg_volume = function (ts, FUN = sumN, on = "minutes", k = 5, includeopen = FALSE) 
+agg_volume= function(ts, FUN = "sumN", on = "minutes", k = 5, includeopen = FALSE,marketopen="09:30:00",marketclose="16:00:00") 
 {
     ts = dataformatc(ts)
     if (!includeopen) {
-        ts3 = aggregatets(ts, FUN = sumN, on, k)
+        ts3 = aggregatets(ts, FUN = FUN, on, k)
     }
     if (includeopen) {
-        ts2 = aggregatets(ts, FUN = sumN, on, k)
+        ts2 = aggregatets(ts, FUN = FUN, on, k)
         date = strsplit(as.character(index(ts)), " ")[[1]][1]
-        realopen = "09:30:00"
-        a = as.timeDate(paste(date, realopen))
-        b = xts(as.numeric(ts[1]), a)
+        a = as.timeDate(paste(date, marketopen))
+	  b = as.xts(matrix(as.numeric(ts[1]),nrow=1), a)
         ts3 = c(b, ts2)
-	  }
-	#new
-	    realclose = "16:00:00";
-	    aa = as.timeDate(paste(date, realclose));
-	    condition = index(ts3) < aa;
-	    ts4 = ts3[condition];
-	    lastinterval = sum(as.numeric(ts3[!condition]));
-	    bb = xts(lastinterval, aa)
-	    ts4 = c(ts4, bb)
+    }
+
+    aa = as.timeDate(paste(date, marketclose))
+    condition = index(ts3) < aa
+    ts4 = ts3[condition]
+
+    lastinterval = matrix(colSums(matrix(ts3[!condition],ncol=dim(ts3)[2])),ncol=dim(ts3)[2])
+    bb = xts(lastinterval, aa)
+    ts4 = c(ts4, bb)
+
     return(ts4)
 }
 
-###TRADES AGGREGATION:
-agg_trades = function(tdata,on="minutes",k=5){
-tdata = dataformatc(tdata);
+aggregateTrades =  function (tdata, on = "minutes", k = 5,marketopen="09:30:00",marketclose="16:00:00") 
+{
+	tdata = dataformatc(tdata)
+	tdatacheck(tdata)
   ## Aggregates an entire trades xts object (tdata) over a "k"-minute interval.
   ## Returned xts-object contains: SYMBOL,EX,PRICE,SIZE.
   ## Variables COND, CORR, G127 are dropped because aggregating them makes no sense.
   ## NOTE: first observation (opening price) always included.
 
-  PRICE = agg_price(tdata$PRICE,on=on,k=k);
-  SIZE = agg_volume(tdata$SIZE,on=on,k=k,includeopen=TRUE);
-  EX = c(tdata$EX[1],aggregatets(tdata$EX, FUN=previoustick, on=on, k=k));
-  SYMBOL = rep(tdata$SYMBOL[1],length(PRICE));
-  all = data.frame(SYMBOL,EX,PRICE,SIZE);
-  colnames(all) =c("SYMBOL","EX","PRICE","SIZE");
-  ts = xts(all,index(SIZE));
+	selection = colnames(tdata)%in%c("PRICE","EX","SYMBOL");
+	tdata1 = tdata[,selection];
+	PRICE = aggregatePrice(tdata$PRICE,on=on,k=k,marketopen=marketopen,marketclose=marketclose);
+	SIZE = agg_volume(tdata$SIZE, on = on, k = k, includeopen = TRUE,marketopen=marketopen,marketclose=marketclose)
 
-  return(ts);
+	EX = rep(tdata$EX[1], length(PRICE));
+	SYMBOL = rep(tdata$SYMBOL[1], length(PRICE));
+
+	all = data.frame(SYMBOL, EX, PRICE, SIZE);
+	colnames(all) = c("SYMBOL", "EX", "PRICE", "SIZE");
+	ts = xts(all, index(SIZE));
+	return(ts);
 }
 
+###QUOTES AGGREGATION:
+aggregateQuotes = function(qdata,on="minutes",k=5,marketopen="09:30:00",marketclose="16:00:00"){
+  qdata = dataformatc(qdata);
+  qdatacheck(qdata);
 
-###QUOTES AGGREGATION:
-agg_quotes = function(qdata,on="minutes",k=5){
-qdata = dataformatc(qdata);
   ## Aggregates an entire quotes xts object (qdata) object over a "k"-minute interval.
   ## Returned xts-object contains: SYMBOL,EX,BID,BIDSIZ,OFR,OFRSIZ.
   ## Variable MODE is dropped because aggregation makes no sense.
   ## "includeopen" determines whether to include the exact opening quotes.
-  
-  BID = agg_price(qdata$BID,on=on,k=k);
-  OFR = agg_price(qdata$OFR,on=on,k=k);
 
-  BIDSIZ = agg_volume(qdata$BIDSIZ,on=on,k=k,includeopen=TRUE);
-  OFRSIZ = agg_volume(qdata$OFRSIZ,on=on,k=k,includeopen=TRUE);
+  BIDOFR = aggregatePrice(cbind(qdata$BID,qdata$OFR),on=on,k=k,marketopen=marketopen,marketclose=marketclose);
+  BIDOFRSIZ = agg_volume(cbind(qdata$BIDSIZ,qdata$OFRSIZ),on=on,k=k,includeopen=TRUE,marketopen=marketopen,marketclose=marketclose);
 
-  EX = agg_price(qdata$EX,on=on,k=k)
-  SYMBOL = rep(qdata$SYMBOL[1],length(BIDSIZ));
+  EX = rep(qdata$EX[1],dim(BIDOFR)[1]);
+  SYMBOL = rep(qdata$SYMBOL[1],dim(BIDOFR)[1]);
 
-  all = data.frame(SYMBOL,EX,BID,BIDSIZ,OFR,OFRSIZ);
+  all = data.frame(SYMBOL,EX,BIDOFR[,1],BIDOFRSIZ[,1],BIDOFR[,2],BIDOFRSIZ[,2]);
   colnames(all) =c("SYMBOL","EX","BID","BIDSIZ","OFR","OFRSIZ");
 
-  ts = xts(all,index(BIDSIZ));
-
+  ts = xts(all,index(BIDOFR));
   return(ts);
 }
 
 ##LIQUIDITY AGGREGATION:
 ##Just combine aggregation functions and spot liquidity functions!
 
+
+###### refresh time ########
+refreshTime = function(pdata){ # pdata: list of time-indexed series; returns one xts with a column per list element, indexed by the time points computed by the C routine "refreshpoints"
+dim = length(pdata); # number of series in the list (NOTE: this local name shadows base::dim inside the function)
+lengths = rep(0,dim+1); # lengths[1] stays 0 so that the cumsum below yields start offsets
+  for(i in 1:dim){
+    lengths[i+1] = length(pdata[[i]]); # assumes length() equals the number of observations, i.e. single-column series - TODO confirm
+  }
+  minl = min(lengths[(2:(dim+1))]); #number of obs for stock with least observations
+  lengths = cumsum(lengths);         # now cumulative: series i occupies alltimes[(lengths[i]+1):lengths[i+1]]
+  alltimes = rep(0,lengths[dim+1]); #all timestamps in 1 vector
+  for(i in 1:dim){
+    alltimes[(lengths[i]+1):lengths[i+1]] = as.numeric(as.POSIXct(index(pdata[[i]]),tz="GMT")); # timestamps as seconds since the epoch
+  }
+  
+  #get refresh points via C:
+  x = .C("refreshpoints",as.integer(alltimes),as.integer(lengths), # args 1-2: stacked timestamps (as.integer drops fractional seconds) and cumulative offsets
+  as.integer(rep(0,minl)),as.integer(dim), # arg 3: buffer of length minl, read back below as the refresh times; arg 4: number of series
+  as.integer(0),as.integer(rep(0,minl*dim)),as.integer(minl)); # arg 5: scalar read back as the number of refresh points; arg 6: buffer read back as per-series indices; arg 7: minl
+
+  #get matrix with "refresh prices"
+  newlength=x[[5]]; # presumably filled in by the C routine with the number of refresh points found
+  pmatrix = matrix(ncol=dim,nrow=newlength);
+  
+  for(i in 1:dim){
+  selection = x[[6]][((i-1)*minl+1):(i*minl)]; # the i-th slice of length minl of the index buffer
+  pmatrix[,i] = pdata[[i]][ selection[1:newlength] ]; # values of series i at the selected positions; only the first newlength entries are used
+  }
+
+  time = as.timeDate(as.POSIXct(x[[3]][1:newlength],origin="1970-01-01",tz="GMT")); # integer seconds since the epoch back to timeDate, GMT
+
+  resmatrix = xts(pmatrix,order.by=time)
+  return(resmatrix);
+}
+

Modified: pkg/RTAQ/R/cleanupfunctions.R
===================================================================
--- pkg/RTAQ/R/cleanupfunctions.R	2010-11-21 16:32:24 UTC (rev 460)
+++ pkg/RTAQ/R/cleanupfunctions.R	2010-11-22 12:04:35 UTC (rev 461)
@@ -10,7 +10,7 @@
 ########## DATA CLEAN-UP: FOR ALL DATA #####################
 
 ####FUNCTION TO FILTER EXCHANGE HOURS ONLY: ExchangeHoursOnly
-ExchangeHoursOnly = function(data, daybegin = "09:30:00",dayend="16:00:00")
+exchangeHoursOnly = function(data, daybegin = "09:30:00",dayend="16:00:00")
 {
 data = dataformatc(data);
     # a function to excerpt data within exchange trading hours
@@ -37,23 +37,25 @@
 }
 
 
-nozeroprices = function(tdata){
+noZeroPrices = function(tdata){
 tdata = dataformatc(tdata);
+tdatacheck(tdata);
 ####FUNCTION TO DELETE ZERO PRICES: nozeroprices
 filteredts = tdata[as.numeric(tdata$PRICE)!= 0];
 return(filteredts);
 }
 
 
-selectexchange = function(data,exch="N"){ 
+selectExchange = function(data,exch="N"){ 
 data = dataformatc(data);
 ###FUNCTION TO SELECT THE OBSERVATIONS OF A SINGLE EXCHANGE: selectexchange
 filteredts = data[data$EX==exch];
 return(filteredts);
 }
 
-autoselectexchange = function(tdata){
+autoSelectExchangeTrades = function(tdata){
 tdata = dataformatc(tdata);
+tdatacheck(tdata);
 ## AUTOSELECT EXCHANGE WITH HIGHEST NUMBER OF SHARES TRADED (for trades) ON:
 #function returns ts with obs of only 1 exchange
 #searches exchange with a maximum on the variable "SIZE"
@@ -94,8 +96,9 @@
 #}
 
 #zivot
-salescond <- function (tdata)
+salesCondition <- function (tdata)
 {
+tdatacheck(tdata);
     filteredts = tdata[tdata$COND == "0" | tdata$COND == "E" |
         tdata$COND == "F" | tdata$COND == "" | tdata$COND == "@F"]
     return(filteredts)
@@ -129,8 +132,9 @@
   return(b);
 }
 
-mergesametimestamp = function(tdata,selection="median"){
+mergeTradesSameTimestamp = function(tdata,selection="median"){
 tdata = dataformatc(tdata);
+tdatacheck(tdata);
   #find end points:
   ep = endpoints(tdata,"secs");
 
@@ -151,9 +155,11 @@
 return(tdata2)
 }
 
-rmtradeoutliers = function(tdata,qdata){
+rmTradeOutliers = function(tdata,qdata){
 tdata = dataformatc(tdata);
 qdata = dataformatc(qdata);
+qdatacheck(qdata);
+tdatacheck(tdata);
 
 ##Function to delete entries with prices that are above the ask plus the bid-ask
 ##spread. Similar for entries with prices below the bid minus the bid-ask
@@ -167,23 +173,25 @@
   upper = offer+spread;
   lower = bid-spread;
 
-  tdata[(price<upper) & (price>lower)];
+  tdata = tdata[(price<upper) & (price>lower)];
   return(tdata);
 }
 
 
 #################       QUOTE SPECIFIC FUNCTIONS:       #################
 
-nozeroquotes = function(qdata){
+noZeroQuotes = function(qdata){
 qdata = dataformatc(qdata);
+qdatacheck(qdata);
 ####FUNCTION TO DELETE ZERO QUOTES: nozeroquotes
 filteredts = qdata[as.numeric(qdata$BID)!= 0& as.numeric(qdata$OFR)!= 0];
 return(filteredts);
 }
 
 
-autoselectexchangeq = function(qdata){
+autoSelectExchangeQuotes = function(qdata){
 qdata = dataformatc(qdata);
+qdatacheck(qdata);
 ####Autoselect exchange with highest value for (bidsize+offersize)
   nobs=c();
   exchanges = c("Q","A","P","B","C","N","D","X","I","M","W","Z");
@@ -215,8 +223,9 @@
 }
 
 
-mergequotessametimestamp = function(qdata,selection="median"){  ##FAST
+mergeQuotesSameTimestamp = function(qdata,selection="median"){  ##FAST
 qdata = dataformatc(qdata);
+qdatacheck(qdata);
   condition=selection=="median"|selection=="maxvolume"|selection=="weightedaverage";
   if(!condition){print(paste("WARNING:The result will be corrupted. Check whether",selection,"is an existing option for the attribute selection."))}
 
@@ -257,15 +266,17 @@
 }
 
 
-rmnegspread = function(qdata){
+rmNegativeSpread = function(qdata){
 qdata = dataformatc(qdata);
+qdatacheck(qdata);
 ##function to remove observations with negative spread
   condition = as.numeric(qdata$OFR)>as.numeric(qdata$BID);
   qdata[condition];
 }
 
 
-rmlargespread = function(qdata,maxi=50){
+rmLargeSpread = function(qdata,maxi=50){
+qdatacheck(qdata);
 ##function to remove observations with a spread larger than 50 times the median spread that day
 ###WATCH OUT: works only correct if supplied input data consists of 1 day...
   spread = as.numeric(qdata$OFR)-as.numeric(qdata$BID);
@@ -273,9 +284,10 @@
   return(qdata[condition])
 }
 
-rmoutliers = function (qdata, maxi = 10, window = 50, type = "advanced")
+rmOutliers = function (qdata, maxi = 10, window = 50, type = "advanced")
 {
 qdata = dataformatc(qdata);
+qdatacheck(qdata);
 ##function to remove entries for which the mid-quote deviated by more than 10 median absolute deviations 
 ##from a rolling centered median (excluding the observation under consideration) of 50 observations if type = "standard".
 
@@ -288,7 +300,6 @@
 ##3. Rolling median of the previous "window" observations
 
 ##NOTE: Median Absolute deviation chosen contrary to Barndorff-Nielsen et al.
-    print("NOTE: This function is only useful for quotes NOT for trades")
     window = floor(window/2) * 2
     condition = c();
     halfwindow = window/2;
@@ -357,6 +368,7 @@
 ###zivot
 correctedTrades <- function (tdata)
 {
+tdatacheck(tdata);
     filteredts = tdata[tdata$CR == " 0"]
     return(filteredts)
 }

Modified: pkg/RTAQ/R/convert_to_RData.R
===================================================================
--- pkg/RTAQ/R/convert_to_RData.R	2010-11-21 16:32:24 UTC (rev 460)
+++ pkg/RTAQ/R/convert_to_RData.R	2010-11-22 12:04:35 UTC (rev 461)
@@ -1,5 +1,5 @@
-##########HELPFUNCTION####
-readdata = function(path=NULL, extention="txt",header=F,dims=0){
+########## HELPFUNCTION ####
+readdata = function(path=NULL, extention="txt",header=FALSE,dims=0){
 #extention should either be "txt" or "csv"
 if(!(extention=="txt"|extention=="csv")){print("Please select a supported extention")}
 colnames = rep("x",dims);
@@ -29,7 +29,7 @@
 }
 
 ############################
-convert = function(from,to,datasource,datadestination,trades=TRUE,quotes=TRUE,ticker,dir=F,extention="txt",header=F,tradecolnames=NULL,quotecolnames=NULL,format="%m/%d/%Y %H:%M:%S"){
+convert = function(from,to,datasource,datadestination,trades=TRUE,quotes=TRUE,ticker,dir=FALSE,extention="txt",header=FALSE,tradecolnames=NULL,quotecolnames=NULL,format="%m/%d/%Y %H:%M:%S"){
   dates = timeSequence(from,to, format = "%Y-%m-%d", FinCenter = "GMT")
   dates = dates[isBizday(dates, holidays = holidayNYSE(2004:2010))];
   missingt=missingq=matrix(ncol=2,nrow=0);
@@ -51,8 +51,10 @@
 }
 
 convert_trades = function (datasource, datadestination, ticker, extention = "txt", 
-    header = F, tradecolnames = NULL, format = "%m/%d/%Y %H:%M:%S") 
-{
+    header = FALSE, tradecolnames = NULL, format = "%m/%d/%Y %H:%M:%S") 
+{  
+    missingt=matrix(ncol=2,nrow=0);
+
     setwd(datasource)
     adjtime = function(z) {
         zz = unlist(strsplit(z, ":"))
@@ -107,8 +109,10 @@
 
 
 convert_quotes = function (datasource, datadestination, ticker, extention = "txt", 
-    header = F, quotecolnames = NULL, format = "%m/%d/%Y %H:%M:%S") 
+    header = FALSE, quotecolnames = NULL, format = "%m/%d/%Y %H:%M:%S") 
 {
+    missingq=matrix(ncol=2,nrow=0);
+    
     setwd(datasource)
     adjtime = function(z) {
         zz = unlist(strsplit(z, ":"))

Modified: pkg/RTAQ/R/manipulation.R
===================================================================
--- pkg/RTAQ/R/manipulation.R	2010-11-21 16:32:24 UTC (rev 460)
+++ pkg/RTAQ/R/manipulation.R	2010-11-22 12:04:35 UTC (rev 461)
@@ -1,5 +1,5 @@
 #MANIPULATION FUNCTIONS:
-TAQload = function(tickers,from,to,trades=TRUE,quotes=FALSE,datasource=NULL,variables=NULL){ 
+TAQLoad = function(tickers,from,to,trades=TRUE,quotes=FALSE,datasource=NULL,variables=NULL){ 
   if( is.null(datasource)){print("Please provide the argument 'datasource' to indicate in which folder your data is stored")}
 
   if(!(trades&quotes)){#not both trades and quotes demanded
@@ -25,13 +25,15 @@
   dates = timeSequence(as.character(from),as.character(to), format = "%Y-%m-%d", FinCenter = "GMT")
   dates = dates[isBizday(dates, holidays = holidayNYSE(2004:2010))];
 
-  if(trades){
+  if(trades){ tdata=NULL;
   for(i in 1:length(dates)){
   datasourcex = paste(datasource,"\\",dates[i],sep="");
   filename = paste(datasourcex,"\\",ticker,"_trades.RData",sep="");
 
   ifmissingname = paste(datasourcex,"\\missing_",ticker,".RData",sep="");  
-  if(file.exists(ifmissingname)){stop(paste("no trades available on ",dates[i],sep=""))}
+
+  if(file.exists(ifmissingname)){stop(paste("No trades available on ",dates[i],sep=""))}
+  if(!file.exists(filename)){stop(paste("The file ",filename," does not exist. Please read the documentation.",sep=""))}
   if(file.exists(ifmissingname)==FALSE){
   load(filename);
   if(i==1)	{
@@ -53,13 +55,14 @@
 				}
 				}
 
-  if(quotes){
+  if(quotes){ qdata=NULL;
   for(i in 1:length(dates)){
   datasourcex = paste(datasource,"\\",dates[i],sep="");
   filename = paste(datasourcex,"\\",ticker,"_quotes.RData",sep="");
   ifmissingname = paste(datasourcex,"\\missingquotes_",ticker,".RData",sep="");
   
   if(file.exists(ifmissingname)){stop(paste("no quotes available on ",dates[i],sep=""))}
+  if(!file.exists(filename)){stop(paste("The file ",filename," does not exist. Please read the documentation.",sep=""))}
   if(file.exists(ifmissingname)==FALSE){
   load(filename);
 
@@ -88,9 +91,12 @@
   }
 
 
-matchtq = function(tdata,qdata,adjustment=2){ ##FAST VERSION
+matchTradesQuotes = function(tdata,qdata,adjustment=2){ ##FAST VERSION
 tdata = dataformatc(tdata);
 qdata = dataformatc(qdata);
+qdatacheck(qdata);
+tdatacheck(tdata);
+
   tt = dim(tdata)[2];  
   index(qdata) = index(qdata) + adjustment;
 
@@ -119,43 +125,46 @@
   return(merged)
 }
 
-matchtq_old = function(tdata,qdata,adjustment=2){ ##FAST VERSION
-qdata = dataformatc(qdata);
-tdata = dataformatc(tdata);
+#matchtq_old = function(tdata,qdata,adjustment=2){ ##FAST VERSION
+#qdata = dataformatc(qdata);
+#tdata = dataformatc(tdata);
+#
+#  tt = dim(tdata)[2];  
+#  index(qdata) = index(qdata) + adjustment;
+#  
+#  #merge:
+#  counter = xts(as.character(1:dim(qdata)[1]),order.by=index(qdata))#an integer for every quote
+#  merged = cbind(qdata,counter);
+#  merged = merge(tdata,merged);
+#  
+#  ##fill NA's:
+#  merged[,((tt+1):dim(merged)[2])] = na.locf(as.zoo(merged[,((tt+1):dim(merged)[2])]), na.rm=FALSE);
+#  
+#  #Select trades:
+#  merged = merged[index(tdata)];
+#  
+#  #Remove duplicated quotes:
+#  merged = merged[!duplicated(merged[,dim(merged)[2]])];
+#
+#  #return usefull parts:
+#  merged = merged[,c((1:tt),((tt+3):(dim(merged)[2]-1)))];
+#
+#  return(merged)
+#}
 
-  tt = dim(tdata)[2];  
-  index(qdata) = index(qdata) + adjustment;
-  
-  #merge:
-  counter = xts(as.character(1:dim(qdata)[1]),order.by=index(qdata))#an integer for every quote
-  merged = cbind(qdata,counter);
-  merged = merge(tdata,merged);
-  
-  ##fill NA's:
-  merged[,((tt+1):dim(merged)[2])] = na.locf(as.zoo(merged[,((tt+1):dim(merged)[2])]), na.rm=FALSE);
-  
-  #Select trades:
-  merged = merged[index(tdata)];
-  
-  #Remove duplicated quotes:
-  merged = merged[!duplicated(merged[,dim(merged)[2]])];
 
-  #return usefull parts:
-  merged = merged[,c((1:tt),((tt+3):(dim(merged)[2]-1)))];
 
-  return(merged)
-}
+getTradeDirection = function(tqdata,...){
+  if(hasArg(data)){ tqdata = data; rm(data) }
+  tqdata = dataformatc(tqdata);
+  tqdatacheck(tqdata); 
 
-
-
-gettradedir = function(data){
-data = dataformatc(data);
 ##Function returns a vector with the inferred trade direction:
 ##NOTE: the value of the first (and second) observation should be ignored if price=midpoint for the first (second) observation.
-  bid = as.numeric(data$BID);
-  offer = as.numeric(data$OFR);
+  bid = as.numeric(tqdata$BID);
+  offer = as.numeric(tqdata$OFR);
   midpoints = (bid + offer)/2;
-  price = as.numeric(data$PRICE);
+  price = as.numeric(tqdata$PRICE);
  
   buy1 = price > midpoints; #definitely a buy
   equal = price == midpoints;
@@ -171,7 +180,6 @@
   return(buy);
 }
 
-
 es = function(data){
 data = dataformatc(data);
 #returns the effective spread as xts object
@@ -185,7 +193,6 @@
   return(es);
 }
 
-
 rs = function(data,tdata,qdata){
 data = dataformatc(data);
 qdata = dataformatc(qdata);
@@ -306,10 +313,10 @@
   return(prs_xts);
 }
 
-price_impact = function(data){
+price_impact = function(data,tdata,qdata){
 data = dataformatc(data);
 #returns the Price impact as xts object
-  rs = rs(data);
+  rs = rs(data,tdata,qdata);
   es = es(data);
 
   pi = (es-rs)/2;
@@ -317,10 +324,10 @@
   return(pi_xts);
 }
 
-prop_price_impact = function(data){
+prop_price_impact = function(data,tdata,qdata){
 data = dataformatc(data);
 #returns the Proportional Price impact as xts object
-  rs = rs(data);
+  rs = rs(data,tdata,qdata);
   es = es(data);
   bid = as.numeric(data$BID);
   offer = as.numeric(data$OFR);
@@ -355,7 +362,7 @@
   pts = (d*(price-midpoints))/midpoints;
 
   pts_xts = xts(pts,order.by=index(data));
-  return(ts);
+  return(pts_xts);
 }
 
 p_return_sqr = function(data){
@@ -439,8 +446,6 @@
   return(logqslope_xts);
 }
 
-
-
 mq_return_sqr = function(data){
 data = dataformatc(data);
 #returns midquote squared returns slope as xts object
@@ -463,76 +468,39 @@
   return(mq_return_abs_xts);
 }
 
-liquidity <- function (data, tdata, qdata)
-{
-data = dataformatc(data);
-qdata = dataformatc(qdata);
-tdata = dataformatc(tdata);
-##Function computes many liquidity measures and returns an xts object containing them
+tqLiquidity <- function(tqdata=NULL,tdata=NULL,qdata=NULL,type,...) {
+  if(hasArg(data)){ tqdata = data }
+  if(!is.null(tqdata)){tqdatacheck(tqdata)}
+  if(!is.null(qdata)){qdatacheck(qdata)}
+  if(!is.null(tdata)){tdatacheck(tdata)}
+  
+  result=switch(type,
+  es = es(tqdata),
+  rs = rs(tqdata,tdata,qdata),
+  value_trade = value_trade(tqdata),
+  signed_value_trade = signed_value_trade(tqdata),
+  di_diff = di_diff(tqdata),
+  pes = pes(tqdata),
+  prs = prs(tqdata,tdata,qdata),
+  price_impact = price_impact(tqdata,tdata,qdata),
+  prop_price_impact = prop_price_impact(tqdata,tdata,qdata),
+  tspread =tspread(tqdata),
+  pts = pts(tqdata),
+  p_return_sqr = p_return_sqr(tqdata),
+  p_return_abs = p_return_abs(tqdata),
+  qs = qs(tqdata),
+  pqs = pqs(tqdata),
+  logqs = logqs(tqdata),
+  logsize = logsize(tqdata),
+  qslope = qslope(tqdata),
+  logqslope = logqslope(tqdata),
+  mq_return_sqr = mq_return_sqr(tqdata),
+  mq_return_abs = mq_return_abs(tqdata),
+  signed_trade_size = signed_trade_size(tqdata)
+  )
 
-##First part solves the problem that unequal number of obs (in data and data2) is possible when computing the RS
-
-    data2 = matchtq(tdata, qdata, adjustment = 300)
-    if (dim(data2)[1] > dim(data)[1]) {
-        condition = as.vector(as.character(index(data2))) %in%
-            as.vector(as.character(index(data)))
-        data2 = subset(data2, condition, select = 1:(dim(data)[2]))
-        data = subset(data, as.vector(as.character(index(data))) %in%
-            as.vector(as.character(index(data2))), select = 1:(dim(data2)[2]))
-    }
-    if (dim(data2)[1] < dim(data)[1]) {
-        condition = as.vector(as.character(index(data))) %in%
-            as.vector(as.character(index(data2)))
-        data = subset(data, condition, select = 1:(dim(data2)[2]))
-        data2 = subset(data2, as.vector(as.character(index(data2))) %in%
-            as.vector(as.character(index(data))), select = 1:(dim(data)[2]))
-    }
-    bid = as.numeric(data$BID)
-    offer = as.numeric(data$OFR)
-    midpoints = (bid + offer)/2
-    price = as.numeric(data$PRICE)
-    size = as.numeric(data$SIZE)
-    d = gettradedir(data)
-    bidsize = as.numeric(data$BIDSIZ)
-    offersize = as.numeric(data$OFRSIZ)
-    return = c(0, log(price[2:length(price)]) - log(price[1:length(price) -
-        1]))
-    mq_return = mq_return(data)
-    midpoints2 = (as.numeric(data2$BID) + as.numeric(data2$OFR))/2
-    es = 2 * d * (price - midpoints)
-    rs = 2 * d * (price - midpoints2)
-    value_trade = price * size
-    signed_value_trade = d * price * size
-    signed_trade_size = d * size
-    di_diff = (d * (offersize - bidsize))/(offersize + bidsize)
-    di_div = (offersize/bidsize)^d
-    pes = 100 * es/midpoints
-    prs = 100 * rs/midpoints
-    price_impact = (es - rs)/2
-    prop_price_impact = (100 * price_impact)/midpoints
-    tspread = d * (price - midpoints)
-    pts = tspread/midpoints
-    p_return_sqr = return^2
-    p_return_abs = abs(return)
-    qs = offer - bid
-    pqs = 100 * qs/midpoints
-    logqs = log(offer/bid)
-    logsize = log(bidsize) + log(offersize)
-    qslope = qs/logsize
-    logqslope = logqs/logsize
-    mq_return_sqr = mq_return^2
-    mq_return_abs = abs(mq_return)
-    liquid = cbind(es, rs, value_trade, signed_value_trade, di_diff,
-        di_div, pes, prs, price_impact, prop_price_impact, tspread,
-        pts, p_return_sqr, p_return_abs, qs, pqs, logqs, logsize,
-        qslope, logqslope, mq_return_sqr, mq_return_abs)
-    names = c("es", "rs", "value_trade", "signed_value_trade",
-        "di_diff", "di_div", "pes", "prs", "price_impact", "prop_price_impact",
-        "tspread", "pts", "p_return_sqr", "p_return_abs", "qs", "pqs",
-        "logqs", "logsize", "qslope", "logqslope", "mq_return_sqr",
-        "mq_return_abs")
-    colnames(liquid) = names
-    return(liquid)
+  colnames(result)=type;
+  return(result);
 }
 
 ##help_function:
@@ -550,15 +518,24 @@
 
 
 ###Zivot:
-p_return <- function (data)
-{
-    price = as.numeric(data$PRICE)
-    log.return = c(0, log(price[2:length(price)]) - log(price[1:length(price) -
-        1]))
-    return_xts = xts(log.return, order.by = index(data))
-    return(return_xts)
+makeReturns = function(ts){
+  l = dim(ts)[1];
+  x = matrix(as.numeric(ts),nrow=l);
+  x[(2:l),] = log(x[(2:l),]) - log(x[(1:(l-1)),])
+  x[1,] = rep(0,dim(ts)[2]);
+  x = xts(x,order.by=index(ts));
+  return(x);
 }
 
+#p_return <- function (data)
+#{
+#    price = as.numeric(data$PRICE)
+#    log.return = c(0, log(price[2:length(price)]) - log(price[1:length(price) -
+#        1]))
+#    return_xts = xts(log.return, order.by = index(data))
+#    return(return_xts)
+#}
+
 p_return_abs <- function (data)
 {
     price = as.numeric(data$PRICE)

Added: pkg/RTAQ/R/newfunctions_names.R
===================================================================
--- pkg/RTAQ/R/newfunctions_names.R	                        (rev 0)
+++ pkg/RTAQ/R/newfunctions_names.R	2010-11-22 12:04:35 UTC (rev 461)
@@ -0,0 +1,23 @@
+agg_price = function(...){aggregatePrice(...)};                           
+agg_quotes = function(...){aggregateQuotes(...)};                         
+agg_trades = function(...){aggregateTrades(...)};                         
+autoselectexchange = function(...){autoSelectExchangeTrades(...)};        
+autoselectexchangeq = function(...){autoSelectExchangeQuotes(...)};       
+ExchangeHoursOnly = function(...){exchangeHoursOnly(...)};                
+gettradedir = function(...){getTradeDirection(...)};                      
+matchtq = function(...){matchTradesQuotes(...)};                          
+mergequotessametimestamp = function(...){mergeQuotesSameTimestamp(...)};  
+mergesametimestamp = function(...){mergeTradesSameTimestamp(...)};        
+nozeroprices = function(...){noZeroPrices(...)};                          
+nozeroquotes = function(...){noZeroQuotes(...)};                          
+quotescleanup = function(...){quotesCleanup(...)};                        
+rmlargespread = function(...){rmLargeSpread(...)};                        
[TRUNCATED]

To get the complete diff run:
    svnlook diff /svnroot/blotter -r 461


More information about the Blotter-commits mailing list