[Blotter-commits] r461 - in pkg/RTAQ: . R man src
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Mon Nov 22 13:04:36 CET 2010
Author: jonathan
Date: 2010-11-22 13:04:35 +0100 (Mon, 22 Nov 2010)
New Revision: 461
Added:
pkg/RTAQ/NAMESPACE
pkg/RTAQ/R/newfunctions_names.R
pkg/RTAQ/R/realized.R
pkg/RTAQ/R/zzz.R
pkg/RTAQ/man/RTSCov.rd
pkg/RTAQ/man/TSCov.rd
pkg/RTAQ/man/aggregatePrice.Rd
pkg/RTAQ/man/aggregateQuotes.Rd
pkg/RTAQ/man/aggregateTrades.Rd
pkg/RTAQ/man/as.realizedObject.Rd
pkg/RTAQ/man/autoSelectExchangeQuotes.Rd
pkg/RTAQ/man/autoSelectExchangeTrades.Rd
pkg/RTAQ/man/getTradeDirection.Rd
pkg/RTAQ/man/makePsd.Rd
pkg/RTAQ/man/makeReturns.Rd
pkg/RTAQ/man/matchTradesQuotes.Rd
pkg/RTAQ/man/mergeTradesSameTimestamp.Rd
pkg/RTAQ/man/refreshTime.Rd
pkg/RTAQ/man/rmNegativeSpread.Rd
pkg/RTAQ/man/salesCondition.Rd
pkg/RTAQ/man/sample_qdata.Rd
pkg/RTAQ/man/sample_qdataraw.Rd
pkg/RTAQ/man/sample_tdata.Rd
pkg/RTAQ/man/sample_tdataraw.Rd
pkg/RTAQ/man/spotVol.rd
pkg/RTAQ/man/tqLiquidity.Rd
pkg/RTAQ/man/tradesCleanupFinal.Rd
pkg/RTAQ/src/
pkg/RTAQ/src/rtaq.c
Modified:
pkg/RTAQ/DESCRIPTION
pkg/RTAQ/R/aggregate.R
pkg/RTAQ/R/cleanupfunctions.R
pkg/RTAQ/R/convert_to_RData.R
pkg/RTAQ/R/manipulation.R
pkg/RTAQ/R/periodicityTAQ.R
pkg/RTAQ/R/totalcleanup.R
pkg/RTAQ/R/volatility.R
pkg/RTAQ/man/ExchangeHoursOnly.Rd
pkg/RTAQ/man/MedRV.Rd
pkg/RTAQ/man/MinRV.Rd
pkg/RTAQ/man/RBPCov.Rd
pkg/RTAQ/man/RCov.Rd
pkg/RTAQ/man/ROWCov.Rd
pkg/RTAQ/man/RTAQ-package.Rd
pkg/RTAQ/man/TAQload.Rd
pkg/RTAQ/man/aggregatets.Rd
pkg/RTAQ/man/convert.Rd
pkg/RTAQ/man/mergequotessametimestamp.Rd
pkg/RTAQ/man/nozeroprices.Rd
pkg/RTAQ/man/nozeroquotes.Rd
pkg/RTAQ/man/quotescleanup.Rd
pkg/RTAQ/man/rmlargespread.Rd
pkg/RTAQ/man/rmoutliers.Rd
pkg/RTAQ/man/rmtradeoutliers.Rd
pkg/RTAQ/man/sample_5minprices.Rd
pkg/RTAQ/man/sample_5minprices_jumps.Rd
pkg/RTAQ/man/selectexchange.Rd
pkg/RTAQ/man/thresholdcov.Rd
pkg/RTAQ/man/tradescleanup.Rd
Log:
major update: refreshtime, tqliquidity, new realized covariance estimators, ... (tscov and rtscov need to be doublechecked)
Modified: pkg/RTAQ/DESCRIPTION
===================================================================
--- pkg/RTAQ/DESCRIPTION 2010-11-21 16:32:24 UTC (rev 460)
+++ pkg/RTAQ/DESCRIPTION 2010-11-22 12:04:35 UTC (rev 461)
@@ -7,5 +7,6 @@
Maintainer: Jonathan Cornelissen <Jonathan.cornelissen at econ.kuleuven.be>
Description: The Trades and Quotes data of the New York Stock Exchange is a popular input for the implementation of intraday trading strategies, the measurement of liquidity and volatility and investigation of the market microstructure, among others. This package contains a collection of R functions to carefully clean and match the trades and quotes data, calculate ex post liquidity and volatility measures and detect price jumps in the data.
Depends: xts, timeDate
+Suggests: realized, robustbase, cubature, mvtnorm
License: GPL
LazyLoad: yes
\ No newline at end of file
Added: pkg/RTAQ/NAMESPACE
===================================================================
--- pkg/RTAQ/NAMESPACE (rev 0)
+++ pkg/RTAQ/NAMESPACE 2010-11-22 12:04:35 UTC (rev 461)
@@ -0,0 +1,73 @@
+useDynLib(RTAQ, .registration = TRUE);
+
+#export(anova.trls, anovalist.trls, correlogram, expcov, gaucov, Kaver,
+# Kenvl, Kfn, plot.trls, ppgetregion, ppinit, pplik, ppregion,
+# predict.trls, prmat, Psim, semat, sphercov, SSI, Strauss,
+# surf.gls, surf.ls, trls.influence, trmat, variogram)
+
+export(
+#cleaning
+autoSelectExchangeQuotes,
+autoSelectExchangeTrades,
+exchangeHoursOnly,
+mergeQuotesSameTimestamp,
+mergeTradesSameTimestamp,
+noZeroPrices,
+noZeroQuotes,
+quotesCleanup,
+rmLargeSpread,
+rmNegativeSpread,
+rmOutliers,
+rmTradeOutliers,
+salesCondition,
+selectExchange,
+tradesCleanup,
+tradesCleanupFinal,
+#Topic data manipulation
+aggregatePrice,
+aggregateQuotes,
+aggregateTrades,
+aggregatets,
+as.realizedObject,
+matchTradesQuotes,
+TAQLoad,
+refreshTime,
+#Topic datasets
+#sample_5minprices,
+#sample_5minprices_jumps,
+#sample_qdata,
+#sample_qdataraw,
+#sample_tdata,
+#sample_tdataraw,
+#Topic liquidity
+getTradeDirection,
+tqLiquidity,
+#Topic volatility
+MedRV,
+MinRV,
+RBPCov,
+RCov,
+ROWCov,
+thresholdCov,
+TSCov,
+RTSCov,
+makePsd,
+#misc
+convert,
+makeReturns,
+#to be adjusted :
+#liquidity :
+#es, rs, value_trade, signed_value_trade,
+#di_diff, di_div, pes, prs, price_impact, prop_price_impact,
+#tspread, pts, p_return_sqr, p_return_abs, qs, pqs,
+#logqs, logsize, qslope, logqslope, mq_return_sqr, mq_return_abs,
+#rest :
+spotVol,
+sumN, #TODO with match.fun!!
+previoustick #TODO with match.fun!!
+)
+
+#importFrom(mvtnorm, dmvnorm);
+#importFrom(cubature, adaptIntegrate);
+
+#S3method(summary,trls)
Modified: pkg/RTAQ/R/aggregate.R
===================================================================
--- pkg/RTAQ/R/aggregate.R 2010-11-21 16:32:24 UTC (rev 460)
+++ pkg/RTAQ/R/aggregate.R 2010-11-22 12:04:35 UTC (rev 461)
@@ -11,19 +11,35 @@
return(c)
}
-##AGGREGATION;
-aggregatets = function (ts, FUN = previoustick, on = "minutes", k = 1, weights = NULL,dropna=F)
+period.apply2 = function (x, INDEX, FUN2, ...)
{
- #Valid values for the argument "on" include: secs (seconds), seconds, mins (minutes), minutes,hours, days, weeks.
+ x <- try.xts(x, error = FALSE)
+ FUN <- match.fun(FUN2)
+ xx <- sapply(1:(length(INDEX) - 1), function(y) {
+ FUN(x[(INDEX[y] + 1):INDEX[y + 1]], ...)
+ })
+ reclass(xx, x[INDEX])
+}
+
+
+## AGGREGATION;
+aggregatets = function (ts, FUN = "previoustick", on = "minutes", k = 1, weights = NULL,dropna=FALSE)
+{
+ makethispartbetter = ((!is.null(weights))| on=="days"|on=="weeks"| (FUN!="previoustick")|dropna);
+ if(makethispartbetter) {
+
+ FUN = match.fun(FUN);
+
if (is.null(weights)) {
- ep = endpoints(ts, on, k);
- ts2 = period.apply(ts, ep, FUN);
+ ep = endpoints(ts, on, k)
+ if(dim(ts)[2]==1){ ts2 = period.apply(ts, ep, FUN) }
+ if(dim(ts)[2]>1){ ts2 = xts(apply(ts,2,FUN=period.apply2,FUN2=FUN,INDEX=ep),order.by=index(ts)[ep],)}
}
if (!is.null(weights)) {
tsb = cbind(ts, weights)
ep = endpoints(tsb, on, k)
- ts2 = period.apply(tsb, ep, FUN = weightedaverage)
+ ts2 = period.apply(tsb, ep, FUN = match.fun(weightedaverage) )
}
if (on == "minutes" | on == "mins" | on == "secs" | on ==
"seconds") {
@@ -34,143 +50,195 @@
secs = k
}
a = .index(ts2) + (secs - .index(ts2)%%secs)
- ts3 = .xts(ts2, a)
+ ts3 = .xts(ts2, a,tz="GMT")
}
if (on == "hours") {
secs = 3600
a = .index(ts2) + (secs - .index(ts2)%%secs)
- ts3 = .xts(ts2, a)
+ ts3 = .xts(ts2, a,tz="GMT")
}
if (on == "days") {
secs = 24 * 3600
a = .index(ts2) + (secs - .index(ts2)%%secs) - (24 *
3600)
- ts3 = .xts(ts2, a)
+ ts3 = .xts(ts2, a,tz="GMT")
}
if (on == "weeks") {
secs = 24 * 3600 * 7
a = (.index(ts2) + (secs - (.index(ts2) + (3L * 86400L))%%secs)) -
(24 * 3600)
- ts3 = .xts(ts2, a)
+ ts3 = .xts(ts2, a,tz="GMT")
}
- if(!dropna){
- if(on !="weeks"|on!="days"){
- if(on=="secs"|on=="seconds"){tby = "s"}
- if(on=="mins"|on=="minutes"){tby = "min"}
- if (on == "hours"){tby = "h"}
- by = paste(k,tby,sep=" ");
- allindex = as.timeDate(seq(start(ts3),end(ts3),by=by));
- xx = xts(rep("1",length(allindex)),order.by=allindex);
- ts3 = merge(ts3,xx)[,1];
-
- }#currently for weeks and days, na are still dropped
- }#end dropna if
+ if (!dropna) {
+ if (on != "weeks" | on != "days") {
+ if (on == "secs" | on == "seconds") {
+ tby = "s"
+ }
+ if (on == "mins" | on == "minutes") {
+ tby = "min"
+ }
+ if (on == "hours") {
+ tby = "h"
+ }
+ by = paste(k, tby, sep = " ")
+ allindex = as.timeDate(seq(start(ts3), end(ts3),
+ by = by))
+ xx = xts(rep("1", length(allindex)), order.by = allindex)
+ ts3 = merge(ts3, xx)[, (1:dim(ts)[2])]
+ }
+ }
+
index(ts3) = as.timeDate(index(ts3));
-
- return(ts3)
+ return(ts3);
+ }
+
+ if(!makethispartbetter){
+ if (on == "secs" | on == "seconds") { secs = k; tby = paste(k,"sec",sep=" ")}
+ if (on == "mins" | on == "minutes") { secs = 60*k; tby = paste(60*k,"sec",sep=" ")}
+ if (on == "hours") {secs = 3600*k; tby = paste(3600*k,"sec",sep=" ")}
+
+ FUN = match.fun(FUN);
+
+ g = seq(start(ts), end(ts), by = tby);
+ rawg = as.numeric(as.POSIXct(g,tz="GMT"));
+ newg = rawg + (secs - rawg%%secs);
+ g = as.timeDate(as.POSIXct(newg,origin="1970-01-01",tz="GMT"));
+ ts3 = na.locf(merge(ts, zoo(, g)))[as.POSIXct(g,tz="GMT")];
+ return(ts3)
+ }
}
#PRICE (specificity: opening price and previoustick)
-agg_price = function(ts,FUN = previoustick,on="minutes",k=1){
-ts = dataformatc(ts);
-##Return new timeseries as xts object where
-##first observation is always the opening price
-##subsequent observations are the closing prices over the interval with endpoint the timestamp of the result
-##on indicates the type of period to aggregate over
-##k indicates the number of periods to aggregate over
- ts2 = aggregatets(ts, FUN=previoustick, on, k);
+aggregatePrice = function (ts, FUN = "previoustick", on = "minutes", k = 1,marketopen="09:30:00",marketclose = "16:00:00")
+{
+ ts = dataformatc(ts)
+ ts2 = aggregatets(ts, FUN = FUN, on, k)
+ date = strsplit(as.character(index(ts)), " ")[[1]][1]
- #adjustmet correct opening price
- date = strsplit(as.character(index(ts))," ")[[1]][1]
- realopen = "09:30:00";
- a = as.timeDate(paste(date,realopen));
- b = xts(ts[1],a);
- ts3 = c(b,ts2);
+ #open
+ a = as.timeDate(paste(date, marketopen))
+ b = as.xts(matrix(as.numeric(ts[1]),nrow=1), a)
+ ts3 = c(b, ts2)
- ##adjustment for correct closing price:
- realclose = "16:00:00";
- aa = as.timeDate(paste(date,realclose));
- condition = index(ts3) < aa;
- ts3=ts3[condition];
- bb = xts(ts[length(ts)],aa);
- ts3 = c(ts3,bb);
+ #close
+ aa = as.timeDate(paste(date, marketclose))
+ condition = index(ts3) < aa
+ ts3 = ts3[condition]
+ bb = as.xts(matrix(as.numeric(last(ts)),nrow=1), aa)
+ ts3 = c(ts3, bb)
- return(ts3);
+ return(ts3)
}
#VOLUME: (specificity: always sum)
-agg_volume = function (ts, FUN = sumN, on = "minutes", k = 5, includeopen = FALSE)
+agg_volume= function(ts, FUN = "sumN", on = "minutes", k = 5, includeopen = FALSE,marketopen="09:30:00",marketclose="16:00:00")
{
ts = dataformatc(ts)
if (!includeopen) {
- ts3 = aggregatets(ts, FUN = sumN, on, k)
+ ts3 = aggregatets(ts, FUN = FUN, on, k)
}
if (includeopen) {
- ts2 = aggregatets(ts, FUN = sumN, on, k)
+ ts2 = aggregatets(ts, FUN = FUN, on, k)
date = strsplit(as.character(index(ts)), " ")[[1]][1]
- realopen = "09:30:00"
- a = as.timeDate(paste(date, realopen))
- b = xts(as.numeric(ts[1]), a)
+ a = as.timeDate(paste(date, marketopen))
+ b = as.xts(matrix(as.numeric(ts[1]),nrow=1), a)
ts3 = c(b, ts2)
- }
- #new
- realclose = "16:00:00";
- aa = as.timeDate(paste(date, realclose));
- condition = index(ts3) < aa;
- ts4 = ts3[condition];
- lastinterval = sum(as.numeric(ts3[!condition]));
- bb = xts(lastinterval, aa)
- ts4 = c(ts4, bb)
+ }
+
+ aa = as.timeDate(paste(date, marketclose))
+ condition = index(ts3) < aa
+ ts4 = ts3[condition]
+
+ lastinterval = matrix(colSums(matrix(ts3[!condition],ncol=dim(ts3)[2])),ncol=dim(ts3)[2])
+ bb = xts(lastinterval, aa)
+ ts4 = c(ts4, bb)
+
return(ts4)
}
-###TRADES AGGREGATION:
-agg_trades = function(tdata,on="minutes",k=5){
-tdata = dataformatc(tdata);
+aggregateTrades = function (tdata, on = "minutes", k = 5,marketopen="09:30:00",marketclose="16:00:00")
+{
+ tdata = dataformatc(tdata)
+ tdatacheck(tdata)
## Aggregates an entire trades xts object (tdata) over a "k"-minute interval.
## Returned xts-object contains: SYMBOL,EX,PRICE,SIZE.
## Variables COND, CORR, G127 are dropped because aggregating them makes no sense.
## NOTE: first observation (opening price) always included.
- PRICE = agg_price(tdata$PRICE,on=on,k=k);
- SIZE = agg_volume(tdata$SIZE,on=on,k=k,includeopen=TRUE);
- EX = c(tdata$EX[1],aggregatets(tdata$EX, FUN=previoustick, on=on, k=k));
- SYMBOL = rep(tdata$SYMBOL[1],length(PRICE));
- all = data.frame(SYMBOL,EX,PRICE,SIZE);
- colnames(all) =c("SYMBOL","EX","PRICE","SIZE");
- ts = xts(all,index(SIZE));
+ selection = colnames(tdata)%in%c("PRICE","EX","SYMBOL");
+ tdata1 = tdata[,selection];
+ PRICE = aggregatePrice(tdata$PRICE,on=on,k=k,marketopen=marketopen,marketclose=marketclose);
+ SIZE = agg_volume(tdata$SIZE, on = on, k = k, includeopen = TRUE,marketopen=marketopen,marketclose=marketclose)
- return(ts);
+ EX = rep(tdata$EX[1], length(PRICE));
+ SYMBOL = rep(tdata$SYMBOL[1], length(PRICE));
+
+ all = data.frame(SYMBOL, EX, PRICE, SIZE);
+ colnames(all) = c("SYMBOL", "EX", "PRICE", "SIZE");
+ ts = xts(all, index(SIZE));
+ return(ts);
}
+###QUOTES AGGREGATION:
+aggregateQuotes = function(qdata,on="minutes",k=5,marketopen="09:30:00",marketclose="16:00:00"){
+ qdata = dataformatc(qdata);
+ qdatacheck(qdata);
-###QUOTES AGGREGATION:
-agg_quotes = function(qdata,on="minutes",k=5){
-qdata = dataformatc(qdata);
## Aggregates an entire quotes xts object (qdata) object over a "k"-minute interval.
## Returned xts-object contains: SYMBOL,EX,BID,BIDSIZ,OFR,OFRSIZ.
## Variable MODE is dropped because aggregation makes no sense.
## "includeopen" determines whether to include the exact opening quotes.
-
- BID = agg_price(qdata$BID,on=on,k=k);
- OFR = agg_price(qdata$OFR,on=on,k=k);
- BIDSIZ = agg_volume(qdata$BIDSIZ,on=on,k=k,includeopen=TRUE);
- OFRSIZ = agg_volume(qdata$OFRSIZ,on=on,k=k,includeopen=TRUE);
+ BIDOFR = aggregatePrice(cbind(qdata$BID,qdata$OFR),on=on,k=k,marketopen=marketopen,marketclose=marketclose);
+ BIDOFRSIZ = agg_volume(cbind(qdata$BIDSIZ,qdata$OFRSIZ),on=on,k=k,includeopen=TRUE,marketopen=marketopen,marketclose=marketclose);
- EX = agg_price(qdata$EX,on=on,k=k)
- SYMBOL = rep(qdata$SYMBOL[1],length(BIDSIZ));
+ EX = rep(qdata$EX[1],dim(BIDOFR)[1]);
+ SYMBOL = rep(qdata$SYMBOL[1],dim(BIDOFR)[1]);
- all = data.frame(SYMBOL,EX,BID,BIDSIZ,OFR,OFRSIZ);
+ all = data.frame(SYMBOL,EX,BIDOFR[,1],BIDOFRSIZ[,1],BIDOFR[,2],BIDOFRSIZ[,2]);
colnames(all) =c("SYMBOL","EX","BID","BIDSIZ","OFR","OFRSIZ");
- ts = xts(all,index(BIDSIZ));
-
+ ts = xts(all,index(BIDOFR));
return(ts);
}
##LIQUIDITY AGGREGATION:
##Just combine aggregation functions and spot liquidity functions!
+
+###### refresh time ########
+refreshTime = function(pdata){
+dim = length(pdata);
+lengths = rep(0,dim+1);
+ for(i in 1:dim){
+ lengths[i+1] = length(pdata[[i]]);
+ }
+ minl = min(lengths[(2:(dim+1))]); #number of obs for stock with least observations
+ lengths = cumsum(lengths);
+ alltimes = rep(0,lengths[dim+1]); #all timestamps in 1 vector
+ for(i in 1:dim){
+ alltimes[(lengths[i]+1):lengths[i+1]] = as.numeric(as.POSIXct(index(pdata[[i]]),tz="GMT"));
+ }
+
+ #get refresh points via C:
+ x = .C("refreshpoints",as.integer(alltimes),as.integer(lengths),
+ as.integer(rep(0,minl)),as.integer(dim),
+ as.integer(0),as.integer(rep(0,minl*dim)),as.integer(minl));
+
+ #get matrix with "refresh prices"
+ newlength=x[[5]];
+ pmatrix = matrix(ncol=dim,nrow=newlength);
+
+ for(i in 1:dim){
+ selection = x[[6]][((i-1)*minl+1):(i*minl)];
+ pmatrix[,i] = pdata[[i]][ selection[1:newlength] ];
+ }
+
+ time = as.timeDate(as.POSIXct(x[[3]][1:newlength],origin="1970-01-01",tz="GMT"));
+
+ resmatrix = xts(pmatrix,order.by=time)
+ return(resmatrix);
+}
+
Modified: pkg/RTAQ/R/cleanupfunctions.R
===================================================================
--- pkg/RTAQ/R/cleanupfunctions.R 2010-11-21 16:32:24 UTC (rev 460)
+++ pkg/RTAQ/R/cleanupfunctions.R 2010-11-22 12:04:35 UTC (rev 461)
@@ -10,7 +10,7 @@
########## DATA CLEAN-UP: FOR ALL DATA #####################
####FUNCTION TO FILTER EXCHANGE HOURS ONLY: ExchangeHoursOnly
-ExchangeHoursOnly = function(data, daybegin = "09:30:00",dayend="16:00:00")
+exchangeHoursOnly = function(data, daybegin = "09:30:00",dayend="16:00:00")
{
data = dataformatc(data);
# a function to excerpt data within exchange trading hours
@@ -37,23 +37,25 @@
}
-nozeroprices = function(tdata){
+noZeroPrices = function(tdata){
tdata = dataformatc(tdata);
+tdatacheck(tdata);
####FUNCTION TO DELETE ZERO PRICES: nozeroprices
filteredts = tdata[as.numeric(tdata$PRICE)!= 0];
return(filteredts);
}
-selectexchange = function(data,exch="N"){
+selectExchange = function(data,exch="N"){
data = dataformatc(data);
###FUNCTION TO SELECT THE OBSERVATIONS OF A SINGLE EXCHANGE: selectexchange
filteredts = data[data$EX==exch];
return(filteredts);
}
-autoselectexchange = function(tdata){
+autoSelectExchangeTrades = function(tdata){
tdata = dataformatc(tdata);
+tdatacheck(tdata);
## AUTOSELECT EXCHANGE WITH HIGHEST NUMBER OF SHARES TRADED (for trades) ON:
#function returns ts with obs of only 1 exchange
#searches exchange with a maximum on the variable "SIZE"
@@ -94,8 +96,9 @@
#}
#zivot
-salescond <- function (tdata)
+salesCondition <- function (tdata)
{
+tdatacheck(tdata);
filteredts = tdata[tdata$COND == "0" | tdata$COND == "E" |
tdata$COND == "F" | tdata$COND == "" | tdata$COND == "@F"]
return(filteredts)
@@ -129,8 +132,9 @@
return(b);
}
-mergesametimestamp = function(tdata,selection="median"){
+mergeTradesSameTimestamp = function(tdata,selection="median"){
tdata = dataformatc(tdata);
+tdatacheck(tdata);
#find end points:
ep = endpoints(tdata,"secs");
@@ -151,9 +155,11 @@
return(tdata2)
}
-rmtradeoutliers = function(tdata,qdata){
+rmTradeOutliers = function(tdata,qdata){
tdata = dataformatc(tdata);
qdata = dataformatc(qdata);
+qdatacheck(qdata);
+tdatacheck(tdata);
##Function to delete entries with prices that are above the ask plus the bid-ask
##spread. Similar for entries with prices below the bid minus the bid-ask
@@ -167,23 +173,25 @@
upper = offer+spread;
lower = bid-spread;
- tdata[(price<upper) & (price>lower)];
+ tdata = tdata[(price<upper) & (price>lower)];
return(tdata);
}
################# QUOTE SPECIFIC FUNCTIONS: #################
-nozeroquotes = function(qdata){
+noZeroQuotes = function(qdata){
qdata = dataformatc(qdata);
+qdatacheck(qdata);
####FUNCTION TO DELETE ZERO QUOTES: nozeroquotes
filteredts = qdata[as.numeric(qdata$BID)!= 0& as.numeric(qdata$OFR)!= 0];
return(filteredts);
}
-autoselectexchangeq = function(qdata){
+autoSelectExchangeQuotes = function(qdata){
qdata = dataformatc(qdata);
+qdatacheck(qdata);
####Autoselect exchange with highest value for (bidsize+offersize)
nobs=c();
exchanges = c("Q","A","P","B","C","N","D","X","I","M","W","Z");
@@ -215,8 +223,9 @@
}
-mergequotessametimestamp = function(qdata,selection="median"){ ##FAST
+mergeQuotesSameTimestamp = function(qdata,selection="median"){ ##FAST
qdata = dataformatc(qdata);
+qdatacheck(qdata);
condition=selection=="median"|selection=="maxvolume"|selection=="weightedaverage";
if(!condition){print(paste("WARNING:The result will be corrupted. Check whether",selection,"is an existing option for the attribute selection."))}
@@ -257,15 +266,17 @@
}
-rmnegspread = function(qdata){
+rmNegativeSpread = function(qdata){
qdata = dataformatc(qdata);
+qdatacheck(qdata);
##function to remove observations with negative spread
condition = as.numeric(qdata$OFR)>as.numeric(qdata$BID);
qdata[condition];
}
-rmlargespread = function(qdata,maxi=50){
+rmLargeSpread = function(qdata,maxi=50){
+qdatacheck(qdata);
##function to remove observations with a spread larger than 50 times the median spread that day
###WATCH OUT: works only correct if supplied input data consists of 1 day...
spread = as.numeric(qdata$OFR)-as.numeric(qdata$BID);
@@ -273,9 +284,10 @@
return(qdata[condition])
}
-rmoutliers = function (qdata, maxi = 10, window = 50, type = "advanced")
+rmOutliers = function (qdata, maxi = 10, window = 50, type = "advanced")
{
qdata = dataformatc(qdata);
+qdatacheck(qdata);
##function to remove entries for which the mid-quote deviated by more than 10 median absolute deviations
##from a rolling centered median (excluding the observation under consideration) of 50 observations if type = "standard".
@@ -288,7 +300,6 @@
##3. Rolling median of the previous "window" observations
##NOTE: Median Absolute deviation chosen contrary to Barndorff-Nielsen et al.
- print("NOTE: This function is only useful for quotes NOT for trades")
window = floor(window/2) * 2
condition = c();
halfwindow = window/2;
@@ -357,6 +368,7 @@
###zivot
correctedTrades <- function (tdata)
{
+tdatacheck(tdata);
filteredts = tdata[tdata$CR == " 0"]
return(filteredts)
}
Modified: pkg/RTAQ/R/convert_to_RData.R
===================================================================
--- pkg/RTAQ/R/convert_to_RData.R 2010-11-21 16:32:24 UTC (rev 460)
+++ pkg/RTAQ/R/convert_to_RData.R 2010-11-22 12:04:35 UTC (rev 461)
@@ -1,5 +1,5 @@
-##########HELPFUNCTION####
-readdata = function(path=NULL, extention="txt",header=F,dims=0){
+########## HELPFUNCTION ####
+readdata = function(path=NULL, extention="txt",header=FALSE,dims=0){
#extention should either be "txt" or "csv"
if(!(extention=="txt"|extention=="csv")){print("Please select a supported extention")}
colnames = rep("x",dims);
@@ -29,7 +29,7 @@
}
############################
-convert = function(from,to,datasource,datadestination,trades=TRUE,quotes=TRUE,ticker,dir=F,extention="txt",header=F,tradecolnames=NULL,quotecolnames=NULL,format="%m/%d/%Y %H:%M:%S"){
+convert = function(from,to,datasource,datadestination,trades=TRUE,quotes=TRUE,ticker,dir=FALSE,extention="txt",header=FALSE,tradecolnames=NULL,quotecolnames=NULL,format="%m/%d/%Y %H:%M:%S"){
dates = timeSequence(from,to, format = "%Y-%m-%d", FinCenter = "GMT")
dates = dates[isBizday(dates, holidays = holidayNYSE(2004:2010))];
missingt=missingq=matrix(ncol=2,nrow=0);
@@ -51,8 +51,10 @@
}
convert_trades = function (datasource, datadestination, ticker, extention = "txt",
- header = F, tradecolnames = NULL, format = "%m/%d/%Y %H:%M:%S")
-{
+ header = FALSE, tradecolnames = NULL, format = "%m/%d/%Y %H:%M:%S")
+{
+ missingt=matrix(ncol=2,nrow=0);
+
setwd(datasource)
adjtime = function(z) {
zz = unlist(strsplit(z, ":"))
@@ -107,8 +109,10 @@
convert_quotes = function (datasource, datadestination, ticker, extention = "txt",
- header = F, quotecolnames = NULL, format = "%m/%d/%Y %H:%M:%S")
+ header = FALSE, quotecolnames = NULL, format = "%m/%d/%Y %H:%M:%S")
{
+ missingq=matrix(ncol=2,nrow=0);
+
setwd(datasource)
adjtime = function(z) {
zz = unlist(strsplit(z, ":"))
Modified: pkg/RTAQ/R/manipulation.R
===================================================================
--- pkg/RTAQ/R/manipulation.R 2010-11-21 16:32:24 UTC (rev 460)
+++ pkg/RTAQ/R/manipulation.R 2010-11-22 12:04:35 UTC (rev 461)
@@ -1,5 +1,5 @@
#MANIPULATION FUNCTIONS:
-TAQload = function(tickers,from,to,trades=TRUE,quotes=FALSE,datasource=NULL,variables=NULL){
+TAQLoad = function(tickers,from,to,trades=TRUE,quotes=FALSE,datasource=NULL,variables=NULL){
if( is.null(datasource)){print("Please provide the argument 'datasource' to indicate in which folder your data is stored")}
if(!(trades&quotes)){#not both trades and quotes demanded
@@ -25,13 +25,15 @@
dates = timeSequence(as.character(from),as.character(to), format = "%Y-%m-%d", FinCenter = "GMT")
dates = dates[isBizday(dates, holidays = holidayNYSE(2004:2010))];
- if(trades){
+ if(trades){ tdata=NULL;
for(i in 1:length(dates)){
datasourcex = paste(datasource,"\\",dates[i],sep="");
filename = paste(datasourcex,"\\",ticker,"_trades.RData",sep="");
ifmissingname = paste(datasourcex,"\\missing_",ticker,".RData",sep="");
- if(file.exists(ifmissingname)){stop(paste("no trades available on ",dates[i],sep=""))}
+
+ if(file.exists(ifmissingname)){stop(paste("No trades available on ",dates[i],sep=""))}
+ if(!file.exists(filename)){stop(paste("The file ",filename," does not exist. Please read the documentation.",sep=""))}
if(file.exists(ifmissingname)==FALSE){
load(filename);
if(i==1) {
@@ -53,13 +55,14 @@
}
}
- if(quotes){
+ if(quotes){ qdata=NULL;
for(i in 1:length(dates)){
datasourcex = paste(datasource,"\\",dates[i],sep="");
filename = paste(datasourcex,"\\",ticker,"_quotes.RData",sep="");
ifmissingname = paste(datasourcex,"\\missingquotes_",ticker,".RData",sep="");
if(file.exists(ifmissingname)){stop(paste("no quotes available on ",dates[i],sep=""))}
+ if(!file.exists(filename)){stop(paste("The file ",filename," does not exist. Please read the documentation.",sep=""))}
if(file.exists(ifmissingname)==FALSE){
load(filename);
@@ -88,9 +91,12 @@
}
-matchtq = function(tdata,qdata,adjustment=2){ ##FAST VERSION
+matchTradesQuotes = function(tdata,qdata,adjustment=2){ ##FAST VERSION
tdata = dataformatc(tdata);
qdata = dataformatc(qdata);
+qdatacheck(qdata);
+tdatacheck(tdata);
+
tt = dim(tdata)[2];
index(qdata) = index(qdata) + adjustment;
@@ -119,43 +125,46 @@
return(merged)
}
-matchtq_old = function(tdata,qdata,adjustment=2){ ##FAST VERSION
-qdata = dataformatc(qdata);
-tdata = dataformatc(tdata);
+#matchtq_old = function(tdata,qdata,adjustment=2){ ##FAST VERSION
+#qdata = dataformatc(qdata);
+#tdata = dataformatc(tdata);
+#
+# tt = dim(tdata)[2];
+# index(qdata) = index(qdata) + adjustment;
+#
+# #merge:
+# counter = xts(as.character(1:dim(qdata)[1]),order.by=index(qdata))#an integer for every quote
+# merged = cbind(qdata,counter);
+# merged = merge(tdata,merged);
+#
+# ##fill NA's:
+# merged[,((tt+1):dim(merged)[2])] = na.locf(as.zoo(merged[,((tt+1):dim(merged)[2])]), na.rm=FALSE);
+#
+# #Select trades:
+# merged = merged[index(tdata)];
+#
+# #Remove duplicated quotes:
+# merged = merged[!duplicated(merged[,dim(merged)[2]])];
+#
+# #return usefull parts:
+# merged = merged[,c((1:tt),((tt+3):(dim(merged)[2]-1)))];
+#
+# return(merged)
+#}
- tt = dim(tdata)[2];
- index(qdata) = index(qdata) + adjustment;
-
- #merge:
- counter = xts(as.character(1:dim(qdata)[1]),order.by=index(qdata))#an integer for every quote
- merged = cbind(qdata,counter);
- merged = merge(tdata,merged);
-
- ##fill NA's:
- merged[,((tt+1):dim(merged)[2])] = na.locf(as.zoo(merged[,((tt+1):dim(merged)[2])]), na.rm=FALSE);
-
- #Select trades:
- merged = merged[index(tdata)];
-
- #Remove duplicated quotes:
- merged = merged[!duplicated(merged[,dim(merged)[2]])];
- #return usefull parts:
- merged = merged[,c((1:tt),((tt+3):(dim(merged)[2]-1)))];
- return(merged)
-}
+getTradeDirection = function(tqdata,...){
+ if(hasArg(data)){ tqdata = data; rm(data) }
+ tqdata = dataformatc(tqdata);
+ tqdatacheck(tqdata);
-
-
-gettradedir = function(data){
-data = dataformatc(data);
##Function returns a vector with the inferred trade direction:
##NOTE: the value of the first (and second) observation should be ignored if price=midpoint for the first (second) observation.
- bid = as.numeric(data$BID);
- offer = as.numeric(data$OFR);
+ bid = as.numeric(tqdata$BID);
+ offer = as.numeric(tqdata$OFR);
midpoints = (bid + offer)/2;
- price = as.numeric(data$PRICE);
+ price = as.numeric(tqdata$PRICE);
buy1 = price > midpoints; #definitely a buy
equal = price == midpoints;
@@ -171,7 +180,6 @@
return(buy);
}
-
es = function(data){
data = dataformatc(data);
#returns the effective spread as xts object
@@ -185,7 +193,6 @@
return(es);
}
-
rs = function(data,tdata,qdata){
data = dataformatc(data);
qdata = dataformatc(qdata);
@@ -306,10 +313,10 @@
return(prs_xts);
}
-price_impact = function(data){
+price_impact = function(data,tdata,qdata){
data = dataformatc(data);
#returns the Price impact as xts object
- rs = rs(data);
+ rs = rs(data,tdata,qdata);
es = es(data);
pi = (es-rs)/2;
@@ -317,10 +324,10 @@
return(pi_xts);
}
-prop_price_impact = function(data){
+prop_price_impact = function(data,tdata,qdata){
data = dataformatc(data);
#returns the Proportional Price impact as xts object
- rs = rs(data);
+ rs = rs(data,tdata,qdata);
es = es(data);
bid = as.numeric(data$BID);
offer = as.numeric(data$OFR);
@@ -355,7 +362,7 @@
pts = (d*(price-midpoints))/midpoints;
pts_xts = xts(pts,order.by=index(data));
- return(ts);
+ return(pts_xts);
}
p_return_sqr = function(data){
@@ -439,8 +446,6 @@
return(logqslope_xts);
}
-
-
mq_return_sqr = function(data){
data = dataformatc(data);
#returns midquote squared returns slope as xts object
@@ -463,76 +468,39 @@
return(mq_return_abs_xts);
}
-liquidity <- function (data, tdata, qdata)
-{
-data = dataformatc(data);
-qdata = dataformatc(qdata);
-tdata = dataformatc(tdata);
-##Function computes many liquidity measures and returns an xts object containing them
+tqLiquidity <- function(tqdata=NULL,tdata=NULL,qdata=NULL,type,...) {
+ if(hasArg(data)){ tqdata = data }
+ if(!is.null(tqdata)){tqdatacheck(tqdata)}
+ if(!is.null(qdata)){qdatacheck(qdata)}
+ if(!is.null(tdata)){tdatacheck(tdata)}
+
+ result=switch(type,
+ es = es(tqdata),
+ rs = rs(tqdata,tdata,qdata),
+ value_trade = value_trade(tqdata),
+ signed_value_trade = signed_value_trade(tqdata),
+ di_diff = di_diff(tqdata),
+ pes = pes(tqdata),
+ prs = prs(tqdata,tdata,qdata),
+ price_impact = price_impact(tqdata,tdata,qdata),
+ prop_price_impact = prop_price_impact(tqdata,tdata,qdata),
+ tspread =tspread(tqdata),
+ pts = pts(tqdata),
+ p_return_sqr = p_return_sqr(tqdata),
+ p_return_abs = p_return_abs(tqdata),
+ qs = qs(tqdata),
+ pqs = pqs(tqdata),
+ logqs = logqs(tqdata),
+ logsize = logsize(tqdata),
+ qslope = qslope(tqdata),
+ logqslope = logqslope(tqdata),
+ mq_return_sqr = mq_return_sqr(tqdata),
+ mq_return_abs = mq_return_abs(tqdata),
+ signed_trade_size = signed_trade_size(tqdata)
+ )
-##First part solves the problem that unequal number of obs (in data and data2) is possible when computing the RS
-
- data2 = matchtq(tdata, qdata, adjustment = 300)
- if (dim(data2)[1] > dim(data)[1]) {
- condition = as.vector(as.character(index(data2))) %in%
- as.vector(as.character(index(data)))
- data2 = subset(data2, condition, select = 1:(dim(data)[2]))
- data = subset(data, as.vector(as.character(index(data))) %in%
- as.vector(as.character(index(data2))), select = 1:(dim(data2)[2]))
- }
- if (dim(data2)[1] < dim(data)[1]) {
- condition = as.vector(as.character(index(data))) %in%
- as.vector(as.character(index(data2)))
- data = subset(data, condition, select = 1:(dim(data2)[2]))
- data2 = subset(data2, as.vector(as.character(index(data2))) %in%
- as.vector(as.character(index(data))), select = 1:(dim(data)[2]))
- }
- bid = as.numeric(data$BID)
- offer = as.numeric(data$OFR)
- midpoints = (bid + offer)/2
- price = as.numeric(data$PRICE)
- size = as.numeric(data$SIZE)
- d = gettradedir(data)
- bidsize = as.numeric(data$BIDSIZ)
- offersize = as.numeric(data$OFRSIZ)
- return = c(0, log(price[2:length(price)]) - log(price[1:length(price) -
- 1]))
- mq_return = mq_return(data)
- midpoints2 = (as.numeric(data2$BID) + as.numeric(data2$OFR))/2
- es = 2 * d * (price - midpoints)
- rs = 2 * d * (price - midpoints2)
- value_trade = price * size
- signed_value_trade = d * price * size
- signed_trade_size = d * size
- di_diff = (d * (offersize - bidsize))/(offersize + bidsize)
- di_div = (offersize/bidsize)^d
- pes = 100 * es/midpoints
- prs = 100 * rs/midpoints
- price_impact = (es - rs)/2
- prop_price_impact = (100 * price_impact)/midpoints
- tspread = d * (price - midpoints)
- pts = tspread/midpoints
- p_return_sqr = return^2
- p_return_abs = abs(return)
- qs = offer - bid
- pqs = 100 * qs/midpoints
- logqs = log(offer/bid)
- logsize = log(bidsize) + log(offersize)
- qslope = qs/logsize
- logqslope = logqs/logsize
- mq_return_sqr = mq_return^2
- mq_return_abs = abs(mq_return)
- liquid = cbind(es, rs, value_trade, signed_value_trade, di_diff,
- di_div, pes, prs, price_impact, prop_price_impact, tspread,
- pts, p_return_sqr, p_return_abs, qs, pqs, logqs, logsize,
- qslope, logqslope, mq_return_sqr, mq_return_abs)
- names = c("es", "rs", "value_trade", "signed_value_trade",
- "di_diff", "di_div", "pes", "prs", "price_impact", "prop_price_impact",
- "tspread", "pts", "p_return_sqr", "p_return_abs", "qs", "pqs",
- "logqs", "logsize", "qslope", "logqslope", "mq_return_sqr",
- "mq_return_abs")
- colnames(liquid) = names
- return(liquid)
+ colnames(result)=type;
+ return(result);
}
##help_function:
@@ -550,15 +518,24 @@
###Zivot:
-p_return <- function (data)
-{
- price = as.numeric(data$PRICE)
- log.return = c(0, log(price[2:length(price)]) - log(price[1:length(price) -
- 1]))
- return_xts = xts(log.return, order.by = index(data))
- return(return_xts)
+makeReturns = function(ts){
+ l = dim(ts)[1];
+ x = matrix(as.numeric(ts),nrow=l);
+ x[(2:l),] = log(x[(2:l),]) - log(x[(1:(l-1)),])
+ x[1,] = rep(0,dim(ts)[2]);
+ x = xts(x,order.by=index(ts));
+ return(x);
}
+#p_return <- function (data)
+#{
+# price = as.numeric(data$PRICE)
+# log.return = c(0, log(price[2:length(price)]) - log(price[1:length(price) -
+# 1]))
+# return_xts = xts(log.return, order.by = index(data))
+# return(return_xts)
+#}
+
p_return_abs <- function (data)
{
price = as.numeric(data$PRICE)
Added: pkg/RTAQ/R/newfunctions_names.R
===================================================================
--- pkg/RTAQ/R/newfunctions_names.R (rev 0)
+++ pkg/RTAQ/R/newfunctions_names.R 2010-11-22 12:04:35 UTC (rev 461)
@@ -0,0 +1,23 @@
+agg_price = function(...){aggregatePrice(...)};
+agg_quotes = function(...){aggregateQuotes(...)};
+agg_trades = function(...){aggregateTrades(...)};
+autoselectexchange = function(...){autoSelectExchangeTrades(...)};
+autoselectexchangeq = function(...){autoSelectExchangeQuotes(...)};
+ExchangeHoursOnly = function(...){exchangeHoursOnly(...)};
+gettradedir = function(...){getTradeDirection(...)};
+matchtq = function(...){matchTradesQuotes(...)};
+mergequotessametimestamp = function(...){mergeQuotesSameTimestamp(...)};
+mergesametimestamp = function(...){mergeTradesSameTimestamp(...)};
+nozeroprices = function(...){noZeroPrices(...)};
+nozeroquotes = function(...){noZeroQuotes(...)};
+quotescleanup = function(...){quotesCleanup(...)};
+rmlargespread = function(...){rmLargeSpread(...)};
[TRUNCATED]
To get the complete diff run:
svnlook diff /svnroot/blotter -r 461
More information about the Blotter-commits
mailing list