[Blotter-commits] r314 - pkg/RTAQ/R
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Sun Mar 28 21:02:09 CEST 2010
Author: jonathan
Date: 2010-03-28 21:02:08 +0200 (Sun, 28 Mar 2010)
New Revision: 314
Modified:
pkg/RTAQ/R/cleanupfunctions.R
pkg/RTAQ/R/manipulation.R
pkg/RTAQ/R/totalcleanup.R
pkg/RTAQ/R/volatility.R
Log:
code update (with correct/consequent input argument names)
Modified: pkg/RTAQ/R/cleanupfunctions.R
===================================================================
--- pkg/RTAQ/R/cleanupfunctions.R 2010-03-28 18:53:07 UTC (rev 313)
+++ pkg/RTAQ/R/cleanupfunctions.R 2010-03-28 19:02:08 UTC (rev 314)
@@ -10,7 +10,7 @@
########## DATA CLEAN-UP: FOR ALL DATA #####################
####FUNCTION TO FILTER EXCHANGE HOURS ONLY: ExchangeHoursOnly
-ExchangeHoursOnly = function(ts, daybegin = "09:30:00",dayend="16:00:00")
+ExchangeHoursOnly = function(data, daybegin = "09:30:00",dayend="16:00:00")
{
# a function to excerpt data within exchange trading hours
# daybegin and dayend: two characters in the format of "HH:MM:SS",
@@ -18,11 +18,11 @@
# trading day and the closing hour and minute and sec
# of the trading day repectively
- if(!is(ts, "xts"))
- stop("ts must be an xts object")
+ if(!is(data, "xts"))
+ stop("data must be an xts object")
gettime = function(z){unlist(strsplit(as.character(z)," "))[2]};
- times1 = as.matrix(as.vector(as.character(index(ts))));
+ times1 = as.matrix(as.vector(as.character(index(data))));
times = apply(times1,1,gettime);
tdtimes = timeDate(times,format = "%H:%M:%S",FinCenter = "GMT",zone="GMT");
@@ -31,26 +31,25 @@
tddayend = timeDate(dayend,format = "%H:%M:%S",FinCenter = "GMT",zone="GMT");
#select correct observations
- filteredts = ts[tdtimes>=tddaybegin & tdtimes<=tddayend];
+ filteredts = data[tdtimes>=tddaybegin & tdtimes<=tddayend];
return(filteredts);
}
-nozeroprices = function(ts){
+nozeroprices = function(tdata){
####FUNCTION TO DELETE ZERO PRICES: nozeroprices
-filteredts = ts[as.numeric(ts$PRICE)!= 0];
+filteredts = tdata[as.numeric(tdata$PRICE)!= 0];
return(filteredts);
}
-selectexchange = function(ts,exch="N"){
+selectexchange = function(data,exch="N"){
###FUNCTION TO SELECT THE OBSERVATIONS OF A SINGLE EXCHANGE: selectexchange
-filteredts = ts[ts$EX==exch];
+filteredts = data[data$EX==exch];
return(filteredts);
}
-
-autoselectexchange = function(ts){
+autoselectexchange = function(tdata){
## AUTOSELECT EXCHANGE WITH HIGHEST NUMBER OF SHARES TRADED (for trades) ON:
#function returns ts with obs of only 1 exchange
#searches exchange with a maximum on the variable "SIZE"
@@ -60,32 +59,32 @@
exchangenames = c("NASDAQ","AMEX","ARCA","Boston","NSX","NYSE","NASD ADF and TRF","Philadelphia","ISE","Chicago","CBOE","BATS");
- z1 = sum(as.numeric(selectexchange(ts,"Q")$SIZE));
- z2 = sum(as.numeric(selectexchange(ts,"T")$SIZE));
+ z1 = sum(as.numeric(selectexchange(tdata,"Q")$SIZE));
+ z2 = sum(as.numeric(selectexchange(tdata,"T")$SIZE));
z = max(z1,z2);
watchout = z == z2;
nobs = cbind(nobs,z);
for(i in 2:length(exchanges)) {
- z = sum(as.numeric(selectexchange(ts,exchanges[i])$SIZE));
+ z = sum(as.numeric(selectexchange(tdata,exchanges[i])$SIZE));
nobs = cbind(nobs,z);
}
exch = exchanges[max(nobs)==nobs];
- as.character(ts$EX[1]) == exchanges;
+ as.character(tdata$EX[1]) == exchanges;
namechosen = exchangenames[exch==exchanges];
print(paste("The information of the",namechosen,"exchange was collected"));
if(exch=="Q"&watchout){exch="T"}
- filteredts = ts[ts$EX==exch];
+ filteredtdata = tdata[tdata$EX==exch];
}
##### TRADE DATA SPECIFIC FUNCTIONS: ###################################
-salescond = function(ts){
+salescond = function(tdata){
###DELETE ENTRIES WITH AN ABONORMAL SALES CONDITION
-filteredts = ts[ts$COND == "0"|ts$COND == "E"|ts$COND == "F"];
+filteredts = tdata[tdata$COND == "0"|tdata$COND == "E"|tdata$COND == "F"];
return(filteredts);
}
@@ -116,25 +115,25 @@
return(b);
}
-mergesametimestamp = function(ts,selection="median"){
+mergesametimestamp = function(tdata,selection="median"){
#find end points:
- ep = endpoints(ts,"secs");
+ ep = endpoints(tdata,"secs");
#size per second:
- size = period.apply(ts$SIZE,ep,sumN);
+ size = period.apply(tdata$SIZE,ep,sumN);
#price per second:
- if(selection=="median"){price = period.apply(ts$PRICE,ep,medianN)}
- if(selection=="maxvolume"){price = period.apply(cbind(ts$PRICE,ts$SIZE),ep,maxvol)}
- if(selection=="weightedaverage"){price = period.apply(cbind(ts$PRICE,ts$SIZE),ep,waverage)}
+ if(selection=="median"){price = period.apply(tdata$PRICE,ep,medianN)}
+ if(selection=="maxvolume"){price = period.apply(cbind(tdata$PRICE,tdata$SIZE),ep,maxvol)}
+ if(selection=="weightedaverage"){price = period.apply(cbind(tdata$PRICE,tdata$SIZE),ep,waverage)}
##merge everything:
selection = ep[2:length(ep)];
- ts2 = ts[selection];
- ts2$PRICE = price;
- ts2$SIZE = size;
+ tdata2 = tdata[selection];
+ tdata2$PRICE = price;
+ tdata2$SIZE = size;
-return(ts2)
+return(tdata2)
}
rmtradeoutliers = function(tdata,qdata){
@@ -157,21 +156,21 @@
################# QUOTE SPECIFIC FUNCTIONS: #################
-nozeroquotes = function(ts){
+nozeroquotes = function(qdata){
####FUNCTION TO DELETE ZERO QUOTES: nozeroquotes
-filteredts = ts[as.numeric(ts$BID)!= 0& as.numeric(ts$OFFER)!= 0];
+filteredts = qdata[as.numeric(qdata$BID)!= 0& as.numeric(qdata$OFFER)!= 0];
return(filteredts);
}
-autoselectexchangeq = function(ts){
+autoselectexchangeq = function(qdata){
####Autoselect exchange with highest value for (bidsize+offersize)
nobs=c();
exchanges = c("Q","A","P","B","C","N","D","X","I","M","W","Z");
exchangenames = c("NASDAQ","AMEX","ARCA","Boston","NSX","NYSE","NASD ADF and TRF","Philadelphia","ISE","Chicago","CBOE","BATS");
- selected1 = selectexchange(ts,"Q");
- selected2 = selectexchange(ts,"T");
+ selected1 = selectexchange(qdata,"Q");
+ selected2 = selectexchange(qdata,"T");
z1 = sum(as.numeric(selected1$BIDSIZE)+as.numeric(selected1$OFFERSIZE));
z2 = sum(as.numeric(selected2$BIDSIZE)+as.numeric(selected2$OFFERSIZE));
z = max(z1,z2);
@@ -179,7 +178,7 @@
nobs = cbind(nobs,z);
for(i in 2:length(exchanges)) {
- selected = selectexchange(ts,exchanges[i]);
+ selected = selectexchange(qdata,exchanges[i]);
z = sum(as.numeric(selected$BIDSIZE)+as.numeric(selected$OFFERSIZE));
nobs = cbind(nobs,z);
}
@@ -191,42 +190,42 @@
if(exch=="Q"&watchout){exch="T"}
- filteredts = ts[ts$EX==exch];
+ filteredts = qdata[qdata$EX==exch];
return(filteredts);
}
-mergequotessametimestamp = function(ts,selection="median"){ ##FAST
+mergequotessametimestamp = function(qdata,selection="median"){ ##FAST
condition=selection=="median"|selection=="maxvolume"|selection=="weightedaverage";
if(!condition){print(paste("WARNING:The result will be corrupted. Check whether",selection,"is an existing option for the attribute selection."))}
#find end points:
- ep = endpoints(ts,"secs");
+ ep = endpoints(qdata,"secs");
#size per second:
- bidsize = period.apply(ts$BIDSIZE,ep,sumN);
- offersize = period.apply(ts$OFFERSIZE,ep,sumN);
+ bidsize = period.apply(qdata$BIDSIZE,ep,sumN);
+ offersize = period.apply(qdata$OFFERSIZE,ep,sumN);
#median per second:
if(selection=="median"){
- bid = period.apply(ts$BID,ep,medianN);
- offer = period.apply(ts$OFFER,ep,medianN);
+ bid = period.apply(qdata$BID,ep,medianN);
+ offer = period.apply(qdata$OFFER,ep,medianN);
}
#maxvolume per second:
if(selection=="maxvolume"){
- bid = period.apply(cbind(ts$BID,ts$BIDSIZE),ep,maxvol);
- offer = period.apply(cbind(ts$OFFER,ts$OFFERSIZE),ep,maxvol);
+ bid = period.apply(cbind(qdata$BID,qdata$BIDSIZE),ep,maxvol);
+ offer = period.apply(cbind(qdata$OFFER,qdata$OFFERSIZE),ep,maxvol);
}
if(selection=="weightedaverage"){
- bid = period.apply(cbind(ts$BID,ts$BIDSIZE),ep,waverage);
- offer = period.apply(cbind(ts$OFFER,ts$OFFERSIZE),ep,waverage);
+ bid = period.apply(cbind(qdata$BID,qdata$BIDSIZE),ep,waverage);
+ offer = period.apply(cbind(qdata$OFFER,qdata$OFFERSIZE),ep,waverage);
}
##merge everything:
selection = ep[2:length(ep)];
- ts2 = ts[selection];
+ ts2 = qdata[selection];
ts2$BID = bid;
ts2$OFFER = offer;
@@ -237,23 +236,23 @@
}
-rmnegspread = function(ts){
+rmnegspread = function(qdata){
##function to remove observations with negative spread
- condition = as.numeric(ts$OFFER)>as.numeric(ts$BID);
- ts[condition];
+ condition = as.numeric(qdata$OFFER)>as.numeric(qdata$BID);
+ qdata[condition];
}
-rmlargespread = function(ts,maxi=50){
+rmlargespread = function(qdata,maxi=50){
##function to remove observations with a spread larger than 50 times the median spread that day
###WATCH OUT: works only correct if supplied input data consists of 1 day...
- spread = as.numeric(ts$OFFER)-as.numeric(ts$BID);
+ spread = as.numeric(qdata$OFFER)-as.numeric(qdata$BID);
condition = ((maxi*median(spread))>spread);
- return(ts[condition])
+ return(qdata[condition])
}
-rmoutliers = function(ts,maxi=10,window=50,type="advanced"){
+rmoutliers = function(qdata,maxi=10,window=50,type="advanced"){
##function to remove entries for which the mid-quote deviated by more than 10 median absolute deviations
##from a rolling centered median (excluding the observation under consideration) of 50 observations if type = "standard".
@@ -269,9 +268,9 @@
print("NOTE: This function is only useful for quotes NOT for trades");
condition = c();
halfwindow = round(window/2);
- midquote = (as.numeric(ts$BID)+as.numeric(ts$OFFER))/2;
+ midquote = (as.numeric(qdata$BID)+as.numeric(qdata$OFFER))/2;
if(type=="standard"){
- for(i in (halfwindow+1):(dim(ts)[1]-halfwindow)){
+ for(i in (halfwindow+1):(dim(qdata)[1]-halfwindow)){
mid = midquote[i];
vec = c(midquote[(i-halfwindow):(i-1)],midquote[(i+1):(i+halfwindow)]);
mad = mad(vec);
@@ -282,7 +281,7 @@
}
if(type=="advanced"){
- for(i in (window+1):(dim(ts)[1]-window)){
+ for(i in (window+1):(dim(qdata)[1]-window)){
mid = midquote[i];
vec = c(midquote[(i-halfwindow):(i-1)],midquote[(i+1):(i+halfwindow)]);
@@ -303,7 +302,7 @@
}
condition = c(rep(TRUE,halfwindow),condition,rep(TRUE,halfwindow));
- ts[condition];
+ qdata[condition];
}
Modified: pkg/RTAQ/R/manipulation.R
===================================================================
--- pkg/RTAQ/R/manipulation.R 2010-03-28 18:53:07 UTC (rev 313)
+++ pkg/RTAQ/R/manipulation.R 2010-03-28 19:02:08 UTC (rev 314)
@@ -185,7 +185,7 @@
#tdata and qdata, the xts object containing the trades and quotes respectively
##First part solves the problem that unequal number of obs (in data and data2) is possible when computing the RS
- data2 = matchtq(tdata,qdata,300,maxit=50);
+ data2 = matchtq(tdata,qdata,adjustment =300);
if(dim(data2)[1]>dim(data)[1]){
condition = as.vector(as.character(index(data2)))%in%as.vector(as.character(index(data)));
data2 = subset(data2,condition,select=1:(dim(data)[2]));
@@ -445,7 +445,7 @@
##Function computes many liquidity measures and returns an xts object containing them
##First part solves the problem that unequal number of obs (in data and data2) is possible when computing the RS
- data2 = matchtq(tdata,qdata,300,maxit=50);
+ data2 = matchtq(tdata,qdata,adjustment =300);
if(dim(data2)[1]>dim(data)[1]){
condition = as.vector(as.character(index(data2)))%in%as.vector(as.character(index(data)));
data2 = subset(data2,condition,select=1:(dim(data)[2]));
Modified: pkg/RTAQ/R/totalcleanup.R
===================================================================
--- pkg/RTAQ/R/totalcleanup.R 2010-03-28 18:53:07 UTC (rev 313)
+++ pkg/RTAQ/R/totalcleanup.R 2010-03-28 19:02:08 UTC (rev 314)
@@ -1,7 +1,9 @@
#TRADES CLEANUP WRAPPER
-tradescleanup = function(from="2008-01-03",to="2008-01-03",datasource,datadestination,ticker){
+tradescleanup = function(from="2008-01-03",to="2008-01-03",datasource,datadestination,ticker,exchange){
dates = timeSequence(from,to, format = "%Y-%m-%d", FinCenter = "GMT");
dates = dates[isBizday(dates, holidays = holidayNYSE(2004:2010))];
+ exchanges = exchange;
+ rm(exchange);
for(j in 1:length(dates)){
datasourcex = paste(datasource,"\\",dates[j],sep="");
@@ -12,7 +14,7 @@
load(paste(datasourcex,"\\",dataname,sep=""));
if(class(tdata)!="try-error"){
- exchange = exchanges[(exchanges[,1]==ticker[i]),2];
+ exchange = exchanges[exchanges==ticker[i]];
##actual clean-up:
##general:
@@ -78,9 +80,11 @@
}
##QUOTES CLEAN-UP WRAPPER
-quotescleanup = function(from,to,datasource,datadestination,ticker){
+quotescleanup = function(from,to,datasource,datadestination,ticker,exchange){
dates = timeSequence(from,to, format = "%Y-%m-%d", FinCenter = "GMT");
dates = dates[isBizday(dates, holidays = holidayNYSE(2004:2010))];
+ exchanges = exchange;
+ rm(exchange);
for(j in 1:length(dates)){
datasourcex = paste(datasource,"\\",dates[j],sep="");
@@ -91,7 +95,7 @@
load(paste(datasourcex,"\\",dataname,sep=""));
if(class(qdata)!="try-error"){
- exchange = exchanges[(exchanges[,1]==ticker[i]),2];
+ exchange = exchanges[exchanges==ticker[i]];
if(exchange=="Q"){exchange="T"}
##actual clean-up:
Modified: pkg/RTAQ/R/volatility.R
===================================================================
--- pkg/RTAQ/R/volatility.R 2010-03-28 18:53:07 UTC (rev 313)
+++ pkg/RTAQ/R/volatility.R 2010-03-28 19:02:08 UTC (rev 314)
@@ -19,26 +19,28 @@
ROWVar =
-function(R, seasadjR = NULL, wfunction = "HR" , alphaMCD = 0.5, alpha = 0.001)
+function(data, seasadjR = NULL, wfunction = "HR" , alphaMCD = 0.5, alpha = 0.001)
{
require(robustbase)
if (is.null(seasadjR)) {
- seasadjR = R
+ seasadjR = data;
}
- intraT = length(R); N=1;
- MCDcov = as.vector(covMcd( R , use.correction = FALSE )$raw.cov)
+
+ data = as.vector(data); seasadjR = as.vector(seasadjR);
+ intraT = length(data); N=1;
+ MCDcov = as.vector(covMcd( data , use.correction = FALSE )$raw.cov)
outlyingness = seasadjR^2/MCDcov
k = qchisq(p = 1 - alpha, df = N)
outlierindic = outlyingness > k
weights = rep(1, intraT)
if( wfunction == "HR" ){
weights[outlierindic] = 0
- wR = sqrt(weights) * R
+ wR = sqrt(weights) * data
return((conHR(di = N, alpha = alpha) * sum(wR^2))/mean(weights))
}
if( wfunction == "SR" ){
weights[outlierindic] = k/outlyingness[outlierindic]
- wR = sqrt(weights) * R
+ wR = sqrt(weights) * data
return((conhuber(di = N, alpha = alpha) * sum(wR^2))/mean(weights))
}
@@ -48,15 +50,15 @@
#Realized BiPower Variation (RBPVar) (RBPVar)
RBPVar = function(data){
- returns = as.numeric(data);
+ returns = as.vector(as.numeric(data));
n = length(returns);
rbpvar = (pi/2)*sum(abs(returns[1:(n-1)])*abs(returns[2:n]));
return(rbpvar);
}
#MinRV:
-MinRV = function(a){
- q = as.zoo(abs(as.numeric(a))); #absolute value
+MinRV = function(data){
+ q = as.zoo(abs(as.numeric(data))); #absolute value
q = as.numeric(rollapply(q, width=2, FUN=min,by = 1, align="left"));
N = length(q)+1; #number of obs
minrv = (pi/(pi-2))*(N/(N-1))*sum(q^2);
@@ -64,8 +66,8 @@
}
#MedRV
-MedRV = function(a){
- q = abs(as.numeric(a)); #absolute value
+MedRV = function(data){
+ q = abs(as.numeric(data)); #absolute value
q = as.numeric(rollmedian(q, k=3, align="center"));
N = length(q) + 2;
minrv = (pi/(6-4*sqrt(3)+pi))*(N/(N-2))*sum(q^2);
@@ -73,12 +75,12 @@
}
-
##Multivariate measures:
#Realized Covariation (RCov):
-RCov = function(ts){
- ts = na.locf(ts,na.rm=FALSE);
- covariance = t(ts)%*%ts;
+RCov = function(data){
+ data = na.locf(data,na.rm=FALSE);
+ data = as.matrix(data);
+ covariance = t(data)%*%data;
return(covariance);
}
@@ -112,17 +114,18 @@
ROWCov =
-function (R, seasadjR = NULL, wfunction = "HR" , alphaMCD = 0.5, alpha = 0.001)
+function (data, seasadjR = NULL, wfunction = "HR" , alphaMCD = 0.5, alpha = 0.001)
{
require(robustbase)
- if( is.null(dim(R) )){
- return( ROWVar( R , seasadjR = seasadjR , wfunction = wfunction , alphaMCD = alphaMCD , alpha = alpha ))
+ if( is.null(dim(data) )){
+ return( ROWVar( data , seasadjR = seasadjR , wfunction = wfunction , alphaMCD = alphaMCD , alpha = alpha ))
}else{
if (is.null(seasadjR)) {
- seasadjR = R
+ seasadjR = data
}
- intraT = nrow(R)
- N = ncol(R)
+ data = as.matrix(data); seasadjR = as.matrix(seasadjR);
+ intraT = nrow(data)
+ N = ncol(data)
perczeroes = apply(seasadjR, 2, countzeroes)/intraT
select = c(1:N)[perczeroes < 0.5]
seasadjRselect = seasadjR[, select]
@@ -143,12 +146,12 @@
weights = rep(1, intraT)
if( wfunction == "HR" ){
weights[outlierindic] = 0
- wR = sqrt(weights) * R
+ wR = sqrt(weights) * data
return((conHR(di = N, alpha = alpha) * t(wR) %*% wR)/mean(weights))
}
if( wfunction == "SR" ){
weights[outlierindic] = k/outlyingness[outlierindic]
- wR = sqrt(weights) * R
+ wR = sqrt(weights) * data
return((conhuber(di = N, alpha = alpha) * t(wR) %*% wR)/mean(weights))
}
}
@@ -166,43 +169,46 @@
}
RBPCov =
-function (ts)
+function (data)
{
- if( is.null(dim(ts) )){
- return( RBPVar( ts ))
+ if( is.null(dim(data) )){
+ return( RBPVar( data ))
}else{
- n = dim(ts)[2]
+ data = as.matrix(data);
+ n = dim(data)[2]
cov = matrix(rep(0, n * n), ncol = n)
diagonal = c()
for (i in 1:n) {
- diagonal[i] = RBPVar(ts[, i])
+ diagonal[i] = RBPVar(data[, i])
}
diag(cov) = diagonal
for (i in 2:n) {
for (j in 1:(i - 1)) {
- cov[i, j] = cov[j, i] = RBPCov_bi(ts[, i], ts[, j])
+ cov[i, j] = cov[j, i] = RBPCov_bi(data[, i], data[, j])
}
}
return(cov)
}
}
-thresholdcov = function(ts) {
- n=dim(ts)[1]; #number of observations
+thresholdcov = function(data) {
+ data=as.matrix(data);
+ n=dim(data)[1]; #number of observations
delta = 1/n;
- rbpvars = apply(ts,2,FUN=RBPVar); #bipower variation per stock
+ rbpvars = apply(data,2,FUN=RBPVar); #bipower variation per stock
tresholds = 3*sqrt(rbpvars)*(delta^(0.49)); #treshold per stock
tresmatrix = matrix(rep(tresholds,n),ncol=length(tresholds),nrow=n,byrow=TRUE);
- condition = ts>tresmatrix;
- ts[condition] = 0;
- cov = RCov(ts);
+ condition = data>tresmatrix;
+ data[condition] = 0;
+ cov = RCov(data);
return(cov);
}
#Realized Correlation (RCor)
-RCor = function(ts){
- ts = na.locf(ts,na.rm=FALSE);
- covariance = t(ts)%*%ts;
+RCor = function(data){
+ data = na.locf(data,na.rm=FALSE);
+ data = as.matrix(data);
+ covariance = t(data)%*%data;
sdmatrix = sqrt(diag(diag(covariance)));
rcor = solve(sdmatrix)%*%covariance%*%solve(sdmatrix);
return(rcor);
More information about the Blotter-commits
mailing list