[Blotter-commits] r314 - pkg/RTAQ/R

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Sun Mar 28 21:02:09 CEST 2010


Author: jonathan
Date: 2010-03-28 21:02:08 +0200 (Sun, 28 Mar 2010)
New Revision: 314

Modified:
   pkg/RTAQ/R/cleanupfunctions.R
   pkg/RTAQ/R/manipulation.R
   pkg/RTAQ/R/totalcleanup.R
   pkg/RTAQ/R/volatility.R
Log:
code update (with correct/consequent input argument names)

Modified: pkg/RTAQ/R/cleanupfunctions.R
===================================================================
--- pkg/RTAQ/R/cleanupfunctions.R	2010-03-28 18:53:07 UTC (rev 313)
+++ pkg/RTAQ/R/cleanupfunctions.R	2010-03-28 19:02:08 UTC (rev 314)
@@ -10,7 +10,7 @@
 ########## DATA CLEAN-UP: FOR ALL DATA #####################
 
 ####FUNCTION TO FILTER EXCHANGE HOURS ONLY: ExchangeHoursOnly
-ExchangeHoursOnly = function(ts, daybegin = "09:30:00",dayend="16:00:00")
+ExchangeHoursOnly = function(data, daybegin = "09:30:00",dayend="16:00:00")
 {
     # a function to excerpt data within exchange trading hours
     # daybegin and dayend: two characters in the format of "HH:MM:SS",
@@ -18,11 +18,11 @@
     #               trading day and the closing hour and minute and sec
     #                   of the trading day repectively
         
-    if(!is(ts, "xts"))
-        stop("ts must be an xts object")
+    if(!is(data, "xts"))
+        stop("data must be an xts object")
 
   gettime = function(z){unlist(strsplit(as.character(z)," "))[2]};
-  times1 = as.matrix(as.vector(as.character(index(ts))));
+  times1 = as.matrix(as.vector(as.character(index(data))));
   times = apply(times1,1,gettime); 
   tdtimes = timeDate(times,format = "%H:%M:%S",FinCenter = "GMT",zone="GMT");
 
@@ -31,26 +31,25 @@
   tddayend = timeDate(dayend,format = "%H:%M:%S",FinCenter = "GMT",zone="GMT");
 
   #select correct observations
-  filteredts = ts[tdtimes>=tddaybegin & tdtimes<=tddayend];
+  filteredts = data[tdtimes>=tddaybegin & tdtimes<=tddayend];
   return(filteredts);
 }
 
 
-nozeroprices = function(ts){
+nozeroprices = function(tdata){
 ####FUNCTION TO DELETE ZERO PRICES: nozeroprices
-filteredts = ts[as.numeric(ts$PRICE)!= 0];
+filteredts = tdata[as.numeric(tdata$PRICE)!= 0];
 return(filteredts);
 }
 
 
-selectexchange = function(ts,exch="N"){ 
+selectexchange = function(data,exch="N"){ 
 ###FUNCTION TO SELECT THE OBSERVATIONS OF A SINGLE EXCHANGE: selectexchange
-filteredts = ts[ts$EX==exch];
+filteredts = data[data$EX==exch];
 return(filteredts);
 }
 
-
-autoselectexchange = function(ts){
+autoselectexchange = function(tdata){
 ## AUTOSELECT EXCHANGE WITH HIGHEST NUMBER OF SHARES TRADED (for trades) ON:
 #function returns ts with obs of only 1 exchange
 #searches exchange with a maximum on the variable "SIZE"
@@ -60,32 +59,32 @@
   exchangenames = c("NASDAQ","AMEX","ARCA","Boston","NSX","NYSE","NASD ADF and TRF","Philadelphia","ISE","Chicago","CBOE","BATS");
   
 
-  z1 = sum(as.numeric(selectexchange(ts,"Q")$SIZE));
-  z2 = sum(as.numeric(selectexchange(ts,"T")$SIZE));
+  z1 = sum(as.numeric(selectexchange(tdata,"Q")$SIZE));
+  z2 = sum(as.numeric(selectexchange(tdata,"T")$SIZE));
   z = max(z1,z2);
   watchout = z == z2;
   nobs = cbind(nobs,z);
 
   for(i in 2:length(exchanges)) {
-  z = sum(as.numeric(selectexchange(ts,exchanges[i])$SIZE));
+  z = sum(as.numeric(selectexchange(tdata,exchanges[i])$SIZE));
   nobs = cbind(nobs,z); 
                         }
 
   exch = exchanges[max(nobs)==nobs];
 
-  as.character(ts$EX[1]) == exchanges;
+  as.character(tdata$EX[1]) == exchanges;
   namechosen = exchangenames[exch==exchanges];
   print(paste("The information of the",namechosen,"exchange was collected"));
   
   if(exch=="Q"&watchout){exch="T"}
-  filteredts = ts[ts$EX==exch];
+  filteredtdata = tdata[tdata$EX==exch];
 }
 
 
 ##### TRADE DATA SPECIFIC FUNCTIONS: ###################################
-salescond = function(ts){ 
+salescond = function(tdata){ 
 ###DELETE ENTRIES WITH AN ABONORMAL SALES CONDITION
-filteredts = ts[ts$COND == "0"|ts$COND == "E"|ts$COND == "F"];
+filteredts = tdata[tdata$COND == "0"|tdata$COND == "E"|tdata$COND == "F"];
 return(filteredts);
 }
 
@@ -116,25 +115,25 @@
   return(b);
 }
 
-mergesametimestamp = function(ts,selection="median"){
+mergesametimestamp = function(tdata,selection="median"){
   #find end points:
-  ep = endpoints(ts,"secs");
+  ep = endpoints(tdata,"secs");
 
   #size per second:
-  size = period.apply(ts$SIZE,ep,sumN);
+  size = period.apply(tdata$SIZE,ep,sumN);
 
   #price per second:
-  if(selection=="median"){price = period.apply(ts$PRICE,ep,medianN)}
-  if(selection=="maxvolume"){price = period.apply(cbind(ts$PRICE,ts$SIZE),ep,maxvol)}
-  if(selection=="weightedaverage"){price = period.apply(cbind(ts$PRICE,ts$SIZE),ep,waverage)}
+  if(selection=="median"){price = period.apply(tdata$PRICE,ep,medianN)}
+  if(selection=="maxvolume"){price = period.apply(cbind(tdata$PRICE,tdata$SIZE),ep,maxvol)}
+  if(selection=="weightedaverage"){price = period.apply(cbind(tdata$PRICE,tdata$SIZE),ep,waverage)}
 
   ##merge everything:
   selection = ep[2:length(ep)];
-  ts2 = ts[selection];
-  ts2$PRICE = price;
-  ts2$SIZE = size;
+  tdata2 = tdata[selection];
+  tdata2$PRICE = price;
+  tdata2$SIZE = size;
 
-return(ts2)
+return(tdata2)
 }
 
 rmtradeoutliers = function(tdata,qdata){
@@ -157,21 +156,21 @@
 
 #################       QUOTE SPECIFIC FUNCTIONS:       #################
 
-nozeroquotes = function(ts){
+nozeroquotes = function(qdata){
 ####FUNCTION TO DELETE ZERO QUOTES: nozeroquotes
-filteredts = ts[as.numeric(ts$BID)!= 0& as.numeric(ts$OFFER)!= 0];
+filteredts = qdata[as.numeric(qdata$BID)!= 0& as.numeric(qdata$OFFER)!= 0];
 return(filteredts);
 }
 
 
-autoselectexchangeq = function(ts){
+autoselectexchangeq = function(qdata){
 ####Autoselect exchange with highest value for (bidsize+offersize)
   nobs=c();
   exchanges = c("Q","A","P","B","C","N","D","X","I","M","W","Z");
   exchangenames = c("NASDAQ","AMEX","ARCA","Boston","NSX","NYSE","NASD ADF and TRF","Philadelphia","ISE","Chicago","CBOE","BATS");
 
-  selected1 = selectexchange(ts,"Q");
-  selected2 = selectexchange(ts,"T");
+  selected1 = selectexchange(qdata,"Q");
+  selected2 = selectexchange(qdata,"T");
   z1 = sum(as.numeric(selected1$BIDSIZE)+as.numeric(selected1$OFFERSIZE));
   z2 = sum(as.numeric(selected2$BIDSIZE)+as.numeric(selected2$OFFERSIZE));
   z = max(z1,z2);
@@ -179,7 +178,7 @@
   nobs = cbind(nobs,z);
 
   for(i in 2:length(exchanges)) {
-  selected = selectexchange(ts,exchanges[i]);
+  selected = selectexchange(qdata,exchanges[i]);
   z = sum(as.numeric(selected$BIDSIZE)+as.numeric(selected$OFFERSIZE));
   nobs = cbind(nobs,z); 
                         }
@@ -191,42 +190,42 @@
 
   if(exch=="Q"&watchout){exch="T"}
 
-  filteredts = ts[ts$EX==exch];
+  filteredts = qdata[qdata$EX==exch];
   return(filteredts);
 }
 
 
-mergequotessametimestamp = function(ts,selection="median"){  ##FAST
+mergequotessametimestamp = function(qdata,selection="median"){  ##FAST
   condition=selection=="median"|selection=="maxvolume"|selection=="weightedaverage";
   if(!condition){print(paste("WARNING:The result will be corrupted. Check whether",selection,"is an existing option for the attribute selection."))}
 
   #find end points:
-  ep = endpoints(ts,"secs");
+  ep = endpoints(qdata,"secs");
 
   #size per second:
-  bidsize = period.apply(ts$BIDSIZE,ep,sumN);
-  offersize =  period.apply(ts$OFFERSIZE,ep,sumN);
+  bidsize = period.apply(qdata$BIDSIZE,ep,sumN);
+  offersize =  period.apply(qdata$OFFERSIZE,ep,sumN);
 
   #median per second:
   if(selection=="median"){
-  bid = period.apply(ts$BID,ep,medianN);
-  offer = period.apply(ts$OFFER,ep,medianN);
+  bid = period.apply(qdata$BID,ep,medianN);
+  offer = period.apply(qdata$OFFER,ep,medianN);
   }
 
   #maxvolume per second:
   if(selection=="maxvolume"){
-  bid = period.apply(cbind(ts$BID,ts$BIDSIZE),ep,maxvol);
-  offer = period.apply(cbind(ts$OFFER,ts$OFFERSIZE),ep,maxvol);
+  bid = period.apply(cbind(qdata$BID,qdata$BIDSIZE),ep,maxvol);
+  offer = period.apply(cbind(qdata$OFFER,qdata$OFFERSIZE),ep,maxvol);
   }
 
   if(selection=="weightedaverage"){
-  bid = period.apply(cbind(ts$BID,ts$BIDSIZE),ep,waverage);
-  offer = period.apply(cbind(ts$OFFER,ts$OFFERSIZE),ep,waverage);
+  bid = period.apply(cbind(qdata$BID,qdata$BIDSIZE),ep,waverage);
+  offer = period.apply(cbind(qdata$OFFER,qdata$OFFERSIZE),ep,waverage);
   }
 
   ##merge everything:
   selection = ep[2:length(ep)];
-  ts2 = ts[selection];
+  ts2 = qdata[selection];
   ts2$BID = bid;
   ts2$OFFER = offer;
 
@@ -237,23 +236,23 @@
 }
 
 
-rmnegspread = function(ts){
+rmnegspread = function(qdata){
 ##function to remove observations with negative spread
-  condition = as.numeric(ts$OFFER)>as.numeric(ts$BID);
-  ts[condition];
+  condition = as.numeric(qdata$OFFER)>as.numeric(qdata$BID);
+  qdata[condition];
 }
 
 
-rmlargespread = function(ts,maxi=50){
+rmlargespread = function(qdata,maxi=50){
 ##function to remove observations with a spread larger than 50 times the median spread that day
 ###WATCH OUT: works only correct if supplied input data consists of 1 day...
-  spread = as.numeric(ts$OFFER)-as.numeric(ts$BID);
+  spread = as.numeric(qdata$OFFER)-as.numeric(qdata$BID);
   condition = ((maxi*median(spread))>spread);
-  return(ts[condition])
+  return(qdata[condition])
 }
 
 
-rmoutliers = function(ts,maxi=10,window=50,type="advanced"){
+rmoutliers = function(qdata,maxi=10,window=50,type="advanced"){
 ##function to remove entries for which the mid-quote deviated by more than 10 median absolute deviations 
 ##from a rolling centered median (excluding the observation under consideration) of 50 observations if type = "standard".
 
@@ -269,9 +268,9 @@
   print("NOTE: This function is only useful for quotes NOT for trades");
   condition = c();
   halfwindow = round(window/2);
-  midquote = (as.numeric(ts$BID)+as.numeric(ts$OFFER))/2;
+  midquote = (as.numeric(qdata$BID)+as.numeric(qdata$OFFER))/2;
   if(type=="standard"){
-  for(i in (halfwindow+1):(dim(ts)[1]-halfwindow)){
+  for(i in (halfwindow+1):(dim(qdata)[1]-halfwindow)){
     mid = midquote[i];
     vec = c(midquote[(i-halfwindow):(i-1)],midquote[(i+1):(i+halfwindow)]);
     mad = mad(vec);
@@ -282,7 +281,7 @@
   }
 
 if(type=="advanced"){
-  for(i in (window+1):(dim(ts)[1]-window)){
+  for(i in (window+1):(dim(qdata)[1]-window)){
     mid = midquote[i];
 
     vec = c(midquote[(i-halfwindow):(i-1)],midquote[(i+1):(i+halfwindow)]);
@@ -303,7 +302,7 @@
 }
 
   condition = c(rep(TRUE,halfwindow),condition,rep(TRUE,halfwindow));
-  ts[condition];
+  qdata[condition];
 }
 
 

Modified: pkg/RTAQ/R/manipulation.R
===================================================================
--- pkg/RTAQ/R/manipulation.R	2010-03-28 18:53:07 UTC (rev 313)
+++ pkg/RTAQ/R/manipulation.R	2010-03-28 19:02:08 UTC (rev 314)
@@ -185,7 +185,7 @@
 #tdata and qdata, the xts object containing the trades and quotes respectively
 
   ##First part solves the problem that unequal number of obs (in data and data2) is possible when computing the RS
-  data2 = matchtq(tdata,qdata,300,maxit=50);
+  data2 = matchtq(tdata,qdata,adjustment =300);
   if(dim(data2)[1]>dim(data)[1]){
   condition = as.vector(as.character(index(data2)))%in%as.vector(as.character(index(data)));
   data2 = subset(data2,condition,select=1:(dim(data)[2]));
@@ -445,7 +445,7 @@
 ##Function computes many liquidity measures and returns an xts object containing them
 
 ##First part solves the problem that unequal number of obs (in data and data2) is possible when computing the RS
-  data2 = matchtq(tdata,qdata,300,maxit=50);
+  data2 = matchtq(tdata,qdata,adjustment =300);
   if(dim(data2)[1]>dim(data)[1]){
   condition = as.vector(as.character(index(data2)))%in%as.vector(as.character(index(data)));
   data2 = subset(data2,condition,select=1:(dim(data)[2]));

Modified: pkg/RTAQ/R/totalcleanup.R
===================================================================
--- pkg/RTAQ/R/totalcleanup.R	2010-03-28 18:53:07 UTC (rev 313)
+++ pkg/RTAQ/R/totalcleanup.R	2010-03-28 19:02:08 UTC (rev 314)
@@ -1,7 +1,9 @@
 #TRADES CLEANUP WRAPPER
-tradescleanup = function(from="2008-01-03",to="2008-01-03",datasource,datadestination,ticker){
+tradescleanup = function(from="2008-01-03",to="2008-01-03",datasource,datadestination,ticker,exchange){
   dates = timeSequence(from,to, format = "%Y-%m-%d", FinCenter = "GMT");
   dates = dates[isBizday(dates, holidays = holidayNYSE(2004:2010))];
+  exchanges = exchange;
+  rm(exchange);
 
   for(j in 1:length(dates)){
   datasourcex = paste(datasource,"\\",dates[j],sep="");
@@ -12,7 +14,7 @@
   load(paste(datasourcex,"\\",dataname,sep=""));
 
   if(class(tdata)!="try-error"){
-  exchange = exchanges[(exchanges[,1]==ticker[i]),2];  
+  exchange = exchanges[exchanges==ticker[i]];  
 
   ##actual clean-up: 
   ##general:
@@ -78,9 +80,11 @@
 }
 
 ##QUOTES CLEAN-UP WRAPPER
-quotescleanup = function(from,to,datasource,datadestination,ticker){
+quotescleanup = function(from,to,datasource,datadestination,ticker,exchange){
   dates = timeSequence(from,to, format = "%Y-%m-%d", FinCenter = "GMT");
   dates = dates[isBizday(dates, holidays = holidayNYSE(2004:2010))];
+  exchanges = exchange;
+  rm(exchange);
 
   for(j in 1:length(dates)){
   datasourcex = paste(datasource,"\\",dates[j],sep="");
@@ -91,7 +95,7 @@
   load(paste(datasourcex,"\\",dataname,sep=""));
 
   if(class(qdata)!="try-error"){
-  exchange = exchanges[(exchanges[,1]==ticker[i]),2];  
+  exchange = exchanges[exchanges==ticker[i]];  
   if(exchange=="Q"){exchange="T"}
 
   ##actual clean-up:

Modified: pkg/RTAQ/R/volatility.R
===================================================================
--- pkg/RTAQ/R/volatility.R	2010-03-28 18:53:07 UTC (rev 313)
+++ pkg/RTAQ/R/volatility.R	2010-03-28 19:02:08 UTC (rev 314)
@@ -19,26 +19,28 @@
 
 
 ROWVar =
-function(R, seasadjR = NULL, wfunction = "HR" , alphaMCD = 0.5, alpha = 0.001) 
+function(data, seasadjR = NULL, wfunction = "HR" , alphaMCD = 0.5, alpha = 0.001) 
 {
     require(robustbase)
     if (is.null(seasadjR)) {
-        seasadjR = R
+        seasadjR = data;
     }
-    intraT = length(R); N=1;
-    MCDcov = as.vector(covMcd( R , use.correction = FALSE )$raw.cov)
+
+    data = as.vector(data); seasadjR = as.vector(seasadjR);
+    intraT = length(data); N=1;
+    MCDcov = as.vector(covMcd( data , use.correction = FALSE )$raw.cov)
     outlyingness = seasadjR^2/MCDcov    
     k = qchisq(p = 1 - alpha, df = N)
     outlierindic = outlyingness > k
     weights = rep(1, intraT)
     if( wfunction == "HR" ){
        weights[outlierindic] = 0
-       wR = sqrt(weights) * R
+       wR = sqrt(weights) * data
        return((conHR(di = N, alpha = alpha) * sum(wR^2))/mean(weights))
     }
     if( wfunction == "SR" ){
        weights[outlierindic] = k/outlyingness[outlierindic]
-       wR = sqrt(weights) * R
+       wR = sqrt(weights) * data
        return((conhuber(di = N, alpha = alpha) * sum(wR^2))/mean(weights))
     }
 
@@ -48,15 +50,15 @@
 
 #Realized BiPower Variation (RBPVar) (RBPVar)
 RBPVar = function(data){
-  returns = as.numeric(data);
+  returns = as.vector(as.numeric(data));
   n = length(returns);
   rbpvar = (pi/2)*sum(abs(returns[1:(n-1)])*abs(returns[2:n]));
   return(rbpvar);
 }
 
 #MinRV:
-MinRV = function(a){
-  q = as.zoo(abs(as.numeric(a))); #absolute value
+MinRV = function(data){
+  q = as.zoo(abs(as.numeric(data))); #absolute value
   q = as.numeric(rollapply(q, width=2, FUN=min,by = 1, align="left"));
   N = length(q)+1; #number of obs
   minrv = (pi/(pi-2))*(N/(N-1))*sum(q^2);
@@ -64,8 +66,8 @@
 }
 
 #MedRV
-MedRV = function(a){
-  q = abs(as.numeric(a)); #absolute value
+MedRV = function(data){
+  q = abs(as.numeric(data)); #absolute value
   q = as.numeric(rollmedian(q, k=3, align="center"));
   N = length(q) + 2;
   minrv = (pi/(6-4*sqrt(3)+pi))*(N/(N-2))*sum(q^2);
@@ -73,12 +75,12 @@
 }
 
 
-
 ##Multivariate measures:
 #Realized Covariation (RCov):
-RCov = function(ts){
-  ts = na.locf(ts,na.rm=FALSE);
-  covariance = t(ts)%*%ts;
+RCov = function(data){
+  data = na.locf(data,na.rm=FALSE);
+  data = as.matrix(data);
+  covariance = t(data)%*%data;
   return(covariance);
 }
 
@@ -112,17 +114,18 @@
 
 
 ROWCov =
-function (R, seasadjR = NULL, wfunction = "HR" , alphaMCD = 0.5, alpha = 0.001) 
+function (data, seasadjR = NULL, wfunction = "HR" , alphaMCD = 0.5, alpha = 0.001) 
 {
     require(robustbase)
-    if( is.null(dim(R) )){ 
-          return( ROWVar( R , seasadjR = seasadjR , wfunction = wfunction , alphaMCD = alphaMCD , alpha = alpha ))
+    if( is.null(dim(data) )){ 
+          return( ROWVar( data , seasadjR = seasadjR , wfunction = wfunction , alphaMCD = alphaMCD , alpha = alpha ))
     }else{
        if (is.null(seasadjR)) {
-           seasadjR = R
+           seasadjR = data
        }
-       intraT = nrow(R)
-       N = ncol(R)
+	 data = as.matrix(data); seasadjR = as.matrix(seasadjR);
+       intraT = nrow(data)
+       N = ncol(data)
        perczeroes = apply(seasadjR, 2, countzeroes)/intraT
        select = c(1:N)[perczeroes < 0.5]
        seasadjRselect = seasadjR[, select]
@@ -143,12 +146,12 @@
        weights = rep(1, intraT)
        if( wfunction == "HR" ){
           weights[outlierindic] = 0
-          wR = sqrt(weights) * R
+          wR = sqrt(weights) * data
           return((conHR(di = N, alpha = alpha) * t(wR) %*% wR)/mean(weights))
        }
        if( wfunction == "SR" ){
           weights[outlierindic] = k/outlyingness[outlierindic]
-          wR = sqrt(weights) * R
+          wR = sqrt(weights) * data
           return((conhuber(di = N, alpha = alpha) * t(wR) %*% wR)/mean(weights))
        }
    }
@@ -166,43 +169,46 @@
 }
 
 RBPCov = 
-function (ts) 
+function (data) 
 {
-    if( is.null(dim(ts) )){ 
-          return( RBPVar( ts ))
+    if( is.null(dim(data) )){ 
+          return( RBPVar( data ))
     }else{
-       n = dim(ts)[2]
+	 data  = as.matrix(data);
+       n = dim(data)[2]
        cov = matrix(rep(0, n * n), ncol = n)
        diagonal = c()
        for (i in 1:n) {
-          diagonal[i] = RBPVar(ts[, i])
+          diagonal[i] = RBPVar(data[, i])
        }
        diag(cov) = diagonal
        for (i in 2:n) {
            for (j in 1:(i - 1)) {
-               cov[i, j] = cov[j, i] = RBPCov_bi(ts[, i], ts[, j])
+               cov[i, j] = cov[j, i] = RBPCov_bi(data[, i], data[, j])
            }
        }
        return(cov)
    }
 }
 
-thresholdcov = function(ts)	{
-  n=dim(ts)[1];						#number of observations
+thresholdcov = function(data)	{
+  data=as.matrix(data);
+  n=dim(data)[1];						#number of observations
   delta = 1/n;
-  rbpvars = apply(ts,2,FUN=RBPVar);			#bipower variation per stock
+  rbpvars = apply(data,2,FUN=RBPVar);			#bipower variation per stock
   tresholds = 3*sqrt(rbpvars)*(delta^(0.49));	#treshold per stock
   tresmatrix = matrix(rep(tresholds,n),ncol=length(tresholds),nrow=n,byrow=TRUE);
-  condition = ts>tresmatrix;
-  ts[condition] = 0;
-  cov = RCov(ts);
+  condition = data>tresmatrix;
+  data[condition] = 0;
+  cov = RCov(data);
 return(cov);	
 				}
 
 #Realized Correlation (RCor)
-RCor = function(ts){
-  ts = na.locf(ts,na.rm=FALSE);
-  covariance = t(ts)%*%ts;
+RCor = function(data){
+  data = na.locf(data,na.rm=FALSE);
+  data = as.matrix(data);
+  covariance = t(data)%*%data;
   sdmatrix = sqrt(diag(diag(covariance)));
   rcor = solve(sdmatrix)%*%covariance%*%solve(sdmatrix);
   return(rcor);



More information about the Blotter-commits mailing list