[Blotter-commits] r344 - in pkg/RTAQ: R man

Wed Jun 23 13:39:51 CEST 2010

Author: jonathan
Date: 2010-06-23 13:39:51 +0200 (Wed, 23 Jun 2010)
New Revision: 344

Modified:
   pkg/RTAQ/R/aggregate.R
   pkg/RTAQ/R/cleanupfunctions.R
   pkg/RTAQ/man/aggregatets.Rd
   pkg/RTAQ/man/rmoutliers.Rd
Log:
incorporation of first comments Eric Zivot

Modified: pkg/RTAQ/R/aggregate.R
===================================================================

--- pkg/RTAQ/R/aggregate.R	2010-06-16 15:39:36 UTC (rev 343)
+++ pkg/RTAQ/R/aggregate.R	2010-06-23 11:39:51 UTC (rev 344)
@@ -13,55 +13,62 @@
 
 
 ##AGGREGATION;
-aggregatets = function(ts, FUN=previoustick, on="minutes", k=1, weights=NULL){
+aggregatets = function (ts, FUN = previoustick, on = "minutes", k = 1, weights = NULL,dropna=F)
+{
   #Valid values for the argument "on" include: “secs” (seconds), “seconds”, “mins” (minutes), “minutes”,“hours”, “days”, “weeks”.
 
-  #Without weights:
-  if(is.null(weights)){
-  ep = endpoints(ts, on, k);
-  ts2 = period.apply(ts,ep,FUN);  
-  }
+    if (is.null(weights)) {
+        ep = endpoints(ts, on, k);
+        ts2 = period.apply(ts, ep, FUN);
+    }
+    if (!is.null(weights)) {
+        tsb = cbind(ts, weights)
+        ep = endpoints(tsb, on, k)
+        ts2 = period.apply(tsb, ep, FUN = weightedaverage)
+    }
+    if (on == "minutes" | on == "mins" | on == "secs" | on == 
+        "seconds") {
+        if (on == "minutes" | on == "mins") {
+            secs = k * 60
+        }
+        if (on == "secs" | on == "seconds") {
+            secs = k
+        }
+        a = .index(ts2) + (secs - .index(ts2)%%secs)
+        ts3 = .xts(ts2, a)
+    }
+    if (on == "hours") {
+        secs = 3600
+        a = .index(ts2) + (secs - .index(ts2)%%secs)
+        ts3 = .xts(ts2, a)
+    }
+    if (on == "days") {
+        secs = 24 * 3600
+        a = .index(ts2) + (secs - .index(ts2)%%secs) - (24 * 
+            3600)
+        ts3 = .xts(ts2, a)
+    }
+    if (on == "weeks") {
+        secs = 24 * 3600 * 7
+        a = (.index(ts2) + (secs - (.index(ts2) + (3L * 86400L))%%secs)) - 
+            (24 * 3600)
+        ts3 = .xts(ts2, a)
+    }
 
+    index(ts3) = as.timeDate(index(ts3));
+	if(!dropna){
+	if(on !="weeks"|on!="days"){
+	if(on=="secs"|on=="seconds"){tby = "s"}
+	if(on=="mins"|on=="minutes"){tby = "min"}
+      if (on == "hours"){tby = "h"}
+	by = paste(k,tby,sep=" ");
+	allindex = as.timeDate(seq(start(ts3),end(ts3),by=by));
+	xx = xts(rep(1,length(allindex)),order.by=allindex);
+	ts3 = merge(ts3,xx)[,1];
+	}#currently for weeks and days, na are still dropped
+	}#end dropna if
 
-  #With weights:
-  if(!is.null(weights)){
-  tsb = cbind(ts,weights);
-  ep = endpoints(tsb, on, k);
-  ts2 = period.apply(tsb,ep,FUN=weightedaverage);  
-  }
-
-
-  if(on=="minutes"|on=="mins"|on=="secs"|on=="seconds"){
-  if(on=="minutes"|on=="mins"){secs = k*60;}
-  if(on=="secs"|on=="seconds"){secs = k}
-  a = .index(ts2) + (secs-.index(ts2) %% secs);
-  ts3 = .xts(ts2,a)
-  }
-
-  if(on=="hours"){
-  secs = 3600;
-  a = .index(ts2) + (secs-.index(ts2) %% secs)
-  ts3 = .xts(ts2,a);
-  }
-
-
-  if(on=="days"){
-  secs = 24*3600;
-  a = .index(ts2) + (secs-.index(ts2) %% secs) - (24*3600)
-  ts3 = .xts(ts2,a);
-  }
-
-  
-  if(on=="weeks")	{
-  secs = 24*3600*7;
-  a = (.index(ts2) + (secs-(.index(ts2) + (3L * 86400L)) %% secs))-(24*3600);
-  ts3 = .xts(ts2,a);
-				}
-
-  #return to timeDate timestamps
-  index(ts3) = as.timeDate(index(ts3));
-
-  return(ts3);
+    return(ts3)
 }
 
 #PRICE (specificity: opening price and previoustick)

Modified: pkg/RTAQ/R/cleanupfunctions.R
===================================================================
--- pkg/RTAQ/R/cleanupfunctions.R	2010-06-16 15:39:36 UTC (rev 343)
+++ pkg/RTAQ/R/cleanupfunctions.R	2010-06-23 11:39:51 UTC (rev 344)
@@ -251,8 +251,8 @@
   return(qdata[condition])
 }
 
-
-rmoutliers = function(qdata,maxi=10,window=50,type="advanced"){
+rmoutliers = function (qdata, maxi = 10, window = 50, type = "advanced")
+{
 ##function to remove entries for which the mid-quote deviated by more than 10 median absolute deviations 
 ##from a rolling centered median (excluding the observation under consideration) of 50 observations if type = "standard".
 
@@ -265,44 +265,70 @@
 ##3. Rolling median of the previous "window" observations
 
 ##NOTE: Median Absolute deviation chosen contrary to Barndorff-Nielsen et al.
-  print("NOTE: This function is only useful for quotes NOT for trades");
-  condition = c();
-  halfwindow = round(window/2);
-  midquote = (as.numeric(qdata$BID)+as.numeric(qdata$OFFER))/2;
-  if(type=="standard"){
-  for(i in (halfwindow+1):(dim(qdata)[1]-halfwindow)){
-    mid = midquote[i];
-    vec = c(midquote[(i-halfwindow):(i-1)],midquote[(i+1):(i+halfwindow)]);
-    mad = mad(vec);
-    maxcriterion = median(vec)+maxi*mad;    
-    mincriterion = median(vec)-maxi*mad;  
-    condition[i-halfwindow] = mincriterion < mid & mid< maxcriterion;
-  }
-  }
+    print("NOTE: This function is only useful for quotes NOT for trades")
+    window = floor(window/2) * 2
+    condition = c();
+    halfwindow = window/2;
+    midquote = as.vector(as.numeric(qdata$BID) + as.numeric(qdata$OFFER))/2;
+    mad_all = mad(midquote);
 
-if(type=="advanced"){
-  for(i in (window+1):(dim(qdata)[1]-window)){
-    mid = midquote[i];
+    midquote = xts(midquote,order.by = index(qdata))
 
-    vec = c(midquote[(i-halfwindow):(i-1)],midquote[(i+1):(i+halfwindow)]);
-    vec2 = midquote[(i-window):(i-1)];
-    vec3 = midquote[(i+1):(i+window)];
+    if (mad_all == 0) {
+        m = as.vector(as.numeric(midquote))
+        s = c(TRUE, (m[2:length(m)] - m[1:(length(m) - 1)] != 
+            0))
+        mad_all = mad(as.numeric(midquote[s]))
+    }
 
-    medianv = c(median(vec),median(vec2),median(vec3));
-    difference = abs(medianv-mid);
-    mediani = medianv[min(difference) == difference];   
-    mad = mad(vec);
-    
-    maxcriterion = mediani+maxi*mad;    
-    mincriterion = mediani-maxi*mad;  
-          
-    condition[i-halfwindow] = mincriterion < mid & mid< maxcriterion;
-  }
+    medianw = function(midquote, n = window) {
+        m = floor(n/2) + 1
+        q = median(c(midquote[1:(m - 1)], midquote[(m + 1):(n + 
+            1)]))
+        return(q)
+    }
 
-}
+    if (type == "standard") {
+        meds = as.numeric(rollapply(midquote, width = (window + 
+            1), FUN = medianw, align = "center"))
+    }
+    if (type == "advanced") {
+        advancedperrow = function(qq) {
+            diff = abs(qq[1:3] - qq[4])
+            select = min(diff) == diff
+            value = qq[select]
+            if (length(value) > 1) {
+                value = median(value)
+            }
+            return(value)
+        }
+        n = length(midquote)
+        allmatrix = matrix(rep(0, 4 * n), ncol = 4)
+        median2 = function(a) {
+            median(a)
+        }
+        standardmed = as.numeric(rollapply(midquote, width = (window), 
+            FUN = median2, align = "center"))
+        allmatrix[(halfwindow + 1):(n - halfwindow), 1] = as.numeric(rollapply(midquote, 
+            width = (window + 1), FUN = medianw, align = "center"))
+        allmatrix[(1:(n - window)), 2] = standardmed[2:length(standardmed)]
+        allmatrix[(window + 1):(n), 3] = standardmed[1:(length(standardmed) - 
+            1)]
+        allmatrix[, 4] = midquote
+        meds = apply(allmatrix, 1, advancedperrow)[(halfwindow + 
+            1):(n - halfwindow)]
+    }
 
-  condition = c(rep(TRUE,halfwindow),condition,rep(TRUE,halfwindow));
-  qdata[condition];
+    midquote = as.numeric(midquote);
+    maxcriterion = meds + maxi * mad_all
+    mincriterion = meds - maxi * mad_all
+
+    condition = mincriterion < midquote[(halfwindow + 1):(length(midquote) - 
+        halfwindow)] & midquote[(halfwindow + 1):(length(midquote) - 
+        halfwindow)] < maxcriterion
+    condition = c(rep(TRUE, halfwindow), condition, rep(TRUE, 
+        halfwindow))
+    qdata[condition];
 }
 
 
@@ -321,4 +347,4 @@
 #start = unlist(strsplit(as.character(start(myxts))," "))[1];
 #end = unlist(strsplit(as.character(end(myxts))," "))[1];
 #alldays = timeSequence(from = start, to = end, by = "day");
-#alldays = alldays[isWeekday(alldays)];
+#alldays = alldays[isWeekday(alldays)];
\ No newline at end of file

Modified: pkg/RTAQ/man/aggregatets.Rd
===================================================================
--- pkg/RTAQ/man/aggregatets.Rd	2010-06-16 15:39:36 UTC (rev 343)
+++ pkg/RTAQ/man/aggregatets.Rd	2010-06-23 11:39:51 UTC (rev 344)
@@ -25,6 +25,8 @@
 When you assign an xts object with wheights to this argument, a weighted mean is taken over each interval. 
 Of course, the weights should have the same timestamps as the supplied time series.
 }
+\item{dropna}{ boolean, which determines whether empty intervals should be dropped.
+By default, an NA is returned in case an interval is empty.}
 }
 
 \section{Details}{
@@ -37,7 +39,9 @@
 the last observation up to that point, excluding the value at 09:35:00 itself.
 
 Please Note:
-In case an interval is empty, it is omitted. (Possibly, you expect an NA but this isn't the case)
+In case an interval is empty, by default an NA is returned. In the case of e.g. previous
+tick aggregation, it makes sense to fill these NAs with the function \code{na.locf}
+(last observation carried forward) from the zoo package.
 }
 
 \value{

Modified: pkg/RTAQ/man/rmoutliers.Rd
===================================================================
--- pkg/RTAQ/man/rmoutliers.Rd	2010-06-16 15:39:36 UTC (rev 343)
+++ pkg/RTAQ/man/rmoutliers.Rd	2010-06-23 11:39:51 UTC (rev 344)
@@ -2,7 +2,7 @@
 \Rdversion{1.1}
 \alias{rmoutliers}
 \title{
-Delete entries for which the mid-quote is outlying with to respect surrounding entries
+Delete entries for which the mid-quote is outlying with respect to surrounding entries
 }
 \description{
 If type="standard":  Function deletes entries for which the mid-quote deviated by more than "maxi"
@@ -10,7 +10,7 @@
 the observation under consideration) of "window" observations.
 
 If type="advanced":  Function deletes entries for which the mid-quote deviates by more than "maxi"
-median absolute deviations from the value closest to the midquote of
+median absolute deviations from the value closest to the mid-quote of
 these three options:
 \enumerate{
 \item Rolling centered median (excluding the observation under consideration)
@@ -21,7 +21,11 @@
 The advantage of this procedure compared to the "standard" proposed
 by Barndorff-Nielsen et al. (2010) is that it will not incorrectly remove
 large price jumps. Therefore this procedure has been set as the default
-for removing outliers.
+for removing outliers. 
+
+Note that the median absolute deviation is taken over the entire sample.
+In case it is zero (which can happen if mid-quotes don't change much), it is recomputed
+over the subsample of observations whose mid-quote differs from the previous observation.
 }
 
 \usage{
@@ -34,7 +38,8 @@
   \item{type}{should be "standard" or "advanced" (see description).}
 }
 
-\section{Details}{NOTE: This function works only correct if supplied input data consists of 1 day.}
+\section{Details}{
+NOTE: This function only works correctly if the supplied input data consists of 1 day.}
 
 \value{xts object}