[Blotter-commits] r344 - in pkg/RTAQ: R man
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Wed Jun 23 13:39:51 CEST 2010
Author: jonathan
Date: 2010-06-23 13:39:51 +0200 (Wed, 23 Jun 2010)
New Revision: 344
Modified:
pkg/RTAQ/R/aggregate.R
pkg/RTAQ/R/cleanupfunctions.R
pkg/RTAQ/man/aggregatets.Rd
pkg/RTAQ/man/rmoutliers.Rd
Log:
incorporation of first comments Eric Zivot
Modified: pkg/RTAQ/R/aggregate.R
===================================================================
--- pkg/RTAQ/R/aggregate.R 2010-06-16 15:39:36 UTC (rev 343)
+++ pkg/RTAQ/R/aggregate.R 2010-06-23 11:39:51 UTC (rev 344)
@@ -13,55 +13,62 @@
##AGGREGATION;
-aggregatets = function(ts, FUN=previoustick, on="minutes", k=1, weights=NULL){
+aggregatets = function (ts, FUN = previoustick, on = "minutes", k = 1, weights = NULL,dropna=F)
+{
#Valid values for the argument "on" include: secs (seconds), seconds, mins (minutes), minutes,hours, days, weeks.
- #Without weights:
- if(is.null(weights)){
- ep = endpoints(ts, on, k);
- ts2 = period.apply(ts,ep,FUN);
- }
+ if (is.null(weights)) {
+ ep = endpoints(ts, on, k);
+ ts2 = period.apply(ts, ep, FUN);
+ }
+ if (!is.null(weights)) {
+ tsb = cbind(ts, weights)
+ ep = endpoints(tsb, on, k)
+ ts2 = period.apply(tsb, ep, FUN = weightedaverage)
+ }
+ if (on == "minutes" | on == "mins" | on == "secs" | on ==
+ "seconds") {
+ if (on == "minutes" | on == "mins") {
+ secs = k * 60
+ }
+ if (on == "secs" | on == "seconds") {
+ secs = k
+ }
+ a = .index(ts2) + (secs - .index(ts2)%%secs)
+ ts3 = .xts(ts2, a)
+ }
+ if (on == "hours") {
+ secs = 3600
+ a = .index(ts2) + (secs - .index(ts2)%%secs)
+ ts3 = .xts(ts2, a)
+ }
+ if (on == "days") {
+ secs = 24 * 3600
+ a = .index(ts2) + (secs - .index(ts2)%%secs) - (24 *
+ 3600)
+ ts3 = .xts(ts2, a)
+ }
+ if (on == "weeks") {
+ secs = 24 * 3600 * 7
+ a = (.index(ts2) + (secs - (.index(ts2) + (3L * 86400L))%%secs)) -
+ (24 * 3600)
+ ts3 = .xts(ts2, a)
+ }
+ index(ts3) = as.timeDate(index(ts3));
+ if(!dropna){
+ if(on !="weeks"|on!="days"){
+ if(on=="secs"|on=="seconds"){tby = "s"}
+ if(on=="mins"|on=="minutes"){tby = "min"}
+ if (on == "hours"){tby = "h"}
+ by = paste(k,tby,sep=" ");
+ allindex = as.timeDate(seq(start(ts3),end(ts3),by=by));
+ xx = xts(rep(1,length(allindex)),order.by=allindex);
+ ts3 = merge(ts3,xx)[,1];
+ }#currently for weeks and days, na are still dropped
+ }#end dropna if
- #With weights:
- if(!is.null(weights)){
- tsb = cbind(ts,weights);
- ep = endpoints(tsb, on, k);
- ts2 = period.apply(tsb,ep,FUN=weightedaverage);
- }
-
-
- if(on=="minutes"|on=="mins"|on=="secs"|on=="seconds"){
- if(on=="minutes"|on=="mins"){secs = k*60;}
- if(on=="secs"|on=="seconds"){secs = k}
- a = .index(ts2) + (secs-.index(ts2) %% secs);
- ts3 = .xts(ts2,a)
- }
-
- if(on=="hours"){
- secs = 3600;
- a = .index(ts2) + (secs-.index(ts2) %% secs)
- ts3 = .xts(ts2,a);
- }
-
-
- if(on=="days"){
- secs = 24*3600;
- a = .index(ts2) + (secs-.index(ts2) %% secs) - (24*3600)
- ts3 = .xts(ts2,a);
- }
-
-
- if(on=="weeks") {
- secs = 24*3600*7;
- a = (.index(ts2) + (secs-(.index(ts2) + (3L * 86400L)) %% secs))-(24*3600);
- ts3 = .xts(ts2,a);
- }
-
- #return to timeDate timestamps
- index(ts3) = as.timeDate(index(ts3));
-
- return(ts3);
+ return(ts3)
}
#PRICE (specificity: opening price and previoustick)
Modified: pkg/RTAQ/R/cleanupfunctions.R
===================================================================
--- pkg/RTAQ/R/cleanupfunctions.R 2010-06-16 15:39:36 UTC (rev 343)
+++ pkg/RTAQ/R/cleanupfunctions.R 2010-06-23 11:39:51 UTC (rev 344)
@@ -251,8 +251,8 @@
return(qdata[condition])
}
-
-rmoutliers = function(qdata,maxi=10,window=50,type="advanced"){
+rmoutliers = function (qdata, maxi = 10, window = 50, type = "advanced")
+{
##function to remove entries for which the mid-quote deviated by more than 10 median absolute deviations
##from a rolling centered median (excluding the observation under consideration) of 50 observations if type = "standard".
@@ -265,44 +265,70 @@
##3. Rolling median of the previous "window" observations
##NOTE: Median Absolute deviation chosen contrary to Barndorff-Nielsen et al.
- print("NOTE: This function is only useful for quotes NOT for trades");
- condition = c();
- halfwindow = round(window/2);
- midquote = (as.numeric(qdata$BID)+as.numeric(qdata$OFFER))/2;
- if(type=="standard"){
- for(i in (halfwindow+1):(dim(qdata)[1]-halfwindow)){
- mid = midquote[i];
- vec = c(midquote[(i-halfwindow):(i-1)],midquote[(i+1):(i+halfwindow)]);
- mad = mad(vec);
- maxcriterion = median(vec)+maxi*mad;
- mincriterion = median(vec)-maxi*mad;
- condition[i-halfwindow] = mincriterion < mid & mid< maxcriterion;
- }
- }
+ print("NOTE: This function is only useful for quotes NOT for trades")
+ window = floor(window/2) * 2
+ condition = c();
+ halfwindow = window/2;
+ midquote = as.vector(as.numeric(qdata$BID) + as.numeric(qdata$OFFER))/2;
+ mad_all = mad(midquote);
-if(type=="advanced"){
- for(i in (window+1):(dim(qdata)[1]-window)){
- mid = midquote[i];
+ midquote = xts(midquote,order.by = index(qdata))
- vec = c(midquote[(i-halfwindow):(i-1)],midquote[(i+1):(i+halfwindow)]);
- vec2 = midquote[(i-window):(i-1)];
- vec3 = midquote[(i+1):(i+window)];
+ if (mad_all == 0) {
+ m = as.vector(as.numeric(midquote))
+ s = c(TRUE, (m[2:length(m)] - m[1:(length(m) - 1)] !=
+ 0))
+ mad_all = mad(as.numeric(midquote[s]))
+ }
- medianv = c(median(vec),median(vec2),median(vec3));
- difference = abs(medianv-mid);
- mediani = medianv[min(difference) == difference];
- mad = mad(vec);
-
- maxcriterion = mediani+maxi*mad;
- mincriterion = mediani-maxi*mad;
-
- condition[i-halfwindow] = mincriterion < mid & mid< maxcriterion;
- }
+ medianw = function(midquote, n = window) {
+ m = floor(n/2) + 1
+ q = median(c(midquote[1:(m - 1)], midquote[(m + 1):(n +
+ 1)]))
+ return(q)
+ }
-}
+ if (type == "standard") {
+ meds = as.numeric(rollapply(midquote, width = (window +
+ 1), FUN = medianw, align = "center"))
+ }
+ if (type == "advanced") {
+ advancedperrow = function(qq) {
+ diff = abs(qq[1:3] - qq[4])
+ select = min(diff) == diff
+ value = qq[select]
+ if (length(value) > 1) {
+ value = median(value)
+ }
+ return(value)
+ }
+ n = length(midquote)
+ allmatrix = matrix(rep(0, 4 * n), ncol = 4)
+ median2 = function(a) {
+ median(a)
+ }
+ standardmed = as.numeric(rollapply(midquote, width = (window),
+ FUN = median2, align = "center"))
+ allmatrix[(halfwindow + 1):(n - halfwindow), 1] = as.numeric(rollapply(midquote,
+ width = (window + 1), FUN = medianw, align = "center"))
+ allmatrix[(1:(n - window)), 2] = standardmed[2:length(standardmed)]
+ allmatrix[(window + 1):(n), 3] = standardmed[1:(length(standardmed) -
+ 1)]
+ allmatrix[, 4] = midquote
+ meds = apply(allmatrix, 1, advancedperrow)[(halfwindow +
+ 1):(n - halfwindow)]
+ }
- condition = c(rep(TRUE,halfwindow),condition,rep(TRUE,halfwindow));
- qdata[condition];
+ midquote = as.numeric(midquote);
+ maxcriterion = meds + maxi * mad_all
+ mincriterion = meds - maxi * mad_all
+
+ condition = mincriterion < midquote[(halfwindow + 1):(length(midquote) -
+ halfwindow)] & midquote[(halfwindow + 1):(length(midquote) -
+ halfwindow)] < maxcriterion
+ condition = c(rep(TRUE, halfwindow), condition, rep(TRUE,
+ halfwindow))
+ qdata[condition];
}
@@ -321,4 +347,4 @@
#start = unlist(strsplit(as.character(start(myxts))," "))[1];
#end = unlist(strsplit(as.character(end(myxts))," "))[1];
#alldays = timeSequence(from = start, to = end, by = "day");
-#alldays = alldays[isWeekday(alldays)];
+#alldays = alldays[isWeekday(alldays)];
\ No newline at end of file
Modified: pkg/RTAQ/man/aggregatets.Rd
===================================================================
--- pkg/RTAQ/man/aggregatets.Rd 2010-06-16 15:39:36 UTC (rev 343)
+++ pkg/RTAQ/man/aggregatets.Rd 2010-06-23 11:39:51 UTC (rev 344)
@@ -25,6 +25,8 @@
When you assign an xts object with wheights to this argument, a weighted mean is taken over each interval.
Of course, the weights should have the same timestamps as the supplied time series.
}
+\item{dropna}{ boolean, which determines whether empty intervals should be dropped.
+By default, an NA is returned in case an interval is empty.}
}
\section{Details}{
@@ -37,7 +39,9 @@
the last observation up to that point, excluding the value at 09:35:00 itself.
Please Note:
-In case an interval is empty, it is omitted. (Possibly, you expect an NA but this isn't the case)
+In case an interval is empty, by default an NA is returned. In the case of e.g. previous
+tick aggregation, it makes sense to fill these NAs using the function \code{na.locf}
+(last observation carried forward) from the zoo package.
}
\value{
Modified: pkg/RTAQ/man/rmoutliers.Rd
===================================================================
--- pkg/RTAQ/man/rmoutliers.Rd 2010-06-16 15:39:36 UTC (rev 343)
+++ pkg/RTAQ/man/rmoutliers.Rd 2010-06-23 11:39:51 UTC (rev 344)
@@ -2,7 +2,7 @@
\Rdversion{1.1}
\alias{rmoutliers}
\title{
-Delete entries for which the mid-quote is outlying with to respect surrounding entries
+Delete entries for which the mid-quote is outlying with respect to surrounding entries
}
\description{
If type="standard": Function deletes entries for which the mid-quote deviated by more than "maxi"
@@ -10,7 +10,7 @@
the observation under consideration) of "window" observations.
If type="advanced": Function deletes entries for which the mid-quote deviates by more than "maxi"
-median absolute deviations from the value closest to the midquote of
+median absolute deviations from the value closest to the mid-quote of
these three options:
\enumerate{
\item Rolling centered median (excluding the observation under consideration)
@@ -21,7 +21,11 @@
The advantage of this procedure compared to the "standard" proposed
by Barndorff-Nielsen et al. (2010) is that it will not incorrectly remove
large price jumps. Therefore this procedure has been set as the default
-for removing outliers.
+for removing outliers.
+
+Note that the median absolute deviation is taken over the entire
+sample. In case it is zero (which can happen if mid-quotes don't change much),
+the median absolute deviation is taken over a subsample without constant mid-quotes.
}
\usage{
@@ -34,7 +38,8 @@
\item{type}{should be "standard" or "advanced" (see description).}
}
-\section{Details}{NOTE: This function works only correct if supplied input data consists of 1 day.}
+\section{Details}{
+NOTE: This function only works correctly if the supplied input data consists of 1 day.}
\value{xts object}
More information about the Blotter-commits
mailing list