[Highfrequency-commits] r24 - in pkg/highfrequency: . R man
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Thu Jul 19 07:56:46 CEST 2012
Author: jonathan
Date: 2012-07-19 07:56:46 +0200 (Thu, 19 Jul 2012)
New Revision: 24
Added:
pkg/highfrequency/man/heavyModel.Rd
Modified:
pkg/highfrequency/NAMESPACE
pkg/highfrequency/R/realized.R
Log:
first version heavyModel
Modified: pkg/highfrequency/NAMESPACE
===================================================================
--- pkg/highfrequency/NAMESPACE 2012-07-17 22:03:01 UTC (rev 23)
+++ pkg/highfrequency/NAMESPACE 2012-07-19 05:56:46 UTC (rev 24)
@@ -50,7 +50,8 @@
aggregateQuotes,
aggregateTrades,
aggregatets,
-previoustick
+previoustick,
+heavyModel
)#end exported function
S3method(print, harModel);
Modified: pkg/highfrequency/R/realized.R
===================================================================
--- pkg/highfrequency/R/realized.R 2012-07-17 22:03:01 UTC (rev 23)
+++ pkg/highfrequency/R/realized.R 2012-07-19 05:56:46 UTC (rev 24)
@@ -3927,4 +3927,178 @@
ts = xts(all,index(BIDOFR));
return(ts);
-}
\ No newline at end of file
+}
+
+###########
+# Likelihood for HEAVY volatility model of Shephard and Sheppard
+# Code is R-translation by Jonathan Cornelissen of matlab code of http://www.kevinsheppard.com/wiki/MFE_Toolbox
+# by: kevin.sheppard at economics.ox.ac.uk
+
+# USAGE:
+# [LL,LLS,H] = heavy_likelihood(PARAMETERS,DATA,P,Q,BACKCAST,LB,UB)
+
+# INPUTS:
+# PARAMETERS - A vector with K+sum(sum(P))+sum(sum(Q)) elements.
+# DATA - A T by K vector of non-negative data. Returns should be squared before using
+# P - A K by K matrix containing the lag length of model innovations. Position (i,j)
+# indicates the number of lags of series j in the model for series i
+# Q - A K by K matrix containing the lag length of conditional variances. Position (i,j)
+# indicates the number of lags of series j in the model for series i
+# BACKCAST - A 1 by K matrix of values to use for back casting
+# LB - A 1 by K matrix of volatility lower bounds to use in estimation
+# UB - A 1 by K matrix of volatility upper bounds to use in estimation
+#
+# OUTPUTS:
+# LL - The log likelihood evaluated at the PARAMETERS
+# LLS - A T by 1 vector of log-likelihoods
+# HT - A T by K matrix of conditional variances
+
+# In contrast to Sheppards code, I make a list for parameters (easier interpretation)
+#NOTE # the parameter list has three items: O, A and B
+# O is a matrix (K by 1), A and B items are (K by K) matrices
+
# Estimate the HEAVY volatility model of Shephard and Sheppard (2010).
#
# Arguments:
#   data           - (T x K) matrix of non-negative observations; for the classic
#                    HEAVY model K = 2: squared daily returns and realized measures
#   p              - (K x K) matrix of lag lengths for the model innovations
#   q              - (K x K) matrix of lag lengths for the conditional variances
#   startingvalues - optional vector of K + sum(p) + sum(q) starting parameters;
#                    a naive default is constructed when NULL
#   LB, UB         - optional length-K lower/upper volatility bounds
#                    (defaults: zeros and Inf)
#   backcast       - optional (K x 1) initialization values; defaults to the
#                    unconditional column means of data
#   compconst      - if TRUE the omegas are estimated; if FALSE volatility
#                    targeting is used inside the likelihood
#
# Returns the list produced by heavy_likelihood() (total log-likelihood,
# per-period likelihoods, conditional variances) extended with the parameter
# estimates in $estparams.
heavyModel = function(data, p=matrix( c(0,0,1,1),ncol=2 ), q=matrix( c(1,0,0,1),ncol=2 ),
                      startingvalues = NULL, LB = NULL, UB = NULL,
                      backcast = NULL, compconst = FALSE){
  K = dim(p)[2];

  # Default volatility bounds: non-negative and unbounded above.
  if( is.null(LB) ){ LB = rep(0,K) }
  if( is.null(UB) ){ UB = rep(Inf,K) }

  # Naive starting values (to be refined later): small constants, moderate
  # innovation loadings, persistent variance loadings.
  if( is.null(startingvalues) ){
    startingvalues = c( rep(0.1, K), rep(0.3, sum(p)), rep(0.6, sum(q)) );
  }

  # Rescale? (useful to avoid numerical problems: TODO LATER)

  # Initialization of the recursion: unconditional column means as a (K x 1) matrix.
  if(is.null( backcast)){
    backcast = t( t( colMeans(data) ) );
  }

  # Constraint matrices (kept for the constrOptim alternative below):
  # every parameter must be non-negative.
  nparams = length(startingvalues);
  ui = diag(rep(1,nparams));
  ci = matrix(rep(0,dim(ui)[2]),ncol=1);

  opt = optim( par = startingvalues, fn = heavy_likelihood,
               data=data, p=p, q=q,backcast=backcast,UB=UB,LB=LB, compconst = compconst ); # ADJUST maxit ?!!
  # opt = constrOptim( theta = startingvalues, f = heavy_likelihood,
  #                    grad=NULL,ui=ui, ci = ci,
  #                    data=data, p=p, q=q,backcast=backcast,UB=UB,LB=LB, compconst = compconst );

  estparams     = opt$par;
  loglikelihood = opt$value;

  # Re-evaluate at the optimum to recover the full output list:
  # total log-likelihood, per-period likelihoods and conditional variances.
  out = heavy_likelihood(parameters = estparams, data=data, p=p, q=q, backcast=backcast, LB=LB, UB=UB, foroptim=FALSE, compconst = compconst);
  out$estparams = estparams;

  return(out)
}
+
# Unpack the flat HEAVY parameter vector into its structured components.
#
# Arguments:
#   p            - (K x K) matrix of lag lengths for the model innovations
#   q            - (K x K) matrix of lag lengths for the conditional variances
#   paramsvector - vector c(omegas, non-zero alphas lag by lag, non-zero betas
#                  lag by lag), length K + sum(p) + sum(q)
#
# Returns list(O, A, B): O is a (K x 1) matrix of constants; A and B are lists
# with one (K x K) coefficient matrix per innovation / conditional-variance lag,
# where position (i, j) holds the loading of series j in the equation of series i.
transformparams = function( p, q, paramsvector ){
  K = dim(p)[1];
  pmax = max(p); qmax = max(q); # Max number of lags for innovations and cond vars

  O = matrix( paramsvector[1:K], ncol=1);
  A = B = list();
  start = (K+1);

  for(i in 1:pmax){ # A will contain a list-item per innovation lag
    end = start + sum(p>=i) - 1;           # How many non-zero params in this loop?
    A[[i]] = matrix(rep(0,K^2), ncol=K);   # BUG FIX: was ncol=2, only correct when K == 2
    A[[i]][p>=i] = paramsvector[start:end];
    start = end + 1;
  }#end loop over number of lags for innovations

  for(i in 1:qmax){ # B will contain a list-item per cond var lag
    end = start + sum(q>=i) - 1;           # How many non-zero params in this loop?
    B[[i]] = matrix(rep(0,K^2), ncol=K);   # BUG FIX: was ncol=2, only correct when K == 2
    B[[i]][q >= i] = paramsvector[start:end];
    start = end + 1;
  }#End loop over number of lags for cond variances

  return( list(O,A,B) )
}
+
# Negative log-likelihood of the HEAVY model of Shephard and Sheppard (2010).
# R translation of the matlab code in Kevin Sheppard's MFE Toolbox.
#
# Arguments:
#   parameters - vector with K + sum(p) + sum(q) elements (see transformparams)
#   data       - (T x K) matrix of non-negative data; returns squared beforehand
#   p          - (K x K) matrix of lag lengths for the model innovations
#   q          - (K x K) matrix of lag lengths for the conditional variances
#   backcast   - (K x 1) values used whenever a lag reaches before the sample start
#   LB, UB     - length-K lower/upper bounds for the conditional variances
#   foroptim   - if TRUE return only the scalar objective (for optim);
#                otherwise return the full output list
#   compconst  - if TRUE the omegas are read from `parameters`; if FALSE they are
#                implied by volatility targeting: (1 - alpha - beta) * unconditional mean
#
# Returns the scalar NEGATIVE log-likelihood (foroptim = TRUE; optim minimizes it),
# or a list with the total likelihood, the per-period likelihoods and the
# (K x T) matrix of conditional variances (foroptim = FALSE).
heavy_likelihood = function(parameters, data, p, q, backcast, LB, UB, foroptim=TRUE, compconst=FALSE ){
  K    = dim(data)[2];              # Number of series to model
  nobs = dim(data)[1];              # Number of time periods (renamed from T, which shadows TRUE)
  lls  = rep(NA, nobs);             # Per-period likelihood contributions
  h    = matrix(nrow=K, ncol=nobs); # Conditional variances, one column per period
  maxp = max(p); maxq = max(q);

  # Unpack the flat parameter vector. Named call: the original relied on
  # positional matching to land `parameters` in the third formal, paramsvector.
  x = transformparams( p=p, q=q, paramsvector=parameters );
  if( compconst ){ O = x[[1]]; }
  A = x[[2]]; B = x[[3]];

  # Volatility targeting in case omega need not be optimized:
  if( !compconst ){
    totalA = totalB = matrix( rep(0,K), ncol=1, nrow=K);
    for(j in 1:length(A) ){ totalA = totalA + t(t(rowSums(A[[j]]))); } # Sum over alphas for all models
    for(j in 1:length(B) ){ totalB = totalB + t(t(rowSums(B[[j]]))); } # Sum over betas for all models
    O = 1 - totalA - totalB;        # The remaining weight after subtracting A & B
    uncond = t(t(colMeans(data))); ### KRIS FEEDBACK PLEASE ###
    O = O*uncond;
  } #End if close for not optimizing over omega

  # Constants should not be negative.
  # NOTE(review): only repairs negative elements when their SUM is negative; a
  # single negative omega with a positive sum slips through — confirm intent.
  if( sum(O) < 0 ){ O[O<0] = 10^(-10) }

  likConst = K*log(2*pi); # Constant term of the Gaussian log-likelihood

  for(tt in 1:nobs){ # Start loop over time periods
    h[,tt] = O; # Add constant to h

    for(j in 1:maxp){ # Loop over innovation lags
      if( (tt-j) > 0 ){
        h[,tt] = h[,tt] + t( A[[j]] %*% t(t(data[(tt-j),])) ); # Adding innovations to h
      }else{
        h[,tt] = h[,tt] + t( A[[j]] %*% backcast );            # Backcast pre-sample innovations
      }
    } #end loop over innovation lags

    for(j in 1:maxq){ # Loop over cond variance lags; BUG FIX: original looped 1:maxp
      if( (tt-j) > 0 ){
        h[,tt] = h[,tt] + t( B[[j]] %*% t(t(h[,(tt-j)])) ); # Adding cond vars to h
      }else{
        h[,tt] = h[,tt] + t( B[[j]] %*% backcast );         # Backcast pre-sample variances
      }
    } #End loop over cond variance lags

    # Pull out-of-bound variances back toward the admissible region
    # (smooth transforms keep the objective continuous for the optimizer):
    for(j in 1:K){ # Loop over series
      if( h[j,tt] < LB[j] ){ h[j,tt] = LB[j]/(1- (h[j,tt] - LB[j]) ); }
      if( h[j,tt] > UB[j] ){ h[j,tt] = UB[j] + log( h[j,tt] - UB[j] ); }
    }#end loop over series

    # Gaussian negative log-likelihood contribution of period tt:
    lls[tt] = 0.5*( likConst + sum( log(h[,tt]) ) + sum( data[tt,] / h[,tt] ) );
  } #End loop over days
  ll = sum(lls);

  # Penalty value so the optimizer can recover from degenerate parameters:
  if(is.na(ll) || is.infinite(ll) ){ ll = 10^7 }

  if(foroptim){ output = ll; return(output); }
  if(!foroptim){
    output = list( loglikelihood=ll, likelihoods=lls, condvar = h );
    return(output)
    # Output list:
    # (i) total loglikelihood
    # (ii) likelihood parts per time period
    # (iii) matrix with conditional variances
  } #end output in case you want params
}
+
Added: pkg/highfrequency/man/heavyModel.Rd
===================================================================
--- pkg/highfrequency/man/heavyModel.Rd (rev 0)
+++ pkg/highfrequency/man/heavyModel.Rd 2012-07-19 05:56:46 UTC (rev 24)
@@ -0,0 +1,63 @@
+\name{heavyModel}
+\Rdversion{1.1}
+\alias{heavyModel}
+\title{HEAVY Model estimation}
+
+\description{This function calculates the High frEquency bAsed VolatilitY
+(HEAVY) model proposed in Shephard and Sheppard (2010).}
+
+\usage{
+heavyModel(data, p=matrix( c(0,0,1,1),ncol=2 ), q=matrix( c(1,0,0,1),ncol=2 ),
+ startingvalues = NULL, LB = NULL, UB = NULL,
+ backcast = NULL, compconst = FALSE);
+}
+
+\arguments{
+ \item{data}{ a (T x K) matrix containing the data, with T the number of days. For the traditional HEAVY model: K = 2, the first column contains the squared daily returns, the second column contains the realized measures.
+ }
+ \item{p}{ a (K x K) matrix containing the lag length for the model innovations. Position (i, j) in the matrix indicates the number of lags in equation i of the model for the innovations in data column j. For the traditional heavy model p is given by matrix( c(0,0,1,1),ncol=2 ) (default).
+ }
+ \item{q}{
+ a (K x K) matrix containing the lag length for the conditional variances. Position (i, j) in the matrix indicates the number of lags in equation i of the model for conditional variances corresponding to series j. For the traditional heavy model introduced above q is given by matrix( c(1,0,0,1),ncol=2 ) (default).
+ }
+ \item{startingvalues}{ a vector containing the starting values to be used in the optimization to find the optimal parameters estimates.
+ }
+ \item{LB}{ a vector of length K indicating the lower bounds to be used in the
+estimation. If NULL it is set to a vector of zeros by default.
+ }
+ \item{UB}{ a vector of length K indicating the upper bounds to be used in the
+estimation. If NULL it is set to a vector of Inf by default.}
+ \item{backcast}{ a vector of length K used to initialize the estimation. If NULL the
+unconditional estimates are taken.
+ }
+ \item{compconst}{ a boolean variable. In case TRUE, the omega values are estimated in
+the optimization. In case FALSE, volatility targeting is done and omega is simply
+1 minus the sum of all relevant alpha's and beta's.
+ }
+}
+
+\section{Details}{
+See vignette.
+}
+
+\value{
+A list with the following values:
+(i) loglikelihood: The log likelihood evaluated at the parameter estimates.
+(ii) likelihoods: a vector of length T containing the log likelihoods per day.
+(iii) condvar: a (K x T) matrix containing the conditional variances
+(iv) estparams: a vector with the parameter estimates. The order in which the
+parameters are reported is as follows: First the estimates for omega then the
+estimates for the non-zero alpha's with the most recent lags first in case max(p) > 1,
+then the estimates for the non-zero beta's with the most recent lag first in case
+max(q) > 1.
+}
+
+
+\references{
+Shephard, N. and K. Sheppard (2010). Realising the future: forecasting with high
+frequency based volatility (heavy) models. Journal of Applied Econometrics 25,
+197-231.
+}
+
+\author{Jonathan Cornelissen and Kris Boudt}
+\keyword{forecasting}
More information about the Highfrequency-commits
mailing list