[Highfrequency-commits] r61 - pkg/highfrequency/R

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Wed Oct 16 14:20:54 CEST 2013


Author: kboudt
Date: 2013-10-16 14:20:53 +0200 (Wed, 16 Oct 2013)
New Revision: 61

Modified:
   pkg/highfrequency/R/realized.R
Log:


Modified: pkg/highfrequency/R/realized.R
===================================================================
--- pkg/highfrequency/R/realized.R	2013-10-16 12:20:46 UTC (rev 60)
+++ pkg/highfrequency/R/realized.R	2013-10-16 12:20:53 UTC (rev 61)
@@ -1,4140 +1,4140 @@
-# This file contains all realized measures previously implemented in RTAQ and realized
-######################################################## 
-## Help functions: (not exported)
-######################################################## 
# Detect whether an xts object spans more than one day.
# With only x: returns TRUE when x is an xts covering several days.
# With both x and y: when either spans several days, checks that the two
# objects have matching dimensions and, if so, returns list(TRUE, merged
# data); mismatched dimensions trigger a warning.
.multixts <- function(x, y = NULL)
{
    if (is.null(y)) {
        return(is.xts(x) && (ndays(x) != 1))
    }
    spans_days <- (is.xts(x) && (ndays(x) != 1)) || (ndays(y) != 1 && is.xts(y))
    if (spans_days) {
        same_dims <- dim(y) == dim(x)
        if (!same_dims) {
            warning("Please make sure x and y have the same dimensions")
        } else {
            return(list(TRUE, cbind(x, y)))
        }
    }
}
-
# Realized Variance: the sum of squared (intraday) returns.
#
# @param rdata return series (numeric vector or xts).
# @param ... accepts the deprecated argument 'data' as an alias for rdata.
# @return the realized variance (a scalar).
RV <- function(rdata, ...) {
    # Backward compatibility: honour a 'data' argument passed via '...'.
    # (The old code assigned the bare symbol 'data', which silently picked
    # up the utils::data function instead of the caller's argument.)
    if (hasArg(data)) {
        rdata <- list(...)$data
    }
    returns <- as.numeric(rdata)
    sum(returns * returns)
}
-
# Bivariate realized bipower covariation of two return series, based on
# products of consecutive absolute sums and differences.
RBPCov_bi <- function(ts1, ts2) {
    m <- length(ts1)
    sums <- as.numeric(abs(ts1 + ts2))
    diffs <- as.numeric(abs(ts1 - ts2))
    cross_sums <- sums[1:(m - 1)] * sums[2:m]
    cross_diffs <- diffs[1:(m - 1)] * diffs[2:m]
    (pi / 8) * sum(cross_sums - cross_diffs)
}
-
# Realized BiPower Variation (RBPVar): (pi/2) times the sum of products
# of consecutive absolute returns.
#
# @param rdata return series.
# @param ... accepts the deprecated 'data' alias for rdata.
# @return the realized bipower variation (a scalar).
RBPVar <- function(rdata, ...) {
    # Backward compatibility: the old code assigned the bare symbol 'data'
    # (resolving to utils::data); fetch the deprecated argument from '...'.
    if (hasArg(data)) {
        rdata <- list(...)$data
    }
    returns <- as.vector(as.numeric(rdata))
    n <- length(returns)
    (pi / 2) * sum(abs(returns[1:(n - 1)]) * abs(returns[2:n]))
}
-
# Check data: validate that rdata has at least two columns when a
# multivariate measure is requested.
#
# @param rdata matrix-like return data.
# @param multi TRUE when the caller needs at least two series.
rdatacheck <- function(rdata, multi = FALSE)
{
    # '&&' (not '&') for the scalar condition; the is.null(dim(...)) guard
    # turns the previous cryptic failure on plain vectors into the
    # intended error message.
    if (multi && (is.null(dim(rdata)) || dim(rdata)[2] < 2)) {
        stop("Your rdata object should have at least 2 columns")
    }
}
-
######## rowcov helper functions:
# Realized Outlyingness Weighted Quadratic Covariation (ROWQCov)
# Consistency factor for ROWQCov based on the Huber (soft rejection)
# weight function: the ratio of two weighted chi-density integrals.
conhuber <- function(di, alpha = 0.05)
{
    cutoff <- qchisq(p = 1 - alpha, df = di)
    weighted_integral <- function(power) {
        integrand <- function(t) {
            z <- t^2
            huberweight(z, cutoff) * (t^power) * exp(-z / 2)
        }
        integrate(integrand, 0, Inf)$value
    }
    numerator <- weighted_integral(di - 1)
    denominator <- weighted_integral(di + 1)
    di * numerator / denominator
}
-
# Consistency factor for ROWQCov based on the hard-rejection weight
# function.
conHR <- function(di, alpha = 0.05)
{
    threshold <- qchisq(1 - alpha, df = di)
    (1 - alpha) / pchisq(threshold, df = di + 2)
}
-
# Huber (soft rejection) weight function: weight 1 up to the cutoff k,
# then k/d beyond it.
#
# @param d vector of (squared) distances.
# @param k cutoff value.
# @return vector of weights in (0, 1].
huberweight <- function(d, k) {
    # pmin() is the vectorized equivalent of the original
    # apply(cbind(1, k/d), 1, min) — no intermediate matrix, no row loop.
    pmin(1, k / d)
}
-
# Count the number of zero observations in a series.
countzeroes <- function(series)
{
    sum(ifelse(series == 0, 1, 0))
}
-
# Realized Outlyingness Weighted Variance (ROWVar) helper:
# squared standardized distance of each observation from location 0,
# using the MAD as the scale (mean of the data as a fallback if MAD is 0).
#
# @param rdata return series.
# @param ... accepts the deprecated 'data' alias for rdata.
# @return vector of outlyingness values, one per observation.
univariateoutlyingness <- function(rdata, ...) {
    # Deprecated alias; the old code referenced the undefined symbol 'data'.
    if (hasArg(data)) {
        rdata <- list(...)$data
    }
    # (robustbase is not needed here: mad() comes from stats.)
    location <- 0
    scale <- mad(rdata)
    if (scale == 0) {
        scale <- mean(rdata)
    }
    # Explicit final expression: the original relied on the invisible
    # value of an assignment as the return value.
    ((rdata - location) / scale)^2
}
-
-
# Realized Outlyingness Weighted Variance.
#
# Downweights returns whose outlyingness (squared return relative to a
# robust MCD scale) exceeds a chi-squared critical value, using either
# hard rejection ("HR") or soft rejection ("SR") weights.
#
# @param rdata return series.
# @param seasadjR seasonally adjusted returns used to measure
#   outlyingness; defaults to rdata itself.
# @param wfunction "HR" (hard rejection) or "SR" (soft rejection).
# @param alphaMCD MCD parameter; unused in this body but retained for
#   interface compatibility.
# @param alpha tail probability for the chi-squared rejection threshold.
# @param ... accepts the deprecated 'data' alias for rdata.
ROWVar <- function(rdata, seasadjR = NULL, wfunction = "HR", alphaMCD = 0.75, alpha = 0.001, ...)
{
    require('robustbase')   # single load (the original called require twice)
    # Deprecated alias; the old code referenced the undefined symbol 'data'.
    if (hasArg(data)) {
        rdata <- list(...)$data
    }
    if (is.null(seasadjR)) {
        seasadjR <- rdata
    }

    rdata <- as.vector(rdata)
    seasadjR <- as.vector(seasadjR)
    intraT <- length(rdata)
    N <- 1
    MCDcov <- as.vector(covMcd(rdata, use.correction = FALSE)$raw.cov)
    outlyingness <- seasadjR^2 / MCDcov
    k <- qchisq(p = 1 - alpha, df = N)
    outlierindic <- outlyingness > k
    weights <- rep(1, intraT)
    if (wfunction == "HR") {
        weights[outlierindic] <- 0
        wR <- sqrt(weights) * rdata
        return((conHR(di = N, alpha = alpha) * sum(wR^2)) / mean(weights))
    }
    if (wfunction == "SR") {
        weights[outlierindic] <- k / outlyingness[outlierindic]
        wR <- sqrt(weights) * rdata
        return((conhuber(di = N, alpha = alpha) * sum(wR^2)) / mean(weights))
    }
    # NOTE(review): any other wfunction silently returns NULL, exactly as
    # in the original implementation.
}
-
#### Two time scale helper functions:
# Two-Scales Realized Variance: combines a slow K-grid and a fast J-grid
# subsampled realized variance with a finite-sample adjustment, to correct
# for microstructure noise (based on rv.timescale).
#
# @param pdata price series in levels (numeric or xts).
# @param K slow time scale (number of subsampling grids).
# @param J fast time scale.
# @return the TSRV estimate (a scalar).
TSRV <- function(pdata, K = 300, J = 1)
{
    logprices <- log(as.numeric(pdata))
    n <- length(logprices)
    nbarK <- (n - K + 1) / K   # average number of obs in one K-grid
    nbarJ <- (n - J + 1) / J
    adj <- (1 - (nbarK / nbarJ))^-1
    # Subsampled returns for every grid offset, collected in one pass
    # (avoids the original's quadratic c(...) vector growth).
    subsampled_returns <- function(scale) {
        unlist(lapply(seq_len(scale), function(offset) {
            diff(logprices[seq(offset, n, scale)])
        }))
    }
    logreturns_K <- subsampled_returns(K)
    logreturns_J <- subsampled_returns(J)
    adj * ((1 / K) * sum(logreturns_K^2) - ((nbarK / nbarJ) * (1 / J) * sum(logreturns_J^2)))
}
-
# Robust Two-Scales Realized Variance.
#
# Iteratively re-estimates the TSRV while truncating returns whose square
# exceeds eta * (current IV share + 2 * noise variance), running a fixed
# 20 fixed-point iterations.
#
# @param pdata price series (xts; timestamps are used for time deltas).
# @param startIV optional starting value for the IV iteration; when NULL,
#   a medRV estimate on the sparse K-grid is used.
# @param noisevar optional noise variance; estimated from TSRV when NULL.
# @param K,J slow/fast time scales.
# @param eta truncation multiplier (chi-squared-based; default 9).
# @return the robust TSRV estimate (a scalar).
RTSRV = function (pdata, startIV = NULL, noisevar = NULL, K = 300, J = 1, 
eta = 9){
    logprices = log(as.numeric(pdata))
    n = length(logprices)
    nbarK = (n - K + 1)/(K)
    nbarJ = (n - J + 1)/(J)
    adj = (1 - (nbarK/nbarJ))^-1
    # Truncation constant: inverse chi-squared(3) tail mass at eta.
    zeta = 1/pchisq(eta, 3)
    seconds = as.numeric(as.POSIXct(index(pdata)))
    secday = last(seconds) - first(seconds)
    logreturns_K = vdelta_K = logreturns_J = vdelta_J = c()
    # Subsampled returns and normalized time increments on the K-grid.
    for (k in 1:K) {
        sel = seq(k, n, K)
        logreturns_K = c(logreturns_K, diff(logprices[sel]))
        vdelta_K = c(vdelta_K, diff(seconds[sel])/secday)
    }
    # Same on the fast J-grid.
    for (j in 1:J) {
        sel = seq(j, n, J)
        logreturns_J = c(logreturns_J, diff(logprices[sel]))
        vdelta_J = c(vdelta_J, diff(seconds[sel])/secday)
    }
    if (is.null(noisevar)) {
        noisevar = max(0,1/(2 * nbarJ) * (sum(logreturns_J^2)/J - TSRV(pdata=pdata,K=K,J=J)))        
    }
    if (!is.null(startIV)) {
        RTSRV = startIV
    }
    if (is.null(startIV)) {
        sel = seq(1, n, K)
        RTSRV = medRV(diff(logprices[sel]))
    }
    # Fixed-point iteration: truncate with the current IV estimate, then
    # recompute the two-scales estimator on the surviving returns.
    iter = 1
    while (iter <= 20) {
        I_K = 1 * (logreturns_K^2 <= eta * (RTSRV * vdelta_K + 
        2 * noisevar))
        I_J = 1 * (logreturns_J^2 <= eta * (RTSRV * vdelta_J + 
        2 * noisevar))
        # Guard: if everything is truncated, keep all returns instead.
        if (sum(I_J) == 0) {
            I_J = rep(1, length(logreturns_J))
        }
        if (sum(I_K) == 0) {
            I_K = rep(1, length(logreturns_K))
        }
        RTSRV = adj * (zeta * (1/K) * sum(logreturns_K^2 * I_K)/mean(I_K) - 
        ((nbarK/nbarJ) * zeta * (1/J) * sum(logreturns_J^2 * 
        I_J)/mean(I_J)))
        iter = iter + 1
    }
    return(RTSRV)
}
-
-
# Robust Two-Scales Realized Covariance between two price series.
#
# Estimates per-series noise variances and robust TSRV values (used for
# truncation), synchronizes the two series by refresh time, and combines
# truncated slow-grid (K) and fast-grid (J) covariations.
#
# @param pdata1,pdata2 price series (xts).
# @param startIV1,startIV2 optional starting IV values for the truncation.
# @param noisevar1,noisevar2 optional noise variances; estimated if NULL.
# @param K,J default slow/fast scales; K_cov/J_cov override them for the
#   covariance leg and K_var*/J_var* for the two variance legs.
# @param eta truncation multiplier (9 gives the precomputed constant).
# @return the robust two-scales covariance estimate (a scalar).
RTSCov_bi = 
function (pdata1, pdata2, startIV1 = NULL, startIV2 = NULL, noisevar1 = NULL, 
noisevar2 = NULL, K = 300, J = 1, 
K_cov = NULL , J_cov = NULL , 
K_var1 = NULL , K_var2 = NULL , 
J_var1 = NULL , J_var2 = NULL ,       
eta = 9) 
{
    # Fall back to the shared K/J defaults for any unspecified scale.
    if( is.null(K_cov)){ K_cov = K }   ;   if( is.null(J_cov)){ J_cov = J } 
    if( is.null(K_var1)){ K_var1 = K } ;   if( is.null(K_var2)){ K_var2 = K }   
    if( is.null(J_var1)){ J_var1 = J } ;   if( is.null(J_var2)){ J_var2 = J }
    
    # Calculation of the noise variance and TSRV for the truncation
    
    if (   is.null(noisevar1)   ) {
        logprices1 = log(as.numeric(pdata1))     
        n_var1 = length(logprices1)
        nbarK_var1 = (n_var1 - K_var1 + 1)/(K_var1) ;
        nbarJ_var1 = (n_var1 - J_var1 + 1)/(J_var1)
        adj_var1 = n_var1/((K_var1 - J_var1) * nbarK_var1) 
        
        logreturns_K1 = logreturns_J1 = c()
        for (k in 1:K_var1) {
            sel.avg = seq(k, n_var1, K_var1)
            logreturns_K1 = c(logreturns_K1, diff(logprices1[sel.avg]))
        }
        for (j in 1:J_var1) {
            sel.avg = seq(j, n_var1, J_var1)
            logreturns_J1 = c(logreturns_J1, diff(logprices1[sel.avg]))
        }   
        # NOTE(review): this inner is.null(noisevar1) check is redundant —
        # the enclosing branch already established it.
        if(  is.null(noisevar1)  ){
            noisevar1 = max(0,1/(2 * nbarJ_var1) * (sum(logreturns_J1^2)/J_var1 - TSRV(pdata1,K=K_var1,J=J_var1)))
        }
    }
    if (is.null(noisevar2)) {
        logprices2 = log(as.numeric(pdata2))
        n_var2 = length(logprices2)
        nbarK_var2 = (n_var2 - K_var2 + 1)/(K_var2) ;
        nbarJ_var2 = (n_var2 - J_var2 + 1)/(J_var2)
        adj_var2 = n_var2/((K_var2 - J_var2) * nbarK_var2)    
        
        logreturns_K2 = logreturns_J2 = c()
        for (k in 1:K_var2) {
            sel.avg = seq(k, n_var2, K_var2)
            logreturns_K2 = c(logreturns_K2, diff(logprices2[sel.avg]))
        }
        for (j in 1:J_var2) {
            sel.avg = seq(j, n_var2, J_var2)
            logreturns_J2 = c(logreturns_J2, diff(logprices2[sel.avg]))
        }        
        noisevar2 = max(0,1/(2 * nbarJ_var2) * (sum(logreturns_J2^2)/J_var2 - TSRV(pdata2,K=K_var2,J=J_var2)))
    }    
    
    # Robust TSRV per series (starting values used when supplied).
    if (!is.null(startIV1)) {
        RTSRV1 = startIV1
    }else{
        RTSRV1 = RTSRV(pdata=pdata1, noisevar = noisevar1, K = K_var1, J = J_var1, eta = eta)      
    }
    if (!is.null(startIV2)) {
        RTSRV2 = startIV2
    }else{
        RTSRV2 = RTSRV(pdata=pdata2, noisevar = noisevar2, K = K_var2, J = J_var2, eta = eta)      
    }
    
    # Refresh time is for the covariance calculation
    
    x = refreshTime(list(pdata1, pdata2))
    newprice1 = x[, 1]
    newprice2 = x[, 2]
    logprices1 = log(as.numeric(newprice1))
    logprices2 = log(as.numeric(newprice2))
    seconds = as.numeric(as.POSIXct(index(newprice1)))
    secday = last(seconds) - first(seconds)        
    K = K_cov ; J = J_cov ;    
    
    n = length(logprices1)
    nbarK_cov = (n - K_cov + 1)/(K_cov)
    nbarJ_cov = (n - J_cov + 1)/(J_cov)
    adj_cov = n/((K_cov - J_cov) * nbarK_cov)    
    
    # Slow-grid (K) returns and normalized time increments.
    logreturns_K1 = logreturns_K2 = vdelta_K = c()
    for (k in 1:K_cov) {
        sel.avg = seq(k, n, K_cov)
        logreturns_K1 = c(logreturns_K1, diff(logprices1[sel.avg]))
        logreturns_K2 = c(logreturns_K2, diff(logprices2[sel.avg]))
        vdelta_K = c(vdelta_K, diff(seconds[sel.avg])/secday)
    }
    
    # Fast-grid (J) returns and normalized time increments.
    logreturns_J1 = logreturns_J2 = vdelta_J = c()      
    for (j in 1:J_cov) {
        sel.avg = seq(j, n, J_cov)
        logreturns_J1 = c(logreturns_J1, diff(logprices1[sel.avg]))
        logreturns_J2 = c(logreturns_J2, diff(logprices2[sel.avg]))
        vdelta_J = c(vdelta_J, diff(seconds[sel.avg])/secday)
    }
    
    # Indicator truncation of outlying returns, per series and grid.
    I_K1 = 1 * (logreturns_K1^2 <= eta * (RTSRV1 * vdelta_K + 2 * noisevar1))
    I_K2 = 1 * (logreturns_K2^2 <= eta * (RTSRV2 * vdelta_K + 2 * noisevar2))
    I_J1 = 1 * (logreturns_J1^2 <= eta * (RTSRV1 * vdelta_J + 2 * noisevar1))
    I_J2 = 1 * (logreturns_J2^2 <= eta * (RTSRV2 * vdelta_J + 2 * noisevar2))
    # Precomputed correction factor for the default eta = 9.
    if (eta == 9) {
        ccc = 1.0415
    } else {
        ccc = cfactor_RTSCV(eta = eta)
    }
    RTSCV = adj_cov * (ccc * (1/K_cov) * sum(logreturns_K1 * I_K1 * 
    logreturns_K2 * I_K2)/mean(I_K1 * I_K2) - ((nbarK_cov/nbarJ_cov) * 
    ccc * (1/J_cov) * sum(logreturns_J1 * logreturns_J2 * I_J1 * 
    I_J2)/mean(I_J1 * I_J2)))
    return(RTSCV)
}
-
# Two-Scales Realized Covariance between two price series.
#
# Synchronizes the series by refresh time, then combines slow-grid (K)
# and fast-grid (J) subsampled covariations with a finite-sample
# adjustment (bivariate analogue of TSRV).
#
# @param pdata1,pdata2 price series (xts).
# @param K slow time scale; J fast time scale.
# @return the two-scales covariance estimate (a scalar).
TSCov_bi = function (pdata1, pdata2, K = 300, J = 1) 
{
    x = refreshTime(list(pdata1, pdata2))
    newprice1 = x[, 1]
    newprice2 = x[, 2]
    logprices1 = log(as.numeric(newprice1))
    logprices2 = log(as.numeric(newprice2))
    seconds = as.numeric(as.POSIXct(index(newprice1)))
    secday = last(seconds) - first(seconds)
    n = length(logprices1)
    nbarK = (n - K + 1)/(K)
    nbarJ = (n - J + 1)/(J)
    adj = n/((K - J) * nbarK)
    
    logreturns_K1 = logreturns_K2 = logreturns_J1 = logreturns_J2 = c()
    vdelta_K =  vdelta_J = c();
    
    # Subsampled returns and time increments on the slow K-grid.
    for (k in 1:K) {
        sel.avg = seq(k, n, K)
        logreturns_K1 = c(logreturns_K1, diff(logprices1[sel.avg]))
        logreturns_K2 = c(logreturns_K2, diff(logprices2[sel.avg]))
        vdelta_K = c(vdelta_K, diff(seconds[sel.avg]) / secday)
    }
    
    # Same on the fast J-grid.
    for (j in 1:J) {
        sel.avg = seq(j, n, J)
        logreturns_J1 = c(logreturns_J1, diff(logprices1[sel.avg]))
        logreturns_J2 = c(logreturns_J2, diff(logprices2[sel.avg]))
        vdelta_J = c(vdelta_J, diff(seconds[sel.avg])/secday)
    }
    
    TSCOV = adj * ((1/K) * sum(logreturns_K1 * logreturns_K2) - 
    ((nbarK/nbarJ) * (1/J) * sum(logreturns_J1 * logreturns_J2)))
    return(TSCOV)
}
-
# Correction factor for the robust two-scales covariance (RTSCov):
# averages a univariate chi-squared truncation factor (c1) and a
# bivariate factor (c2) obtained from truncated Gaussian integrals.
#
# @param eta truncation multiplier used by RTSCov.
# @return the scalar correction factor.
cfactor_RTSCV = function(eta=9){
    require('cubature'); require('mvtnorm')
    # rho = 1
    c1 = pchisq(eta,df=1)/pchisq(eta,df=3) 
    # NOTE(review): c2 is evaluated at near-zero correlation (rho = 0.001)
    # while the comment above c1 says "rho = 1" — confirm this matches the
    # intended interpolation between the two correlation extremes.
    rho = 0.001
    R = matrix( c(1,rho,rho,1) , ncol = 2 ) 
    int1 <- function(x) {    dmvnorm(x,sigma=R) }
    num = adaptIntegrate(int1, c(-3,-3), c(3,3), tol=1e-4)$integral
    int2 <- function(x) {  x[1]*x[2]*dmvnorm(x,sigma=R) }
    denom = adaptIntegrate(int2, c(-3,-3), c(3,3), tol=1e-4)$integral
    c2 = rho*num/denom   
    return( (c1+c2)/2 )
}
-
# Hayashi-Yoshida helper function:
# Realized covariance for two asynchronous tick series, delegating to the
# native "pcovcc" routine which pairs overlapping return intervals.
#
# @param x,y tick data for the two assets.
# @param period,align.by,align.period alignment controls (converted to a
#   single period via .getAlignPeriod).
# @param cts calendar-time sampling flag; makeReturns converts prices to
#   returns before the calculation.
# @return the Hayashi-Yoshida covariance (a scalar).
rc.hy <- function(x,y, period=1,align.by="seconds", align.period =1, cts = TRUE, makeReturns=FALSE, ...)
{
    align.period = .getAlignPeriod(align.period, align.by)
    cdata <- .convertData(x, cts=cts, makeReturns=makeReturns)
    x <- cdata$data
    x.t <- cdata$milliseconds
    
    cdatay <- .convertData(y, cts=cts, makeReturns=makeReturns)
    y <- cdatay$data
    y.t <- cdatay$milliseconds
    
    # Both series must carry timestamps for the asynchronous pairing.
    errorCheck <- c(is.null(x.t),is.na(x.t), is.null(y.t), is.na(y.t))
    if(any(errorCheck))
    stop("ERROR: Time data is not in x or y.")
    
    # Sum the per-interval contributions returned by the C routine.
    sum(     .C("pcovcc", 
    as.double(x), #a
    as.double(rep(0,length(x)/(period*align.period)+1)),
    as.double(y), #b
    as.double(x.t), #a
    as.double(rep(0,length(x)/(period*align.period)+1)), #a
    as.double(y.t), #b
    as.integer(length(x)), #na
    as.integer(length(x)/(period*align.period)),
    as.integer(length(y)), #na
    as.integer(period*align.period),
    ans = double(length(x)/(period*align.period)+1), 
    COPY=c(FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,TRUE), 
    PACKAGE="highfrequency")$ans)
}
-
# 
# Realized variance calculation using a kernel estimator.
#
# Multi-day input is split with apply.daily; per day, the tick data is
# aligned and passed to the native "kernelEstimator" routine.
rv.kernel <- function(x,                             # Tick Data
kernel.type = "rectangular",   # Kernel name (or number)
kernel.param = 1,              # Kernel parameter (usually lags)
kernel.dofadj = TRUE,          # Kernel degree-of-freedom adjustment
align.by="seconds",            # Align the tick data to [seconds|minutes|hours]
align.period = 1,              # Align the tick data to this many [seconds|minutes|hours]
cts = TRUE,                    # Calendar Time Sampling is used
makeReturns = FALSE,            # Convert to Returns 
type = NULL,                   # Deprecated, use kernel.type
adj = NULL,                    # Deprecated, use kernel.dofadj
q = NULL, ...){                     # Deprecated, use kernel.param
    # Multiday adjustment: 
    multixts = .multixts(x);
    if(multixts){
        result = apply.daily(x,rv.kernel,kernel.type,kernel.param,kernel.dofadj,
        align.by,align.period,cts,makeReturns,type,adj,q);
        return(result)}
    if(!multixts){ #Daily estimation:
        
        #
        # Handle deprecation: map the legacy argument names onto the
        # current ones (with a warning).
        #
        
        if(!is.null(type)){
            warning("type is deprecated, use kernel.type")
            kernel.type=type
        }
        if(!is.null(q)){
            warning("q is deprecated, use kernel.param")
            kernel.param=q
        }
        if(!is.null(adj)){
            warning("adj is deprecated, use kernel.dofadj")
            kernel.dofadj=adj
        }          
        align.period = .getAlignPeriod(align.period, align.by)         
        cdata <- .convertData(x, cts=cts, makeReturns=makeReturns)
        x <- cdata$data
        x <- .alignReturns(x, align.period)
        type <- .kernel.chartoint(kernel.type)
        # The scalar 'ans' slot holds the kernel realized variance.
        .C("kernelEstimator", as.double(x), as.double(x), as.integer(length(x)),
        as.integer(kernel.param), as.integer(ifelse(kernel.dofadj, 1, 0)),
        as.integer(type), ab=double(kernel.param + 1),
        ab2=double(kernel.param + 1),
        ans=double(1),PACKAGE="highfrequency")$ans
    }
}
-
# Realized covariance between two tick series using a kernel estimator;
# bivariate counterpart of rv.kernel, delegating to the native
# "kernelEstimator" routine with both aligned series.
rc.kernel <- function(x,                             # Tick Data for first asset
y,                             # Tick Data for second asset
kernel.type = "rectangular",   # Kernel name (or number)
kernel.param = 1,              # Kernel parameter (usually lags)
kernel.dofadj = TRUE,          # Kernel degree-of-freedom adjustment
align.by="seconds",            # Align the tick data to [seconds|minutes|hours]
align.period = 1,              # Align the tick data to this many [seconds|minutes|hours]
cts = TRUE,                    # Calendar Time Sampling is used
makeReturns = FALSE,           # Convert to Returns 
type = NULL,                   # Deprecated, use kernel.type
adj = NULL,                    # Deprecated, use kernel.dofadj
q = NULL,...){                 # Deprecated, use kernel.param
    #
    # Handle deprecation: map the legacy argument names onto the current
    # ones (with a warning).
    #
    if(!is.null(type)){
        warning("type is deprecated, use kernel.type")
        kernel.type=type
    }
    if(!is.null(q)){
        warning("q is deprecated, use kernel.param")
        kernel.param=q
    }
    if(!is.null(adj)){
        warning("adj is deprecated, use kernel.dofadj")
        kernel.dofadj=adj
    }
    
    align.period = .getAlignPeriod(align.period, align.by)   
    cdata <- .convertData(x, cts=cts, makeReturns=makeReturns)
    
    x <- cdata$data
    x <- .alignReturns(x, align.period)
    cdatay <- .convertData(y, cts=cts, makeReturns=makeReturns)
    y <- cdatay$data
    y <- .alignReturns(y, align.period)
    type <- .kernel.chartoint(kernel.type)
    # The scalar 'ans' slot holds the kernel realized covariance.
    .C("kernelEstimator", as.double(x), as.double(y), as.integer(length(x)),
    as.integer(kernel.param), as.integer(ifelse(kernel.dofadj, 1, 0)),
    as.integer(type), ab=double(kernel.param + 1),
    ab2=double(kernel.param + 1),
    ans=double(1),PACKAGE="highfrequency")$ans
}
-
# Evaluate the kernel weight function at x for the given kernel type.
#
# @param x evaluation point.
# @param type kernel name (string) or integer code.
# @return the kernel weight (a scalar).
rKernel <- function(x, type = 0)
{
    type <- .kernel.chartoint(type)
    # Fixed: the native routine is registered in this package
    # ("highfrequency"), consistent with every other .C call in this file;
    # the old PACKAGE="realized" pointed at the retired predecessor package.
    .C("justKernel", x = as.double(x), type = as.integer(type), ans = as.double(0), PACKAGE = "highfrequency")$ans
}
-
# Map a kernel name (case-insensitive) to its integer code; numeric input
# is passed through untouched. Unknown names fall back to the Bartlett
# kernel (code 1) with a warning.
.kernel.chartoint <- function(type)
{
    if (!is.character(type)) {
        return(type)
    }
    code <- switch(casefold(type),
        rectangular = 0,
        bartlett = 1,
        second = 2,
        epanechnikov = 3,
        cubic = 4,
        fifth = 5,
        sixth = 6,
        seventh = 7,
        eighth = 8,
        parzen = 9,
        th = 10,
        mth = 11,
        tukeyhanning = 10,
        modifiedtukeyhanning = 11,
        -99)
    if (code == -99) {
        warning("Invalid Kernel, using Bartlet")
        return(1)
    }
    code
}
-
# Names of the kernels supported by rv.kernel / rc.kernel.
rKernel.available <- function()
{
    kernels <- c("Rectangular", "Bartlett", "Second", "Epanechnikov",
                 "Cubic", "Fifth", "Sixth", "Seventh", "Eighth",
                 "Parzen", "TukeyHanning", "ModifiedTukeyHanning")
    kernels
}
-
-
## Realized Variance: average over the subsample grids.
rv.avg <- function(x, period)
{
    subsampled <- .rv.subsample(x, period)
    mean(subsampled)
}
-
# Realized covariance: average over the subsample grids.
rc.avg <- function(x, y, period)
{
    subsampled <- .rc.subsample(x, y, period)
    mean(subsampled)
}
-
# Subsampled realized variances: one RV per grid offset 1..period,
# computed by the native "subsample" routine.
#
# @param x tick data (converted via .convertData).
# @param period subsampling period (observations per grid step).
# @return numeric vector of length 'period' with one RV per offset.
.rv.subsample <- function(x, period, cts=TRUE, makeReturns=FALSE,...)
{
    cdata <- .convertData(x, cts=cts, makeReturns=makeReturns)
    x <- cdata$data
    
    .C("subsample", 
    
    as.double(x), #a
    as.double(x), #na
    as.integer(length(x)), #na
    as.integer(length(x)/period),       #m
    as.integer(period), #period 
    as.double(rep(0,as.integer(length(x)/period +1))), #tmp
    as.double(rep(0,as.integer(length(x)/period +1))), #tmp
    as.integer(length(x)/period), #tmpn
    ans = double(period), 
    COPY=c(FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,TRUE), 
    PACKAGE="highfrequency")$ans
}
-
-
# Subsampled realized covariances: one covariance per grid offset
# 1..period, computed by the native "subsample" routine on two series.
#
# @param x,y tick data (each converted via .convertData).
# @param period subsampling period (observations per grid step).
# @return numeric vector of length 'period' with one covariance per offset.
.rc.subsample <- function(x, y, period, cts=TRUE, makeReturns=FALSE, ... )
{
    cdata <- .convertData(x, cts=cts, makeReturns=makeReturns)
    x <- cdata$data
    
    cdatay <- .convertData(y, cts=cts, makeReturns=makeReturns)
    y <- cdatay$data
    
    .C("subsample", 
    as.double(x), #a
    as.double(y), #na
    as.integer(length(x)), #na
    as.integer(length(x)/period),       #m
    as.integer(period), #period 
    as.double(rep(0,as.integer(length(x)/period +1))), #tmp
    as.double(rep(0,as.integer(length(x)/period +1))), #tmp
    as.integer(length(x)/period), #tmpn
    ans = double(period), 
    COPY=c(FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,TRUE), 
    PACKAGE="highfrequency")$ans               
}
-
#### percentage of zeros calc:
# Convert each element of a list of tick series into an aligned return
# series; returns an (unnamed) list in the same order as the input.
.makeROlist <- function(rdata, align.period, align.by, cts, makeReturns) {
    period <- .getAlignPeriod(align.period, align.by)
    aligned <- lapply(rdata, function(series) {
        .alignReturns(.convertData(series, cts = cts, makeReturns = makeReturns)$data, period)
    })
    unname(aligned)
}
-
# Fraction of aligned squared returns that are exactly zero.
rv.zero <- function(x, period)
{
    acc <- .accum.naive(x = x, y = x, period = period)
    sum((acc * acc) == 0) / length(acc)
}
-
# Fraction of aligned cross products of the two series that are exactly
# zero.
rc.zero <- function(x, y, period)
{
    acc_y <- .accum.naive(x = y, y = y, period = period)
    acc_x <- .accum.naive(x = x, y = x, period = period)
    sum((acc_x * acc_y) == 0) / length(acc_y)
}
-
-#########################################################################
-#
-# Utility Functions from realized package Scott Payseur
-#
-#########################################################################
# Accumulate the product of two period-aligned series, optionally as a
# cumulative sum.
#
# @param x,y input series (each aligned via .accum.naive).
# @param period alignment period.
# @param cum if TRUE, return the cumulative sum of the products.
# @param makeReturns unused; retained for interface compatibility. (The
#   original signature carried the accidental parameter name
#   'makeReturns...', a typo for 'makeReturns, ...'.)
# @param ... absorbs further unused arguments.
.alignedAccum <- function(x, y, period, cum = TRUE, makeReturns = FALSE, ...)
{
    x <- .accum.naive(x, x, period)
    y <- .accum.naive(y, y, period)
    if (cum) {
        ans <- cumsum(x * y)
    } else {
        ans <- x * y
    }
    ans
}
-
-
# Accumulate x*y over non-overlapping windows of 'period' observations
# via the native "rv" routine.
#
# @param x,y numeric series of equal length.
# @param period number of observations per accumulation window.
# @return the per-window accumulations (the routine's 'tmpa' buffer);
#   note that the scalar 'ans' slot is NOT what is returned here.
.accum.naive <- function(x,y, period, ...)
{
    .C("rv", 
    as.double(x), #a
    as.double(y), #b
    as.integer(length(x)), #na
    as.integer(period), #period 
    tmpa = as.double(rep(0,as.integer(length(x)/period +1))), #tmp
    as.double(rep(0,as.integer(length(x)/period +1))), #tmp
    as.integer(length(x)/period), #tmpn
    ans = double(1), 
    COPY=c(FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,TRUE), 
    PACKAGE="highfrequency")$tmpa     
}
-
-
# Align a return series onto a 'period'-observation grid via the native
# "rv" routine.
#
# NOTE(review): the call is identical to .accum.naive with y = x, so this
# returns the per-window accumulations of x*x ('tmpa' buffer).
#
# @param x numeric return series.
# @param period number of observations per grid step.
# @return the aligned (accumulated) series.
.alignReturns <- function(x, period, ...)
{
    .C("rv", 
    as.double(x), #a
    as.double(x), #b
    as.integer(length(x)), #na
    as.integer(period), #period 
    tmpa = as.double(rep(0,as.integer(length(x)/period +1))), #tmp
    as.double(rep(0,as.integer(length(x)/period +1))), #tmp
    as.integer(length(x)/period), #tmpn
    ans = double(1), 
    COPY=c(FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,TRUE), 
    PACKAGE="highfrequency")$tmpa     
}
-
# Convert an alignment specification to a number of seconds.
#
# @param align.period number of units.
# @param align.by unit name, case-insensitive, surrounding whitespace
#   ignored: seconds ("sec", "secs", "second", "seconds", "s", ""),
#   minutes ("min", "mins", "minute", "minutes", "m") or hours
#   ("hour", "hours", "h").
# @return the period expressed in seconds.
.getAlignPeriod <- function(align.period, align.by)
{   
    # Trim whitespace and lowercase once, instead of per comparison.
    unit <- casefold(gsub("(^ +)|( +$)", "", align.by))
    if (unit %in% c("min", "mins", "minute", "minutes", "m")) {
        return(align.period * 60)
    }
    if (unit %in% c("sec", "secs", "second", "seconds", "s", "")) {
        return(align.period)
    }
    if (unit %in% c("hour", "hours", "h")) {
        return(align.period * 60 * 60)
    }
    # The original failed here with a cryptic "object 'ans' not found".
    stop("Unknown align.by unit: ", align.by)
}
-
-
# Align index values onto a 'period'-observation grid via the native
# "rvperiod" routine.
#
# @param x numeric index values.
# @param period number of observations per grid step.
# @return the aligned indices (the routine's 'tmpa' buffer); note the
#   buffer is seeded with max(x), unlike the zero-seeded .alignReturns.
.alignIndices <- function(x, period, ...)
{
    .C("rvperiod", 
    as.double(x), #a
    as.double(x), #b
    as.integer(length(x)), #na
    as.integer(period), #period 
    tmpa = as.double(rep(max(x),as.integer(length(x)/period +1))), #tmp
    as.double(rep(0,as.integer(length(x)/period +1))), #tmp
    as.integer(length(x)/period), #tmpn
    ans = double(1), 
    COPY=c(FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,TRUE), 
    PACKAGE="highfrequency")$tmpa     
}
-
# NOTE(review): byte-identical duplicate of the .multixts defined earlier
# in this file; one of the two definitions could be removed.
#
# Detect whether an xts object spans more than one day. With only x,
# returns a logical; with both x and y, additionally checks matching
# dimensions and returns list(TRUE, cbind(x, y)) on success.
.multixts <- function( x, y=NULL)
{ 
    if(is.null(y)){
        test = is.xts(x) && (ndays(x)!=1);
        return(test);}
    if(!is.null(y)){
        test = (is.xts(x) && (ndays(x)!=1)) || ( ndays(y)!=1 && is.xts(y) );
        if( test ){
            test1 = dim(y) == dim(x);
            if(!test1){ warning("Please make sure x and y have the same dimensions") }
            if(test1){  test = list( TRUE, cbind(x,y) ); return(test) }
        }
    }
}      
-
# Normalize heterogeneous inputs (realizedObject, S-PLUS timeSeries, xts,
# data/milliseconds lists, plain matrices) into the internal
# list(data, milliseconds) representation used by the estimators.
#
# @param x input object.
# @param cts return the calendar-time-sampled ("cts") representation when
#   TRUE, the transaction-time ("tts") one otherwise.
# @param millisstart,millisend session bounds in milliseconds; defaults
#   (34200000/57600000) correspond to a 9:30-16:00 session.
# @param makeReturns convert prices to log-returns when TRUE.
.convertData <- function(x, cts = TRUE, millisstart=NA, millisend=NA, makeReturns=FALSE)
{
    if(is.null(x))
    {
        return(NULL)
    }
    if("realizedObject" %in% class(x))
    {
        # Already in internal form.
        return(x)
    }
    if(is.null(version$language)) #splus
    {
        if("timeSeries" %in% class(x))
        {
            x <- x[!is.na(x[,1]),1]
            if(cts)
            {
                return(ts2realized(x, millisstart=millisstart, millisend=millisend, make.returns=makeReturns)$cts)
            }
            else
            {
                return(ts2realized(x, millisstart=millisstart, millisend=millisend, make.returns=makeReturns)$tts)
            }
            #list(milliseconds = positions(x)@.Data[[2]], data = matrix(seriesData(x), ncol=1))
        }
    }
    
    if("xts" %in% class(x))
    {
        # Decompose the xts into values + millisecond-of-day timestamps.
        xtmp <- x
        x <- list() 
        x$data <- as.numeric(xtmp[,1])
        
        x$milliseconds <- (as.POSIXlt(time(xtmp))$hour*60*60 + as.POSIXlt(time(xtmp))$min*60 + as.POSIXlt(time(xtmp))$sec )*1000
        if(is.na(millisstart))
        {
            millisstart = x$milliseconds[[1]]
        }
        if(is.na(millisend))
        {
            millisend = x$milliseconds[[length(x$milliseconds)]]
        }
        
        cat(paste("xts -> realizedObject [", as.character(time(xtmp[1])), " :: ", as.character(time(xtmp[length(x$milliseconds)])), "]", sep=""),"\n")
    }
    
    # Default session bounds when nothing else fixed them.
    if(is.na(millisstart))
    {
        millisstart=34200000
    }
    if(is.na(millisend))
    {
        millisend=57600000
    }    
    if("list" %in% class(x))
    {
        if(sum(names(x) == c("tts", "cts")) == 2) #realized obj  
        {
            if(cts)
            {
                return(x$cts)
            }
            else
            {
                return(x$tts)
            }
        }
        if(sum(names(x) == c("data", "milliseconds")) == 2) 
        {
            if(makeReturns)
            {                                           # only works on non cts prices
                errcheck <- try(.getReturns(.sameTime(x$data, x$milliseconds)))
                # Fixed: in R, try() signals failure with class "try-error";
                # the old comparison against "Error" (the S-PLUS convention)
                # never matched, so failures were treated as successes.
                if(!inherits(errcheck, c("try-error", "Error")))
                {
                    x$data <- errcheck
                    x$milliseconds <- intersect(x$milliseconds,x$milliseconds)
                }
                else
                {
                    warning("It appears that these are already returns.  Not creating returns")
                }
            }          
            else
            {
                x$data <- .sameTime(x$data, x$milliseconds)
                x$milliseconds <- intersect(x$milliseconds,x$milliseconds)
            }          
            if(cts)
            {
                toret <- list(data=.toCts(x=x$data, millis=intersect(x$milliseconds,x$milliseconds), millisstart=millisstart, millisend=millisend),
                milliseconds=(((millisstart/1000)+1):(millisend/1000))*1000)
                return(toret)
            }
            else
            {
                toret <- list(data=x$data, 
                milliseconds=intersect(x$milliseconds,x$milliseconds))
                return(toret)
            }
        }
    }
    
    if("timeSeries" %in% class(x))
    {
        stop("R timeSeries not implmented yet. Convert to realized object")
    }
    # Not a recognised object: fake the milliseconds and return.
    return(list(milliseconds = 1:dim(as.matrix(x))[[1]], data = as.matrix(x)))
}
-
# Log-returns from a price series; the result is one observation shorter
# than the input.
.getReturns <- function(x)
{
    prices <- as.numeric(x)
    count <- length(prices)
    log(prices[2:count]) - log(prices[1:(count - 1)])
}
-
# Collapse observations that share the same millisecond timestamp via the
# native "sametime" routine; returns one value per unique timestamp.
#
# @param x numeric series.
# @param millis millisecond timestamps, same length as x.
# @return numeric vector with one entry per unique timestamp.
.sameTime <- function(x, millis)
{
    .C("sametime", 
    as.double(x), #a
    as.integer(length(x)), #na
    as.integer(millis), #millis
    ans = double(length(union(millis,millis))), #tts
    COPY=c(FALSE,FALSE,FALSE,TRUE), 
    PACKAGE="highfrequency")$ans
}
-
-
# Public wrapper around .toCts: convert a tick series to calendar-time
# sampling over [millisstart, millisend].
data.toCts <- function(x, millis, millisstart = 34200000, millisend = 57600000)
{
    .toCts(x = x, millis = millis,
           millisstart = millisstart, millisend = millisend)
}
-
# Convert a tick series to calendar-time sampling (one observation per
# second between millisstart and millisend) via the native "tocts"
# routine.
#
# @param x numeric series.
# @param millis millisecond timestamps, same length as x.
# @param millisstart,millisend session bounds in milliseconds.
# @return numeric vector with one observation per second of the session.
.toCts <- function(x, millis, millisstart=34200000, millisend=57600000)
{
    .C("tocts", 
    as.double(x), #a
    as.integer(length(x)),
    as.integer(millis), #millis
    as.integer(millisstart),
    as.integer(millisend),
    ans = double(((millisend-millisstart)/1000)), #cts
    COPY=c(FALSE,FALSE,FALSE,FALSE,TRUE), 
    PACKAGE="highfrequency")$ans
}
-
# Log-returns from a price vector; the result is one observation shorter
# than the input.
data.toReturns <- function(x)
{
    diff(log(as.numeric(x)))
}
-
# Convert an S-PLUS timeSeries to a realized object.
#
# The S-PLUS code path has been retired: this stub keeps the original
# signature for compatibility but only emits a warning. (The former
# implementation built tts/cts representations via data.sameTime,
# .getReturns and .toCts.)
ts2realized <- function(x, make.returns = TRUE, millisstart = 34200000, millisend = 57600000)
{
    warning("SPLUS is no longer supported.")
}
-
-
# Make positive semidefinite: project a covariance/correlation matrix
# onto the PSD cone by clipping negative eigenvalues.
#
# @param S symmetric matrix.
# @param method "covariance" clips the eigenvalues of S directly
#   (Rousseeuw & Molenberghs, 1993); "correlation" clips in correlation
#   space and rescales back (Fan, Li & Yu, 2010) and requires a strictly
#   positive diagonal.
# @return a positive semidefinite matrix.
makePsd <- function(S, method = "covariance") {
    # '&&' for the scalar condition (the original used elementwise '&').
    if (method == "correlation" && !any(diag(S) <= 0)) {
        # Fan, J., Y. Li, and K. Yu (2010). Vast volatility matrix
        # estimation using high frequency data for portfolio selection.
        D <- matrix(diag(S)^(1/2), ncol = 1)
        R <- S / (D %*% t(D))
        out <- eigen(x = R, symmetric = TRUE)
        mGamma <- t(out$vectors)
        vLambda <- out$values
        vLambda[vLambda < 0] <- 0
        Apsd <- t(mGamma) %*% diag(vLambda) %*% mGamma
        # Rescale back to unit diagonal, then to the original variances.
        dApsd <- matrix(diag(Apsd)^(1/2), ncol = 1)
        Apsd <- Apsd / (dApsd %*% t(dApsd))
        D <- diag(as.numeric(D), ncol = length(D))
        Spos <- D %*% Apsd %*% D
        return(Spos)
        # check: eigen(Apsd)$values
    } else {
        # Rousseeuw, P. and G. Molenberghs (1993). Transformation of non
        # positive semidefinite correlation matrices. Communications in
        # Statistics - Theory and Methods 22, 965-984.
        out <- eigen(x = S, symmetric = TRUE)
        mGamma <- t(out$vectors)
        vLambda <- out$values
        vLambda[vLambda < 0] <- 0
        # Explicit, visible return (the original ended on an assignment,
        # so the result came back invisibly).
        return(t(mGamma) %*% diag(vLambda) %*% mGamma)
    }
}
-
### Do a daily apply but with list as output:
# Apply FUN to each day of x and collect the per-day results in a list
# (unlike apply.daily, which binds them back into an xts).
#
# @param x xts time series.
# @param FUN function invoked per day as
#   FUN(day, cor, align.by, align.period, makeReturns, makePsd).
# @return list with one element per day.
.applygetlist <- function(x, FUN, cor = FALSE, align.by = NULL, align.period = NULL, makeReturns = FALSE, makePsd = FALSE, ...) {
    on <- "days"
    k <- 1
    x <- try.xts(x, error = FALSE)
    INDEX <- endpoints(x, on = on, k = k)
    result <- list()
    FUN <- match.fun(FUN)
    # seq_len is safe when there are no complete days (the original also
    # computed an unused day count 'D', dropped here).
    for (i in seq_len(length(INDEX) - 1)) {
        result[[i]] <- FUN(x[(INDEX[i] + 1):INDEX[i + 1]], cor, align.by, align.period, makeReturns, makePsd)
    }
    return(result)
}
-
# Aggregation function: FAST previous-tick aggregation onto a regular
# seconds/minutes/hours grid.
#
# @param ts xts/zoo series of prices.
# @param on grid unit: "secs"/"seconds", "mins"/"minutes" or "hours".
# @param k number of units per grid step.
# @return the series sampled by previous-tick (last observation carried
#   forward) at each grid point.
.aggregatets <- function (ts, on = "minutes", k = 1) 
{
    if (on == "secs" | on == "seconds") {
        secs = k
        tby = paste(k, "sec", sep = " ")
    } 
    if (on == "mins" | on == "minutes") {
        secs = 60 * k
        tby = paste(60 * k, "sec", sep = " ")
    } 
    if (on == "hours"){
        secs = 3600 * k;
        tby = paste(3600 * k, "sec", sep = " ");
    } 
    # base::seq — 'seq' is exported, so the original's triple-colon form
    # (base:::seq) was unnecessary.
    g = base::seq(start(ts), end(ts), by = tby);
    rawg = as.numeric(as.POSIXct(g, tz = "GMT"));
    # Snap every grid point up to the next multiple of 'secs'.
    newg = rawg + (secs - rawg%%secs);
    g    = as.POSIXct(newg, origin = "1970-01-01",tz = "GMT");
    # Merge onto the grid and carry the last observation forward.
    ts3 = na.locf(merge(ts, zoo(, g)))[as.POSIXct(g, tz = "GMT")];
    return(ts3)
} #Very fast and elegant way to do previous tick aggregation :D!
-
#Make Returns: 
# Convert a price xts into log-returns, keeping the same dimensions and
# time index; the first row is set to zero.
makeReturns <- function(ts)
{
    nobs <- dim(ts)[1]
    mat <- matrix(as.numeric(ts), nrow = nobs)
    mat[(2:nobs), ] <- log(mat[(2:nobs), ]) - log(mat[(1:(nobs - 1)), ])
    mat[1, ] <- rep(0, dim(ts)[2])
    xts(mat, order.by = index(ts))
}
-
-#Refresh Time:
-refreshTime = function (pdata) 
-{
-    dim = length(pdata)
-    lengths = rep(0, dim + 1)
-    for (i in 1:dim) {
-        lengths[i + 1] = length(pdata[[i]])
-    }
-    minl = min(lengths[(2:(dim + 1))])
-    lengths = cumsum(lengths)
-    alltimes = rep(0, lengths[dim + 1])
-    for (i in 1:dim) {
-        alltimes[(lengths[i] + 1):lengths[i + 1]] = as.numeric(as.POSIXct(index(pdata[[i]]), 
-        tz = "GMT"))
-    }
-    x = .C("refreshpoints", as.integer(alltimes), as.integer(lengths), 
-    as.integer(rep(0, minl)), as.integer(dim), as.integer(0), 
-    as.integer(rep(0, minl * dim)), as.integer(minl))
-    newlength = x[[5]]
-    pmatrix = matrix(ncol = dim, nrow = newlength)
-    for (i in 1:dim) {
-        selection = x[[6]][((i - 1) * minl + 1):(i * minl)]
-        pmatrix[, i] = pdata[[i]][selection[1:newlength]]
-    }
-    time = as.POSIXct(x[[3]][1:newlength], origin = "1970-01-01", 
-    tz = "GMT")
[TRUNCATED]

To get the complete diff run:
    svnlook diff /svnroot/highfrequency -r 61


More information about the Highfrequency-commits mailing list