[Returnanalytics-commits] r2559 - in pkg/Meucci: . R data demo man

Fri Jul 12 21:20:14 CEST 2013

Author: xavierv
Date: 2013-07-12 21:20:14 +0200 (Fri, 12 Jul 2013)
New Revision: 2559

Added:
   pkg/Meucci/R/FitExpectationMaximization.R
   pkg/Meucci/R/QuantileMixture.R
   pkg/Meucci/data/highYieldIndices.Rda
   pkg/Meucci/demo/S_Estimator.R
   pkg/Meucci/demo/S_ExpectationMaximizationHighYield.R
   pkg/Meucci/man/FitExpectationMaximization.Rd
   pkg/Meucci/man/QuantileMixture.Rd
Modified:
   pkg/Meucci/DESCRIPTION
   pkg/Meucci/NAMESPACE
   pkg/Meucci/demo/S_EigenvalueDispersion.R
Log:
-added a couple of demo scripts and functions

Modified: pkg/Meucci/DESCRIPTION
===================================================================

--- pkg/Meucci/DESCRIPTION	2013-07-12 11:47:14 UTC (rev 2558)
+++ pkg/Meucci/DESCRIPTION	2013-07-12 19:20:14 UTC (rev 2559)
@@ -80,3 +80,5 @@
     'Cumul2Raw.R'
     'Raw2Central.R'
     'Raw2Cumul.R'
+    'FitExpectationMaximization.R'
+    'QuantileMixture.R'

Modified: pkg/Meucci/NAMESPACE
===================================================================
--- pkg/Meucci/NAMESPACE	2013-07-12 11:47:14 UTC (rev 2558)
+++ pkg/Meucci/NAMESPACE	2013-07-12 19:20:14 UTC (rev 2559)
@@ -10,6 +10,7 @@
 export(Cumul2Raw)
 export(DetectOutliersViaMVE)
 export(EntropyProg)
+export(FitExpectationMaximization)
 export(GenerateLogNormalDistribution)
 export(hermitePolynomial)
 export(integrateSubIntervals)
@@ -27,6 +28,7 @@
 export(PerformIidAnalysis)
 export(PlotDistributions)
 export(ProjectionStudentT)
+export(QuantileMixture)
 export(Raw2Central)
 export(Raw2Cumul)
 export(RejectOutlier)

Added: pkg/Meucci/R/FitExpectationMaximization.R
===================================================================
--- pkg/Meucci/R/FitExpectationMaximization.R	                        (rev 0)
+++ pkg/Meucci/R/FitExpectationMaximization.R	2013-07-12 19:20:14 UTC (rev 2559)
@@ -0,0 +1,74 @@
+#' Expectation-Maximization (EM) algorithm to recover missing observations in a time series ,
+#' as described in  A. Meucci, "Risk and Asset Allocation", Springer, 2005.
+#'  
+#'  @param   X         : [matrix] (T x N) of data
+#'  
+#'  @return  E_EM      : [vector] (N x 1) expectation
+#'  @return  S_EM      : [matrix] (N x N) covariance matrix
+#'  @return  Y         : [matrix] (T x N) updated data
+#'  @return  CountLoop : [scalar] number of iterations of the algorithm
+#'
+#' @references
+#' \url{http://symmys.com/node/170}
+#' See Meucci's script for "FitExpectationMaximization.m"
+#'
+#' @author Xavier Valls \email{flamejat@@gmail.com}
+#' @export
+
+FitExpectationMaximization = function(X)
+{
+    T = nrow(X);
+    N = ncol(X);
+
+    # E-M initialization
+    idx = apply( !is.nan( X ), 1, all );
+    X_Init = X[ idx, ];
+
+    M =  matrix(apply( X_Init, 2, mean ));
+
+    S = cov( X_Init );
+
+    Tolerance = 10 ^ ( -6 ) * mean(rbind( M, sqrt( matrix(diag( S ) ) ) ) );
+
+    # E-M loop
+    Convergence = 0;
+    CountLoop   = 0;
+    Y = X;
+    while( !Convergence )
+    {
+        CountLoop = CountLoop + 1;
+
+        # Step 1: estimation
+        C = array( 0, dim = c( T, N, N) );
+        for( t in 1 : T )
+        {
+            Miss = is.nan( X[ t,  ] );
+            Obs  = !Miss;
+            c = matrix(0, N, N );
+            y =  matrix( X[ t,  ]);
+            if( any( Miss ) )
+            {
+                y[ Miss ] = M[ Miss ] + S[ Miss, Obs] %*% ( solve(S[ Obs, Obs ]) %*% matrix (y[ Obs ] - M[ Obs ]) );
+                c[ Miss, Miss ] = S[ Miss, Miss ] - S[ Miss, Obs ] %*% ( solve(S[ Obs, Obs ]) %*% S[ Obs, Miss ] );          
+            }
+    	    Y[ t, ] = y;
+            C[ t, , ] = c + (y - M) %*% t(y - M);
+        }
+
+        # Step 2: update
+        M_new = matrix( apply( Y, 2, mean ));
+        S_new = drop( apply( C, c( 2, 3 ), mean ) );
+        
+        D4 = rbind( ( M_new - M ) ^ 4, matrix(diag( (S_new - S) ^ 2 ) ) );
+        Distance = mean( D4 ^ (1/4) );
+        Convergence = ( Distance < Tolerance );
+        
+        M = M_new;
+        S = S_new;
+    }
+
+    E_EM = M;
+    S_EM = S;
+
+    return( list( E_EM = E_EM, S_EM = S_EM, Recovered_Series = Y, CountLoop = CountLoop ) );
+}
\ No newline at end of file

Added: pkg/Meucci/R/QuantileMixture.R
===================================================================
--- pkg/Meucci/R/QuantileMixture.R	                        (rev 0)
+++ pkg/Meucci/R/QuantileMixture.R	2013-07-12 19:20:14 UTC (rev 2559)
@@ -0,0 +1,41 @@
+
+#' Computes the quantile of a mixture distirbution by linear interpolation/extrapolation of the cdf.the confidence 
+#' level p can be vector. If this vector is uniformly distributed on [0,1] the sample Q is distributed as the mixture.
+#' Described in A. Meucci "Risk and Asset Allocation", Springer, 2005
+#'
+#'  @param	p   : [scalar] in [0,1], probability
+#'	@param	a   : [scalar] in (0,1), mixing probability
+#'  @param	m_Y : [scalar] mean of normal component
+#'  @param  s_Y : [scalar] standard deviation of normal component
+#'  @param  m_Z : [scalar] first parameters of the log-normal component
+#'  @param  s_Z : [scalar] second parameter of the log-normal component
+#'  
+#'  @return	Q   : [scalar] quantile
+#'
+#' @references
+#' \url{http://symmys.com/node/170}
+#' See Meucci's script for "QuantileMixture.m"
+#'
+#' @author Xavier Valls \email{flamejat@@gmail.com}
+#' @export
+
+QuantileMixture = function( p, a, m_Y, s_Y, m_Z, s_Z )
+{
+	# compute first moment
+	m = a * m_Y + (1 - a) * exp( m_Z + 0.5 * s_Z * s_Z); 
+
+	# compute second moment
+	Ex2 = a * (m_Y^2 + s_Y^2) + (1 - a) * exp( 2 * m_Z + 2 * s_Z * s_Z);
+	s = sqrt( Ex2 - m * m );
+
+	# compute cdf on suitable range
+	X = m + 6 * s * seq( -1, 1, 0.001 );
+	F = a * pnorm( X,  m_Y, s_Y) + (1 - a) * plnorm(X, m_Z, s_Z);
+	X = X[!duplicated(F)];
+	F = unique(F);
+	# compute quantile by interpolation
+	Q = interp1( F, X, p, method = "linear");
+
+	return( Q );
+
+}
\ No newline at end of file

Added: pkg/Meucci/data/highYieldIndices.Rda
===================================================================
(Binary files differ)


Property changes on: pkg/Meucci/data/highYieldIndices.Rda
___________________________________________________________________
Added: svn:mime-type
   + application/octet-stream

Modified: pkg/Meucci/demo/S_EigenvalueDispersion.R
===================================================================
--- pkg/Meucci/demo/S_EigenvalueDispersion.R	2013-07-12 11:47:14 UTC (rev 2558)
+++ pkg/Meucci/demo/S_EigenvalueDispersion.R	2013-07-12 19:20:14 UTC (rev 2559)
@@ -5,7 +5,7 @@
 #'
 #' @references
 #' \url{http://symmys.com/node/170}
-#' See Meucci's script for "S_EigenValueDispersion.R"
+#' See Meucci's script for "S_EigenValueDispersion.m"
 #'
 #' @author Xavier Valls \email{flamejat@@gmail.com}
 

Added: pkg/Meucci/demo/S_Estimator.R
===================================================================
--- pkg/Meucci/demo/S_Estimator.R	                        (rev 0)
+++ pkg/Meucci/demo/S_Estimator.R	2013-07-12 19:20:14 UTC (rev 2559)
@@ -0,0 +1,143 @@
+#'This script script familiarizes the user with the evaluation of an estimator replicability, loss, error, bias and inefficiency
+#', as described in A. Meucci, "Risk and Asset Allocation", Springer, 2005,  Chapter 4.
+#'
+#' @references
+#' \url{http://symmys.com/node/170}
+#' See Meucci's script for "S_EigenValueprintersion.R"
+#'
+#' @author Xavier Valls \email{flamejat@@gmail.com}
+
+##################################################################################################################
+### Inputs
+T     = 52; # number of observations in time series
+Mu    = 0.1;
+Sigma = 0.2;
+    
+##################################################################################################################
+### Plain vanilla estimation
+# unknown functional of the distribution to be estimated, in this case the expected value
+G_fX = exp( Mu + 0.5 * Sigma^2 );
+print( G_fX );
+
+i_T = matrix( rlnorm( T, Mu, Sigma ), 1, T);  # series generated by "nature": do not know the distribution
+
+G_Hat_1 = function(X) X[ , 1 ] * X[ ,3 ]; # estimator of unknown functional G_1=x(1)*x(3)
+G_Hat_2 = function(X) apply( X, 1,mean);  # estimator of unknown functional G_1=sample mean
+
+G1 = G_Hat_1( i_T );
+G2 = G_Hat_2( i_T );
+print( G1 );
+print( G2 );
+
+##################################################################################################################
+### Replicability vs. "luck"
+# unknown functional of the distribution to be estimated, in this case the expected value
+G_fX = exp( Mu + 0.5 * Sigma^2 );
+
+nSim = 10000;
+I_T = matrix( rlnorm( nSim * T, Mu, Sigma ), nSim, T); # randomize series generated by "nature" to check replicability
+
+G1 = G_Hat_1( I_T ); # estimator of unknown functional G_1=x(1)*x(3)
+G2 = G_Hat_2( I_T ); # estimator of unknown functional G_2=sample mean
+
+Loss_G1 = (G1 - G_fX)^2;
+Loss_G2 = (G2 - G_fX)^2;
+
+Err_G1 = sqrt(mean(Loss_G1));
+Err_G2 = sqrt(mean(Loss_G2));
+
+Bias_G1 = abs(mean(G1) - G_fX);
+Bias_G2 = abs(mean(G2) - G_fX);
+
+Ineff_G1 = sd( G1 );
+Ineff_G2 = sd( G2 );
+
+##################################################################################################################
+### printlay results
+dev.new()
+NumBins = round( 10 * log( nSim ) );
+par( mfrow = c(2,1) );
+hist(G1, NumBins);
+points(G_fX, 0, pch = 21, bg = "red", main = "estimator: x(1)*x(3)");
+#set(h, 'markersize', 20, 'col', 'r');
+
+hist(G2, NumBins);
+points(G_fX, 0,  pch = 21, bg = "red", main = "estimator: sample mean" );
+#set(h, 'markersize', 20, 'col', 'r');
+
+
+# loss
+dev.new();
+par( mfrow = c(2,1) );
+hist(Loss_G1, NumBins,  main = "estimator: x(1)*x(3)");
+
+hist(Loss_G2, NumBins,  main = "estimator: sample mean" );
+
+
+##################################################################################################################
+### Stress test replicability
+Mus = seq( 0, 0.7, 0.1 );
+
+Err_G1sq   = NULL;
+Err_G2sq   = NULL;
+Bias_G1sq  = NULL;
+Bias_G2sq  = NULL;
+Ineff_G1sq = NULL;
+Ineff_G2sq = NULL;
+
+for( j in 1 : length(Mus) )
+{
+    Mu = Mus[ j ];
+
+    # unknown functional of the distribution to be estimated, in this case the expected value
+    G_fX = exp( Mu + 0.5 * Sigma^2);
+    I_T = matrix( rlnorm( nSim * T, Mu, Sigma ), nSim, T);  # randomize series generated by "nature" to check replicability
+
+    G1 = G_Hat_1(I_T);        # estimator of unknown functional G_1=x(1)*x(3)
+    G2 = G_Hat_2(I_T);        # estimator of unknown functional G_2=sample mean
+
+    Loss_G1 = ( G1 - G_fX )^2;
+    Loss_G2 = ( G2 - G_fX )^2;
+
+    Err_G1   = sqrt(mean(Loss_G1));
+    Err_G2   = sqrt(mean(Loss_G2));
+    Err_G1sq = cbind( Err_G1sq, Err_G1^2 ); ##ok<*AGROW> #store results
+    Err_G2sq = cbind( Err_G2sq, Err_G2^2 );
+
+    Bias_G1   = abs( mean( G1 )- G_fX );
+    Bias_G2   = abs( mean( G2 )- G_fX );
+    Bias_G1sq = cbind( Bias_G1sq, Bias_G1^2 ); #store results
+    Bias_G2sq = cbind( Bias_G2sq, Bias_G2^2 );
+
+    Ineff_G1   = sd(G1);
+    Ineff_G2   = sd(G2);
+    Ineff_G1sq = cbind(Ineff_G1sq, Ineff_G1^2); #store results
+    Ineff_G2sq = cbind(Ineff_G2sq, Ineff_G2^2);
+
+    dev.new();
+    NumBins = round(10*log(nSim));
+	par( mfrow = c(2,1) );
+	
+	hist(G1, NumBins);
+	points(G_fX, 0, pch = 21, bg = "red", main = "estimator: x(1)*x(3)");
+	
+	hist(G2, NumBins);
+	points(G_fX, 0,  pch = 21, bg = "red", main = "estimator: sample mean" );
+
+}
+
+dev.new();
+par( mfrow = c(2,1) );
+
+b = barplot(Bias_G1sq + Ineff_G1sq, col = "red", main = "stress-test of estimator: x(1)*x(3)");
+barplot( Ineff_G1sq, col="blue", add = TRUE);
+lines( b, Err_G1sq);
+legend( "topleft", 1.9, c( "bias²", "ineff²", "error²" ), col = c( "red","blue", "black" ),
+     lty=1, lwd=c(5,5,1),bg = "gray90" );
+
+
+b=barplot( Bias_G2sq + Ineff_G2sq , col = "red", main = "stress-test of estimator sample mean");
+barplot( Ineff_G2sq, col="blue", add = TRUE);
+lines(b, Err_G2sq);
+legend( "topleft", 1.9, c( "bias²", "ineff²", "error²" ), col = c( "red","blue", "black" ),
+     lty=1, lwd=c(5,5,1),bg = "gray90" );

Added: pkg/Meucci/demo/S_ExpectationMaximizationHighYield.R
===================================================================
--- pkg/Meucci/demo/S_ExpectationMaximizationHighYield.R	                        (rev 0)
+++ pkg/Meucci/demo/S_ExpectationMaximizationHighYield.R	2013-07-12 19:20:14 UTC (rev 2559)
@@ -0,0 +1,45 @@
+#' This script implements the Expectation-Maximization (EM) algoritm, which estimates the parameters of a 
+#' multivariate normal distribution when some observations are randomly missing, as described in A. Meucci,
+#' "Risk and Asset Allocation", Springer, 2005,  Chapter 4.
+#'
+#' @references
+#' \url{http://symmys.com/node/170}
+#' See Meucci's script for "S_ExpectationMaximizationHighYield.m"
+#'
+#' @author Xavier Valls \email{flamejat@@gmail.com}
+
+##################################################################################################################
+### Load data
+load("../data/highYieldIndices.Rda");
+
+##################################################################################################################
+### Compute invariants and set NaN for large values
+N  = ncol(highYieldIndices$Data);
+Series = log( highYieldIndices$Data[ -1, ] ) - log( highYieldIndices$Data[ -nrow(highYieldIndices$Data), ] );
+NANs_Index = which( abs( Series ) > (10 ^ 10) );
+Series[ NANs_Index ] = NaN;
+
+##################################################################################################################
+### Run EM algorithm
+FEM = FitExpectationMaximization(Series);
+
+##################################################################################################################
+### Display results
+dev.new();
+par( mfrow = c( N, 1 ) )
+for( n in 1 : N )
+{
+    Drop = is.nan( Series[  , n ] );
+    Bad_Dates = highYieldIndices$Dates[ Drop ];
+    
+    Keep = !is.nan( Series[ , n ] );
+    Good_Dates = highYieldIndices$Dates[ Keep ];
+    
+    plot(Good_Dates[1:(length(Good_Dates)-1)], Series[ Keep, n ], xaxt = "n", xlab = "", ylab = "");
+    points(Bad_Dates, FEM$Recovered_Series[ Drop, n ], pch = 21, bg = "red");
+    #axis( 1, at = highYieldIndices$Dates, labels=format(highYieldIndices$Dates,"%m/%d/%y"))  # Format x-axis
+    
+}
+legend( "bottom", 1.9, "EM-recovered data",  pch = 21, col = "red" ,bg = "gray90" );
+
+

Added: pkg/Meucci/man/FitExpectationMaximization.Rd
===================================================================
--- pkg/Meucci/man/FitExpectationMaximization.Rd	                        (rev 0)
+++ pkg/Meucci/man/FitExpectationMaximization.Rd	2013-07-12 19:20:14 UTC (rev 2559)
@@ -0,0 +1,33 @@
+\name{FitExpectationMaximization}
+\alias{FitExpectationMaximization}
+\title{Expectation-Maximization (EM) algorithm to recover missing observations in a time series ,
+as described in  A. Meucci, "Risk and Asset Allocation", Springer, 2005.}
+\usage{
+  FitExpectationMaximization(X)
+}
+\arguments{
+  \item{X}{: [matrix] (T x N) of data}
+}
+\value{
+  E_EM : [vector] (N x 1) expectation
+
+  S_EM : [matrix] (N x N) covariance matrix
+
+  Y : [matrix] (T x N) updated data
+
+  CountLoop : [scalar] number of iterations of the
+  algorithm
+}
+\description{
+  Expectation-Maximization (EM) algorithm to recover
+  missing observations in a time series , as described in
+  A. Meucci, "Risk and Asset Allocation", Springer, 2005.
+}
+\author{
+  Xavier Valls \email{flamejat at gmail.com}
+}
+\references{
+  \url{http://symmys.com/node/170} See Meucci's script for
+  "FitExpectationMaximization.m"
+}
+

Added: pkg/Meucci/man/QuantileMixture.Rd
===================================================================
--- pkg/Meucci/man/QuantileMixture.Rd	                        (rev 0)
+++ pkg/Meucci/man/QuantileMixture.Rd	2013-07-12 19:20:14 UTC (rev 2559)
@@ -0,0 +1,43 @@
+\name{QuantileMixture}
+\alias{QuantileMixture}
+\title{Computes the quantile of a mixture distirbution by linear interpolation/extrapolation of the cdf.the confidence
+level p can be vector. If this vector is uniformly distributed on [0,1] the sample Q is distributed as the mixture.
+Described in A. Meucci "Risk and Asset Allocation", Springer, 2005}
+\usage{
+  QuantileMixture(p, a, m_Y, s_Y, m_Z, s_Z)
+}
+\arguments{
+  \item{p}{: [scalar] in [0,1], probability}
+
+  \item{a}{: [scalar] in (0,1), mixing probability}
+
+  \item{m_Y}{: [scalar] mean of normal component}
+
+  \item{s_Y}{: [scalar] standard deviation of normal
+  component}
+
+  \item{m_Z}{: [scalar] first parameters of the log-normal
+  component}
+
+  \item{s_Z}{: [scalar] second parameter of the log-normal
+  component}
+}
+\value{
+  Q : [scalar] quantile
+}
+\description{
+  Computes the quantile of a mixture distirbution by linear
+  interpolation/extrapolation of the cdf.the confidence
+  level p can be vector. If this vector is uniformly
+  distributed on [0,1] the sample Q is distributed as the
+  mixture. Described in A. Meucci "Risk and Asset
+  Allocation", Springer, 2005
+}
+\author{
+  Xavier Valls \email{flamejat at gmail.com}
+}
+\references{
+  \url{http://symmys.com/node/170} See Meucci's script for
+  "QuantileMixture.m"
+}
+