[Uwgarp-commits] r21 - in pkg/GARPFRM: sandbox vignettes

Wed Nov 27 01:26:17 CET 2013

Author: rossbennett34
Date: 2013-11-27 01:26:16 +0100 (Wed, 27 Nov 2013)
New Revision: 21

Added:
   pkg/GARPFRM/vignettes/RB.Rnw
   pkg/GARPFRM/vignettes/RB.pdf
Modified:
   pkg/GARPFRM/sandbox/regression_example.R
Log:
Adding vignette for first few sections of quantitative analysis book

Modified: pkg/GARPFRM/sandbox/regression_example.R
===================================================================

--- pkg/GARPFRM/sandbox/regression_example.R	2013-11-26 23:58:26 UTC (rev 20)
+++ pkg/GARPFRM/sandbox/regression_example.R	2013-11-27 00:26:16 UTC (rev 21)
@@ -59,10 +59,10 @@
 # Sample quantiles of SPY returns
 quantile(SPY.ret, probs=c(0, 0.25, 0.5, 0.75, 1))
 
-# Sample correlation of SPY returns
+# Sample correlation of returns
 cor(returns)
 
-# Sample covariance of SPY returns
+# Sample covariance of returns
 cov(returns)
 
 # Distributions
@@ -105,6 +105,9 @@
 p_value <- 2 * pt(q=-abs(t_stat), df=462)
 df <- nrow(SPY.ret) - 1
 ci <- mean(SPY.ret) + c(-1, 1) * 1.96 * sd(SPY.ret) / sqrt(nrow(SPY.ret))
+paste("t = ", round(t_stat, 4), ", df = ", df, ", p-value = ", round(p_value, 4), sep="")
+print("95% Confidence Interval")
+print(ci)
 
 ##### Regression #####
 # Signle Regressor
@@ -168,21 +171,25 @@
 
 # Align the dates of the Fama-French Factors and the returns
 returns <- returns['/2013-10-25']
+# Omit the first column of returns because it is the SPY weekly returns, which is
+# a proxy for the market.
 returns <- returns[, -1]
 AAPL.ret <- returns[, "AAPL"]
 
+# AAPL excess returns
 AAPL.e <- AAPL.ret - fama_french_factors[, "RF"] / 100
 
+# Fit the model
 ff.fit <- lm(AAPL.e ~ ff_factors)
 print(ff.fit)
 summary(ff.fit)
 
 # Fit the Fama-French 3 Factor Model to all the assets in the returns object
-# Excess returns
+# Calculate the excess returns of all assets in the returns object
 ret.e <- returns - (fama_french_factors[, "RF"] / 100) %*% rep(1, ncol(returns))
 ff.fit <- lm(ret.e ~ ff_factors)
 print(ff.fit)
-summary(ff.fit)
+print(summary(ff.fit))
 
 
 beta0 <- coef(ff.fit)[1,]

Added: pkg/GARPFRM/vignettes/RB.Rnw
===================================================================
--- pkg/GARPFRM/vignettes/RB.Rnw	                        (rev 0)
+++ pkg/GARPFRM/vignettes/RB.Rnw	2013-11-27 00:26:16 UTC (rev 21)
@@ -0,0 +1,337 @@
+\documentclass[a4paper]{article}
+\usepackage[OT1]{fontenc}
+\usepackage{Sweave}
+\usepackage{Rd}
+\usepackage{amsmath}
+\usepackage{hyperref}
+\usepackage{url}
+\usepackage[round]{natbib}
+\usepackage{bm}
+\usepackage{verbatim}
+\usepackage[latin1]{inputenc}
+\bibliographystyle{abbrvnat}
+
+\let\proglang=\textsf
+%\newcommand{\pkg}[1]{{\fontseries{b}\selectfont #1}}
+%\newcommand{\R}[1]{{\fontseries{b}\selectfont #1}}
+%\newcommand{\email}[1]{\href{mailto:#1}{\normalfont\texttt{#1}}}
+%\newcommand{\E}{\mathsf{E}}
+%\newcommand{\VAR}{\mathsf{VAR}}
+%\newcommand{\COV}{\mathsf{COV}}
+%\newcommand{\Prob}{\mathsf{P}}
+
+\renewcommand{\topfraction}{0.85}
+\renewcommand{\textfraction}{0.1}
+\renewcommand{\baselinestretch}{1.5}
+\setlength{\textwidth}{15cm} \setlength{\textheight}{22cm} \topmargin-1cm \evensidemargin0.5cm \oddsidemargin0.5cm
+
+\usepackage[latin1]{inputenc}
+% or whatever
+
+\usepackage{lmodern}
+\usepackage[T1]{fontenc}
+% Or whatever. Note that the encoding and the font should match. If T1
+% does not look nice, try deleting the line with the fontenc.
+
+\begin{document}
+
+\title{Exploratory Data Analysis, basic probability and statistics}
+\author{Ross Bennett}
+
+\maketitle
+
+\begin{abstract}
+The goal of this vignette is to demonstrate key concepts in Financial Risk Manager (FRM (R)) Part 1: Quantitative Analysis using R and the GARPFRM package. This vignette will cover exploratory data analysis, basic probability and statistics, and linear regression.
+\end{abstract}
+
+\tableofcontents
+
+\section{Exploratory Data Analysis}
+
+Load the GARPFRM package and the \verb"returns" dataset. The \verb"returns" dataset includes weekly returns for SPY, AAPL, XOM, GOOG, MSFT, and GE from 2005-01-14 to 2013-11-22.
+<<>>=
+library(GARPFRM)
+data(returns)
+@
+
+The exploratory data analysis, basic probability and statistics will use the SPY weekly returns.
+<<>>=
+SPY.ret <- returns[, "SPY"]
+@
+
+Plot of the SPY weekly returns. 
+<<>>=
+plot(SPY.ret, main="SPY Weekly Returns")
+@
+
+The density of the SPY weekly returns is plotted to better understand its distribution. A normal density is overlaid on the plot with standard estimates of the sample mean and standard deviation. Another normal density is overlaid using robust estimates. It is clear from the chart that the robust estimates provide a better fit than the standard estimates of the sample mean and sample standard deviation, but it is not clear if the SPY returns are normally distributed.
+<<>>=
+# Plot the density of SPY Weekly Returns
+plot(density(SPY.ret), main="Density of SPY Weekly Returns")
+rug(SPY.ret)
+# sample estimates
+curve(dnorm(x, mean=mean(SPY.ret), sd=sd(SPY.ret)), 
+      add=TRUE, col="red", lty=2, lwd=2)
+# robust estimates
+curve(dnorm(x, mean=median(SPY.ret), sd=mad(SPY.ret)), 
+      add=TRUE, col="blue", lty=2, lwd=2)
+legend("topleft", legend=c("estimated density", "normal density", "robust normal density"), 
+       col=c("black", "red", "blue"), lty=c(1, 2, 2), bty="n", cex=0.8)
+@
+
+Quantile-Quantile plot of SPY weekly returns. It can be seen from the Normal Q-Q plot that the SPY returns have ``fat tails''.
+<<>>=
+qqnorm(SPY.ret)
+qqline(SPY.ret)
+@
+
+We can test if the SPY weekly returns came from a normal distribution using the Shapiro-Wilk test of normality. The null hypothesis is that the data came from a normal distribution. The p-value is very small and we can reject the null hypothesis.
+<<>>=
+shapiro.test(coredata(SPY.ret))
+@
+
+\subsection{Basic Statistics}
+Here we calculate some basic statistics on the SPY weekly returns.
+<<>>=
+# Sample mean of SPY returns
+mean(SPY.ret)
+
+# Sample Variance of SPY returns
+var(SPY.ret)
+
+# Sample standard deviation of SPY returns
+sd(SPY.ret)
+
+# Standard error of SPY returns
+sd(SPY.ret) / sqrt(nrow(SPY.ret))
+
+# Sample skewness of SPY returns.
+# See ?skewness for additional methods for calculating skewness
+skewness(SPY.ret, method="sample")
+
+# Sample kurtosis of SPY returns.
+# See ?kurtosis for additional methods for calculating kurtosis
+kurtosis(SPY.ret, method="sample")
+
+# Summary statistics of SPY returns
+summary(SPY.ret)
+
+# Sample quantiles of SPY returns
+quantile(SPY.ret, probs=c(0, 0.25, 0.5, 0.75, 1))
+
+# Sample correlation of returns
+cor(returns)
+
+# Sample covariance of returns
+cov(returns)
+@
+
+\subsection{Distributions}
+R has functions to compute the density, distribution function, quantile, and random number generation for several distributions. The continuous distributions covered in chapter 1 are listed here.
+\begin{itemize}
+\item Normal Distribution: \verb"dnorm", \verb"pnorm", \verb"qnorm", \verb"rnorm"
+
+\item Chi-Squared Distribution: \verb"dchisq", \verb"pchisq", \verb"qchisq", \verb"rchisq"
+
+\item Student t Distribution: \verb"dt", \verb"pt", \verb"qt", \verb"rt"
+
+\item F Distribution: \verb"df", \verb"pf", \verb"qf", \verb"rf"
+\end{itemize}
+
+In general, the functions are as follows:
+\begin{itemize}
+\item d*: density
+\item p*: distribution function (probability)
+\item q*: quantile function
+\item r*: random generation
+\end{itemize}
+where * is the appropriate distribution.
+
+Here we demonstrate these functions for the normal distribution.
+
+Use \verb"dnorm" to plot the pdf of a standard normal distribution.
+<<>>=
+curve(dnorm(x), from=-4, to=4, main="Standard Normal pdf")
+@
+
+Calculate the probability that $Y \leq 2$ when $Y$ is distributed $N(1, 4)$ with mean of 1 and variance of 4.
+<<>>=
+pnorm(q=2, mean=1, sd=2)
+# Normalize as is done in the book
+pnorm(q=0.5)
+@
+
+Quantile function of a standard normal at probability 0.975.
+<<>>=
+qnorm(p=0.975)
+@
+
+Generate 10 random numbers from a normal distribution with mean 0.0015 and standard deviation 0.025.
+<<>>=
+# Set the seed for reproducible results
+set.seed(123)
+rnorm(n=10, mean=0.0015, sd=0.025)
+@
+
+\subsection{Hypothesis Test}
+The null hypothesis is that the true mean return of SPY is equal to 0.
+<<>>=
+t.test(x=SPY.ret, alternative="two.sided", mu=0)
+
+# Replicate the results of t.test using the method outlined in the book
+t_stat <- (mean(SPY.ret) - 0) / (sd(SPY.ret) / sqrt(nrow(SPY.ret)))
+df <- nrow(SPY.ret) - 1  # degrees of freedom (n - 1), derived from the data
+p_value <- 2 * pt(q=-abs(t_stat), df=df)  # two-sided p-value from Student t
+ci <- mean(SPY.ret) + c(-1, 1) * 1.96 * sd(SPY.ret) / sqrt(nrow(SPY.ret))  # 1.96 ~ qnorm(0.975)
+paste("t = ", round(t_stat, 4), ", df = ", df, ", p-value = ", round(p_value, 4), sep="")
+print("95% Confidence Interval")
+print(ci)
+@
+
+\section{Regression}
+\subsection{Regression with a single regressor}
+
+Extract the weekly returns of AAPL and SPY from the returns object. The returns of AAPL and SPY will be used to demonstrate linear regression in R.
+<<>>=
+AAPL.ret <- returns[, "AAPL"]
+SPY.ret <- returns[, "SPY"]
+@
+
+Scatterplot of AAPL and SPY returns.
+<<>>=
+plot(x=coredata(SPY.ret), y=coredata(AAPL.ret), 
+     xlab="SPY returns", ylab="AAPL returns")
+@
+
+Fit the linear regression model. \verb"AAPL.ret" is the response variable and \verb"SPY.ret" is the explanatory variable.
+<<>>=
+model.fit <- lm(AAPL.ret ~ SPY.ret)
+@
+
+The \verb"print" and \verb"summary" methods for \verb"lm" objects are very useful and provide several of the statistics covered in the book.
+<<>>=
+# The print method displays the call and the coefficients of the linear model
+print(model.fit)
+
+# The summary method displays additional information for the linear model
+model.summary <- summary(model.fit)
+print(model.summary)
+@
+
+Access elements of the \verb"lm" object
+<<>>=
+# Coefficients
+coef(model.fit)
+# Extract the fitted values
+# fitted(model.fit)
+# Extract the residuals
+# resid(model.fit)
+# Extract the standardized residuals
+# rstandard(model.fit)
+@
+
+Access elements of the \verb"summary.lm" object
+<<>>=
+# Coefficients
+coef(model.summary)
+# Sigma
+model.summary$sigma
+# R squared
+model.summary$r.squared
+# Adjusted R squared
+model.summary$adj.r.squared
+@
+
+Use the \verb"predict" method to calculate the confidence and prediction intervals of the fitted model.
+<<>>=
+model.ci <- predict(object=model.fit, interval="confidence")
+model.pi <- predict(object=model.fit, interval="prediction")
+@
+
+Plot the residuals of the model.
+<<>>=
+plot(resid(model.fit), type="h")
+@
+
+Plot the fitted model with the confidence and prediction intervals.
+<<tidy=FALSE>>=
+plot(x=coredata(SPY.ret), y=coredata(AAPL.ret), 
+     xlab="SPY returns", ylab="AAPL returns")
+abline(model.fit, col="red")
+lines(x=coredata(SPY.ret), y=model.ci[, "upr"], col="blue", lty=1)
+lines(x=coredata(SPY.ret), y=model.ci[, "lwr"], col="blue", lty=1)
+lines(x=coredata(SPY.ret), y=model.pi[, "upr"], col="red", lty=2)
+lines(x=coredata(SPY.ret), y=model.pi[, "lwr"], col="red", lty=2)
+@
+
+\subsection{Regression with multiple regressors}
+The Fama French 3 Factor model is used to demonstrate regression with multiple regressors. The first example will use AAPL weekly returns and the Fama French factors from 2005-01-14 to 2013-10-25. The premise of the model is that AAPL returns can be explained by the 3 factors of the Fama French model.
+
+<<>>=
+data(fama_french_factors)
+
+# The first 3 columns are the factors, the 4th column is the risk free rate.
+ff_factors <- fama_french_factors[, 1:3]
+@
+
+Prepare the data for the model.
+<<>>=
+# Align the dates of the Fama-French Factors and the returns
+returns <- returns['/2013-10-25']
+AAPL.ret <- returns[, "AAPL"]
+
+# AAPL excess returns
+AAPL.e <- AAPL.ret - fama_french_factors[, "RF"] / 100
+@
+
+Fit the model.
+<<>>=
+ff.fit <- lm(AAPL.e ~ ff_factors)
+print(ff.fit)
+print(summary(ff.fit))
+@
+
+If we wanted to fit the model to more assets, we could manually fit the model with different assets as the response variable. However, we can automatically fit several models very easily with R.
+
+<<>>=
+# Omit the first column of returns because it is the SPY weekly returns, which is
+# a proxy for the market.
+returns <- returns[, -1]
+
+# Calculate the excess returns of all assets in the returns object
+ret.e <- returns - (fama_french_factors[, "RF"] / 100) %*% rep(1, ncol(returns))
+@
+
+The \verb"ret.e" object contains the excess returns for AAPL, XOM, GOOG, MSFT, and GE.
+<<>>=
+# Show the first 5 rows of ret.e
+head(ret.e, 5)
+@
+
+Here we fit the Fama French 3 Factor model to each asset in \verb"ret.e". This fits 5 models, 1 for each asset, and stores results of each model in the \verb"ff.fit" object as a multiple linear model (mlm) object.
+<<>>=
+ff.fit <- lm(ret.e ~ ff_factors)
+# Display the coefficients of each model
+print(ff.fit)
+# Display the summary object for each model
+print(summary(ff.fit))
+@
+
+Extract and plot the beta values and the R squared values for each asset.
+<<>>=
+beta0 <- coef(ff.fit)[1,]  # row 1 of the coefficient matrix: intercepts
+beta1 <- coef(ff.fit)[2,]  # rows 2-4: one slope per column of ff_factors
+beta2 <- coef(ff.fit)[3,]
+beta3 <- coef(ff.fit)[4,]
+rsq <- vapply(X=summary(ff.fit), FUN=function(x) x$r.squared, FUN.VALUE=numeric(1))
+names(rsq) <- colnames(ret.e)  # label each R squared with its asset name
+
+par(mfrow=c(2,2))
+barplot(beta1, main="Beta for Market-RF", col=c(2:6))
+barplot(beta2, main="Beta for SMB", col=c(2:6))
+barplot(beta3, main="Beta for HML", col=c(2:6))
+barplot(rsq, main="R Squared Values", col=c(2:6))
+@
+
+
+\end{document}
\ No newline at end of file

Added: pkg/GARPFRM/vignettes/RB.pdf
===================================================================
(Binary files differ)


Property changes on: pkg/GARPFRM/vignettes/RB.pdf
___________________________________________________________________
Added: svn:mime-type
   + application/octet-stream