[Returnanalytics-commits] r3657 - in pkg/FactorAnalytics: . R man vignettes
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Fri May 29 05:26:25 CEST 2015
Author: pragnya
Date: 2015-05-29 05:26:24 +0200 (Fri, 29 May 2015)
New Revision: 3657
Modified:
pkg/FactorAnalytics/DESCRIPTION
pkg/FactorAnalytics/R/fitTsfm.R
pkg/FactorAnalytics/R/fitTsfm.control.R
pkg/FactorAnalytics/R/fmmc.R
pkg/FactorAnalytics/R/plot.sfm.r
pkg/FactorAnalytics/R/plot.tsfm.r
pkg/FactorAnalytics/man/fitTsfm.control.Rd
pkg/FactorAnalytics/vignettes/fitSfm_vignette.pdf
pkg/FactorAnalytics/vignettes/fitTsfm_vignette.pdf
Log:
Edits to lasso var selection, opt.method for st.mple, fix bug in fmmc
Modified: pkg/FactorAnalytics/DESCRIPTION
===================================================================
--- pkg/FactorAnalytics/DESCRIPTION 2015-05-28 21:14:29 UTC (rev 3656)
+++ pkg/FactorAnalytics/DESCRIPTION 2015-05-29 03:26:24 UTC (rev 3657)
@@ -1,8 +1,8 @@
Package: factorAnalytics
Type: Package
Title: Factor Analytics
-Version:2.0.20
-Date:2015-05-19
+Version:2.0.21
+Date:2015-05-28
Author: Eric Zivot, Sangeetha Srinivasan and Yi-An Chen
Maintainer: Sangeetha Srinivasan <sangee at uw.edu>
Description: An R package for the estimation and risk analysis of linear factor
Modified: pkg/FactorAnalytics/R/fitTsfm.R
===================================================================
--- pkg/FactorAnalytics/R/fitTsfm.R 2015-05-28 21:14:29 UTC (rev 3656)
+++ pkg/FactorAnalytics/R/fitTsfm.R 2015-05-29 03:26:24 UTC (rev 3657)
@@ -192,7 +192,7 @@
m5 <- match(c("type","normalize","eps","max.steps","trace"),
names(control), 0L)
lars.args <- control[m5, drop=TRUE]
- m6 <- match(c("K","type","normalize","eps","max.steps","trace"),
+ m6 <- match(c("K","type","normalize","eps","max.steps","trace","plot.it"),
names(control), 0L)
cv.lars.args <- control[m6, drop=TRUE]
@@ -404,30 +404,30 @@
lars.fit <- do.call(lars, c(list(x=xmat, y=yvec),lars.args))
lars.sum <- summary(lars.fit)
lars.cv <- do.call(cv.lars, c(list(x=xmat,y=yvec,mode="step"),cv.lars.args))
- # including plot.it=FALSE to cv.lars strangely gives an error: "Argument s
- # out of range". And, specifying index=seq(nrow(lars.fit$beta)-1) resolves
- # the issue, but care needs to be taken for small N
# get the step that minimizes the "Cp" statistic or
# the K-fold "cv" mean-squared prediction error
if (lars.criterion=="Cp") {
- s <- which.min(lars.sum$Cp)-1 # 2nd row is "step 1"
+ s <- which.min(lars.sum$Cp)
} else {
- s <- which.min(lars.cv$cv)-1
+ s <- which.min(lars.cv$cv)
}
# get factor model coefficients & fitted values at the step obtained above
coef.lars <- predict(lars.fit, s=s, type="coef", mode="step")
+ # alternately: coef.lars <- lars.fit[s, ]
fitted.lars <- predict(lars.fit, xmat, s=s, type="fit", mode="step")
fitted.list[[i]] <- xts(fitted.lars$fit, index(reg.xts))
# extract and assign the results
asset.fit[[i]] = lars.fit
- alpha[i] <- (fitted.lars$fit -
- reg.xts[,factor.names]%*%coef.lars$coefficients)[1]
beta.names <- names(coef.lars$coefficients)
beta[i, beta.names] <- coef.lars$coefficients
- r2[i] <- lars.fit$R2[s+1]
- resid.sd[i] <- sqrt(lars.sum$Rss[s+1]/(nrow(reg.xts)-lars.sum$Df[s+1]))
-
+ alpha[i] <- predict(lars.fit, matrix(0,1,length(beta.names)), s=s,
+ type="fit", mode="step")$fit
+ # alternately: alpha[i] <-
+ # (fitted.lars$fit - reg.xts[,factor.names]%*%coef.lars$coefficients)[1]
+ r2[i] <- lars.fit$R2[s]
+ resid.sd[i] <- sqrt(lars.sum$Rss[s]/(nrow(reg.xts)-sum(!beta[i,]==0)))
+ # according to summary.lars help files, $df is tricky for some models
}
if (length(asset.names)>1) {
fitted.xts <- do.call(merge, fitted.list)
Modified: pkg/FactorAnalytics/R/fitTsfm.control.R
===================================================================
--- pkg/FactorAnalytics/R/fitTsfm.control.R 2015-05-28 21:14:29 UTC (rev 3656)
+++ pkg/FactorAnalytics/R/fitTsfm.control.R 2015-05-29 03:26:24 UTC (rev 3657)
@@ -103,6 +103,8 @@
#' during any step, variables are frequently droppped and added as the
#' algorithm proceeds. Although the default usually guarantees that the
#' algorithm has proceeded to the saturated fit, users should check.
+#' @param plot.it option to plot the output for \code{\link[lars]{cv.lars}}.
+#' Default is \code{FALSE}.
#' @param lars.criterion an option to assess model selection for the
#' \code{"lars"} method; one of "Cp" or "cv". See details. Default is "Cp".
#' @param K number of folds for computing the K-fold cross-validated mean
@@ -143,7 +145,7 @@
steps=1000, k=2, nvmin=1, nvmax=8, force.in=NULL,
force.out=NULL, method, really.big=FALSE, type,
normalize=TRUE, eps=.Machine$double.eps, max.steps,
- lars.criterion="Cp", K=10) {
+ plot.it=FALSE, lars.criterion="Cp", K=10) {
# get the user-specified arguments (that have no defaults)
# this part of the code was adapted from stats::lm
@@ -177,6 +179,12 @@
if (!is.logical(really.big) || length(really.big) != 1) {
stop("Invalid argument: control parameter 'really.big' must be logical")
}
+ if (!is.logical(normalize) || length(normalize) != 1) {
+ stop("Invalid argument: control parameter 'normalize' must be logical")
+ }
+ if (!is.logical(plot.it) || length(plot.it) != 1) {
+ stop("Invalid argument: control parameter 'plot.it' must be logical")
+ }
if (nvmin <= 0 || round(nvmin) != nvmin) {
stop("Control parameter 'nvmin' must be a positive integer")
}
@@ -197,6 +205,6 @@
trace=trace, steps=steps, k=k, nvmin=nvmin,
nvmax=nvmax, force.in=force.in, force.out=force.out,
really.big=really.big, normalize=normalize, eps=eps,
- lars.criterion=lars.criterion, K=K))
+ plot.it=plot.it, lars.criterion=lars.criterion, K=K))
return(result)
}
Modified: pkg/FactorAnalytics/R/fmmc.R
===================================================================
--- pkg/FactorAnalytics/R/fmmc.R 2015-05-28 21:14:29 UTC (rev 3656)
+++ pkg/FactorAnalytics/R/fmmc.R 2015-05-29 03:26:24 UTC (rev 3657)
@@ -1,332 +1,333 @@
-#' @title Functions to compute estimates and thier standard errors using fmmc
-#'
-#' Control default arguments. Usually for factorAnalytics.
-#'
-#' @details
-#' This method takes in the additional arguments list and checks if parameters
-#' are set. Then it defaults values if they are unset. Currently it controls the
-#' fit.method(default: OLS) and variable.selection(default: subsets). If
-#' variable.selection is set to values other than subsets/none then it will
-#' default to subsets.
-#' arguments for factorAnalytics
-#'
-#' @param ... Arguments that must be passed to fitTsfm
-#'
-#'
-.fmmc.default.args <- function(...) {
- add.args <- list(...)
- if(!"fit.method" %in% names(add.args)) add.args[["fit.method"]] <- "LS"
-
- if(!"variable.selection" %in% names(add.args))
- add.args[["variable.selection"]] <- "subsets"
- else {
- if(!add.args[["variable.selection"]] %in% c("none", "subsets"))
- add.args[["variable.selection"]] <- "subsets"
- }
-
- if (add.args[["variable.selection"]] == "subsets") {
- if(!"nvmax" %in% names(add.args))
- add.args[["nvmax"]] <- NA
- }
-
- add.args
-}
-
-#' This is the main implementation of the Factor Model Monte Carlo method. It returns
-#' a fmmc object that contains the joint empirical density of factors and returns. This
-#' fmmc object can be reused to for calucluting risk and performance estimates along
-#' with standard errors for the estimates
-#'
-#' @details
-#' This method takes in data, factors and residual type. It then does the following
-#' 1. Fit a time series factor model to the data using user supplied selection and
-#' fit variables or it defaults them to stepwise and OLS respectively. If any
-#' of the betas are NA then the corresponding factors are dropped
-#' 2. If the residual type beisdes empirical is specified then it fits the
-#' corresponding distribution to the residuals and simulates from the fitted
-#' distribution. The number of NA's in the simulated sample are the same as
-#' original residuals.
-#' 3. It then merges factors and non-NA residuals for each asset to create a full
-#' outer join of the factors and residuals. We use this joined data to create new
-#' simulated returns. Returns together with factors define a joint emperical density.
-#'
-#' @param R single vector of returns
-#' @param factors matrix of factor returns
-#' @param ... allows passing paramters to factorAnalytics.
-#' @author Rohit Arora
-#'
-#'
-.fmmc.proc <- function(R, factors ,... ) {
-
- # Check if the classes of Returns and factors are correct
- if(is.null(nrow(R)) || is.null(nrow(factors))) {
- warning("Inputs are not matrix")
- return(NA)
- }
-
- factors.data <- na.omit(factors)
- T <- nrow(factors.data); T1 <- nrow(R)
- if (T < T1) {
- warning("Length of factors cannot be less than assets")
- return(NA)
- }
-
- # Start getting ready to fit a time-series factor model to the data.
- .data <- as.matrix(merge(R,factors.data))
-
- #default args if not set
- add.args <- .fmmc.default.args(...)
- fit.method <- add.args[["fit.method"]]
- variable.selection <- add.args[["variable.selection"]]
-
- if(variable.selection == "subsets" && is.na(add.args[["nvmax"]]))
- add.args[["nvmax"]] <- floor((ncol(factors.data) - 1)/2)
-
- # Lets fit the time-series model
- args <- list(asset.names=colnames(R),
- factor.names=colnames(factors.data), data=.data)
-
- args <- merge.list(args,add.args)
-
- # We do not need to remove NA's. Beta's do no change if NA's are not removed
- possibleError <- tryCatch(
- fit <- do.call(fitTsfm, args),
- error=function(e)
- e)
-
- if(inherits(possibleError, "error")) {
- warning(paste("Timeseries model fitting failed for ", colnames(R)))
- return(NA)
- }
-
- resid <- do.call(merge,lapply(lapply(fit$asset.fit,residuals),as.xts))
- beta <- t(fit$beta)
-
- if(any(is.na(beta))) {
- warning("some of the betas where NA in .fmmc.proc. Dropping those")
- beta <- beta[!is.na(c(beta)), 1, drop=FALSE]
- names.factors <- colnames(factors.data)
- names.beta <- rownames(beta)
- factors.data <- as.matrix(factors.data[,names.factors %in% names.beta])
- }
-
- # define a joint empirical density for the factors and residuals and use
- # that to calculate the returns.
- .data <- as.matrix(merge(as.matrix(factors.data), resid))
- alpha <- matrix(as.numeric(fit$alpha), nrow=nrow(.data), ncol=1, byrow=TRUE)
-
- returns <- alpha + .data[,-ncol(.data),drop=FALSE] %*% beta +
- .data[,ncol(.data),drop=FALSE]
-
- result <- list(bootdist = list(returns = returns,
- factors = .data[,-ncol(.data),drop=FALSE]),
- data = list(R = R, factors = factors.data), args = add.args)
- result
-}
-
-#' Statistic function for the boot call. It calculates the risk or performnace
-#' meeasure by using the estimatation function in its argument list.
-#'
-#' @details
-#' This method works as follows.
-#' 1. Get data with factors and returns.
-#' 2. Subset T rows from the data.
-#' 3. Discard first TR-TR1 of the asset returns by setting them to NA
-#' 4. calls .fmmc.proc method over the new data set to get a new joint empirical
-#' distribution of returns and factors
-#' 5. We use the new returns with the estimation function to calculate the
-#' risk or performance measure.
-#'
-#' @param data matrix of (all factors + returns of just 1 asset)
-#' @param indices row numbers generated by boot
-#' @param args additinal paramters needed for subsetting the data and calulating
-#' the perfomance/risk measure.
-#' @author Rohit Arora
-#'
-#'
-.fmmc.boot <- function(data, indices, args) {
-
- TR <- args$TR
- TR1 <- args$TR1
- estimate.func <- args$estimate.func
- fit.method <- args$fit.method
- var.sel <- args$var.sel
-
- fun <- match.fun(estimate.func)
-
- # we just need TR rows of data
- ind <- sample(indices, TR , replace = TRUE)
- data <- data[ind,]
-
- # discard the first (TR-TR1) portion of returns if using fmmc. For
- # complete data TR = TR1
- .data <- data
- .data[1:(TR-TR1),ncol(.data)] <- NA
-
- # If the data does not have dates then it cannot be transformed to xts.
- # So lets fake dates to make xts happy
- .data <- as.xts(.data , order.by=seq(as.Date("1980/1/1"), by = "day",
- length.out = nrow(.data)))
-
- # lets get a new empirical distribution of factors and returns for a new subset
- fmmcObj <- .fmmc.proc(R=.data[,ncol(.data),drop=FALSE],
- factors=.data[,-ncol(.data)],
- fit.method = fit.method, variable.selection = var.sel)
-
- # lets calculate the performance or risk estimate
- measure <- fun(fmmcObj$bootdist$returns)
- measure
-}
-
-#' Main function to calculate the risk/performance estimate and calculate the
-#' standard error of the estimate using bootstrapping.
-#'
-#' @details
-#' bootstrapping in our case can be painfully slow, so we exploit the parallel
-#' capabilities of boot function. All cores on your machine are used.
-#' We use the boot call from the boot library for calculating the estimate and
-#' its standard error.
-#'
-#' @param fmmcObj object returned by fmmc proc. This is a comprehensive object
-#' with all data for factors and returns.
-#' @param nboot number of bootstap samples. Not sure how many repetations are
-#' reuired but remember bias-variance tradeoff. Increasing nboot will only
-#' reduce variance and not have a significant effect on bias(estimate)
-#' @param estimate.func this is a handle to the function used for calulating
-#' the perfomance/risk measure.
-#' @param cl A cluster for running across multiple cores
-#' @author Rohit Arora
-#'
-#'
-.fmmc.se <- function(fmmcObj, nboot = 50, estimate.func, cl = NULL) {
-
- parallel <- if(is.null(cl)) "no" else "snow"
- ncpus <- if(is.null(cl)) 1 else detectCores()
-
- # length of factors
- TR <- nrow(fmmcObj$data$factors)
-
- # length of the asset returns
- len <- nrow(fmmcObj$data$R) -
- apply(fmmcObj$data$R, 2, function(col) which.min(is.na(col))) + 1
-
- returns <- fmmcObj$bootdist$returns
- factors <- fmmcObj$bootdist$factors
-
- # no need to do variable selection again. So lets turn it off
- args <- list(TR = TR, TR1 = len, estimate.func = estimate.func,
- fit.method = fmmcObj$args[["fit.method"]], var.sel = "none")
-
- result <- boot(data=cbind(factors, returns), statistic = .fmmc.boot,
- R = nboot, parallel = parallel, ncpus = ncpus, cl = cl, args=args)
-
- se <- apply(result$t,2,sd)
- se
-}
-
-#' Worker function that acts between the fmmc procedure and calling method.
-#'
-#' @details
-#' This method takes in data as single time series and factors as xts objects
-#' It then calls the actual estimation procedure.
-#'
-#' @param R single vector of returns
-#' @param factors matrix of factor returns
-#' @param ... allows passing paramters to factorAnalytics.
-#' @author Rohit Arora
-#'
-#'
-#'
-.fmmc.worker <- function(R, factors, ...) {
- fmmc.obj <- .fmmc.proc(R=R, factors=factors, ...)
- fmmc.obj
-}
-
-#' Compute fmmc objects that can be used for calcuation of estimates and their
-#' standard errors
-#'
-#' @details
-#' This method takes in data and factors as xts objects where multiple
-#' time series with different starting dates are merged together. It then
-#' computes FMMC objects as described in Jiang and Martin (2013)
-#'
-#' @param R matrix of returns in xts format
-#' @param factors matrix of factor returns in xts format
-#' @param parallel flag to utilize multiplecores on the cpu. All cores are used.
-#' @param ... Arguments that must be passed to fitTsfm
-#'
-#' @return returns an list of fmmc objects
-#'
-#' @references
-#' Yindeng Jiang and Richard Doug Martin. Better Risk and Performance
-#' Estimates with Factor Model Monte Carlo. SSRN Electronic Journal, July 2013.
-#'
-#' @author Rohit Arora
-#' @export
-#'
-#'
-fmmc <- function(R, factors, parallel=FALSE, ...) {
-
- ret <- NA
- assets.count <- ncol(R)
-
- if (parallel) {
- cl <- makeCluster(detectCores())
- registerDoSNOW(cl)
- ret <- foreach (i = 1:assets.count) %dopar% .fmmc.worker(R[,i], factors, ...)
- stopCluster(cl)
- } else
- ret <- foreach (i = 1:assets.count) %do% .fmmc.worker(R[,i], factors, ...)
-
- result <- ret[lapply(ret,length) > 1]
- result
-}
-
-#' Main function to calculate the standard errror of the estimate
-#'
-#' @details
-#' This method takes in a list of fmmc objects and a callback function to compute
-#' an estimate. The first argument of the callback function must be the data
-#' bootstrapped using fmmc procedure. The remaining arguments can be suitably
-#' bound to the parameters as needed. This function can also be used to calculate
-#' the standard error using the se flag.
-#'
-#' @param fmmcObjs A list of fmmc objects computed using .fmmc.proc and containing
-#' bootstrapped returns
-#' @param fun A callback function where the first argument is returns and all the
-#' other arguments are bounded to values
-#' @param se A flag to indicate if standard error for the estimate must be calculated
-#' @param parallel A flag to indicate if multiple cpu cores must be used
-#' @param nboot Number of bootstrap samples
-#'
-#' @return returns the estimates and thier standard errors given fmmc objects
-#'
-#' @author Rohit Arora
-#' @export
-#'
-fmmc.estimate.se <- function(fmmcObjs, fun=NULL, se=FALSE, nboot=100,
- parallel = FALSE) {
-
- result <- as.matrix(rep(NA, length(fmmcObjs))); colnames(result) <- "estimate"
- rownames(result) <- unlist(lapply(fmmcObjs, function(obj) colnames(obj$data$R)))
-
- if(is.null(fun)) return(result)
-
- cl <- NULL
- if(parallel) {
- cl <- makeCluster(detectCores())
- clusterEvalQ(cl, library(xts))
- }
-
- result[,1] <- unlist(lapply(fmmcObjs, function(obj) fun(obj$bootdist$returns)))
- if(se) {
- serr <- unlist(
- lapply(fmmcObjs, function(obj) .fmmc.se(obj, nboot, fun, cl)))
- result <- cbind(result, serr)
- colnames(result) <- c("estimate", "se")
- }
-
- if(parallel) stopCluster(cl)
-
- result
+#' @title Functions to compute estimates and thier standard errors using fmmc
+#'
+#' Control default arguments. Usually for factorAnalytics.
+#'
+#' @details
+#' This method takes in the additional arguments list and checks if parameters
+#' are set. Then it defaults values if they are unset. Currently it controls the
+#' fit.method(default: OLS) and variable.selection(default: subsets). If
+#' variable.selection is set to values other than subsets/none then it will
+#' default to subsets.
+#' arguments for factorAnalytics
+#'
+#' @param ... Arguments that must be passed to fitTsfm
+#'
+#'
+.fmmc.default.args <- function(...) {
+ add.args <- list(...)
+ if(!"fit.method" %in% names(add.args)) add.args[["fit.method"]] <- "LS"
+
+ if(!"variable.selection" %in% names(add.args))
+ add.args[["variable.selection"]] <- "subsets"
+ else {
+ if(!add.args[["variable.selection"]] %in% c("none", "subsets"))
+ add.args[["variable.selection"]] <- "subsets"
+ }
+
+ if (add.args[["variable.selection"]] == "subsets") {
+ if(!"nvmax" %in% names(add.args))
+ add.args[["nvmax"]] <- NA
+ }
+
+ add.args
+}
+
+#' This is the main implementation of the Factor Model Monte Carlo method. It returns
+#' a fmmc object that contains the joint empirical density of factors and returns. This
+#' fmmc object can be reused to for calucluting risk and performance estimates along
+#' with standard errors for the estimates
+#'
+#' @details
+#' This method takes in data, factors and residual type. It then does the following
+#' 1. Fit a time series factor model to the data using user supplied selection and
+#' fit variables or it defaults them to stepwise and OLS respectively. If any
+#' of the betas are NA then the corresponding factors are dropped
+#' 2. If the residual type beisdes empirical is specified then it fits the
+#' corresponding distribution to the residuals and simulates from the fitted
+#' distribution. The number of NA's in the simulated sample are the same as
+#' original residuals.
+#' 3. It then merges factors and non-NA residuals for each asset to create a full
+#' outer join of the factors and residuals. We use this joined data to create new
+#' simulated returns. Returns together with factors define a joint emperical density.
+#'
+#' @param R single vector of returns
+#' @param factors matrix of factor returns
+#' @param ... allows passing paramters to factorAnalytics.
+#' @author Rohit Arora
+#'
+#'
+.fmmc.proc <- function(R, factors ,... ) {
+
+ # Check if the classes of Returns and factors are correct
+ if(is.null(nrow(R)) || is.null(nrow(factors))) {
+ warning("Inputs are not matrix")
+ return(NA)
+ }
+
+ factors.data <- na.omit(factors)
+ T <- nrow(factors.data); T1 <- nrow(R)
+ if (T < T1) {
+ warning("Length of factors cannot be less than assets")
+ return(NA)
+ }
+
+ # Start getting ready to fit a time-series factor model to the data.
+ .data <- as.matrix(merge(R,factors.data))
+
+ #default args if not set
+ add.args <- .fmmc.default.args(...)
+ fit.method <- add.args[["fit.method"]]
+ variable.selection <- add.args[["variable.selection"]]
+
+ if(variable.selection == "subsets" && is.na(add.args[["nvmax"]]))
+ add.args[["nvmax"]] <- floor((ncol(factors.data) - 1)/2)
+
+ # Lets fit the time-series model
+ args <- list(asset.names=colnames(R),
+ factor.names=colnames(factors.data), data=.data)
+
+ args <- merge.list(args,add.args)
+
+ # We do not need to remove NA's. Beta's do no change if NA's are not removed
+ possibleError <- tryCatch(
+ fit <- do.call(fitTsfm, args),
+ error=function(e)
+ e)
+
+ if(inherits(possibleError, "error")) {
+ warning(paste("Timeseries model fitting failed for ", colnames(R)))
+ return(NA)
+ }
+
+ resid <- do.call(merge,lapply(lapply(fit$asset.fit,residuals),as.xts))
+ beta <- t(fit$beta)
+
+ if(any(is.na(beta))) {
+ warning("some of the betas where NA in .fmmc.proc. Dropping those")
+ beta <- beta[!is.na(c(beta)), 1, drop=FALSE]
+ names.factors <- colnames(factors.data)
+ names.beta <- rownames(beta)
+ factors.data <- as.matrix(factors.data[,names.factors %in% names.beta])
+ }
+
+ # define a joint empirical density for the factors and residuals and use
+ # that to calculate the returns.
+ .data <- as.matrix(merge(as.matrix(factors.data), resid))
+ alpha <- matrix(as.numeric(fit$alpha), nrow=nrow(.data), ncol=1, byrow=TRUE)
+
+ returns <- alpha + .data[,-ncol(.data),drop=FALSE] %*% beta +
+ .data[,ncol(.data),drop=FALSE]
+
+ result <- list(bootdist = list(returns = returns,
+ factors = .data[,-ncol(.data),drop=FALSE]),
+ data = list(R = R, factors = factors.data), args = add.args)
+ result
+}
+
+#' Statistic function for the boot call. It calculates the risk or performnace
+#' meeasure by using the estimatation function in its argument list.
+#'
+#' @details
+#' This method works as follows.
+#' 1. Get data with factors and returns.
+#' 2. Subset T rows from the data.
+#' 3. Discard first TR-TR1 of the asset returns by setting them to NA
+#' 4. calls .fmmc.proc method over the new data set to get a new joint empirical
+#' distribution of returns and factors
+#' 5. We use the new returns with the estimation function to calculate the
+#' risk or performance measure.
+#'
+#' @param data matrix of (all factors + returns of just 1 asset)
+#' @param indices row numbers generated by boot
+#' @param args additinal paramters needed for subsetting the data and calulating
+#' the perfomance/risk measure.
+#' @author Rohit Arora
+#'
+#'
+.fmmc.boot <- function(data, indices, args) {
+
+ TR <- args$TR
+ TR1 <- args$TR1
+ estimate.func <- args$estimate.func
+ fit.method <- args$fit.method
+ var.sel <- args$var.sel
+
+ fun <- match.fun(estimate.func)
+
+ # we just need TR rows of data
+ ind <- sample(indices, TR , replace = TRUE)
+ data <- data[ind,]
+
+ # discard the first (TR-TR1) portion of returns if using fmmc. For
+ # complete data TR = TR1
+ .data <- data
+ .data[1:(TR-TR1),ncol(.data)] <- NA
+
+ # If the data does not have dates then it cannot be transformed to xts.
+ # So lets fake dates to make xts happy
+ .data <- as.xts(.data , order.by=seq(as.Date("1980/1/1"), by = "day",
+ length.out = nrow(.data)))
+
+ # lets get a new empirical distribution of factors and returns for a new subset
+ fmmcObj <- .fmmc.proc(R=.data[,ncol(.data),drop=FALSE],
+ factors=.data[,-ncol(.data)],
+ fit.method = fit.method, variable.selection = var.sel)
+
+ # lets calculate the performance or risk estimate
+ measure <- fun(fmmcObj$bootdist$returns)
+ measure
+}
+
+#' Main function to calculate the risk/performance estimate and calculate the
+#' standard error of the estimate using bootstrapping.
+#'
+#' @details
+#' bootstrapping in our case can be painfully slow, so we exploit the parallel
+#' capabilities of boot function. All cores on your machine are used.
+#' We use the boot call from the boot library for calculating the estimate and
+#' its standard error.
+#'
+#' @param fmmcObj object returned by fmmc proc. This is a comprehensive object
+#' with all data for factors and returns.
+#' @param nboot number of bootstap samples. Not sure how many repetations are
+#' reuired but remember bias-variance tradeoff. Increasing nboot will only
+#' reduce variance and not have a significant effect on bias(estimate)
+#' @param estimate.func this is a handle to the function used for calulating
+#' the perfomance/risk measure.
+#' @param cl A cluster for running across multiple cores
+#' @author Rohit Arora
+#'
+#'
+.fmmc.se <- function(fmmcObj, nboot = 50, estimate.func, cl = NULL) {
+
+ parallel <- if(is.null(cl)) "no" else "snow"
+ ncpus <- if(is.null(cl)) 1 else detectCores()
+
+ # length of factors
+ TR <- nrow(fmmcObj$data$factors)
+
+ # length of the asset returns
+ len <- nrow(fmmcObj$data$R) -
+ apply(fmmcObj$data$R, 2, function(col) which.min(is.na(col))) + 1
+
+ returns <- fmmcObj$bootdist$returns
+ factors <- fmmcObj$bootdist$factors
+
+ # no need to do variable selection again. So lets turn it off
+ args <- list(TR = TR, TR1 = len, estimate.func = estimate.func,
+ fit.method = fmmcObj$args[["fit.method"]], var.sel = "none")
+
+ result <- boot(data=cbind(factors, returns), statistic = .fmmc.boot,
+ R = nboot, parallel = parallel, ncpus = ncpus, cl = cl, args=args)
+
+ se <- apply(result$t,2,sd)
+ se
+}
+
+#' Worker function that acts between the fmmc procedure and calling method.
+#'
+#' @details
+#' This method takes in data as single time series and factors as xts objects
+#' It then calls the actual estimation procedure.
+#'
+#' @param R single vector of returns
+#' @param factors matrix of factor returns
+#' @param ... allows passing paramters to factorAnalytics.
+#' @author Rohit Arora
+#'
+#'
+#'
+.fmmc.worker <- function(R, factors, ...) {
+ fmmc.obj <- .fmmc.proc(R=R, factors=factors, ...)
+ fmmc.obj
+}
+
+#' Compute fmmc objects that can be used for calcuation of estimates and their
+#' standard errors
+#'
+#' @details
+#' This method takes in data and factors as xts objects where multiple
+#' time series with different starting dates are merged together. It then
+#' computes FMMC objects as described in Jiang and Martin (2013)
+#'
+#' @param R matrix of returns in xts format
+#' @param factors matrix of factor returns in xts format
+#' @param parallel flag to utilize multiplecores on the cpu. All cores are used.
+#' @param ... Arguments that must be passed to fitTsfm
+#'
+#' @return returns an list of fmmc objects
+#'
+#' @references
+#' Yindeng Jiang and Richard Doug Martin. Better Risk and Performance
+#' Estimates with Factor Model Monte Carlo. SSRN Electronic Journal, July 2013.
+#'
+#' @author Rohit Arora
+#' @export
+#'
+#'
+fmmc <- function(R, factors, parallel=FALSE, ...) {
+
+ ret <- NA
+ assets.count <- ncol(R)
+ i <- NA
+
+ if (parallel) {
+ cl <- makeCluster(detectCores())
+ registerDoSNOW(cl)
+ ret <- foreach (i = 1:assets.count) %dopar% .fmmc.worker(R[,i], factors, ...)
+ stopCluster(cl)
+ } else
+ ret <- foreach (i = 1:assets.count) %do% .fmmc.worker(R[,i], factors, ...)
+
+ result <- ret[lapply(ret,length) > 1]
+ result
+}
+
+#' Main function to calculate the standard errror of the estimate
+#'
+#' @details
+#' This method takes in a list of fmmc objects and a callback function to compute
+#' an estimate. The first argument of the callback function must be the data
+#' bootstrapped using fmmc procedure. The remaining arguments can be suitably
+#' bound to the parameters as needed. This function can also be used to calculate
+#' the standard error using the se flag.
+#'
+#' @param fmmcObjs A list of fmmc objects computed using .fmmc.proc and containing
+#' bootstrapped returns
+#' @param fun A callback function where the first argument is returns and all the
+#' other arguments are bounded to values
+#' @param se A flag to indicate if standard error for the estimate must be calculated
+#' @param parallel A flag to indicate if multiple cpu cores must be used
+#' @param nboot Number of bootstrap samples
+#'
+#' @return returns the estimates and thier standard errors given fmmc objects
+#'
+#' @author Rohit Arora
+#' @export
+#'
+fmmc.estimate.se <- function(fmmcObjs, fun=NULL, se=FALSE, nboot=100,
+ parallel = FALSE) {
+
+ result <- as.matrix(rep(NA, length(fmmcObjs))); colnames(result) <- "estimate"
+ rownames(result) <- unlist(lapply(fmmcObjs, function(obj) colnames(obj$data$R)))
+
+ if(is.null(fun)) return(result)
+
+ cl <- NULL
+ if(parallel) {
+ cl <- makeCluster(detectCores())
+ clusterEvalQ(cl, library(xts))
+ }
+
+ result[,1] <- unlist(lapply(fmmcObjs, function(obj) fun(obj$bootdist$returns)))
+ if(se) {
+ serr <- unlist(
+ lapply(fmmcObjs, function(obj) .fmmc.se(obj, nboot, fun, cl)))
+ result <- cbind(result, serr)
+ colnames(result) <- c("estimate", "se")
+ }
+
+ if(parallel) stopCluster(cl)
+
+ result
}
\ No newline at end of file
Modified: pkg/FactorAnalytics/R/plot.sfm.r
===================================================================
--- pkg/FactorAnalytics/R/plot.sfm.r 2015-05-28 21:14:29 UTC (rev 3656)
+++ pkg/FactorAnalytics/R/plot.sfm.r 2015-05-29 03:26:24 UTC (rev 3657)
@@ -174,7 +174,8 @@
den <- density(Residuals)
xval <- den$x
den.norm <- dnorm(xval, mean=mean(Residuals), sd=resid.sd)
- den.st <- dst(xval, dp=st.mple(x=matrix(1,nrow(Residuals)), y=as.vector(Residuals))$dp)
+ den.st <- dst(xval, dp=st.mple(x=matrix(1,nrow(Residuals)),
+ y=as.vector(Residuals), opt.method="BFGS")$dp)
# plot selection
repeat {
Modified: pkg/FactorAnalytics/R/plot.tsfm.r
===================================================================
--- pkg/FactorAnalytics/R/plot.tsfm.r 2015-05-28 21:14:29 UTC (rev 3656)
+++ pkg/FactorAnalytics/R/plot.tsfm.r 2015-05-29 03:26:24 UTC (rev 3657)
@@ -179,7 +179,8 @@
den <- density(Residuals)
xval <- den$x
den.norm <- dnorm(xval, mean=mean(Residuals), sd=resid.sd)
- den.st <- dst(xval, dp=st.mple(x=matrix(1,nrow(Residuals)), y=as.vector(Residuals))$dp)
+ den.st <- dst(xval, dp=st.mple(x=matrix(1,nrow(Residuals)),
+ y=as.vector(Residuals), opt.method="BFGS")$dp)
# plot selection
repeat {
Modified: pkg/FactorAnalytics/man/fitTsfm.control.Rd
===================================================================
--- pkg/FactorAnalytics/man/fitTsfm.control.Rd 2015-05-28 21:14:29 UTC (rev 3656)
+++ pkg/FactorAnalytics/man/fitTsfm.control.Rd 2015-05-29 03:26:24 UTC (rev 3657)
@@ -9,7 +9,8 @@
mxf = 50, mxs = 50, scope, scale, direction, trace = FALSE,
steps = 1000, k = 2, nvmin = 1, nvmax = 8, force.in = NULL,
force.out = NULL, method, really.big = FALSE, type, normalize = TRUE,
- eps = .Machine$double.eps, max.steps, lars.criterion = "Cp", K = 10)
+ eps = .Machine$double.eps, max.steps, plot.it = FALSE,
+ lars.criterion = "Cp", K = 10)
}
\arguments{
\item{decay}{a scalar in (0, 1] to specify the decay factor for "DLS".
@@ -111,6 +112,9 @@
algorithm proceeds. Although the default usually guarantees that the
[TRUNCATED]
To get the complete diff run:
svnlook diff /svnroot/returnanalytics -r 3657
More information about the Returnanalytics-commits
mailing list