[Returnanalytics-commits] r2355 - in pkg/FactorAnalytics: . R

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Tue Jun 18 20:18:01 CEST 2013


Author: chenyian
Date: 2013-06-18 20:18:01 +0200 (Tue, 18 Jun 2013)
New Revision: 2355

Modified:
   pkg/FactorAnalytics/NAMESPACE
   pkg/FactorAnalytics/R/fitMacroeconomicFactorModel.R
Log:


Modified: pkg/FactorAnalytics/NAMESPACE
===================================================================
--- pkg/FactorAnalytics/NAMESPACE	2013-06-18 17:51:26 UTC (rev 2354)
+++ pkg/FactorAnalytics/NAMESPACE	2013-06-18 18:18:01 UTC (rev 2355)
@@ -1,7 +0,0 @@
-exportPattern("^[^\\.]")
-S3method(print, MacroFactorModel)
-S3method(print, StatFactorModel)
-S3method(plot, MacroFactorModel)
-S3method(plot, StatFactorModel)
-S3method(plot, FundamentalFactorModel)
-S3method(summary, MacroFactorModel)
\ No newline at end of file

Modified: pkg/FactorAnalytics/R/fitMacroeconomicFactorModel.R
===================================================================
--- pkg/FactorAnalytics/R/fitMacroeconomicFactorModel.R	2013-06-18 17:51:26 UTC (rev 2354)
+++ pkg/FactorAnalytics/R/fitMacroeconomicFactorModel.R	2013-06-18 18:18:01 UTC (rev 2355)
@@ -1,373 +1,344 @@
-#' Fit macroeconomic factor model by time series regression techniques.
-#' 
-#' Fit macroeconomic factor model by time series regression techniques. It
-#' creates the class of "MacroFactorModel".
-#' 
-#' If \code{Robust} is chosen, there is no subsets but all factors will be
-#' used.  Cp is defined in
-#' http://www-stat.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf. p17.
-#' 
-#' @param ret.assets N x T Numerical returns data, univariate or multivariate,
-#' where N is the number of assets return and T is the length of time period.
-#' data has to be saved as class "data.frame" so that lm function can be used
-#' and must have column names.
-#' @param factors K x T Numerical factors data, where K is the number of
-#' factors and T is the length of the time period. Data has to be saved as
-#' class "data.frame" so that lm function can be used and must have column
-#' names.
-#' @param factor.set scalar, number of factors
-#' @param fit.method "OLS" is ordinary least squares method, "DLS" is
-#' discounted least squares method. Discounted least squares (DLS) estimation
-#' is weighted least squares estimation with exponentially declining weights
-#' that sum to unity. "Robust"
-#' @param variable.selection "stepwise" is traditional forward/backward
-#' stepwise OLS regression, starting from the initial set of factors, that adds
-#' factors only if the regression fit as measured by the Bayesian Information
-#' Criteria (BIC) or Akaike Information Criteria (AIC) can be done using the R
-#' function step() from the stats package. If \code{Robust} is chosen, the
-#' function step.lmRob in Robust package will be used. "all subsets" is
-#' Traditional all subsets regression can be done using the R function
-#' regsubsets() from the package leaps. "lar" , "lasso" is based on package
-#' "lars", linear angle regression.
-#' @param decay.factor for DLS. Default is 0.95.
-#' @param nvmax control option for all subsets. maximum size of subsets to
-#' examine
-#' @param force.in control option for all subsets. The factors that should be
-#' in all models.
-#' @param subsets.method control option for all subsets. se exhaustive search,
-#' forward selection, backward selection or sequential replacement to search.
-#' @param lars.criteria either choose minimum "Cp": unbiased estimator of the
-#' true rist or "cv" 10 folds cross-validation. See detail.
-#' @return an S3 object containing
-#' @returnItem asset.fit Fit objects for each asset. This is the class "lm" for
-#' each object.
-#' @returnItem alpha.vec N x 1 Vector of estimated alphas.
-#' @returnItem beta.mat N x K Matrix of estimated betas
-#' @returnItem r2.vec N x 1 Vector of R-square values.
-#' @returnItem residVars.vec N x 1 Vector of residual variances.
-#' @returnItem call function call.
-#' @returnItem ret.assets Assets returns of input data.
-#' @returnItem factors Factors of input data.
-#' @returnItem variable.selection variables selected by the user.
-#' @author Eric Zivot and Yi-An Chen.
-#' @references 1. Efron, Hastie, Johnstone and Tibshirani (2002) "Least Angle
-#' Regression" (with discussion) Annals of Statistics; see also
-#' http://www-stat.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf.  2.
-#' Hastie, Tibshirani and Friedman (2008) Elements of Statistical Learning 2nd
-#' edition, Springer, NY.
-#' @examples
-#' 
-#' # load data from the database
-#' data(managers.df)
-#' ret.assets = managers.df[,(1:6)]
-#' factors    = managers.df[,(7:9)]
-#' # fit the factor model with OLS
-#' fit <- fitMacroeconomicFactorModel(ret.assets,factors,fit.method="OLS",
-#'                                  variable.selection="all subsets")
-#' # summary of HAM1 
-#' summary(fit$asset.fit$HAM1)
-#' # plot actual vs. fitted over time for HAM1
-#' # use chart.TimeSeries() function from PerformanceAnalytics package
-#' dataToPlot = cbind(fitted(fit$asset.fit$HAM1), na.omit(managers.df$HAM1))
-#' colnames(dataToPlot) = c("Fitted","Actual")
-#' chart.TimeSeries(dataToPlot, main="FM fit for HAM1",
-#'                  colorset=c("black","blue"), legend.loc="bottomleft")
-#' 
-fitMacroeconomicFactorModel <-
-function(ret.assets, factors, factor.set = 2, 
-                                        fit.method=c("OLS","DLS","Robust"),
-                                        variable.selection=c("stepwise", "all subsets", "lar", "lasso"),
-                                        decay.factor = 0.95,nvmax=8,force.in=NULL,
-                                        subsets.method = c("exhaustive", "backward", "forward", "seqrep"),
-                                        lars.criteria = c("Cp","cv")
-                                        ) {
-## Inputs:
-# ret.assets           numerical returns data, univariate or multivariate. data has to be saved as class "data.frame" 
-#                      so that lm function can be used and must have colnames.
-# factors              numerical factors data.  data has to be saved as class "data.frame" 
-#                      so that lm function can be used and must have colnames.
-# factor.set           numerical, the numbers of the factors to be included in the model when all subsets 
-#                      is chosen.  
-# fit.method           chose between "OLS" ordinanary least sqaures, "DLS" discounted least squares using exponential
-#                      declining weights, and "Robust" coerced by function lmob in package "robustbase"
-# variable.selection   "all" includes all the explanary variables in the model.
-#                      "stepwise" is traditional forward/backward stepwise regression, starting from the initial 
-#                      set of factors, that adds factors only if the regression fit as measured by the Bayesian 
-#                      Information Criteria (BIC) or Akaike Information Criteria (AIC) can be done using the R 
-#                      function step() from the stats package. "all subsets" is Traditional all subsets regression 
-#                      can be done using the R function regsubsets() from the package leaps. 
-#                      only \code(OLS) or \code(DLS) can choose this option.   
-#                      "lar" , "lasso" is based on package "lars", linear angle regression.
-# decay.factor         for DLS. Default is 0.95.
-# nvmax                control option for all subsets. maximum size of subsets to examine
-# force.in             control option for all subsets. The factors that should be in all models.
-# subsets.method       control option for all subsets. se exhaustive search, forward selection, backward selection 
-#                      or sequential replacement to search.
-# lars.criteria        either choose minimum "Cp" which is unbiased estimator of the true rist or "cv"
-#                      10 folds cross-validation. See detail.    
-## Outputs:
-## output: list with following components:
-## asset.fit           fit objects for each asset. This is the class "lm" for each assets.
-## alpha.vec          vector of estimated alphas.
-## beta.mat                matrix of estimated betas.
-## r2.vec              vector of R-square values
-## residVars.vec       vector of residual variances
-## call               function call.  
-  require(leaps)
-  require(lars)
-  require(robust)
-  require(MASS)
-  this.call <- match.call()
-  
-if (is.data.frame(ret.assets) & is.data.frame(factors) ) {
-  manager.names = colnames(ret.assets)
-  factor.names  = colnames(factors)
-  managers.df   = cbind(ret.assets,factors)
-} else {
-  stop("ret.assets and beta.mat must be in class data.frame")
-}
-                                          
-
-
-
-# initialize list object to hold regression objects
-reg.list = list()
-
-
-# initialize matrices and vectors to hold estimated betas,
-# residual variances, and R-square values from
-# fitted factor models
-
-Alphas = ResidVars = R2values = rep(0, length(manager.names))
-names(Alphas) = names(ResidVars) = names(R2values) = manager.names
-Betas = matrix(0, length(manager.names), length(factor.names))
-colnames(Betas) = factor.names
-rownames(Betas) = manager.names
-
-
-
-
-
-
-if (variable.selection == "all subsets") {
-# estimate multiple factor model using loop b/c of unequal histories for the hedge funds
-
-
-
-if (fit.method == "OLS") {
-
-if (factor.set == length(force.in)) {
-  for (i in manager.names) {
- reg.df = na.omit(managers.df[, c(i, force.in)])
- fm.formula = as.formula(paste(i,"~", ".", sep=" "))
- fm.fit = lm(fm.formula, data=reg.df)
- fm.summary = summary(fm.fit)
- reg.list[[i]] = fm.fit
- Alphas[i] = coef(fm.fit)[1]
- Betas.names = names(coef(fm.fit)[-1])
- Betas[i,Betas.names] = coef(fm.fit)[-1]
- ResidVars[i] = fm.summary$sigma^2
- R2values[i] =  fm.summary$r.squared
-  }
-}  else if (factor.set > length(force.in)) {
-    
-for (i in manager.names) {
- reg.df = na.omit(managers.df[, c(i, factor.names)])
- fm.formula = as.formula(paste(i,"~", ".", sep=" "))
- fm.subsets <- regsubsets(fm.formula,data=reg.df,nvmax=nvmax,force.in=force.in,
-                          method=subsets.method)
- sum.sub <- summary(fm.subsets)
- reg.df <- na.omit(managers.df[,c(i,names(which(sum.sub$which[as.character(factor.set),-1]==TRUE))  )])
- fm.fit = lm(fm.formula, data=reg.df)
- fm.summary = summary(fm.fit)
- reg.list[[i]] = fm.fit
- Alphas[i] = coef(fm.fit)[1]
- Betas.names = names(coef(fm.fit)[-1])
- Betas[i,Betas.names] = coef(fm.fit)[-1]
- ResidVars[i] = fm.summary$sigma^2
- R2values[i] =  fm.summary$r.squared
-  }
-} else {
-  stop("ERROR! number of force.in should less or equal to factor.set")
-}
-  
-
-
-
-} else if (fit.method == "DLS"){
-  
-
-  if (factor.set == length(force.in)) {  
-  # define weight matrix 
-for (i in manager.names) {
-  reg.df = na.omit(managers.df[, c(i, force.in)])
- t.length <- nrow(reg.df)
- w <- rep(decay.factor^(t.length-1),t.length)
-   for (k in 2:t.length) {
-    w[k] = w[k-1]/decay.factor 
-  }   
-# sum weigth to unitary  
- w <- w/sum(w) 
- fm.formula = as.formula(paste(i,"~", ".", sep=""))                              
- fm.fit = lm(fm.formula, data=reg.df,weight=w)
- fm.summary = summary(fm.fit)
- reg.list[[i]] = fm.fit
- Alphas[i] = coef(fm.fit)[1]
- Betas.names = names(coef(fm.fit)[-1])
- Betas[i,Betas.names] = coef(fm.fit)[-1]
- ResidVars[i] = fm.summary$sigma^2
- R2values[i] =  fm.summary$r.squared
- } 
-} else if  (factor.set > length(force.in)) {
-  for (i in manager.names) {
-  reg.df = na.omit(managers.df[, c(i, factor.names)])
-  t.length <- nrow(reg.df)
-  w <- rep(decay.factor^(t.length-1),t.length)
-  for (k in 2:t.length) {
-  w[k] = w[k-1]/decay.factor 
-  }   
-  w <- w/sum(w) 
- fm.formula = as.formula(paste(i,"~", ".", sep=""))                              
- fm.subsets <- regsubsets(fm.formula,data=reg.df,nvmax=nvmax,force.in=force.in,
-                          method=subsets.method,weights=w) # w is called from global envio
- sum.sub <- summary(fm.subsets)
- reg.df <- na.omit(managers.df[,c(i,names(which(sum.sub$which[as.character(factor.set),-1]==TRUE))  )])
- fm.fit = lm(fm.formula, data=reg.df,weight=w)
- fm.summary = summary(fm.fit)
- reg.list[[i]] = fm.fit
- Alphas[i] = coef(fm.fit)[1]
- Betas.names = names(coef(fm.fit)[-1])
- Betas[i,Betas.names] = coef(fm.fit)[-1]
- ResidVars[i] = fm.summary$sigma^2
- R2values[i] =  fm.summary$r.squared
- }
-} else {
-  stop("ERROR! number of force.in should less or equal to factor.set")
-}
-
-
-} else if (fit.method=="Robust") {
-  for (i in manager.names) {
- reg.df = na.omit(managers.df[, c(i, factor.names)])
- fm.formula = as.formula(paste(i,"~", ".", sep=" "))
- fm.fit = lmRob(fm.formula, data=reg.df)
- fm.summary = summary(fm.fit)
- reg.list[[i]] = fm.fit
- Alphas[i] = coef(fm.fit)[1]
- Betas[i, ] = coef(fm.fit)[-1]
- ResidVars[i] = fm.summary$sigma^2
- R2values[i] =  fm.summary$r.squared
- }
-
-}  else {
-  stop("invalid method")
-}
-
-
-} else if (variable.selection == "stepwise") {
-
-  
-  if (fit.method == "OLS") {
-# loop over all assets and estimate time series regression
-for (i in manager.names) {
- reg.df = na.omit(managers.df[, c(i, factor.names)])
- fm.formula = as.formula(paste(i,"~", ".", sep=" "))
- fm.fit = step(lm(fm.formula, data=reg.df),trace=0)
- fm.summary = summary(fm.fit)
- reg.list[[i]] = fm.fit
- Alphas[i] = coef(fm.fit)[1]
- Betas.names = names(coef(fm.fit)[-1])
- Betas[i,Betas.names] = coef(fm.fit)[-1]
- ResidVars[i] = fm.summary$sigma^2
- R2values[i] =  fm.summary$r.squared
-  }
-
-
-}  else if (fit.method == "DLS"){
-  # define weight matrix 
-for (i in manager.names) {
-  reg.df = na.omit(managers.df[, c(i, factor.names)])
-  t.length <- nrow(reg.df)
-  w <- rep(decay.factor^(t.length-1),t.length)
-  for (k in 2:t.length) {
-    w[k] = w[k-1]/decay.factor 
-  }   
-# sum weigth to unitary  
- w <- w/sum(w) 
- fm.formula = as.formula(paste(i,"~", ".", sep=""))                              
- fm.fit = step(lm(fm.formula, data=reg.df,weight=w),trace=0)
- fm.summary = summary(fm.fit)
- reg.list[[i]] = fm.fit
- Alphas[i] = coef(fm.fit)[1]
- Betas.names = names(coef(fm.fit)[-1])
- Betas[i,Betas.names] = coef(fm.fit)[-1]
- ResidVars[i] = fm.summary$sigma^2
- R2values[i] =  fm.summary$r.squared
- } 
-
-} else if (fit.method=="Robust") {  
-  for (i in manager.names) {
- assign("reg.df" , na.omit(managers.df[, c(i, factor.names)]),envir = .GlobalEnv )
- fm.formula = as.formula(paste(i,"~", ".", sep=" "))
- lmRob.obj <- lmRob(fm.formula, data=reg.df)
- fm.fit = step.lmRob(lmRob.obj,trace=FALSE)
- fm.summary = summary(fm.fit)
- reg.list[[i]] = fm.fit
- Alphas[i] = coef(fm.fit)[1]
- Betas.names = names(coef(fm.fit)[-1])
- Betas[i,Betas.names] = coef(fm.fit)[-1]
- ResidVars[i] = fm.summary$sigma^2
- R2values[i] =  fm.summary$r.squared
-  }
-
-}
-  
-} else if (variable.selection == "lar" || variable.selection == "lasso") {
-  # use min Cp as criteria to choose predictors
-  
-    for (i in manager.names) {
- reg.df = na.omit(managers.df[, c(i, factor.names)])
- reg.df = as.matrix(reg.df)
- lars.fit = lars(reg.df[,factor.names],reg.df[,i],type=variable.selection,trace=FALSE)
- sum.lars <- summary(lars.fit)
- if (lars.criteria == "Cp") {
- s<- which.min(sum.lars$Cp)
- } else {
- lars.cv <- cv.lars(reg.df[,factor.names],reg.df[,i],trace=FALSE,
-                    type=variable.selection,mode="step",plot.it=FALSE)
- s<- which.min(lars.cv$cv)
-   }
- coef.lars <- predict(lars.fit,s=s,type="coef",mode="step")
- reg.list[[i]] = lars.fit
- fitted <- predict(lars.fit,reg.df[,factor.names],s=s,type="fit",mode="step")
- Alphas[i] = (fitted$fit - reg.df[,factor.names]%*%coef.lars$coefficients)[1]
- Betas.names = names(coef.lars$coefficients)
- Betas[i,Betas.names] = coef.lars$coefficients
- ResidVars[i] = sum.lars$Rss[s]/(nrow(reg.df)-s)
- R2values[i] =  lars.fit$R2[s]
-  } 
- 
-  }  else  {
-  stop("wrong method")
-}
-  
-
-  
-  
-  
-  # return results
-# add option to return list
-ans = list (asset.fit = reg.list,
-            alpha.vec = Alphas,
-            beta.mat  = Betas,
-            r2.vec    = R2values,
-            residVars.vec = ResidVars,
-            call      = this.call,
-            ret.assets = ret.assets,
-            factors   = factors,
-            variable.selection = variable.selection
-            )
-class(ans) = "MacroFactorModel"
-return(ans)
-}
-
+#' Fit macroeconomic factor model by time series regression techniques.
+#' 
+#' Fit macroeconomic factor model by time series regression techniques. It
+#' creates the class of "MacroFactorModel".
+#' 
+#' If \code{Robust} is chosen, there is no subsets but all factors will be
+#' used.  Cp is defined in
+#' http://www-stat.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf. p17.
+#' 
+#' @param assets.names  names of assets returns.
+#' @param factors.names names of factors returns.
+#' @param factor.set scalar, number of factors
+#' @param data a vector, matrix, data.frame, xts, timeSeries or zoo object with asset returns 
+#' and factors retunrs rownames 
+#' @param fit.method "OLS" is ordinary least squares method, "DLS" is
+#' discounted least squares method. Discounted least squares (DLS) estimation
+#' is weighted least squares estimation with exponentially declining weights
+#' that sum to unity. "Robust"
+#' @param variable.selection "stepwise" is traditional forward/backward
+#' stepwise OLS regression, starting from the initial set of factors, that adds
+#' factors only if the regression fit as measured by the Bayesian Information
+#' Criteria (BIC) or Akaike Information Criteria (AIC) can be done using the R
+#' function step() from the stats package. If \code{Robust} is chosen, the
+#' function step.lmRob in Robust package will be used. "all subsets" is
+#' Traditional all subsets regression can be done using the R function
+#' regsubsets() from the package leaps. "lar" , "lasso" is based on package
+#' "lars", linear angle regression.
+#' @param decay.factor for DLS. Default is 0.95.
+#' @param nvmax control option for all subsets. maximum size of subsets to
+#' examine
+#' @param force.in control option for all subsets. The factors that should be
+#' in all models.
+#' @param subsets.method control option for all subsets. se exhaustive search,
+#' forward selection, backward selection or sequential replacement to search.
+#' @param lars.criteria either choose minimum "Cp": unbiased estimator of the
+#' true rist or "cv" 10 folds cross-validation. See detail.
+#' @return an S3 object containing
+#'   \item{asset.fit}{Fit objects for each asset. This is the class "lm" for
+#' each object.}
+#'   \item{alpha.vec}{N x 1 Vector of estimated alphas.}
+#'   \item{beta.mat}{N x K Matrix of estimated betas.}
+#'   \item{r2.vec}{N x 1 Vector of R-square values.}
+#'   \item{residVars.vec}{N x 1 Vector of residual variances.}
+#'   \item{call}{function call.}
+#'   \item{ret.assets}{Assets returns of input data.}
+#'   \item{factors Factors of input data.}
+#'   \item{variable.selection variables selected by the user.}
+#' @author Eric Zivot and Yi-An Chen.
+#' @references 1. Efron, Hastie, Johnstone and Tibshirani (2002) "Least Angle
+#' Regression" (with discussion) Annals of Statistics; see also
+#' http://www-stat.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf.  2.
+#' Hastie, Tibshirani and Friedman (2008) Elements of Statistical Learning 2nd
+#' edition, Springer, NY.
+#' @examples
+#'  \dontrun{
+#' # load data from the database
+#' data(managers.df)
+#' ret.assets = managers.df[,(1:6)]
+#' factors    = managers.df[,(7:9)]
+#' # fit the factor model with OLS
+#' fit <- fitMacroeconomicFactorModel(ret.assets,factors,fit.method="OLS",
+#'                                  variable.selection="all subsets")
+#' # summary of HAM1 
+#' summary(fit$asset.fit$HAM1)
+#' # plot actual vs. fitted over time for HAM1
+#' # use chart.TimeSeries() function from PerformanceAnalytics package
+#' dataToPlot = cbind(fitted(fit$asset.fit$HAM1), na.omit(managers.df$HAM1))
+#' colnames(dataToPlot) = c("Fitted","Actual")
+#' chart.TimeSeries(dataToPlot, main="FM fit for HAM1",
+#'                  colorset=c("black","blue"), legend.loc="bottomleft")
+#'  }
+fitMacroeconomicFactorModel <-
+function(assets.names, factors.names, data=data, factor.set = 3, 
+          fit.method=c("OLS","DLS","Robust"),
+          variable.selection=c("stepwise", "all subsets", "lar", "lasso"),
+          decay.factor = 0.95,nvmax=8,force.in=NULL,
+          subsets.method = c("exhaustive", "backward", "forward", "seqrep"),
+          lars.criteria = c("Cp","cv")) {
+  
+  require(PerformanceAnalytics)
+  require(leaps)
+  require(lars)
+  require(robust)
+  require(MASS)
+  this.call <- match.call()
+  
+  # convert data into xts and hereafter compute in xts
+  data.xts <- checkData(data) 
+  #assets.names <- colnames(data.xts)[1:6] # erase later 
+  #factors.names <- colnames(data.xts)[7:9]
+  reg.xts <- merge(data.xts[,assets.names],data.xts[,factors.names])
+  
+# if (is.data.frame(ret.assets) & is.data.frame(factors) ) {
+#   assets.names = colnames(ret.assets)
+#   factors.names  = colnames(factors)
+#   reg.xts   = cbind(ret.assets,factors)
+# } else {
+#   stop("ret.assets and beta.mat must be in class data.frame")
+# }
+                                          
+
+
+
+# initialize list object to hold regression objects
+reg.list = list()
+
+
+# initialize matrices and vectors to hold estimated betas,
+# residual variances, and R-square values from
+# fitted factor models
+
+Alphas = ResidVars = R2values = rep(0, length(assets.names))
+names(Alphas) = names(ResidVars) = names(R2values) = assets.names
+Betas = matrix(0, length(assets.names), length(factors.names))
+colnames(Betas) = factors.names
+rownames(Betas) = assets.names
+
+
+
+
+
+
+if (variable.selection == "all subsets") {
+# estimate multiple factor model using loop b/c of unequal histories for the hedge funds
+
+
+
+if (fit.method == "OLS") {
+
+if (factor.set == length(force.in)) {
+  for (i in assets.names) {
+ reg.df = na.omit(reg.xts[, c(i, force.in)])
+ fm.formula = as.formula(paste(i,"~", ".", sep=" "))
+ fm.fit = lm(fm.formula, data=reg.df)
+ fm.summary = summary(fm.fit)
+ reg.list[[i]] = fm.fit
+ Alphas[i] = coef(fm.fit)[1]
+ Betas.names = names(coef(fm.fit)[-1])
+ Betas[i,Betas.names] = coef(fm.fit)[-1]
+ ResidVars[i] = fm.summary$sigma^2
+ R2values[i] =  fm.summary$r.squared
+  }
+}  else if (factor.set > length(force.in)) {
+    
+for (i in assets.names) {
+ reg.df = na.omit(reg.xts[, c(i, factors.names)])
+ fm.formula = as.formula(paste(i,"~", ".", sep=" "))
+ fm.subsets <- regsubsets(fm.formula,data=reg.df,nvmax=nvmax,force.in=force.in,
+                          method=subsets.method)
+ sum.sub <- summary(fm.subsets)
+ reg.df <- na.omit(reg.xts[,c(i,names(which(sum.sub$which[as.character(factor.set),-1]==TRUE))  )])
+ fm.fit = lm(fm.formula, data=reg.df)
+ fm.summary = summary(fm.fit)
+ reg.list[[i]] = fm.fit
+ Alphas[i] = coef(fm.fit)[1]
+ Betas.names = names(coef(fm.fit)[-1])
+ Betas[i,Betas.names] = coef(fm.fit)[-1]
+ ResidVars[i] = fm.summary$sigma^2
+ R2values[i] =  fm.summary$r.squared
+  }
+} else {
+  stop("ERROR! number of force.in should less or equal to factor.set")
+}
+  
+
+
+
+} else if (fit.method == "DLS"){
+  
+
+  if (factor.set == length(force.in)) {  
+  # define weight matrix 
+for (i in assets.names) {
+  reg.df = na.omit(reg.xts[, c(i, force.in)])
+ t.length <- nrow(reg.df)
+ w <- rep(decay.factor^(t.length-1),t.length)
+   for (k in 2:t.length) {
+    w[k] = w[k-1]/decay.factor 
+  }   
+# sum weigth to unitary  
+ w <- w/sum(w) 
+ fm.formula = as.formula(paste(i,"~", ".", sep=""))                              
+ fm.fit = lm(fm.formula, data=reg.df,weight=w)
+ fm.summary = summary(fm.fit)
+ reg.list[[i]] = fm.fit
+ Alphas[i] = coef(fm.fit)[1]
+ Betas.names = names(coef(fm.fit)[-1])
+ Betas[i,Betas.names] = coef(fm.fit)[-1]
+ ResidVars[i] = fm.summary$sigma^2
+ R2values[i] =  fm.summary$r.squared
+ } 
+} else if  (factor.set > length(force.in)) {
+  for (i in assets.names) {
+  reg.df = na.omit(reg.xts[, c(i, factors.names)])
+  t.length <- nrow(reg.df)
+  w <- rep(decay.factor^(t.length-1),t.length)
+  for (k in 2:t.length) {
+  w[k] = w[k-1]/decay.factor 
+  }   
+  w <- w/sum(w) 
+ fm.formula = as.formula(paste(i,"~", ".", sep=""))                              
+ fm.subsets <- regsubsets(fm.formula,data=reg.df,nvmax=nvmax,force.in=force.in,
+                          method=subsets.method,weights=w) # w is called from global envio
+ sum.sub <- summary(fm.subsets)
+ reg.df <- na.omit(reg.xts[,c(i,names(which(sum.sub$which[as.character(factor.set),-1]==TRUE))  )])
+ fm.fit = lm(fm.formula, data=reg.df,weight=w)
+ fm.summary = summary(fm.fit)
+ reg.list[[i]] = fm.fit
+ Alphas[i] = coef(fm.fit)[1]
+ Betas.names = names(coef(fm.fit)[-1])
+ Betas[i,Betas.names] = coef(fm.fit)[-1]
+ ResidVars[i] = fm.summary$sigma^2
+ R2values[i] =  fm.summary$r.squared
+ }
+} else {
+  stop("ERROR! number of force.in should less or equal to factor.set")
+}
+
+
+} else if (fit.method=="Robust") {
+  for (i in assets.names) {
+ reg.df = na.omit(reg.xts[, c(i, factors.names)])
+ fm.formula = as.formula(paste(i,"~", ".", sep=" "))
+ fm.fit = lmRob(fm.formula, data=reg.df)
+ fm.summary = summary(fm.fit)
+ reg.list[[i]] = fm.fit
+ Alphas[i] = coef(fm.fit)[1]
+ Betas[i, ] = coef(fm.fit)[-1]
+ ResidVars[i] = fm.summary$sigma^2
+ R2values[i] =  fm.summary$r.squared
+ }
+
+}  else {
+  stop("invalid method")
+}
+
+
+} else if (variable.selection == "stepwise") {
+
+  
+  if (fit.method == "OLS") {
+# loop over all assets and estimate time series regression
+for (i in assets.names) {
+ reg.df = na.omit(reg.xts[, c(i, factors.names)])
+ fm.formula = as.formula(paste(i,"~", ".", sep=" "))
+ fm.fit = step(lm(fm.formula, data=reg.df),trace=0)
+ fm.summary = summary(fm.fit)
+ reg.list[[i]] = fm.fit
+ Alphas[i] = coef(fm.fit)[1]
+ Betas.names = names(coef(fm.fit)[-1])
+ Betas[i,Betas.names] = coef(fm.fit)[-1]
+ ResidVars[i] = fm.summary$sigma^2
+ R2values[i] =  fm.summary$r.squared
+  }
+
+
+}  else if (fit.method == "DLS"){
+  # define weight matrix 
+for (i in assets.names) {
+  reg.df = na.omit(reg.xts[, c(i, factors.names)])
+  t.length <- nrow(reg.df)
+  w <- rep(decay.factor^(t.length-1),t.length)
+  for (k in 2:t.length) {
+    w[k] = w[k-1]/decay.factor 
+  }   
+# sum weigth to unitary  
+ w <- w/sum(w) 
+ fm.formula = as.formula(paste(i,"~", ".", sep=""))                              
+ fm.fit = step(lm(fm.formula, data=reg.df,weight=w),trace=0)
+ fm.summary = summary(fm.fit)
+ reg.list[[i]] = fm.fit
+ Alphas[i] = coef(fm.fit)[1]
+ Betas.names = names(coef(fm.fit)[-1])
+ Betas[i,Betas.names] = coef(fm.fit)[-1]
+ ResidVars[i] = fm.summary$sigma^2
+ R2values[i] =  fm.summary$r.squared
+ } 
+
+} else if (fit.method=="Robust") {  
+  for (i in assets.names) {
+ assign("reg.df" , na.omit(reg.xts[, c(i, factors.names)]),envir = .GlobalEnv )
+ fm.formula = as.formula(paste(i,"~", ".", sep=" "))
+ lmRob.obj <- lmRob(fm.formula, data=reg.df)
+ fm.fit = step.lmRob(lmRob.obj,trace=FALSE)
+ fm.summary = summary(fm.fit)
+ reg.list[[i]] = fm.fit
+ Alphas[i] = coef(fm.fit)[1]
+ Betas.names = names(coef(fm.fit)[-1])
+ Betas[i,Betas.names] = coef(fm.fit)[-1]
+ ResidVars[i] = fm.summary$sigma^2
+ R2values[i] =  fm.summary$r.squared
+  }
+
+}
+  
+} else if (variable.selection == "lar" || variable.selection == "lasso") {
+  # use min Cp as criteria to choose predictors
+  
+    for (i in assets.names) {
+ reg.df = na.omit(reg.xts[, c(i, factors.names)])
+ reg.df = as.matrix(reg.df)
+ lars.fit = lars(reg.df[,factors.names],reg.df[,i],type=variable.selection,trace=FALSE)
+ sum.lars <- summary(lars.fit)
+ if (lars.criteria == "Cp") {
+ s<- which.min(sum.lars$Cp)
+ } else {
+ lars.cv <- cv.lars(reg.df[,factors.names],reg.df[,i],trace=FALSE,
+                    type=variable.selection,mode="step",plot.it=FALSE)
+ s<- which.min(lars.cv$cv)
+   }
+ coef.lars <- predict(lars.fit,s=s,type="coef",mode="step")
+ reg.list[[i]] = lars.fit
+ fitted <- predict(lars.fit,reg.df[,factors.names],s=s,type="fit",mode="step")
+ Alphas[i] = (fitted$fit - reg.df[,factors.names]%*%coef.lars$coefficients)[1]
+ Betas.names = names(coef.lars$coefficients)
+ Betas[i,Betas.names] = coef.lars$coefficients
+ ResidVars[i] = sum.lars$Rss[s]/(nrow(reg.df)-s)
+ R2values[i] =  lars.fit$R2[s]
+  } 
+ 
+  }  else  {
+  stop("wrong method")
+}
+  
+
+  
+  
+  
+  # return results
+# add option to return list
+ans = list (asset.fit = reg.list,
+            alpha.vec = Alphas,
+            beta.mat  = Betas,
+            r2.vec    = R2values,
+            residVars.vec = ResidVars,
+            call      = this.call,
+            ret.assets = ret.assets,
+            factors   = factors,
+            variable.selection = variable.selection
+            )
+class(ans) = "MacroFactorModel"
+return(ans)
+}
+



More information about the Returnanalytics-commits mailing list