[Returnanalytics-commits] r3556 - in pkg/FactorAnalytics: R man vignettes

Fri Nov 21 13:26:00 CET 2014

Author: pragnya
Date: 2014-11-21 13:26:00 +0100 (Fri, 21 Nov 2014)
New Revision: 3556

Modified:
   pkg/FactorAnalytics/R/fitTsfm.R
   pkg/FactorAnalytics/R/fitTsfm.control.R
   pkg/FactorAnalytics/man/fitTsfm.Rd
   pkg/FactorAnalytics/man/fitTsfm.control.Rd
   pkg/FactorAnalytics/vignettes/fitTsfm_vignette.Rnw
   pkg/FactorAnalytics/vignettes/fitTsfm_vignette.pdf
Log:
Removed subset.size control parameter for parsimony.

Modified: pkg/FactorAnalytics/R/fitTsfm.R
===================================================================

--- pkg/FactorAnalytics/R/fitTsfm.R	2014-11-21 11:25:12 UTC (rev 3555)
+++ pkg/FactorAnalytics/R/fitTsfm.R	2014-11-21 12:26:00 UTC (rev 3556)
@@ -159,7 +159,7 @@
 #' fit.sub <- fitTsfm(asset.names=colnames(managers[,(1:6)]),
 #'                    factor.names=colnames(managers[,(7:9)]), 
 #'                    data=managers, variable.selection="subsets", 
-#'                    method="exhaustive", subset.size=2) 
+#'                    method="exhaustive", nvmin=2) 
 #' 
 #' # example using "lars" variable selection and subtracting risk-free rate
 #' fit.lar <- fitTsfm(asset.names=colnames(managers[,(1:6)]),
@@ -199,7 +199,6 @@
   # extract arguments to pass to different fit and variable selection functions
   decay <- control$decay
   nvmin <- control$nvmin
-  subset.size <- control$subset.size
   lars.criterion <- control$lars.criterion
   m1 <- match(c("weights","model","x","y","qr"), 
               names(control), 0L)
@@ -270,7 +269,7 @@
   } else if (variable.selection == "subsets") {
     reg.list <- SelectAllSubsets(dat.xts, asset.names, factor.names, fit.method, 
                                  lm.args, lmRob.args, regsubsets.args, 
-                                 nvmin, subset.size, decay)
+                                 nvmin, decay)
   } else if (variable.selection == "lars") {
     result.lars <- SelectLars(dat.xts, asset.names, factor.names, lars.args, 
                               cv.lars.args, lars.criterion)
@@ -368,7 +367,7 @@
 #
 SelectAllSubsets <- function(dat.xts, asset.names, factor.names, fit.method, 
                              lm.args, lmRob.args, regsubsets.args, nvmin, 
-                             subset.size, decay) {
+                             decay) {
   
   # initialize list object to hold the fitted objects
   reg.list <- list()
@@ -390,16 +389,12 @@
                                         regsubsets.args))
     sum.sub <- summary(fm.subsets)
     
-    # choose best model of a given subset.size (or) 
+    # choose best model of a given subset size nvmax=nvmin (or) 
     # best model amongst subset sizes in [nvmin, nvmax]
-    if (!is.null(subset.size)) { 
-      names.sub <- names(which(sum.sub$which[subset.size,-1]==TRUE))
-      bic <- sum.sub$bic[subset.size - nvmin + 1]
-    } else { 
-      best.size <- which.min(sum.sub$bic[nvmin:length(sum.sub$bic)]) + nvmin -1
-      names.sub <- names(which(sum.sub$which[best.size,-1]==TRUE))
-      bic <- min(sum.sub$bic[nvmin:length(sum.sub$bic)])
-    }
+    nvmax <- length(sum.sub$bic)
+    best.size <- which.min(sum.sub$bic[nvmin:nvmax]) + nvmin -1
+    names.sub <- names(which(sum.sub$which[best.size,-1]==TRUE))
+    bic <- min(sum.sub$bic[nvmin:nvmax])
     
     # completely remove NA cases for chosen subset
     reg.xts <- na.omit(dat.xts[,c(i,names.sub)])

Modified: pkg/FactorAnalytics/R/fitTsfm.control.R
===================================================================
--- pkg/FactorAnalytics/R/fitTsfm.control.R	2014-11-21 11:25:12 UTC (rev 3555)
+++ pkg/FactorAnalytics/R/fitTsfm.control.R	2014-11-21 12:26:00 UTC (rev 3556)
@@ -77,10 +77,6 @@
 #' "exhaustive".
 #' @param really.big option for \code{"subsets"}; Must be \code{TRUE} to 
 #' perform exhaustive search on more than 50 variables.
-#' @param subset.size number of factors required in the factor model; an 
-#' option for \code{"subsets"} variable selection. \code{NULL} selects the 
-#' best model (BIC) from amongst subset sizes in [\code{nvmin},\code{nvmax}]. 
-#' Default is \code{NULL}.
 #' @param type option for \code{"lars"}. One of "lasso", "lar", 
 #' "forward.stagewise" or "stepwise". The names can be abbreviated to any 
 #' unique substring. Default is "lasso".
@@ -120,7 +116,7 @@
 #' @examples
 #' 
 #' # check argument list passed by fitTsfm.control
-#' tsfm.ctrl <- fitTsfm.control(method="exhaustive", subset.size=2)
+#' tsfm.ctrl <- fitTsfm.control(method="exhaustive", nvmin=2)
 #' print(tsfm.ctrl)
 #' 
 #' # used internally by fitTsfm
@@ -128,7 +124,7 @@
 #' fit <- fitTsfm(asset.names=colnames(managers[,(1:6)]),
 #'                factor.names=colnames(managers[,(7:9)]), 
 #'                data=managers, variable.selection="subsets", 
-#'                method="exhaustive", subset.size=2)
+#'                method="exhaustive", nvmin=2)
 #' 
 #' @export
 
@@ -136,8 +132,8 @@
                             qr=TRUE, nrep=NULL, scope, scale, direction, 
                             trace=FALSE, steps=1000, k=2, nvmin=1, nvmax=8, 
                             force.in=NULL, force.out=NULL, method, 
-                            really.big=FALSE, subset.size=NULL, type, 
-                            normalize=TRUE, eps=.Machine$double.eps, max.steps, 
+                            really.big=FALSE, type, normalize=TRUE, 
+                            eps=.Machine$double.eps, max.steps, 
                             lars.criterion="Cp", K=10) {
   
   # get the user-specified arguments (that have no defaults)
@@ -172,14 +168,12 @@
   if (!is.logical(really.big) || length(really.big) != 1) {
     stop("Invalid argument: control parameter 'really.big' must be logical")
   }
-  if (!is.null(subset.size)) {
-    if (subset.size <= 0 || round(subset.size) != subset.size) {
-      stop("Control parameter 'subset.size' must be a positive integer or NULL")
-    }
-    if (nvmax < subset.size || subset.size < length(force.in)) {
-      stop("Invaid Argument: nvmax should be >= subset.size and subset.size 
+  if (nvmin <= 0 || round(nvmin) != nvmin) {
+    stop("Control parameter 'nvmin' must be a positive integer")
+  }
+  if (nvmax < nvmin || nvmin < length(force.in)) {
+    stop("Invaid Argument: nvmax should be >= nvmin and nvmin 
            should be >= length(force.in)")
-    }
   }
   if (!is.logical(normalize) || length(normalize) != 1) {
     stop("Invalid argument: control parameter 'normalize' must be logical")
@@ -192,8 +186,7 @@
   result <- c(args, list(decay=decay, model=model, x=x, y=y, qr=qr, nrep=nrep, 
                          trace=trace, steps=steps, k=k, nvmin=nvmin, 
                          nvmax=nvmax, force.in=force.in, force.out=force.out, 
-                         really.big=really.big, subset.size=subset.size, 
-                         normalize=normalize, eps=eps, 
+                         really.big=really.big, normalize=normalize, eps=eps, 
                          lars.criterion=lars.criterion, K=K))
   return(result)
 }

Modified: pkg/FactorAnalytics/man/fitTsfm.Rd
===================================================================
--- pkg/FactorAnalytics/man/fitTsfm.Rd	2014-11-21 11:25:12 UTC (rev 3555)
+++ pkg/FactorAnalytics/man/fitTsfm.Rd	2014-11-21 12:26:00 UTC (rev 3556)
@@ -160,7 +160,7 @@
 fit.sub <- fitTsfm(asset.names=colnames(managers[,(1:6)]),
                    factor.names=colnames(managers[,(7:9)]),
                    data=managers, variable.selection="subsets",
-                   method="exhaustive", subset.size=2)
+                   method="exhaustive", nvmin=2)
 
 # example using "lars" variable selection and subtracting risk-free rate
 fit.lar <- fitTsfm(asset.names=colnames(managers[,(1:6)]),

Modified: pkg/FactorAnalytics/man/fitTsfm.control.Rd
===================================================================
--- pkg/FactorAnalytics/man/fitTsfm.control.Rd	2014-11-21 11:25:12 UTC (rev 3555)
+++ pkg/FactorAnalytics/man/fitTsfm.control.Rd	2014-11-21 12:26:00 UTC (rev 3556)
@@ -6,9 +6,9 @@
 fitTsfm.control(decay = 0.95, weights, model = TRUE, x = FALSE,
   y = FALSE, qr = TRUE, nrep = NULL, scope, scale, direction,
   trace = FALSE, steps = 1000, k = 2, nvmin = 1, nvmax = 8,
-  force.in = NULL, force.out = NULL, method, really.big = FALSE,
-  subset.size = NULL, type, normalize = TRUE, eps = .Machine$double.eps,
-  max.steps, lars.criterion = "Cp", K = 10)
+  force.in = NULL, force.out = NULL, method, really.big = FALSE, type,
+  normalize = TRUE, eps = .Machine$double.eps, max.steps,
+  lars.criterion = "Cp", K = 10)
 }
 \arguments{
 \item{decay}{a scalar in (0, 1] to specify the decay factor for "DLS".
@@ -76,11 +76,6 @@
 \item{really.big}{option for \code{"subsets"}; Must be \code{TRUE} to
 perform exhaustive search on more than 50 variables.}
 
-\item{subset.size}{number of factors required in the factor model; an
-option for \code{"subsets"} variable selection. \code{NULL} selects the
-best model (BIC) from amongst subset sizes in [\code{nvmin},\code{nvmax}].
-Default is \code{NULL}.}
-
 \item{type}{option for \code{"lars"}. One of "lasso", "lar",
 "forward.stagewise" or "stepwise". The names can be abbreviated to any
 unique substring. Default is "lasso".}
@@ -144,7 +139,7 @@
 }
 \examples{
 # check argument list passed by fitTsfm.control
-tsfm.ctrl <- fitTsfm.control(method="exhaustive", subset.size=2)
+tsfm.ctrl <- fitTsfm.control(method="exhaustive", nvmin=2)
 print(tsfm.ctrl)
 
 # used internally by fitTsfm
@@ -152,7 +147,7 @@
 fit <- fitTsfm(asset.names=colnames(managers[,(1:6)]),
                factor.names=colnames(managers[,(7:9)]),
                data=managers, variable.selection="subsets",
-               method="exhaustive", subset.size=2)
+               method="exhaustive", nvmin=2)
 }
 \author{
 Sangeetha Srinivasan

Modified: pkg/FactorAnalytics/vignettes/fitTsfm_vignette.Rnw
===================================================================
--- pkg/FactorAnalytics/vignettes/fitTsfm_vignette.Rnw	2014-11-21 11:25:12 UTC (rev 3555)
+++ pkg/FactorAnalytics/vignettes/fitTsfm_vignette.Rnw	2014-11-21 12:26:00 UTC (rev 3556)
@@ -178,7 +178,7 @@
 fit.sub <- fitTsfm(asset.names=colnames(managers[,(1:6)]), 
                    factor.names=colnames(managers[,(7:9)]), data=managers, 
                    rf.name="US 3m TR", mkt.name="SP500 TR", 
-                   variable.selection="subsets", subset.size=4)
+                   variable.selection="subsets", nvmin=4, nvmax=4)
 fit.sub$beta
 fit.sub$r2
 @
@@ -213,8 +213,7 @@
 There are 4 other arguments passed to \code{fitTsfm.control} that determine the type of factor model fit chosen.
 \begin{itemize}
 \item \verb"decay": Determines the decay factor for \code{fit.method="DLS"}, which performs exponentially weighted least squares, with weights adding to unity.
-\item \verb"nvmin": The lower limit for the range of subset sizes from which the best model (BIC) is found when performing \verb"subsets" selection. Note that the upper limit was already passed to \verb"regsubsets" function.
-\item \verb"subset.size": Number of factors required in the factor model when performing \verb"subsets" selection. This might be meaningful when looking for the best model of a certain size (perhaps for parsimony, perhaps to compare with a different model of the same size, perhaps to avoid over-fitting/ data dredging etc.) Alternately, users can specify \code{NULL} to get the best model from amongst subset sizes in the range \code{[nvmin,nvmax]}.
+\item \verb"nvmin": The lower limit for the range of subset sizes from which the best model (BIC) is found when performing \verb"subsets" selection. Note that the upper limit was already passed to \verb"regsubsets" function. By specifying \code{nvmin=nvmax}, users can obtain the best model of a particular size (meaningful to those who want a parsimonious model, or to compare with a different model of the same size, or perhaps to avoid over-fitting/ data dredging etc.).
 \item \verb"lars.criterion": An option (one of "Cp" or "cv") to assess model selection for the \code{"lars"} variable selection method. "Cp" is Mallow's Cp statistic and "cv" is K-fold cross-validated mean squared prediction error.
 \end{itemize}
 

Modified: pkg/FactorAnalytics/vignettes/fitTsfm_vignette.pdf
===================================================================
(Binary files differ)