[Returnanalytics-commits] r3556 - in pkg/FactorAnalytics: R man vignettes
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Fri Nov 21 13:26:00 CET 2014
Author: pragnya
Date: 2014-11-21 13:26:00 +0100 (Fri, 21 Nov 2014)
New Revision: 3556
Modified:
pkg/FactorAnalytics/R/fitTsfm.R
pkg/FactorAnalytics/R/fitTsfm.control.R
pkg/FactorAnalytics/man/fitTsfm.Rd
pkg/FactorAnalytics/man/fitTsfm.control.Rd
pkg/FactorAnalytics/vignettes/fitTsfm_vignette.Rnw
pkg/FactorAnalytics/vignettes/fitTsfm_vignette.pdf
Log:
Removed subset.size control parameter for parsimony.
Modified: pkg/FactorAnalytics/R/fitTsfm.R
===================================================================
--- pkg/FactorAnalytics/R/fitTsfm.R 2014-11-21 11:25:12 UTC (rev 3555)
+++ pkg/FactorAnalytics/R/fitTsfm.R 2014-11-21 12:26:00 UTC (rev 3556)
@@ -159,7 +159,7 @@
#' fit.sub <- fitTsfm(asset.names=colnames(managers[,(1:6)]),
#' factor.names=colnames(managers[,(7:9)]),
#' data=managers, variable.selection="subsets",
-#' method="exhaustive", subset.size=2)
+#' method="exhaustive", nvmin=2)
#'
#' # example using "lars" variable selection and subtracting risk-free rate
#' fit.lar <- fitTsfm(asset.names=colnames(managers[,(1:6)]),
@@ -199,7 +199,6 @@
# extract arguments to pass to different fit and variable selection functions
decay <- control$decay
nvmin <- control$nvmin
- subset.size <- control$subset.size
lars.criterion <- control$lars.criterion
m1 <- match(c("weights","model","x","y","qr"),
names(control), 0L)
@@ -270,7 +269,7 @@
} else if (variable.selection == "subsets") {
reg.list <- SelectAllSubsets(dat.xts, asset.names, factor.names, fit.method,
lm.args, lmRob.args, regsubsets.args,
- nvmin, subset.size, decay)
+ nvmin, decay)
} else if (variable.selection == "lars") {
result.lars <- SelectLars(dat.xts, asset.names, factor.names, lars.args,
cv.lars.args, lars.criterion)
@@ -368,7 +367,7 @@
#
SelectAllSubsets <- function(dat.xts, asset.names, factor.names, fit.method,
lm.args, lmRob.args, regsubsets.args, nvmin,
- subset.size, decay) {
+ decay) {
# initialize list object to hold the fitted objects
reg.list <- list()
@@ -390,16 +389,12 @@
regsubsets.args))
sum.sub <- summary(fm.subsets)
- # choose best model of a given subset.size (or)
+ # choose best model of a given subset size nvmax=nvmin (or)
# best model amongst subset sizes in [nvmin, nvmax]
- if (!is.null(subset.size)) {
- names.sub <- names(which(sum.sub$which[subset.size,-1]==TRUE))
- bic <- sum.sub$bic[subset.size - nvmin + 1]
- } else {
- best.size <- which.min(sum.sub$bic[nvmin:length(sum.sub$bic)]) + nvmin -1
- names.sub <- names(which(sum.sub$which[best.size,-1]==TRUE))
- bic <- min(sum.sub$bic[nvmin:length(sum.sub$bic)])
- }
+ nvmax <- length(sum.sub$bic)
+ best.size <- which.min(sum.sub$bic[nvmin:nvmax]) + nvmin -1
+ names.sub <- names(which(sum.sub$which[best.size,-1]==TRUE))
+ bic <- min(sum.sub$bic[nvmin:nvmax])
# completely remove NA cases for chosen subset
reg.xts <- na.omit(dat.xts[,c(i,names.sub)])
Modified: pkg/FactorAnalytics/R/fitTsfm.control.R
===================================================================
--- pkg/FactorAnalytics/R/fitTsfm.control.R 2014-11-21 11:25:12 UTC (rev 3555)
+++ pkg/FactorAnalytics/R/fitTsfm.control.R 2014-11-21 12:26:00 UTC (rev 3556)
@@ -77,10 +77,6 @@
#' "exhaustive".
#' @param really.big option for \code{"subsets"}; Must be \code{TRUE} to
#' perform exhaustive search on more than 50 variables.
-#' @param subset.size number of factors required in the factor model; an
-#' option for \code{"subsets"} variable selection. \code{NULL} selects the
-#' best model (BIC) from amongst subset sizes in [\code{nvmin},\code{nvmax}].
-#' Default is \code{NULL}.
#' @param type option for \code{"lars"}. One of "lasso", "lar",
#' "forward.stagewise" or "stepwise". The names can be abbreviated to any
#' unique substring. Default is "lasso".
@@ -120,7 +116,7 @@
#' @examples
#'
#' # check argument list passed by fitTsfm.control
-#' tsfm.ctrl <- fitTsfm.control(method="exhaustive", subset.size=2)
+#' tsfm.ctrl <- fitTsfm.control(method="exhaustive", nvmin=2)
#' print(tsfm.ctrl)
#'
#' # used internally by fitTsfm
@@ -128,7 +124,7 @@
#' fit <- fitTsfm(asset.names=colnames(managers[,(1:6)]),
#' factor.names=colnames(managers[,(7:9)]),
#' data=managers, variable.selection="subsets",
-#' method="exhaustive", subset.size=2)
+#' method="exhaustive", nvmin=2)
#'
#' @export
@@ -136,8 +132,8 @@
qr=TRUE, nrep=NULL, scope, scale, direction,
trace=FALSE, steps=1000, k=2, nvmin=1, nvmax=8,
force.in=NULL, force.out=NULL, method,
- really.big=FALSE, subset.size=NULL, type,
- normalize=TRUE, eps=.Machine$double.eps, max.steps,
+ really.big=FALSE, type, normalize=TRUE,
+ eps=.Machine$double.eps, max.steps,
lars.criterion="Cp", K=10) {
# get the user-specified arguments (that have no defaults)
@@ -172,14 +168,12 @@
if (!is.logical(really.big) || length(really.big) != 1) {
stop("Invalid argument: control parameter 'really.big' must be logical")
}
- if (!is.null(subset.size)) {
- if (subset.size <= 0 || round(subset.size) != subset.size) {
- stop("Control parameter 'subset.size' must be a positive integer or NULL")
- }
- if (nvmax < subset.size || subset.size < length(force.in)) {
- stop("Invaid Argument: nvmax should be >= subset.size and subset.size
+ if (nvmin <= 0 || round(nvmin) != nvmin) {
+ stop("Control parameter 'nvmin' must be a positive integer")
+ }
+ if (nvmax < nvmin || nvmin < length(force.in)) {
+ stop("Invaid Argument: nvmax should be >= nvmin and nvmin
should be >= length(force.in)")
- }
}
if (!is.logical(normalize) || length(normalize) != 1) {
stop("Invalid argument: control parameter 'normalize' must be logical")
@@ -192,8 +186,7 @@
result <- c(args, list(decay=decay, model=model, x=x, y=y, qr=qr, nrep=nrep,
trace=trace, steps=steps, k=k, nvmin=nvmin,
nvmax=nvmax, force.in=force.in, force.out=force.out,
- really.big=really.big, subset.size=subset.size,
- normalize=normalize, eps=eps,
+ really.big=really.big, normalize=normalize, eps=eps,
lars.criterion=lars.criterion, K=K))
return(result)
}
Modified: pkg/FactorAnalytics/man/fitTsfm.Rd
===================================================================
--- pkg/FactorAnalytics/man/fitTsfm.Rd 2014-11-21 11:25:12 UTC (rev 3555)
+++ pkg/FactorAnalytics/man/fitTsfm.Rd 2014-11-21 12:26:00 UTC (rev 3556)
@@ -160,7 +160,7 @@
fit.sub <- fitTsfm(asset.names=colnames(managers[,(1:6)]),
factor.names=colnames(managers[,(7:9)]),
data=managers, variable.selection="subsets",
- method="exhaustive", subset.size=2)
+ method="exhaustive", nvmin=2)
# example using "lars" variable selection and subtracting risk-free rate
fit.lar <- fitTsfm(asset.names=colnames(managers[,(1:6)]),
Modified: pkg/FactorAnalytics/man/fitTsfm.control.Rd
===================================================================
--- pkg/FactorAnalytics/man/fitTsfm.control.Rd 2014-11-21 11:25:12 UTC (rev 3555)
+++ pkg/FactorAnalytics/man/fitTsfm.control.Rd 2014-11-21 12:26:00 UTC (rev 3556)
@@ -6,9 +6,9 @@
fitTsfm.control(decay = 0.95, weights, model = TRUE, x = FALSE,
y = FALSE, qr = TRUE, nrep = NULL, scope, scale, direction,
trace = FALSE, steps = 1000, k = 2, nvmin = 1, nvmax = 8,
- force.in = NULL, force.out = NULL, method, really.big = FALSE,
- subset.size = NULL, type, normalize = TRUE, eps = .Machine$double.eps,
- max.steps, lars.criterion = "Cp", K = 10)
+ force.in = NULL, force.out = NULL, method, really.big = FALSE, type,
+ normalize = TRUE, eps = .Machine$double.eps, max.steps,
+ lars.criterion = "Cp", K = 10)
}
\arguments{
\item{decay}{a scalar in (0, 1] to specify the decay factor for "DLS".
@@ -76,11 +76,6 @@
\item{really.big}{option for \code{"subsets"}; Must be \code{TRUE} to
perform exhaustive search on more than 50 variables.}
-\item{subset.size}{number of factors required in the factor model; an
-option for \code{"subsets"} variable selection. \code{NULL} selects the
-best model (BIC) from amongst subset sizes in [\code{nvmin},\code{nvmax}].
-Default is \code{NULL}.}
-
\item{type}{option for \code{"lars"}. One of "lasso", "lar",
"forward.stagewise" or "stepwise". The names can be abbreviated to any
unique substring. Default is "lasso".}
@@ -144,7 +139,7 @@
}
\examples{
# check argument list passed by fitTsfm.control
-tsfm.ctrl <- fitTsfm.control(method="exhaustive", subset.size=2)
+tsfm.ctrl <- fitTsfm.control(method="exhaustive", nvmin=2)
print(tsfm.ctrl)
# used internally by fitTsfm
@@ -152,7 +147,7 @@
fit <- fitTsfm(asset.names=colnames(managers[,(1:6)]),
factor.names=colnames(managers[,(7:9)]),
data=managers, variable.selection="subsets",
- method="exhaustive", subset.size=2)
+ method="exhaustive", nvmin=2)
}
\author{
Sangeetha Srinivasan
Modified: pkg/FactorAnalytics/vignettes/fitTsfm_vignette.Rnw
===================================================================
--- pkg/FactorAnalytics/vignettes/fitTsfm_vignette.Rnw 2014-11-21 11:25:12 UTC (rev 3555)
+++ pkg/FactorAnalytics/vignettes/fitTsfm_vignette.Rnw 2014-11-21 12:26:00 UTC (rev 3556)
@@ -178,7 +178,7 @@
fit.sub <- fitTsfm(asset.names=colnames(managers[,(1:6)]),
factor.names=colnames(managers[,(7:9)]), data=managers,
rf.name="US 3m TR", mkt.name="SP500 TR",
- variable.selection="subsets", subset.size=4)
+ variable.selection="subsets", nvmin=4, nvmax=4)
fit.sub$beta
fit.sub$r2
@
@@ -213,8 +213,7 @@
There are 4 other arguments passed to \code{fitTsfm.control} that determine the type of factor model fit chosen.
\begin{itemize}
\item \verb"decay": Determines the decay factor for \code{fit.method="DLS"}, which performs exponentially weighted least squares, with weights adding to unity.
-\item \verb"nvmin": The lower limit for the range of subset sizes from which the best model (BIC) is found when performing \verb"subsets" selection. Note that the upper limit was already passed to \verb"regsubsets" function.
-\item \verb"subset.size": Number of factors required in the factor model when performing \verb"subsets" selection. This might be meaningful when looking for the best model of a certain size (perhaps for parsimony, perhaps to compare with a different model of the same size, perhaps to avoid over-fitting/ data dredging etc.) Alternately, users can specify \code{NULL} to get the best model from amongst subset sizes in the range \code{[nvmin,nvmax]}.
+\item \verb"nvmin": The lower limit for the range of subset sizes from which the best model (BIC) is found when performing \verb"subsets" selection. Note that the upper limit was already passed to \verb"regsubsets" function. By specifying \code{nvmin=nvmax}, users can obtain the best model of a particular size (meaningful to those who want a parsimonious model, or to compare with a different model of the same size, or perhaps to avoid over-fitting/ data dredging etc.).
\item \verb"lars.criterion": An option (one of "Cp" or "cv") to assess model selection for the \code{"lars"} variable selection method. "Cp" is Mallow's Cp statistic and "cv" is K-fold cross-validated mean squared prediction error.
\end{itemize}
Modified: pkg/FactorAnalytics/vignettes/fitTsfm_vignette.pdf
===================================================================
(Binary files differ)
More information about the Returnanalytics-commits
mailing list