[Returnanalytics-commits] r2415 - in pkg/FactorAnalytics: R man

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Mon Jun 24 18:57:25 CEST 2013


Author: chenyian
Date: 2013-06-24 18:57:24 +0200 (Mon, 24 Jun 2013)
New Revision: 2415

Added:
   pkg/FactorAnalytics/R/fitTimeSeriesFactorModel.R
   pkg/FactorAnalytics/man/fitTimeseriesFactorModel.Rd
Modified:
   pkg/FactorAnalytics/R/fitStatisticalFactorModel.R
   pkg/FactorAnalytics/man/fitStatisticalFactorModel.Rd
Log:
1. fitMacroeconomicFactorModel.R changes name to fitTimeSeriesFactorModel.R
and same for .Rd file. 

2. add checkData into fitStatisticalFactorModel.R

Modified: pkg/FactorAnalytics/R/fitStatisticalFactorModel.R
===================================================================
--- pkg/FactorAnalytics/R/fitStatisticalFactorModel.R	2013-06-24 14:02:34 UTC (rev 2414)
+++ pkg/FactorAnalytics/R/fitStatisticalFactorModel.R	2013-06-24 16:57:24 UTC (rev 2415)
@@ -4,7 +4,8 @@
 #' mainly adapted from S+FinMetric function mfactor.
 #' 
 #' 
-#' @param x T x N assets returns data which is saved as data.frame class.
+#' @param data a vector, matrix, data.frame, xts, timeSeries or zoo object with asset returns 
+#' and factors retunrs rownames
 #' @param k numbers of factors if it is scalar or method of choosing optimal
 #' number of factors. "bn" represents Bai and Ng (2002) method and "ck"
 #' represents Connor and korajczyk (1993) method. Default is k = 1.
@@ -29,7 +30,7 @@
 #' \item{asset.ret}{asset returns}
 #' \item{asset.fit}{List of regression lm class of individual returns on
 #' factors.}
-#' \item{residVars.vec}{vector of residual variances.}
+#' \item{resid.variance}{vector of residual variances.}
 #' \item{mimic}{N x K matrix of factor mimicking portfolio returns.}
 #' @author Eric Zivot and Yi-An Chen
 #' @examples
@@ -53,7 +54,7 @@
 #' sfm.pca.fit$loadings
 #' sfm.pca.fit$r2
 #' sfm.pca.fit$residuals
-#' sfm.pca.fit$residVars.vec
+#' sfm.pca.fit$resid.variance
 #' sfm.pca.fit$mimic
 #' # apca
 #' sfm.apca.fit <- fitStatisticalFactorModel(sfm.apca.dat,k=1)
@@ -73,32 +74,41 @@
 #' sfm.apca.fit.ck$mimic
 #' 
 fitStatisticalFactorModel <-
-function(x, k = 1, refine = TRUE, check = FALSE, max.k = NULL, sig = 0.05, na.rm = FALSE){
+function(data, k = 1, refine = TRUE, check = FALSE, max.k = NULL, sig = 0.05, na.rm = FALSE, 
+         ckeckData.method = "xts" ){
 	
 # load package
 require(MASS)  
+require(PerformanceAnalytics)
+
+
+# check data 
+data.xts <- checkData(data,method=ckeckData.method) 
+
+# convert it to coredata
+
+
   
-  
  # function of test
- mfactor.test <- function(x, method = "bn", refine = TRUE, check = FALSE, max.k = NULL, sig = 0.05){
+ mfactor.test <- function(data, method = "bn", refine = TRUE, check = FALSE, max.k = NULL, sig = 0.05){
   
     if(is.null(max.k)) {
-		max.k <- min(10, nrow(x) - 1)
-	} 	else if (max.k >= nrow(x)) {
+		max.k <- min(10, nrow(data) - 1)
+	} 	else if (max.k >= nrow(data)) {
 		stop("max.k must be less than the number of observations.")
 	}
 	if(check) {
-		if(mfactor.check(x)) {
+		if(mfactor.check(data)) {
 			warning("Some variables have identical observations.")
 			return(list(factors = NA, loadings = NA, k = NA))
 		}
 	}
 	method <- casefold(method)
 	if(method == "bn") {
-		ans <- mfactor.bn(x, max.k, refine = refine)
+		ans <- mfactor.bn(data, max.k, refine = refine)
 	}
 	else if(method == "ck") {
-		ans <- mfactor.ck(x, max.k, refine = refine, sig = sig)
+		ans <- mfactor.ck(data, max.k, refine = refine, sig = sig)
 	}
 	else {
 		stop("Invalid choice for optional argument method.")
@@ -109,25 +119,25 @@
 
  
 # function of ck
-mfactor.ck <- function(x, max.k, sig = 0.05, refine = TRUE) {
+mfactor.ck <- function(data, max.k, sig = 0.05, refine = TRUE) {
   
-  n <- ncol(x)
-  m <- nrow(x)
+  n <- ncol(data)
+  m <- nrow(data)
 	idx <- 2 * (1:(m/2))
 	#
-	f <- mfactor.apca(x, k = 1, refine = refine, check = FALSE)
+	f <- mfactor.apca(data, k = 1, refine = refine, check = FALSE)
 	f1 <- cbind(1, f$factors)
 	B <- backsolve(chol(crossprod(f1)), diag(2))
-	eps <- x - f1 %*% crossprod(t(B)) %*% crossprod(f1, x)
+	eps <- data - f1 %*% crossprod(t(B)) %*% crossprod(f1, data)
 	s <- eps^2/(1 - 2/m - 1/n)
 	#	
 	for(i in 2:max.k) {
 		f.old <- f
 		s.old <- s
-		f <- mfactor.apca(x, k = i, refine = refine, check = FALSE)
+		f <- mfactor.apca(data, k = i, refine = refine, check = FALSE)
 		f1 <- cbind(1, f$factors)
 		B <- backsolve(chol(crossprod(f1)), diag(i + 1))
-		eps <- x - f1 %*% crossprod(t(B)) %*% crossprod(f1, x)
+		eps <- data - f1 %*% crossprod(t(B)) %*% crossprod(f1, data)
 		s <- eps^2/(1 - (i + 1)/m - i/n)
 		delta <- rowMeans(s.old[idx - 1,  , drop = FALSE]) - rowMeans(
 			s[idx,  , drop = FALSE])
@@ -139,8 +149,8 @@
 }
 
 # funciton of check  
-  mfactor.check <- function(x) {
-  temp <- apply(x, 2, range)
+  mfactor.check <- function(data) {
+  temp <- apply(data, 2, range)
   if(any(abs(temp[2,  ] - temp[1,  ]) < .Machine$single.eps)) {
 		TRUE
 	}
@@ -150,21 +160,21 @@
 }
 
   # function of bn
-  mfactor.bn <- function(x, max.k, refine = TRUE) {
+  mfactor.bn <- function(data, max.k, refine = TRUE) {
   
   # Parameters:
-	#         x : T x N return matrix
+	#         data : T x N return matrix
 	#     max.k : maxinum number of factors to be considered
 		# Returns:
 	#      k : the optimum number of factors
-	n <- ncol(x)
-	m <- nrow(x)
+	n <- ncol(data)
+	m <- nrow(data)
 	s <- vector("list", max.k) 
 	for(i in 1:max.k) {
-		f <- cbind(1, mfactor.apca(x, k = i, refine = refine, check = 
+		f <- cbind(1, mfactor.apca(data, k = i, refine = refine, check = 
 			FALSE)$factors)
 		B <- backsolve(chol(crossprod(f)), diag(i + 1))
-		eps <- x - f %*% crossprod(t(B)) %*% crossprod(f, x)
+		eps <- data - f %*% crossprod(t(B)) %*% crossprod(f, data)
 		sigma <- colSums(eps^2)/(m - i - 1)
 		s[[i]] <- mean(sigma)
 	}
@@ -177,27 +187,27 @@
 		warning("Cp1 and Cp2 did not yield same result. The smaller one is used."	)
 	}
 	k <- min(order(Cp1)[1], order(Cp2)[1])
-	f <- mfactor.apca(x, k = k, refine = refine, check = FALSE)
+	f <- mfactor.apca(data, k = k, refine = refine, check = FALSE)
  return(f)  
  }
 
   
   # function of pca
-  mfactor.pca <- function(x, k, check = FALSE, ret.cov = NULL) {
+  mfactor.pca <- function(data, k, check = FALSE, ret.cov = NULL) {
   
   if(check) {
-		if(mfactor.check(x)) {
+		if(mfactor.check(data)) {
 			warning("Some variables have identical observations.")
 			return(list(factors = NA, loadings = NA, k = NA))
 		}
 	}
-	n <- ncol(x)
-	m <- nrow(x)
-	if(is.null(dimnames(x))) {
-		dimnames(x) <- list(1:m, paste("V", 1:n, sep = "."))
+	n <- ncol(data)
+	m <- nrow(data)
+	if(is.null(dimnames(data))) {
+		dimnames(data) <- list(1:m, paste("V", 1:n, sep = "."))
 	}
-	x.names <- dimnames(x)[[2]]
-	xc <- t(t(x) - colMeans(x))
+	data.names <- dimnames(data)[[2]]
+	xc <- t(t(data) - colMeans(data))
 	if(is.null(ret.cov)) {
 		ret.cov <- crossprod(xc)/m
 	}
@@ -205,29 +215,29 @@
   # compute loadings beta
 	B <- t(eigen.tmp$vectors[, 1:k, drop = FALSE])
   # compute estimated factors
-	f <- x %*% eigen.tmp$vectors[, 1:k, drop = FALSE]
-	tmp <- x - f %*% B
+	f <- data %*% eigen.tmp$vectors[, 1:k, drop = FALSE]
+	tmp <- data - f %*% B
 	alpha <- colMeans(tmp)
   # compute residuals
 	tmp <- t(t(tmp) - alpha)
 	r2 <- (1 - colSums(tmp^2)/colSums(xc^2))
 	ret.cov <- t(B) %*% var(f) %*% B
 	diag(ret.cov) <- diag(ret.cov) + colSums(tmp^2)/(m - k - 1)
-	dimnames(B) <- list(paste("F", 1:k, sep = "."), x.names)
-	dimnames(f) <- list(dimnames(x)[[1]], paste("F", 1:k, sep = "."))
-	dimnames(ret.cov) <- list(x.names, x.names)
-	names(alpha) <- x.names
+	dimnames(B) <- list(paste("F", 1:k, sep = "."), data.names)
+	dimnames(f) <- list(dimnames(data)[[1]], paste("F", 1:k, sep = "."))
+	dimnames(ret.cov) <- list(data.names, data.names)
+	names(alpha) <- data.names
   # create lm list for plot
   reg.list = list()
-  for (i in x.names) {
-    reg.df = as.data.frame(cbind(x[,i],f))
+  for (i in data.names) {
+    reg.df = as.data.frame(cbind(data[,i],f))
     colnames(reg.df)[1] <- i
     fm.formula = as.formula(paste(i,"~", ".", sep=" "))
     fm.fit = lm(fm.formula, data=reg.df)
     reg.list[[i]] = fm.fit
     }
 	ans <-  list(factors = f, loadings = B, k = k, alpha = alpha, ret.cov = ret.cov,
-	            	r2 = r2, eigen = eigen.tmp$values, residuals=tmp, asset.ret = x,
+	            	r2 = r2, eigen = eigen.tmp$values, residuals=tmp, asset.ret = data,
                asset.fit=reg.list)
  
   return(ans)
@@ -235,21 +245,21 @@
 }
 
   # funciont of apca
-  mfactor.apca <- function(x, k, refine = TRUE, check = FALSE, ret.cov = NULL) {
+  mfactor.apca <- function(data, k, refine = TRUE, check = FALSE, ret.cov = NULL) {
   
   if(check) {
-		if(mfactor.check(x)) {
+		if(mfactor.check(data)) {
 			warning("Some variables have identical observations.")
 			return(list(factors = NA, loadings = NA, k = NA))
 		}
 	}
-	n <- ncol(x)
-	m <- nrow(x)
-	if(is.null(dimnames(x))) {
-		dimnames(x) <- list(1:m, paste("V", 1:n, sep = "."))
+	n <- ncol(data)
+	m <- nrow(data)
+	if(is.null(dimnames(data))) {
+		dimnames(data) <- list(1:m, paste("V", 1:n, sep = "."))
 	}
-	x.names <- dimnames(x)[[2]]
-	xc <- t(t(x) - colMeans(x))
+	data.names <- dimnames(data)[[2]]
+	xc <- t(t(data) - colMeans(data))
 	if(is.null(ret.cov)) {
 		ret.cov <- crossprod(t(xc))/n
 	}
@@ -257,8 +267,8 @@
 	f <- eig.tmp$vectors[, 1:k, drop = FALSE]
 	f1 <- cbind(1, f)
 	B <- backsolve(chol(crossprod(f1)), diag(k + 1))
-	B <- crossprod(t(B)) %*% crossprod(f1, x)
-	sigma <- colSums((x - f1 %*% B)^2)/(m - k - 1)
+	B <- crossprod(t(B)) %*% crossprod(f1, data)
+	sigma <- colSums((data - f1 %*% B)^2)/(m - k - 1)
 	if(refine) {
 		xs <- t(xc)/sqrt(sigma)
 		ret.cov <- crossprod(xs)/n
@@ -266,52 +276,52 @@
 		f <- eig.tmp$vectors[, 1:k, drop = FALSE]
 		f1 <- cbind(1, f)
 		B <- backsolve(chol(crossprod(f1)), diag(k + 1))
-		B <- crossprod(t(B)) %*% crossprod(f1, x)
-		sigma <- colSums((x - f1 %*% B)^2)/(m - k - 1)
+		B <- crossprod(t(B)) %*% crossprod(f1, data)
+		sigma <- colSums((data - f1 %*% B)^2)/(m - k - 1)
 	}
 	alpha <- B[1,  ]
 	B <- B[-1,  , drop = FALSE]
 	ret.cov <- t(B) %*% var(f) %*% B
 	diag(ret.cov) <- diag(ret.cov) + sigma
-	dimnames(B) <- list(paste("F", 1:k, sep = "."), x.names)
-	dimnames(f) <- list(dimnames(x)[[1]], paste("F", 1:k, sep = "."))
-	names(alpha) <- x.names
-	res <- t(t(x) - alpha) - f %*% B
+	dimnames(B) <- list(paste("F", 1:k, sep = "."), data.names)
+	dimnames(f) <- list(dimnames(data)[[1]], paste("F", 1:k, sep = "."))
+	names(alpha) <- data.names
+	res <- t(t(data) - alpha) - f %*% B
 	r2 <- (1 - colSums(res^2)/colSums(xc^2))
   ans <- 	list(factors = f, loadings = B, k = k, alpha = alpha, ret.cov = ret.cov,
-		           r2 = r2, eigen = eig.tmp$values, residuals=res,asset.ret = x)
+		           r2 = r2, eigen = eig.tmp$values, residuals=res,asset.ret = data)
  return(ans)
 }
 
   call <- match.call()  
-  pos <- rownames(x)
-	x <- as.matrix(x)
-	if(any(is.na(x))) {
+  pos <- rownames(data)
+	data <- as.matrix(data)
+	if(any(is.na(data))) {
 		if(na.rm) {
-			x <- na.omit(x)
+			data <- na.omit(data)
 		}		else {
 			stop("Missing values are not allowed if na.rm=F.")
 		}
 	}
 	# use PCA if T > N
-	if(ncol(x) < nrow(x)) {
+	if(ncol(data) < nrow(data)) {
 		if(is.character(k)) {
 			stop("k must be the number of factors for PCA.")
 		}
-		if(k >= ncol(x)) {
+		if(k >= ncol(data)) {
 			stop("Number of factors must be smaller than number of variables."
 				)
 		}
-		ans <- mfactor.pca(x, k, check = check)
+		ans <- mfactor.pca(data, k, check = check)
 	}	else if(is.character(k)) {
-		ans <- mfactor.test(x, k, refine = refine, check = 
+		ans <- mfactor.test(data, k, refine = refine, check = 
 			check, max.k = max.k, sig = sig)
 	}	else { # use aPCA if T <= N
-		if(k >= ncol(x)) {
+		if(k >= ncol(data)) {
 			stop("Number of factors must be smaller than number of variables."
 				)
 		}
-		ans <- mfactor.apca(x, k, refine = refine, check = 
+		ans <- mfactor.apca(data, k, refine = refine, check = 
 			check)
 	}
   
@@ -322,17 +332,17 @@
 		f <- as.matrix(f)
 	}
 
-	if(nrow(x) < ncol(x)) {
-		mimic <- ginv(x) %*% f
+	if(nrow(data) < ncol(data)) {
+		mimic <- ginv(data) %*% f
 	}	else {
-		mimic <- qr.solve(x, f)
+		mimic <- qr.solve(data, f)
 	}
 	
   mimic <- t(t(mimic)/colSums(mimic))
-	dimnames(mimic)[[1]] <- dimnames(x)[[2]]
+	dimnames(mimic)[[1]] <- dimnames(data)[[2]]
   
   ans$mimic <- mimic
-  ans$residVars.vec <- apply(ans$residuals,2,var)
+  ans$resid.variance <- apply(ans$residuals,2,var)
   ans$call <- call
 class(ans) <- "StatFactorModel"
   return(ans)

Added: pkg/FactorAnalytics/R/fitTimeSeriesFactorModel.R
===================================================================
--- pkg/FactorAnalytics/R/fitTimeSeriesFactorModel.R	                        (rev 0)
+++ pkg/FactorAnalytics/R/fitTimeSeriesFactorModel.R	2013-06-24 16:57:24 UTC (rev 2415)
@@ -0,0 +1,372 @@
+#' Fit time series factor model by time series regression techniques.
+#' 
+#' Fit time series factor model by time series regression techniques. It
+#' creates the class of "TimeSeriesFactorModel".
+#' 
+#' If \code{Robust} is chosen, there is no subsets but all factors will be
+#' used.  Cp is defined in
+#' http://www-stat.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf. p17.
+#' 
+#' @param assets.names  names of assets returns.
+#' @param factors.names names of factors returns.
+#' @param num.factor.subset scalar. Number of factors selected by all subsets.
+#' @param data a vector, matrix, data.frame, xts, timeSeries or zoo object with asset returns 
+#' and factors retunrs rownames 
+#' @param fit.method "OLS" is ordinary least squares method, "DLS" is
+#' discounted least squares method. Discounted least squares (DLS) estimation
+#' is weighted least squares estimation with exponentially declining weights
+#' that sum to unity. "Robust"
+#' @param variable.selection "none" will not activate variables sellection. Default is "none".
+#' "stepwise" is traditional forward/backward #' stepwise OLS regression, starting from the initial set of factors, that adds
+#' factors only if the regression fit as measured by the Bayesian Information
+#' Criteria (BIC) or Akaike Information Criteria (AIC) can be done using the R
+#' function step() from the stats package. If "Robust" is chosen, the
+#' function step.lmRob in Robust package will be used. "all subsets" is
+#' Traditional all subsets regression can be done using the R function
+#' regsubsets() from the package leaps. "lar" , "lasso" is based on package
+#' "lars", linear angle regression. If "lar" or "lasso" is chose. fit.method will be ignored. 
+#' @param decay.factor for DLS. Default is 0.95.
+#' @param nvmax control option for all subsets. maximum size of subsets to
+#' examine
+#' @param force.in control option for all subsets. The factors that should be
+#' in all models.
+#' @param subsets.method control option for all subsets. se exhaustive search,
+#' forward selection, backward selection or sequential replacement to search.
+#' @param lars.criteria either choose minimum "Cp": unbiased estimator of the
+#' true rist or "cv" 10 folds cross-validation. See detail.
+#' @return an S3 object containing
+#'   \item{asset.fit}{Fit objects for each asset. This is the class "lm" for
+#' each object.}
+#'   \item{alpha}{N x 1 Vector of estimated alphas.}
+#'   \item{beta}{N x K Matrix of estimated betas.}
+#'   \item{r2}{N x 1 Vector of R-square values.}
+#'   \item{resid.variance}{N x 1 Vector of residual variances.}
+#'   \item{call}{function call.}
+#' @author Eric Zivot and Yi-An Chen.
+#' @references 1. Efron, Hastie, Johnstone and Tibshirani (2002) "Least Angle
+#' Regression" (with discussion) Annals of Statistics; see also
+#' http://www-stat.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf.  2.
+#' Hastie, Tibshirani and Friedman (2008) Elements of Statistical Learning 2nd
+#' edition, Springer, NY.
+#' @examples
+#'  \dontrun{
+#' # load data from the database
+#' data(managers.df)
+#' ret.assets = managers.df[,(1:6)]
+#' factors    = managers.df[,(7:9)]
+#' # fit the factor model with OLS
+#' fit <- fitTimeseriesFactorModel(ret.assets,factors,fit.method="OLS",
+#'                                  variable.selection="all subsets")
+#' # summary of HAM1 
+#' summary(fit$asset.fit$HAM1)
+#' # plot actual vs. fitted over time for HAM1
+#' # use chart.TimeSeries() function from PerformanceAnalytics package
+#' dataToPlot = cbind(fitted(fit$asset.fit$HAM1), na.omit(managers.df$HAM1))
+#' colnames(dataToPlot) = c("Fitted","Actual")
+#' chart.TimeSeries(dataToPlot, main="FM fit for HAM1",
+#'                  colorset=c("black","blue"), legend.loc="bottomleft")
+#'  }
+fitTimeseriesFactorModel <-
+function(assets.names, factors.names, data=data, num.factor.subset = 1, 
+          fit.method=c("OLS","DLS","Robust"),
+         variable.selection="none",
+          decay.factor = 0.95,nvmax=8,force.in=NULL,
+          subsets.method = c("exhaustive", "backward", "forward", "seqrep"),
+          lars.criteria = c("Cp","cv")) {
+  
+  require(PerformanceAnalytics)
+  require(leaps)
+  require(lars)
+  require(robust)
+  require(MASS)
+  this.call <- match.call()
+  
+  # convert data into xts and hereafter compute in xts
+  data.xts <- checkData(data) 
+  reg.xts <- merge(data.xts[,assets.names],data.xts[,factors.names])
+  
+  # initialize list object to hold regression objects
+reg.list = list()
+
+
+# initialize matrices and vectors to hold estimated betas,
+# residual variances, and R-square values from
+# fitted factor models
+
+Alphas = ResidVars = R2values = rep(0, length(assets.names))
+names(Alphas) = names(ResidVars) = names(R2values) = assets.names
+Betas = matrix(0, length(assets.names), length(factors.names))
+colnames(Betas) = factors.names
+rownames(Betas) = assets.names
+
+
+if (variable.selection == "none") {
+  if (fit.method == "OLS") {
+          for (i in assets.names) {
+        reg.df = na.omit(reg.xts[, c(i, factors.names)])    
+        fm.formula = as.formula(paste(i,"~", ".", sep=" "))
+        fm.fit = lm(fm.formula, data=reg.df)
+        fm.summary = summary(fm.fit)
+        reg.list[[i]] = fm.fit
+        Alphas[i] = coef(fm.fit)[1]
+        Betas.names = names(coef(fm.fit)[-1])
+        Betas[i,Betas.names] = coef(fm.fit)[-1]
+        ResidVars[i] = fm.summary$sigma^2
+        R2values[i] =  fm.summary$r.squared
+      }
+  } else if (fit.method == "DLS") {
+    for (i in assets.names) {
+      reg.df = na.omit(reg.xts[, c(i, factors.names)])
+      t.length <- nrow(reg.df)
+      w <- rep(decay.factor^(t.length-1),t.length)
+      for (k in 2:t.length) {
+        w[k] = w[k-1]/decay.factor 
+      }   
+      # sum weigth to unitary  
+      w <- w/sum(w) 
+      fm.formula = as.formula(paste(i,"~", ".", sep=""))                              
+      fm.fit = lm(fm.formula, data=reg.df,weight=w)
+      fm.summary = summary(fm.fit)
+      reg.list[[i]] = fm.fit
+      Alphas[i] = coef(fm.fit)[1]
+      Betas.names = names(coef(fm.fit)[-1])
+      Betas[i,Betas.names] = coef(fm.fit)[-1]
+      ResidVars[i] = fm.summary$sigma^2
+      R2values[i] =  fm.summary$r.squared
+    } 
+  } else if (fit.method=="Robust") {
+    for (i in assets.names) {
+      reg.df = na.omit(reg.xts[, c(i, factors.names)])
+      fm.formula = as.formula(paste(i,"~", ".", sep=" "))
+      fm.fit = lmRob(fm.formula, data=reg.df)
+      fm.summary = summary(fm.fit)
+      reg.list[[i]] = fm.fit
+      Alphas[i] = coef(fm.fit)[1]
+      Betas[i, ] = coef(fm.fit)[-1]
+      ResidVars[i] = fm.summary$sigma^2
+      R2values[i] =  fm.summary$r.squared
+    }
+    
+  }  else {
+    stop("invalid method")
+  }
+  
+  
+} else if (variable.selection == "all subsets") {
+# estimate multiple factor model using loop b/c of unequal histories for the hedge funds
+
+
+
+if (fit.method == "OLS") {
+
+if (num.factor.subset == length(force.in)) {
+  for (i in assets.names) {
+ reg.df = na.omit(reg.xts[, c(i, force.in)])
+ fm.formula = as.formula(paste(i,"~", ".", sep=" "))
+ fm.fit = lm(fm.formula, data=reg.df)
+ fm.summary = summary(fm.fit)
+ reg.list[[i]] = fm.fit
+ Alphas[i] = coef(fm.fit)[1]
+ Betas.names = names(coef(fm.fit)[-1])
+ Betas[i,Betas.names] = coef(fm.fit)[-1]
+ ResidVars[i] = fm.summary$sigma^2
+ R2values[i] =  fm.summary$r.squared
+  }
+}  else if (num.factor.subset > length(force.in)) {
+    
+for (i in assets.names) {
+ reg.df = na.omit(reg.xts[, c(i, factors.names)])
+ fm.formula = as.formula(paste(i,"~", ".", sep=" "))
+ fm.subsets <- regsubsets(fm.formula,data=reg.df,nvmax=nvmax,force.in=force.in,
+                          method=subsets.method)
+ sum.sub <- summary(fm.subsets)
+ reg.df <- na.omit(reg.xts[,c(i,names(which(sum.sub$which[as.character(num.factor.subset),-1]==TRUE))  )])
+ fm.fit = lm(fm.formula, data=reg.df)
+ fm.summary = summary(fm.fit)
+ reg.list[[i]] = fm.fit
+ Alphas[i] = coef(fm.fit)[1]
+ Betas.names = names(coef(fm.fit)[-1])
+ Betas[i,Betas.names] = coef(fm.fit)[-1]
+ ResidVars[i] = fm.summary$sigma^2
+ R2values[i] =  fm.summary$r.squared
+  }
+} else {
+  stop("ERROR! number of force.in should less or equal to num.factor.subset")
+}
+  
+
+
+
+} else if (fit.method == "DLS"){
+  
+
+  if (num.factor.subset == length(force.in)) {  
+  # define weight matrix 
+for (i in assets.names) {
+  reg.df = na.omit(reg.xts[, c(i, force.in)])
+ t.length <- nrow(reg.df)
+ w <- rep(decay.factor^(t.length-1),t.length)
+   for (k in 2:t.length) {
+    w[k] = w[k-1]/decay.factor 
+  }   
+# sum weigth to unitary  
+ w <- w/sum(w) 
+ fm.formula = as.formula(paste(i,"~", ".", sep=""))                              
+ fm.fit = lm(fm.formula, data=reg.df,weight=w)
+ fm.summary = summary(fm.fit)
+ reg.list[[i]] = fm.fit
+ Alphas[i] = coef(fm.fit)[1]
+ Betas.names = names(coef(fm.fit)[-1])
+ Betas[i,Betas.names] = coef(fm.fit)[-1]
+ ResidVars[i] = fm.summary$sigma^2
+ R2values[i] =  fm.summary$r.squared
+ } 
+} else if  (num.factor.subset > length(force.in)) {
+  for (i in assets.names) {
+  reg.df = na.omit(reg.xts[, c(i, factors.names)])
+  t.length <- nrow(reg.df)
+  w <- rep(decay.factor^(t.length-1),t.length)
+  for (k in 2:t.length) {
+  w[k] = w[k-1]/decay.factor 
+  }   
+  w <- w/sum(w) 
+ fm.formula = as.formula(paste(i,"~", ".", sep=""))                              
+ fm.subsets <- regsubsets(fm.formula,data=reg.df,nvmax=nvmax,force.in=force.in,
+                          method=subsets.method,weights=w) # w is called from global envio
+ sum.sub <- summary(fm.subsets)
+ reg.df <- na.omit(reg.xts[,c(i,names(which(sum.sub$which[as.character(num.factor.subset),-1]==TRUE))  )])
+ fm.fit = lm(fm.formula, data=reg.df,weight=w)
+ fm.summary = summary(fm.fit)
+ reg.list[[i]] = fm.fit
+ Alphas[i] = coef(fm.fit)[1]
+ Betas.names = names(coef(fm.fit)[-1])
+ Betas[i,Betas.names] = coef(fm.fit)[-1]
+ ResidVars[i] = fm.summary$sigma^2
+ R2values[i] =  fm.summary$r.squared
+ }
+} else {
+  stop("ERROR! number of force.in should less or equal to num.factor.subset")
+}
+
+
+} else if (fit.method=="Robust") {
+  for (i in assets.names) {
+ reg.df = na.omit(reg.xts[, c(i, factors.names)])
+ fm.formula = as.formula(paste(i,"~", ".", sep=" "))
+ fm.fit = lmRob(fm.formula, data=reg.df)
+ fm.summary = summary(fm.fit)
+ reg.list[[i]] = fm.fit
+ Alphas[i] = coef(fm.fit)[1]
+ Betas[i, ] = coef(fm.fit)[-1]
+ ResidVars[i] = fm.summary$sigma^2
+ R2values[i] =  fm.summary$r.squared
+ }
+
+}  else {
+  stop("invalid method")
+}
+
+
+} else if (variable.selection == "stepwise") {
+
+  
+  if (fit.method == "OLS") {
+# loop over all assets and estimate time series regression
+for (i in assets.names) {
+ reg.df = na.omit(reg.xts[, c(i, factors.names)])
+ fm.formula = as.formula(paste(i,"~", ".", sep=" "))
+ fm.fit = step(lm(fm.formula, data=reg.df),trace=0)
+ fm.summary = summary(fm.fit)
+ reg.list[[i]] = fm.fit
+ Alphas[i] = coef(fm.fit)[1]
+ Betas.names = names(coef(fm.fit)[-1])
+ Betas[i,Betas.names] = coef(fm.fit)[-1]
+ ResidVars[i] = fm.summary$sigma^2
+ R2values[i] =  fm.summary$r.squared
+  }
+
+
+}  else if (fit.method == "DLS"){
+  # define weight matrix 
+for (i in assets.names) {
+  reg.df = na.omit(reg.xts[, c(i, factors.names)])
+  t.length <- nrow(reg.df)
+  w <- rep(decay.factor^(t.length-1),t.length)
+  for (k in 2:t.length) {
+    w[k] = w[k-1]/decay.factor 
+  }   
+# sum weigth to unitary  
+ w <- w/sum(w) 
+ fm.formula = as.formula(paste(i,"~", ".", sep=""))                              
+ fm.fit = step(lm(fm.formula, data=reg.df,weight=w),trace=0)
+ fm.summary = summary(fm.fit)
+ reg.list[[i]] = fm.fit
+ Alphas[i] = coef(fm.fit)[1]
+ Betas.names = names(coef(fm.fit)[-1])
+ Betas[i,Betas.names] = coef(fm.fit)[-1]
+ ResidVars[i] = fm.summary$sigma^2
+ R2values[i] =  fm.summary$r.squared
+ } 
+
+} else if (fit.method=="Robust") {  
+  for (i in assets.names) {
+ assign("reg.df" , na.omit(reg.xts[, c(i, factors.names)]),envir = .GlobalEnv )
+ fm.formula = as.formula(paste(i,"~", ".", sep=" "))
+ lmRob.obj <- lmRob(fm.formula, data=reg.df)
+ fm.fit = step.lmRob(lmRob.obj,trace=FALSE)
+ fm.summary = summary(fm.fit)
+ reg.list[[i]] = fm.fit
+ Alphas[i] = coef(fm.fit)[1]
+ Betas.names = names(coef(fm.fit)[-1])
+ Betas[i,Betas.names] = coef(fm.fit)[-1]
+ ResidVars[i] = fm.summary$sigma^2
+ R2values[i] =  fm.summary$r.squared
+  }
+
+}
+  
+} else if (variable.selection == "lar" | variable.selection == "lasso") {
+  # use min Cp as criteria to choose predictors
+  
+  for (i in assets.names) {
+ reg.df = na.omit(reg.xts[, c(i, factors.names)])
+ reg.df = as.matrix(reg.df)
+ lars.fit = lars(reg.df[,factors.names],reg.df[,i],type=variable.selection,trace=FALSE)
+ sum.lars <- summary(lars.fit)
+ if (lars.criteria == "Cp") {
+ s<- which.min(sum.lars$Cp)
+ } else {
+ lars.cv <- cv.lars(reg.df[,factors.names],reg.df[,i],trace=FALSE,
+                    type=variable.selection,mode="step",plot.it=FALSE)
+ s<- which.min(lars.cv$cv)
+   }
+ coef.lars <- predict(lars.fit,s=s,type="coef",mode="step")
+ reg.list[[i]] = lars.fit
+ fitted <- predict(lars.fit,reg.df[,factors.names],s=s,type="fit",mode="step")
+ Alphas[i] = (fitted$fit - reg.df[,factors.names]%*%coef.lars$coefficients)[1]
+ Betas.names = names(coef.lars$coefficients)
+ Betas[i,Betas.names] = coef.lars$coefficients
+ ResidVars[i] = sum.lars$Rss[s]/(nrow(reg.df)-s)
+ R2values[i] =  lars.fit$R2[s]
+  } 
+ 
+  }  else  {
+  stop("wrong method")
+}
+  
+
+  
+  
+  
+  # return results
+# add option to return list
+ans = list (asset.fit = reg.list,
+            alpha = Alphas,
+            beta  = Betas,
+            r2    = R2values,
+            resid.variance = ResidVars,
+            call      = this.call   )
+class(ans) = "TimeSeriesFactorModel"
+return(ans)
+}
+

Modified: pkg/FactorAnalytics/man/fitStatisticalFactorModel.Rd
===================================================================
--- pkg/FactorAnalytics/man/fitStatisticalFactorModel.Rd	2013-06-24 14:02:34 UTC (rev 2414)
+++ pkg/FactorAnalytics/man/fitStatisticalFactorModel.Rd	2013-06-24 16:57:24 UTC (rev 2415)
@@ -1,80 +1,97 @@
-\name{fitStatisticalFactorModel}
-\alias{fitStatisticalFactorModel}
-\title{Fit statistical factor model using principle components}
-\usage{
-  fitStatisticalFactorModel(x, k = 1, refine = TRUE,
-    check = FALSE, max.k = NULL, sig = 0.05, na.rm = FALSE)
-}
-\arguments{
-  \item{x}{T x N assets returns data which is saved as
-  data.frame class.}
-
-  \item{k}{numbers of factors if it is scalar or method of
-  choosing optimal number of factors. "bn" represents Bai
-  and Ng (2002) method and "ck" represents Connor and
-  korajczyk (1993) method. Default is k = 1.}
-
-  \item{refine}{\code{TRUE} By default, the APCA fit will
-  use the Connor-Korajczyk refinement.}
-
-  \item{check}{check if some variables has identical
-  values. Default is FALSE.}
-
-  \item{max.k}{scalar, select the number that maximum
-  number of factors to be considered.}
-
-  \item{sig}{significant level when ck method uses.}
-
-  \item{na.rm}{if allow missing values. Default is FALSE.}
-}
-\value{
-  :
-}
-\description{
-  Fit statistical factor model using principle components.
-  This function is mainly adapted from S+FinMetric function
-  mfactor.
-}
-\examples{
-# load data for fitStatisticalFactorModel.r
-# data from finmetric berndt.dat and folio.dat
-
-data(stat.fm.data)
-##
-# sfm.dat is for pca
-# sfm.apca.dat is for apca
-class(sfm.dat)
-class(sfm.apca.dat)
-
-# pca
-args(fitStatisticalFactorModel)
-sfm.pca.fit <- fitStatisticalFactorModel(sfm.dat,k=2)
-class(sfm.pca.fit)
-names(sfm.pca.fit)
-sfm.pca.fit$factors
-sfm.pca.fit$loadings
-sfm.pca.fit$r2
-sfm.pca.fit$residuals
-sfm.pca.fit$residVars.vec
-sfm.pca.fit$mimic
-# apca
-sfm.apca.fit <- fitStatisticalFactorModel(sfm.apca.dat,k=1)
-names(sfm.apca.fit)
-sfm.apca.res <- sfm.apca.fit$residuals
-sfm.apca.mimic <- sfm.apca.fit$mimic
-# apca with bai and Ng method
-sfm.apca.fit.bn <- fitStatisticalFactorModel(sfm.apca.dat,k="bn")
-class(sfm.apca.fit.bn)
-names(sfm.apca.fit.bn)
-sfm.apca.fit.bn$mimic
-
-# apca with ck method
-sfm.apca.fit.ck <- fitStatisticalFactorModel(sfm.apca.dat,k="ck")
-class(sfm.apca.fit.ck)
-names(sfm.apca.fit.ck)
-sfm.apca.fit.ck$mimic
-}
-\author{
-  Eric Zivot and Yi-An Chen
-}
-
+\name{fitStatisticalFactorModel}
+\alias{fitStatisticalFactorModel}
+\title{Fit statistical factor model using principle components}
+\usage{
+  fitStatisticalFactorModel(data, k = 1, refine = TRUE,
+    check = FALSE, max.k = NULL, sig = 0.05, na.rm = FALSE,
+    ckeckData.method = "xts")
+}
+\arguments{
+  \item{data}{a vector, matrix, data.frame, xts, timeSeries
+  or zoo object with asset returns and factors retunrs
+  rownames}
+
+  \item{k}{numbers of factors if it is scalar or method of
+  choosing optimal number of factors. "bn" represents Bai
+  and Ng (2002) method and "ck" represents Connor and
+  korajczyk (1993) method. Default is k = 1.}
+
+  \item{refine}{\code{TRUE} By default, the APCA fit will
+  use the Connor-Korajczyk refinement.}
+
+  \item{check}{check if some variables has identical
+  values. Default is FALSE.}
+
+  \item{max.k}{scalar, select the number that maximum
+  number of factors to be considered.}
+
+  \item{sig}{significant level when ck method uses.}
+
+  \item{na.rm}{if allow missing values. Default is FALSE.}
+}
+\value{
+  \item{factors}{T x K the estimated factors.}
+  \item{loadings}{K x N the asset specific factor loadings
+  beta_i. estimated from regress the asset returns on
+  factors.} \item{alpha}{1 x N the estimated intercepts
+  alpha_i} \item{ret.cov}{N x N asset returns sample
+  variance covariance matrix.} \item{r2}{regression r
+  square value from regress the asset returns on factors.}
+  \item{k}{the number of the facotrs.}
+  \item{eigen}{eigenvalues from the sample covariance
+  matrix.} \item{residuals}{T x N matrix of residuals from
+  regression.} \item{asset.ret}{asset returns}
+  \item{asset.fit}{List of regression lm class of
+  individual returns on factors.}
+  \item{resid.variance}{vector of residual variances.}
+  \item{mimic}{N x K matrix of factor mimicking portfolio
+  returns.}
+}
+\description{
+  Fit statistical factor model using principle components.
+  This function is mainly adapted from S+FinMetric function
+  mfactor.
+}
+\examples{
+# load data for fitStatisticalFactorModel.r
+# data from finmetric berndt.dat and folio.dat
+
+data(stat.fm.data)
+##
+# sfm.dat is for pca
+# sfm.apca.dat is for apca
+class(sfm.dat)
+class(sfm.apca.dat)
+
+# pca
+args(fitStatisticalFactorModel)
+sfm.pca.fit <- fitStatisticalFactorModel(sfm.dat,k=2)
+class(sfm.pca.fit)
+names(sfm.pca.fit)
+sfm.pca.fit$factors
+sfm.pca.fit$loadings
+sfm.pca.fit$r2
+sfm.pca.fit$residuals
+sfm.pca.fit$resid.variance
+sfm.pca.fit$mimic
+# apca
+sfm.apca.fit <- fitStatisticalFactorModel(sfm.apca.dat,k=1)
+names(sfm.apca.fit)
+sfm.apca.res <- sfm.apca.fit$residuals
+sfm.apca.mimic <- sfm.apca.fit$mimic
+# apca with bai and Ng method
+sfm.apca.fit.bn <- fitStatisticalFactorModel(sfm.apca.dat,k="bn")
+class(sfm.apca.fit.bn)
+names(sfm.apca.fit.bn)
+sfm.apca.fit.bn$mimic
+
+# apca with ck method
+sfm.apca.fit.ck <- fitStatisticalFactorModel(sfm.apca.dat,k="ck")
+class(sfm.apca.fit.ck)
+names(sfm.apca.fit.ck)
+sfm.apca.fit.ck$mimic
+}
+\author{
+  Eric Zivot and Yi-An Chen
+}
+

Added: pkg/FactorAnalytics/man/fitTimeseriesFactorModel.Rd
===================================================================
--- pkg/FactorAnalytics/man/fitTimeseriesFactorModel.Rd	                        (rev 0)
+++ pkg/FactorAnalytics/man/fitTimeseriesFactorModel.Rd	2013-06-24 16:57:24 UTC (rev 2415)
@@ -0,0 +1,112 @@
+\name{fitTimeseriesFactorModel}
+\alias{fitTimeseriesFactorModel}
+\title{Fit time series factor model by time series regression techniques.}
+\usage{
+  fitTimeseriesFactorModel(assets.names, factors.names,
+    data = data, num.factor.subset = 1,
+    fit.method = c("OLS", "DLS", "Robust"),
+    variable.selection = "none", decay.factor = 0.95,
+    nvmax = 8, force.in = NULL,
+    subsets.method = c("exhaustive", "backward", "forward", "seqrep"),
+    lars.criteria = c("Cp", "cv"))
+}
+\arguments{
+  \item{assets.names}{names of assets returns.}
+
+  \item{factors.names}{names of factors returns.}
+
+  \item{num.factor.subset}{scalar. Number of factors
+  selected by all subsets.}
+
+  \item{data}{a vector, matrix, data.frame, xts, timeSeries
+  or zoo object with asset returns and factors retunrs
+  rownames}
+
+  \item{fit.method}{"OLS" is ordinary least squares method,
+  "DLS" is discounted least squares method. Discounted
+  least squares (DLS) estimation is weighted least squares
+  estimation with exponentially declining weights that sum
+  to unity. "Robust"}
+
+  \item{variable.selection}{"none" will not activate
+  variables sellection. Default is "none". "stepwise" is
+  traditional forward/backward #' stepwise OLS regression,
+  starting from the initial set of factors, that adds
+  factors only if the regression fit as measured by the
+  Bayesian Information Criteria (BIC) or Akaike Information
+  Criteria (AIC) can be done using the R function step()
+  from the stats package. If "Robust" is chosen, the
+  function step.lmRob in Robust package will be used. "all
+  subsets" is Traditional all subsets regression can be
[TRUNCATED]

To get the complete diff run:
    svnlook diff /svnroot/returnanalytics -r 2415


More information about the Returnanalytics-commits mailing list