[Returnanalytics-commits] r2370 - pkg/FactorAnalytics/R

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Wed Jun 19 02:45:16 CEST 2013


Author: chenyian
Date: 2013-06-19 02:45:16 +0200 (Wed, 19 Jun 2013)
New Revision: 2370

Modified:
   pkg/FactorAnalytics/R/fitStatisticalFactorModel.R
Log:
change description: @returnitem to \item

Modified: pkg/FactorAnalytics/R/fitStatisticalFactorModel.R
===================================================================
--- pkg/FactorAnalytics/R/fitStatisticalFactorModel.R	2013-06-19 00:28:54 UTC (rev 2369)
+++ pkg/FactorAnalytics/R/fitStatisticalFactorModel.R	2013-06-19 00:45:16 UTC (rev 2370)
@@ -1,367 +1,365 @@
-#' Fit statistical factor model using principle components
-#' 
-#' Fit statistical factor model using principle components. This function is
-#' mainly adapted from S+FinMetric function mfactor.
-#' 
-#' 
-#' @param x T x N assets returns data which is saved as data.frame class.
-#' @param k numbers of factors if it is scalar or method of choosing optimal
-#' number of factors. "bn" represents Bai and Ng (2002) method and "ck"
-#' represents Connor and korajczyk (1993) method. Default is k = 1.
-#' @param refine \code{TRUE} By default, the APCA fit will use the
-#' Connor-Korajczyk refinement.
-#' @param check check if some variables has identical values. Default is FALSE.
-#' @param max.k scalar, select the number that maximum number of factors to be
-#' considered.
-#' @param sig significant level when ck method uses.
-#' @param na.rm if allow missing values. Default is FALSE.
-#' @return
-#' 
-#' :
-#' @returnItem factors T x K the estimated factors.
-#' @returnItem loadings K x N the asset specific factor loadings beta_i
-#' estimated from regress the asset returns on factors.
-#' @returnItem alpha 1 x N the estimated intercepts alpha_i
-#' @returnItem Omega N x N asset returns sample variance covariance matrix.
-#' @returnItem r2 regression r square value from regress the asset returns on
-#' factors.
-#' @returnItem k the number of the facotrs.
-#' @returnItem eigen eigenvalues from the sample covariance matrix.
-#' @returnItem residuals T x N matrix of residuals from regression.
-#' @returnItem asset.ret asset returns
-#' @returnItem asset.fit List of regression lm class of individual returns on
-#' factors.
-#' @returnItem residVars.vec vector of residual variances
-#' @returnItem mimic N x K matrix of factor mimicking portfolio returns.
-#' @author Eric Zivot and Yi-An Chen
-#' @examples
-#' 
-#' # load data for fitStatisticalFactorModel.r
-#' # data from finmetric berndt.dat and folio.dat
-#' 
-#' data(stat.fm.data)
-#' ##
-#' # sfm.dat is for pca
-#' # sfm.apca.dat is for apca
-#' class(sfm.dat)
-#' class(sfm.apca.dat)
-#' 
-#' # pca
-#' args(fitStatisticalFactorModel)
-#' sfm.pca.fit <- fitStatisticalFactorModel(sfm.dat,k=2)
-#' class(sfm.pca.fit)
-#' names(sfm.pca.fit)
-#' sfm.pca.fit$factors
-#' sfm.pca.fit$loadings
-#' sfm.pca.fit$r2
-#' sfm.pca.fit$residuals
-#' sfm.pca.fit$residVars.vec
-#' sfm.pca.fit$mimic
-#' # apca
-#' sfm.apca.fit <- fitStatisticalFactorModel(sfm.apca.dat,k=1)
-#' names(sfm.apca.fit)
-#' sfm.apca.res <- sfm.apca.fit$residuals
-#' sfm.apca.mimic <- sfm.apca.fit$mimic
-#' # apca with bai and Ng method
-#' sfm.apca.fit.bn <- fitStatisticalFactorModel(sfm.apca.dat,k="bn")
-#' class(sfm.apca.fit.bn)
-#' names(sfm.apca.fit.bn)
-#' sfm.apca.fit.bn$mimic
-#' 
-#' # apca with ck method
-#' sfm.apca.fit.ck <- fitStatisticalFactorModel(sfm.apca.dat,k="ck")
-#' class(sfm.apca.fit.ck)
-#' names(sfm.apca.fit.ck)
-#' sfm.apca.fit.ck$mimic
-#' 
-fitStatisticalFactorModel <-
-function(x, k = 1, refine = TRUE, check = FALSE, max.k = NULL, sig = 0.05, na.rm = FALSE){
-	
-## Inputs:  
-## 
-## x      :  T x N assets returns data which is saved as data.frame class. 
-## k      :  numbers of factors if it is scalar or method of choosing optimal number of factors.
-##           "bn" represents Bai and Ng (2002) method and "ck" represents  Connor and korajczyk
-##          (1993) method. Default is k = 1.
-## refine :      : TRUE By default, the APCA fit will use the Connor-Korajczyk refinement. 
-## check         : check if some variables has identical values. Default is FALSE.
-## max.k         : scalar, select the number that maximum number of factors to be considered.
-## sig           : significant level than ck method uses.
-## na.rm         : if allow missing values. Default is FALSE.
-## Outputs:
-## 
-## factors       : T x K the estimated factors.
-## loadings      : K x N   the asset specific factor loadings beta_i estimated from regress the asset
-##                 returns on factors.
-## alpha         : 1 x N  the estimated intercepts alpha_i
-## Omega         : N x N asset returns sample variance covariance matrix.
-## r2            : regression r square value from regress the asset returns on factors.
-## k             : the number of the facotrs.
-## eigen         : eigenvalues from the sample covariance matrix.
-## residuals     : T x N matrix of residuals from regression.
-# residVars.vec  : vector of residual variances    
-# mimic          : N x K matrix of factor mimicking portfolio returns.   
- 
-# load package
-require(MASS)  
-  
-  
- # function of test
- mfactor.test <- function(x, method = "bn", refine = TRUE, check = FALSE, max.k = NULL, sig = 0.05){
-  
-    if(is.null(max.k)) {
-		max.k <- min(10, nrow(x) - 1)
-	} 	else if (max.k >= nrow(x)) {
-		stop("max.k must be less than the number of observations.")
-	}
-	if(check) {
-		if(mfactor.check(x)) {
-			warning("Some variables have identical observations.")
-			return(list(factors = NA, loadings = NA, k = NA))
-		}
-	}
-	method <- casefold(method)
-	if(method == "bn") {
-		ans <- mfactor.bn(x, max.k, refine = refine)
-	}
-	else if(method == "ck") {
-		ans <- mfactor.ck(x, max.k, refine = refine, sig = sig)
-	}
-	else {
-		stop("Invalid choice for optional argument method.")
-	}
- return(ans)
-    
-}
-
- 
-# function of ck
-mfactor.ck <- function(x, max.k, sig = 0.05, refine = TRUE) {
-  
-  n <- ncol(x)
-  m <- nrow(x)
-	idx <- 2 * (1:(m/2))
-	#
-	f <- mfactor.apca(x, k = 1, refine = refine, check = FALSE)
-	f1 <- cbind(1, f$factors)
-	B <- backsolve(chol(crossprod(f1)), diag(2))
-	eps <- x - f1 %*% crossprod(t(B)) %*% crossprod(f1, x)
-	s <- eps^2/(1 - 2/m - 1/n)
-	#	
-	for(i in 2:max.k) {
-		f.old <- f
-		s.old <- s
-		f <- mfactor.apca(x, k = i, refine = refine, check = FALSE)
-		f1 <- cbind(1, f$factors)
-		B <- backsolve(chol(crossprod(f1)), diag(i + 1))
-		eps <- x - f1 %*% crossprod(t(B)) %*% crossprod(f1, x)
-		s <- eps^2/(1 - (i + 1)/m - i/n)
-		delta <- rowMeans(s.old[idx - 1,  , drop = FALSE]) - rowMeans(
-			s[idx,  , drop = FALSE])
-		if(t.test(delta, alternative = "greater")$p.value > sig) {
-			return(f.old)
-		}
-	}
-	return(f)
-}
-
-# funciton of check  
-  mfactor.check <- function(x) {
-  temp <- apply(x, 2, range)
-  if(any(abs(temp[2,  ] - temp[1,  ]) < .Machine$single.eps)) {
-		TRUE
-	}
-	else {
-		FALSE
-	}
-}
-
-  # function of bn
-  mfactor.bn <- function(x, max.k, refine = TRUE) {
-  
-  # Parameters:
-	#         x : T x N return matrix
-	#     max.k : maxinum number of factors to be considered
-		# Returns:
-	#      k : the optimum number of factors
-	n <- ncol(x)
-	m <- nrow(x)
-	s <- vector("list", max.k) 
-	for(i in 1:max.k) {
-		f <- cbind(1, mfactor.apca(x, k = i, refine = refine, check = 
-			FALSE)$factors)
-		B <- backsolve(chol(crossprod(f)), diag(i + 1))
-		eps <- x - f %*% crossprod(t(B)) %*% crossprod(f, x)
-		sigma <- colSums(eps^2)/(m - i - 1)
-		s[[i]] <- mean(sigma)
-	}
-	s <- unlist(s)
-	idx <- 1:max.k
-	Cp1 <- s[idx] + (idx * s[max.k] * (n + m))/(n * m) * log((n * m)/
-		(n + m))
-	Cp2 <- s[idx] + (idx * s[max.k] * (n + m))/(n * m) * log(min(n, m))
-	if(order(Cp1)[1] != order(Cp2)[1]) {
-		warning("Cp1 and Cp2 did not yield same result. The smaller one is used."	)
-	}
-	k <- min(order(Cp1)[1], order(Cp2)[1])
-	f <- mfactor.apca(x, k = k, refine = refine, check = FALSE)
- return(f)  
- }
-
-  
-  # function of pca
-  mfactor.pca <- function(x, k, check = FALSE, Omega = NULL) {
-  
-  if(check) {
-		if(mfactor.check(x)) {
-			warning("Some variables have identical observations.")
-			return(list(factors = NA, loadings = NA, k = NA))
-		}
-	}
-	n <- ncol(x)
-	m <- nrow(x)
-	if(is.null(dimnames(x))) {
-		dimnames(x) <- list(1:m, paste("V", 1:n, sep = "."))
-	}
-	x.names <- dimnames(x)[[2]]
-	xc <- t(t(x) - colMeans(x))
-	if(is.null(Omega)) {
-		Omega <- crossprod(xc)/m
-	}
-	eigen.tmp <- eigen(Omega, symm = TRUE)
-  # compute loadings beta
-	B <- t(eigen.tmp$vectors[, 1:k, drop = FALSE])
-  # compute estimated factors
-	f <- x %*% eigen.tmp$vectors[, 1:k, drop = FALSE]
-	tmp <- x - f %*% B
-	alpha <- colMeans(tmp)
-  # compute residuals
-	tmp <- t(t(tmp) - alpha)
-	r2 <- (1 - colSums(tmp^2)/colSums(xc^2))
-	Omega <- t(B) %*% var(f) %*% B
-	diag(Omega) <- diag(Omega) + colSums(tmp^2)/(m - k - 1)
-	dimnames(B) <- list(paste("F", 1:k, sep = "."), x.names)
-	dimnames(f) <- list(dimnames(x)[[1]], paste("F", 1:k, sep = "."))
-	dimnames(Omega) <- list(x.names, x.names)
-	names(alpha) <- x.names
-  # create lm list for plot
-  reg.list = list()
-  for (i in x.names) {
-    reg.df = as.data.frame(cbind(x[,i],f))
-    colnames(reg.df)[1] <- i
-    fm.formula = as.formula(paste(i,"~", ".", sep=" "))
-    fm.fit = lm(fm.formula, data=reg.df)
-    reg.list[[i]] = fm.fit
-    }
-	ans <-  list(factors = f, loadings = B, k = k, alpha = alpha, Omega = Omega,
-	            	r2 = r2, eigen = eigen.tmp$values, residuals=tmp, asset.ret = x,
-               asset.fit=reg.list)
- 
-  return(ans)
- 
-}
-
-  # funciont of apca
-  mfactor.apca <- function(x, k, refine = TRUE, check = FALSE, Omega = NULL) {
-  
-  if(check) {
-		if(mfactor.check(x)) {
-			warning("Some variables have identical observations.")
-			return(list(factors = NA, loadings = NA, k = NA))
-		}
-	}
-	n <- ncol(x)
-	m <- nrow(x)
-	if(is.null(dimnames(x))) {
-		dimnames(x) <- list(1:m, paste("V", 1:n, sep = "."))
-	}
-	x.names <- dimnames(x)[[2]]
-	xc <- t(t(x) - colMeans(x))
-	if(is.null(Omega)) {
-		Omega <- crossprod(t(xc))/n
-	}
-	eig.tmp <- eigen(Omega, symmetric = TRUE)
-	f <- eig.tmp$vectors[, 1:k, drop = FALSE]
-	f1 <- cbind(1, f)
-	B <- backsolve(chol(crossprod(f1)), diag(k + 1))
-	B <- crossprod(t(B)) %*% crossprod(f1, x)
-	sigma <- colSums((x - f1 %*% B)^2)/(m - k - 1)
-	if(refine) {
-		xs <- t(xc)/sqrt(sigma)
-		Omega <- crossprod(xs)/n
-		eig.tmp <- eigen(Omega, symm = TRUE)
-		f <- eig.tmp$vectors[, 1:k, drop = FALSE]
-		f1 <- cbind(1, f)
-		B <- backsolve(chol(crossprod(f1)), diag(k + 1))
-		B <- crossprod(t(B)) %*% crossprod(f1, x)
-		sigma <- colSums((x - f1 %*% B)^2)/(m - k - 1)
-	}
-	alpha <- B[1,  ]
-	B <- B[-1,  , drop = FALSE]
-	Omega <- t(B) %*% var(f) %*% B
-	diag(Omega) <- diag(Omega) + sigma
-	dimnames(B) <- list(paste("F", 1:k, sep = "."), x.names)
-	dimnames(f) <- list(dimnames(x)[[1]], paste("F", 1:k, sep = "."))
-	names(alpha) <- x.names
-	res <- t(t(x) - alpha) - f %*% B
-	r2 <- (1 - colSums(res^2)/colSums(xc^2))
-  ans <- 	list(factors = f, loadings = B, k = k, alpha = alpha, Omega = Omega,
-		           r2 = r2, eigen = eig.tmp$values, residuals=res,asset.ret = x)
- return(ans)
-}
-
-  call <- match.call()  
-  pos <- rownames(x)
-	x <- as.matrix(x)
-	if(any(is.na(x))) {
-		if(na.rm) {
-			x <- na.omit(x)
-		}		else {
-			stop("Missing values are not allowed if na.rm=F.")
-		}
-	}
-	# use PCA if T > N
-	if(ncol(x) < nrow(x)) {
-		if(is.character(k)) {
-			stop("k must be the number of factors for PCA.")
-		}
-		if(k >= ncol(x)) {
-			stop("Number of factors must be smaller than number of variables."
-				)
-		}
-		ans <- mfactor.pca(x, k, check = check)
-	}	else if(is.character(k)) {
-		ans <- mfactor.test(x, k, refine = refine, check = 
-			check, max.k = max.k, sig = sig)
-	}	else { # use aPCA if T <= N
-		if(k >= ncol(x)) {
-			stop("Number of factors must be smaller than number of variables."
-				)
-		}
-		ans <- mfactor.apca(x, k, refine = refine, check = 
-			check)
-	}
-  
-  # mimic function
-  f <- ans$factors
-	
-	if(is.data.frame(f)) {
-		f <- as.matrix(f)
-	}
-
-	if(nrow(x) < ncol(x)) {
-		mimic <- ginv(x) %*% f
-	}	else {
-		mimic <- qr.solve(x, f)
-	}
-	
-  mimic <- t(t(mimic)/colSums(mimic))
-	dimnames(mimic)[[1]] <- dimnames(x)[[2]]
-  
-  ans$mimic <- mimic
-  ans$residVars.vec <- apply(ans$residuals,2,var)
-  ans$call <- call
-class(ans) <- "StatFactorModel"
-  return(ans)
-}
-
+#' Fit statistical factor model using principle components
+#' 
+#' Fit statistical factor model using principle components. This function is
+#' mainly adapted from S+FinMetric function mfactor.
+#' 
+#' 
+#' @param x T x N assets returns data which is saved as data.frame class.
+#' @param k numbers of factors if it is scalar or method of choosing optimal
+#' number of factors. "bn" represents Bai and Ng (2002) method and "ck"
+#' represents Connor and korajczyk (1993) method. Default is k = 1.
+#' @param refine \code{TRUE} By default, the APCA fit will use the
+#' Connor-Korajczyk refinement.
+#' @param check check if some variables has identical values. Default is FALSE.
+#' @param max.k scalar, select the number that maximum number of factors to be
+#' considered.
+#' @param sig significant level when ck method uses.
+#' @param na.rm if allow missing values. Default is FALSE.
+#' @return
+#' \item{factors}{T x K the estimated factors.}
+#' \item{loadings}{K x N the asset specific factor loadings beta_i.
+#' estimated from regress the asset returns on factors.}
+#' \item{alpha}{1 x N the estimated intercepts alpha_i}
+#' \item{ret.cov}{N x N asset returns sample variance covariance matrix.}
+#' \item{r2}{regression r square value from regress the asset returns on
+#' factors.}
+#' \item{k}{the number of the facotrs.}
+#' \item{eigen}{eigenvalues from the sample covariance matrix.}
+#' \item{residuals}{T x N matrix of residuals from regression.}
+#' \item{asset.ret}{asset returns}
+#' \item{asset.fit}{List of regression lm class of individual returns on
+#' factors.}
+#' \item{residVars.vec}{vector of residual variances.}
+#' \item{mimic}{N x K matrix of factor mimicking portfolio returns.}
+#' @author Eric Zivot and Yi-An Chen
+#' @examples
+#' 
+#' # load data for fitStatisticalFactorModel.r
+#' # data from finmetric berndt.dat and folio.dat
+#' 
+#' data(stat.fm.data)
+#' ##
+#' # sfm.dat is for pca
+#' # sfm.apca.dat is for apca
+#' class(sfm.dat)
+#' class(sfm.apca.dat)
+#' 
+#' # pca
+#' args(fitStatisticalFactorModel)
+#' sfm.pca.fit <- fitStatisticalFactorModel(sfm.dat,k=2)
+#' class(sfm.pca.fit)
+#' names(sfm.pca.fit)
+#' sfm.pca.fit$factors
+#' sfm.pca.fit$loadings
+#' sfm.pca.fit$r2
+#' sfm.pca.fit$residuals
+#' sfm.pca.fit$residVars.vec
+#' sfm.pca.fit$mimic
+#' # apca
+#' sfm.apca.fit <- fitStatisticalFactorModel(sfm.apca.dat,k=1)
+#' names(sfm.apca.fit)
+#' sfm.apca.res <- sfm.apca.fit$residuals
+#' sfm.apca.mimic <- sfm.apca.fit$mimic
+#' # apca with bai and Ng method
+#' sfm.apca.fit.bn <- fitStatisticalFactorModel(sfm.apca.dat,k="bn")
+#' class(sfm.apca.fit.bn)
+#' names(sfm.apca.fit.bn)
+#' sfm.apca.fit.bn$mimic
+#' 
+#' # apca with ck method
+#' sfm.apca.fit.ck <- fitStatisticalFactorModel(sfm.apca.dat,k="ck")
+#' class(sfm.apca.fit.ck)
+#' names(sfm.apca.fit.ck)
+#' sfm.apca.fit.ck$mimic
+#' 
+fitStatisticalFactorModel <-
+function(x, k = 1, refine = TRUE, check = FALSE, max.k = NULL, sig = 0.05, na.rm = FALSE){
+	
+## Inputs:  
+## 
+## x      :  T x N assets returns data which is saved as data.frame class. 
+## k      :  numbers of factors if it is scalar or method of choosing optimal number of factors.
+##           "bn" represents Bai and Ng (2002) method and "ck" represents  Connor and korajczyk
+##          (1993) method. Default is k = 1.
+## refine :      : TRUE By default, the APCA fit will use the Connor-Korajczyk refinement. 
+## check         : check if some variables has identical values. Default is FALSE.
+## max.k         : scalar, select the number that maximum number of factors to be considered.
+## sig           : significant level than ck method uses.
+## na.rm         : if allow missing values. Default is FALSE.
+## Outputs:
+## 
+## factors       : T x K the estimated factors.
+## loadings      : K x N   the asset specific factor loadings beta_i estimated from regress the asset
+##                 returns on factors.
+## alpha         : 1 x N  the estimated intercepts alpha_i
+## Omega         : N x N asset returns sample variance covariance matrix.
+## r2            : regression r square value from regress the asset returns on factors.
+## k             : the number of the facotrs.
+## eigen         : eigenvalues from the sample covariance matrix.
+## residuals     : T x N matrix of residuals from regression.
+# residVars.vec  : vector of residual variances    
+# mimic          : N x K matrix of factor mimicking portfolio returns.   
+ 
+# load package
+require(MASS)  
+  
+  
+ # function of test
+ mfactor.test <- function(x, method = "bn", refine = TRUE, check = FALSE, max.k = NULL, sig = 0.05){
+  
+    if(is.null(max.k)) {
+		max.k <- min(10, nrow(x) - 1)
+	} 	else if (max.k >= nrow(x)) {
+		stop("max.k must be less than the number of observations.")
+	}
+	if(check) {
+		if(mfactor.check(x)) {
+			warning("Some variables have identical observations.")
+			return(list(factors = NA, loadings = NA, k = NA))
+		}
+	}
+	method <- casefold(method)
+	if(method == "bn") {
+		ans <- mfactor.bn(x, max.k, refine = refine)
+	}
+	else if(method == "ck") {
+		ans <- mfactor.ck(x, max.k, refine = refine, sig = sig)
+	}
+	else {
+		stop("Invalid choice for optional argument method.")
+	}
+ return(ans)
+    
+}
+
+ 
+# function of ck
+mfactor.ck <- function(x, max.k, sig = 0.05, refine = TRUE) {
+  
+  n <- ncol(x)
+  m <- nrow(x)
+	idx <- 2 * (1:(m/2))
+	#
+	f <- mfactor.apca(x, k = 1, refine = refine, check = FALSE)
+	f1 <- cbind(1, f$factors)
+	B <- backsolve(chol(crossprod(f1)), diag(2))
+	eps <- x - f1 %*% crossprod(t(B)) %*% crossprod(f1, x)
+	s <- eps^2/(1 - 2/m - 1/n)
+	#	
+	for(i in 2:max.k) {
+		f.old <- f
+		s.old <- s
+		f <- mfactor.apca(x, k = i, refine = refine, check = FALSE)
+		f1 <- cbind(1, f$factors)
+		B <- backsolve(chol(crossprod(f1)), diag(i + 1))
+		eps <- x - f1 %*% crossprod(t(B)) %*% crossprod(f1, x)
+		s <- eps^2/(1 - (i + 1)/m - i/n)
+		delta <- rowMeans(s.old[idx - 1,  , drop = FALSE]) - rowMeans(
+			s[idx,  , drop = FALSE])
+		if(t.test(delta, alternative = "greater")$p.value > sig) {
+			return(f.old)
+		}
+	}
+	return(f)
+}
+
+# funciton of check  
+  mfactor.check <- function(x) {
+  temp <- apply(x, 2, range)
+  if(any(abs(temp[2,  ] - temp[1,  ]) < .Machine$single.eps)) {
+		TRUE
+	}
+	else {
+		FALSE
+	}
+}
+
+  # function of bn
+  mfactor.bn <- function(x, max.k, refine = TRUE) {
+  
+  # Parameters:
+	#         x : T x N return matrix
+	#     max.k : maxinum number of factors to be considered
+		# Returns:
+	#      k : the optimum number of factors
+	n <- ncol(x)
+	m <- nrow(x)
+	s <- vector("list", max.k) 
+	for(i in 1:max.k) {
+		f <- cbind(1, mfactor.apca(x, k = i, refine = refine, check = 
+			FALSE)$factors)
+		B <- backsolve(chol(crossprod(f)), diag(i + 1))
+		eps <- x - f %*% crossprod(t(B)) %*% crossprod(f, x)
+		sigma <- colSums(eps^2)/(m - i - 1)
+		s[[i]] <- mean(sigma)
+	}
+	s <- unlist(s)
+	idx <- 1:max.k
+	Cp1 <- s[idx] + (idx * s[max.k] * (n + m))/(n * m) * log((n * m)/
+		(n + m))
+	Cp2 <- s[idx] + (idx * s[max.k] * (n + m))/(n * m) * log(min(n, m))
+	if(order(Cp1)[1] != order(Cp2)[1]) {
+		warning("Cp1 and Cp2 did not yield same result. The smaller one is used."	)
+	}
+	k <- min(order(Cp1)[1], order(Cp2)[1])
+	f <- mfactor.apca(x, k = k, refine = refine, check = FALSE)
+ return(f)  
+ }
+
+  
+  # function of pca
+  mfactor.pca <- function(x, k, check = FALSE, Omega = NULL) {
+  
+  if(check) {
+		if(mfactor.check(x)) {
+			warning("Some variables have identical observations.")
+			return(list(factors = NA, loadings = NA, k = NA))
+		}
+	}
+	n <- ncol(x)
+	m <- nrow(x)
+	if(is.null(dimnames(x))) {
+		dimnames(x) <- list(1:m, paste("V", 1:n, sep = "."))
+	}
+	x.names <- dimnames(x)[[2]]
+	xc <- t(t(x) - colMeans(x))
+	if(is.null(Omega)) {
+		Omega <- crossprod(xc)/m
+	}
+	eigen.tmp <- eigen(Omega, symm = TRUE)
+  # compute loadings beta
+	B <- t(eigen.tmp$vectors[, 1:k, drop = FALSE])
+  # compute estimated factors
+	f <- x %*% eigen.tmp$vectors[, 1:k, drop = FALSE]
+	tmp <- x - f %*% B
+	alpha <- colMeans(tmp)
+  # compute residuals
+	tmp <- t(t(tmp) - alpha)
+	r2 <- (1 - colSums(tmp^2)/colSums(xc^2))
+	Omega <- t(B) %*% var(f) %*% B
+	diag(Omega) <- diag(Omega) + colSums(tmp^2)/(m - k - 1)
+	dimnames(B) <- list(paste("F", 1:k, sep = "."), x.names)
+	dimnames(f) <- list(dimnames(x)[[1]], paste("F", 1:k, sep = "."))
+	dimnames(Omega) <- list(x.names, x.names)
+	names(alpha) <- x.names
+  # create lm list for plot
+  reg.list = list()
+  for (i in x.names) {
+    reg.df = as.data.frame(cbind(x[,i],f))
+    colnames(reg.df)[1] <- i
+    fm.formula = as.formula(paste(i,"~", ".", sep=" "))
+    fm.fit = lm(fm.formula, data=reg.df)
+    reg.list[[i]] = fm.fit
+    }
+	ans <-  list(factors = f, loadings = B, k = k, alpha = alpha, Omega = Omega,
+	            	r2 = r2, eigen = eigen.tmp$values, residuals=tmp, asset.ret = x,
+               asset.fit=reg.list)
+ 
+  return(ans)
+ 
+}
+
+  # funciont of apca
+  mfactor.apca <- function(x, k, refine = TRUE, check = FALSE, Omega = NULL) {
+  
+  if(check) {
+		if(mfactor.check(x)) {
+			warning("Some variables have identical observations.")
+			return(list(factors = NA, loadings = NA, k = NA))
+		}
+	}
+	n <- ncol(x)
+	m <- nrow(x)
+	if(is.null(dimnames(x))) {
+		dimnames(x) <- list(1:m, paste("V", 1:n, sep = "."))
+	}
+	x.names <- dimnames(x)[[2]]
+	xc <- t(t(x) - colMeans(x))
+	if(is.null(Omega)) {
+		Omega <- crossprod(t(xc))/n
+	}
+	eig.tmp <- eigen(Omega, symmetric = TRUE)
+	f <- eig.tmp$vectors[, 1:k, drop = FALSE]
+	f1 <- cbind(1, f)
+	B <- backsolve(chol(crossprod(f1)), diag(k + 1))
+	B <- crossprod(t(B)) %*% crossprod(f1, x)
+	sigma <- colSums((x - f1 %*% B)^2)/(m - k - 1)
+	if(refine) {
+		xs <- t(xc)/sqrt(sigma)
+		Omega <- crossprod(xs)/n
+		eig.tmp <- eigen(Omega, symm = TRUE)
+		f <- eig.tmp$vectors[, 1:k, drop = FALSE]
+		f1 <- cbind(1, f)
+		B <- backsolve(chol(crossprod(f1)), diag(k + 1))
+		B <- crossprod(t(B)) %*% crossprod(f1, x)
+		sigma <- colSums((x - f1 %*% B)^2)/(m - k - 1)
+	}
+	alpha <- B[1,  ]
+	B <- B[-1,  , drop = FALSE]
+	Omega <- t(B) %*% var(f) %*% B
+	diag(Omega) <- diag(Omega) + sigma
+	dimnames(B) <- list(paste("F", 1:k, sep = "."), x.names)
+	dimnames(f) <- list(dimnames(x)[[1]], paste("F", 1:k, sep = "."))
+	names(alpha) <- x.names
+	res <- t(t(x) - alpha) - f %*% B
+	r2 <- (1 - colSums(res^2)/colSums(xc^2))
+  ans <- 	list(factors = f, loadings = B, k = k, alpha = alpha, Omega = Omega,
+		           r2 = r2, eigen = eig.tmp$values, residuals=res,asset.ret = x)
+ return(ans)
+}
+
+  call <- match.call()  
+  pos <- rownames(x)
+	x <- as.matrix(x)
+	if(any(is.na(x))) {
+		if(na.rm) {
+			x <- na.omit(x)
+		}		else {
+			stop("Missing values are not allowed if na.rm=F.")
+		}
+	}
+	# use PCA if T > N
+	if(ncol(x) < nrow(x)) {
+		if(is.character(k)) {
+			stop("k must be the number of factors for PCA.")
+		}
+		if(k >= ncol(x)) {
+			stop("Number of factors must be smaller than number of variables."
+				)
+		}
+		ans <- mfactor.pca(x, k, check = check)
+	}	else if(is.character(k)) {
+		ans <- mfactor.test(x, k, refine = refine, check = 
+			check, max.k = max.k, sig = sig)
+	}	else { # use aPCA if T <= N
+		if(k >= ncol(x)) {
+			stop("Number of factors must be smaller than number of variables."
+				)
+		}
+		ans <- mfactor.apca(x, k, refine = refine, check = 
+			check)
+	}
+  
+  # mimic function
+  f <- ans$factors
+	
+	if(is.data.frame(f)) {
+		f <- as.matrix(f)
+	}
+
+	if(nrow(x) < ncol(x)) {
+		mimic <- ginv(x) %*% f
+	}	else {
+		mimic <- qr.solve(x, f)
+	}
+	
+  mimic <- t(t(mimic)/colSums(mimic))
+	dimnames(mimic)[[1]] <- dimnames(x)[[2]]
+  
+  ans$mimic <- mimic
+  ans$residVars.vec <- apply(ans$residuals,2,var)
+  ans$call <- call
+class(ans) <- "StatFactorModel"
+  return(ans)
+}
+



More information about the Returnanalytics-commits mailing list