[Depmix-commits] r405 - in pkg/depmixS4: R man

Tue Mar 9 20:12:49 CET 2010

Author: maarten
Date: 2010-03-09 20:12:49 +0100 (Tue, 09 Mar 2010)
New Revision: 405

Modified:
   pkg/depmixS4/R/EM.R
   pkg/depmixS4/R/depmixfit.R
   pkg/depmixS4/man/depmix.fit.Rd
Log:
- added random.start option to EM
- fit method now has an em.control argument

Modified: pkg/depmixS4/R/EM.R
===================================================================

--- pkg/depmixS4/R/EM.R	2010-03-09 16:21:10 UTC (rev 404)
+++ pkg/depmixS4/R/EM.R	2010-03-09 19:12:49 UTC (rev 405)
@@ -2,7 +2,7 @@
 # Maarten Speekenbrink 23-3-2008
 # 
 
-em <- function(object,maxit=100,tol=1e-8,crit=c("relative","absolute"),verbose=FALSE,...) {
+em <- function(object,...) {
 	if(!is(object,"mix")) stop("object is not of class '(dep)mix'")
 	call <- match.call()
 	if(is(object,"depmix")) {
@@ -15,13 +15,13 @@
 }
 
 # em for lca and mixture models
-em.mix <- function(object,maxit=100,tol=1e-8,crit=c("relative","absolute"),verbose=FALSE,...) {
+em.mix <- function(object,maxit=100,tol=1e-8,crit=c("relative","absolute"),random.start=FALSE,verbose=FALSE,...) {
 	
 	if(!is(object,"mix")) stop("object is not of class 'mix'")
 	
 	crit <- match.arg(crit)
 	
-	ns <- object at nstates
+	ns <- nstates(object)
 	ntimes <- ntimes(object)
 	lt <- length(ntimes)
 	et <- cumsum(ntimes)
@@ -30,13 +30,18 @@
 	converge <- FALSE
 	j <- 0
 	
-	# compute responsibilities
-	B <- apply(object at dens,c(1,3),prod)
-	gamma <- object at init*B
-	LL <- sum(log(rowSums(gamma)))
-	# normalize
-	gamma <- gamma/rowSums(gamma)
-	
+	if(random.start) {
+		nr <- sum(ntimes(object))
+		gamma <- matrix(runif(nr*ns,min=.0001,max=.9999),nr=nr,nc=ns)
+		gamma <- gamma/rowSums(gamma)
+	} else {
+		# compute responsibilities
+		B <- apply(object at dens,c(1,3),prod)
+		gamma <- object at init*B
+		LL <- sum(log(rowSums(gamma)))
+		# normalize
+		gamma <- gamma/rowSums(gamma)
+	}
 	LL.old <- LL + 1
 	
 	while(j <= maxit & !converge) {
@@ -104,12 +109,12 @@
 }
 
 # em for hidden markov models
-em.depmix <- function(object,maxit=100,tol=1e-8,crit=c("relative","absolute"),verbose=FALSE,...) {
+em.depmix <- function(object,maxit=100,tol=1e-8,crit=c("relative","absolute"),random.start=FALSE,verbose=FALSE,...) {
 	
 	if(!is(object,"depmix")) stop("object is not of class '(dep)mix'")
 	crit <- match.arg(crit)
 	
-	ns <- object at nstates
+	ns <- nstates(object)
 	
 	ntimes <- ntimes(object)
 	lt <- length(ntimes)
@@ -128,6 +133,12 @@
 	LL <- fbo$logLike
 	LL.old <- LL + 1
 	
+	if(random.start) {
+		nr <- sum(ntimes(object))
+		fbo$gamma <- matrix(runif(nr*ns,min=.0001,max=.9999),nr=nr,nc=ns)
+		fbo$gamma <- fbo$gamma/rowSums(fbo$gamma)
+	}
+	
 	while(j <= maxit & !converge) {
 		
 		# maximization

Modified: pkg/depmixS4/R/depmixfit.R
===================================================================
--- pkg/depmixS4/R/depmixfit.R	2010-03-09 16:21:10 UTC (rev 404)
+++ pkg/depmixS4/R/depmixfit.R	2010-03-09 19:12:49 UTC (rev 405)
@@ -1,7 +1,7 @@
 
 setMethod("fit",
     signature(object="mix"),
-    function(object,fixed=NULL,equal=NULL,conrows=NULL,conrows.upper=0,conrows.lower=0,method=NULL,tol=1e-8,crit=c("relative","absolute"),verbose=TRUE,...) {
+    function(object,fixed=NULL,equal=NULL,conrows=NULL,conrows.upper=0,conrows.lower=0,method=NULL,em.control=list(tol=1e-8,crit=c("relative","absolute"),random.start=FALSE),verbose=TRUE,...) {
 	
 		fi <- !is.null(fixed)
 		cr <- !is.null(conrows)
@@ -30,7 +30,10 @@
 		if(is.nan(logLik(object))) stop("Initial model infeasible, log likelihood is NaN; please provide better starting values. ")
 		
 		if(method=="EM") {
-			object <- em(object,tol=tol,crit=crit,verbose=verbose,...)
+			if(is.null(em.control$tol)) em.control$tol <- 1e-8
+			if(is.null(em.control$crit)) em.control$crit <- "relative"
+			if(is.null(em.control$random.start)) em.control$random.start <- FALSE
+			object <- em(object,tol=em.control$tol,crit=em.control$crit,random.start=em.control$random.start,verbose=verbose,...)
 		}
 		
 		if(method=="donlp"||method=="rsolnp") {

Modified: pkg/depmixS4/man/depmix.fit.Rd
===================================================================
--- pkg/depmixS4/man/depmix.fit.Rd	2010-03-09 16:21:10 UTC (rev 404)
+++ pkg/depmixS4/man/depmix.fit.Rd	2010-03-09 19:12:49 UTC (rev 405)
@@ -28,14 +28,14 @@
 \usage{
 	
 	\S4method{fit}{depmix}(object, fixed=NULL, equal=NULL, conrows=NULL,
-		conrows.upper=0, conrows.lower=0, method=NULL,tol=1e-8,
-		crit=c("relative","absolute"),verbose=TRUE,...)
+		conrows.upper=0, conrows.lower=0, method=NULL,em.control=list(tol=1e-8,
+		crit=c("relative","absolute"),random.start=FALSE),verbose=TRUE,...)
 	
 	\S4method{summary}{depmix.fitted}(object,which="all")
 
 	\S4method{fit}{mix}(object, fixed=NULL, equal=NULL, conrows=NULL,
-		conrows.upper=0, conrows.lower=0, method=NULL,tol=1e-8,
-		crit=c("relative","absolute"),verbose=TRUE,...)
+		conrows.upper=0, conrows.lower=0, method=NULL,em.control=list(tol=1e-8,
+		crit=c("relative","absolute"),random.start=FALSE),verbose=FALSE,...)
 	
 	\S4method{summary}{mix.fitted}(object,which="all")
 
@@ -59,11 +59,8 @@
 	\item{method}{The optimization method; mostly determined by
 		constraints.}
 		
-	\item{tol}{The tolerance level for convergence.}
+	\item{em.control}{Named list with control parameters for the EM algorithm (see details).}
   
-	\item{crit}{The convergence criterion in the EM algorithm; either the relative
-  		change in the log likelihood, or the absolute change in the log-likelihood.}
-  
 	\item{verbose}{Should optimization information be displayed on screen?}
 	
 	\item{which}{Should summaries be provided for "all" submodels? Options 
@@ -76,7 +73,9 @@
 \details{ 
 
 	Models are fitted by the EM algorithm if there are no constraints on
-	the parameters.  Otherwise the general optimizers \code{solnp}, the
+	the parameters. Aspects of the EM algorithm can be controlled through
+	the \code{control.em} argument (see below). 
+	Otherwise the general optimizers \code{solnp}, the
 	default (from package \code{Rsolnp}) or \code{donlp2} (from package
 	\code{Rdonlp2}) are used which handle general linear (in-)equality
 	constraints.
@@ -118,6 +117,28 @@
 	
 }
 
+\section{Control parameters for the EM algorithm}{
+  Aspects of EM algorithm can be controlled by the \code{em.control} argument.
+  This named list currently observes the following parameters:
+  \describe{
+  \item{\code{tol}:}{sets the the tolerance level for convergence.}
+  
+  \item{\code{criterion}:}{sets the convergence criterion to 	either the relative 
+  change in the log-likelihood or the absolute change in the log-likelihood.
+  The relative likelihood criterion (the default) assumes convergence on iteration 
+  \eqn{i}{i} when \eqn{\frac{\log L(i) - \log L(i-1)}{\log L(i-1)} < \text{tol}}{%
+  (logLik(i) - logLik(i-1))/logLik(i-1) < tol}. The absolute likelihood
+  criterion assumes convergence on iteration i when 
+  \eqn{\log L(i) - \log L(i-1) < tol}{(logLik(i) - logLik(i-1)) < tol}.}
+  
+  \item{\code{random.start}:}{is used for a (limited) random initialization of the parameters. In 
+  particular, if \code{random.start=TRUE}, the (posterior) state probabilities are randomized
+  at iteration 0 (using a uniform distribution). Random initialization is useful when no
+  initial parameters can be given to distinguish between the states. It is also useful for
+  repeated estimation from different starting values.}
+  }
+}
+
 \value{
 	
 	\code{fit} returns an object of class