[Depmix-commits] r584 - in pkg/depmixS4: . R man

Wed Sep 4 11:21:17 CEST 2013

Author: maarten
Date: 2013-09-04 11:21:17 +0200 (Wed, 04 Sep 2013)
New Revision: 584

Modified:
   pkg/depmixS4/NEWS
   pkg/depmixS4/R/EM.R
   pkg/depmixS4/man/em.control.Rd
Log:
- To increase speed with large objects, in em.mix and em.depmix, we now make local copies of slots, update these, and finally assign them back to the object, rather than repeatedly changing the slots in the object itself. 

Modified: pkg/depmixS4/NEWS
===================================================================

--- pkg/depmixS4/NEWS	2013-09-03 21:10:22 UTC (rev 583)
+++ pkg/depmixS4/NEWS	2013-09-04 09:21:17 UTC (rev 584)
@@ -25,16 +25,21 @@
     the first level of a factor is treated as a failure, and the remaining
     levels as successes.
 
-	o Un-fitted models now also have a summary method, which is identical to 
+  o Un-fitted models now also have a summary method, which is identical to 
     the show method for these models.  
 
   o Added function getmodel(object) to select submodels of a full mix or 
-	  depmix model, eg to use in deriving predictions, getting at parameter 
-		values, et cetera. 
+    depmix model, eg to use in deriving predictions, getting at parameter 
+    values, et cetera. 
 
-	o Small parameter values in multinomial response models and the initial
-		probabilities and transition models (with identity link) are now set to
-		zero to speed-up convergence.  Current threshold is 1e-6. 
+  o Small parameter values in multinomial response models and the initial
+    probabilities and transition models (with identity link) are now set to
+    zero to speed-up convergence.  Current threshold is 1e-6.
+  
+  o Updated the EM routines to make local copies of objects slots to 
+    avoid repeatedly copying large objects. Thanks to Robert McGehee for 
+    this suggestion.
+    
 
 Changes in depmixS4 version 1.2-2
 

Modified: pkg/depmixS4/R/EM.R
===================================================================
--- pkg/depmixS4/R/EM.R	2013-09-03 21:10:22 UTC (rev 583)
+++ pkg/depmixS4/R/EM.R	2013-09-04 09:21:17 UTC (rev 584)
@@ -1,6 +1,7 @@
-# 
-# Maarten Speekenbrink 23-3-2008
-# 
+# Author: Maarten Speekenbrink
+# With suggestions from Robert McGehee for speeding up the code for large objects
+# First version: 23-3-2008
+# latest change:
 
 rdirichlet <- function(n, alpha) {
   # taken from gtools
@@ -23,6 +24,66 @@
     out
 }
 
+emviterbi <- function(A,B,init,ntimes,nstates,stationary,na.allow=TRUE) {
+    # used for EM with hard classification, so that we don't need to change the object...
+    # returns the most likely state sequence
+    nt <- sum(ntimes)
+    lt <- length(ntimes)
+    et <- cumsum(ntimes)
+    bt <- c(1,et[-lt]+1)
+    
+    ns <- nstates
+    
+    delta <- psi <- matrix(nrow=nt,ncol=ns)
+    state <- vector(length=nt)
+    
+    prior <- init
+    
+    #if(max(ntimes>1)) A <- object at trDens
+    #B <- object at dens
+    if(na.allow) B <- replace(B,is.na(B),1)
+    B <- apply(B,c(1,3),prod)
+    
+    for(case in 1:lt) {
+      # initialization
+      delta[bt[case],] <- prior[case,]*B[bt[case],]
+      delta[bt[case],] <- delta[bt[case],]/(sum(delta[bt[case],]))
+      psi[bt[case],] <- 0
+      # recursion
+      if(ntimes[case]>1) {
+        for(tt in ((bt[case]+1):et[case])) {
+          for(j in 1:ns) {
+            if(!stationary) {
+              delta[tt,j] <- max(delta[tt-1,]*(A[tt,j,]))*B[tt,j]
+              k <- which.max(delta[tt-1,]*A[tt,j,])
+            } else {
+              delta[tt,j] <- max(delta[tt-1,]*(A[1,j,]))*B[tt,j]
+              k <- which.max(delta[tt-1,]*A[1,j,])
+            }
+            if(length(k) == 0) k <- 0 # what's this doing here??? can this ever occur? FIX ME
+            psi[tt,j] <- k
+          }
+          delta[tt,] <- delta[tt,]/(sum(delta[tt,]))
+          
+        }
+      }
+      
+      # trace maximum likely state
+      state[et[case]] <- which.max(delta[et[case],])
+      
+      # this doesn't need a for loop does it???? FIX ME
+      if(ntimes[case]>1) {
+        for(i in (et[case]-1):bt[case]) {
+          state[i] <- psi[i+1,state[i+1]]
+        }
+      }
+    }
+    
+    delta <- data.frame(state,delta) 	
+    return(delta)
+}
+
+
 em <- function(object,...) {
 	if(!is(object,"mix")) stop("object is not of class '(dep)mix'")
 	call <- match.call()
@@ -36,9 +97,10 @@
 }
 
 # em for lca and mixture models
-em.mix <- function(object,maxit=100,tol=1e-8,crit="relative",random.start=TRUE,verbose=FALSE,classification=c("soft","hard"),na.allow=TRUE,...) {
+em.mix <- function(object,maxit=100,tol=1e-8,crit=c("relative","absolute"),random.start=TRUE,verbose=FALSE,classification=c("soft","hard"),na.allow=TRUE,...) {
 	
 	clsf <- match.arg(classification)
+	crit <- match.arg(crit)
 	
 	if(!is(object,"mix")) stop("object is not of class 'mix'")
 		
@@ -47,15 +109,17 @@
 	lt <- length(ntimes)
 	et <- cumsum(ntimes)
 	bt <- c(1,et[-lt]+1)
-	
+  
+	prior <- object at prior
+	response <- object at response
+	dens <- object at dens
+	init <- dens(object at prior)
+
 	converge <- FALSE
-	j <- 0
 	
 	if(random.start) {
-				
 		nr <- sum(ntimes(object))
 		gamma <- rdirichlet(nr,alpha=rep(.01,ns))
-		
 		if(clsf == "hard") {
 		    gamma <- t(apply(gamma,1,ind.max))
 		}
@@ -63,21 +127,20 @@
 		
 		for(i in 1:ns) {
 			for(k in 1:nresp(object)) {
-			    object at response[[i]][[k]] <- fit(object at response[[i]][[k]],w=gamma[,i])
+			  response[[i]][[k]] <- fit(response[[i]][[k]],w=gamma[,i])
 				# update dens slot of the model
-				object at dens[,k,i] <- dens(object at response[[i]][[k]])
+				dens[,k,i] <- dens(response[[i]][[k]])
 			}
 		}
-		
 		# initial expectation
 		if(clsf == "hard") {
 		  fbo <- list()
 		  vstate <- apply(gamma,1,which.max)
-		  B <- object at dens
+		  B <- dens
 		  if(na.allow) B[is.na(B)] <- 1
 		  fbo$logLike <- sum(log((apply(B,c(1,3),prod))[cbind(1:sum(ntimes),vstate)]))
 		} else {
-		  fbo <- fb(init=object at init,matrix(0,1,1),B=object at dens,ntimes=ntimes(object))
+		  fbo <- fb(init=init,matrix(0,1,1),B=dens,ntimes=ntimes(object))
 		}
 		LL <- fbo$logLike
 		
@@ -85,78 +148,45 @@
 		
 	} else {
 		# initial expectation
-		# treat missing values: FIX ME WITH NA.ALLOW OPTION OR SO. 
-		# WHY NOT USE fb TO COMPUTE LL AND GAMMA?
-		fbo <- fb(init=object at init,A=matrix(0,1,1),B=object at dens,ntimes=ntimes(object))
-				
-		#LL <- fbo$logLike
-		#if(is.nan(LL)) stop("Starting values not feasible; please provide them.")
-		
-		
-		#B <- object at dens
-		#B <- replace(B,is.na(B) & !is.nan(B),1)
-		#B <- apply(object at dens,c(1,3),prod)
-		
+		fbo <- fb(init=init,A=matrix(0,1,1),B=dens,ntimes=ntimes(object))
 		if(clsf == "hard") {
-		  #gamma <- t(apply(object at init*B,1,ind.max))
-          #LL <- sum(log(rowSums(gamma*B)))
-          fbo$gamma <- t(apply(fbo$gamma,1,ind.max))
+      fbo$gamma <- t(apply(fbo$gamma,1,ind.max))
 		  vstate <- apply(fbo$gamma,1,which.max)
 		  B <- object at dens
 		  if(na.allow) B[is.na(B)] <- 1
 		  fbo$logLike <- sum(log((apply(B,c(1,3),prod))[cbind(1:sum(ntimes),vstate)]))
           
-          #LL <- sum(log(rowSums(gamma*B)))
-		} #else {
-		  #gamma <- object at init*B
-		  #LL <- sum(log(rowSums(gamma)))
-		  #gamma <- gamma/rowSums(gamma)
-		#}
+		}
 		LL <- fbo$logLike
 		if(is.nan(LL)) stop("Starting values not feasible; please provide them.")
-		
 	}
 	
 	LL.old <- LL + 1
 	
-	while(j <= maxit & !converge) {
+	for(j in 0:maxit) {
 		
-		# maximization
+		# maximization		
+		prior at y <- fbo$gamma[bt,,drop=FALSE]
+		prior <- fit(prior, w=NULL,ntimes=NULL)
+		init <- dens(prior)
 		
-		# should become object at prior <- fit(object at prior)
-		
-		object at prior@y <- fbo$gamma[bt,,drop=FALSE]
-		object at prior <- fit(object at prior, w=NULL,ntimes=NULL)
-		object at init <- dens(object at prior)
-		
 		for(i in 1:ns) {
 			for(k in 1:nresp(object)) {
-				object at response[[i]][[k]] <- fit(object at response[[i]][[k]],w=fbo$gamma[,i])
+				response[[i]][[k]] <- fit(response[[i]][[k]],w=fbo$gamma[,i])
 				# update dens slot of the model
-				object at dens[,k,i] <- dens(object at response[[i]][[k]])
+				dens[,k,i] <- dens(response[[i]][[k]])
 			}
 		}
 		
 		# expectation
-		# treat missing values: FIX ME WITH NA.ALLOW OPTION OR SO. 
-		# WHY NOT USE fb TO COMPUTE LL AND GAMMA?
-		#B <- object at dens
-		#B <- replace(B,is.na(B) & !is.nan(B),1)
-		#B <- apply(object at dens,c(1,3),prod)
-		
-		fbo <- fb(init=object at init,A=matrix(0,1,1),B=object at dens,ntimes=ntimes(object))
+		fbo <- fb(init=init,A=matrix(0,1,1),B=dens,ntimes=ntimes(object))
 		if(clsf == "hard") {
 		  fbo$gamma <- t(apply(fbo$gamma,1,ind.max))
 		  vstate <- apply(fbo$gamma,1,which.max)
-		  B <- object at dens
+		  B <- dens
 		  if(na.allow) B[is.na(B)] <- 1
 		  fbo$logLike <- sum(log((apply(B,c(1,3),prod))[cbind(1:sum(ntimes),vstate)]))
-		  #LL <- sum(log(rowSums(gamma*B)))
-		} #else {
-		#	fbo <- fb(init=object at init,matrix(0,1,1),B=object at dens,ntimes=ntimes(object))
-		#	gamma <- fbo$gamma
-		#  LL <- fbo$logLike
-		#}
+		}
 		LL <- fbo$logLike
 		# print stuff
 		if(verbose&((j%%5)==0)) {
@@ -164,19 +194,23 @@
 		}
 		
 		if(LL >= LL.old) {
-		  if((crit == "absolute" &&  LL - LL.old < tol) || (crit == "relative" && (LL - LL.old)/abs(LL.old)  < tol)) {
-			  cat("iteration",j,"logLik:",LL,"\n")
-			  converge <- TRUE
+		  converge <- (crit == "absolute" &&  LL - LL.old < tol) || (crit == "relative" && (LL - LL.old)/abs(LL.old)  < tol)
+      if(converge) {
+			  cat("converged at iteration",j,"with logLik:",LL,"\n")
+			  break
 			}
 		} else {
 			# this should not really happen...
-			if(j > 0 && (LL.old - LL) > tol) warning("likelihood decreased on iteration ",j)
+			if(j > 0 && (LL.old - LL) > tol) stop("likelihood decreased on iteration ",j)
 		}
 
 		LL.old <- LL
-		j <- j+1
-
 	}
+  
+	object at prior <- prior
+	object at init <- init
+	object at response <- response
+	object at dens <- dens
 
 	if(clsf == "hard") {
 	    object <- as(object,"mix.fitted.classLik") # class(object) <- "mix.fitted.classLik"
@@ -212,27 +246,30 @@
 }
 
 # em for hidden markov models
-em.depmix <- function(object,maxit=100,tol=1e-8,crit="relative",random.start=TRUE,verbose=FALSE,classification=c("soft","hard"),na.allow=TRUE,...) {
+em.depmix <- function(object,maxit=100,tol=1e-8,crit=c("relative","absolute"),random.start=TRUE,verbose=FALSE,classification=c("soft","hard"),na.allow=TRUE,...) {
 	
 	if(!is(object,"depmix")) stop("object is not of class 'depmix'")
 	
 	clsf <- match.arg(classification)
-	
+	crit <- match.arg(crit)
+  
 	ns <- nstates(object)
 	
 	ntimes <- ntimes(object)
 	lt <- length(ntimes)
 	et <- cumsum(ntimes)
 	bt <- c(1,et[-lt]+1)
+  
+	prior <- object at prior
+	transition <- object at transition
+	trDens <- object at trDens
+	response <- object at response
+	dens <- object at dens
+	init <- dens(object at prior)
 	
-	converge <- FALSE
-	j <- 0
-	
 	if(random.start) {
 				
 		nr <- sum(ntimes(object))
-		#gamma <- matrix(runif(nr*ns,min=.0001,max=.9999),nrow=nr,ncol=ns)
-		#gamma <- gamma/rowSums(gamma)
 		gamma <- rdirichlet(nr,alpha=rep(.01,ns))
 		if(clsf == "hard") {
 		    gamma <- t(apply(gamma,1,ind.max))
@@ -241,118 +278,108 @@
 		
 		for(i in 1:ns) {
 			for(k in 1:nresp(object)) {
-				object at response[[i]][[k]] <- fit(object at response[[i]][[k]],w=gamma[,i])
+				response[[i]][[k]] <- fit(response[[i]][[k]],w=gamma[,i])
 				# update dens slot of the model
-				object at dens[,k,i] <- dens(object at response[[i]][[k]])
+				dens[,k,i] <- dens(response[[i]][[k]])
 			}
 		}
-		
-		# initial expectation
-	  if(clsf == "hard") {
-	      fbo <- list()
-		  vstate <- viterbi(object)[,1]
-		  fbo$gamma <- as.matrix(model.matrix(~ factor(vstate,levels=1:ns) - 1))
-		  fbo$xi <- array(0,dim=c(sum(ntimes),ns,ns))
-		  fbo$xi[cbind(1:(sum(ntimes)- 1),vstate[-1],vstate[-length(vstate)])] <- 1
-		  B <- object at dens
-		  if(na.allow) B[is.na(B)] <- 1
-		  fbo$logLike <- sum(log((apply(B,c(1,3),prod))[cbind(1:sum(ntimes),vstate)]))
-		} else {
-		  fbo <- fb(init=object at init,A=object at trDens,B=object at dens,ntimes=ntimes(object),stationary=object at stationary)
-		}
-		LL <- fbo$logLike
-		
-		if(is.nan(LL)) stop("Cannot find suitable starting values; please provide them.")
-		
+	}
+
+	# initial expectation
+  if(clsf == "hard") {
+    fbo <- list()
+	  vstate <- emviterbi(A=trDens,B=dens,init=init,ntimes=object at ntimes,nstates=ns,stationary=object at stationary,na.allow=na.allow)[,1]
+	  fbo$gamma <- as.matrix(model.matrix(~ factor(vstate,levels=1:ns) - 1))
+	  fbo$xi <- array(0,dim=c(sum(ntimes),ns,ns))
+	  fbo$xi[cbind(1:(sum(ntimes)- 1),vstate[-1],vstate[-length(vstate)])] <- 1
+	  B <- dens
+	  if(na.allow) B[is.na(B)] <- 1
+	  fbo$logLike <- sum(log((apply(B,c(1,3),prod))[cbind(1:sum(ntimes),vstate)]))
 	} else {
-		# initial expectation
-	  if(clsf == "hard") {
-	    fbo <- list()
-		  vstate <- viterbi(object)[,1]
-		  fbo$gamma <- as.matrix(model.matrix(~ factor(vstate,levels=1:ns) - 1))
-		  fbo$xi <- array(0,dim=c(sum(ntimes),ns,ns))
-		  fbo$xi[cbind(1:(sum(ntimes)- 1),vstate[-1],vstate[-length(vstate)])] <- 1
-		  B <- object at dens
-		  if(na.allow) B[is.na(B)] <- 1
-		  fbo$logLike <- sum(log((apply(B,c(1,3),prod))[cbind(1:sum(ntimes),vstate)]))
-		} else {
-		  fbo <- fb(init=object at init,A=object at trDens,B=object at dens,ntimes=ntimes(object),stationary=object at stationary)
-	  }
-		LL <- fbo$logLike
-		if(is.nan(LL)) stop("Starting values not feasible; please provide them.")
-	}
+	  fbo <- fb(init=init,A=trDens,B=dens,ntimes=ntimes(object),stationary=object at stationary)
+  }
+	LL <- fbo$logLike
+	if(is.nan(LL)) stop("Starting values not feasible; please provide them.")
+	LL.old <- LL + 1 # force the "old" likelihood to be larger...
 	
-	LL.old <- LL + 1
-	
-	while(j <= maxit & !converge) {
+	converge <- FALSE
+	for(j in 0:maxit) {
 		
 		# maximization
+		prior at y <- fbo$gamma[bt,,drop=FALSE]
+		prior <- fit(prior, w=NULL, ntimes=NULL)
+		init <- dens(prior)
 				
-		# should become object at prior <- fit(object at prior, gamma)
-			
-		object at prior@y <- fbo$gamma[bt,,drop=FALSE]
-		object at prior <- fit(object at prior, w=NULL, ntimes=NULL)
-		object at init <- dens(object at prior)
-				
 		trm <- matrix(0,ns,ns)
 		for(i in 1:ns) {
 			if(!object at stationary) {
-				object at transition[[i]]@y <- fbo$xi[,,i]/fbo$gamma[,i]
-				object at transition[[i]] <- fit(object at transition[[i]],w=as.matrix(fbo$gamma[,i]),ntimes=ntimes(object)) # check this
+				transition[[i]]@y <- fbo$xi[,,i]/fbo$gamma[,i]
+				transition[[i]] <- fit(transition[[i]],w=as.matrix(fbo$gamma[,i]),ntimes=ntimes(object)) # check this
 			} else {
 				for(k in 1:ns) {
 					trm[i,k] <- sum(fbo$xi[-c(et),k,i])/sum(fbo$gamma[-c(et),i])
 				}
 				# FIX THIS; it will only work with specific trinModels
 				# should become object at transition = fit(object at transition, xi, gamma)
-				object at transition[[i]]@parameters$coefficients <- switch(object at transition[[i]]@family$link,
-					identity = object at transition[[i]]@family$linkfun(trm[i,]),
-					mlogit = object at transition[[i]]@family$linkfun(trm[i,],base=object at transition[[i]]@family$base),
-					object at transition[[i]]@family$linkfun(trm[i,])
+				transition[[i]]@parameters$coefficients <- switch(transition[[i]]@family$link,
+					identity = transition[[i]]@family$linkfun(trm[i,]),
+					mlogit = transition[[i]]@family$linkfun(trm[i,],base=transition[[i]]@family$base),
+					transition[[i]]@family$linkfun(trm[i,])
 				)
 			}
 			# update trDens slot of the model
-			object at trDens[,,i] <- dens(object at transition[[i]])
+			trDens[,,i] <- dens(transition[[i]])
 		}
 		
 		for(i in 1:ns) {
 			for(k in 1:nresp(object)) {
-				object at response[[i]][[k]] <- fit(object at response[[i]][[k]],w=fbo$gamma[,i])
+				response[[i]][[k]] <- fit(response[[i]][[k]],w=fbo$gamma[,i])
 				# update dens slot of the model
-				object at dens[,k,i] <- dens(object at response[[i]][[k]])
+				dens[,k,i] <- dens(response[[i]][[k]])
 			}
 		}
 		
 		if(clsf == "hard") {
-		  vstate <- viterbi(object)[,1]
+      fbo <- list()
+      vstate <- emviterbi(A=trDens,B=dens,init=init,ntimes=object at ntimes,nstates=ns,stationary=object at stationary,na.allow=na.allow)[,1]
+		  #vstate <- viterbi(object)[,1]
 		  fbo$gamma <- as.matrix(model.matrix(~ factor(vstate,levels=1:ns) - 1))
 		  fbo$xi <- array(0,dim=c(sum(ntimes),ns,ns))
 		  fbo$xi[cbind(1:(sum(ntimes)- 1),vstate[-1],vstate[-length(vstate)])] <- 1
-		  B <- object at dens
+		  B <- dens
 		  if(na.allow) B[is.na(B)] <- 1
 		  fbo$logLike <- sum(log((apply(B,c(1,3),prod))[cbind(1:sum(ntimes),vstate)]))
 		} else {
 		  # expectation
-		  fbo <- fb(init=object at init,A=object at trDens,B=object at dens,ntimes=ntimes(object),stationary=object at stationary)	  
+		  fbo <- fb(init=init,A=trDens,B=dens,ntimes=ntimes(object),stationary=object at stationary)	  
 	  }
 	  
 	  LL <- fbo$logLike	
-		if(verbose&((j%%5)==0)) cat("iteration",j,"logLik:",LL,"\n")
 		
+
 		if( (LL >= LL.old)) {
-		  if((crit == "absolute" &&  LL - LL.old < tol) || (crit == "relative" && (LL - LL.old)/abs(LL.old)  < tol)) {
-			  cat("iteration",j,"logLik:",LL,"\n")
-			  converge <- TRUE
+		  converge <- (crit == "absolute" &&  LL - LL.old < tol) || (crit == "relative" && (LL - LL.old)/abs(LL.old)  < tol) 
+      if(converge) {
+			  cat("converged at iteration",j,"with logLik:",LL,"\n")
+        break
 			}
 		} else {
 		  # this should not really happen...
-			if(j > 0 && (LL.old - LL) > tol) warning("likelihood decreased on iteration ",j)
+		  if(j > 0 && (LL.old - LL) > tol) stop("likelihood decreased on iteration ",j)
 		}
 		
 		LL.old <- LL
-		j <- j+1
+		#j <- j+1
 		
+		if(verbose&((j%%5)==0)) cat("iteration",j,"logLik:",LL,"\n")
 	}
+                      
+  object at prior <- prior
+  object at init <- init
+  object at transition <- transition
+  object at trDens <- trDens
+  object at response <- response
+  object at dens <- dens
 		
 	if(clsf == "hard") {
 	    object <- as(object,"depmix.fitted.classLik") # class(object) <- "depmix.fitted.classLik"

Modified: pkg/depmixS4/man/em.control.Rd
===================================================================
--- pkg/depmixS4/man/em.control.Rd	2013-09-03 21:10:22 UTC (rev 583)
+++ pkg/depmixS4/man/em.control.Rd	2013-09-04 09:21:17 UTC (rev 584)
@@ -61,7 +61,8 @@
 the state. When using hard classification, observations are assigned to states
 according to the maximum a posteriori (MAP) states (i.e., each observation
 is assigned to one state, which is determined by the Viterbi algorithm in the
-case of \code{depmix} models).
+case of \code{depmix} models). Warning: hard classification is mainly 
+implemented for testing purposes and its use is currently not advised.
 
 }