[Depmix-commits] r531 - in pkg/depmixS4: . R man

Thu Jun 21 16:12:50 CEST 2012

Author: maarten
Date: 2012-06-21 16:12:50 +0200 (Thu, 21 Jun 2012)
New Revision: 531

Modified:
   pkg/depmixS4/NEWS
   pkg/depmixS4/R/EM.R
   pkg/depmixS4/R/em.control.R
   pkg/depmixS4/R/responseGLM.R
   pkg/depmixS4/man/depmix.fit.Rd
   pkg/depmixS4/man/em.control.Rd
Log:
added classification likelihood in em.control and EM for mix and depmix models

Modified: pkg/depmixS4/NEWS
===================================================================

--- pkg/depmixS4/NEWS	2012-06-21 10:28:52 UTC (rev 530)
+++ pkg/depmixS4/NEWS	2012-06-21 14:12:50 UTC (rev 531)
@@ -1,6 +1,20 @@
-Changes in depmixS4 version 1.1-2
+Changes in depmixS4 version 1.3-0
 
 Major changes
+
+  o Fit will now allow a choice between maximising the regular or 
+    classification likelihood.
+
+Minor changes
+
+  o Binomial models now treat factors in the same way as glm(); that is
+    the first level of a factor is treated as a failure, and the remaining
+    levels as successes.
+    
+
+Changes in depmixS4 version 1.2-0
+
+Major changes
   
   o Missing values for responses are now allowed. Note that missing values 
 	in covariates will cause errors. 

Modified: pkg/depmixS4/R/EM.R
===================================================================
--- pkg/depmixS4/R/EM.R	2012-06-21 10:28:52 UTC (rev 530)
+++ pkg/depmixS4/R/EM.R	2012-06-21 14:12:50 UTC (rev 531)
@@ -157,14 +157,12 @@
 }
 
 # em for hidden markov models
-em.depmix <- function(object,maxit=100,tol=1e-8,crit="relative",random.start=TRUE,verbose=FALSE, classification=c("soft","hard"),...) {
+em.depmix <- function(object,maxit=100,tol=1e-8,crit="relative",random.start=TRUE,verbose=FALSE,classification=c("soft","hard"),...) {
 	
 	if(!is(object,"depmix")) stop("object is not of class 'depmix'")
 	
 	clsf <- match.arg(classification)
 	
-	clsf="soft"
-	
 	ns <- nstates(object)
 	
 	ntimes <- ntimes(object)
@@ -214,6 +212,12 @@
 		# maximization
 				
 		# should become object at prior <- fit(object at prior, gamma)
+		
+		if(clsf == "hard") {
+		    vstate <- as.factor(viterbi(object)[,1])
+		    gamma <- as.matrix(model.matrix(~ vstate - 1))
+		}
+	
 		object at prior@y <- fbo$gamma[bt,,drop=FALSE]
 		object at prior <- fit(object at prior, w=NULL, ntimes=NULL)
 		object at init <- dens(object at prior)
@@ -271,10 +275,17 @@
 	class(object) <- "depmix.fitted"
 	
 	if(converge) {
-		object at message <- switch(crit,
-			relative = "Log likelihood converged to within tol. (relative change)",
-			absolute = "Log likelihood converged to within tol. (absolute change)"
-		)
+	    if(clsf == "hard") {
+		    object at message <- switch(crit,
+			    relative = "Log classification likelihood converged to within tol. (relative change)",
+			    absolute = "Log classification likelihood converged to within tol. (absolute change)"
+		    )	    
+	    } else {
+		    object at message <- switch(crit,
+			    relative = "Log likelihood converged to within tol. (relative change)",
+			    absolute = "Log likelihood converged to within tol. (absolute change)"
+		    )
+		}
 	} else object at message <- "'maxit' iterations reached in EM without convergence."
 	
 	# no constraints in EM, except for the standard constraints ...

Modified: pkg/depmixS4/R/em.control.R
===================================================================
--- pkg/depmixS4/R/em.control.R	2012-06-21 10:28:52 UTC (rev 530)
+++ pkg/depmixS4/R/em.control.R	2012-06-21 14:12:50 UTC (rev 531)
@@ -1,4 +1,4 @@
 em.control <- 
-function(maxit=500,tol=1e-8,crit="relative",random.start=TRUE) {
-	return(list(maxit=maxit,tol=tol,crit=crit,random.start=random.start))
-}
+function(maxit=500,tol=1e-8,crit="relative",random.start=TRUE,classification=c("soft","hard")) {
+    classification <- match.arg(classification)
+	return(list(maxit=maxit,tol=tol,crit=crit,random.start=random.start,classification=classification))}

Modified: pkg/depmixS4/R/responseGLM.R
===================================================================
--- pkg/depmixS4/R/responseGLM.R	2012-06-21 10:28:52 UTC (rev 530)
+++ pkg/depmixS4/R/responseGLM.R	2012-06-21 14:12:50 UTC (rev 531)
@@ -43,7 +43,7 @@
 			# FIX ME
 			y <- model.response(mf)
 			if(NCOL(y) == 1) {
-				if(is.factor(y)) y <- as.matrix(as.numeric(as.numeric(y)==1)) else {
+				if(is.factor(y)) y <- as.matrix(as.numeric(as.numeric(y)!=1)) else { # 21/06/12 changed this from "==" to "!=" in line with glm
 					if(!is.numeric(y)) stop("model response not valid for binomial model")
 					if(sum(y %in% c(0,1)) != length(y)) stop("model response not valid for binomial model")
 					y <- as.matrix(y)

Modified: pkg/depmixS4/man/depmix.fit.Rd
===================================================================
--- pkg/depmixS4/man/depmix.fit.Rd	2012-06-21 10:28:52 UTC (rev 530)
+++ pkg/depmixS4/man/depmix.fit.Rd	2012-06-21 14:12:50 UTC (rev 531)
@@ -251,6 +251,14 @@
 # check the likelihood ratio; adding age significantly improves the goodness-of-fit
 llratio(fmod5,fmod4)
 
+# instead of the normal likelihood, we can also maximise the "classification" likelihood
+# this uses the maximum a posteriori state sequence to assign observations to states
+# and to compute initial and transition probabilities. 
+
+fmod1b <- fit(mod1,emcontrol=em.control(classification="hard"))
+fmod1b # to see the logLik and optimization information
+
+
 }
 
 \author{Ingmar Visser & Maarten Speekenbrink}

Modified: pkg/depmixS4/man/em.control.Rd
===================================================================
--- pkg/depmixS4/man/em.control.Rd	2012-06-21 10:28:52 UTC (rev 530)
+++ pkg/depmixS4/man/em.control.Rd	2012-06-21 14:12:50 UTC (rev 531)
@@ -8,7 +8,7 @@
 
 \usage{
 	
-	em.control(maxit = 500, tol = 1e-08, crit = "relative", random.start = TRUE)
+	em.control(maxit = 500, tol = 1e-08, crit = "relative", random.start = TRUE, classification = c("soft","hard"))
 	
 }
 
@@ -24,6 +24,9 @@
 	\item{random.start}{This is used for a (limited) random
 	initialization of the parameters. See Details.}
 	
+	\item{classification}{Type of classification to states
+	used. See Details.}
+	
 }
 
 \details{
@@ -52,6 +55,14 @@
 given to distinguish between the states.  It is also useful for repeated
 estimation from different starting values.
 
+Argument \code{classification} is used to choose either soft (default) or
+hard classification to states. When using soft classification, observations
+are assigned to states with a weight equal to the posterior probability of
+the state. When using hard classification, observations are assigned to states
+according to the maximum a posteriori (MAP) states (i.e., each observation
+is assigned to one state, which is determined by the Viterbi algorithm in the
+case of \code{depmix} models).
+
 }
 
 \references{