[Depmix-commits] r71 - trunk/R

Fri Mar 7 15:40:09 CET 2008

Author: ingmarvisser
Date: 2008-03-07 15:40:09 +0100 (Fri, 07 Mar 2008)
New Revision: 71

Modified:
   trunk/R/EM.R
   trunk/R/depmix.R
   trunk/R/depmix.fitted.R
   trunk/R/fb.R
   trunk/R/responses.R
Log:
Moved generics into allGenerics.R and cleaned up comments etc

Modified: trunk/R/EM.R
===================================================================

--- trunk/R/EM.R	2008-03-07 14:39:32 UTC (rev 70)
+++ trunk/R/EM.R	2008-03-07 14:40:09 UTC (rev 71)
@@ -2,7 +2,6 @@
 	
 	if(!is(object,"depmix")) stop("object is not of class 'depmix'")
 	
-	# pseudocode
 	ns <- object at nstates
 	
 	ntimes <- ntimes(object)
@@ -23,7 +22,6 @@
 		}
 		
 		B <- apply(object at dens,c(1,3),prod)
-		# TODO: add functionality for inModel
 		init <- object at init
 		LL.old <- LL
 		j <- j+1
@@ -33,30 +31,16 @@
 		LL <- fbo$logLike
 		
 		# maximization
-		
-		# Here we need an average of gamma[bt[case],], which may need to be weighted ?? (see Rabiner, p283)
-		# this is without weighting
-		initprobs <- apply(fbo$gamma[bt,],2,mean)
-		
+				
 		# should become object at prior <- fit(object at prior)
 		object at prior@y <- fbo$gamma[bt,]
 		object at prior <- fit(object at prior, w=NULL,ntimes=NULL)
 		object at init <- dens(object at prior)
-		# init needs to be recomputed here?
-		
-		#object at initModel <- setpars(object at initModel,values=object at initModel@family$linkfun(initprobs,base=object at initModel@family$base))
-		
-		# This should become:
-		# lt <- length(object at ntimes)
-		# et <- cumsum(object at ntimes)
-		# bt <- c(1,et[-lt]+1)
-		# object at initModel@y <- fbo$gamma[bt,]
-		# object at initModel <- fit(object at initModel)
 				
 		trm <- matrix(0,ns,ns)
 		for(i in 1:ns) {
 			
-			if(max(ntimes(object)>1)) { #skip transition parameters update in case of latent class model
+			if(max(ntimes(object)>1)) { # skip transition parameters update in case of latent class model
 				if(!object at stationary) {
 					object at transition[[i]]@y <- fbo$xi[,,i]/fbo$gamma[,i]
 					object at transition[[i]] <- fit(object at transition[[i]],w=as.matrix(fbo$gamma[,i]),ntimes=ntimes(object)) # check this
@@ -68,13 +52,13 @@
 					object at transition[[i]]@parameters$coefficients <- object at transition[[i]]@family$linkfun(trm[i,],base=object at transition[[i]]@family$base)
 				}
 				
-				# update trans slot of the model
+				# update trDens slot of the model
 				object at trDens[,,i] <- dens(object at transition[[i]])
 			}
 			
 			for(k in 1:nresp(object)) {
 				object at response[[i]][[k]] <- fit(object at response[[i]][[k]],w=fbo$gamma[,i])
-				# update logdens slot of the model
+				# update dens slot of the model
 				object at dens[,k,i] <- dens(object at response[[i]][[k]])
 			}
 		}
@@ -89,9 +73,7 @@
 	else object at message <- "'maxit' iterations reached in EM without convergence."
 	
 	# no constraints in EM
-	# NULL values not allowed in slot conMat!!!
 	object at conMat <- matrix()
-	#object at conMat <- NULL
 	
 	# what do we want in slot posterior?
 	object at posterior <- fbo$gamma

Modified: trunk/R/depmix.R
===================================================================
--- trunk/R/depmix.R	2008-03-07 14:39:32 UTC (rev 70)
+++ trunk/R/depmix.R	2008-03-07 14:40:09 UTC (rev 71)
@@ -11,25 +11,12 @@
 # DEPMIX CLASS
 # 
 
-# What does the user want to know after having defined a model?
-
-# 1) the models: formulae or otherwise and the parameter values
-# 2) whether parameters are feasible: ie does logLik exist or return nan (very likely with bad parameters)
-
-# What do we need to be able to optimize the model?
-# 1) density of the responses
-# 2) density of the transitions
-# 3) density of the priors
-# 4) ntimes: the lengths of individual time series
-# 5) whether the models are stationary, ie time-dependent parameters or not
-
 setClass("depmix",
-	representation(response="list",
-		transition="list",
-		# these are the initial state probabilities or priors as they are called in eg mixture or lca models
-		prior="ANY",
-		dens="array", # B 
-		trDens="array", # A
+	representation(response="list", # response models
+		transition="list", # transition models (multinomial logistic)
+		prior="ANY", # the prior model (multinomial logistic)
+		dens="array", # response densities (B)
+		trDens="array", # transition densities (A)
 		init="array", # usually called pi 
 		stationary="logical",
 		ntimes="numeric",
@@ -43,15 +30,8 @@
 # METHODS
 # 
 
-# Which methods are needed for this class? 
-# 1) construction methods: by inputting various models for responses, transitions and priors
-# 2) accessor methods: getpars, getdf, nlin, npars, nobs (etc)
-# 3) setpars method
-# 4) gof methods: logLik, AIC, BIC (etc) 
-# 5) print method
-# 6) summary method: I am not quite sure what the differences are between print and summary methods?
+# TODO: change print and add summary method for depmix objects
 
-
 # CONSTRUCTORS
 
 # the main function constructing a depmix model with full information, ie all models already in place
@@ -288,8 +268,6 @@
 	}
 )
 
-setGeneric("nobs", function(object, ...) standardGeneric("nobs"))
-
 setMethod("nobs", signature(object="depmix"),
 	function(object, ...) {
 		sum(object at ntimes)
@@ -316,14 +294,11 @@
 	function(object) return(object at nresp)
 )
 
-
-setGeneric("freepars", function(object, ...) standardGeneric("freepars"))
-
 # depends on nlin(object) and getpars(object)
 setMethod("freepars","depmix",
 	function(object) {
 		free <- sum(!getpars(object,which="fixed"))
-# 		free <- free-nlin(object)
+# 		free <- free-nlin(object) # FIX ME!!!!
 		free
 	}
 )
@@ -332,7 +307,7 @@
 
 # depends on getpars and nobs
 setMethod("logLik",signature(object="depmix"),
-	function(object,method="lystig") { # TODO: initial dens and response densities are recomputed here, but this is also done in setpars at least for the response densities !!!!!!!!
+	function(object,method="lystig") { 
 		if(method=="fb") ll <- fb(object at init,object at trDens,apply(object at dens,c(1,3),prod),object at ntimes,object at stationary)$logLike
 		if(method=="lystig") ll <- lystig(object at init,object at trDens,apply(object at dens,c(1,3),prod),object at ntimes,object at stationary)$logLike
 		attr(ll, "df") <- freepars(object)
@@ -349,8 +324,6 @@
 	}
 )
 
-setGeneric("BIC", function(object, ...) standardGeneric("BIC"))
-
 # depends on logLik, freepars and nobs
 setMethod("BIC", signature(object="depmix"),
 	function(object, ...){

Modified: trunk/R/depmix.fitted.R
===================================================================
--- trunk/R/depmix.fitted.R	2008-03-07 14:39:32 UTC (rev 70)
+++ trunk/R/depmix.fitted.R	2008-03-07 14:40:09 UTC (rev 71)
@@ -7,13 +7,6 @@
 # DEPMIX.FITTED CLASS
 # 
 
-# this has information from fitting a depmix model
-
-# 1) whether fitting was succesful: convergence etc
-# 2) some gof information: logLik, AIC, BIC etc: these are provided through methods on the depmix object
-# 3) constraints that were fitted on the parameters
-# 4) posterior probabilities or viterbi sequence
-
 setClass("depmix.fitted",
 	representation(message="character", # convergence information
 		conMat="matrix", # constraint matrix on the parameters for general linear constraints
@@ -22,13 +15,6 @@
 	contains="depmix"
 )
 
-# Which methods are needed for this class?
-# 1) construction method: the fit function which is called on a depmix
-# class object, this is where the real work is done: done!!!
-# 2) print method: done!!!!
-# 3) summary method
-
-
 setMethod("fit",signature(object="depmix"),
 	function(object,w=NULL,fixed=NULL,equal=NULL,conrows=NULL,conrows.upper=0,conrows.lower=0,method=NULL,...) {
 		
Modified: trunk/R/fb.R
===================================================================
--- trunk/R/fb.R	2008-03-07 14:39:32 UTC (rev 70)
+++ trunk/R/fb.R	2008-03-07 14:40:09 UTC (rev 71)
@@ -48,13 +48,11 @@
 		
 		if(ntimes[case]>1) {
 			for(i in (et[case]-1):bt[case]) {
-				#beta[i,] <- (A[i,,]%*%(B[i+1,]*beta[i+1,]))*sca[i]
 				if(stationary) beta[i,] <-(B[i+1,]*beta[i+1,])%*%A[1,,]*sca[i]
 				else beta[i,] <-(B[i+1,]*beta[i+1,])%*%A[i,,]*sca[i]
 			}
 			
 			for(i in bt[case]:(et[case]-1)) {
-				#xi[i+1,,] <- (alpha[i,]%*%t(B[i+1,]*beta[i+1,]))*t(A[i,,])
 				if(stationary) xi[i,,] <- rep(alpha[i,],each=ns)*(B[i+1,]*beta[i+1,]*A[1,,])
 				else xi[i,,] <- rep(alpha[i,],each=ns)*(B[i+1,]*beta[i+1,]*A[i,,])
 			}
Modified: trunk/R/responses.R
===================================================================
--- trunk/R/responses.R	2008-03-07 14:39:32 UTC (rev 70)
+++ trunk/R/responses.R	2008-03-07 14:40:09 UTC (rev 71)
@@ -3,13 +3,6 @@
 # response, transition and prior models for DEPMIX models
 # 
 
-# set up response models in such a way that they are easily extensible
-# all it needs is:
-# 1) the actual response y
-# 2) optional covariates x
-# 3) parameters
-# 4) further information about parameters: fixed or otherwise contrained
-
 # 
 # RESPONSE CLASS
 # 
@@ -27,26 +20,12 @@
 # RESPONSE CLASS METHODS
 # 
 
-
-# What methods are neccessary for response models?
-# 1) construction method: done!
-# 2) dens function (with argument object and log=TRUE/FALSE to return density on log scale): done!
-# 3) getpars function to return the parameter, with argument which="pars"/"fixed" to determine 
-# whether to return parameter or fixed/free information: done!
-# 4) setpars function to set parameters to new values: done!
-# 5) accessor function npar: done!
-# 6) accessor function getdf (get the number of free parameters): done!
-
-setGeneric("npar",function(object) standardGeneric("npar"))
-
 setMethod("npar","response",
 	function(object) {
 		return(object at npar)
 	}
 )
 
-setGeneric("getdf",function(object) standardGeneric("getdf"))
-
 setMethod("getdf","response",
 	function(object) {
 		return(sum(!object at fixed))
@@ -62,24 +41,16 @@
 		family="ANY"
 	),
 	prototype(
-# 		parameters=list(coefficients=0,other=1),
-# 		fixed=c(FALSE,FALSE),
-# 		y = matrix(1,ncol=1),
-# 		x = matrix(1,ncol=1),
 		formula=.~.,
 		family=gaussian()
 	),
 	contains="response"
 )
 
-# Which methods are neccessary for this in addition to the general response class methods?
-# 1) predict function (needed for dens): done!
-# 2) fit function (needed for EM): done!
-# 3) print method: done!
-# 4) summary method
+# 
+# TODO: review print and summary methods
+# 
 
-setGeneric("GLMresponse", function(formula, ...) standardGeneric("GLMresponse"))
-
 setMethod("GLMresponse",
 	signature(formula="formula"),
 	function(formula,family,data,pstart=NULL,fixed=NULL,prob=TRUE) {
@@ -118,7 +89,6 @@
 			if(family$family=="multinomial") {
 				if(family$link=="identity") parameters$coefficients[1,] <- family$linkfun(pstart[1:ncol(parameters$coefficients)])
 				else {
-# 					print("ok")
 					if(prob) parameters$coefficients[1,] <- family$linkfun(pstart[1:ncol(parameters$coefficients)],base=family$base)
 					else parameters$coefficients[1,] <- pstart[1:ncol(parameters$coefficients)]
 				}
@@ -149,12 +119,8 @@
 # neccessary in the mulinomial model)
 # 
 
-# setClass("trinMultinom",contains=c("trinModel","rMultinom"))
-
 setClass("transInit",contains="GLMresponse")
 
-setGeneric("transInit", function(formula, ... ) standardGeneric("transInit"))
-
 # FIX ME: data is a necessary argument to determine the dimension of x, even when there
 # are no covariates (and there are by definition no responses ...)
 setMethod("transInit",
@@ -168,11 +134,7 @@
 		mf[[1]] <- as.name("model.frame")
 		mf <- eval(mf, parent.frame())
 		x <- model.matrix(attr(mf, "terms"),mf)
-		y <- matrix(1,ncol=1) # y is not needed in the transition and init models
-		
-		# use ntimes 
-    #if(!is.null(ntimes)) if(nrow(x) < sum(ntimes)) x <- apply(x,2,function(y) rep(y,length=sum(ntimes)))
-		    
+		y <- matrix(1,ncol=1) # y is not needed in the transition and init models		    
 		parameters <- list()
 		if(is.null(nstates)) stop("'nstates' must be provided in call to trinModel")
 		if(family$family=="multinomial") {
@@ -224,8 +186,6 @@
 	}
 )
 
-setGeneric("setpars", function(object,values,which="pars",...) standardGeneric("setpars"))
-
 setMethod("setpars","GLMresponse",
 	function(object,values,which="pars",...) {
 		npar <- npar(object)
@@ -252,8 +212,6 @@
 	}
 )
 
-setGeneric("getpars", function(object,which="pars",...) standardGeneric("getpars"))
-
 setMethod("getpars","GLMresponse",
 	function(object,which="pars",...) {
 		switch(which,
@@ -289,8 +247,6 @@
 # use: in EM (M step)
 # returns: (fitted) response with (new) estimates of parameters
 
-setGeneric("fit",function(object,w,...) standardGeneric("fit"))
-
 setMethod("fit","GLMresponse",
 	function(object,w) {
 		pars <- object at parameters
@@ -305,10 +261,8 @@
 	function(object,w) {
 		pars <- object at parameters
 		fit <- lm.wfit(x=object at x,y=object at y,w=w)
-		#fit <- glm.fit(x=object at x,y=object at y,weights=w,family=object at family)
 		pars$coefficients <- fit$coefficients
 		pars$sd <- sqrt(sum(w*fit$residuals^2/sum(w)))
-		#pars$sd <- sqrt(sum(w*residuals(fit)^2/sum(w)))
 		object <- setpars(object,unlist(pars))
 		object
 	}
@@ -344,11 +298,6 @@
 # method 'logDens'
 # use: instead of density slot in rModel
 # returns: matrix with log(p(y|x,parameters))
-
-setGeneric("logDens",function(object,...) standardGeneric("logDens"))
-
-setGeneric("dens",function(object,...) standardGeneric("dens"))
-
 setMethod("logDens","BINOMresponse",
 	function(object) {
 		dbinom(x=object at y,size=object at n,prob=predict(object),log=TRUE)
@@ -533,10 +482,8 @@
 		}
 	}
 	variance <- function(mu) {
-		# diag(mu) - mu%*%t(mu)
 		n <- length(mu)
 		v <- diag(n)*outer(mu,1-mu) - (1-diag(n))*outer(mu,-mu)
-		#diag(v) <- diag(v) + 1e-50
 	}
 	validmu <- function(mu) {
 		all(mu > 0) && all(mu < 1)
@@ -548,7 +495,7 @@
 	structure(list(family = "multinomial", link = linktemp, linkfun = stats$linkfun,
 			linkinv = stats$linkinv, variance = variance, dev.resids = dev.resids,
 			mu.eta = stats$mu.eta, initialize = initialize, validmu = validmu, valideta = stats$valideta, base=base),
-		class = "family")
+			class = "family")
 }
 
 setMethod("fit","transInit",
@@ -556,7 +503,6 @@
 		pars <- object at parameters
 		if(missing(w)) w <- NULL
 		oldfit <- function() {
-			#fit.trMultinom(object,w,ntimes)
 			tol <- 1e-5 # TODO: check global options
 			pars <- object at parameters
 			b <- pars$coefficients
@@ -646,11 +592,11 @@
 		x <- as.matrix(x)
 		na <- unique(na)
 		if(length(na)>0) {
-  		x <- x[-na,]
-  		y <- y[-na,]
-		#y <- round(y) # delete me
-		if(!is.null(w)) w <- w[-na]
-	}
+			x <- x[-na,]
+			y <- y[-na,]
+			#y <- round(y) # delete me
+			if(!is.null(w)) w <- w[-na]
+		}
 		#mask <- matrix(1,nrow=nrow(pars$coefficients),ncol=ncol(pars$coefficients))
 		#mask[,base] <- 0
 		if(!is.null(w)) fit <- multinom(y~x-1,weights=w,trace=FALSE) else fit <- multinom(y~x-1,trace=FALSE)