[Depmix-commits] r514 - in pkg/depmixS4: . R

Thu Apr 26 21:53:50 CEST 2012

Author: maarten
Date: 2012-04-26 21:53:50 +0200 (Thu, 26 Apr 2012)
New Revision: 514

Modified:
   pkg/depmixS4/NEWS
   pkg/depmixS4/R/depmix.R
   pkg/depmixS4/R/fb.R
   pkg/depmixS4/R/lystig.R
   pkg/depmixS4/R/nobs.R
   pkg/depmixS4/R/responseGLM.R
   pkg/depmixS4/R/responseGLMMULTINOM.R
   pkg/depmixS4/R/responseMVN.R
   pkg/depmixS4/R/responseNORM.R
   pkg/depmixS4/R/transInit.R
   pkg/depmixS4/R/viterbi.R
Log:
- started adding functionality for NA's in responses (NEEDS TO BE TESTED PROPERLY!!!)


Modified: pkg/depmixS4/NEWS
===================================================================

--- pkg/depmixS4/NEWS	2012-03-30 10:44:05 UTC (rev 513)
+++ pkg/depmixS4/NEWS	2012-04-26 19:53:50 UTC (rev 514)
@@ -1,4 +1,9 @@
+Changes in depmixS4 version 1.1-2
 
+Major changes
+  
+  o Missing values for responses are now allowed.
+    
 Changes in depmixS4 version 1.1-0
   
   Major changes

Modified: pkg/depmixS4/R/depmix.R
===================================================================
--- pkg/depmixS4/R/depmix.R	2012-03-30 10:44:05 UTC (rev 513)
+++ pkg/depmixS4/R/depmix.R	2012-04-26 19:53:50 UTC (rev 514)
@@ -20,7 +20,7 @@
     data = NULL, nstates, family = gaussian(), prior = ~1, initdata = NULL, 
     respstart = NULL, instart = NULL, ...) {
     
-	if(!is.null(data) & any(is.na(data))) stop("'depmixS4' does not currently handle missing data.")
+	#if(!is.null(data) & any(is.na(data))) stop("'depmixS4' does not currently handle missing data.")
 	
     # make response models
     response <- makeResponseModels(response = response, data = data, 
@@ -61,7 +61,7 @@
 	if(is.null(data)) {
 		if(is.null(ntimes)) stop("'ntimes' must be provided if not in the data")
 	} else {
-		if(any(is.na(data))) stop("'depmixS4' does not currently handle missing data.")
+		#if(any(is.na(data))) stop("'depmixS4' does not currently handle missing data.")
 		if(is.null(attr(data, "ntimes"))) {
 			if (is.null(ntimes)) ntimes <- nrow(data)
 		} else {

Modified: pkg/depmixS4/R/fb.R
===================================================================
--- pkg/depmixS4/R/fb.R	2012-03-30 10:44:05 UTC (rev 513)
+++ pkg/depmixS4/R/fb.R	2012-04-26 19:53:50 UTC (rev 514)
@@ -4,24 +4,31 @@
 # FORWARD-BACKWARD algoritme, 23-3-2008
 # 
 
-fb <- function(init,A,B,ntimes=NULL,return.all=FALSE,stationary=TRUE,useC=TRUE) {
+fb <- function(init,A,B,ntimes=NULL,return.all=FALSE,stationary=TRUE,useC=TRUE,na.allow=TRUE) {
 
 	# Forward-Backward algorithm (used in Baum-Welch)
 	# Returns alpha, beta, and full data likelihood
 	
 	# NOTE THE CHANGE IN FROM ROW TO COLUMN SUCH THAT TRANSPOSING A IS NOT NECCESSARY ANYMORE
 	# IN COMPUTING ALPHA AND BETA BUT IS NOW NECCESSARY IN COMPUTING XI
-	# A = T*K*K matrix with transition probabilities, from row to column!!!!!!!
-	# B = T*K matrix with elements ab_{ij} = P(y_i|s_j)
-	# init = K vector with initial probabilities
+	# A = T*K*K array with transition probabilities, from row to column!!!!!!!
+	# B = T*D*K matrix with elements ab_{ij} = P(y_i|s_j)
+	# init = N*K vector with initial probabilities
 
+    # T = total number of time points
+    # K = number of states
+    # D = dimension of observations (D>1 is multivariate)
+    # N = number of participants
+     
 	# NOTE: to prevent underflow, alpha and beta are scaled, using sca
 	
 	# NOTE: xi[t,i,j] = P(S[t] = j & S[t+1] = i) !!!NOTE the order of i and j!!!
 	
-	nt <- nrow(B)	
+	#nt <- nrow(B)
+	nt <- dim(B)[1]
 	ns <- ncol(init)
 	
+	if(na.allow) B <- replace(B,is.na(B),1)
 	B <- apply(B,c(1,3),prod)
 	
 	if(is.null(ntimes)) ntimes <- nt

Modified: pkg/depmixS4/R/lystig.R
===================================================================
--- pkg/depmixS4/R/lystig.R	2012-03-30 10:44:05 UTC (rev 513)
+++ pkg/depmixS4/R/lystig.R	2012-04-26 19:53:50 UTC (rev 514)
@@ -4,7 +4,7 @@
 # LYSTIG algoritme voor de loglikelihood, 23-3-2008
 # 
 
-lystig <- function(init,A,B,ntimes=NULL,stationary=TRUE) {
+lystig <- function(init,A,B,ntimes=NULL,stationary=TRUE,na.allow=TRUE) {
 
 	# Log likelihood computation according to Lystig & Hughes (2002).  This
 	# is very similar to the Forward part of the Forward-Backward algorithm
@@ -16,7 +16,8 @@
 	# IN COMPUTING ALPHA AND BETA BUT IS NOW NECCESSARY IN COMPUTING XI
 	# A = K*K matrix with transition probabilities, from column to row !!!!!!!
 	# change to T*K*K
-		
+    
+    if(na.allow) B <- replace(B,is.na(B),1)
 	B <- apply(B,c(1,3),prod)
 
 	# B = T*K*nresp matrix with elements ab_{tij} = P(y_t_i|s_j)

Modified: pkg/depmixS4/R/nobs.R
===================================================================
--- pkg/depmixS4/R/nobs.R	2012-03-30 10:44:05 UTC (rev 513)
+++ pkg/depmixS4/R/nobs.R	2012-04-26 19:53:50 UTC (rev 514)
@@ -1,5 +1,8 @@
 setMethod("nobs", signature(object="mix"),
 	function(object, ...) {
-		sum(object at ntimes)
+		nt <- sum(object at ntimes)
+		n <- sum(!apply(object at dens,1,function(x) any(is.na(x))))
+		if(n!=nt) warning("missing values detected; nobs is number of cases without any missing values")
+		return(n)
 	}
 )

Modified: pkg/depmixS4/R/responseGLM.R
===================================================================
--- pkg/depmixS4/R/responseGLM.R	2012-03-30 10:44:05 UTC (rev 513)
+++ pkg/depmixS4/R/responseGLM.R	2012-04-26 19:53:50 UTC (rev 514)
@@ -16,15 +16,17 @@
 
 setMethod("GLMresponse",
 	signature(formula="formula"),
-	function(formula,data=NULL,family=gaussian(),pstart=NULL,fixed=NULL,prob=TRUE, ...) {
+	function(formula,data=NULL,family=gaussian(),pstart=NULL,fixed=NULL,prob=TRUE,na.action="na.pass", ...) {
 		call <- match.call()
 		mf <- match.call(expand.dots = FALSE)
 		m <- match(c("formula", "data"), names(mf), 0)
 		mf <- mf[c(1, m)]
 		mf$drop.unused.levels <- TRUE
+		mf$na.action <- na.action
 		mf[[1]] <- as.name("model.frame")
 		mf <- eval(mf, parent.frame())
 		x <- model.matrix(attr(mf, "terms"),mf)
+		if(any(is.na(x))) stop("'depmixS4' does not currently handle covariates with missing data.")
 		y <- model.response(mf)
 		if(!is.matrix(y)) y <- matrix(y,ncol=1)
 		parameters <- list()
@@ -56,10 +58,14 @@
 			y <- model.response(mf)
 			if(NCOL(y) == 1) {
 				if(is.factor(y)) {
-					y <- model.matrix(~y-1) 
+				    mf <- model.frame(~y-1,na.action=na.action)
+				    y <- model.matrix(attr(mf, "terms"),mf)
+					#y <- model.matrix(~y-1,na.action=na.action) 
 				} else {
 					if(!is.numeric(y)) stop("model response not valid for multinomial model")
-					y <- model.matrix(~factor(y)-1)
+					mf <- model.frame(~factor(y)-1,na.action=na.action)
+				    y <- model.matrix(attr(mf, "terms"),mf)
+					#y <- model.matrix(~factor(y)-1,na.action=na.action)
 				}
 			}
 			if(family$link=="mlogit") {

Modified: pkg/depmixS4/R/responseGLMMULTINOM.R
===================================================================
--- pkg/depmixS4/R/responseGLMMULTINOM.R	2012-03-30 10:44:05 UTC (rev 513)
+++ pkg/depmixS4/R/responseGLMMULTINOM.R	2012-04-26 19:53:50 UTC (rev 514)
@@ -4,11 +4,12 @@
 setMethod("fit","MULTINOMresponse",
 	function(object,w) {
 		if(missing(w)) w <- NULL
+		nas <- is.na(rowSums(object at y))
 		if(object at family$link=="mlogit") {
 			pars <- object at parameters
 			base <- object at family$base # delete me
-			y <- object at y
-			x <- object at x
+			y <- as.matrix(object at y[!nas,])
+			x <- as.matrix(object at x[!nas,])
 			#if(is.null(w)) w <- rep(1,nrow(y))
 			# mask is an nx*ny matrix (x are inputs, y are output levels)
 			mask <- matrix(1,nrow=nrow(pars$coefficients),ncol=ncol(pars$coefficients))
@@ -18,9 +19,9 @@
 			Wts[-1,] <- pars$coefficients # set starting weights
 			if(!is.null(w)) {
 				if(NCOL(y) < 3) {
-					fit <- nnet.default(x,y,weights=w,size=0,entropy=TRUE,skip=TRUE,mask=mask,Wts=Wts,trace=FALSE)
+					fit <- nnet.default(x,y,weights=w[!nas],size=0,entropy=TRUE,skip=TRUE,mask=mask,Wts=Wts,trace=FALSE)
 				} else {
-					fit <- nnet.default(x,y,weights=w,size=0,softmax=TRUE,skip=TRUE,mask=mask,Wts=Wts,trace=FALSE)
+					fit <- nnet.default(x,y,weights=w[!nas],size=0,softmax=TRUE,skip=TRUE,mask=mask,Wts=Wts,trace=FALSE)
 				}
 			} else {
 				if(NCOL(y) < 3) {
@@ -36,8 +37,8 @@
 		}
 		if(object at family$link=="identity") {
 			if(is.null(w)) w <- rep(1,nrow(object at y))
-			sw <- sum(w)
-			pars <- c(apply(object at y,2,function(x){sum(x*w)/sw}))
+			sw <- sum(w[!nas])
+			pars <- c(apply(as.matrix(object at y[!nas,]),2,function(x){sum(x*w[!nas])/sw}))
 			pars <- pars/sum(pars)
 			object <- setpars(object,pars)
 		}
@@ -47,7 +48,8 @@
 
 setMethod("logDens","MULTINOMresponse",
 	function(object) {
-		if(all(rowSums(object at y)==1)) {
+	    rsums <- rowSums(object at y)
+		if(all(rsums[!is.na(rsums)]==1)) {
 			return(log(rowSums(object at y*predict(object))))
 		} else {
 			nr <- nrow(object at y)
@@ -65,7 +67,8 @@
 
 setMethod("dens","MULTINOMresponse",
 	function(object,log=FALSE) {
-		if(all(rowSums(object at y)==1)) {
+	    rsums <- rowSums(object at y)
+		if(all(rsums[!is.na(rsums)]==1)) {
 			if(log) return(log(rowSums(object at y*predict(object))))
 			else return(rowSums(object at y*predict(object)))
 		} else {

Modified: pkg/depmixS4/R/responseMVN.R
===================================================================
--- pkg/depmixS4/R/responseMVN.R	2012-03-30 10:44:05 UTC (rev 513)
+++ pkg/depmixS4/R/responseMVN.R	2012-04-26 19:53:50 UTC (rev 514)
@@ -129,15 +129,17 @@
 
 setMethod("MVNresponse",
 	signature(formula="formula"),
-	function(formula,data,pstart=NULL,fixed=NULL,...) {
+	function(formula,data,pstart=NULL,fixed=NULL,na.action="na.pass",...) {
 		call <- match.call()
 		mf <- match.call(expand.dots = FALSE)
 		m <- match(c("formula", "data"), names(mf), 0)
 		mf <- mf[c(1, m)]
 		mf$drop.unused.levels <- TRUE
+		mf$na.action <- na.action
 		mf[[1]] <- as.name("model.frame")
 		mf <- eval(mf, parent.frame())
 		x <- model.matrix(attr(mf, "terms"),mf)
+		if(any(is.na(x))) stop("'depmixS4' does not currently handle covariates with missing data.")
 		y <- model.response(mf)
 		if(!is.matrix(y)) y <- matrix(y,ncol=1)
 		parameters <- list()

Modified: pkg/depmixS4/R/responseNORM.R
===================================================================
--- pkg/depmixS4/R/responseNORM.R	2012-03-30 10:44:05 UTC (rev 513)
+++ pkg/depmixS4/R/responseNORM.R	2012-04-26 19:53:50 UTC (rev 514)
@@ -8,15 +8,16 @@
 setMethod("fit","NORMresponse",
 	function(object,w) {
 		if(missing(w)) w <- NULL
+	    nas <- is.na(rowSums(object at y))
 		pars <- object at parameters
 		if(!is.null(w)) {
-			fit <- lm.wfit(x=object at x,y=object at y,w=w)
+			fit <- lm.wfit(x=as.matrix(object at x[!nas,]),y=as.matrix(object at y[!nas,]),w=w[!nas])
 		} else {
-			fit <- lm.fit(x=object at x,y=object at y)
+			fit <- lm.fit(x=as.matrix(object at x[!nas,]),y=as.matrix(object at y[!nas,]))
 		}
 		pars$coefficients <- fit$coefficients
 		if(!is.null(w)) {
-			pars$sd <- sqrt(sum(w*fit$residuals^2/sum(w)))
+			pars$sd <- sqrt(sum(w[!nas]*fit$residuals^2/sum(w[!nas])))
 		} else {
 			pars$sd <- sd(fit$residuals)
 		}

Modified: pkg/depmixS4/R/transInit.R
===================================================================
--- pkg/depmixS4/R/transInit.R	2012-03-30 10:44:05 UTC (rev 513)
+++ pkg/depmixS4/R/transInit.R	2012-04-26 19:53:50 UTC (rev 514)
@@ -20,6 +20,7 @@
 			mf[[1]] <- as.name("model.frame")
 			mf <- eval(mf, parent.frame())
 			x <- model.matrix(attr(mf, "terms"),mf)
+			if(any(is.na(x))) stop("'depmixS4' does not currently handle covariates with missing data.")
 		}
 		y <- matrix(1,ncol=1) # y is not needed in the transition and init models
 		parameters <- list()

Modified: pkg/depmixS4/R/viterbi.R
===================================================================
--- pkg/depmixS4/R/viterbi.R	2012-03-30 10:44:05 UTC (rev 513)
+++ pkg/depmixS4/R/viterbi.R	2012-04-26 19:53:50 UTC (rev 514)
@@ -3,7 +3,7 @@
 # 
 
 viterbi <-
-function(object) {
+function(object,na.allow=TRUE) {
 	# returns the most likely state sequence
 	nt <- sum(object at ntimes)
 	lt <- length(object at ntimes)
@@ -18,7 +18,9 @@
 	prior <- object at init
 	
 	if(max(ntimes(object)>1)) A <- object at trDens
-	B <- apply((object at dens),c(1,3),prod)
+	B <- object at dens
+	if(na.allow) B <- replace(B,is.na(B),1)
+	B <- apply(B,c(1,3),prod)
 	
 	for(case in 1:lt) {
 		# initialization