[Rsiena-commits] r137 - in pkg/RSienaTest: . R doc man

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Tue Feb 22 02:30:23 CET 2011


Author: jalospinoso
Date: 2011-02-22 02:30:23 +0100 (Tue, 22 Feb 2011)
New Revision: 137

Modified:
   pkg/RSienaTest/DESCRIPTION
   pkg/RSienaTest/R/sienaGOF.r
   pkg/RSienaTest/R/sienaTimeTest.r
   pkg/RSienaTest/changeLog
   pkg/RSienaTest/doc/s_man400.tex
   pkg/RSienaTest/man/sienaGOF.Rd
Log:
Revision 137, additions to sienaGOF and sienaTimeTest

Modified: pkg/RSienaTest/DESCRIPTION
===================================================================
--- pkg/RSienaTest/DESCRIPTION	2011-02-21 00:54:43 UTC (rev 136)
+++ pkg/RSienaTest/DESCRIPTION	2011-02-22 01:30:23 UTC (rev 137)
@@ -1,12 +1,12 @@
 Package: RSienaTest
 Type: Package
 Title: Siena - Simulation Investigation for Empirical Network Analysis
-Version: 1.0.12.136
-Date: 2011-02-21
+Version: 1.0.12.137
+Date: 2012-02-21
 Author: Various
 Depends: R (>= 2.9.0), xtable
 Imports: Matrix
-Suggests: tcltk, snow, rlecuyer, network, codetools, lattice, snopgof
+Suggests: tcltk, snow, rlecuyer, network, codetools, lattice, MASS, sna, vioplot
 SystemRequirements: GNU make, tcl/tk 8.5, Tktable
 Maintainer: Ruth Ripley <ruth at stats.ox.ac.uk>
 Description: Fits models to longitudinal networks

Modified: pkg/RSienaTest/R/sienaGOF.r
===================================================================
--- pkg/RSienaTest/R/sienaGOF.r	2011-02-21 00:54:43 UTC (rev 136)
+++ pkg/RSienaTest/R/sienaGOF.r	2011-02-22 01:30:23 UTC (rev 137)
@@ -18,92 +18,343 @@
 	}
 }
 
-## Sienafit object, returns a list of auxiliary statistics from the given function
+## Sienafit object, returns a list of auxiliary 
+## statistics from the given function
 evaluateAuxiliaryStatistics.simulated <- function(sienaFitObject, 
-		groupName, varName, wave, auxiliaryFunction, verbose=FALSE) {
+		groupName, varName, auxiliaryFunction, wave=NULL,
+		verbose=FALSE, join=TRUE) {
 	if (! sienaFitObject$returnDeps) {
-		stop("You must instruct siena07 to return the simulated networks, i.e. returnDeps=TRUE")
+		stop("You must instruct siena07 to return the simulated networks")
 	}
 	iterations = length(sienaFitObject$sims)
 	if (iterations < 1) {
 		stop("You need at least one iteration.")
 	}
 	groups = length(sienaFitObject$sims[[1]])
-	if (verbose) cat("Detected", iterations, "iterations and", groups, "groups.\n")
+	if (verbose) cat("Detected", iterations, "iterations and", groups,
+				"groups.\n")
 	groupNumber = which(names(sienaFitObject$sims[[1]]) == groupName)
 	if (is.na(groupNumber)) {
 		stop("Invalid group name.")
 	}
-	if (verbose) cat("Group", groupName, "corresponds to index",groupNumber,".\n")
-	varNumber = which(names(sienaFitObject$sims[[1]][[groupNumber]]) == varName)
-	if (varNumber < 1 | varNumber > length(sienaFitObject$sims[[1]][[groupNumber]])) {
+	if (verbose) cat("Group", groupName, "corresponds to index",
+				groupNumber,".\n")
+	varNumber = which(names(sienaFitObject$sims[[1]][[groupNumber]]) == 
+					varName)
+	if (varNumber < 1 | varNumber > 
+			length(sienaFitObject$sims[[1]][[groupNumber]])) {
 		stop("Invalid variable number -- out of bounds.")
 	}
-	if (verbose) cat("Variable", varName, "corresponds to index",varNumber,".\n")
-	if (min(wave) < 1 | max(wave) > length(sienaFitObject$sims[[1]][[groupNumber]][[varNumber]])){
+	if (verbose) cat("Variable", varName, "corresponds to index",
+				varNumber,".\n")
+	if (is.null(wave)) {
+		wave = 1:length(sienaFitObject$sims[[1]][[groupNumber]][[varNumber]])
+	}
+	if (min(wave) < 1 | max(wave) > 
+			length(sienaFitObject$sims[[1]][[groupNumber]][[varNumber]])){
 		stop("Invalid wave index -- out of bounds")
 	}
 	if (verbose) cat("Calculating auxiliary statistics for waves",wave,".\n")
-	ret <- lapply(wave, function (j) {
-			tmp <- sapply(1:iterations, function (i) { auxiliaryFunction(sienaFitObject$sims[[i]][[groupNumber]][[varNumber]][[j]])})
-			dimnames(tmp)[[2]] <-  1:iterations
-			t(tmp)
-		})
-	names(ret) <- paste("Wave",wave)
+	if (join) {
+		ttc <- system.time(tmp <- lapply(wave, function (j) {
+					tmp <- sapply(1:iterations, function (i) 
+					{ auxiliaryFunction(sienaFitObject$
+					sims[[i]][[groupNumber]][[varNumber]][[j]])})
+					dimnames(tmp)[[2]] <-  1:iterations
+					t(tmp)
+				}))
+		ret <- tmp[[1]]
+		if (length(tmp)>1) {
+			for (i in 2:length(tmp)) {
+				ret = ret + tmp[[i]]
+			}
+		}
+		ret <- list(Joint=ret)
+	} else {
+		ttc <- system.time(ret <- lapply(wave, function (j) {
+					tmp <- sapply(1:iterations, function (i) {
+					auxiliaryFunction(sienaFitObject$
+					sims[[i]][[groupNumber]][[varNumber]][[j]])})
+					dimnames(tmp)[[2]] <-  1:iterations
+					t(tmp)
+				}))
+		names(ret) <- paste("Wave",wave)
+	}
 	class(ret) <- "simulatedAuxiliaryStatistics"
 	attr(ret,"auxiliaryStatisticName") <- deparse(substitute(auxiliaryFunction))
+	attr(ret,"joint") = join
+	attr(ret,"time") = ttc
 	ret
 }
 
 ## Sienafit object, returns a list of auxiliary statistics from the given function
 evaluateAuxiliaryStatistics.observed <- function(sienaDataObject, 
-		varName, wave, auxiliaryFunction, verbose=FALSE) {
+		varName, auxiliaryFunction, wave=NULL, verbose=FALSE, join=TRUE) {
 	varNumber = which(names(sienaDataObject$depvars) == varName)
 	if (varNumber < 1 | varNumber > length(sienaDataObject$depvars)) {
 		stop("Invalid variable number -- out of bounds.")
 	}
-	if (verbose) cat("Variable", varName, "corresponds to index",varNumber,".\n")
-	if (min(wave) < 1 | max(wave) > dim(sienaDataObject$depvars[[varNumber]])[3] - 1){
+	if (verbose) cat("Variable", varName, 
+				"corresponds to index",varNumber,".\n")
+	if (is.null(wave) ) {
+		wave = 1:(dim(sienaDataObject$depvars[[varNumber]])[3] - 1)
+	}
+	if (min(wave) < 1 | max(wave) > 
+			dim(sienaDataObject$depvars[[varNumber]])[3] - 1){
 		stop("Invalid wave index -- out of bounds")
 	}
 	if (verbose) cat("Calculating auxiliary statistics for waves",wave,".\n")
-	ret <- lapply(wave, function (j) {
-				auxiliaryFunction(sienaDataObject$depvars[[varNumber]][,,j+1])
-			})
-	names(ret) <- paste("Wave",wave)
+	if (join) {
+		tmp <- lapply(wave, function (j) {
+					auxiliaryFunction(sienaDataObject$
+									depvars[[varNumber]][,,j+1])
+				})
+		ret <- tmp[[1]]
+		if (length(tmp)>1) {
+			for (i in 2:length(tmp)) {
+				ret = ret + tmp[[i]]
+			}
+		}
+		ret <- list(Joint=ret)
+	} else {
+		ret <- lapply(wave, function (j) {
+					auxiliaryFunction(sienaDataObject$
+									depvars[[varNumber]][,,j+1])
+				})
+		names(ret) <- paste("Wave",wave)
+	}
 	class(ret) <- "observedAuxiliaryStatistics"
-	attr(ret,"auxiliaryStatisticName") <- deparse(substitute(auxiliaryFunction))
+	attr(ret,"auxiliaryStatisticName") <-
+			deparse(substitute(auxiliaryFunction))
+	attr(ret,"joint") = join
 	ret
 }
 
+
+sienaGOFPreprocess <- function(simulated, expectationFunction=mean) {
+	require(MASS)
+	if (class(simulated) != "matrix") {
+		stop("Invalid input.")
+	}
+	a <- cov(simulated)
+	 # How do we make this error get thrown
+	 #up the call stack? It will look much
+	 # nicer if we can get that to work
+	 # tryCatch(
+	 # ainv <- solve(a)	
+	 #, error = 
+	 # function(e)simpleError(paste("Your testing functions are producing",
+     # "collinear statistics which cannot be handled by the",
+ 	 # "Mahalanobis distance. Try reducing the number of statistics",
+ 	 # "returned by your testing function (e.g. Geodesic distances 1:10",
+	 # "to 1:5 or so).")))
+	# Using generalized inverse instead of regular inverse so
+	# we don't have to worry
+	# about collinearity
+	ainv <- ginv(a)
+	simulations=nrow(simulated)
+	variates=ncol(simulated)
+	expectation = apply(simulated, 2, expectationFunction);
+	centeredSimulations <- t(sapply(1:simulations, 
+			function(i) simulated[i,] - expectation))
+	if (variates==1) {
+		centeredSimulations <- t(centeredSimulations)
+	}
+	ttc <- system.time(mahalanobisDistances <- 
+					sapply(1:simulations, function(i) 
+					  centeredSimulations[i,] %*% 
+					  ainv %*% centeredSimulations[i,]))
+	ret <- list(
+			Iterations=dim(simulated)[1], 
+			Variates=dim(simulated)[2], 
+			Original=simulated, 
+			Covariance=a, InverseCovariance=ainv,
+			Expectation=expectation,
+			CenteredSimulations=centeredSimulations, 
+			MHDistances = sort(mahalanobisDistances),
+			ExpectationFunction=expectationFunction, 
+			ComputeTime=ttc)
+	class(ret) <- "sienaGOFPreprocess"
+	ret
+}
+
+sienaGOFProcess <- function(observed, simulated, twoTailed=FALSE) {
+	if (class(observed) == "numeric") {
+		observed <- matrix(observed,nrow=1)
+	}
+	if (class(observed) != "matrix" |
+			class(simulated) != "sienaGOFPreprocess") {
+		stop("simulated must be a sienaGOFPreprocess object.")
+	}
+	if (ncol(observed) != simulated$Variates) {
+		stop("Dimensionality of function parameters do not match.")
+	}
+	observations = nrow(observed)
+	mhd <- sapply(1:observations, function(i) 
+			  t(observed[i,] - simulated$Expectation) %*%
+			  simulated$InverseCovariance %*% 
+			  (observed[i,] - simulated$Expectation) )
+	if (twoTailed) {
+		p.mhd <- sapply(1:observations, function (i) 
+			1 - abs(1 - 2 * sum(mhd[i] <= 
+			simulated$MHDistances)/simulated$Iterations) )
+	} else {
+		p.mhd <- sapply(1:observations, function (i) 
+			sum(mhd[i] <=  simulated$MHDistances)
+			/simulated$Iterations)
+	}
+	return = list(
+			Observation=observed, 
+			Preprocess=simulated, 
+			MHDistance=mhd, 
+			p.MHD = p.mhd,
+			TwoTailed = twoTailed)
+	class(return) <- "sienaGOFProcess"
+	return
+}
+
 sienaGOF <- function(obsList, simList) {
-	require(snopgof)
 	if ((class(obsList) != "observedAuxiliaryStatistics") |
 			(class(simList) != "simulatedAuxiliaryStatistics") |
 			(length(obsList) != length(simList))
-			| (attr(obsList,"auxiliaryStatisticName") != attr(simList,"auxiliaryStatisticName"))) {
+			| (attr(obsList,"auxiliaryStatisticName")
+				!= attr(simList,"auxiliaryStatisticName"))) {
 		stop("Arguments invalid")
 	}
-	pre <- lapply(1:length(simList), function (i) gof.preprocess(simList[[i]]))
-	# We could try inverse covariance for weighting or something here. Just identity weighting for now.
-	#weight <- lapply(1:length(simList), function (i) solve(cov(simList[[i]])))
-	#res <- lapply(1:length(simList), function (i) gof(obsList[[i]], pre[[i]], weight[[i]]))
-	res <- lapply(1:length(simList), function (i) gof(obsList[[i]], pre[[i]]) )
-	ret <- list(ObservedStatistics=obsList, SimulatedStatistics=simList, Results=res)
+
+	pre <- lapply(1:length(simList), function (i)
+				sienaGOFPreprocess(simList[[i]]))
+	res <- lapply(1:length(simList), function (i) 
+				sienaGOFProcess(obsList[[i]], pre[[i]]) )
+	
+	ret <- list(ObservedStatistics=obsList, SimulatedStatistics=simList, 
+			Results=res)
 	class(ret) <- "sienaGOF"
-	attr(ret,"auxiliaryStatisticName") <- attr(obsList,"auxiliaryStatisticName")
+	attr(ret,"auxiliaryStatisticName") <-
+			attr(obsList,"auxiliaryStatisticName")
 	ret
 }
 
 print.sienaGOF <- function (x, ...) {
-	cat("Siena Goodness of Fit (", attr(x,"auxiliaryStatisticName") ,")\n > P-values by wave:\n")
-	pVals = sapply(1:length(x$Results), function(i) x$Results[[i]]$p)
-	for (i in 1:length(pVals)) {
-		cat(" > Wave ", i, ": ", pVals[i], "\n")
+	pVals = sapply(1:length(x$Results), function(i) 
+				x$Results[[i]]$p.MHD)
+	ctimes = sapply(1:length(x$Results), function(i)
+				as.numeric(x$Results[[i]]$Preprocess$ComputeTime["elapsed"]))
+	cat("Siena Goodness of Fit (", 
+			attr(x,"auxiliaryStatisticName") ,")\n=====\n")
+	if (! attr(x$SimulatedStatistics,"joint")) {
+		cat(" > Monte Carlo Mahalanobis distance test P-values:\n")
+		for (i in 1:length(pVals)) {
+			cat(" > Wave ", i, ": ", pVals[i], "\n")
+		}
+	} else {
+		cat("Monte Carlo Mahalanobis distance test P-value: ",pVals[1], "\n")
 	}
+	if (x$Results[[1]]$TwoTailed) {
+		cat("-----\nTwo tailed test used.")	
+	} else {
+		cat("-----\nOne tailed test used ",
+		"(i.e. area under curve for greater distance than observation).")	
+	}
+	cat("\nBased on ", x$Results[[1]]$Preprocess$Iterations,
+			" simulated draws from the null distribution.")
+	cat("\nComputation time for auxiliary statistic calculations: ",
+			attr(x$SimulatedStatistics,"time")["elapsed"] , "seconds.")
+	cat("\nComputation time for distance/test statistic calculations: ",
+			sum(ctimes), "seconds.\n")
 }
 
-plot.sienaGOF <- function (x, wave=1, ...) {
-	plot(x$Results[[wave]], main=paste("Goodness of Fit of ", attr(x, "auxiliaryStatisticName")),
-			xlab="Variate Index", ylab="Variate Value", ...)
+plot.sienaGOF <- function (x, standardize=3, violin=TRUE, 
+			ylim=NULL, xlim=NULL, 
+			xlab=NULL, ylab=NULL, key=NULL, perc=.05, wave=1, main=NULL, ...) {
+		if (is.null(main)) {
+			main=paste("Goodness of Fit of ", 
+					attr(x$Observed,"auxiliaryStatisticName"))	
+		}
+		if (!attr(x$Observed,"joint")){
+			main = paste(main, "Wave", wave)
+		}
+		x <- x$Results[[wave]]
+		itns <- x$Preprocess$Iterations
+		vars <- x$Preprocess$Variates
+		sims <- x$Preprocess$Original
+		obs <- x$Observation
+		n.obs <- nrow(obs)
+		
+		if (standardize==3) {
+			sims.median <- apply(sims, 2, median)
+			sims.min <- apply(sims, 2, min)
+			sims.max <- apply(sims, 2, max)
+			sims <- sapply(1:ncol(sims), function(i)
+						(sims[,i] - sims.median[i])/(sims.max[i] - 
+									sims.min[i] ) )
+			obs <- matrix(sapply(1:ncol(sims), function(i) 
+						(obs[,i] - sims.median[i])/(sims.max[i] -
+									sims.min[i] ) ), nrow=n.obs )
+		} else if (standardize==2) {
+			sims.min <- apply(sims, 2, min)
+			sims.max <- apply(sims, 2, max)
+			sims <- sapply(1:ncol(sims), function(i) (sims[,i] -
+						sims.min[i])/(sims.max[i] - sims.min[i] ) )
+			obs <- matrix(sapply(1:ncol(sims), function(i) (obs[,i] -
+						sims.min[i])/(sims.max[i] - sims.min[i] )
+						), nrow=n.obs )
+		} else if (standardize==1) {
+			sims.mean <- apply(sims, 2, mean)
+			sims <- sapply(1:ncol(sims), function(i)
+						(sims[,i] - sims.mean[i]) )
+			obs <- matrix(sapply(1:ncol(sims), function(i)
+						(obs[,i] - sims.mean[i]) ), nrow=n.obs )
+		}
+		
+		if (is.null(ylim)) {
+			ylim = c(min(obs, sims), max(obs, sims))
+		}
+		if (is.null(xlim)) {
+			xlim = c(0, ncol(obs)+1)
+		}
+		if (is.null(xlab)) {
+			xlab= paste( paste("p:", round(x$p, 3), 
+						collapse = " "), collapse = "\n")
+		}
+		if (is.null(ylab)) {
+			ylab = "Statistic Values"
+		}
+		xAxis <- (1:vars)
+		
+		plot(obs[1,]~xAxis, col="white", type="p",
+				ylim=ylim, xlim=xlim, main=main,
+				xlab=xlab, ylab=ylab, axes=FALSE, ...)
+		if (!is.null(key)) {
+			if (length(key) != ncol(obs)) {
+				stop("Key length does not match the number of variates.")
+			}
+			axis(1, at=xAxis, lab=key)
+		} else {
+			axis(1, at=xAxis, lab=paste("v", xAxis, sep=""))
+		}
+		
+		ind.lower = round(itns * perc/2)
+		ind.upper = round(itns * (1-perc/2))
+		yperc.lower = sapply(1:ncol(sims), function(i)
+					sort(sims[,i])[ind.lower]  )
+		yperc.upper = sapply(1:ncol(sims), function(i)
+					sort(sims[,i])[ind.upper]  )
+		lines(yperc.lower~xAxis, lty=3, col = "gray", lwd=3)
+		lines(yperc.upper~xAxis, lty=3, col = "gray", lwd=3)
+		
+		if (violin) {
+			require(vioplot)
+			for (i in 1:ncol(sims)) {
+				vioplot(sims[,i], at=xAxis[i],
+						add=TRUE, col="gray", wex=.75, ...)
+			}
+		} else {
+			boxplot(as.numeric(sim)~rep(1:vars, each=itns), add=TRUE, ...)
+		}
+		for(i in 1:nrow(obs)) {
+			lines(obs[i,]~xAxis, col="red", type="l", lwd=1, ...)
+			lines(obs[i,]~xAxis, col="red", type="p", lwd=3, pch=19, ...)
+			text(xAxis, obs[i,], labels=round(x$Observation[i,],3), pos=4)
+		}
 }
\ No newline at end of file

Modified: pkg/RSienaTest/R/sienaTimeTest.r
===================================================================
--- pkg/RSienaTest/R/sienaTimeTest.r	2011-02-21 00:54:43 UTC (rev 136)
+++ pkg/RSienaTest/R/sienaTimeTest.r	2011-02-22 01:30:23 UTC (rev 137)
@@ -294,7 +294,8 @@
                       sqrt(diag(sienaFit$covtheta))[estimatedInFit],
 					  ToTest=toTest,
 					  ScreenedEffects=which(!use),
-                      WaveNumbers=waveNumbers
+                      WaveNumbers=waveNumbers,
+					  IndividualTestsOrthogonalized=condition
 					  )
 	class(returnObj) <- "sienaTimeTest"
 	returnObj
@@ -323,11 +324,14 @@
 	cat("\nParameter-wise joint significance tests (i.e. each
 		parameter across all dummies):\n")
 	print(x$EffectTest)
-	if (x$Waves <=2)
+	if (x$Waves <=2 && ! x$IndividualTestsOrthogonalized)
 	{
 		cat("\n\nNote that these parameter-wise tests have a different
 			form than the individual tests, thus testing with 3 observations
 			may yield different individual and parameter-wise values.\n\n")
+	} else if (x$IndividualTestsOrthogonalized) {
+		cat("\nNote that the individual test statistics were orthogonalized",
+				" with respect to each other (condition=TRUE).")
 	}
 	tmp <- paste(" (", 1:length(x$BaseRowInD), ") ",
 				 rownames(x$IndividualTest)[x$BaseRowInD], "\n", sep="")
@@ -499,7 +503,8 @@
                          tmprows$type %in% effects$type[i], ]
         tmprows$fix <- fix
         tmprows$include <- include
-        tmprows$effectNumber <- max(newEffects$effectNumber) + (1:nrow(tmprows))
+        tmprows$effectNumber <- max(newEffects$effectNumber) + 
+				(1:nrow(tmprows))
         tmprows$timeDummy <- timeDummy
         rownames(tmprows) <- paste(newname, effects$type[i], sep=".")
         newEffects <- rbind(newEffects, tmprows)
@@ -541,7 +546,7 @@
         effects$timeDummy <- ","
     }
 
- # Josh tested these covariate effects, they work as-is for sienaTimeFix.
+ # JAL: tested these covariate effects, they work as-is for sienaTimeFix.
  #   covar <- effects$interaction1 != ""
  #   if (any(effects$timeDummy[covar] != ","))
  #   {
@@ -555,17 +560,21 @@
 #                " for network effects of type eval or for RateX.")
 #        effects$timeDummy[!implemented] <- ","
 #	}
-    structuralRate <- effects$type == "rate" & effects$rateType %in% "structural"
+    structuralRate <- effects$type == "rate" & effects$rateType %in% 
+			"structural"
     if (any(effects$timeDummy[structuralRate] != ","))
     {
 		warning("Time dummy effects are not implemented",
                 " for structural rate effects.")
         effects$timeDummy[structuralRate] <- ","
     }
-    behaviorNonRateX <- effects$netType =="behavior" & effects$type != "rate"
+ # JAL: Implementing these 20-FEB-2011 in RSeinaTest
+ # TODO: Behavioral interactions need to be implemented for these to work.
+ # Once they are, we can comment lines 575-577 and 717-720 out.
+ 	behaviorNonRateX <- effects$netType =="behavior" & effects$type != "rate"
     if (any(effects$timeDummy[behaviorNonRateX] != ","))
     {
-		warning("Time dummy effects are not implemented",
+ 		warning("Time dummy effects are not implemented",
                 " for behavior effects of type eval or endow.")
         effects$timeDummy[behaviorNonRateX] <- ","
     }
@@ -602,7 +611,11 @@
 
         timesd <- lapply(timesd, function(x)as.numeric(x[x %in% periodNos]))
         dummiedEffects <- sapply(timesd, function(x)length(x) > 0)
-
+		
+		baseType <- list(number=which(dummiedEffects), 
+				type=effects$netType[dummiedEffects],
+				name=effects$name[dummiedEffects])
+		
         rateXDummies <- effects$shortName == "RateX" & dummiedEffects
 
         newEffects <- effects
@@ -742,29 +755,62 @@
                         }
                     }
                 }
-
-                ##find the density rows for this depvar
-                i <- which(effects$name == depvar &
-                           effects$shortName=="density" &
-                           effects$type %in% types)
-
-                if (length(i) == 0)
-                {
-                    stop("Cannot find density effect(s) for ", depvar,
-                         " ", types)
-                }
-
-                ## establish whether we want the egoXs included, fixed or not
-                fix <- sapply(timesd[i], function(x)!p %in% x)
-                typesNames <- paste(depvar, effects$type[i], sep=".")
-                includeTypes <- !sapply(timesTypelist[typesNames], is.null)
-
-                ## add one or more effects (depending on types requested)
-                newEffects <- addEffect(newEffects, i, dname,
-                                     "covarNonSymmetricObjective", "egoX",
-                                     paste('isDummy', p,
-                                           effects$effectNumber[i], sep=','),
-                                     fix=fix, include=includeTypes)
+				# This is where the behaviors are treated differently 
+				if (baseType$type[baseType$name==depvar][1] == "behavior") {
+					##find the linear effect rows for this depvar
+					i <- which(effects$name == depvar &
+									effects$shortName=="linear" &
+									effects$type %in% types)
+					
+					if (length(i) == 0)
+					{
+						stop("Cannot find 'effect from' effect(s) for ", depvar,
+								" ", types)
+					}
+					
+					## establish whether we want the effFroms
+					## included, fixed or not
+					fix <- sapply(timesd[i], function(x)!p %in% x)
+					typesNames <- paste(depvar, effects$type[i], sep=".")
+					includeTypes <- !sapply(timesTypelist[typesNames], is.null)
+					
+					## add one or more effects (depending on types requested)
+					newEffects <- addEffect(newEffects, i, dname,
+							"covarBehaviorObjective", "effFrom",
+							paste('isDummy', p,
+									effects$effectNumber[i], sep=','),
+							fix=fix, include=includeTypes)
+					newEffects[nrow(newEffects),"effectName"] <-
+							gsub("yyyyyy", dname,
+									newEffects[nrow(newEffects),"effectName"])
+					newEffects[nrow(newEffects),"functionName"] <-
+							gsub("yyyyyy", dname,
+									newEffects[nrow(newEffects),"functionName"])
+					newEffects[nrow(newEffects),"interaction1"] <- dname
+				} else {
+					##find the density rows for this depvar
+					i <- which(effects$name == depvar &
+									effects$shortName=="density" &
+									effects$type %in% types)
+					
+					if (length(i) == 0)
+					{
+						stop("Cannot find density effect(s) for ", depvar,
+								" ", types)
+					}
+					
+					## establish whether we want the egoXs included, fixed or not
+					fix <- sapply(timesd[i], function(x)!p %in% x)
+					typesNames <- paste(depvar, effects$type[i], sep=".")
+					includeTypes <- !sapply(timesTypelist[typesNames], is.null)
+					
+					## add one or more effects (depending on types requested)
+					newEffects <- addEffect(newEffects, i, dname,
+							"covarNonSymmetricObjective", "egoX",
+							paste('isDummy', p,
+									effects$effectNumber[i], sep=','),
+							fix=fix, include=includeTypes)
+				}
             }
 
             ## now the interactions for any dummied effects for this
@@ -778,15 +824,27 @@
                 {
                     dname <- paste("Dummy", p, ":", depvar, sep="")
                     effect <- effects[j, ]
-                    newEffects <-
-                        includeInteraction(newEffects,
-                                           effect$shortName, "egoX",
-                                           character=TRUE,
-                                           type=effect$type,
-                                           interaction1= c(effect$interaction1,
-                                           dname),
-                                           interaction2=effect$interaction2,
-                                           name=depvar, verbose=FALSE)
+					if (baseType$type[baseType$name==depvar][1] == "behavior") {
+						newEffects <-
+								includeInteraction(newEffects,
+										effect$shortName, "effFrom",
+										character=TRUE,
+										type=effect$type,
+										interaction1= c(effect$interaction1,
+												dname),
+										interaction2=effect$interaction2,
+										name=depvar, verbose=FALSE)
+					} else {
+	                    newEffects <-
+	                        includeInteraction(newEffects,
+	                                        effect$shortName, "egoX",
+	                                        character=TRUE,
+	                                        type=effect$type,
+	                                        interaction1= c(effect$interaction1,
+	                                        dname),
+	                                        interaction2=effect$interaction2,
+	                                        name=depvar, verbose=FALSE)
+					}
                     ## find the row altered
                     newrow <- newEffects$effect1 == j &
                     newEffects$effect2 ==

Modified: pkg/RSienaTest/changeLog
===================================================================
--- pkg/RSienaTest/changeLog	2011-02-21 00:54:43 UTC (rev 136)
+++ pkg/RSienaTest/changeLog	2011-02-22 01:30:23 UTC (rev 137)
@@ -1,3 +1,20 @@
+2011-02-22 R-forge revision 137
+
+	* Changes to RSienaTest Only
+	* DESCRIPTION: removed dependency on snopgof (for now!) and add
+	  -ed dependency for MASS, sna, vioplot for sienaGOF.
+	* R/sienaTimeTest.r: added functionality for behavioral dummies
+	  but they will not work until behavioral interaction effects
+	  are supported.
+	* R/sienaGOF.r: removed dependency on snopgof (runs standalone)
+	  and a number of smaller features like being able to run a
+	  joint test across all waves or break them out, having control
+	  over the centrality measure (mean, median, etc.), and not
+	  requiring that the auxiliary statistics are non-collinear
+	  (this is where MASS's ginv comes in). I also moved the
+	  relevant functionality from snopgof directly into the code.
+	* man/sienaGOF.Rd: Changes to reflect the above
+
 2011-02-21 R-forge revision 136
 
 	* R/initializeFRAN.r: fix bug in bipartite networks: diagonals

Modified: pkg/RSienaTest/doc/s_man400.tex
===================================================================
--- pkg/RSienaTest/doc/s_man400.tex	2011-02-21 00:54:43 UTC (rev 136)
+++ pkg/RSienaTest/doc/s_man400.tex	2011-02-22 01:30:23 UTC (rev 137)
@@ -8285,6 +8285,8 @@
 (Programmers should consult the changeLog file on CRAN or in the R-forge
 repository.)
 \begin{itemize}
+\item 2011-02-22 R-forge revision 137: (RSienaTest only)
+Additional work on the goodness of fit functionality (see ?sienaGOF)
 \item 2011-02-21 R-forge revision 136:
 \begin{itemize}
 \item Fixed bug in bipartite network processing. Diagonal (up to number of

Modified: pkg/RSienaTest/man/sienaGOF.Rd
===================================================================
--- pkg/RSienaTest/man/sienaGOF.Rd	2011-02-21 00:54:43 UTC (rev 136)
+++ pkg/RSienaTest/man/sienaGOF.Rd	2011-02-22 01:30:23 UTC (rev 137)
@@ -5,20 +5,25 @@
 \alias{evaluateAuxiliaryStatistics.observed}
 \alias{print.sienaGOF}
 \alias{plot.sienaGOF}
+\alias{sienaGOFPreprocess}
+\alias{sienaGOFProcess}
+
 \title{Functions to assess goodness of fit for SAOMs}
 \description{
  A series of functions which start with a \code{sienaFit} object, a
  \code{siena} data object, and a function for calculating auxiliary
- statistics on graphs. 
- This function uses the \code{snopgof} package to evaluate goodness
- of fit from the resulting vectors of statistics.
+ statistics on graphs. A Monte Carlo test based on the Mahalanobis
+ distance based test is used to calculate frequentist p-values and
+ plotting functions can be used to diagnose bad fit.
  }
 
 \usage{
 sienaGOF(obsList, simList)
-evaluateAuxiliaryStatistics.observed(sienaDataObject, varName, wave, auxiliaryFunction, verbose=FALSE)
-evaluateAuxiliaryStatistics.simulated(sienaFitObject, groupName, varName, wave, auxiliaryFunction, verbose=FALSE)
-# Alias for the above two functions
+evaluateAuxiliaryStatistics.observed(sienaDataObject, varName, auxiliaryFunction, wave=NULL, 
+verbose=FALSE, join=TRUE)
+evaluateAuxiliaryStatistics.simulated(sienaFitObject, groupName, varName, 
+auxiliaryFunction, wave=NULL, verbose=FALSE, join=TRUE)
+# Alias for the above two functions, for user convenience
 evaluateAuxiliaryStatistics(sienaObject, ...)
 }
 \arguments{
@@ -30,58 +35,83 @@
   \item{sienaFitObject}{Results from a call to \code{siena07}.}
   \item{groupName}{The name of the sienaGroup to be used.}
   \item{varName}{The value of the variable to be used, whether a network, behavior, etc.}
-  \item{wave}{Wave(s) to be used for calculating the statistics.}
+  \item{join}{Boolean: should sienaGOF do tests on all of the waves individually, or sum across waves?}
+  \item{wave}{Wave(s) to be used for calculating the statistics. Ignored if join=TRUE}
   \item{auxiliaryFunction}{Function to be used to calculate statistics, e.g. from the \code{sna} package.}
   \item{verbose}{Whether to print intermediate results. Calculations can take some time, and user feedback may be useful.}
   \item{...}{Other arguments to be used by the appropriate subfunction which will be called by the alias.}
 }
 \details{
- This function is currently under construction. Contact the author for working papers on how it works.
- In general, it tests goodness of fit of statistics that should not be explicitly fit. If the p value is
- below the desired false positive rate, there is sufficient evidence to reject the goodness of fit of the model
- with respect to the implied statistic.
+ This function is used to assess the goodness of fit of a stochastic actor oriented model
+ for an arbitrarily defined auxiliary statistic. These statistics should be chosen to
+ represent features of the network that are not explicitly fit but can be considered
+ important properties that the model at hand should represent well. Some examples are:
+ 
+ -Geodesic distances
+ 
+ -Triad census
+ 
+ -Outdegree distribution
+ 
+ -Indegree distribution
+ 
+ -Behavioral distribution
+ 
+ -Edgewise homophily counts
+ 
+ -Edgewise shared partners
+
+The function is written so that the user can easily define a function to capture some
+other relevant aspects of the network, behaviors, etc.
+
+We recommend the following heuristic approach to fitting your model:
+
+1. Check convergence of your estimation.
+
+2. Assess time heterogeneity and either modify the base effects or include time dummy terms.
+
+3. Assess goodness of fit (i.e. using join=TRUE) on auxiliary statistics, and refine the model.
 }
 \value{
   \code{sienaGOF} Returns a list:
   \item{ObservedStatistics }{ The values of the calculated statistics for the observations. }
-  \item{SimulatedStatistics }{ A preprocess object (see \code{snopgof} package) calculated on the statistics for the simulations. }
-  \item{Results }{ The p values of the \code{snopgof} test. }
+  \item{SimulatedStatistics }{ A preprocess object calculated on the statistics for the simulations. }
+  \item{Results }{ A list including, among other things, p values of the Monte Carlo Mahalanobis distance test. }
 }
 
 \references{See \url{http://www.stats.ox.ac.uk/~snijders/siena/}
-  for general information on RSiena.
+  for general information on RSiena and \url{http://www.stats.ox.ac.uk/~lospinos/}
+ for information on the following presentation and working paper:
+ 
+Lospinoso, J.A. and Snijders, T.A.B, "Goodness of fit for
+Stochastic Actor Oriented Models." Presentation given at Sunbelt XXXI, St. Pete's Beach, Fl. 2011.
 
-Lospinoso, J.A. and Snijders, T.A.B, "A Non-parametric Test for
-Goodness of Fit." Working Paper
+Lospinoso, J.A. and Snijders, T.A.B, "Goodness of fit for
+Stochastic Actor Oriented Models." Working Paper.
 }
 \author{Josh Lospinoso}
-\seealso{\code{\link{siena07}}, \code{\link{sienaTimeTest}},
-  \code{snopgof}}
+\seealso{\code{\link{siena07}}, \code{\link{sienaTimeTest}} }
 \examples{
-\dontrun{
 # Fifty iterations being used, but we recommend using many more (perhaps 1000 or 1500)
 mymodel <- sienaModelCreate(fn=simstats0c, nsub=4, n3=50)
 mynet1 <- sienaNet(array(c(s501, s502, s503), dim=c(50, 50, 3)))
 mydata <- sienaDataCreate(mynet1)
 myeff <- getEffects(mydata)
 myeff <- includeEffects(myeff, transTrip, balance)
-ans <- siena07(mymodel, data=mydata, effects=myeff, returnDeps=T, batch=TRUE)
+ans <- siena07(mymodel, data=mydata, effects=myeff, returnDeps=TRUE, batch=TRUE)
 
-library(sna)
-# install.packages("sna")
-library(network)
-# install.packages("network")
+require(sna)
+require(network)
 triadCensus <- function (x) {
 	triad.census(network(x))[-1]
 }
 
-sim <- evaluateAuxiliaryStatistics(ans, "Data1", "mynet1", 1:2, triadCensus, verbose=T)
-obs <- evaluateAuxiliaryStatistics(mydata, "mynet1", 1:2, triadCensus, verbose=T)
+sim <- evaluateAuxiliaryStatistics(ans, "Data1", "mynet1", triadCensus)
+obs <- evaluateAuxiliaryStatistics(mydata, "mynet1", triadCensus)
 (res <- sienaGOF(obs, sim))
 
 # And plots if desired
-plot(res, wave=1)
-plot(res, wave=2)
+triadCensus.names <- c("012","102","021D","021U","021C","111D","111U","030T","030C","201","120D","120U","120C","210","300")
+plot(res, key=triadCensus.names)
 }
-}
 \keyword{models}



More information about the Rsiena-commits mailing list