[Rsiena-commits] r149 - in pkg: RSiena RSiena/man RSienaTest RSienaTest/R RSienaTest/doc RSienaTest/man

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Thu May 26 18:14:47 CEST 2011


Author: jalospinoso
Date: 2011-05-26 18:14:47 +0200 (Thu, 26 May 2011)
New Revision: 149

Modified:
   pkg/RSiena/changeLog
   pkg/RSiena/man/RSiena-package.Rd
   pkg/RSienaTest/NAMESPACE
   pkg/RSienaTest/R/sienaGOF.r
   pkg/RSienaTest/changeLog
   pkg/RSienaTest/doc/RSiena.bib
   pkg/RSienaTest/doc/s_man400.tex
   pkg/RSienaTest/man/RSiena-package.Rd
   pkg/RSienaTest/man/sienaGOF.Rd
Log:
R 149

Modified: pkg/RSiena/changeLog
===================================================================
--- pkg/RSiena/changeLog	2011-05-25 21:19:19 UTC (rev 148)
+++ pkg/RSiena/changeLog	2011-05-26 16:14:47 UTC (rev 149)
@@ -1,7 +1,14 @@
+2011-05-26 R-forge revision 149 RSienaTest only
+
+	* R/sienaGOF.R, NAMESPACE, doc/RSiena.bib, doc/s_man400.tex:
+	Minor debugs for multi-wave
+	data in sienaGOF, added summary.sienaGOF functionality,
+	added a model selection script to the manual.
+	
 2011-05-25 R-forge revision 148 RSienaTest only
 
 	* man/sienaGOF.Rd, man/sienaGOFsupplement.Rd,
-	R/sienaGOF.R, NAMESPACE, src/sienaGOF.r, 
+	R/sienaGOF.R, NAMESPACE,
 	src/initializeFRAN.r: overhaul of sienaGOF.
 
 2011-05-19 R-forge revision 147 RSienaTest only

Modified: pkg/RSiena/man/RSiena-package.Rd
===================================================================
--- pkg/RSiena/man/RSiena-package.Rd	2011-05-25 21:19:19 UTC (rev 148)
+++ pkg/RSiena/man/RSiena-package.Rd	2011-05-26 16:14:47 UTC (rev 149)
@@ -30,8 +30,8 @@
 \tabular{ll}{
 Package: \tab RSiena\cr
 Type: \tab Package\cr
-Version: \tab 1.0.12.148\cr
-Date: \tab 2011-05-25\cr
+Version: \tab 1.0.12.149\cr
+Date: \tab 2011-05-26\cr
 License: \tab GPL-2 \cr
 LazyLoad: \tab yes\cr
 }

Modified: pkg/RSienaTest/NAMESPACE
===================================================================
--- pkg/RSienaTest/NAMESPACE	2011-05-25 21:19:19 UTC (rev 148)
+++ pkg/RSienaTest/NAMESPACE	2011-05-26 16:14:47 UTC (rev 149)
@@ -45,5 +45,6 @@
 S3method(summary, sienaEffects)
 S3method(print, summary.sienaEffects)
 S3method(edit, sienaEffects)
+S3method(summary, sienaGOF)
 S3method(print, sienaGOF)
 S3method(plot, sienaGOF)

Modified: pkg/RSienaTest/R/sienaGOF.r
===================================================================
--- pkg/RSienaTest/R/sienaGOF.r	2011-05-25 21:19:19 UTC (rev 148)
+++ pkg/RSienaTest/R/sienaGOF.r	2011-05-26 16:14:47 UTC (rev 149)
@@ -8,7 +8,7 @@
 ##  * Description: This file contains the code to assess goodness of fit
 ##  *
 ##  ****************************************************************************/
-
+##@sienaGOF siena07 Does test for goodness of fit
 sienaGOF <- function(
 		sienaFitObject, 
 		auxiliaryFunction, 
@@ -86,7 +86,7 @@
 						matrix(
 						auxiliaryFunction(NULL,
 								sienaFitObject$f, sienaFitObject$sims,
-								groupName, varName, wave), nrow=1) })
+								groupName, varName, j), nrow=1) })
 	if (join) 
 	{
 		obsStats <- Reduce("+", obsStatsByWave)
@@ -113,7 +113,7 @@
 						{ auxiliaryFunction(i,
 									sienaFitObject$f, 
 									sienaFitObject$sims,
-									groupName, varName, wave)
+									groupName, varName, j)
 							if (verbose && (i %% 100 == 0) ) 
 								cat("  > Completed ", i,
 									" calculations\n")
@@ -138,7 +138,7 @@
 											auxiliaryFunction(i,
 													sienaFitObject$f, 
 													sienaFitObject$sims,
-													groupName, varName, wave)
+													groupName, varName, j)
 									})
 							simStatsByWave <-
 									matrix(simStatsByWave, ncol=iterations)
@@ -238,17 +238,15 @@
 				 applyTest(obsStats[[i]], simStats[[i]]) })
 	mhdTemplate <- rep(0, sum(sienaFitObject$test))
 	names(mhdTemplate) <- rep(0, sum(sienaFitObject$test))
-	if (join) {
-		OneStepMHD <- list(mhdTemplate)
-		PartialOneStepMHD <- list(mhdTemplate)
-		GmmMhdValue <- list(mhdTemplate)
-	} 
-	else  
-	{
-		OneStepMHD <- lapply(wave, function(i) (mhdTemplate))
-		PartialOneStepMHD <- lapply(wave, function(i) (mhdTemplate))
-		GmmMhdValue <- lapply(wave, function(i) (mhdTemplate))
-	}
+
+	JoinedOneStepMHD <- mhdTemplate
+	JoinedPartialOneStepMHD <- mhdTemplate
+	JoinedGmmMhdValue <- mhdTemplate
+
+	OneStepMHD <- lapply(wave, function(i) (mhdTemplate))
+	PartialOneStepMHD <- lapply(wave, function(i) (mhdTemplate))
+	GmmMhdValue <- lapply(wave, function(i) (mhdTemplate))
+
 	obsMhd <- NULL
 	
 	ExpStat <- lapply(wave, function(i) {
@@ -279,25 +277,21 @@
 	if (sum(sienaFitObject$test) > 0) {
 		effectsObject <- sienaFitObject$requestedEffects
 		nSims <- sienaFitObject$Phase3nits
-		if (join) {
-			names(OneStepMHD[[1]]) <- 
+		for (i in wave) {
+			names(OneStepMHD[[i]]) <- 
 					effectsObject$effectName[sienaFitObject$test]
-			names(PartialOneStepMHD[[1]]) <- 
+			names(PartialOneStepMHD[[i]]) <-
 					effectsObject$effectName[sienaFitObject$test]
-			names(GmmMhdValue[[1]]) <- 
+			names(GmmMhdValue[[i]]) <-
 					effectsObject$effectName[sienaFitObject$test]
-		} 
-		else  
-		{
-			for (i in wave) {
-				names(OneStepMHD[[i]]) <- 
-						effectsObject$effectName[sienaFitObject$test]
-				names(PartialOneStepMHD[[i]]) <-
-						effectsObject$effectName[sienaFitObject$test]
-				names(GmmMhdValue[[i]]) <-
-						effectsObject$effectName[sienaFitObject$test]
-			}
 		}
+		names(JoinedOneStepMHD) <- 
+				effectsObject$effectName[sienaFitObject$test]
+		names(JoinedPartialOneStepMHD) <- 
+				effectsObject$effectName[sienaFitObject$test]
+		names(JoinedGmmMhdValue) <- 
+				effectsObject$effectName[sienaFitObject$test]
+		
 		rownames(OneStepSpecs) <- effectsObject$effectName
 		colnames(OneStepSpecs) <- effectsObject$effectName[sienaFitObject$test]
 		rownames(PartialOneStepSpecs) <- effectsObject$effectName
@@ -370,50 +364,34 @@
 					theta0 + mmPartialThetaDelta
 			GmmOneStepSpecs[effectsToInclude,counterTestEffects] <- theta0 + 
 					gmmThetaDelta
-			if (join) {
-				OneStepMHD[[1]][counterTestEffects] <-
-					as.numeric(
-					sum(obsMhd) + 
-					mmThetaDelta %*% Reduce("+", Gradient) + 0.5 *
-					mmThetaDelta %*% Reduce("+", Hessian) %*% 
-					mmThetaDelta)
-				PartialOneStepMHD[[1]][counterTestEffects] <-
-					as.numeric(
-					sum(obsMhd) + 
-					mmPartialThetaDelta %*% Reduce("+", Gradient) + 0.5 *
-					mmPartialThetaDelta %*% Reduce("+", Hessian) %*% 
-					mmPartialThetaDelta)
-				GmmMhdValue[[1]][counterTestEffects] <-
-					as.numeric( obsMhd + 
-					gmmThetaDelta %*% 
-					Reduce("+", Gradient) + 0.5 *
-					gmmThetaDelta %*% 
-					Reduce("+", Hessian) %*%
-					gmmThetaDelta )
-		} 
-		else  
-		{
-				for (i in 1:length(obsMhd)) {
-					OneStepMHD[[i]][counterTestEffects] <-  as.numeric(
+			for (i in 1:length(obsMhd)) {
+				OneStepMHD[[i]][counterTestEffects] <-  as.numeric(
+					obsMhd[i] + 
+					mmThetaDelta %*% Gradient[[i]] + 0.5 *
+					mmThetaDelta %*% Hessian[[i]] %*% mmThetaDelta)
+				GmmMhdValue[[i]][counterTestEffects] <-
+						as.numeric( obsMhd[i] + 
+						gmmThetaDelta %*% 
+						Gradient[[i]] + 0.5 *
+						gmmThetaDelta %*% 
+						Hessian[[i]] %*%
+						gmmThetaDelta )
+				PartialOneStepMHD[[i]][counterTestEffects] <-
+						as.numeric(
 						obsMhd[i] + 
-						mmThetaDelta %*% Gradient[[i]] + 0.5 *
-						mmThetaDelta %*% Hessian[[i]] %*% mmThetaDelta)
-					GmmMhdValue[[i]][counterTestEffects] <-
-							as.numeric( obsMhd[i] + 
-							gmmThetaDelta %*% 
-							Gradient[[i]] + 0.5 *
-							gmmThetaDelta %*% 
-							Hessian[[i]] %*%
-							gmmThetaDelta )
-					PartialOneStepMHD[[1]][counterTestEffects] <-
-							as.numeric(
-							sum(obsMhd) + 
-							mmPartialThetaDelta %*% Reduce("+", Gradient) + 
-							0.5 *
-							mmPartialThetaDelta %*% Reduce("+", Hessian) %*% 
-							mmPartialThetaDelta)
-				}
+						mmPartialThetaDelta %*% 
+						Gradient[[i]] + 
+						0.5 *
+						mmPartialThetaDelta %*% 
+						Hessian[[i]] %*% 
+						mmPartialThetaDelta)
 			}
+			JoinedOneStepMHD[counterTestEffects] <-
+					Reduce("+",OneStepMHD)[counterTestEffects]
+			JoinedPartialOneStepMHD[counterTestEffects] <-
+					Reduce("+",PartialOneStepMHD)[counterTestEffects]
+			JoinedGmmMhdValue[counterTestEffects] <-
+					Reduce("+",GmmMhdValue)[counterTestEffects]
 		}
 	}
 
@@ -421,12 +399,16 @@
 	class(res) <- "sienaGOF"
 	attr(res, "scoreTest") <- (sum(sienaFitObject$test) > 0)
 	attr(res, "originalMahalanobisDistances") <- obsMhd
-	attr(res, "oneStepMahalanobisDistances") <- OneStepMHD
+	attr(res, "joinedOneStepMahalanobisDistances") <- 
+			JoinedOneStepMHD
 	attr(res, "oneStepSpecs") <- OneStepSpecs
 	attr(res, "partialOneStepMahalanobisDistances") <- PartialOneStepMHD
+	attr(res, "joinedPartialOneStepMahalanobisDistances") <- 
+			JoinedPartialOneStepMHD
 	attr(res, "partialOneStepSpecs") <- PartialOneStepSpecs
 	attr(res, "gmmOneStepSpecs") <- GmmOneStepSpecs
 	attr(res, "gmmOneStepMahalanobisDistances") <- GmmMhdValue
+	attr(res, "joinedGmmOneStepMahalanobisDistances") <- JoinedGmmMhdValue
 	attr(res,"auxiliaryStatisticName") <-
 			attr(obsStats,"auxiliaryStatisticName")
 	attr(res, "simTime") <- attr(simStats,"time")
@@ -434,7 +416,7 @@
 	attr(res, "joined") <- join
 	res
 }
-
+##@print.sienaGOF siena07 Print method for sienaGOF
 print.sienaGOF <- function (x, ...) {
 	## require(Matrix)
 	levels <- 1:length(x)
@@ -442,7 +424,7 @@
 	titleStr <- "Monte Carlo Mahalanobis distance test P-value: "
 	cat("Siena Goodness of Fit (", 
 			attr(x,"auxiliaryStatisticName") ,")\n=====\n")
-	if (! attr(x,"join"))
+	if (! attr(x,"joined"))
 	{
 		cat(" >",titleStr, "\n")
 		for (i in 1:length(pVals))
@@ -484,6 +466,11 @@
 					originalMhd[j],") for current model.")
 		}
 	}
+}
+##@summary.sienaGOF siena07 Summary method for sienaGOF
+summary.sienaGOF <- function(object, ...) {
+	x <- object
+	print(x)
 	if (attr(x, "scoreTest")) {
 		oneStepSpecs <- attr(x, "oneStepSpecs")
 		oneStepMhd <- attr(x, "oneStepMahalanobisDistances")
@@ -491,15 +478,18 @@
 		gmmOneStepSpecs <- attr(x, "gmmOneStepSpecs")
 		partialOneStepSpecs <- attr(x, "partialOneStepSpecs")
 		partialOneStepMhd <- attr(x, "partialOneStepMahalanobisDistances")
-		
+		joinedPartialOneStepMhd <- 
+				attr(x, "joinedPartialOneStepMahalanobisDistances")
+		joinedOneStepMhd <- attr(x, "joinedOneStepMahalanobisDistances")
+		joinedGmmMhd <- attr(x, "joinedGmmOneStepMahalanobisDistances")
 		if (attr(x, "joined")) {
 			for (i in 1:ncol(oneStepSpecs)) {
 				a <- cbind(oneStepSpecs[,i, drop=FALSE],
 						partialOneStepSpecs[,i, drop=FALSE],
 						gmmOneStepSpecs[,i, drop=FALSE] )
-				b <- matrix( c(oneStepMhd[[1]][i], 
-								partialOneStepMhd[[1]][i],
-								gmmMhd[[1]][i]), ncol=3)
+				b <- matrix( c(joinedOneStepMhd[i], 
+								joinedPartialOneStepMhd[i],
+								joinedGmmMhd[i]), ncol=3)
 				rownames(b) <- c("MHD")
 				a <- rbind(a, b)
 				a <- round(a, 3)
@@ -529,10 +519,10 @@
 		}
 		cat("\n-----")
 	}
-cat("\nComputation time for auxiliary statistic calculations on simulations: ",
+	cat("\nComputation time for auxiliary statistic calculations on simulations: ",
 			attr(x, "simTime")["elapsed"] , "seconds.")
 }
-
+##@plot.sienaGOF siena07 Plot method for sienaGOF
 plot.sienaGOF <- function (x, center=FALSE, scale=FALSE, violin=TRUE, 
 		key=NULL, perc=.05, wave=1, main=main, ylab=ylab,  ...) 
 {
@@ -710,8 +700,8 @@
 	require(sna)
 	dimsOfDepVar=dim(data[[groupName]]$depvars[[varName]][,,wave])
 	missing <- as.sociomatrix.sna(
-			as.edgelist.sna(
-					is.na(data[[groupName]]$depvars[[varName]][,,wave])))
+		1*is.na(data[[groupName]]$depvars[[varName]][,,wave]))
+
 	if (is.null(i)) {
 		# sienaGOF wants the observation:
 		returnValue <- data[[groupName]]$depvars[[varName]][,,wave]

Modified: pkg/RSienaTest/changeLog
===================================================================
--- pkg/RSienaTest/changeLog	2011-05-25 21:19:19 UTC (rev 148)
+++ pkg/RSienaTest/changeLog	2011-05-26 16:14:47 UTC (rev 149)
@@ -1,7 +1,14 @@
+2011-05-26 R-forge revision 149 RSienaTest only
+
+	* R/sienaGOF.R, NAMESPACE, doc/RSiena.bib, doc/s_man400.tex:
+	Minor debugs for multi-wave
+	data in sienaGOF, added summary.sienaGOF functionality,
+	added a model selection script to the manual.
+	
 2011-05-25 R-forge revision 148 RSienaTest only
 
 	* man/sienaGOF.Rd, man/sienaGOFsupplement.Rd,
-	R/sienaGOF.R, NAMESPACE, src/sienaGOF.r, 
+	R/sienaGOF.R, NAMESPACE,
 	src/initializeFRAN.r: overhaul of sienaGOF.
 
 2011-05-19 R-forge revision 147 RSienaTest only

Modified: pkg/RSienaTest/doc/RSiena.bib
===================================================================
--- pkg/RSienaTest/doc/RSiena.bib	2011-05-25 21:19:19 UTC (rev 148)
+++ pkg/RSienaTest/doc/RSiena.bib	2011-05-26 16:14:47 UTC (rev 149)
@@ -1785,17 +1785,25 @@
     doi = {DOI: 10.1016/j.socnet.2010.03.001},
     URL = {http://www.stats.ox.ac.uk/~lospinos}
 }
- at Article{Lospinoso2010b,
-    TITLE = {Testing and Modeling Time Heterogeneity in Longitudinal Studies of
-Social Networks: A Tutorial in RSiena},
-    AUTHOR = {Joshua A. Lospinoso},
-    JOURNAL = {Connections},
-    VOLUME = {In progress.},
-    YEAR = {2010},
+ at Article{Lospinoso2011b,
+    TITLE = {Goodness of Fit for Stochastic Actor Oriented Models},
+    AUTHOR = {Joshua A. Lospinoso and Tom A.B. Snijders},
+    JOURNAL = {Working paper.},
+    YEAR = {2011},
     URL = {http://www.stats.ox.ac.uk/~lospinos}
 }
+ at ARTICLE{Lospinoso2011,
+    TITLE={Assessing and Accounting for Time Heterogeneity in Stochastic Actor
+Oriented Models},
+    AUTHOR={Joshua A. Lospinoso and M. Schweinberger and T. A. B. Snijders and
+ R.M. Ripley},
+    JOURNAL={Advances in Data Analysis and Computation},
+    VOLUME={Special Issue on Social Networks},
+    YEAR={2011},
+    doi = {DOI: 10.1016/j.socnet.2010.03.001},
+    URL = {http://www.stats.ox.ac.uk/~lospinos}
+}
 
-
 @InCollection{Lepkowski89,
   author =       {J. M. Lepkowski},
   title =        {Treatment of wave nonresponse in panel surveys.},

Modified: pkg/RSienaTest/doc/s_man400.tex
===================================================================
--- pkg/RSienaTest/doc/s_man400.tex	2011-05-25 21:19:19 UTC (rev 148)
+++ pkg/RSienaTest/doc/s_man400.tex	2011-05-26 16:14:47 UTC (rev 149)
@@ -3042,8 +3042,360 @@
 where $n$ is the number of actors in the network (or the largest number,
 if there are multiple groups).
 
+\subsection{Goodness of fit with auxiliary statistics}
+There is now available in RSienaTest a function \verb!sienaGOF! which permits users to
+define auxiliary functions of networks, e.g. geodesic distributions, that are not explicitly fit
+by a particular effect, but are nonetheless important features of the network to represent in the
+probability model. The following script gives a walkthrough for this functionality. The interface
+may change in the future, as this function is still in progress. See the working paper by
+\citet{Lospinoso2011b} for technical information about the test.
+\begin{verbatim}
+################################################################################
+## Model selection and RSiena								                   #
+## Script prepared by Josh Lospinoso (stats.ox.ac.uk/~lospinos)                #
+## Date: 5/26/2011											                   #
+## Version: 3												                   #
+################################################################################
 
+# In this script, we will load the s50 data (type ?s50 for more information) 
+# and go through an iteration of forward model selection. The idea is to specify
+# a restricted model containing a conservative number of effects and then 
+# evaluate evidence for inclusion of more elaborate terms. This approach 
+# contrasts with backward model selection, which would entail inclusion of a 
+# large number of terms and subsequent evaluation for omission of terms.
 
+# It is strongly emphasized here that theory should always guide model 
+# selection! In this script, our intent is to illustrate some of the tools that
+# are available to you and not necessarily to provide a "cookie cutter" solution
+# to fitting SAOMs to all datasets:
+#
+# 1) sienaTimeTest
+# 2) sienaGOF
+# 3) The score type test of Schweinberger
+
+# First, we will install the experimental "RSienaTest" from R-Forge.
+# This version is updated more often than the RSiena package on CRAN.
+# Currently, one of the functions (sienaGOF) is not yet available in the 
+# CRAN version.
+install.packages("RSienaTest", repos="http://R-Forge.R-project.org")
+
+# Later, we will use the sna and network packages to show the new GOF
+# functionality, as well as a few other packages to help us later on:
+install.packages(c("network", "sna"))
+
+# After these packages have been installed, you must tell R to load the
+# RSienaTest package:
+library(RSienaTest)
+
+# First, give some estimation settings. Here we will use 4 phase 2 subphases 
+# and 1000 phase 3 iterations. These settings come generally recommended for
+# most datasets.
+estimationSettings <- sienaModelCreate(fn=simstats0c, nsub=4, n3=1000)
+
+# The s50 dataset is already loaded into the R environment once the 
+# library(.) function is called. See e.g. ?s501 for information on 
+# each data object.
+
+# This will load three waves of 50 actors into a sienaNet
+friendship <- sienaNet(array(c(s501, s502, s503), dim=c(50, 50, 3)))
+alcohol <- varCovar(s50a)
+s50data <- sienaDataCreate(friendship, alcohol)
+
+# Here we begin to build up a preliminary model selection. By default, this
+# model will have density and reciprocity effects included from getEffects(.)
+model1 <- getEffects(s50data)
+
+# Or with the include effects (preferred option):
+
+model1 <- includeEffects(model1, transTrip, cycle3, name="friendship")
+model1 <- includeEffects(model1, simX, interaction1="alcohol", 
+	name="friendship")
+# Here, we can add a few score type tests for covariate terms (which can be 
+# highly correlated). These tests will help us to determine whether we should 
+# include more complicated selection effects in future iterations.
+model1 <- setEffect(model1, egoX, interaction1="alcohol", name="friendship", 
+	fix=TRUE, test=TRUE, include=TRUE)
+model1 <- setEffect(model1, altX, interaction1="alcohol", name="friendship", 
+	fix=TRUE, test=TRUE, include=TRUE)
+model1 <- setEffect(model1, higher, interaction1="alcohol", name="friendship", 
+	fix=TRUE, test=TRUE, include=TRUE)
+
+# Check what is specified in model1:
+model1
+
+# We have chosen to start with two transitivity terms (transitive triplets 
+# and three cycles), and one selection, one influence term for alcohol 
+# consumption. 
+
+# We use the siena07() function to estimate the parameters of model1
+# NOTE: you MUST specify returnDeps=TRUE in order to use the goodness of 
+# fit testing functionality.
+( results1 <- siena07(estimationSettings, data=s50data, effects=model1, 
+	batch=TRUE, returnDeps=TRUE) )
+
+# Next, we perform a test for time heterogeneity. This will help us see 
+# if the parameters have shifted over time. It comes highly
+# recommended to perform this step before checking other goodness of fit!
+( timetest1 <- sienaTimeTest(results1) )
+# With a p value of about .30, there is very little joint evidence against
+# the hypothesis of time homogeneity.
+
+# To get a more informal feel for how time heterogeneity may be present in
+# the data, we can plot the test for each effect:
+plot(timetest1, effects=1:4)
+plot(timetest1, effects=5:8)
+# Gray bands indicate the base periods (or, alternately, time dummies that
+# have been estimated using siena07), and red bands indicate potential time
+# dummies. These bands are 95% confidence bands. Where they do not overlap,
+# there is (individual) evidence against the time homogeneity hypothesis.
+
+# There is some information on sienaTimeTest available in the help file,
+# where you can find the citation for Lospinoso et. al (2010) 
+# where the test is developed.
+?sienaTimeTest
+
+# We can now check the score type test results for more involved 
+# selection terms. Summary of a siena07 object gives us a wealth
+# of information about estimation, but it will also give us a score-type
+# test of Schweinberger (2007) calculated for those effects which we have
+# denoted fix=TRUE, test=TRUE above:
+summary(results1)
+# In the section "Generalised score test <c>", we are presented with the
+# results. Since the joint p value is about .66, there is very little
+# evidence that these effects (alch alter, alch ego, higher alch)
+# are fit poorly with respect to their statistics.
+
+# We have checked time heterogeneity and the goodness of fit of a few
+# alcohol-related statistics, but we perform one last check using the
+# sienaGOF function. Lospinoso and Snijders (2011) propose to calculate
+# auxiliary statistics, i.e. statistics that are important to represent
+# with the model but that are not strictly fit by the parameters of the
+# model. There is a simple battery of auxiliary statistics that is already
+# implemented in RSiena: indegree and outdegree distributions, triad census,
+# and average nearest neighboor degree (KNN) distributions. First, let us 
+# calculate these using our results:
+
+# - Indegree distribution ( # of nodes with indegree < A for A in 1:MAX)
+( id1 <- sienaGOF(results1, IndegreeDistribution, verbose=TRUE) )
+
+# - Outdegree distribution ( # of nodes with outdegree < A for A in 1:MAX)
+( od1 <- sienaGOF(results1, OutdegreeDistribution, verbose=TRUE) )
+
+# - Triad census (see ?triad.census in the sna package)
+( tc1 <- sienaGOF(results1, TriadCensus, verbose=TRUE) )
+
+# - Geodesic distribution  ( # of geodesces < A for A in 1:MAX)
+( ge1 <- sienaGOF(results1, GeodesicDistribution, verbose=TRUE) )
+
+# For now, we will not worry about the contents of the text output for these
+# objects, but we will seek to get an intuitive feel for how well these features
+# are fitting by using the plot.sienaGOF functionality:
+plot(id1)
+# These plots give a solid red line denoting the observed value of the auxiliary
+# statistic (in this case, # of nodes with outdegree < x). The "violin plots"
+# show the simulated values of these statistics as both a box plot and a
+# kernel density estimate. The dashed gray line represents a 95% confidence
+# band for the simulations. Finally, a p value is displayed on the X axis
+# label. This p value comes from the Monte Carlo Mahalanobis Distance Test
+# of Lospinoso and Snijders (2011) (see ?sienaGOF for more information).
+# The null hypothesis for this p value is that the auxiliary statistics
+# for the observed data are distributed according to the statistics shown in
+# the plot. For id1, we see that the p value is about .35, so we are left
+# with the impression that fit is quite good. (This can be seen visually also!)
+
+# Try outdegree also:
+plot(od1)
+# We see some marginal fit on the outdegree (p is .062), but the observations
+# are within the 95% bands. We will see what we can do to improve this fit.
+
+# Triad census:
+plot(tc1)
+# How can we possibly read this? Use centering and scaling:
+plot(tc1, scale=TRUE, center=TRUE)
+# We can use the keys given in ?triad.classify of the sna package to help
+# us understand where fit is lacking:
+triad.keys <- c("003","012","102","021D","021U","021C","111D","111U",
+		"030T","030C","201","120D","120U","120C","210","300")
+# and add these to the plot:
+plot(tc1, scale=TRUE, center=TRUE, key = triad.keys)
+# The following triads are not being represented well in the model, based
+# on a 95% confidence interval cutoff:
+# 111U: a     b  -> c ; a <-> c  (Too many being simulated)
+# 030T: a  -> b <-  c ; a  -> c  (Too few being simulated)
+#  201: a  -> b <-> c ; a <-> c  (Too many being simulated)
+
+# And geodesic distance:
+plot(ge1)
+# The fit is marginal (p is roughly .09), but for a fairly simple model not
+# too bad. The observations are on the bottom of the confidence interval.
+# Since there are a finite number of dyads in the graph, this means that the
+# simulations are too connected--that is, there are more geodesces
+# less than, say, 8 in the simulations than in the observations, so the
+# observed geodesces must be more frequent at higher numbers.
+
+# So far, we have reached two conclusions about goodness of fit:
+# 1) More involved alter selection effects do not seem necessary
+# 2) There is pretty good fit for such a simple model. We could try to improve
+# 	 fit on the geodesic distribution and a few of the triads.
+#
+# Our strategy is then to try fitting geodesic distances better. Let us add
+# actors at distance two:
+model2 <- includeEffects(model1, nbrDist2)
+# We also tidy up the effects by removing the tests for more involved selection
+# effects on alcohol:
+model2 <- setEffect(model2, egoX, interaction1="alcohol", name="friendship", 
+		fix=FALSE, test=FALSE, include=FALSE)
+model2 <- setEffect(model2, altX, interaction1="alcohol", name="friendship", 
+		fix=FALSE, test=FALSE, include=FALSE)
+model2 <- setEffect(model2, higher, interaction1="alcohol", name="friendship", 
+		fix=FALSE, test=FALSE, include=FALSE)
+# Not wanting to make too many changes to the model at once, let us also try
+# some degree related effects to help our outdegree fit:
+model2 <- setEffect(model2, outPop, fix=TRUE, test=TRUE, include=TRUE)
+model2 <- setEffect(model2, outAct, fix=TRUE, test=TRUE, include=TRUE)
+
+# Estimate our model (note the use of prevAns to speed up convergence):
+( results2 <- siena07(estimationSettings, data=s50data, effects=model2, 
+					batch=TRUE, returnDeps=TRUE, prevAns=results1) )
+
+# Next, we perform a test for time heterogeneity:
+( timetest2 <- sienaTimeTest(results2) )
+# Which still indicates that there is little evidence against the
+# time homogeneity assumption.
+
+# Again, we perform sienaGOF on the indegree, outdegree, and geodesic
+# distributions as well as triad census:
+( id2 <- sienaGOF(results2, IndegreeDistribution, verbose=TRUE) )
+( od2 <- sienaGOF(results2, OutdegreeDistribution, verbose=TRUE) )
+( tc2 <- sienaGOF(results2, TriadCensus, verbose=TRUE) )
+( ge2 <- sienaGOF(results2, GeodesicDistribution, verbose=TRUE) )
+# We will treat each of these in turn.
+
+# First, we will look at indegree distribution:
+summary(id2)
+# The summary output tells us information first about the Monte Carlo
+# Mahalanobis Distance goodness of fit test of Lospinoso and Snijders (2011).
+# The p value of .03 tells us that fit is marginal on indegree distribution.
+# This is a potentially frustrating aspect of testing GOF on auxiliary
+# statistics! There is no guarantee that adding more terms to a model will
+# not decrease fit! Recall that
+id1
+# showed good fit.
+# The next item to notice in
+summary(id2)
+# are the outdegree - popularity and outdegree - activity displays.
+# These two effects had test=TRUE selected on the effects object, so 
+# sienaGOF treats these specially. It considers a polynomial approximation
+# to the Mahalanobis distance with respect to the parameters of the model
+# so that we can posit various specifications and compare their expected
+# Mahalanobis distances (for details see Lospinoso and Snijders 2011):
+#
+# MM(Full) -- For "method of moments  (full)" corresponds to the one step
+# 	estimates of Schweinberger (2007). This specification allows the most
+#	drastic differences from the current parameter estimates, and is our
+#	best guess at what the new parameter estimates would be at the proposed
+#	specification. Since the one step estimates can be erratic for values
+#	far away from the current parameters, we also provide some other estimates
+#	below.
+#
+# MM(Par.) -- For "method of moments  (partial)" corresponds to an estimate
+#	where we only take the one step estimate coordinate from MM(full) for
+#	the effect under consideration. E.g. for "outdegree - popularity", 
+#	all parameters are equivalent to the current estimates, but we replace
+#	the 0 for outdegree - popularity with its one step estimate.
+#
+# 	   GMM -- If MM(Full) and MM(Par.) are not well behaved, there is another
+#	alternative to consider. GMM, for "generalized method of moments", does
+#	not depend on the one step estimates at all. It is the parameterization
+#	for the particular model under consideration that minimizes the quadratic
+#	approximation for the Mahalanobis distance. It, by definition, will have
+#	a MHD less than both MM(Full) and MM(Par.).
+#
+#	Unfortunately, it appears that for both models under consideration, the
+#	Mahalanobis distance will increase. We must take this guidance with a heavy
+#	grain of salt for the reasons mentioned above; we recommend comparing the
+#	relative MHDs to guide selection. If we place some trust in the one step
+#	estimates (which seem fairly plausible for both models), we are led to
+#	select outdegree - popularity to help with indegree distribution. Indeed,
+# 	this selection tends to make theortical sense, since activity is synonymous
+# 	with indegree!
+
+# For outdegree,
+summary(od2)
+# it seems also that outdegree - popularity is the best choice (although we note
+# that the fit is quite good on outdegree already!)
+
+# For geodesic distances,
+summary(ge2)
+# we might also be led towards including outdegree popularity (although, again,
+# it seems that we have achieved quite good fit for geodesic distances with a
+# p value of .707)
+
+# For triad census,
+summary(tc2)
+# where fit is the worst across all of the statistics, outdegree popularity seems
+# to be the best candidate as well.
+
+# Model selection is an art, and thus there is no strictly superior criteria
+# for updating our model. In the current selection, it seems that the
+# outdegree popularity model has accumulated enough support across the four
+# tests that we decide to include it here:
+model3 <- setEffect(model2, outPop, fix=FALSE, test=FALSE, include=TRUE)
+model3 <- setEffect(model3, outAct, fix=FALSE, test=FALSE, include=FALSE)
+
+# Estimate our model
+( results3 <- siena07(estimationSettings, data=s50data, effects=model3, 
+					batch=TRUE, returnDeps=TRUE, prevAns=results2) )
+# Convergence could be improved:
+( results3 <- siena07(estimationSettings, data=s50data, effects=model3, 
+					batch=TRUE, returnDeps=TRUE, prevAns=results3) )
+
+# Time test
+( tt3 <- sienaTimeTest(results3) )
+# All is still well with time homogeneity
+
+# Check goodness of fit again:
+( id3 <- sienaGOF(results3, IndegreeDistribution, verbose=TRUE) )
+( od3 <- sienaGOF(results3, OutdegreeDistribution, verbose=TRUE) )
+( tc3 <- sienaGOF(results3, TriadCensus, verbose=TRUE) )
+( ge3 <- sienaGOF(results3, GeodesicDistribution, verbose=TRUE) )
+
+# It seems that the test for indegree distribution indicates evidence of
+# poor fit:
+id3
+# but when we plot it:
+plot(id3)
+# the fit seems quite good. This is an artifact of the default settings for our
+# IndegreeDistribution function. It seems that there are few actors with indegree
+# higher than, say, 6, so the non-normality of this data is causing problems.
+# We can define a custom function easily which restricts the support:
+CustomIndegreeDistribution <- function(...) {
+	IndegreeDistribution(..., levels=1:6)
+}
+# and run this test again: 
+( id3 <- sienaGOF(results3, CustomIndegreeDistribution, verbose=TRUE) )
+plot(id3)
+# where now we have a test agreeing with the plot.
+
+# The only other problems we seem to have are with triad census:
+plot(tc3, scale=TRUE, center=TRUE, key=triad.keys)
+# which have persisted throughout the two model updates we have proposed!
+# Graphically, the data is not too implausible, so we might take heed of
+# George Box's famous quote "All models are wrong, some are useful" and
+# proceed with our inference. If, however, triad census across the configurations
+# that seem to not fit well could have an impact on your inference, we strongly
+# recommend spending some more time trying new specifications.
+
+# Geodesic distance distribution
+plot(ge3)
+# and outdegree
+plot(od3)
+# fit adequately.
+
+# Now that goodness of fit has been addressed, and we have settled in on a model,
+# we can continue on drawing inference on the sustantive hypotheses of interest.
+\end{verbatim}
+
 \newpage
 
 \section{Estimation}
@@ -8443,6 +8795,11 @@
 repository.)
 \begin{small}
 \begin{itemize}
+\item 2011-05-26 R-forge 148:
+\begin{itemize}
+\item Improvements to sienaGOF, added script to manual.
+\end{itemize}
+\begin{itemize}
 \item 2011-05-16 R-forge 146:
 \begin{itemize}
 \item Documentation improvements

Modified: pkg/RSienaTest/man/RSiena-package.Rd
===================================================================
--- pkg/RSienaTest/man/RSiena-package.Rd	2011-05-25 21:19:19 UTC (rev 148)
+++ pkg/RSienaTest/man/RSiena-package.Rd	2011-05-26 16:14:47 UTC (rev 149)
@@ -30,8 +30,8 @@
 \tabular{ll}{
 Package: \tab RSiena\cr
 Type: \tab Package\cr
-Version: \tab 1.0.12.148\cr
-Date: \tab 2011-05-25\cr
+Version: \tab 1.0.12.149\cr
+Date: \tab 2011-05-26\cr
 License: \tab GPL-2 \cr
 LazyLoad: \tab yes\cr
 }

Modified: pkg/RSienaTest/man/sienaGOF.Rd
===================================================================
--- pkg/RSienaTest/man/sienaGOF.Rd	2011-05-25 21:19:19 UTC (rev 148)
+++ pkg/RSienaTest/man/sienaGOF.Rd	2011-05-26 16:14:47 UTC (rev 149)
@@ -1,6 +1,5 @@
 \name{sienaGOF}
 \alias{sienaGOF}
-\alias{print.sienaGOF}
 \alias{plot.sienaGOF}
 \title{Functions to assess goodness of fit for SAOMs}
 \description{
@@ -19,7 +18,6 @@
 		cluster=NULL, robust=FALSE, \dots)
 \method{plot}{sienaGOF}(x,  center=FALSE, scale=FALSE, violin=TRUE, 
 		key=NULL, perc=.05, wave=1, main=main, ylab=ylab, \dots)
-\method{print}{sienaGOF}(x, \dots)
 }
 \arguments{
   \item{sienaFitObject}{Results from a call to \code{siena07}.}



More information about the Rsiena-commits mailing list