[Rsiena-commits] r131 - in pkg/RSienaTest: . R man

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Wed Jan 19 02:59:39 CET 2011


Author: jalospinoso
Date: 2011-01-19 02:59:38 +0100 (Wed, 19 Jan 2011)
New Revision: 131

Added:
   pkg/RSienaTest/R/sienaGOF.r
   pkg/RSienaTest/man/sienaGOF.Rd
Modified:
   pkg/RSienaTest/DESCRIPTION
   pkg/RSienaTest/NAMESPACE
   pkg/RSienaTest/changeLog
Log:
Adding goodness of fit testing functionality (very rough draft...)

Modified: pkg/RSienaTest/DESCRIPTION
===================================================================
--- pkg/RSienaTest/DESCRIPTION	2011-01-16 17:07:30 UTC (rev 130)
+++ pkg/RSienaTest/DESCRIPTION	2011-01-19 01:59:38 UTC (rev 131)
@@ -6,7 +6,7 @@
 Author: Various
 Depends: R (>= 2.9.0), xtable
 Imports: Matrix
-Suggests: tcltk, snow, rlecuyer, network, codetools, lattice
+Suggests: tcltk, snow, rlecuyer, network, codetools, lattice, snopgof
 SystemRequirements: GNU make, tcl/tk 8.5, Tktable
 Maintainer: Ruth Ripley <ruth at stats.ox.ac.uk>
 Description: Fits models to longitudinal networks

Modified: pkg/RSienaTest/NAMESPACE
===================================================================
--- pkg/RSienaTest/NAMESPACE	2011-01-16 17:07:30 UTC (rev 130)
+++ pkg/RSienaTest/NAMESPACE	2011-01-19 01:59:38 UTC (rev 131)
@@ -5,7 +5,9 @@
 sienaGroupCreate,  sienaModelCreate, sienaNet, sienaNodeSet, #simstats0c,
        varCovar, varDyadCovar, setEffect, includeEffects, includeInteraction,
        effectsDocumentation, sienaDataConstraint, #maxlikefn,
-       installGui, siena08, iwlsm, sienaTimeTest, includeTimeDummy)
+       installGui, siena08, iwlsm, sienaTimeTest, includeTimeDummy,
+       evaluateAuxiliaryStatistics.observed, evaluateAuxiliaryStatistics.simulated,
+       evaluateAuxiliaryStatistics, sienaGOF)
 
 import(Matrix)
 import(xtable)
@@ -42,3 +44,5 @@
 S3method(summary, sienaEffects)
 S3method(print, summary.sienaEffects)
 S3method(edit, sienaEffects)
+S3method(print, sienaGOF)
+S3method(plot, sienaGOF)

Added: pkg/RSienaTest/R/sienaGOF.r
===================================================================
--- pkg/RSienaTest/R/sienaGOF.r	                        (rev 0)
+++ pkg/RSienaTest/R/sienaGOF.r	2011-01-19 01:59:38 UTC (rev 131)
@@ -0,0 +1,109 @@
+##/*****************************************************************************
+## * SIENA: Simulation Investigation for Empirical Network Analysis
+## *
+## * Web: http://www.stats.ox.ac.uk/~snidjers/siena
+## *
+## * File: gof.r
+## *
+## * Description: This file contains the code to assess goodness of fit
+## *
+## ****************************************************************************/
+
+## Convenience function for evaluating statistics
+evaluateAuxiliaryStatistics <- function(sienaObject, ...) {
+	if (class(sienaObject) == "sienaFit") {
+		evaluateAuxiliaryStatistics.simulated(sienaObject, ...)
+	} else if (class(sienaObject) == "siena") {
+		evaluateAuxiliaryStatistics.observed(sienaObject, ...)
+	}
+}
+
+## Sienafit object, returns a list of auxiliary statistics from the given function
+evaluateAuxiliaryStatistics.simulated <- function(sienaFitObject, 
+		groupName, varName, wave, auxiliaryFunction, verbose=FALSE) {
+	if (! sienaFitObject$returnDeps) {
+		stop("You must instruct siena07 to return the simulated networks, i.e. returnDeps=TRUE")
+	}
+	iterations = length(sienaFitObject$sims)
+	if (iterations < 1) {
+		stop("You need at least one iteration.")
+	}
+	groups = length(sienaFitObject$sims[[1]])
+	if (verbose) cat("Detected", iterations, "iterations and", groups, "groups.\n")
+	groupNumber = which(names(sienaFitObject$sims[[1]]) == groupName)
+	if (is.na(groupNumber)) {
+		stop("Invalid group name.")
+	}
+	if (verbose) cat("Group", groupName, "corresponds to index",groupNumber,".\n")
+	varNumber = which(names(sienaFitObject$sims[[1]][[groupNumber]]) == varName)
+	if (varNumber < 1 | varNumber > length(sienaFitObject$sims[[1]][[groupNumber]])) {
+		stop("Invalid variable number -- out of bounds.")
+	}
+	if (verbose) cat("Variable", varName, "corresponds to index",varNumber,".\n")
+	if (min(wave) < 1 | max(wave) > length(sienaFitObject$sims[[1]][[groupNumber]][[varNumber]])){
+		stop("Invalid wave index -- out of bounds")
+	}
+	if (verbose) cat("Calculating auxiliary statistics for waves",wave,".\n")
+	ret <- lapply(wave, function (j) {
+			tmp <- sapply(1:iterations, function (i) { auxiliaryFunction(sienaFitObject$sims[[i]][[groupNumber]][[varNumber]][[j]])})
+			dimnames(tmp)[[2]] <-  1:iterations
+			t(tmp)
+		})
+	names(ret) <- paste("Wave",wave)
+	class(ret) <- "simulatedAuxiliaryStatistics"
+	attr(ret,"auxiliaryStatisticName") <- deparse(substitute(auxiliaryFunction))
+	ret
+}
+
+## Sienafit object, returns a list of auxiliary statistics from the given function
+evaluateAuxiliaryStatistics.observed <- function(sienaDataObject, 
+		varName, wave, auxiliaryFunction, verbose=FALSE) {
+	varNumber = which(names(sienaDataObject$depvars) == varName)
+	if (varNumber < 1 | varNumber > length(sienaDataObject$depvars)) {
+		stop("Invalid variable number -- out of bounds.")
+	}
+	if (verbose) cat("Variable", varName, "corresponds to index",varNumber,".\n")
+	if (min(wave) < 1 | max(wave) > dim(sienaDataObject$depvars[[varNumber]])[3] - 1){
+		stop("Invalid wave index -- out of bounds")
+	}
+	if (verbose) cat("Calculating auxiliary statistics for waves",wave,".\n")
+	ret <- lapply(wave, function (j) {
+				auxiliaryFunction(sienaDataObject$depvars[[varNumber]][,,j+1])
+			})
+	names(ret) <- paste("Wave",wave)
+	class(ret) <- "observedAuxiliaryStatistics"
+	attr(ret,"auxiliaryStatisticName") <- deparse(substitute(auxiliaryFunction))
+	ret
+}
+
+sienaGOF <- function(obsList, simList) {
+	require(snopgof)
+	if ((class(obsList) != "observedAuxiliaryStatistics") |
+			(class(simList) != "simulatedAuxiliaryStatistics") |
+			(length(obsList) != length(simList))
+			| (attr(obsList,"auxiliaryStatisticName") != attr(simList,"auxiliaryStatisticName"))) {
+		stop("Arguments invalid")
+	}
+	pre <- lapply(1:length(simList), function (i) gof.preprocess(simList[[i]]))
+	# We could try inverse covariance for weighting or something here. Just identity weighting for now.
+	#weight <- lapply(1:length(simList), function (i) solve(cov(simList[[i]])))
+	#res <- lapply(1:length(simList), function (i) gof(obsList[[i]], pre[[i]], weight[[i]]))
+	res <- lapply(1:length(simList), function (i) gof(obsList[[i]], pre[[i]]) )
+	ret <- list(ObservedStatistics=obsList, SimulatedStatistics=simList, Results=res)
+	class(ret) <- "sienaGOF"
+	attr(ret,"auxiliaryStatisticName") <- attr(obsList,"auxiliaryStatisticName")
+	ret
+}
+
+print.sienaGOF <- function (x, ...) {
+	cat("Siena Goodness of Fit (", attr(x,"auxiliaryStatisticName") ,")\n > P-values by wave:\n")
+	pVals = sapply(1:length(x$Results), function(i) x$Results[[i]]$p)
+	for (i in 1:length(pVals)) {
+		cat(" > Wave ", i, ": ", pVals[i], "\n")
+	}
+}
+
+plot.sienaGOF <- function (x, wave=1, ...) {
+	plot(x$Results[[wave]], main=paste("Goodness of Fit of ", attr(x, "auxiliaryStatisticName")),
+			xlab="Variate Index", ylab="Variate Value", ...)
+}
\ No newline at end of file

Modified: pkg/RSienaTest/changeLog
===================================================================
--- pkg/RSienaTest/changeLog	2011-01-16 17:07:30 UTC (rev 130)
+++ pkg/RSienaTest/changeLog	2011-01-19 01:59:38 UTC (rev 131)
@@ -1,3 +1,12 @@
+2011-01-17 R-forge revision 131
+	* R/gof.r: Added. Contains the function sienaGof and other functions
+	for performing basic goodness of fit tests from arbitrary functions.
+	This function requires the snopgof package to perform the Monte
+	Carlo test. There are also plotting facilities.
+	* man/gof.rd: Added. Contains basics of how to use sienaGof
+	* NAMESPACE: Added sienaGof functions to namespace for proper operation
+	* DESCRIPTION: Added snopgof to the recommended packages.
+
 2011-01-16 R-forge revision 130
 
 	* R/phase3.r: correct number of iterations in field Phase3nits

Added: pkg/RSienaTest/man/sienaGOF.Rd
===================================================================
--- pkg/RSienaTest/man/sienaGOF.Rd	                        (rev 0)
+++ pkg/RSienaTest/man/sienaGOF.Rd	2011-01-19 01:59:38 UTC (rev 131)
@@ -0,0 +1,87 @@
+\name{sienaGOF}
+\alias{sienaGOF}
+\alias{evaluateAuxiliaryStatistics}
+\alias{evaluateAuxiliaryStatistics.simulated}
+\alias{evaluateAuxiliaryStatistics.observed}
+\alias{print.sienaGOF}
+\alias{plot.sienaGOF}
+\title{Functions to assess goodness of fit for SAOMs}
+\description{
+ A series of functions which start with a \code{sienaFit} object, a
+ \code{siena} data object, and a function for calculating auxiliary
+ statistics on graphs. 
+ This function uses the \code{snopgof} package to evaluate goodness
+ of fit from the resulting vectors of statistics.
+ }
+
+\usage{
+sienaGOF(obsList, simList)
+evaluateAuxiliaryStatistics.observed(sienaDataObject, varName, wave, auxiliaryFunction, verbose=FALSE)
+evaluateAuxiliaryStatistics.simulated(sienaFitObject, groupName, varName, wave, auxiliaryFunction, verbose=FALSE)
+# Alias for the above two functions
+evaluateAuxiliaryStatistics(sienaObject, ...)
+}
+\arguments{
+  \item{obsList}{The results from a call to \code{evaluateAuxiliaryStatistics.observed}.}
+  \item{simList}{The results from a call to \code{evaluateAuxiliaryStatistics.simulated}.}
+  \item{sienaObject}{Either a \code{sienaDataObject} or a \code{sienaFitObject}; alias will determine which and call
+  the appropriate function.}
+  \item{sienaDataObject}{Results from a call to \code{sienaDataCreate}.}
+  \item{sienaFitObject}{Results from a call to \code{siena07}.}
+  \item{groupName}{The name of the sienaGroup to be used.}
+  \item{varName}{The value of the variable to be used, whether a network, behavior, etc.}
+  \item{wave}{Wave(s) to be used for calculating the statistics.}
+  \item{auxiliaryFunction}{Function to be used to calculate statistics, e.g. from the \code{sna} package.}
+  \item{verbose}{Whether to print intermediate results. Calculations can take some time, and user feedback may be useful.}
+  \item{...}{Other arguments to be used by the appropriate subfunction which will be called by the alias.}
+}
+\details{
+ This function is currently under construction. Contact the author for working papers on how it works.
+ In general, it tests goodness of fit of statistics that should not be explicitly fit. If the p value is
+ below the desired false positive rate, there is sufficient evidence to reject the goodness of fit of the model
+ with respect to the implied statistic.
+}
+\value{
+  \code{sienaGOF} Returns a list:
+  \item{ObservedStatistics }{ The values of the calculated statistics for the observations. }
+  \item{SimulatedStatistics }{ A preprocess object (see \code{snopgof} package) calculated on the statistics for the simulations. }
+  \item{Results }{ The p values of the \code{snopgof} test. }
+}
+
+\references{See \url{http://www.stats.ox.ac.uk/~snijders/siena/}
+  for general information on RSiena.
+
+Lospinoso, J.A. and Snijders, T.A.B, "A Non-parametric Test for
+Goodness of Fit." Working Paper
+}
+\author{Josh Lospinoso}
+\seealso{\code{\link{siena07}}, \code{\link{sienaTimeTest}},
+  \code{snopgof}}
+\examples{
+\dontrun{
+# Fifty iterations being used, but we recommend using many more (perhaps 1000 or 1500)
+mymodel <- sienaModelCreate(fn=simstats0c, nsub=4, n3=50)
+mynet1 <- sienaNet(array(c(s501, s502, s503), dim=c(50, 50, 3)))
+mydata <- sienaDataCreate(mynet1)
+myeff <- getEffects(mydata)
+myeff <- includeEffects(myeff, transTrip, balance)
+ans <- siena07(mymodel, data=mydata, effects=myeff, returnDeps=T, batch=TRUE)
+
+library(sna)
+# install.packages("sna")
+library(network)
+# install.packages("network")
+triadCensus <- function (x) {
+	triad.census(network(x))[-1]
+}
+
+sim <- evaluateAuxiliaryStatistics(ans, "Data1", "mynet1", 1:2, triadCensus, verbose=T)
+obs <- evaluateAuxiliaryStatistics(mydata, "mynet1", 1:2, triadCensus, verbose=T)
+(res <- sienaGOF(obs, sim))
+
+# And plots if desired
+plot(res, wave=1)
+plot(res, wave=2)
+}
+}
+\keyword{models}



More information about the Rsiena-commits mailing list