[Rsiena-commits] r137 - in pkg/RSienaTest: . R doc man
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Tue Feb 22 02:30:23 CET 2011
Author: jalospinoso
Date: 2011-02-22 02:30:23 +0100 (Tue, 22 Feb 2011)
New Revision: 137
Modified:
pkg/RSienaTest/DESCRIPTION
pkg/RSienaTest/R/sienaGOF.r
pkg/RSienaTest/R/sienaTimeTest.r
pkg/RSienaTest/changeLog
pkg/RSienaTest/doc/s_man400.tex
pkg/RSienaTest/man/sienaGOF.Rd
Log:
Revision 137, additions to sienaGOF and sienaTimeTest
Modified: pkg/RSienaTest/DESCRIPTION
===================================================================
--- pkg/RSienaTest/DESCRIPTION 2011-02-21 00:54:43 UTC (rev 136)
+++ pkg/RSienaTest/DESCRIPTION 2011-02-22 01:30:23 UTC (rev 137)
@@ -1,12 +1,12 @@
Package: RSienaTest
Type: Package
Title: Siena - Simulation Investigation for Empirical Network Analysis
-Version: 1.0.12.136
-Date: 2011-02-21
+Version: 1.0.12.137
+Date: 2012-02-21
Author: Various
Depends: R (>= 2.9.0), xtable
Imports: Matrix
-Suggests: tcltk, snow, rlecuyer, network, codetools, lattice, snopgof
+Suggests: tcltk, snow, rlecuyer, network, codetools, lattice, MASS, sna, vioplot
SystemRequirements: GNU make, tcl/tk 8.5, Tktable
Maintainer: Ruth Ripley <ruth at stats.ox.ac.uk>
Description: Fits models to longitudinal networks
Modified: pkg/RSienaTest/R/sienaGOF.r
===================================================================
--- pkg/RSienaTest/R/sienaGOF.r 2011-02-21 00:54:43 UTC (rev 136)
+++ pkg/RSienaTest/R/sienaGOF.r 2011-02-22 01:30:23 UTC (rev 137)
@@ -18,92 +18,343 @@
}
}
-## Sienafit object, returns a list of auxiliary statistics from the given function
+## Sienafit object, returns a list of auxiliary
+## statistics from the given function
evaluateAuxiliaryStatistics.simulated <- function(sienaFitObject,
- groupName, varName, wave, auxiliaryFunction, verbose=FALSE) {
+ groupName, varName, auxiliaryFunction, wave=NULL,
+ verbose=FALSE, join=TRUE) {
if (! sienaFitObject$returnDeps) {
- stop("You must instruct siena07 to return the simulated networks, i.e. returnDeps=TRUE")
+ stop("You must instruct siena07 to return the simulated networks")
}
iterations = length(sienaFitObject$sims)
if (iterations < 1) {
stop("You need at least one iteration.")
}
groups = length(sienaFitObject$sims[[1]])
- if (verbose) cat("Detected", iterations, "iterations and", groups, "groups.\n")
+ if (verbose) cat("Detected", iterations, "iterations and", groups,
+ "groups.\n")
groupNumber = which(names(sienaFitObject$sims[[1]]) == groupName)
if (is.na(groupNumber)) {
stop("Invalid group name.")
}
- if (verbose) cat("Group", groupName, "corresponds to index",groupNumber,".\n")
- varNumber = which(names(sienaFitObject$sims[[1]][[groupNumber]]) == varName)
- if (varNumber < 1 | varNumber > length(sienaFitObject$sims[[1]][[groupNumber]])) {
+ if (verbose) cat("Group", groupName, "corresponds to index",
+ groupNumber,".\n")
+ varNumber = which(names(sienaFitObject$sims[[1]][[groupNumber]]) ==
+ varName)
+ if (varNumber < 1 | varNumber >
+ length(sienaFitObject$sims[[1]][[groupNumber]])) {
stop("Invalid variable number -- out of bounds.")
}
- if (verbose) cat("Variable", varName, "corresponds to index",varNumber,".\n")
- if (min(wave) < 1 | max(wave) > length(sienaFitObject$sims[[1]][[groupNumber]][[varNumber]])){
+ if (verbose) cat("Variable", varName, "corresponds to index",
+ varNumber,".\n")
+ if (is.null(wave)) {
+ wave = 1:length(sienaFitObject$sims[[1]][[groupNumber]][[varNumber]])
+ }
+ if (min(wave) < 1 | max(wave) >
+ length(sienaFitObject$sims[[1]][[groupNumber]][[varNumber]])){
stop("Invalid wave index -- out of bounds")
}
if (verbose) cat("Calculating auxiliary statistics for waves",wave,".\n")
- ret <- lapply(wave, function (j) {
- tmp <- sapply(1:iterations, function (i) { auxiliaryFunction(sienaFitObject$sims[[i]][[groupNumber]][[varNumber]][[j]])})
- dimnames(tmp)[[2]] <- 1:iterations
- t(tmp)
- })
- names(ret) <- paste("Wave",wave)
+ if (join) {
+ ttc <- system.time(tmp <- lapply(wave, function (j) {
+ tmp <- sapply(1:iterations, function (i)
+ { auxiliaryFunction(sienaFitObject$
+ sims[[i]][[groupNumber]][[varNumber]][[j]])})
+ dimnames(tmp)[[2]] <- 1:iterations
+ t(tmp)
+ }))
+ ret <- tmp[[1]]
+ if (length(tmp)>1) {
+ for (i in 2:length(tmp)) {
+ ret = ret + tmp[[i]]
+ }
+ }
+ ret <- list(Joint=ret)
+ } else {
+ ttc <- system.time(ret <- lapply(wave, function (j) {
+ tmp <- sapply(1:iterations, function (i) {
+ auxiliaryFunction(sienaFitObject$
+ sims[[i]][[groupNumber]][[varNumber]][[j]])})
+ dimnames(tmp)[[2]] <- 1:iterations
+ t(tmp)
+ }))
+ names(ret) <- paste("Wave",wave)
+ }
class(ret) <- "simulatedAuxiliaryStatistics"
attr(ret,"auxiliaryStatisticName") <- deparse(substitute(auxiliaryFunction))
+ attr(ret,"joint") = join
+ attr(ret,"time") = ttc
ret
}
## Sienafit object, returns a list of auxiliary statistics from the given function
evaluateAuxiliaryStatistics.observed <- function(sienaDataObject,
- varName, wave, auxiliaryFunction, verbose=FALSE) {
+ varName, auxiliaryFunction, wave=NULL, verbose=FALSE, join=TRUE) {
varNumber = which(names(sienaDataObject$depvars) == varName)
if (varNumber < 1 | varNumber > length(sienaDataObject$depvars)) {
stop("Invalid variable number -- out of bounds.")
}
- if (verbose) cat("Variable", varName, "corresponds to index",varNumber,".\n")
- if (min(wave) < 1 | max(wave) > dim(sienaDataObject$depvars[[varNumber]])[3] - 1){
+ if (verbose) cat("Variable", varName,
+ "corresponds to index",varNumber,".\n")
+ if (is.null(wave) ) {
+ wave = 1:(dim(sienaDataObject$depvars[[varNumber]])[3] - 1)
+ }
+ if (min(wave) < 1 | max(wave) >
+ dim(sienaDataObject$depvars[[varNumber]])[3] - 1){
stop("Invalid wave index -- out of bounds")
}
if (verbose) cat("Calculating auxiliary statistics for waves",wave,".\n")
- ret <- lapply(wave, function (j) {
- auxiliaryFunction(sienaDataObject$depvars[[varNumber]][,,j+1])
- })
- names(ret) <- paste("Wave",wave)
+ if (join) {
+ tmp <- lapply(wave, function (j) {
+ auxiliaryFunction(sienaDataObject$
+ depvars[[varNumber]][,,j+1])
+ })
+ ret <- tmp[[1]]
+ if (length(tmp)>1) {
+ for (i in 2:length(tmp)) {
+ ret = ret + tmp[[i]]
+ }
+ }
+ ret <- list(Joint=ret)
+ } else {
+ ret <- lapply(wave, function (j) {
+ auxiliaryFunction(sienaDataObject$
+ depvars[[varNumber]][,,j+1])
+ })
+ names(ret) <- paste("Wave",wave)
+ }
class(ret) <- "observedAuxiliaryStatistics"
- attr(ret,"auxiliaryStatisticName") <- deparse(substitute(auxiliaryFunction))
+ attr(ret,"auxiliaryStatisticName") <-
+ deparse(substitute(auxiliaryFunction))
+ attr(ret,"joint") = join
ret
}
+
+sienaGOFPreprocess <- function(simulated, expectationFunction=mean) {
+ require(MASS)
+ if (class(simulated) != "matrix") {
+ stop("Invalid input.")
+ }
+ a <- cov(simulated)
+ # How do we make this error get thrown
+ #up the call stack? It will look much
+ # nicer if we can get that to work
+ # tryCatch(
+ # ainv <- solve(a)
+ #, error =
+ # function(e)simpleError(paste("Your testing functions are producing",
+ # "collinear statistics which cannot be handled by the",
+ # "Mahalanobis distance. Try reducing the number of statistics",
+ # "returned by your testing function (e.g. Geodesic distances 1:10",
+ # "to 1:5 or so).")))
+ # Using generalized inverse instead of regular inverse so
+ # we don't have to worry
+ # about collinearity
+ ainv <- ginv(a)
+ simulations=nrow(simulated)
+ variates=ncol(simulated)
+ expectation = apply(simulated, 2, expectationFunction);
+ centeredSimulations <- t(sapply(1:simulations,
+ function(i) simulated[i,] - expectation))
+ if (variates==1) {
+ centeredSimulations <- t(centeredSimulations)
+ }
+ ttc <- system.time(mahalanobisDistances <-
+ sapply(1:simulations, function(i)
+ centeredSimulations[i,] %*%
+ ainv %*% centeredSimulations[i,]))
+ ret <- list(
+ Iterations=dim(simulated)[1],
+ Variates=dim(simulated)[2],
+ Original=simulated,
+ Covariance=a, InverseCovariance=ainv,
+ Expectation=expectation,
+ CenteredSimulations=centeredSimulations,
+ MHDistances = sort(mahalanobisDistances),
+ ExpectationFunction=expectationFunction,
+ ComputeTime=ttc)
+ class(ret) <- "sienaGOFPreprocess"
+ ret
+}
+
+sienaGOFProcess <- function(observed, simulated, twoTailed=FALSE) {
+ if (class(observed) == "numeric") {
+ observed <- matrix(observed,nrow=1)
+ }
+ if (class(observed) != "matrix" |
+ class(simulated) != "sienaGOFPreprocess") {
+ stop("simulated must be a sienaGOFPreprocess object.")
+ }
+ if (ncol(observed) != simulated$Variates) {
+ stop("Dimensionality of function parameters do not match.")
+ }
+ observations = nrow(observed)
+ mhd <- sapply(1:observations, function(i)
+ t(observed[i,] - simulated$Expectation) %*%
+ simulated$InverseCovariance %*%
+ (observed[i,] - simulated$Expectation) )
+ if (twoTailed) {
+ p.mhd <- sapply(1:observations, function (i)
+ 1 - abs(1 - 2 * sum(mhd[i] <=
+ simulated$MHDistances)/simulated$Iterations) )
+ } else {
+ p.mhd <- sapply(1:observations, function (i)
+ sum(mhd[i] <= simulated$MHDistances)
+ /simulated$Iterations)
+ }
+ return = list(
+ Observation=observed,
+ Preprocess=simulated,
+ MHDistance=mhd,
+ p.MHD = p.mhd,
+ TwoTailed = twoTailed)
+ class(return) <- "sienaGOFProcess"
+ return
+}
+
sienaGOF <- function(obsList, simList) {
- require(snopgof)
if ((class(obsList) != "observedAuxiliaryStatistics") |
(class(simList) != "simulatedAuxiliaryStatistics") |
(length(obsList) != length(simList))
- | (attr(obsList,"auxiliaryStatisticName") != attr(simList,"auxiliaryStatisticName"))) {
+ | (attr(obsList,"auxiliaryStatisticName")
+ != attr(simList,"auxiliaryStatisticName"))) {
stop("Arguments invalid")
}
- pre <- lapply(1:length(simList), function (i) gof.preprocess(simList[[i]]))
- # We could try inverse covariance for weighting or something here. Just identity weighting for now.
- #weight <- lapply(1:length(simList), function (i) solve(cov(simList[[i]])))
- #res <- lapply(1:length(simList), function (i) gof(obsList[[i]], pre[[i]], weight[[i]]))
- res <- lapply(1:length(simList), function (i) gof(obsList[[i]], pre[[i]]) )
- ret <- list(ObservedStatistics=obsList, SimulatedStatistics=simList, Results=res)
+
+ pre <- lapply(1:length(simList), function (i)
+ sienaGOFPreprocess(simList[[i]]))
+ res <- lapply(1:length(simList), function (i)
+ sienaGOFProcess(obsList[[i]], pre[[i]]) )
+
+ ret <- list(ObservedStatistics=obsList, SimulatedStatistics=simList,
+ Results=res)
class(ret) <- "sienaGOF"
- attr(ret,"auxiliaryStatisticName") <- attr(obsList,"auxiliaryStatisticName")
+ attr(ret,"auxiliaryStatisticName") <-
+ attr(obsList,"auxiliaryStatisticName")
ret
}
print.sienaGOF <- function (x, ...) {
- cat("Siena Goodness of Fit (", attr(x,"auxiliaryStatisticName") ,")\n > P-values by wave:\n")
- pVals = sapply(1:length(x$Results), function(i) x$Results[[i]]$p)
- for (i in 1:length(pVals)) {
- cat(" > Wave ", i, ": ", pVals[i], "\n")
+ pVals = sapply(1:length(x$Results), function(i)
+ x$Results[[i]]$p.MHD)
+ ctimes = sapply(1:length(x$Results), function(i)
+ as.numeric(x$Results[[i]]$Preprocess$ComputeTime["elapsed"]))
+ cat("Siena Goodness of Fit (",
+ attr(x,"auxiliaryStatisticName") ,")\n=====\n")
+ if (! attr(x$SimulatedStatistics,"joint")) {
+ cat(" > Monte Carlo Mahalanobis distance test P-values:\n")
+ for (i in 1:length(pVals)) {
+ cat(" > Wave ", i, ": ", pVals[i], "\n")
+ }
+ } else {
+ cat("Monte Carlo Mahalanobis distance test P-value: ",pVals[1], "\n")
}
+ if (x$Results[[1]]$TwoTailed) {
+ cat("-----\nTwo tailed test used.")
+ } else {
+ cat("-----\nOne tailed test used ",
+ "(i.e. area under curve for greater distance than observation).")
+ }
+ cat("\nBased on ", x$Results[[1]]$Preprocess$Iterations,
+ " simulated draws from the null distribution.")
+ cat("\nComputation time for auxiliary statistic calculations: ",
+ attr(x$SimulatedStatistics,"time")["elapsed"] , "seconds.")
+ cat("\nComputation time for distance/test statistic calculations: ",
+ sum(ctimes), "seconds.\n")
}
-plot.sienaGOF <- function (x, wave=1, ...) {
- plot(x$Results[[wave]], main=paste("Goodness of Fit of ", attr(x, "auxiliaryStatisticName")),
- xlab="Variate Index", ylab="Variate Value", ...)
+plot.sienaGOF <- function (x, standardize=3, violin=TRUE,
+ ylim=NULL, xlim=NULL,
+ xlab=NULL, ylab=NULL, key=NULL, perc=.05, wave=1, main=NULL, ...) {
+ if (is.null(main)) {
+ main=paste("Goodness of Fit of ",
+ attr(x$Observed,"auxiliaryStatisticName"))
+ }
+ if (!attr(x$Observed,"joint")){
+ main = paste(main, "Wave", wave)
+ }
+ x <- x$Results[[wave]]
+ itns <- x$Preprocess$Iterations
+ vars <- x$Preprocess$Variates
+ sims <- x$Preprocess$Original
+ obs <- x$Observation
+ n.obs <- nrow(obs)
+
+ if (standardize==3) {
+ sims.median <- apply(sims, 2, median)
+ sims.min <- apply(sims, 2, min)
+ sims.max <- apply(sims, 2, max)
+ sims <- sapply(1:ncol(sims), function(i)
+ (sims[,i] - sims.median[i])/(sims.max[i] -
+ sims.min[i] ) )
+ obs <- matrix(sapply(1:ncol(sims), function(i)
+ (obs[,i] - sims.median[i])/(sims.max[i] -
+ sims.min[i] ) ), nrow=n.obs )
+ } else if (standardize==2) {
+ sims.min <- apply(sims, 2, min)
+ sims.max <- apply(sims, 2, max)
+ sims <- sapply(1:ncol(sims), function(i) (sims[,i] -
+ sims.min[i])/(sims.max[i] - sims.min[i] ) )
+ obs <- matrix(sapply(1:ncol(sims), function(i) (obs[,i] -
+ sims.min[i])/(sims.max[i] - sims.min[i] )
+ ), nrow=n.obs )
+ } else if (standardize==1) {
+ sims.mean <- apply(sims, 2, mean)
+ sims <- sapply(1:ncol(sims), function(i)
+ (sims[,i] - sims.mean[i]) )
+ obs <- matrix(sapply(1:ncol(sims), function(i)
+ (obs[,i] - sims.mean[i]) ), nrow=n.obs )
+ }
+
+ if (is.null(ylim)) {
+ ylim = c(min(obs, sims), max(obs, sims))
+ }
+ if (is.null(xlim)) {
+ xlim = c(0, ncol(obs)+1)
+ }
+ if (is.null(xlab)) {
+ xlab= paste( paste("p:", round(x$p, 3),
+ collapse = " "), collapse = "\n")
+ }
+ if (is.null(ylab)) {
+ ylab = "Statistic Values"
+ }
+ xAxis <- (1:vars)
+
+ plot(obs[1,]~xAxis, col="white", type="p",
+ ylim=ylim, xlim=xlim, main=main,
+ xlab=xlab, ylab=ylab, axes=FALSE, ...)
+ if (!is.null(key)) {
+ if (length(key) != ncol(obs)) {
+ stop("Key length does not match the number of variates.")
+ }
+ axis(1, at=xAxis, lab=key)
+ } else {
+ axis(1, at=xAxis, lab=paste("v", xAxis, sep=""))
+ }
+
+ ind.lower = round(itns * perc/2)
+ ind.upper = round(itns * (1-perc/2))
+ yperc.lower = sapply(1:ncol(sims), function(i)
+ sort(sims[,i])[ind.lower] )
+ yperc.upper = sapply(1:ncol(sims), function(i)
+ sort(sims[,i])[ind.upper] )
+ lines(yperc.lower~xAxis, lty=3, col = "gray", lwd=3)
+ lines(yperc.upper~xAxis, lty=3, col = "gray", lwd=3)
+
+ if (violin) {
+ require(vioplot)
+ for (i in 1:ncol(sims)) {
+ vioplot(sims[,i], at=xAxis[i],
+ add=TRUE, col="gray", wex=.75, ...)
+ }
+ } else {
+ boxplot(as.numeric(sim)~rep(1:vars, each=itns), add=TRUE, ...)
+ }
+ for(i in 1:nrow(obs)) {
+ lines(obs[i,]~xAxis, col="red", type="l", lwd=1, ...)
+ lines(obs[i,]~xAxis, col="red", type="p", lwd=3, pch=19, ...)
+ text(xAxis, obs[i,], labels=round(x$Observation[i,],3), pos=4)
+ }
}
\ No newline at end of file
Modified: pkg/RSienaTest/R/sienaTimeTest.r
===================================================================
--- pkg/RSienaTest/R/sienaTimeTest.r 2011-02-21 00:54:43 UTC (rev 136)
+++ pkg/RSienaTest/R/sienaTimeTest.r 2011-02-22 01:30:23 UTC (rev 137)
@@ -294,7 +294,8 @@
sqrt(diag(sienaFit$covtheta))[estimatedInFit],
ToTest=toTest,
ScreenedEffects=which(!use),
- WaveNumbers=waveNumbers
+ WaveNumbers=waveNumbers,
+ IndividualTestsOrthogonalized=condition
)
class(returnObj) <- "sienaTimeTest"
returnObj
@@ -323,11 +324,14 @@
cat("\nParameter-wise joint significance tests (i.e. each
parameter across all dummies):\n")
print(x$EffectTest)
- if (x$Waves <=2)
+ if (x$Waves <=2 && ! x$IndividualTestsOrthogonalized)
{
cat("\n\nNote that these parameter-wise tests have a different
form than the individual tests, thus testing with 3 observations
may yield different individual and parameter-wise values.\n\n")
+ } else if (x$IndividualTestsOrthogonalized) {
+ cat("\nNote that the individual test statistics were orthogonalized",
+ " with respect to each other (condition=TRUE).")
}
tmp <- paste(" (", 1:length(x$BaseRowInD), ") ",
rownames(x$IndividualTest)[x$BaseRowInD], "\n", sep="")
@@ -499,7 +503,8 @@
tmprows$type %in% effects$type[i], ]
tmprows$fix <- fix
tmprows$include <- include
- tmprows$effectNumber <- max(newEffects$effectNumber) + (1:nrow(tmprows))
+ tmprows$effectNumber <- max(newEffects$effectNumber) +
+ (1:nrow(tmprows))
tmprows$timeDummy <- timeDummy
rownames(tmprows) <- paste(newname, effects$type[i], sep=".")
newEffects <- rbind(newEffects, tmprows)
@@ -541,7 +546,7 @@
effects$timeDummy <- ","
}
- # Josh tested these covariate effects, they work as-is for sienaTimeFix.
+ # JAL: tested these covariate effects, they work as-is for sienaTimeFix.
# covar <- effects$interaction1 != ""
# if (any(effects$timeDummy[covar] != ","))
# {
@@ -555,17 +560,21 @@
# " for network effects of type eval or for RateX.")
# effects$timeDummy[!implemented] <- ","
# }
- structuralRate <- effects$type == "rate" & effects$rateType %in% "structural"
+ structuralRate <- effects$type == "rate" & effects$rateType %in%
+ "structural"
if (any(effects$timeDummy[structuralRate] != ","))
{
warning("Time dummy effects are not implemented",
" for structural rate effects.")
effects$timeDummy[structuralRate] <- ","
}
- behaviorNonRateX <- effects$netType =="behavior" & effects$type != "rate"
+ # JAL: Implementing these 20-FEB-2011 in RSeinaTest
+ # TODO: Behavioral interactions need to be implemented for these to work.
+ # Once they are, we can comment lines 575-577 and 717-720 out.
+ behaviorNonRateX <- effects$netType =="behavior" & effects$type != "rate"
if (any(effects$timeDummy[behaviorNonRateX] != ","))
{
- warning("Time dummy effects are not implemented",
+ warning("Time dummy effects are not implemented",
" for behavior effects of type eval or endow.")
effects$timeDummy[behaviorNonRateX] <- ","
}
@@ -602,7 +611,11 @@
timesd <- lapply(timesd, function(x)as.numeric(x[x %in% periodNos]))
dummiedEffects <- sapply(timesd, function(x)length(x) > 0)
-
+
+ baseType <- list(number=which(dummiedEffects),
+ type=effects$netType[dummiedEffects],
+ name=effects$name[dummiedEffects])
+
rateXDummies <- effects$shortName == "RateX" & dummiedEffects
newEffects <- effects
@@ -742,29 +755,62 @@
}
}
}
-
- ##find the density rows for this depvar
- i <- which(effects$name == depvar &
- effects$shortName=="density" &
- effects$type %in% types)
-
- if (length(i) == 0)
- {
- stop("Cannot find density effect(s) for ", depvar,
- " ", types)
- }
-
- ## establish whether we want the egoXs included, fixed or not
- fix <- sapply(timesd[i], function(x)!p %in% x)
- typesNames <- paste(depvar, effects$type[i], sep=".")
- includeTypes <- !sapply(timesTypelist[typesNames], is.null)
-
- ## add one or more effects (depending on types requested)
- newEffects <- addEffect(newEffects, i, dname,
- "covarNonSymmetricObjective", "egoX",
- paste('isDummy', p,
- effects$effectNumber[i], sep=','),
- fix=fix, include=includeTypes)
+ # This is where the behaviors are treated differently
+ if (baseType$type[baseType$name==depvar][1] == "behavior") {
+ ##find the linear effect rows for this depvar
+ i <- which(effects$name == depvar &
+ effects$shortName=="linear" &
+ effects$type %in% types)
+
+ if (length(i) == 0)
+ {
+ stop("Cannot find 'effect from' effect(s) for ", depvar,
+ " ", types)
+ }
+
+ ## establish whether we want the effFroms
+ ## included, fixed or not
+ fix <- sapply(timesd[i], function(x)!p %in% x)
+ typesNames <- paste(depvar, effects$type[i], sep=".")
+ includeTypes <- !sapply(timesTypelist[typesNames], is.null)
+
+ ## add one or more effects (depending on types requested)
+ newEffects <- addEffect(newEffects, i, dname,
+ "covarBehaviorObjective", "effFrom",
+ paste('isDummy', p,
+ effects$effectNumber[i], sep=','),
+ fix=fix, include=includeTypes)
+ newEffects[nrow(newEffects),"effectName"] <-
+ gsub("yyyyyy", dname,
+ newEffects[nrow(newEffects),"effectName"])
+ newEffects[nrow(newEffects),"functionName"] <-
+ gsub("yyyyyy", dname,
+ newEffects[nrow(newEffects),"functionName"])
+ newEffects[nrow(newEffects),"interaction1"] <- dname
+ } else {
+ ##find the density rows for this depvar
+ i <- which(effects$name == depvar &
+ effects$shortName=="density" &
+ effects$type %in% types)
+
+ if (length(i) == 0)
+ {
+ stop("Cannot find density effect(s) for ", depvar,
+ " ", types)
+ }
+
+ ## establish whether we want the egoXs included, fixed or not
+ fix <- sapply(timesd[i], function(x)!p %in% x)
+ typesNames <- paste(depvar, effects$type[i], sep=".")
+ includeTypes <- !sapply(timesTypelist[typesNames], is.null)
+
+ ## add one or more effects (depending on types requested)
+ newEffects <- addEffect(newEffects, i, dname,
+ "covarNonSymmetricObjective", "egoX",
+ paste('isDummy', p,
+ effects$effectNumber[i], sep=','),
+ fix=fix, include=includeTypes)
+ }
}
## now the interactions for any dummied effects for this
@@ -778,15 +824,27 @@
{
dname <- paste("Dummy", p, ":", depvar, sep="")
effect <- effects[j, ]
- newEffects <-
- includeInteraction(newEffects,
- effect$shortName, "egoX",
- character=TRUE,
- type=effect$type,
- interaction1= c(effect$interaction1,
- dname),
- interaction2=effect$interaction2,
- name=depvar, verbose=FALSE)
+ if (baseType$type[baseType$name==depvar][1] == "behavior") {
+ newEffects <-
+ includeInteraction(newEffects,
+ effect$shortName, "effFrom",
+ character=TRUE,
+ type=effect$type,
+ interaction1= c(effect$interaction1,
+ dname),
+ interaction2=effect$interaction2,
+ name=depvar, verbose=FALSE)
+ } else {
+ newEffects <-
+ includeInteraction(newEffects,
+ effect$shortName, "egoX",
+ character=TRUE,
+ type=effect$type,
+ interaction1= c(effect$interaction1,
+ dname),
+ interaction2=effect$interaction2,
+ name=depvar, verbose=FALSE)
+ }
## find the row altered
newrow <- newEffects$effect1 == j &
newEffects$effect2 ==
Modified: pkg/RSienaTest/changeLog
===================================================================
--- pkg/RSienaTest/changeLog 2011-02-21 00:54:43 UTC (rev 136)
+++ pkg/RSienaTest/changeLog 2011-02-22 01:30:23 UTC (rev 137)
@@ -1,3 +1,20 @@
+2011-02-22 R-forge revision 137
+
+ * Changes to RSienaTest Only
+ * DESCRIPTION: removed dependency on snopgof (for now!) and add
+ -ed dependency for MASS, sna, vioplot for sienaGOF.
+ * R/sienaTimeTest.r: added functionality for behavioral dummies
+ but they will not work until behavioral interaction effects
+ are supported.
+ * R/sienaGOF.r: removed dependency on snopgof (runs standalone)
+ and a number of smaller features like being able to run a
+ joint test across all waves or break them out, having control
+ over the centrality measure (mean, median, etc.), and not
+ requiring that the auxiliary statistics are non-collinear
+ (this is where MASS's ginv comes in). I also moved the
+ relevant functionality from snopgof directly into the code.
+ * man/sienaGOF.Rd: Changes to reflect the above
+
2011-02-21 R-forge revision 136
* R/initializeFRAN.r: fix bug in bipartite networks: diagonals
Modified: pkg/RSienaTest/doc/s_man400.tex
===================================================================
--- pkg/RSienaTest/doc/s_man400.tex 2011-02-21 00:54:43 UTC (rev 136)
+++ pkg/RSienaTest/doc/s_man400.tex 2011-02-22 01:30:23 UTC (rev 137)
@@ -8285,6 +8285,8 @@
(Programmers should consult the changeLog file on CRAN or in the R-forge
repository.)
\begin{itemize}
+\item 2011-02-22 R-forge revision 137: (RSienaTest only)
+Additional work on the goodness of fit functionality (see ?sienaGOF)
\item 2011-02-21 R-forge revision 136:
\begin{itemize}
\item Fixed bug in bipartite network processing. Diagonal (up to number of
Modified: pkg/RSienaTest/man/sienaGOF.Rd
===================================================================
--- pkg/RSienaTest/man/sienaGOF.Rd 2011-02-21 00:54:43 UTC (rev 136)
+++ pkg/RSienaTest/man/sienaGOF.Rd 2011-02-22 01:30:23 UTC (rev 137)
@@ -5,20 +5,25 @@
\alias{evaluateAuxiliaryStatistics.observed}
\alias{print.sienaGOF}
\alias{plot.sienaGOF}
+\alias{sienaGOFPreprocess}
+\alias{sienaGOFProcess}
+
\title{Functions to assess goodness of fit for SAOMs}
\description{
A series of functions which start with a \code{sienaFit} object, a
\code{siena} data object, and a function for calculating auxiliary
- statistics on graphs.
- This function uses the \code{snopgof} package to evaluate goodness
- of fit from the resulting vectors of statistics.
+ statistics on graphs. A Monte Carlo test based on the Mahalanobis
+ distance based test is used to calculate frequentist p-values and
+ plotting functions can be used to diagnose bad fit.
}
\usage{
sienaGOF(obsList, simList)
-evaluateAuxiliaryStatistics.observed(sienaDataObject, varName, wave, auxiliaryFunction, verbose=FALSE)
-evaluateAuxiliaryStatistics.simulated(sienaFitObject, groupName, varName, wave, auxiliaryFunction, verbose=FALSE)
-# Alias for the above two functions
+evaluateAuxiliaryStatistics.observed(sienaDataObject, varName, auxiliaryFunction, wave=NULL,
+verbose=FALSE, join=TRUE)
+evaluateAuxiliaryStatistics.simulated(sienaFitObject, groupName, varName,
+auxiliaryFunction, wave=NULL, verbose=FALSE, join=TRUE)
+# Alias for the above two functions, for user convenience
evaluateAuxiliaryStatistics(sienaObject, ...)
}
\arguments{
@@ -30,58 +35,83 @@
\item{sienaFitObject}{Results from a call to \code{siena07}.}
\item{groupName}{The name of the sienaGroup to be used.}
\item{varName}{The value of the variable to be used, whether a network, behavior, etc.}
- \item{wave}{Wave(s) to be used for calculating the statistics.}
+ \item{join}{Boolean: should sienaGOF do tests on all of the waves individually, or sum across waves?}
+ \item{wave}{Wave(s) to be used for calculating the statistics. Ignored if join=TRUE}
\item{auxiliaryFunction}{Function to be used to calculate statistics, e.g. from the \code{sna} package.}
\item{verbose}{Whether to print intermediate results. Calculations can take some time, and user feedback may be useful.}
\item{...}{Other arguments to be used by the appropriate subfunction which will be called by the alias.}
}
\details{
- This function is currently under construction. Contact the author for working papers on how it works.
- In general, it tests goodness of fit of statistics that should not be explicitly fit. If the p value is
- below the desired false positive rate, there is sufficient evidence to reject the goodness of fit of the model
- with respect to the implied statistic.
+ This function is used to assess the goodness of fit of a stochastic actor oriented model
+ for an arbitrarily defined auxiliary statistic. These statistics should be chosen to
+ represent features of the network that are not explicitly fit but can be considered
+ important properties that the model at hand should represent well. Some examples are:
+
+ -Geodesic distances
+
+ -Triad census
+
+ -Outdegree distribution
+
+ -Indegree distribution
+
+ -Behavioral distribution
+
+ -Edgewise homophily counts
+
+ -Edgewise shared partners
+
+The function is written so that the user can easily define a function to capture some
+other relevant aspects of the network, behaviors, etc.
+
+We recommend the following heuristic approach to fitting your model:
+
+1. Check convergence of your estimation.
+
+2. Assess time heterogeneity and either modify the base effects or include time dummy terms.
+
+3. Assess goodness of fit (i.e. using join=TRUE) on auxiliary statistics, and refine the model.
}
\value{
\code{sienaGOF} Returns a list:
\item{ObservedStatistics }{ The values of the calculated statistics for the observations. }
- \item{SimulatedStatistics }{ A preprocess object (see \code{snopgof} package) calculated on the statistics for the simulations. }
- \item{Results }{ The p values of the \code{snopgof} test. }
+ \item{SimulatedStatistics }{ A preprocess object calculated on the statistics for the simulations. }
+ \item{Results }{ A list including, among other things, p values of the Monte Carlo Mahalanobis distance test. }
}
\references{See \url{http://www.stats.ox.ac.uk/~snijders/siena/}
- for general information on RSiena.
+ for general information on RSiena and \url{http://www.stats.ox.ac.uk/~lospinos/}
+ for information on the following presentation and working paper:
+
+Lospinoso, J.A. and Snijders, T.A.B, "Goodness of fit for
+Stochastic Actor Oriented Models." Presentation given at Sunbelt XXXI, St. Pete's Beach, Fl. 2011.
-Lospinoso, J.A. and Snijders, T.A.B, "A Non-parametric Test for
-Goodness of Fit." Working Paper
+Lospinoso, J.A. and Snijders, T.A.B, "Goodness of fit for
+Stochastic Actor Oriented Models." Working Paper.
}
\author{Josh Lospinoso}
-\seealso{\code{\link{siena07}}, \code{\link{sienaTimeTest}},
- \code{snopgof}}
+\seealso{\code{\link{siena07}}, \code{\link{sienaTimeTest}} }
\examples{
-\dontrun{
# Fifty iterations being used, but we recommend using many more (perhaps 1000 or 1500)
mymodel <- sienaModelCreate(fn=simstats0c, nsub=4, n3=50)
mynet1 <- sienaNet(array(c(s501, s502, s503), dim=c(50, 50, 3)))
mydata <- sienaDataCreate(mynet1)
myeff <- getEffects(mydata)
myeff <- includeEffects(myeff, transTrip, balance)
-ans <- siena07(mymodel, data=mydata, effects=myeff, returnDeps=T, batch=TRUE)
+ans <- siena07(mymodel, data=mydata, effects=myeff, returnDeps=TRUE, batch=TRUE)
-library(sna)
-# install.packages("sna")
-library(network)
-# install.packages("network")
+require(sna)
+require(network)
triadCensus <- function (x) {
triad.census(network(x))[-1]
}
-sim <- evaluateAuxiliaryStatistics(ans, "Data1", "mynet1", 1:2, triadCensus, verbose=T)
-obs <- evaluateAuxiliaryStatistics(mydata, "mynet1", 1:2, triadCensus, verbose=T)
+sim <- evaluateAuxiliaryStatistics(ans, "Data1", "mynet1", triadCensus)
+obs <- evaluateAuxiliaryStatistics(mydata, "mynet1", triadCensus)
(res <- sienaGOF(obs, sim))
# And plots if desired
-plot(res, wave=1)
-plot(res, wave=2)
+triadCensus.names <- c("012","102","021D","021U","021C","111D","111U","030T","030C","201","120D","120U","120C","210","300")
+plot(res, key=triadCensus.names)
}
-}
\keyword{models}
More information about the Rsiena-commits
mailing list