[Rsiena-commits] r140 - in pkg: RSiena RSiena/man RSienaTest RSienaTest/R RSienaTest/doc RSienaTest/man
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Fri Feb 25 00:34:11 CET 2011
Author: jalospinoso
Date: 2011-02-25 00:34:08 +0100 (Fri, 25 Feb 2011)
New Revision: 140
Modified:
pkg/RSiena/DESCRIPTION
pkg/RSiena/changeLog
pkg/RSiena/man/RSiena-package.Rd
pkg/RSienaTest/DESCRIPTION
pkg/RSienaTest/NAMESPACE
pkg/RSienaTest/R/sienaGOF.r
pkg/RSienaTest/changeLog
pkg/RSienaTest/doc/s_man400.tex
pkg/RSienaTest/man/RSiena-package.Rd
pkg/RSienaTest/man/sienaGOF.Rd
Log:
Various additions to sienaGOF.
Modified: pkg/RSiena/DESCRIPTION
===================================================================
--- pkg/RSiena/DESCRIPTION 2011-02-23 23:48:16 UTC (rev 139)
+++ pkg/RSiena/DESCRIPTION 2011-02-24 23:34:08 UTC (rev 140)
@@ -1,8 +1,8 @@
Package: RSiena
Type: Package
Title: Siena - Simulation Investigation for Empirical Network Analysis
-Version: 1.0.12.139
-Date: 2011-02-23
+Version: 1.0.12.140
+Date: 2011-02-24
Author: Various
Depends: R (>= 2.9.0), xtable
Imports: Matrix
Modified: pkg/RSiena/changeLog
===================================================================
--- pkg/RSiena/changeLog 2011-02-23 23:48:16 UTC (rev 139)
+++ pkg/RSiena/changeLog 2011-02-24 23:34:08 UTC (rev 140)
@@ -1,3 +1,10 @@
+2011-02-24 R-forge revision 140
+
+ * R/sienaGOF.r: Added cumulative tests, simulated image
+ function imageGOF, and cleaned up the code to reflect
+ RSiena coding standards. Added image plot to plot.sienaGOF.
+ * man/sienaGOF.Rd: Changes to reflect the above
+
2011-02-23 R-forge revision 139
* src/model/ml/MLsimulation.cpp,
Modified: pkg/RSiena/man/RSiena-package.Rd
===================================================================
--- pkg/RSiena/man/RSiena-package.Rd 2011-02-23 23:48:16 UTC (rev 139)
+++ pkg/RSiena/man/RSiena-package.Rd 2011-02-24 23:34:08 UTC (rev 140)
@@ -30,8 +30,8 @@
\tabular{ll}{
Package: \tab RSiena\cr
Type: \tab Package\cr
-Version: \tab 1.0.12.139cr
-Date: \tab 2011-02-23\cr
+Version: \tab 1.0.12.140cr
+Date: \tab 2011-02-24\cr
License: \tab GPL-2 \cr
LazyLoad: \tab yes\cr
}
Modified: pkg/RSienaTest/DESCRIPTION
===================================================================
--- pkg/RSienaTest/DESCRIPTION 2011-02-23 23:48:16 UTC (rev 139)
+++ pkg/RSienaTest/DESCRIPTION 2011-02-24 23:34:08 UTC (rev 140)
@@ -1,8 +1,8 @@
Package: RSienaTest
Type: Package
Title: Siena - Simulation Investigation for Empirical Network Analysis
-Version: 1.0.12.139
-Date: 2012-02-23
+Version: 1.0.12.140
+Date: 2012-02-24
Author: Various
Depends: R (>= 2.9.0), xtable
Imports: Matrix
Modified: pkg/RSienaTest/NAMESPACE
===================================================================
--- pkg/RSienaTest/NAMESPACE 2011-02-23 23:48:16 UTC (rev 139)
+++ pkg/RSienaTest/NAMESPACE 2011-02-24 23:34:08 UTC (rev 140)
@@ -5,10 +5,7 @@
sienaGroupCreate, sienaModelCreate, sienaNet, sienaNodeSet, #simstats0c,
varCovar, varDyadCovar, setEffect, includeEffects, includeInteraction,
effectsDocumentation, sienaDataConstraint, #maxlikefn,
- installGui, siena08, iwlsm, sienaTimeTest, includeTimeDummy,
- evaluateAuxiliaryStatistics.observed,
- evaluateAuxiliaryStatistics.simulated,
- evaluateAuxiliaryStatistics, sienaGOF)
+ installGui, siena08, iwlsm, sienaTimeTest, includeTimeDummy, sienaGOF)
import(Matrix)
import(xtable)
Modified: pkg/RSienaTest/R/sienaGOF.r
===================================================================
--- pkg/RSienaTest/R/sienaGOF.r 2011-02-23 23:48:16 UTC (rev 139)
+++ pkg/RSienaTest/R/sienaGOF.r 2011-02-24 23:34:08 UTC (rev 140)
@@ -1,287 +1,441 @@
-##/*****************************************************************************
-## * SIENA: Simulation Investigation for Empirical Network Analysis
-## *
-## * Web: http://www.stats.ox.ac.uk/~snidjers/siena
-## *
-## * File: gof.r
-## *
-## * Description: This file contains the code to assess goodness of fit
-## *
-## ****************************************************************************/
+## /*****************************************************************************
+## * SIENA: Simulation Investigation for Empirical Network Analysis
+## *
+## * Web: http://www.stats.ox.ac.uk/~snidjers/siena
+## *
+## * File: gof.r
+## *
+## * Description: This file contains the code to assess goodness of fit
+## *
+## ****************************************************************************/
-## Convenience function for evaluating statistics
-evaluateAuxiliaryStatistics <- function(sienaObject, ...) {
- if (class(sienaObject) == "sienaFit") {
- evaluateAuxiliaryStatistics.simulated(sienaObject, ...)
- } else if (class(sienaObject) == "siena") {
- evaluateAuxiliaryStatistics.observed(sienaObject, ...)
- }
-}
-
-## Sienafit object, returns a list of auxiliary
-## statistics from the given function
-evaluateAuxiliaryStatistics.simulated <- function(sienaFitObject,
- groupName, varName, auxiliaryFunction, wave=NULL,
- verbose=FALSE, join=TRUE) {
- if (! sienaFitObject$returnDeps) {
+sienaGOF <- function(sienaDataObject,
+ sienaFitObject, groupName, varName, auxiliaryFunction, wave=NULL,
+ verbose=FALSE, join=TRUE, expectationFunction=mean,
+ twoTailed=FALSE, cumulative=FALSE,...)
+ {
+ require(MASS)
+ ## require(Matrix)
+ ## Check input
+ if (! sienaFitObject$returnDeps)
+ {
stop("You must instruct siena07 to return the simulated networks")
}
iterations = length(sienaFitObject$sims)
- if (iterations < 1) {
+ if (iterations < 1)
+ {
stop("You need at least one iteration.")
}
groups = length(sienaFitObject$sims[[1]])
if (verbose) cat("Detected", iterations, "iterations and", groups,
"groups.\n")
groupNumber = which(names(sienaFitObject$sims[[1]]) == groupName)
- if (is.na(groupNumber)) {
+ if (is.na(groupNumber))
+ {
stop("Invalid group name.")
}
if (verbose) cat("Group", groupName, "corresponds to index",
groupNumber,".\n")
varNumber = which(names(sienaFitObject$sims[[1]][[groupNumber]]) ==
varName)
- if (varNumber < 1 | varNumber >
- length(sienaFitObject$sims[[1]][[groupNumber]])) {
+ if (varNumber < 1 | varNumber > length(sienaDataObject$depvars))
+ {
stop("Invalid variable number -- out of bounds.")
}
- if (verbose) cat("Variable", varName, "corresponds to index",
- varNumber,".\n")
- if (is.null(wave)) {
- wave = 1:length(sienaFitObject$sims[[1]][[groupNumber]][[varNumber]])
+ if (verbose)
+ {
+ cat("Variable", varName,
+ "corresponds to index",varNumber,".\n")
}
+ if (is.null(wave) )
+ {
+ wave = 1:(dim(sienaDataObject$depvars[[varNumber]])[3] - 1)
+ }
if (min(wave) < 1 | max(wave) >
- length(sienaFitObject$sims[[1]][[groupNumber]][[varNumber]])){
+ dim(sienaDataObject$depvars[[varNumber]])[3] - 1 |
+ max(wave) >
+ length(sienaFitObject$sims[[1]][[groupNumber]][[varNumber]])
+ )
+ {
stop("Invalid wave index -- out of bounds")
}
+ if (varNumber < 1 | varNumber >
+ length(sienaFitObject$sims[[1]][[groupNumber]]))
+ {
+ stop("Invalid variable number -- out of bounds.")
+ }
+ if (is.null(wave))
+ {
+ wave = 1:length(sienaFitObject$sims[[1]][[groupNumber]][[varNumber]])
+ }
+
+ ## Need to cast all of the observations and statistics into sparse matrices
+ observedSp <- lapply(wave, function(j) {
+ Matrix(sienaDataObject$depvars[[varNumber]][,, j+1],
+ sparse=TRUE)})
+ dimsOfDepVar <- dim(observedSp[[1]])
+ missingSp <- lapply(wave, function(j) {
+ Matrix(is.na(sienaDataObject$depvars[[varNumber]][,, j+1]),
+ sparse=TRUE)})
+ simsSp <- lapply(1:iterations, function(i){ lapply(wave, function(j)
+ {
+ sparseMatrix(sienaFitObject$
+ sims[[i]][[groupNumber]][[varNumber]][[j]][,1],
+ sienaFitObject$
+ sims[[i]][[groupNumber]][[varNumber]][[j]][,2],
+ x=1,dims=dimsOfDepVar ) })})
+
+ ## Now observation auxiliary statistics
+ if (join)
+ {
+ tmp <- lapply(wave, function (j)
+ {
+ auxiliaryFunction(observedSp[[j]], missingSp[[j]]) })
+ obsStats <- tmp[[1]]
+ if (length(tmp)>1) {
+ for (i in 2:length(tmp)) {
+ obsStats = obsStats + tmp[[i]]
+ }
+ }
+ ttcObservation <- system.time(obsStats <- list(Joint=obsStats))
+ } else {
+ ttcObservation <- system.time( obsStats <- lapply(wave, function (j) {
+ auxiliaryFunction(observedSp[[j]], missingSp[[j]]) }))
+ screen = lapply(wave, function (j) {
+ is.na(sienaDataObject$depvars[[varNumber]][,,j+1])})
+ names(obsStats) <- paste("Wave",wave)
+ }
+ class(obsStats) <- "observedAuxiliaryStatistics"
+ attr(obsStats,"auxiliaryStatisticName") <-
+ deparse(substitute(auxiliaryFunction))
+ attr(obsStats,"joint") = join
+ attr(obsStats,"time") = ttcObservation
+
+ ## Calculate the simulated auxiliary statistics
if (verbose) cat("Calculating auxiliary statistics for waves",wave,".\n")
- if (join) {
- ttc <- system.time(tmp <- lapply(wave, function (j) {
+ if (join)
+ {
+ ttcSimulation <- system.time(tmp <- lapply(wave, function (j) {
tmp <- sapply(1:iterations, function (i)
- { auxiliaryFunction(sienaFitObject$
- sims[[i]][[groupNumber]][[varNumber]][[j]])})
+ { auxiliaryFunction(simsSp[[i]][[j]], missingSp[[j]])})
dimnames(tmp)[[2]] <- 1:iterations
t(tmp)
}))
- ret <- tmp[[1]]
+ simStats <- tmp[[1]]
if (length(tmp)>1) {
for (i in 2:length(tmp)) {
- ret = ret + tmp[[i]]
+ simStats = simStats + tmp[[i]]
}
}
- ret <- list(Joint=ret)
+ simStats <- list(Joint=simStats)
} else {
- ttc <- system.time(ret <- lapply(wave, function (j) {
+ ttcSimulation <- system.time(simStats <- lapply(wave, function (j) {
tmp <- sapply(1:iterations, function (i) {
- auxiliaryFunction(sienaFitObject$
- sims[[i]][[groupNumber]][[varNumber]][[j]])})
+ auxiliaryFunction(simsSp[[i]][[j]], missingSp[[j]])
+ })
dimnames(tmp)[[2]] <- 1:iterations
t(tmp)
}))
- names(ret) <- paste("Wave",wave)
+ names(simStats) <- paste("Wave",wave)
}
- class(ret) <- "simulatedAuxiliaryStatistics"
- attr(ret,"auxiliaryStatisticName") <- deparse(substitute(auxiliaryFunction))
- attr(ret,"joint") = join
- attr(ret,"time") = ttc
- ret
-}
-
-## Sienafit object, returns a list of auxiliary statistics from the given function
-evaluateAuxiliaryStatistics.observed <- function(sienaDataObject,
- varName, auxiliaryFunction, wave=NULL, verbose=FALSE, join=TRUE) {
- varNumber = which(names(sienaDataObject$depvars) == varName)
- if (varNumber < 1 | varNumber > length(sienaDataObject$depvars)) {
- stop("Invalid variable number -- out of bounds.")
+ class(simStats) <- "simulatedAuxiliaryStatistics"
+ attr(simStats,"auxiliaryStatisticName") <-
+ deparse(substitute(auxiliaryFunction))
+ attr(simStats,"joint") = join
+ attr(simStats,"time") = ttcSimulation
+
+ applyTest <- function (observed, simulated)
+ {
+ if (class(simulated) != "matrix")
+ {
+ stop("Invalid input.")
+ }
+ if (class(observed) != "matrix")
+ {
+ observed <- matrix(observed,nrow=1)
+ }
+ if (class(observed) != "matrix")
+ {
+ stop("Observation must be a matrix.")
+ }
+ if (ncol(observed) != ncol(simulated))
+ {
+ stop("Dimensionality of function parameters do not match.")
+ }
+ observations = nrow(observed)
+ simulations=nrow(simulated)
+ variates=ncol(simulated)
+
+ a <- cov(simulated)
+ ainv <- ginv(a)
+ arank <- rankMatrix(a)
+ expectation = apply(simulated, 2, expectationFunction);
+ centeredSimulations <- t(sapply(1:simulations,
+ function(i) simulated[i,] - expectation))
+ if (variates==1)
+ {
+ centeredSimulations <- t(centeredSimulations)
+ }
+ simTestStat <- sapply(1:simulations, function(i)
+ centeredSimulations[i,] %*%
+ ainv %*% centeredSimulations[i,])
+ obsTestStat <- sapply(1:observations, function(i)
+ t(observed[i,] - expectation) %*%
+ ainv %*%
+ (observed[i,] - expectation))
+ if (twoTailed)
+ {
+ p <- sapply(1:observations, function (i)
+ 1 - abs(1 - 2 * sum(obsTestStat[i] <=
+ simTestStat)/length(simTestStat)) )
+ }
+ else
+ {
+ p <- sapply(1:observations, function (i)
+ sum(obsTestStat[i] <= simTestStat) /length(simTestStat))
+ }
+ ret <- list( p = p,
+ SimulatedTestStat=simTestStat,
+ ObservedTestStat=obsTestStat,
+ TwoTailed=twoTailed,
+ Simulations=simulated,
+ Observations=observed,
+ Rank=arank)
+ class(ret) <- "sienaGofTest"
+ attr(ret,"auxiliaryStatisticName") <-
+ attr(obsStats,"auxiliaryStatisticName")
+ ret
}
- if (verbose) cat("Variable", varName,
- "corresponds to index",varNumber,".\n")
- if (is.null(wave) ) {
- wave = 1:(dim(sienaDataObject$depvars[[varNumber]])[3] - 1)
- }
- if (min(wave) < 1 | max(wave) >
- dim(sienaDataObject$depvars[[varNumber]])[3] - 1){
- stop("Invalid wave index -- out of bounds")
- }
- if (verbose) cat("Calculating auxiliary statistics for waves",wave,".\n")
- if (join) {
- tmp <- lapply(wave, function (j) {
- auxiliaryFunction(sienaDataObject$
- depvars[[varNumber]][,,j+1])
- })
- ret <- tmp[[1]]
- if (length(tmp)>1) {
- for (i in 2:length(tmp)) {
- ret = ret + tmp[[i]]
- }
+ applyCumulativeTest <- function (observed, simulated)
+ {
+ if (class(simulated) != "matrix")
+ {
+ stop("Invalid input.")
}
- ret <- list(Joint=ret)
- } else {
- ret <- lapply(wave, function (j) {
- auxiliaryFunction(sienaDataObject$
- depvars[[varNumber]][,,j+1])
- })
- names(ret) <- paste("Wave",wave)
+ if (class(observed) != "matrix")
+ {
+ observed <- matrix(observed,nrow=1)
+ }
+ if (class(observed) != "matrix")
+ {
+ stop("Observation must be a matrix.")
+ }
+ if (ncol(observed) != ncol(simulated))
+ {
+ stop("Dimensionality of function parameters do not match.")
+ }
+ observations = nrow(observed)
+ simulations=nrow(simulated)
+ variates=ncol(simulated)
+ nonnormalizedCdf = apply(simulated,2,mean)
+
+ simTestStat <- sapply(1:simulations, function(i)
+ max(abs(simulated[i,] - nonnormalizedCdf)))
+ obsTestStat <- sapply(1:observations, function(i)
+ max(abs(observed[i,] - nonnormalizedCdf)))
+ if (twoTailed)
+ {
+ p <- sapply(1:observations, function (i)
+ 1 - abs(1 - 2 * sum(obsTestStat[i] <=
+ simTestStat)/length(simTestStat)) )
+ }
+ else
+ {
+ p <- sapply(1:observations, function (i)
+ sum(obsTestStat[i] <= simTestStat) /length(simTestStat))
+ }
+ ret <- list( p = p,
+ SimulatedTestStat=simTestStat,
+ ObservedTestStat=obsTestStat,
+ TwoTailed=twoTailed,
+ Simulations=simulated,
+ Observations=observed,
+ Rank=NULL)
+ class(ret) <- "sienaGofTest"
+ attr(ret,"auxiliaryStatisticName") <-
+ attr(obsStats,"auxiliaryStatisticName")
+ ret
}
- class(ret) <- "observedAuxiliaryStatistics"
- attr(ret,"auxiliaryStatisticName") <-
- deparse(substitute(auxiliaryFunction))
- attr(ret,"joint") = join
- ret
-}
-
-
-sienaGOFPreprocess <- function(simulated, expectationFunction=mean) {
- require(MASS)
- if (class(simulated) != "matrix") {
- stop("Invalid input.")
+ if (cumulative)
+ {
+ testTime <- system.time(res <- lapply(1:length(simStats), function (i)
+ applyCumulativeTest(obsStats[[i]], simStats[[i]]) ))
}
- a <- cov(simulated)
- # How do we make this error get thrown
- #up the call stack? It will look much
- # nicer if we can get that to work
- # tryCatch(
- # ainv <- solve(a)
- #, error =
- # function(e)simpleError(paste("Your testing functions are producing",
- # "collinear statistics which cannot be handled by the",
- # "Mahalanobis distance. Try reducing the number of statistics",
- # "returned by your testing function (e.g. Geodesic distances 1:10",
- # "to 1:5 or so).")))
- # Using generalized inverse instead of regular inverse so
- # we don't have to worry
- # about collinearity
- ainv <- ginv(a)
- simulations=nrow(simulated)
- variates=ncol(simulated)
- expectation = apply(simulated, 2, expectationFunction);
- centeredSimulations <- t(sapply(1:simulations,
- function(i) simulated[i,] - expectation))
- if (variates==1) {
- centeredSimulations <- t(centeredSimulations)
+ else
+ {
+ testTime <- system.time(res <- lapply(1:length(simStats), function (i)
+ applyTest(obsStats[[i]], simStats[[i]]) ))
}
- ttc <- system.time(mahalanobisDistances <-
- sapply(1:simulations, function(i)
- centeredSimulations[i,] %*%
- ainv %*% centeredSimulations[i,]))
- ret <- list(
- Iterations=dim(simulated)[1],
- Variates=dim(simulated)[2],
- Original=simulated,
- Covariance=a, InverseCovariance=ainv,
- Expectation=expectation,
- CenteredSimulations=centeredSimulations,
- MHDistances = sort(mahalanobisDistances),
- ExpectationFunction=expectationFunction,
- ComputeTime=ttc)
- class(ret) <- "sienaGOFPreprocess"
- ret
-}
-sienaGOFProcess <- function(observed, simulated, twoTailed=FALSE) {
- if (class(observed) == "numeric") {
- observed <- matrix(observed,nrow=1)
- }
- if (class(observed) != "matrix" |
- class(simulated) != "sienaGOFPreprocess") {
- stop("simulated must be a sienaGOFPreprocess object.")
- }
- if (ncol(observed) != simulated$Variates) {
- stop("Dimensionality of function parameters do not match.")
- }
- observations = nrow(observed)
- mhd <- sapply(1:observations, function(i)
- t(observed[i,] - simulated$Expectation) %*%
- simulated$InverseCovariance %*%
- (observed[i,] - simulated$Expectation) )
- if (twoTailed) {
- p.mhd <- sapply(1:observations, function (i)
- 1 - abs(1 - 2 * sum(mhd[i] <=
- simulated$MHDistances)/simulated$Iterations) )
- } else {
- p.mhd <- sapply(1:observations, function (i)
- sum(mhd[i] <= simulated$MHDistances)
- /simulated$Iterations)
- }
- return = list(
- Observation=observed,
- Preprocess=simulated,
- MHDistance=mhd,
- p.MHD = p.mhd,
- TwoTailed = twoTailed)
- class(return) <- "sienaGOFProcess"
- return
+ names(res) <- names(obsStats)
+ class(res) <- "sienaGOF"
+ attr(res, "originalSimulations") <- simsSp
+ attr(res, "originalObservations") <- observedSp
+ attr(res, "originalMissings") <- missingSp
+ attr(res,"auxiliaryStatisticName") <-
+ attr(obsStats,"auxiliaryStatisticName")
+ attr(res, "obsTime") <- attr(obsStats,"time")
+ attr(res, "simTime") <- attr(simStats,"time")
+ attr(res, "testTime") <- testTime
+ attr(res, "twoTailed") <- twoTailed
+ attr(res, "joined") <- join
+ attr(res, "cumulative") <- cumulative
+ res
}
-sienaGOF <- function(obsList, simList) {
- if ((class(obsList) != "observedAuxiliaryStatistics") |
- (class(simList) != "simulatedAuxiliaryStatistics") |
- (length(obsList) != length(simList))
- | (attr(obsList,"auxiliaryStatisticName")
- != attr(simList,"auxiliaryStatisticName"))) {
- stop("Arguments invalid")
+print.sienaGOF <- function (x, ...) {
+ ## require(Matrix)
+ levels <- 1:length(x)
+ pVals = sapply(levels, function(i) x[[i]]$p)
+ if (attr(x, "cumulative"))
+ {
+ titleStr= "Monte Carlo Kolmogorov-Smirnov test P-value: "
+ }
+ else
+ {
+ titleStr= "Monte Carlo Mahalanobis distance test P-value: "
}
-
- pre <- lapply(1:length(simList), function (i)
- sienaGOFPreprocess(simList[[i]]))
- res <- lapply(1:length(simList), function (i)
- sienaGOFProcess(obsList[[i]], pre[[i]]) )
-
- ret <- list(ObservedStatistics=obsList, SimulatedStatistics=simList,
- Results=res)
- class(ret) <- "sienaGOF"
- attr(ret,"auxiliaryStatisticName") <-
- attr(obsList,"auxiliaryStatisticName")
- ret
-}
-
-print.sienaGOF <- function (x, ...) {
- pVals = sapply(1:length(x$Results), function(i)
- x$Results[[i]]$p.MHD)
- ctimes = sapply(1:length(x$Results), function(i)
- as.numeric(x$Results[[i]]$Preprocess$ComputeTime["elapsed"]))
cat("Siena Goodness of Fit (",
attr(x,"auxiliaryStatisticName") ,")\n=====\n")
- if (! attr(x$SimulatedStatistics,"joint")) {
- cat(" > Monte Carlo Mahalanobis distance test P-values:\n")
- for (i in 1:length(pVals)) {
+ if (! attr(x,"join"))
+ {
+ cat(" >",titleStr, "\n")
+ for (i in 1:length(pVals))
+ {
cat(" > Wave ", i, ": ", pVals[i], "\n")
}
- } else {
- cat("Monte Carlo Mahalanobis distance test P-value: ",pVals[1], "\n")
+ for (i in 1:length(pVals))
+ {
+ if (! attr(x, "cumulative") &&
+ x[[i]]$Rank != ncol(x[[i]]$Simulations))
+ {
+ cat(" * Note for wave ",i,
+ ": Only ", x[[i]]$Rank, " statistics are ",
+ "necessary in the auxiliary function.\n")
+ }
+ }
}
- if (x$Results[[1]]$TwoTailed) {
+ else
+ {
+ cat(titleStr,pVals[1], "\n")
+ if (! attr(x, "cumulative") &&
+ x[[1]]$Rank != ncol(x[[1]]$Simulations))
+ {
+ cat("**Note: Only ", x[[1]]$Rank, " statistics are ",
+ "necessary in the auxiliary function.\n")
+ }
+ }
+
+ if ( attr(x, "twoTailed") )
+ {
cat("-----\nTwo tailed test used.")
- } else {
+ }
+ else
+ {
cat("-----\nOne tailed test used ",
"(i.e. area under curve for greater distance than observation).")
}
- cat("\nBased on ", x$Results[[1]]$Preprocess$Iterations,
- " simulated draws from the null distribution.")
- cat("\nComputation time for auxiliary statistic calculations: ",
- attr(x$SimulatedStatistics,"time")["elapsed"] , "seconds.")
+
+ cat("\nComputation time for auxiliary statistic calculations on observation: ",
+ attr(x, "obsTime")["elapsed"] , "seconds.")
+
+ cat("\nComputation time for auxiliary statistic calculations on simulations: ",
+ attr(x, "simTime")["elapsed"] , "seconds.")
+
cat("\nComputation time for distance/test statistic calculations: ",
- sum(ctimes), "seconds.\n")
+ attr(x, "testTime")["elapsed"] , "seconds.\n")
}
-plot.sienaGOF <- function (x, standardize=3, violin=TRUE,
+dyadGOF <- function (x, wave, threshold, ...)
+{
+ ## require(Matrix)
+ a <- attr(x,"originalSimulations")
+ b <- attr(x,"originalMissings")
+ c <- attr(x,"originalObservations")
+ truePositive <- a[[1]][[wave]] * 0
+ trueNegative <- a[[1]][[wave]] * 0
+ falsePositive <- a[[1]][[wave]] * 0
+ falseNegative <- a[[1]][[wave]] * 0
+
+ for(i in 1:length(a))
+ {
+ truePositive = truePositive +
+ a[[i]][[wave]] * c[[wave]]
+ falseNegative = falseNegative +
+ (1-a[[i]][[wave]]) * c[[wave]]
+ falsePositive = falsePositive +
+ a[[i]][[wave]] * (1-c[[wave]])
+ trueNegative = trueNegative +
+ (1-a[[i]][[wave]]) * (1-c[[wave]])
+ }
+ threshold = length(a) * threshold
+
+## trueNegative[trueNegative < threshold] <- 0
+## trueNegative[trueNegative > 0] <- 1
+## truePositive[truePositive < threshold] <- 0
+## truePositive[truePositive > 0] <- 1
+## falseNegative[falseNegative < (1-threshold)] <- 0
+## falseNegative[falseNegative > 0] <- 1
+## falsePositive[falsePositive < (1-threshold)] <- 0
+## falsePositive[falsePositive > 0] <- 1
+
+ falseNegative[falseNegative > threshold] <- 1
+ falseNegative[falseNegative < 1] <- 0
+ falsePositive[falsePositive > threshold] <- 1
+ falsePositive[falsePositive < 1] <- 0
+
+ plotted = as.matrix(-1*falsePositive + falseNegative)
+ if (sum(plotted)==0)
+ {
+ plotted <- matrix(0, nrow=nrow(plotted),ncol=ncol(plotted))
+ }
+ diag(plotted)
+ image(z=plotted, zlim=c(-1,1),
+ col=c("red", "white", "blue"), ...)
+}
+
+plot.sienaGOF <- function (x, standardize=NA, violin=TRUE,
ylim=NULL, xlim=NULL,
- xlab=NULL, ylab=NULL, key=NULL, perc=.05, wave=1, main=NULL, ...) {
- if (is.null(main)) {
+ xlab=NULL, ylab=NULL, key=NULL,
+ perc=.05, wave=1, main=NULL,
+ image=FALSE, imageThreshold=.3, ...)
+ {
+ ## require(Matrix)
+ if (image)
+ {
+ dyadGOF(x, wave, imageThreshold)
+ }
+ else
+ {
+ if (is.null(main))
+ {
main=paste("Goodness of Fit of ",
- attr(x$Observed,"auxiliaryStatisticName"))
+ attr(x,"auxiliaryStatisticName"))
}
- if (!attr(x$Observed,"joint")){
+ if (!attr(x,"joined"))
+ {
main = paste(main, "Wave", wave)
}
- x <- x$Results[[wave]]
- itns <- x$Preprocess$Iterations
- vars <- x$Preprocess$Variates
- sims <- x$Preprocess$Original
- obs <- x$Observation
+ cumulative = attr(x,"cumulative")
+ if (is.na(standardize))
+ {
+ if (attr(x,"cumulative"))
+ {
+ standardize=0
+ }
+ else
+ {
+ standardize=3
+ }
+ }
+ x <- x[[wave]]
+ itns <- nrow(x$Simulations)
+ vars <- ncol(x$Simulations)
+ sims <- x$Simulations
+ obs <- x$Observations
n.obs <- nrow(obs)
-
- if (standardize==3) {
+ if (standardize==3)
+ {
sims.median <- apply(sims, 2, median)
sims.min <- apply(sims, 2, min)
sims.max <- apply(sims, 2, max)
@@ -289,35 +443,44 @@
(sims[,i] - sims.median[i])/(sims.max[i] -
sims.min[i] ) )
obs <- matrix(sapply(1:ncol(sims), function(i)
- (obs[,i] - sims.median[i])/(sims.max[i] -
- sims.min[i] ) ), nrow=n.obs )
- } else if (standardize==2) {
+ (obs[,i] - sims.median[i])/(sims.max[i] -
+ sims.min[i] ) ), nrow=n.obs )
+ }
+ else if (standardize==2)
+ {
sims.min <- apply(sims, 2, min)
sims.max <- apply(sims, 2, max)
sims <- sapply(1:ncol(sims), function(i) (sims[,i] -
- sims.min[i])/(sims.max[i] - sims.min[i] ) )
+ sims.min[i])/(sims.max[i] - sims.min[i] ) )
obs <- matrix(sapply(1:ncol(sims), function(i) (obs[,i] -
- sims.min[i])/(sims.max[i] - sims.min[i] )
- ), nrow=n.obs )
- } else if (standardize==1) {
+ sims.min[i])/(sims.max[i] - sims.min[i] )
+ ), nrow=n.obs )
+ }
+ else if (standardize==1)
+ {
sims.mean <- apply(sims, 2, mean)
sims <- sapply(1:ncol(sims), function(i)
(sims[,i] - sims.mean[i]) )
obs <- matrix(sapply(1:ncol(sims), function(i)
- (obs[,i] - sims.mean[i]) ), nrow=n.obs )
+ (obs[,i] - sims.mean[i]) ), nrow=n.obs )
}
-
- if (is.null(ylim)) {
+ sims[is.nan(sims)] <- 0
+ obs[is.nan(obs)] <- 0
+ if (is.null(ylim))
+ {
ylim = c(min(obs, sims), max(obs, sims))
}
- if (is.null(xlim)) {
+ if (is.null(xlim))
+ {
xlim = c(0, ncol(obs)+1)
}
- if (is.null(xlab)) {
+ if (is.null(xlab))
+ {
xlab= paste( paste("p:", round(x$p, 3),
- collapse = " "), collapse = "\n")
+ collapse = " "), collapse = "\n")
}
- if (is.null(ylab)) {
+ if (is.null(ylab))
+ {
ylab = "Statistic Values"
}
xAxis <- (1:vars)
@@ -325,12 +488,16 @@
plot(obs[1,]~xAxis, col="white", type="p",
ylim=ylim, xlim=xlim, main=main,
xlab=xlab, ylab=ylab, axes=FALSE, ...)
- if (!is.null(key)) {
- if (length(key) != ncol(obs)) {
+ if (!is.null(key))
+ {
+ if (length(key) != ncol(obs))
+ {
stop("Key length does not match the number of variates.")
}
axis(1, at=xAxis, lab=key)
- } else {
+ }
+ else
+ {
axis(1, at=xAxis, lab=paste("v", xAxis, sep=""))
}
@@ -343,18 +510,24 @@
lines(yperc.lower~xAxis, lty=3, col = "gray", lwd=3)
lines(yperc.upper~xAxis, lty=3, col = "gray", lwd=3)
- if (violin) {
+ if (violin)
+ {
require(vioplot)
- for (i in 1:ncol(sims)) {
+ for (i in 1:ncol(sims))
+ {
vioplot(sims[,i], at=xAxis[i],
add=TRUE, col="gray", wex=.75, ...)
}
- } else {
+ }
+ else
+ {
boxplot(as.numeric(sim)~rep(1:vars, each=itns), add=TRUE, ...)
}
- for(i in 1:nrow(obs)) {
+ for(i in 1:nrow(obs))
+ {
lines(obs[i,]~xAxis, col="red", type="l", lwd=1, ...)
lines(obs[i,]~xAxis, col="red", type="p", lwd=3, pch=19, ...)
text(xAxis, obs[i,], labels=round(x$Observation[i,],3), pos=4)
}
+ }
}
\ No newline at end of file
Modified: pkg/RSienaTest/changeLog
===================================================================
--- pkg/RSienaTest/changeLog 2011-02-23 23:48:16 UTC (rev 139)
+++ pkg/RSienaTest/changeLog 2011-02-24 23:34:08 UTC (rev 140)
@@ -1,3 +1,10 @@
+2011-02-24 R-forge revision 140
+
+ * R/sienaGOF.r: Added cumulative tests, simulated image
+ function imageGOF, and cleaned up the code to reflect
+ RSiena coding standards. Added image plot to plot.sienaGOF.
+ * man/sienaGOF.Rd: Changes to reflect the above
+
2011-02-23 R-forge revision 139
* src/model/ml/MLsimulation.cpp,
Modified: pkg/RSienaTest/doc/s_man400.tex
===================================================================
--- pkg/RSienaTest/doc/s_man400.tex 2011-02-23 23:48:16 UTC (rev 139)
+++ pkg/RSienaTest/doc/s_man400.tex 2011-02-24 23:34:08 UTC (rev 140)
@@ -8286,6 +8286,9 @@
(Programmers should consult the changeLog file on CRAN or in the R-forge
repository.)
\begin{itemize}
+\item 2011-02-24 R-forge revision 140: adds functionality to sienaGOF for plotting
+ image matrices of the simulations, cumulative tests based on the Kolmogorov-
+ Smirnov test statistic, and conforms to coding standards.
\item 2011-02-24 R-forge revision 139: fixes for bipartite networks with
ML. ML is still incomplete, and will not work correctly with missing data or
endowment effects.
Modified: pkg/RSienaTest/man/RSiena-package.Rd
===================================================================
--- pkg/RSienaTest/man/RSiena-package.Rd 2011-02-23 23:48:16 UTC (rev 139)
+++ pkg/RSienaTest/man/RSiena-package.Rd 2011-02-24 23:34:08 UTC (rev 140)
@@ -30,8 +30,8 @@
\tabular{ll}{
Package: \tab RSiena\cr
Type: \tab Package\cr
-Version: \tab 1.0.12.139\cr
-Date: \tab 2011-02-23\cr
+Version: \tab 1.0.12.140\cr
+Date: \tab 2011-02-24\cr
License: \tab GPL-2 \cr
LazyLoad: \tab yes\cr
}
Modified: pkg/RSienaTest/man/sienaGOF.Rd
===================================================================
--- pkg/RSienaTest/man/sienaGOF.Rd 2011-02-23 23:48:16 UTC (rev 139)
+++ pkg/RSienaTest/man/sienaGOF.Rd 2011-02-24 23:34:08 UTC (rev 140)
@@ -1,51 +1,90 @@
\name{sienaGOF}
\alias{sienaGOF}
-\alias{evaluateAuxiliaryStatistics}
-\alias{evaluateAuxiliaryStatistics.simulated}
-\alias{evaluateAuxiliaryStatistics.observed}
\alias{print.sienaGOF}
\alias{plot.sienaGOF}
-\alias{sienaGOFPreprocess}
-\alias{sienaGOFProcess}
\title{Functions to assess goodness of fit for SAOMs}
\description{
- A series of functions which start with a \code{sienaFit} object, a
+ Goodness of fit functionality requiring a \code{sienaFit} object, a
\code{siena} data object, and a function for calculating auxiliary
statistics on graphs. A Monte Carlo test based on the Mahalanobis
distance based test is used to calculate frequentist p-values and
- plotting functions can be used to diagnose bad fit.
+ plotting functions can be used to diagnose bad fit. Alternately, a
+ Kolmogorov-Smirnov test statistic can be used for cumulative
+ auxiliary statistics.
}
\usage{
-sienaGOF(obsList, simList)
-evaluateAuxiliaryStatistics.observed(sienaDataObject, varName, auxiliaryFunction, wave=NULL,
-verbose=FALSE, join=TRUE)
-evaluateAuxiliaryStatistics.simulated(sienaFitObject, groupName, varName,
-auxiliaryFunction, wave=NULL, verbose=FALSE, join=TRUE)
-# Alias for the above two functions, for user convenience
-evaluateAuxiliaryStatistics(sienaObject, ...)
+sienaGOF(sienaDataObject,
+ sienaFitObject, groupName, varName, auxiliaryFunction, wave=NULL,
+ verbose=FALSE, join=TRUE, expectationFunction=mean,
+ twoTailed=FALSE, cumulative=FALSE, \dots)
+\method{plot}{sienaGOF}(x, standardize=NA, violin=TRUE,
+ ylim=NULL, xlim=NULL,
+ xlab=NULL, ylab=NULL, key=NULL,
+ perc=.05, wave=1, main=NULL,
+ image=FALSE, imageThreshold=.3, \dots)
+\method{print}{sienaGOF}(x, \dots)
}
\arguments{
- \item{obsList}{The results from a call to \code{evaluateAuxiliaryStatistics.observed}.}
- \item{simList}{The results from a call to \code{evaluateAuxiliaryStatistics.simulated}.}
- \item{sienaObject}{Either a \code{sienaDataObject} or a \code{sienaFitObject}; alias will determine which and call
- the appropriate function.}
+ ## sienaGOF(\dots):
\item{sienaDataObject}{Results from a call to \code{sienaDataCreate}.}
\item{sienaFitObject}{Results from a call to \code{siena07}.}
\item{groupName}{The name of the sienaGroup to be used.}
- \item{varName}{The value of the variable to be used, whether a network, behavior, etc.}
- \item{join}{Boolean: should sienaGOF do tests on all of the waves individually, or sum across waves?}
- \item{wave}{Wave(s) to be used for calculating the statistics. Ignored if join=TRUE}
- \item{auxiliaryFunction}{Function to be used to calculate statistics, e.g. from the \code{sna} package.}
- \item{verbose}{Whether to print intermediate results. Calculations can take some time, and user feedback may be useful.}
- \item{...}{Other arguments to be used by the appropriate subfunction which will be called by the alias.}
+ \item{varName}{The value of the variable to be used, whether a
+ network, behavior, etc.}
+ \item{join}{Boolean: should sienaGOF do tests on all of the waves
+ individually, or sum across waves?}
+ \item{auxiliaryFunction}{Function to be used to calculate statistics,
+ e.g. from the \code{sna} package.
+ The function must accept two arguments of the order (dependentVariable,
+ missingData), and return a
+ numeric vector of constant length.}
+ \item{verbose}{Whether to print intermediate results. Calculations can
+ take some time, and user feedback may be useful.}
+ \item{expectationFunction}{The function to be used for centering the
+ Mahalanobis distances.
+ Not used if \code{cumulative=TRUE}.}
+ \item{cumulative}{If the auxiliary function returns cumulative values,
+ this option will use a Kolmogorov-Smirnov
+ test statistic instead of the Mahalanobis distance.}
+ \item{twoTailed}{Whether to use two tails for calculating p values on the
+ Monte Carlo test. Recommended for
+ advanced users only, as it is probably only applicable in rare cases.}
+ ## plot(\dots):
+ \item{x}{ Object from a call to sienaGOF. }
+ \item{standardize}{ One of three levels of standardization to be performed
+ on the auxiliary statistics (for layout):
+ 3 -- center by median and scale;
+ 2 -- map to [0,1] interval;
+ 1 -- center by mean;
+ 0 -- perform no manipulations.
+ Default for non-cumulative is 3, default for cumulative is 0.}
+ \item{violin}{ Use violin plots (vs. box plots)? }
+ \item{ylim}{ Limits on x axis. For plotting sections of the test. }
+ \item{xlim}{ Limits on y axis. For plotting sections of the test. }
+ \item{xlab}{ Label for the x axis. }
+ \item{ylab}{ Label for the y axis. }
+ \item{key}{ Keys for the auxiliary statistic levels. }
[TRUNCATED]
To get the complete diff run:
svnlook diff /svnroot/rsiena -r 140
More information about the Rsiena-commits
mailing list