[Splm-commits] r210 - in pkg: . R man
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Thu Nov 17 18:55:25 CET 2016
Author: the_sculler
Date: 2016-11-17 18:55:24 +0100 (Thu, 17 Nov 2016)
New Revision: 210
Added:
pkg/R/rwtest.R
pkg/man/rwtest.Rd
Modified:
pkg/ChangeLog
pkg/DESCRIPTION
pkg/NAMESPACE
pkg/man/spml.Rd
Log:
Added rwtest, with formula, pseries and panelmodel methods.
Modified: pkg/ChangeLog
===================================================================
--- pkg/ChangeLog 2016-02-13 09:44:03 UTC (rev 209)
+++ pkg/ChangeLog 2016-11-17 17:55:24 UTC (rev 210)
@@ -1,3 +1,6 @@
+Changes in Version 1.4-5
+ o Added rwtest() for the randomized CD-p procedure, with methods for formula, pseries and panelmodel.
+
Changes in Version 1.4-4
o Fixed clmmtest() in bsktest.R (now takes residuals for the restricted model from spreml(..., errors="sem", lag=F) instead of spfeml()). Temporarily disabled standardized LM tests SLM1 and SLM2; default set at 'standardize=FALSE'in main function.
Modified: pkg/DESCRIPTION
===================================================================
--- pkg/DESCRIPTION 2016-02-13 09:44:03 UTC (rev 209)
+++ pkg/DESCRIPTION 2016-11-17 17:55:24 UTC (rev 210)
@@ -1,7 +1,7 @@
Package: splm
Title: Econometric Models for Spatial Panel Data
-Version: 1.4-4
-Date: 2016-01-12
+Version: 1.4-5
+Date: 2016-11-17
Authors at R: c(person(given = "Giovanni", family = "Millo", role = c("aut", "cre"), email = "giovanni.millo at generali.com"),
person(given = "Gianfranco", family = "Piras", role = c("aut"), email = "gpiras at mac.com"))
Author: Giovanni Millo [aut, cre],
@@ -9,6 +9,6 @@
Maintainer: Giovanni Millo <giovanni.millo at generali.com>
Description: ML and GM estimation and diagnostic testing of econometric models for spatial panel data.
Depends: R (>= 2.12.0), spdep
-Imports: plm, maxLik, MASS, bdsmatrix, ibdreg, nlme, Matrix, spam
+Imports: plm, maxLik, MASS, bdsmatrix, ibdreg, nlme, Matrix, spam, methods
License: GPL-2
LazyLoad: yes
Modified: pkg/NAMESPACE
===================================================================
--- pkg/NAMESPACE 2016-02-13 09:44:03 UTC (rev 209)
+++ pkg/NAMESPACE 2016-11-17 17:55:24 UTC (rev 210)
@@ -1,3 +1,10 @@
+importFrom("methods", "new")
+importFrom("stats", "as.formula", "coef", "coefficients",
+ "fitted.values", "lm", "model.extract", "model.frame",
+ "na.fail", "optimize", "pchisq", "pnorm", "printCoefmat",
+ "residuals", "terms", "var", "vcov")
+importFrom("stats", "cor", "lm.fit", "reshape", "resid")
+importFrom("plm", "pdata.frame")
importFrom(stats, model.matrix, model.response, aggregate, effects)
importFrom(stats, optim, nlminb)
importFrom(plm, plm.data)
@@ -12,11 +19,13 @@
importFrom(maxLik, maxLik)
importFrom(bdsmatrix, bdsmatrix)
importFrom(MASS, ginv)
+importFrom(parallel, mclapply)
export(bsktest, sphtest, bsjktest, vcov.splm,
effects.splm, print.effects.splm, slag,
-print.splm, spml, spgm, summary.splm, sphtest, listw2dgCMatrix, spreml)
+print.splm, spml, spgm, summary.splm, sphtest,
+listw2dgCMatrix, spreml, rwtest)
@@ -32,3 +41,6 @@
S3method(sphtest, splm)
S3method(impacts, splm)
S3method(slag, pseries)
+S3method(rwtest, formula)
+S3method(rwtest, panelmodel)
+S3method(rwtest, pseries)
Added: pkg/R/rwtest.R
===================================================================
--- pkg/R/rwtest.R (rev 0)
+++ pkg/R/rwtest.R 2016-11-17 17:55:24 UTC (rev 210)
@@ -0,0 +1,345 @@
+
+## this version 2 (10 for mcwtest): pass on pseries of residuals,
+## then reshape and use fast cor() to calculate the rho matrix
+## (much speedier); from changes to pcdtest in plm_1.6-2
+## moreover, parallelized simulation step.
+## Giovanni Millo, 17/11/2016
+
+
+rwtest<-function (x, ...)
+ {
+ UseMethod("rwtest")
+ }
+
+
+rwtest.formula <- function (x, data, w, index = NULL,
+ model = NULL,
+ replications=99, seed=NULL,
+ order=1, mc=1,
+ test = c("rho", "cd", "sclm"),
+ alternative=c("twosided",
+ "onesided",
+ "symmetric"), ...) {
+
+ ## much like pcdtest.formula
+ mymod <- plm(x, data, index = index,
+ model = "pooling", ...)
+ if (is.null(model) & min(pdim(mymod)$Tint$Ti) <
+ length(mymod$coefficients) + 1) {
+ warning("Insufficient number of observations in time to estimate heterogeneous model: using within residuals",
+ call. = FALSE)
+ model <- "within"
+ }
+
+ ind0 <- attr(model.frame(mymod), "index")
+ tind <- as.numeric(ind0[[2]])
+ ind <- as.numeric(ind0[[1]])
+
+ if (is.null(model)) {
+ ## estimate individual regressions one by one
+ X <- model.matrix(mymod)
+ y <- model.response(model.frame(mymod))
+ unind <- unique(ind)
+ n <- length(unind)
+ ti.res <- vector("list", n)
+ ind.res <- vector("list", n)
+ tind.res <- vector("list", n)
+ for (i in 1:n) {
+ tX <- X[ind == unind[i], , drop = FALSE]
+ ty <- y[ind == unind[i]]
+ res.i <- lm.fit(tX, ty)$resid
+ ti.res[[i]] <- res.i
+ names(ti.res[[i]]) <- tind[ind == unind[i]]
+ ind.res[[i]] <- rep(i, length(res.i))
+ tind.res[[i]] <- tind[ind == unind[i]]
+ }
+ ## make pseries of (all) residuals
+ resdata <- data.frame(ee = unlist(ti.res),
+ ind = unlist(ind.res),
+ tind = unlist(tind.res))
+ pee <- pdata.frame(resdata, index = c("ind", "tind"))
+ tres <- pee$ee
+ }
+ else {
+ mymod <- plm(x, data, index = index, model = model, ...)
+ tres <- resid(mymod)
+ unind <- unique(ind)
+ n <- length(unind)
+ t <- min(pdim(mymod)$Tint$Ti)
+ nT <- length(ind)
+ k <- length(mymod$coefficients)
+ }
+
+ return(mcwres(tres=tres, n=n, w=w,
+ form=paste(deparse(substitute(formula))),
+ test=match.arg(test),
+ alternative=match.arg(alternative),
+ replications=replications,
+ seed=seed, order=order, mc=mc))
+
+}
+
+rwtest.pseries <- function(x, w, replications=99,
+ seed=NULL, order=1, mc=1,
+ test = c("rho", "cd", "sclm"),
+ alternative=c("twosided",
+ "onesided",
+ "symmetric"), ...) {
+ ## get indices
+ index <- attr(x, "index")
+ tind <- as.numeric(index[[2]])
+ ind <- as.numeric(index[[1]])
+ n <- length(unique(ind))
+
+ return(mcwres(tres=x, n=n, w=w,
+ form=paste(deparse(substitute(formula))),
+ test=match.arg(test),
+ alternative=match.arg(alternative),
+ replications=replications, seed=seed,
+ order=order, mc=mc))
+
+}
+
+
+rwtest.panelmodel <- function(x, w, replications=99,
+ seed=NULL, order=1, mc=1,
+ test = c("rho", "cd", "sclm"),
+ alternative=c("twosided",
+ "onesided",
+ "symmetric"), ...) {
+
+ ## this is taken from the last piece of pcdtest.formula,
+ ## after estimating relevant model
+
+ ## fetch residuals
+ tres <- resid(x)
+
+ ## get indices
+ index <- attr(model.frame(x), "index")
+ tind <- as.numeric(index[[2]])
+ ind <- as.numeric(index[[1]])
+
+ ## det. number of groups
+ unind<-unique(ind)
+ n<-length(unind)
+
+ return(mcwres(tres=tres, n=n, w=w,
+ form=paste(deparse(substitute(formula))),
+ test=match.arg(test),
+ alternative=match.arg(alternative),
+ replications=replications, seed=seed,
+ order=order, mc=mc))
+
+}
+
+mcwres<-function(tres, n, w, form, test, alternative,
+ replications, seed, order, mc) {
+ ## as in pcdtest;
+ ## now tres is the pseries of model residuals
+
+ ## calc matrix of all possible pairwise corr.
+ ## coeffs. (200x speedup from using cor())
+ wideres <- t(preshape(tres, na.rm=FALSE))
+ rho <- cor(wideres, use="pairwise.complete.obs")
+
+ ## find length of intersecting pairs
+ ## fast method, times down 200x
+ data.res <- data.frame(time=attr(tres, "index")[[2]],
+ indiv=attr(tres, "index")[[1]])
+ ## tabulate which obs in time for each ind are !na
+ presence.tab <- table(data.res)
+ ## calculate t.ij
+ t.ij <- crossprod(presence.tab)
+
+ ## input check
+ if (!is.null(w)) {
+ dims.w <- dim(w)
+ if(dims.w[1] != n || dims.w[2] != n)
+ stop(paste0("matrix 'w' describing proximity of individuals has wrong dimensions: ",
+ "should be ", n, " x ", n,
+ " (no. of individuals) but is ",
+ dims.w[1], " x ", dims.w[2]))
+ }
+
+ ## 'til here as in panelCDtest. Now for the selection of
+ ## rho's based on (p-th order) contiguity
+
+ ## make proximity matrix up to the p-th lag
+ ## TODO: outsource this to wlag()
+
+ if(order>1) {
+ ## if it is a matrix make it an nb object
+ if(is.matrix(w)) w<-mat2listw(w)
+ ## make lagged proximity matrix
+ w<-wlag(w$neighbours,maxlag=order)
+ } else {
+ ## if order=1, just make w<-listw2mat(w) if it is
+ ## a listw obj.
+ if("listw"%in%class(w)) {
+ w<-listw2mat(w)
+ }
+ }
+
+ ## until here all the necessary rho_ij's have been
+ ## calculated;
+ ## the relevant W matrix has been determined;
+ ## now for the randomization of W and collection of the
+ ## distr. of CD(p)s
+
+ ##### begin: random W snippet (from MCWtest.R) #####
+ ##### (until end) ##################################
+
+ mcwdist<-rep(NA,replications+1)
+
+ ## calc. ''true'' CD(p) statistic:
+
+ ## make (binary) selector matrix based on the contiguity
+ ## matrix w and extracting elements corresponding to ones
+ ## in the lower triangle excluding the diagonal
+
+ ## transform in logicals (0=FALSE, else=TRUE: no need to
+ ## worry about row-std. matrices)
+ selector.mat<-matrix(as.logical(w),ncol=n)
+ ## set upper tri and diagonal to false
+ selector.mat[upper.tri(selector.mat,diag=TRUE)]<-FALSE
+
+ ## number of elements in selector.mat
+ elem.num<-sum(selector.mat)
+
+ ## set components not dependent on w once for all,
+ ## depending on test type
+
+ switch(test,
+ cd = {
+ ## Pesaran statistic for (p-th order) local
+ ## cross-sectional dependence
+ rad.n<-sqrt(1/elem.num)
+ t.rhos<-sqrt(t.ij)*rho
+ testname<-"CD test"
+ },
+ sclm = {
+ ## Scaled LM statistic for (p-th order) local
+ ## cross-sectional dependence
+ rad.n<-sqrt(1/(2*elem.num))
+ t.rhos<-t.ij*rho^2-1
+ testname<-"NLM test"
+ },
+ rho = {
+ rad.n <- 1/elem.num
+ t.rhos <- rho
+ testname <- "Average correlation coefficient"
+ }
+ )
+
+ cdpstat <- rad.n*sum(t.rhos[selector.mat])
+
+ ## calc. ''randomized'' CD(p) stats
+ ## reuse t.rhos, rad.n, independent from w's
+
+ if(!is.null(seed)) set.seed(seed)
+ rwfun <- function(x) rad.n*sum(t.rhos[brandW(w)])
+ lfun <- if(mc==1) {
+ lapply}
+ else {
+ function(x, FUN, ...) {
+ mclapply(x, FUN, mc.cores=mc, ...)
+ }
+ }
+ mcwlist <- lfun(seq_along(1:replications), FUN=rwfun)
+ mcwdist <- c(cdpstat, unlist(mcwlist))
+
+ ## how many random draws are more extreme?
+ switch(alternative,
+ twosided={quasi.pval <- 2 * min(sum(mcwdist < cdpstat)/(replications+1), sum(mcwdist >= cdpstat)/(replications+1))
+ },
+ onesided={
+ quasi.pval <- sum(mcwdist >= cdpstat)/(replications+1)
+ },
+ symmetric={
+ quasi.pval <- sum(abs(mcwdist) >= abs(cdpstat))/(replications+1)
+ })
+ myres <- quasi.pval
+
+ ## setup htest object
+ RVAL <- list(statistic = NULL, parameter = NULL,
+ method = paste("Randomized W test for spatial correlation of order", order), #, " \n\n Based on:", testname),
+ alternative = alternative,
+ p.value = quasi.pval,
+ data.name = form)
+ class(RVAL) <- "htest"
+ return(RVAL)
+}
+
+## utilities
+
+wlag<-function(x,maxlag) {
+ ## define (incremental!) lag function for w
+ ## returns the nb list of all neighbours up to order=maxlag
+ ## standalone version under wlag.R
+
+ n<-length(x)
+ mynb<-nblag(x,maxlag=maxlag)
+
+ mytot<-vector("list",n)
+
+ for(i in 1:n) {
+ mytot[[i]]<-mynb[[1]][[i]]
+ for(j in 2:maxlag) mytot[[i]] <- c(mytot[[i]],
+ mynb[[j]][[i]])
+ ## reorder
+ mytot[[i]]<-mytot[[i]][order(mytot[[i]])]
+ }
+ lagmat<-matrix(0,ncol=n,nrow=n)
+ for(i in 1:n) lagmat[i,mytot[[i]]]<-1
+ return(lagmat)
+}
+
+brandW<-function(w) {
+ ## randomize a contiguity matrix, boolean output matrix
+ ## argument: a contiguity matrix (doesn't matter whether
+ ## standardized or not)
+
+ n<-dim(w)[[1]]
+ brW<-matrix(FALSE,ncol=n,nrow=n)
+
+ ## calc. filling rate
+ ## (sum(ones)/(total cells-diagonal) must be
+ ## integer and even, by construction)
+
+ ## make everything binary:
+ booleW<-as.logical(w)
+ ## calc. (diagonal is always excluded)
+ fillW<-sum(booleW)/2
+
+ ## fill in randomly the lower part of matrix with
+ ## fillW 'TRUE's
+ smplW<-sample(n*(n-1)/2,fillW) # w/o replacement
+ brW[lower.tri(brW)][smplW]<-TRUE
+
+ ## don't need to mirror upper triangle any more
+ ## (only lower tri is used in CD(p) test
+ return(brW)
+ }
+
+preshape <- function(x, na.rm=TRUE, ...) {
+ ## to be substituted by importing plm:::preshape
+
+ ## reshapes pseries, e.g. of residuals from a
+ ## panelmodel, in wide form
+ inames <- names(attr(x, "index"))
+ mres <- reshape(cbind(as.vector(x), attr(x, "index")),
+ direction="wide",
+ timevar=inames[2], idvar=inames[1])
+ ## drop ind in first column
+ mres <- mres[,-1]
+ ## reorder columns (may be scrambled depending on first
+ ## available obs in unbalanced panels)
+ mres <- mres[, order(dimnames(mres)[[2]])]
+ ## if requested, drop columns (time periods) with NAs
+ if(na.rm) {
+ rmc <- which(is.na(apply(mres, 2, sum)))
+ if(sum(rmc)>0) mres <- mres[,-rmc]
+ }
+ return(mres)
+}
+
Added: pkg/man/rwtest.Rd
===================================================================
--- pkg/man/rwtest.Rd (rev 0)
+++ pkg/man/rwtest.Rd 2016-11-17 17:55:24 UTC (rev 210)
@@ -0,0 +1,140 @@
+\name{rwtest}
+\alias{rwtest}
+\alias{rwtest.formula}
+\alias{rwtest.panelmodel}
+\alias{rwtest.pseries}
+\title{Randomization-based test of spatial dependence for panel models}
+
+\description{
+ Randomization-based test of spatial dependence for panel models, robust
+ to global dependence induced by common factors and to persistence
+ (serial correlation) in the data
+}
+
+\usage{
+rwtest(x, \dots)
+\method{rwtest}{formula}(x, data, w, index = NULL, model = NULL,
+ replications = 99, seed=NULL, order=1,
+ mc=1, test = c("rho", "cd", "sclm"),
+ alternative=c("twosided", "onesided",
+ "symmetric"), \dots)
+\method{rwtest}{panelmodel}(x, w, replications = 99, seed=NULL,
+ order=1, mc=1,
+ test = c("rho", "cd", "sclm"),
+ alternative=c("twosided", "onesided",
+ "symmetric"), \dots)
+\method{rwtest}{pseries}(x, w, replications = 99, seed=NULL,
+ order=1, mc=1,
+ test = c("rho", "cd", "sclm"),
+ alternative=c("twosided", "onesided",
+ "symmetric"), \dots)
+
+}
+
+\arguments{
+ \item{x}{an object of class \code{formula}, \code{panelmodel}, or \code{pseries}
+ (depending on the respective interface) describing the model to be tested}
+ \item{data}{a \code{data.frame}}
+ \item{w}{a \code{n x n} \code{matrix} describing proximity between
+ individuals, with \eqn{w_ij = a} where \eqn{a} is any number such
+ that \code{as.logical(a)==TRUE},
+ if \eqn{i,j} are neighbours, \eqn{0} or any number \eqn{b} such that
+ \code{as.logical(b)==FALSE} elsewhere. Only the lower triangluar part
+ (without diagonal) of \code{w} after coercing by \code{as.logical()}
+ is evaluated for neighbouring information (but \code{w} can be
+ symmetric). See also \bold{Details} and \bold{Examples}.}
+ \item{index}{an optional numerical index, in case \code{data} has to be
+ formatted by \code{plm.data}}
+ \item{model}{an optional character string indicating which type of
+ model to estimate;
+ if left to \code{NULL}, the original heterogeneous specification of
+ Pesaran is used}
+ \item{replications}{the number of Monte Carlo randomizations of the
+ neighbourhood matrix (default: 99),}
+ \item{seed}{the optional random seed,}
+ \item{order}{the order of neighbourhood to test for,}
+ \item{mc}{the number of parallel threads to execute; defaults to 1
+ (serial execution); is limited to the number of execution cores
+ actually available.}
+ \item{test}{the type of test statistic to be returned. One of
+ \itemize{
+ \item \code{"rho"} for the average correlation coefficient,
+ \item \code{"cd"} for Pesaran's CD statistic, or
+ \item \code{"sclm"} for the scaled version of Breusch and Pagan's LM
+ statistic,}}
+ \item{alternative}{the alternative hypothesis for the test, defaulting
+ to (asymmetric) twosided,}
+ \item{\dots}{further arguments to be passed on to \code{plm}, such as
+ e.g. \code{effect} or \code{random.method}}
+}
+
+\value{
+An object of class \code{"htest"}.
+}
+
+\details{
+ This test is meant as a generalization of Pesaran's spatial dependence
+ test "CD(p)" for robustness against global dependence (perhaps of the
+ factor type) and persistence in the data, both of which the original
+ test does not tolerate.
+
+ The procedure can be applied to model residuals as well as to
+ individual \code{pseries}.
+ See the comments in \code{pcdtest} as for the different methods.
+
+ Space is defined supplying a proximity matrix (elements coercible to
+ \code{logical}) with argument \code{w} which provides information on
+ whether any pair of individuals are neighbours or not. If
+ \code{order=1}, only first-order neighbouring pairs will be used in
+ computing the test; else, \code{w} will be transformed in the
+ neighbourhood matrix of the appropriate order. The matrix need not be
+ binary, so commonly used ``row--standardized'' matrices can be employed
+ as well. \code{nb} objects from \pkg{spdep} must instead be transformed
+ into matrices by \pkg{spdep}'s function \code{nb2mat} before using.
+
+ Notice that the \code{"rho"} and \code{"cd"} tests are permutationally
+ equivalent.
+
+ The test is suitable also for unbalanced panels.
+
+ The test on a \code{pseries} is the same as a test on a pooled
+ regression model of that variable on a constant,
+ i.e. \code{rwtest(some_pseries)} is equivalent to
+ \code{rwtest(plm(some_var ~ 1, data = some_pdata.frame, model =
+ "pooling")} and also equivalent to \code{rwtest(some_var ~ 1, data =
+ some_data)}, where \code{some_var} is the variable name in the data
+ which corresponds to \code{some_pseries}.
+}
+
+\references{
+ Millo, G. (2016), A simple randomization test for spatial dependence in
+ the presence of common factors and serial correlation,
+ \emph{(unpublished)}, \bold{xx}(x), pp. xxx--xxx.
+ Pesaran, M.H. (2004), General Diagnostic Tests for Cross Section
+ Dependence in Panels, \emph{CESifo} Working Paper 1229.
+ Pesaran, M.H. (2015), Testing Weak Cross--Sectional Dependence in Large
+ Panels, \emph{Econometric Reviews}, \bold{34}(6-10), pp. 1089--1117.
+}
+
+\examples{
+data(Produc, package = "plm")
+data(usaww)
+fm <- log(gsp) ~ log(pcap) + log(pc) + log(emp) + unemp
+## test on heterogeneous model (separate time series regressions)
+rwtest(fm, data = Produc, w=usaww, index = c("state", "year"))
+
+## test on two-way fixed effects homogeneous model
+rwtest(fm, data = Produc, w=usaww, index = c("state", "year"),
+ model = "within", effect = "twoways")
+
+## test on panelmodel object
+library(plm)
+g <- plm(fm, data = Produc)
+rwtest(g, w=usaww)
+
+## test on pseries, higher-order neighbourhood
+pprod <- pdata.frame(Produc)
+rwtest(pprod$gsp, w=usaww, order=3)
+}
+
+\keyword{htest}
Modified: pkg/man/spml.Rd
===================================================================
--- pkg/man/spml.Rd 2016-02-13 09:44:03 UTC (rev 209)
+++ pkg/man/spml.Rd 2016-11-17 17:55:24 UTC (rev 210)
@@ -94,6 +94,7 @@
respatlag <- spml(fm, data = Produc, listw = mat2listw(usaww),
model="random", spatial.error="none", lag=TRUE)
summary(respatlag)
+## calculate impact measures
impac1 <- impacts(respatlag, listw = mat2listw(usaww, style = "W"), time = 17)
summary(impac1, zstats=TRUE, short=TRUE)
}
More information about the Splm-commits
mailing list