[Splm-commits] r210 - in pkg: . R man

Thu Nov 17 18:55:25 CET 2016

Author: the_sculler
Date: 2016-11-17 18:55:24 +0100 (Thu, 17 Nov 2016)
New Revision: 210

Added:
   pkg/R/rwtest.R
   pkg/man/rwtest.Rd
Modified:
   pkg/ChangeLog
   pkg/DESCRIPTION
   pkg/NAMESPACE
   pkg/man/spml.Rd
Log:
Added rwtest, with formula, pseries and panelmodel methods.


Modified: pkg/ChangeLog
===================================================================

--- pkg/ChangeLog	2016-02-13 09:44:03 UTC (rev 209)
+++ pkg/ChangeLog	2016-11-17 17:55:24 UTC (rev 210)
@@ -1,3 +1,6 @@
+Changes in Version 1.4-5
+ o Added rwtest() for the randomized CD-p procedure, with methods for formula, pseries and panelmodel.
+
 Changes in Version 1.4-4
  o Fixed clmmtest() in bsktest.R (now takes residuals for the restricted model from spreml(..., errors="sem", lag=F) instead of spfeml()). Temporarily disabled standardized LM tests SLM1 and SLM2; default set at 'standardize=FALSE'in main function.
 	

Modified: pkg/DESCRIPTION
===================================================================
--- pkg/DESCRIPTION	2016-02-13 09:44:03 UTC (rev 209)
+++ pkg/DESCRIPTION	2016-11-17 17:55:24 UTC (rev 210)
@@ -1,7 +1,7 @@
 Package: splm
 Title: Econometric Models for Spatial Panel Data
-Version: 1.4-4
-Date: 2016-01-12
+Version: 1.4-5
+Date: 2016-11-17
 Authors at R: c(person(given = "Giovanni", family = "Millo", role = c("aut", "cre"), email = "giovanni.millo at generali.com"),
              person(given = "Gianfranco", family = "Piras", role = c("aut"), email = "gpiras at mac.com"))
 Author: Giovanni Millo [aut, cre],
@@ -9,6 +9,6 @@
 Maintainer: Giovanni Millo <giovanni.millo at generali.com>
 Description: ML and GM estimation and diagnostic testing of econometric models for spatial panel data.
 Depends: R (>= 2.12.0), spdep
-Imports: plm, maxLik, MASS, bdsmatrix, ibdreg, nlme, Matrix, spam
+Imports: plm, maxLik, MASS, bdsmatrix, ibdreg, nlme, Matrix, spam, methods
 License: GPL-2
 LazyLoad: yes

Modified: pkg/NAMESPACE
===================================================================
--- pkg/NAMESPACE	2016-02-13 09:44:03 UTC (rev 209)
+++ pkg/NAMESPACE	2016-11-17 17:55:24 UTC (rev 210)
@@ -1,3 +1,10 @@
+importFrom("methods", "new")
+importFrom("stats", "as.formula", "coef", "coefficients",
+           "fitted.values", "lm", "model.extract", "model.frame",
+           "na.fail", "optimize", "pchisq", "pnorm", "printCoefmat",
+           "residuals", "terms", "var", "vcov")
+importFrom("stats", "cor", "lm.fit", "reshape", "resid")
+importFrom("plm", "pdata.frame")
 importFrom(stats, model.matrix, model.response, aggregate, effects)
 importFrom(stats, optim, nlminb)
 importFrom(plm, plm.data)
@@ -12,11 +19,13 @@
 importFrom(maxLik, maxLik)
 importFrom(bdsmatrix, bdsmatrix)
 importFrom(MASS, ginv)
+importFrom(parallel, mclapply)
 
 
 export(bsktest, sphtest, bsjktest, vcov.splm,
 effects.splm, print.effects.splm, slag, 
-print.splm, spml, spgm, summary.splm, sphtest, listw2dgCMatrix, spreml)
+print.splm, spml, spgm, summary.splm, sphtest,
+listw2dgCMatrix, spreml, rwtest)
 
 
 
@@ -32,3 +41,6 @@
 S3method(sphtest, splm)
 S3method(impacts, splm)
 S3method(slag, pseries)
+S3method(rwtest, formula)
+S3method(rwtest, panelmodel)
+S3method(rwtest, pseries)

Added: pkg/R/rwtest.R
===================================================================
--- pkg/R/rwtest.R	                        (rev 0)
+++ pkg/R/rwtest.R	2016-11-17 17:55:24 UTC (rev 210)
@@ -0,0 +1,345 @@
+
+## this version 2 (10 for mcwtest): pass on pseries of residuals,
+## then reshape and use fast cor() to calculate the rho matrix
+## (much speedier); from changes to pcdtest in plm_1.6-2
+## moreover, parallelized simulation step.
+## Giovanni Millo, 17/11/2016
+
+
+rwtest<-function (x, ...)
+ {
+    UseMethod("rwtest")
+ }
+
+
+rwtest.formula <- function (x, data, w, index = NULL,
+                            model = NULL,
+                            replications=99, seed=NULL,
+                            order=1, mc=1,
+                            test = c("rho", "cd", "sclm"),
+                            alternative=c("twosided",
+                                          "onesided",
+                                          "symmetric"), ...) {
+
+    ## much like pcdtest.formula
+    mymod <- plm(x, data, index = index,
+                 model = "pooling", ...)
+    if (is.null(model) & min(pdim(mymod)$Tint$Ti) <
+        length(mymod$coefficients) + 1) {
+        warning("Insufficient number of observations in time to estimate heterogeneous model: using within residuals",
+            call. = FALSE)
+        model <- "within"
+    }
+    
+    ind0 <- attr(model.frame(mymod), "index")
+    tind <- as.numeric(ind0[[2]])
+    ind <- as.numeric(ind0[[1]])
+    
+    if (is.null(model)) {
+        ## estimate individual regressions one by one
+        X <- model.matrix(mymod)
+        y <- model.response(model.frame(mymod))
+        unind <- unique(ind)
+        n <- length(unind)
+        ti.res <- vector("list", n)
+        ind.res <- vector("list", n)
+        tind.res <- vector("list", n)
+        for (i in 1:n) {
+            tX <- X[ind == unind[i], , drop = FALSE]
+            ty <- y[ind == unind[i]]
+            res.i <- lm.fit(tX, ty)$resid
+            ti.res[[i]] <- res.i
+            names(ti.res[[i]]) <- tind[ind == unind[i]]
+            ind.res[[i]] <- rep(i, length(res.i))
+            tind.res[[i]] <- tind[ind == unind[i]]
+        }
+        ## make pseries of (all) residuals
+        resdata <- data.frame(ee = unlist(ti.res),
+                              ind = unlist(ind.res),
+                              tind = unlist(tind.res))
+        pee <- pdata.frame(resdata, index = c("ind", "tind"))
+        tres <- pee$ee
+    }
+    else {
+        mymod <- plm(x, data, index = index, model = model, ...)
+        tres <- resid(mymod)
+        unind <- unique(ind)
+        n <- length(unind)
+        t <- min(pdim(mymod)$Tint$Ti)
+        nT <- length(ind)
+        k <- length(mymod$coefficients)
+        }
+
+    return(mcwres(tres=tres, n=n, w=w,
+                  form=paste(deparse(substitute(formula))),
+                  test=match.arg(test),
+                  alternative=match.arg(alternative),
+                  replications=replications,
+                  seed=seed, order=order, mc=mc))
+
+}
+
+rwtest.pseries <- function(x, w, replications=99,
+                           seed=NULL, order=1, mc=1,
+                           test = c("rho", "cd", "sclm"),
+                           alternative=c("twosided",
+                                         "onesided",
+                                         "symmetric"), ...) {
+  ## get indices
+  index <- attr(x, "index")
+  tind <- as.numeric(index[[2]])
+  ind <- as.numeric(index[[1]])
+  n <- length(unique(ind))
+
+  return(mcwres(tres=x, n=n, w=w,
+                form=paste(deparse(substitute(formula))),
+                test=match.arg(test),
+                alternative=match.arg(alternative),
+                replications=replications, seed=seed,
+                order=order, mc=mc))
+
+}
+
+    
+rwtest.panelmodel <- function(x, w, replications=99,
+                              seed=NULL, order=1, mc=1,
+                              test = c("rho", "cd", "sclm"),
+                              alternative=c("twosided",
+                                            "onesided",
+                                           "symmetric"), ...) {
+
+  ## this is taken from the last piece of pcdtest.formula,
+  ## after estimating relevant model
+
+  ## fetch residuals
+  tres <- resid(x)
+
+  ## get indices
+  index <- attr(model.frame(x), "index")
+  tind <- as.numeric(index[[2]])
+  ind <- as.numeric(index[[1]])
+
+  ## det. number of groups
+  unind<-unique(ind)
+  n<-length(unind)
+
+  return(mcwres(tres=tres, n=n, w=w,
+                form=paste(deparse(substitute(formula))),
+                test=match.arg(test),
+                alternative=match.arg(alternative),
+                replications=replications, seed=seed,
+                order=order, mc=mc))
+
+}
+
+mcwres<-function(tres, n, w, form, test, alternative,
+                 replications, seed, order, mc) {
+    ## as in pcdtest;
+    ## now tres is the pseries of model residuals
+
+    ## calc matrix of all possible pairwise corr.
+    ## coeffs. (200x speedup from using cor())
+    wideres <- t(preshape(tres, na.rm=FALSE))
+    rho <- cor(wideres, use="pairwise.complete.obs")
+    
+    ## find length of intersecting pairs
+    ## fast method, times down 200x
+    data.res <- data.frame(time=attr(tres, "index")[[2]],
+                           indiv=attr(tres, "index")[[1]])
+    ## tabulate which obs in time for each ind are !na
+    presence.tab <- table(data.res)
+    ## calculate t.ij
+    t.ij <- crossprod(presence.tab)
+    
+    ## input check
+    if (!is.null(w)) {
+        dims.w <- dim(w)
+        if(dims.w[1] != n || dims.w[2] != n)
+            stop(paste0("matrix 'w' describing proximity of individuals has wrong dimensions: ",
+                        "should be ", n, " x ", n,
+                        " (no. of individuals) but is ",
+                        dims.w[1], " x ", dims.w[2]))
+    }
+   
+  ## 'til here as in panelCDtest. Now for the selection of
+  ## rho's based on (p-th order) contiguity
+    
+  ## make proximity matrix up to the p-th lag
+  ## TODO: outsource this to wlag()
+
+  if(order>1) {
+      ## if it is a matrix make it an nb object
+      if(is.matrix(w)) w<-mat2listw(w)
+      ## make lagged proximity matrix
+      w<-wlag(w$neighbours,maxlag=order)
+  } else {
+      ## if order=1, just make w<-listw2mat(w) if it is
+      ## a listw obj.
+      if("listw"%in%class(w)) {
+          w<-listw2mat(w)
+      }
+  }
+
+    ## until here all the necessary rho_ij's have been
+    ## calculated;
+    ## the relevant W matrix has been determined;
+    ## now for the randomization of W and collection of the
+    ## distr. of CD(p)s
+
+    ##### begin: random W snippet (from MCWtest.R) #####
+    ##### (until end) ##################################
+
+    mcwdist<-rep(NA,replications+1)
+    
+    ## calc. ''true'' CD(p) statistic:
+
+    ## make (binary) selector matrix based on the contiguity
+    ## matrix w and extracting elements corresponding to ones
+    ## in the lower triangle excluding the diagonal
+
+    ## transform in logicals (0=FALSE, else=TRUE: no need to
+    ## worry about row-std. matrices)
+    selector.mat<-matrix(as.logical(w),ncol=n)
+    ## set upper tri and diagonal to false
+    selector.mat[upper.tri(selector.mat,diag=TRUE)]<-FALSE
+
+    ## number of elements in selector.mat
+    elem.num<-sum(selector.mat)
+
+    ## set components not dependent on w once for all,
+    ## depending on test type
+
+    switch(test,
+           cd = {
+               ## Pesaran statistic for (p-th order) local
+               ## cross-sectional dependence
+               rad.n<-sqrt(1/elem.num)
+               t.rhos<-sqrt(t.ij)*rho
+               testname<-"CD test"
+           },
+           sclm = {
+               ## Scaled LM statistic for (p-th order) local
+               ## cross-sectional dependence
+               rad.n<-sqrt(1/(2*elem.num))
+               t.rhos<-t.ij*rho^2-1
+               testname<-"NLM test"
+           },
+           rho = {
+               rad.n <- 1/elem.num
+               t.rhos <- rho
+               testname <- "Average correlation coefficient"
+           }
+           )
+
+    cdpstat <- rad.n*sum(t.rhos[selector.mat])
+
+    ## calc. ''randomized'' CD(p) stats
+    ## reuse t.rhos, rad.n, independent from w's
+
+    if(!is.null(seed)) set.seed(seed)
+    rwfun <- function(x) rad.n*sum(t.rhos[brandW(w)])
+    lfun <- if(mc==1) {
+                lapply}
+            else {
+                    function(x, FUN, ...) {
+                        mclapply(x, FUN, mc.cores=mc, ...)
+                    }
+                }
+    mcwlist <- lfun(seq_along(1:replications), FUN=rwfun)
+    mcwdist <- c(cdpstat, unlist(mcwlist))
+    
+    ## how many random draws are more extreme?
+    switch(alternative,
+           twosided={quasi.pval <- 2 * min(sum(mcwdist < cdpstat)/(replications+1), sum(mcwdist >= cdpstat)/(replications+1))
+           },
+           onesided={
+               quasi.pval <- sum(mcwdist >= cdpstat)/(replications+1)
+           },
+           symmetric={
+               quasi.pval <- sum(abs(mcwdist) >= abs(cdpstat))/(replications+1)
+           })
+    myres <- quasi.pval
+    
+    ## setup htest object
+    RVAL <- list(statistic = NULL, parameter = NULL,
+                 method = paste("Randomized W test for spatial correlation of order", order),  #, " \n\n Based on:", testname), 
+                 alternative = alternative,
+                 p.value = quasi.pval,
+                 data.name = form)
+    class(RVAL) <- "htest"
+    return(RVAL)
+}
+
+## utilities
+
+wlag<-function(x,maxlag) {
+    ## define (incremental!) lag function for w
+    ## returns the nb list of all neighbours up to order=maxlag
+    ## standalone version under wlag.R
+
+    n<-length(x)
+    mynb<-nblag(x,maxlag=maxlag)
+    
+    mytot<-vector("list",n)
+    
+    for(i in 1:n) {
+        mytot[[i]]<-mynb[[1]][[i]]
+        for(j in 2:maxlag) mytot[[i]] <- c(mytot[[i]],
+                                           mynb[[j]][[i]])
+        ## reorder
+        mytot[[i]]<-mytot[[i]][order(mytot[[i]])]
+    }
+    lagmat<-matrix(0,ncol=n,nrow=n)
+    for(i in 1:n) lagmat[i,mytot[[i]]]<-1
+    return(lagmat)
+}
+
+brandW<-function(w) {
+  ## randomize a contiguity matrix, boolean output matrix
+  ## argument: a contiguity matrix (doesn't matter whether
+  ## standardized or not)
+
+  n<-dim(w)[[1]]
+  brW<-matrix(FALSE,ncol=n,nrow=n)
+
+  ## calc. filling rate
+  ## (sum(ones)/(total cells-diagonal) must be
+  ## integer and even, by construction)
+
+  ## make everything binary:
+  booleW<-as.logical(w)
+  ## calc. (diagonal is always excluded)
+  fillW<-sum(booleW)/2
+
+  ## fill in randomly the lower part of matrix with
+  ## fillW 'TRUE's
+  smplW<-sample(n*(n-1)/2,fillW)   # w/o replacement
+  brW[lower.tri(brW)][smplW]<-TRUE
+
+  ## don't need to mirror upper triangle any more
+  ## (only lower tri is used in CD(p) test
+  return(brW)
+ }
+
+preshape <- function(x, na.rm=TRUE, ...) {
+    ## to be substituted by importing plm:::preshape
+    
+    ## reshapes pseries, e.g. of residuals from a
+    ## panelmodel, in wide form
+    inames <- names(attr(x, "index"))
+    mres <- reshape(cbind(as.vector(x), attr(x, "index")),
+                    direction="wide",
+                    timevar=inames[2], idvar=inames[1])
+    ## drop ind in first column
+    mres <- mres[,-1]
+    ## reorder columns (may be scrambled depending on first
+    ## available obs in unbalanced panels)
+    mres <- mres[, order(dimnames(mres)[[2]])]
+    ## if requested, drop columns (time periods) with NAs
+    if(na.rm) {
+        rmc <- which(is.na(apply(mres, 2, sum)))
+        if(sum(rmc)>0) mres <- mres[,-rmc]
+    }
+    return(mres)
+}
+

Added: pkg/man/rwtest.Rd
===================================================================
--- pkg/man/rwtest.Rd	                        (rev 0)
+++ pkg/man/rwtest.Rd	2016-11-17 17:55:24 UTC (rev 210)
@@ -0,0 +1,140 @@
+\name{rwtest}
+\alias{rwtest}
+\alias{rwtest.formula}
+\alias{rwtest.panelmodel}
+\alias{rwtest.pseries}
+\title{Randomization-based test of spatial dependence for panel models}
+
+\description{
+ Randomization-based test of spatial dependence for panel models, robust
+ to global dependence induced by common factors and to persistence
+ (serial correlation) in the data
+}
+
+\usage{
+rwtest(x, \dots)
+\method{rwtest}{formula}(x, data, w, index = NULL, model = NULL,
+                         replications = 99, seed=NULL, order=1,
+                         mc=1, test = c("rho", "cd", "sclm"),
+                         alternative=c("twosided", "onesided",
+                                       "symmetric"), \dots)
+\method{rwtest}{panelmodel}(x, w, replications = 99, seed=NULL,
+                         order=1, mc=1,
+                         test = c("rho", "cd", "sclm"),
+                         alternative=c("twosided", "onesided",
+                                       "symmetric"), \dots)
+\method{rwtest}{pseries}(x, w, replications = 99, seed=NULL,
+                         order=1, mc=1,
+                         test = c("rho", "cd", "sclm"),
+                         alternative=c("twosided", "onesided",
+                                       "symmetric"), \dots)
+                          
+}
+
+\arguments{
+  \item{x}{an object of class \code{formula}, \code{panelmodel}, or \code{pseries} 
+          (depending on the respective interface) describing the model to be tested}
+  \item{data}{a \code{data.frame}}
+  \item{w}{a \code{n x n} \code{matrix} describing proximity between
+    individuals, with \eqn{w_ij = a} where \eqn{a} is any number such
+    that \code{as.logical(a)==TRUE},
+    if \eqn{i,j} are neighbours, \eqn{0} or any number \eqn{b} such that
+    \code{as.logical(b)==FALSE} elsewhere. Only the lower triangluar part
+    (without diagonal) of \code{w} after coercing by \code{as.logical()}
+    is evaluated for neighbouring information (but \code{w} can be
+    symmetric). See also \bold{Details} and \bold{Examples}.}
+ \item{index}{an optional numerical index, in case \code{data} has to be
+  formatted by \code{plm.data}} 
+ \item{model}{an optional character string indicating which type of
+  model to estimate; 
+   if left to \code{NULL}, the original heterogeneous specification of
+   Pesaran is used}
+ \item{replications}{the number of Monte Carlo randomizations of the
+   neighbourhood matrix (default: 99),}
+ \item{seed}{the optional random seed,}
+ \item{order}{the order of neighbourhood to test for,}
+ \item{mc}{the number of parallel threads to execute; defaults to 1
+  (serial execution); is limited to the number of execution cores
+  actually available.}
+ \item{test}{the type of test statistic to be returned. One of 
+   \itemize{
+    \item \code{"rho"} for the average correlation coefficient,
+    \item \code{"cd"} for Pesaran's CD statistic, or
+    \item \code{"sclm"} for the scaled version of Breusch and Pagan's LM
+    statistic,}}
+ \item{alternative}{the alternative hypothesis for the test, defaulting
+  to (asymmetric) twosided,}
+ \item{\dots}{further arguments to be passed on to \code{plm}, such as
+  e.g. \code{effect} or \code{random.method}} 
+}
+
+\value{
+An object of class \code{"htest"}.
+}
+
+\details{
+  This test is meant as a generalization of Pesaran's spatial dependence
+  test "CD(p)" for robustness against global dependence (perhaps of the
+  factor type) and persistence in the data, both of which the original
+  test does not tolerate. 
+
+  The procedure can be applied to model residuals as well as to
+  individual \code{pseries}.
+  See the comments in \code{pcdtest} as for the different methods.
+  
+  Space is defined supplying a proximity matrix (elements coercible to
+  \code{logical}) with argument \code{w} which provides information on
+  whether any pair of individuals are neighbours or not. If
+  \code{order=1}, only first-order neighbouring pairs will be used in
+  computing the test; else, \code{w} will be transformed in the
+  neighbourhood matrix of the appropriate order. The matrix need not be
+  binary, so commonly used ``row--standardized'' matrices can be employed
+  as well. \code{nb} objects from \pkg{spdep} must instead be transformed
+  into matrices by \pkg{spdep}'s function \code{nb2mat} before using.
+
+  Notice that the \code{"rho"} and \code{"cd"} tests are permutationally
+  equivalent. 
+ 
+  The test is suitable also for unbalanced panels.
+ 
+  The test on a \code{pseries} is the same as a test on a pooled
+  regression model of that variable on a constant,
+  i.e. \code{rwtest(some_pseries)} is equivalent to
+  \code{rwtest(plm(some_var ~ 1, data = some_pdata.frame, model =
+  "pooling")} and also equivalent to \code{rwtest(some_var ~ 1, data =
+  some_data)}, where \code{some_var} is the variable name in the data
+  which corresponds to \code{some_pseries}.
+}
+
+\references{
+ Millo, G. (2016), A simple randomization test for spatial dependence in
+ the presence of common factors and serial correlation,
+ \emph{(unpublished)}, \bold{xx}(x), pp. xxx--xxx. 
+ Pesaran, M.H. (2004), General Diagnostic Tests for Cross Section
+ Dependence in Panels, \emph{CESifo} Working Paper 1229. 
+ Pesaran, M.H. (2015), Testing Weak Cross--Sectional Dependence in Large
+ Panels, \emph{Econometric Reviews}, \bold{34}(6-10), pp. 1089--1117. 
+}
+
+\examples{
+data(Produc, package = "plm")
+data(usaww)
+fm <- log(gsp) ~ log(pcap) + log(pc) + log(emp) + unemp
+## test on heterogeneous model (separate time series regressions)
+rwtest(fm, data = Produc, w=usaww, index = c("state", "year"))
+
+## test on two-way fixed effects homogeneous model
+rwtest(fm, data = Produc, w=usaww, index = c("state", "year"),
+       model = "within", effect = "twoways")
+
+## test on panelmodel object
+library(plm)
+g <- plm(fm, data = Produc)
+rwtest(g, w=usaww)
+
+## test on pseries, higher-order neighbourhood
+pprod <- pdata.frame(Produc)
+rwtest(pprod$gsp, w=usaww, order=3)
+}
+
+\keyword{htest}

Modified: pkg/man/spml.Rd
===================================================================
--- pkg/man/spml.Rd	2016-02-13 09:44:03 UTC (rev 209)
+++ pkg/man/spml.Rd	2016-11-17 17:55:24 UTC (rev 210)
@@ -94,6 +94,7 @@
 respatlag <- spml(fm, data = Produc, listw = mat2listw(usaww),
                    model="random", spatial.error="none", lag=TRUE)
 summary(respatlag)
+## calculate impact measures
 impac1 <- impacts(respatlag, listw = mat2listw(usaww, style = "W"), time = 17)
 summary(impac1, zstats=TRUE, short=TRUE)
 }