[Gmm-commits] r158 - in pkg: causalGel causalGel/R causalGel/man causalGel/vignettes gmm4/R gmm4/man

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Tue Dec 3 23:40:22 CET 2019


Author: chaussep
Date: 2019-12-03 23:40:22 +0100 (Tue, 03 Dec 2019)
New Revision: 158

Added:
   pkg/causalGel/man/causalGEL.Rd
   pkg/causalGel/man/checkConv-methods.Rd
Modified:
   pkg/causalGel/NAMESPACE
   pkg/causalGel/R/allClasses.R
   pkg/causalGel/R/causalGel.R
   pkg/causalGel/R/causalMethods.R
   pkg/causalGel/R/causalfitMethods.R
   pkg/causalGel/man/subsetting.Rd
   pkg/causalGel/vignettes/causalGel.Rnw
   pkg/causalGel/vignettes/causalGel.pdf
   pkg/gmm4/R/allClasses.R
   pkg/gmm4/man/union-class.Rd
Log:
some change but mostly added stuff to the vignette

Modified: pkg/causalGel/NAMESPACE
===================================================================
--- pkg/causalGel/NAMESPACE	2019-11-22 22:54:38 UTC (rev 157)
+++ pkg/causalGel/NAMESPACE	2019-12-03 22:40:22 UTC (rev 158)
@@ -12,9 +12,9 @@
 
 exportClasses("causalData", "causalGel", "causalGelfit")
 
-exportMethods("causalMomFct")
+exportMethods("causalMomFct", "checkConv")
 
-export("causalModel")
+export("causalModel", "causalGEL")
 
 
 

Modified: pkg/causalGel/R/allClasses.R
===================================================================
--- pkg/causalGel/R/allClasses.R	2019-11-22 22:54:38 UTC (rev 157)
+++ pkg/causalGel/R/allClasses.R	2019-12-03 22:40:22 UTC (rev 158)
@@ -5,7 +5,7 @@
 ## Causal Model Classes
 
 setClass("causalGel", contains="functionGel")
- 
+
 setClass("causalData", representation(momType="character",
                                       balCov="character",
                                       balMom="numericORNULL",
@@ -15,3 +15,6 @@
 
 setClass("causalGelfit", contains="gelfit")
 
+
+## converters
+

Modified: pkg/causalGel/R/causalGel.R
===================================================================
--- pkg/causalGel/R/causalGel.R	2019-11-22 22:54:38 UTC (rev 157)
+++ pkg/causalGel/R/causalGel.R	2019-12-03 22:40:22 UTC (rev 158)
@@ -67,4 +67,41 @@
     new("causalGel", mod)
 }
 
+causalGEL <- function(g, balm, data, theta0=NULL,
+                   momType=c("ACE","ACT","ACC", "uncondBal","fixedMom"),
+                   popMom = NULL, rhoFct=NULL,ACTmom=1L, 
+                   gelType = c("EL", "ET", "EEL", "ETEL", "HD", "ETHD","REEL"),
+                   initTheta = c("gmm","theta0"), getVcov=FALSE,
+                   lambda0=NULL, 
+                   lamSlv=NULL, coefSlv= c("optim","nlminb","constrOptim"),
+                   lControl=list(), tControl=list())
+{
+    Call <- try(match.call(call=sys.call(sys.parent())), silent=TRUE)
+    if (class(Call)=="try-error")
+        Call <- NULL              
+    momType <- match.arg(momType)
+    initTheta <- match.arg(initTheta)
+    coefSlv <- match.arg(coefSlv)
+    gelType <- match.arg(gelType)
+    if (initTheta=="theta0" & is.null(theta0))
+        stop("theta0 is required when initTheta='theta0'")
 
+    model <- causalModel(g, balm, data, theta0, momType, popMom, rhoFct, ACTmom,
+                       gelType)
+    
+    if (initTheta == "theta0")
+    {
+        initTheta <- "modelTheta0"
+        names(theta0) = model at parNames
+    } else {
+        theta0 <- NULL
+    }
+    fit <- modelFit(object=model, initTheta=initTheta, theta0=theta0,
+                    lambda0=lambda0, vcov=getVcov, coefSlv=coefSlv,
+                    lamSlv=lamSlv, tControl=tControl, lControl=lControl)
+    fit at call <- Call
+    fit    
+}
+
+
+

Modified: pkg/causalGel/R/causalMethods.R
===================================================================
--- pkg/causalGel/R/causalMethods.R	2019-11-22 22:54:38 UTC (rev 157)
+++ pkg/causalGel/R/causalMethods.R	2019-12-03 22:40:22 UTC (rev 158)
@@ -39,7 +39,7 @@
               G[(k+1):ntet, (k+1):ntet] <- -sum(impProb)*diag(k-1)
               uK <- colSums(impProb*X[,-1,drop=FALSE])
               G[(2*k):q, (k+1):ntet] <- -kronecker(diag(k-1), uK)
-              if (dat at momType != "uncondBal" |  dat at momType=="fixedMon")
+              if (dat at momType != "uncondBal" |  dat at momType=="fixedMom")
                   {
                       G <- rbind(G, matrix(0, ncol(X)-1, ntet))
                       if (augmented)
@@ -94,8 +94,8 @@
 
 setMethod("modelFit", signature("causalGel"), valueClass="causalGelfit", 
           definition = function(object, gelType=NULL, rhoFct=NULL,
-                                initTheta=c("gmm", "theta0"), start.tet=NULL,
-                                start.lam=NULL, vcov=FALSE, ...)
+                                initTheta=c("gmm", "modelTheta0"), theta0=NULL,
+                                lambda0=NULL, vcov=FALSE, ...)
           {
               Call <- try(match.call(call=sys.call(sys.parent())), silent=TRUE)
               if (class(Call)=="try-error")
@@ -103,6 +103,7 @@
               res <- callNextMethod()
               res at call <- Call
               obj <- new("causalGelfit", res)
+              obj
           })
 
 ## model.matrix and modelResponse
@@ -196,16 +197,32 @@
               x
           })
 
-setMethod("[", c("causalGel", "numeric", "numeric"),
+setMethod("[", c("causalGel", "numeric", "numericORlogical"),
           function(x, i, j){
-              x <- x[j]
-              subset(x, i)
+              x <- x[i]
+              subset(x, j)
           })
 
-setMethod("[", c("causalGel", "missing", "numeric"),
+setMethod("[", c("causalGel", "missing", "numericORlogical"),
           function(x, i, j){
-              x[j]
+              subset(x, j)
           })
 
+setMethod("[", c("causalGelfit", "numeric", "missing"),
+          function(x, i, j){
+              mod <- x at model[i]
+              update(x, newModel=mod)
+          })
 
+setMethod("[", c("causalGelfit", "numeric", "numericORlogical"),
+          function(x, i, j){
+              mod <- x at model[i,j]
+              update(x, newModel=mod)
+          })
 
+setMethod("[", c("causalGelfit", "missing", "numericORlogical"),
+          function(x, i, j){
+              mod <- x at model[,j]
+              update(x, newModel=mod)
+          })
+

Modified: pkg/causalGel/R/causalfitMethods.R
===================================================================
--- pkg/causalGel/R/causalfitMethods.R	2019-11-22 22:54:38 UTC (rev 157)
+++ pkg/causalGel/R/causalfitMethods.R	2019-12-03 22:40:22 UTC (rev 158)
@@ -126,6 +126,58 @@
                   allV$vcov_Allpar <-  V[1:(k+addPar), 1:(k+addPar)]
                   allV$vcov_Alllambda <- V[-(1:(k+addPar)), -(1:(k+addPar))]
               }
-              allV$gtR <- qr.R(res$qrGt)
               allV              
           })
+
+
+## checkConv
+
+setGeneric("checkConv", function(object, ...) standardGeneric("checkConv"))
+
+setMethod("checkConv", "causalGelfit",
+          function(object, tolConv=1e-4, verbose=TRUE, ...)
+          {
+              spec <- modelDims(object at model)
+              momType <- spec$momType
+              m <-  spec$balMom
+              ACTmom <- spec$ACTmom
+              conv <- c(Lambda=object at lconvergence==0, Coef= object at convergence == 0)
+              x <- model.matrix(object at model, "balancingCov")
+              z <- model.matrix(object at model)[,-1, drop=FALSE]
+              nZ <- ncol(z)
+              pt <- getImpProb(object, ...)$pt
+              pt1 <- lapply(1:nZ, function(i) pt[z[,i]==1]/sum(pt[z[,i]==1]))
+              pt0 <- pt[rowSums(z)==0]/sum(pt[rowSums(z)==0])
+              m0 <- colSums(x[rowSums(z)==0,,drop=FALSE]*pt0)
+              m1 <- sapply(1:nZ, function(i) colSums(x[z[,i]==1,,drop=FALSE]*pt1[[i]]))
+              mAll <- cbind(m0, m1)
+              n0 <- paste(paste(colnames(z),collapse="=", sep=""),"=0",sep="")
+              colnames(mAll) <- c(n0, paste(colnames(z),"=1",sep=""))
+              chk <- all(abs(mAll-m)<tolConv)
+              conv <- c(conv, Balance=all(chk))
+              momType <- switch(momType,
+                                uncondBal = "Unconditional balancing",
+                                ACT = "Causal effect on the treated",
+                                ACE = "Average causal effect",
+                                ACC = "Causal effect on the control",
+                                fixedMom = "Balancing based on fixed Moments")
+              if (momType == "ACT" & ACTmom > 1)
+                  momType <- paste(momType, "(treatment group ",
+                                   ACTmom, ")")
+              
+              if (verbose)
+              {
+                  cat("Convergence details of the Causal estimation\n")
+                  cat("********************************************\n")
+                  cat(momType,"\n\n")
+                  cat("Convergence of the Lambdas: ", conv["Lambda"], "\n",sep="")
+                  cat("Convergence of the Coefficients: ", conv["Coef"], "\n",sep="")
+                  cat("Achieved moment balancing: ", conv["Balance"], "\n\n",sep="")
+                  cat("Moments for each group:\n")
+                  print.default(mAll, quote=FALSE, right=TRUE)
+                  invisible()
+              } else {
+                  return(list(conv=conv, moments=mAll))
+              }
+          })
+

Added: pkg/causalGel/man/causalGEL.Rd
===================================================================
--- pkg/causalGel/man/causalGEL.Rd	                        (rev 0)
+++ pkg/causalGel/man/causalGEL.Rd	2019-12-03 22:40:22 UTC (rev 158)
@@ -0,0 +1,107 @@
+\name{causalGEL}
+
+\alias{causalGEL}
+	
+\title{Causal inference using balancing methods}
+
+\description{
+It fit a causality model using balancing based on GEL methods. It
+creates  an object of class \code{"causalGelfit"}.
+}
+\usage{
+
+causalGEL(g, balm, data, theta0=NULL,
+          momType=c("ACE","ACT","ACC", "uncondBal","fixedMom"),
+          popMom = NULL, rhoFct=NULL,ACTmom=1L, 
+          gelType = c("EL", "ET", "EEL", "ETEL", "HD", "ETHD","REEL"),
+          initTheta = c("gmm","theta0"), getVcov=FALSE,
+          lambda0=NULL, 
+          lamSlv=NULL, coefSlv= c("optim","nlminb","constrOptim"),
+          lControl=list(), tControl=list())
+}
+\arguments{
+
+  \item{g}{A formula that links the outcome variable to the treatment
+    indicators.}
+
+  \item{balm}{A formula or a matrix with balancing covariates}
+
+  \item{data}{A data.frame or a matrix with column names.}
+
+  \item{theta0}{A vector of starting values (optional). If not provided,
+    the least squares method is use to generate them}
+
+  \item{momType}{How the moments of the covariates should be
+    balanced. By default, it is balanced using the sample mean of the
+    covariates, which corresponds to the ACE. Alternatively, to the
+    sample moments of the treated group (ACT), the control group (ACC),
+    or to a known population mean. The option 'uncondBal' means that it
+    is unconditionally balanced.}
+        
+  \item{popMom}{A vector of population moments to use for balancing. It
+    can be used if those moments are available from a census, for
+    example. When available, it greatly improves efficiency.}
+
+  \item{rhoFct}{An optional function that return \eqn{\rho(v)}. This is
+    for users who want a GEL model that is not built in the package. The
+    four arguments of the function must be \code{"gmat"}, the matrix of
+    moments, \code{"lambda"}, the vector of Lagrange multipliers,
+    \code{"derive"}, which specify the order of derivative to return, and
+    \code{k} a numeric scale factor required for time series and kernel
+    smoothed moments.}
+
+  \item{ACTmom}{When \code{momType} is set to 'ACT', that integer
+    indicates which treated group to use to balance the covariates.} 
+  
+  \item{gelType}{"EL" for empirical likelihood, "ET" for exponential tilting,
+    "EEL" for Euclidean empirical likelihood, "ETEL" for exponentially
+    tilted empirical likelihood of Schennach(2007), "HD" for Hellinger
+    Distance of Kitamura-Otsu-Evdokimov (2013), and "ETHD" for the
+    exponentially tilted Hellinger distance of Antoine-Dovonon
+    (2015). "REEL" is a restricted version of "EEL" in which the
+    probabilities are bounded below by zero. In that case, an analytical
+    Kuhn-Tucker method is used to find the solution.}
+
+  \item{initTheta}{Method to obtain the starting values for the
+    coefficient vector. By default the GMM estimate with identity matrix
+    is used. The second argument means that the theta0 of the
+    object, if any, should be used.}
+  
+  \item{lambda0}{Manual starting values for the Lagrange
+    multiplier. By default, it is a vector of zeros.}
+
+  \item{getVcov}{Should the method computes the covariance matrices of the
+              coefficients and Lagrange multipliers.}
+
+  \item{lamSlv}{An alternative solver for the Lagrange multiplier. By
+    default, either \code{\link{Wu_lam}}, \code{\link{EEL_lam}},
+    \code{\link{REEL_lam}} or \code{\link{getLambda}} is
+    used.}
+  
+  \item{coefSlv}{Minimization solver for the coefficient vector.}
+  
+  \item{lControl}{A list of controls for the Lagrange multiplier
+    algorithm.}
+  
+  \item{tControl}{A list of controls for the coefficient algorithm.}
+	    
+}
+
+\value{
+'causalGEL' returns an object of classesof \code{"causalGelfit"}.
+ }
+
+\examples{
+data(nsw)
+
+balm <- ~age+ed+black+hisp:married+nodeg+re75+I(re75^2)
+g <- re78~treat
+           
+fit1 <-  causalGEL(g, balm, nsw, gelType="ET")
+fit1
+
+fit2 <-  causalGEL(g, balm, nsw, gelType="EL")
+fit2
+
+}
+

Added: pkg/causalGel/man/checkConv-methods.Rd
===================================================================
--- pkg/causalGel/man/checkConv-methods.Rd	                        (rev 0)
+++ pkg/causalGel/man/checkConv-methods.Rd	2019-12-03 22:40:22 UTC (rev 158)
@@ -0,0 +1,39 @@
+\name{checkConv-methods}
+\docType{methods}
+
+\alias{checkConv}
+\alias{checkConv-methods}
+\alias{checkConv,causalGelfit-method}
+
+\title{Methods that returns information about convergence.}
+\description{
+Returns convergence codes and compares moments for the treated and the
+control groups.
+}
+\usage{
+
+\S4method{checkConv}{causalGelfit}(object, tolConv=1e-4, verbose=TRUE, \dots)
+
+}
+\arguments{
+  \item{object}{An object of class \code{"causalGelfit"}}
+  \item{tolConv}{Tolerance to decide if balancing is achieved. }
+  \item{verbose}{If FALSE nothinf is printed.}
+   \item{\dots}{Arguments to pass to other methods.}
+ }
+
+\examples{
+data(nsw)
+
+balm <- ~age+ed+black+hisp:married+nodeg+re75+I(re75^2)
+g <- re78~treat
+           
+model <- causalModel(g, balm, nsw)
+fit <- modelFit(model)
+checkConv(fit)
+
+}
+
+  
+\keyword{methods}
+\keyword{Balancing moments}

Modified: pkg/causalGel/man/subsetting.Rd
===================================================================
--- pkg/causalGel/man/subsetting.Rd	2019-11-22 22:54:38 UTC (rev 157)
+++ pkg/causalGel/man/subsetting.Rd	2019-12-03 22:40:22 UTC (rev 158)
@@ -1,9 +1,13 @@
 \name{[-causalGel}
 \docType{methods}
-\alias{[,causalGel,missing,numeric-method}
+\alias{[,causalGel,missing,numericORlogical-method}
 \alias{[,causalGel,numeric,missing-method}
-\alias{[,causalGel,numeric,numeric-method}
+\alias{[,causalGel,numeric,numericORlogical-method}
+\alias{[,causalGelfit,missing,numericORlogical-method}
+\alias{[,causalGelfit,numeric,missing-method}
+\alias{[,causalGelfit,numeric,numericORlogical-method}
 
+
 \title{Subsetting methods}
 \description{
 Different subsetting methods for S4 class objects of the package. The
@@ -13,19 +17,50 @@
 \section{Methods}{
 \describe{
 
+\item{\code{signature(x = "causalGel", i = "missing", j = "numericORlogical")}}{
+  Subsets observations.
+}
+  
 \item{\code{signature(x = "causalGel", i = "numeric", j = "missing")}}{
-  Selects observations
+  Selects balancing covatriates.
 }
 
-\item{\code{signature(x = "causalGel", i = "missing", j = "numeric")}}{
-  Selects balancing moments
+\item{\code{signature(x = "causalGel", i = "numeric", j = "numericORlogical")}}{
+  Selects balancing covariates and observations.
 }
 
-\item{\code{signature(x = "causalGel", i = "numeric", j = "numeric")}}{
-  \code{i} selects the observations and "j" selects the balancing
-  moments.
+\item{\code{signature(x = "causalGelfit", i = "missing", j = "numericORlogical")}}{
+  Subsets observations and refit the model.
 }
+  
+\item{\code{signature(x = "causalGelfit", i = "numeric", j = "missing")}}{
+  Selects balancing covatriates and refit the model.
+}
+
+\item{\code{signature(x = "causalGelfit", i = "numeric", j = "numericORlogical")}}{
+  Selects balancing covariates and observations and refit the model.
+}
+
+
+
 }}
 
+\examples{
+data(nsw)
+
+balm <- ~age+ed+black+hisp:married+nodeg+re75+I(re75^2)
+g <- re78~treat
+         
+model <- causalModel(g, balm, nsw)           
+model[1:5, 1:500]
+
+fit <-  modelFit(model)
+fit[1:5,1:500]
+
+}
+
+
+
 \keyword{methods}
 \keyword{subsetting}
+

Modified: pkg/causalGel/vignettes/causalGel.Rnw
===================================================================
--- pkg/causalGel/vignettes/causalGel.Rnw	2019-11-22 22:54:38 UTC (rev 157)
+++ pkg/causalGel/vignettes/causalGel.Rnw	2019-12-03 22:40:22 UTC (rev 158)
@@ -1,25 +1,24 @@
-\documentclass[11pt,letterpaper]{article}
+\documentclass[12pt]{article}
 
+\usepackage{graphicx}
 \usepackage{amsthm}
-\usepackage[hmargin=2cm,vmargin=2.5cm]{geometry}
+\usepackage[hmargin=1in,vmargin=1in]{geometry}
 \usepackage[utf8x]{inputenc}
+\usepackage[active]{srcltx}
 \usepackage{amsmath}
+\usepackage{amssymb}
 \usepackage{verbatim}
 \usepackage[round]{natbib}
 \usepackage{amsfonts}
 \usepackage{amssymb}
 \usepackage{graphicx}
+\usepackage{multirow}
+\usepackage{rotating}
+\usepackage{footmisc}
+\usepackage{xspace}
+\usepackage{soul}
 \usepackage{mathtools}
-\usepackage{hyperref}
-\hypersetup{
-  colorlinks,
-  citecolor=black,
-  filecolor=black,
-  linkcolor=black,
-  urlcolor=black
-}
 
-
 \newtheorem{theorem}{Theorem}
 \newtheorem{col}{Corollary}
 \newtheorem{lem}{Lemma}
@@ -26,11 +25,15 @@
 \newtheorem{ass}{Assumption}
 
 \DeclareMathOperator{\Ex}{E}
+\DeclareMathOperator{\Diag}{Diag}
 \newcommand{\ellhat}{\hat \ell}
+
 \newcommand\independent{\protect\mathpalette{\protect\independenT}{\perp}}
 \def\independenT#1#2{\mathrel{\rlap{$#1#2$}\mkern2mu{#1#2}}}
 
 % additional commands
+\newcommand{\gtilde}{\tilde g}
+\newcommand{\ghat}{\hat g}
 \newcommand{\GEL}{GEL\xspace}
 \newcommand{\as}{\ \text{a.s.}\xspace}
 \newcommand{\HD}{HD\xspace}
@@ -38,7 +41,7 @@
 \newcommand{\GMM}{GMM\xspace}
 \newcommand{\LRT}{LRT\xspace}
 \newcommand{\EL}{EL\xspace}
-\newcommand{\EUL}{EUL\xspace}
+\newcommand{\EEL}{EEL\xspace}
 \newcommand{\ELB}{ELB\xspace}
 \newcommand{\MCAR}{MCAR\xspace}
 \newcommand{\ET}{ET\xspace}
@@ -49,6 +52,8 @@
 \newcommand{\ACT}{ACT\xspace}
 \newcommand{\ACN}{ACN\xspace}
 \newcommand{\PFC}{PFC\xspace}
+\newcommand{\CH}{CH\xspace}
+
 \newcommand{\ate}{\tau_{\text{\ATE}}}
 \newcommand{\atehat}{\hat\tau_{\text{\ATE}}}
 \newcommand{\ace}{\tau_{\text{\ACE}}}
@@ -79,17 +84,26 @@
 \newcommand{\Ztilde}{\tilde Z}
 \newcommand{\Ytilde}{\tilde Y}
 \newcommand{\epsilontilde}{\tilde\epsilon}
+\newcommand{\tautrue}{\tau^0}
+\newcommand{\pitrue}{\pi^0}
+\newcommand{\pithat}{\hat \pi}
+\newcommand{\uhat}{\hat u}
+
+
 \DeclareMathOperator{\argmin}{argmin}
 \DeclareMathOperator{\argmax}{argmax}
 \DeclarePairedDelimiter{\abs}{\lvert}{\rvert}
 \DeclarePairedDelimiter{\norm}{\lVert}{\rVert}         % Norm
+
 \newcommand{\thetahat}{\hat \theta}
+\newcommand{\etahat}{\hat \eta}
+\newcommand{\etatrue}{\eta^0}
 \newcommand{\lambdahat}{\hat \lambda}
 \newcommand{\betahat}{\hat \beta}
 \newcommand{\vp}{p}
 \newcommand{\spv}{\mathbb{P}}
 \newcommand{\pdiv}{D}
-\newcommand{\vone}{e}
+\newcommand{\vone}{1_n}
 \newcommand{\tp}[1]{#1^T}
 \newcommand{\phat}{\hat p}
 \newcommand{\cvgindist}{\xrightarrow{\text{d}}}
@@ -112,6 +126,60 @@
     Methods with R}}
 \date{\today}
 
+<<extract, message=FALSE, warning=FALSE, echo=FALSE>>=
+library(texreg)
+setMethod("extract", "causalGelfit", 
+          function(model, includeSpecTest=FALSE, 
+                   specTest=c("LR","LM","J"), include.nobs=TRUE, 
+                   include.obj.fcn=TRUE, ...)
+              {
+                  specTest <- match.arg(specTest)
+                  s <- summary(model, ...)
+                  wspecTest <- grep(specTest, rownames(s at specTest@test))
+                  spec <- modelDims(model at model)
+                  coefs <- s at coef
+                  names <- rownames(coefs)
+                  coef <- coefs[, 1]
+                  se <- coefs[, 2]
+                  pval <- coefs[, 4]
+                  n <- model at model@n
+                  gof <- numeric()
+                  gof.names <- character()
+                  gof.decimal <- logical()
+                  if (includeSpecTest) {
+                      if (spec$k == spec$q)
+                          {
+                              obj.fcn <- NA
+                              obj.pv <- NA
+                          } else {
+                              obj.fcn <- s at specTest@test[wspecTest,1]
+                              obj.pv <- s at specTest@test[wspecTest,3]
+                          }
+                      gof <- c(gof, obj.fcn, obj.pv)                      
+                      gof.names <- c(gof.names, 
+                                     paste(specTest,"-test Statistics", sep=""),
+                                     paste(specTest,"-test p-value", sep=""))
+                      gof.decimal <- c(gof.decimal, TRUE, TRUE)
+                  }
+                  if (include.nobs == TRUE) {
+                      gof <- c(gof, n)
+                      gof.names <- c(gof.names, "Num.\\ obs.")
+                      gof.decimal <- c(gof.decimal, FALSE)
+                  }
+                  nbal <- length(model at model@X at balCov)
+                  gof.names <- c(gof.names, "Num. Bal. Cov.")
+                  gof <- c(gof, nbal)
+                  gof.decimal <- c(gof.decimal, FALSE)
+                  tr <- createTexreg(coef.names = names, coef = coef, 
+                                     se = se, pvalues = pval, 
+                                     gof.names = gof.names, gof = gof, 
+                                     gof.decimal = gof.decimal)
+                  return(tr)
+              })
+@ 
+
+
+
 \begin{document}
 
 \maketitle
@@ -144,9 +212,10 @@
 %\VignettePackage{causalGel}
 %\VignetteEngine{knitr::knitr}
 
-<<echo=FALSE>>=
+<<echo=FALSE, message=FALSE, warning=FALSE>>=
 library(knitr)
 opts_chunk$set(size='footnotesize')
+library(causalGel)
 @ 
 
 \newpage
@@ -155,9 +224,467 @@
 
 \section{Introduction}\label{sec:intro}
 
-We want to do causal inference using the GEL of \cite{newey-smith04}
+In the following, let $X \in \reals^q$ be a random $q$-vector of
+covariates, $Y(j)$ be the random (potential) outcome when the subject
+is exposed to the treatment $j$, where $0 \leq j \leq p$ and $j = 0$
+corresponds to the control treatment.  We consider $Z_0$ as an
+indicator for the control treatment, and in the case when the
+observational study does not have a well defined control treatment,
+then we treat $Z_0$ as the baseline treatment, i.e., the treatment
+with which we are interested to compare all other treatments. Let
+further $Z = (Z_j) \in \reals^{p}$ be a random $p$-vector of treatment
+indicators (other than the control treatment), with $Z_j = 1$ and $Z_k
+= 0$ for all $k \neq j$ if the subject receives the treatment $j$,
+where $j = 1 , \ldots, p$.  Since all individuals receive only one
+treatment, then $\sum_{j=0}^p Z_j = 1$, and we only observe $Y =
+\sum_{j = 0}^p Y(j) Z_j$. Note that we do not consider here the case
+of clinical trials with non-compliance where the subjects are assigned
+to treatments but they may receive other treatments, and thus, the
+available data consist of only the treatment received and the outcome
+under the treatment received in addition to the covariates for all
+subjects.
 
+\subsection{Randomized experiments}
+\label{sec:rand}
+
+Let $\theta_1 \in \reals$, $\theta_2 = (\theta_{2,1}, \ldots,
+\theta_{2,p}) \in \reals^{p}$, and $\theta_3 = (\theta_{3,1}, \ldots,
+\theta_{3,p}) \in \reals^{p}$ be defined as $\theta_1 = \Ex(Y(0))$,
+$\theta_{2,j} = [\Ex(Y(j))-\Ex(Y(0))]$, $\theta_{3,j} = \Ex(Z_j)$, $1 \leq j \leq
+p$, and let $\theta = (\theta_1, \theta_2, \theta_3) \in \reals^{2p +
+  1}$.  By the definition of conditional expectation,
+\begin{equation}
+    \label{eq:1}
+      \Ex(Y \given Z_j = 1) = \Ex(Y(j)) = \Ex(Y Z_j) / \Ex(Z_j) \,,
+      \quad 1 \leq j \leq p \,.
+  \end{equation}
+  Hence, $\Ex[(Y - \theta_1 - \tp{\theta_2} Z) Z_j] = 0$ for all $1 \leq j \leq
+  p$, and thus,
+  \begin{equation}
+    \label{eq:2}
+    \Ex\bigl((Y - \theta_1- \tp{\theta_2} Z) Z \bigr) = 0  \,.
+  \end{equation}
+  By the law of total expectation formula,
+  \begin{equation}
+    \label{eq:3}
+    \Ex(Y) = \sum_{j=0}^p \Ex(Y(j)) \Pr(Z_j = 1) \,. 
+  \end{equation}
+  Hence, 
+  \begin{equation}
+    \label{eq:4}
+    \Ex(Y - \theta_1 - \tp{\theta_2} Z) = 0 \,. 
+  \end{equation}
+  By the definition of $\theta_3$, $\theta_{3,j} = \Ex(Z_j)$ for $1
+  \leq j \leq p$; hence
+  \begin{equation}
+    \label{eq:5}
+    \Ex(Z - \theta_{3}) = 0 \,. 
+  \end{equation}
+  Note that $\Ex(Z_0) = 1 - \sum_{j=1}^p \theta_{3,j}$.
+
+  In randomized trials, $(Z_0,Z)\independent X$, which implies that
+  $\Ex[(Z_j-\theta_{3,j})u(X)]=0$ for $1\leq j\leq p$, where
+  $u(X)\in\reals^k$ is a $k$-vector of functions of $X$. Hence,
+  \begin{equation}
+    \label{eq:6}
+    \Ex[(Z - \theta_{3})\otimes u(X)]  = 0 \,. 
+  \end{equation}
+To illustrate what $u(x)$ can be, suppose $x = (x_1, x_2) \in
+\reals^2$, then we can define $u(x) = x \in \reals^2$, or $u(x) =
+(x_1, x_2, x_1 x_2) \in \reals^3$, or $u(x) = (x_1, x_2, x_1^2, x_2^2,
+x_1 x_2) \in \reals^5$. 
+
+Let $T = (X, Z, Y) \in \reals^{p+q+1}$ denote a generic random
+variable distributed according to a distribution on
+$\reals^{p+q+1}$\footnote{Notice that we omit $Z_0$ from $T$ because
+  its value is implied by $Z$ through $Z_0=1-\sum_{j=1}^p
+  Z_j$.}. Therefore, Equations \eqref{eq:2} and \eqref{eq:4} to
+\eqref{eq:6} imply that the parameter of interest $\thetatrue$
+satisfies the following moment conditions:
+\begin{equation}
+  \label{eq:7}
+  \Ex(g(T, \thetatrue)) = 0 \,, 
+\end{equation}
+where $\thetatrue = (\thetatrue_1, \thetatrue_2, \thetatrue_3) \in
+\reals^{2p + 1}$ and $g(t, \theta)$ is defined as
+\begin{equation}
+  \label{eq:8}
+  g(t; \theta) = 
+  \begin{pmatrix}
+    y - \theta_1 - \tp{\theta_2} z \\
+    (y - \theta_1 - \tp{\theta_2} z) z \\
+    z - \theta_{3} \\
+    (z - \theta_3) \otimes u(x) \\
+  \end{pmatrix} \,, \quad 
+  t = (x, z, y) \in \reals^{p + q + 1} \,.
+\end{equation}
+The \ACE of the treatment $j$ is given by $\tautrue_j
+= \thetatrue_{2, j}$, for $1 \leq j \leq p$.
+
+The package offers a way to estimate $\thetatrue$ using the
+generalized method of moments (\GEL). Using the primal form of \GEL,
+the estimator of $\thetatrue$ is defined as:
+\begin{equation}
+  \label{eq:9}
+  \thetahat = \argmin_{\theta \in \reals^{2p+1}} \min_{p \in \spv^n}
+  \Bigl\{  \pdiv_\gamma(\vp, n^{-1} \vone)
+  : \sum_{i=1}^n p_i g(T_i; \theta) = 0 \Bigr\}  \,,
+\end{equation}
+where 
+\begin{equation*}
+  \spv^n = \Bigl\{ \vp = (p_i) \in \reals^n :  
+  \sum_{i=1}^{n} p_{i}=1 \,,  p_{i} \geq 0 \Bigr\} \,,
+\end{equation*}
+ and $\pdiv_\gamma(\vp, n^{-1} \vone)$ is the power divergence discrepancy
+function \citep{newey-smith04}:
+\begin{equation*}
+  \pdiv_\gamma(\vp, n^{-1}\vone) = \sum_{i=1}^n 
+  \frac{(np_i)^{\gamma + 1} - 1}{n \gamma(\gamma + 1)} \,.
+\end{equation*}
+In particular, $\gamma = -1$ corresponds to the empirical likelihood
+(\EL), $\gamma = 0$ corresponds to the exponential tilting (\ET),
+$\gamma = 1$ corresponds to the Euclidean empirical likelihood (\EEL)
+estimator also known as the continuously updated GMM estimator
+  (\CUE), and $\gamma = -1/2$ corresponds to the Hellinger distance
+(\HD) used by \citet{kitamura-otsu-evdokimov13}. \citet{newey-smith04}
+present the \GEL method in its dual form, which is the following saddle
+point problem:
+\begin{equation}
+  \label{eq:10}
+  \thetahat = \argmin_{\theta \in \reals^{2p+1}}
+  \max_{\lambda \in \reals^{1+p(2+k)}} \sum_{i=1}^n 
+  \rho_{\gamma}\bigl(\tp{\lambda} g(T_i;\theta)\bigr) \,,
+\end{equation}
+where
+\begin{equation*}
+  \rho_\gamma(v) = - \frac{(1+\gamma v)^{(\gamma+1)/\gamma}}{\gamma + 1} \,.
+\end{equation*}
+In particular $\rho_{-1}(v) = \log(1-v)$ for \EL, $\rho_{0}(v) =
+-\exp(v)$ for \ET, $\rho_1(v) = -1/2 - v -v^2/2$ for \EEL, and
+$\rho_{-1/2}(v) = -2/(1-v/2)$ for \HD. Using the dual form, the
+estimated probability weights from the primal problem are defined as:
+\begin{equation}
+  \label{eq:11}
+  \phat_i(\theta,\lambda) = \frac{
+    \rho_{\gamma}'(\tp{\lambda} g(T_i; \theta))}
+  {\sum_{j=1}^n \rho_{\gamma}'(\tp{\lambda}g(T_i;\theta))} \,, 
+\end{equation}
+where $\rho_{\gamma}'(v)$ is the first order derivative of
+$\rho_{\gamma}(v)$. 
+
+\subsection{Observational studies}
+\label{sec:obs}
+When the treatment (group) assignment is not random, we can still use
+the \GEL as a weighting method.  \GEL is used as a way to re-weight
+the probability of each observation so that our sample is as if it had
+been generated by a randomized experiment. The parameter of interest
+$\thetatrue$ satisfies the following moment conditions:
+\begin{equation}
+  \label{eq:12}
+  \Ex_0(g(T, \thetatrue)) = 0 \,, 
+\end{equation}
+where $\thetatrue$ is as in Section \ref{sec:rand}, and $g(t,
+\theta)$ is defined as
+\begin{equation}
+  \label{eq:13}
+  g(t; \theta) = 
+  \begin{pmatrix}
+    y - \theta_1 - \tp{\theta_2} z \\
+    (y - \theta_1 - \tp{\theta_2} z) z \\
+    z - \theta_{3} \\
+    (z - \theta_3) \otimes u(x) \\
+    u(x) - u_0
+  \end{pmatrix} \,, \quad 
+  t = (x, z, y) \in \reals^{p + q + 1} \,,
+\end{equation}
+where $u_0$ is the expected value of $u(X)$ for a target
+population. Note that while the first three moment conditions (under
+$\Ex_0$) identify the parameters, the fourth moment condition makes
+$Z$ ``almost independent'' of $X$ as $k \to \infty$.  The last
+condition is what differentiates randomized experiments from
+observational studies. It imposes moments of $X$ to match the ones
+from a given target population. The choice of $u_0$ is driven by the
+type of causal effect we are interested in. We will present the
+different options in the next section.
+
+\section{Estimating the causal effect}\label{sec:estim}
+
+The package is based on the ``gmm4'' package \citep{gmm4}. As for the
+methods included in the package, there are two ways to proceed. We can
+create a model object and use the \textit{modelFit} method to estimate
+it, or we can directly estimate a model using a general function. We
+first present the first approach because it helps better understand
+the structure of the package. 
+
+\subsection{An S4 class object for causal inference}\label{sec:causalobj}
+
+To illustrate the methods, we consider the experiment analyzed first
+by \cite{lalonde86} and used later by \cite{dehejia-wahba99,
+  dehejia-wahba02}. The objective of the original paper was to measure
+the effect of a training program on the real income. The dependent
+variable is the real income in 1978 and the covariates used for
+matching the treated group to the control are age, education, 1975
+real income and dummy variables for race, marital status, and academic
+achievement.
+
+First, we load the package and the dataset:
+
+<<>>=
+library(causalGel)
+data(nsw)
+## We express income in thousands for better stability
+nsw$re78 <- nsw$re78/1000
+nsw$re75 <- nsw$re75/1000
+@ 
+
+The model class, is ``causalGel'' which inherits directly from
+``functionGel'' class. The constructor is the causalModel()
+function. The arguments are:
+
+\begin{itemize}
+\item \textit{g}: A formula that defines the regression of the outome
+  on the treatment indicator. For our dataset, the variable ``treat''
+  is the indicator, and ``re78'' is the outcome. The formula is therefore:
+  
+<<>>=
+g <- re78~treat
+@   
+
+\item \textit{balm}: A formula or a data.frame representing
+  $u(X)$. For example, if we want to balance 1975 income, age,
+  education and race, we would use the following:
+  
+<<>>=
+balm <- ~age+ed+black+hisp+re75
+@   
+
+\item \textit{theta0}: An optional starting value to be passed to the
+  numerical algorithm.
+  
+\item \textit{momType}: This is the main argument to determine which
+  type of causal effect we want to estimate. The options are:
+  \begin{itemize}
+    \item ``ACE'': This one is for estimating the average causal
+      effect. The moment function $g(t;\theta)$ is defined by
+      Equation \eqref{eq:13}, and $\mu_0$ is defined as:
+      \[
+      \mu_0 = \frac{1}{n}\sum_{i=1}^n u(X_i)
+      \]
+    \item ``ACT'': This is for the causal effect on the treated. In
+      that case, the argument ``ACTmom'' determines which of the
+      treatment groups we are refering to. The moment function
+      $g(t;\theta)$ is defined by Equation \eqref{eq:13}, and $\mu_0$
+      is defined as:
+      \[
+      \mu_0 = \frac{1}{n_j}\sum_{i=1}^n Z_{ji}u(X_i)\,,
+      \]
+      where $n_j=\sum_{i=1}^n Z_{ji}$, and $j$ is the value of
+      ``ACTmom''.
+    \item ``ACC'': This is for the causal effect on the control. The moment function
+      $g(t;\theta)$ is defined by Equation \eqref{eq:13}, and $\mu_0$
+      is defined as:
+      \[
+      \mu_0 = \frac{1}{n_0}\sum_{i=1}^n Z_{0i}u(X_i)\,,
+      \]
+      where $n_0=\sum_{i=1}^n Z_{0i}$.
+    \item ``uncondBal'': This is used to estimate the average causal
+      effect in randomized trials. The moment function $g(t;\theta)$
+      is defined by Equation \eqref{eq:8}. In the case of
+      observational data, it is not recommended because the moments
+      are balanced, but represent estimates of the moments of an
+      undefined population.
+    \item ``fixedMom'': The causal effect of a target population for
+      which $\Ex(\mu(X)$ is known is estimated. The moment function
+      $g(t;\theta)$ is defined by Equation \eqref{eq:13}, and $\mu_0$
+      is set to ``popMom'', which is another argument of
+      causalModel() (see below).
+    \end{itemize}
[TRUNCATED]

To get the complete diff run:
    svnlook diff /svnroot/gmm -r 158


More information about the Gmm-commits mailing list