[Gmm-commits] r158 - in pkg: causalGel causalGel/R causalGel/man causalGel/vignettes gmm4/R gmm4/man
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Tue Dec 3 23:40:22 CET 2019
Author: chaussep
Date: 2019-12-03 23:40:22 +0100 (Tue, 03 Dec 2019)
New Revision: 158
Added:
pkg/causalGel/man/causalGEL.Rd
pkg/causalGel/man/checkConv-methods.Rd
Modified:
pkg/causalGel/NAMESPACE
pkg/causalGel/R/allClasses.R
pkg/causalGel/R/causalGel.R
pkg/causalGel/R/causalMethods.R
pkg/causalGel/R/causalfitMethods.R
pkg/causalGel/man/subsetting.Rd
pkg/causalGel/vignettes/causalGel.Rnw
pkg/causalGel/vignettes/causalGel.pdf
pkg/gmm4/R/allClasses.R
pkg/gmm4/man/union-class.Rd
Log:
some change but mostly added stuff to the vignette
Modified: pkg/causalGel/NAMESPACE
===================================================================
--- pkg/causalGel/NAMESPACE 2019-11-22 22:54:38 UTC (rev 157)
+++ pkg/causalGel/NAMESPACE 2019-12-03 22:40:22 UTC (rev 158)
@@ -12,9 +12,9 @@
exportClasses("causalData", "causalGel", "causalGelfit")
-exportMethods("causalMomFct")
+exportMethods("causalMomFct", "checkConv")
-export("causalModel")
+export("causalModel", "causalGEL")
Modified: pkg/causalGel/R/allClasses.R
===================================================================
--- pkg/causalGel/R/allClasses.R 2019-11-22 22:54:38 UTC (rev 157)
+++ pkg/causalGel/R/allClasses.R 2019-12-03 22:40:22 UTC (rev 158)
@@ -5,7 +5,7 @@
## Causal Model Classes
setClass("causalGel", contains="functionGel")
-
+
setClass("causalData", representation(momType="character",
balCov="character",
balMom="numericORNULL",
@@ -15,3 +15,6 @@
setClass("causalGelfit", contains="gelfit")
+
+## converters
+
Modified: pkg/causalGel/R/causalGel.R
===================================================================
--- pkg/causalGel/R/causalGel.R 2019-11-22 22:54:38 UTC (rev 157)
+++ pkg/causalGel/R/causalGel.R 2019-12-03 22:40:22 UTC (rev 158)
@@ -67,4 +67,41 @@
new("causalGel", mod)
}
+causalGEL <- function(g, balm, data, theta0=NULL,
+ momType=c("ACE","ACT","ACC", "uncondBal","fixedMom"),
+ popMom = NULL, rhoFct=NULL,ACTmom=1L,
+ gelType = c("EL", "ET", "EEL", "ETEL", "HD", "ETHD","REEL"),
+ initTheta = c("gmm","theta0"), getVcov=FALSE,
+ lambda0=NULL,
+ lamSlv=NULL, coefSlv= c("optim","nlminb","constrOptim"),
+ lControl=list(), tControl=list())
+{
+ Call <- try(match.call(call=sys.call(sys.parent())), silent=TRUE)
+ if (class(Call)=="try-error")
+ Call <- NULL
+ momType <- match.arg(momType)
+ initTheta <- match.arg(initTheta)
+ coefSlv <- match.arg(coefSlv)
+ gelType <- match.arg(gelType)
+ if (initTheta=="theta0" & is.null(theta0))
+ stop("theta0 is required when initTheta='theta0'")
+ model <- causalModel(g, balm, data, theta0, momType, popMom, rhoFct, ACTmom,
+ gelType)
+
+ if (initTheta == "theta0")
+ {
+ initTheta <- "modelTheta0"
+ names(theta0) = model at parNames
+ } else {
+ theta0 <- NULL
+ }
+ fit <- modelFit(object=model, initTheta=initTheta, theta0=theta0,
+ lambda0=lambda0, vcov=getVcov, coefSlv=coefSlv,
+ lamSlv=lamSlv, tControl=tControl, lControl=lControl)
+ fit at call <- Call
+ fit
+}
+
+
+
Modified: pkg/causalGel/R/causalMethods.R
===================================================================
--- pkg/causalGel/R/causalMethods.R 2019-11-22 22:54:38 UTC (rev 157)
+++ pkg/causalGel/R/causalMethods.R 2019-12-03 22:40:22 UTC (rev 158)
@@ -39,7 +39,7 @@
G[(k+1):ntet, (k+1):ntet] <- -sum(impProb)*diag(k-1)
uK <- colSums(impProb*X[,-1,drop=FALSE])
G[(2*k):q, (k+1):ntet] <- -kronecker(diag(k-1), uK)
- if (dat at momType != "uncondBal" | dat at momType=="fixedMon")
+ if (dat at momType != "uncondBal" | dat at momType=="fixedMom")
{
G <- rbind(G, matrix(0, ncol(X)-1, ntet))
if (augmented)
@@ -94,8 +94,8 @@
setMethod("modelFit", signature("causalGel"), valueClass="causalGelfit",
definition = function(object, gelType=NULL, rhoFct=NULL,
- initTheta=c("gmm", "theta0"), start.tet=NULL,
- start.lam=NULL, vcov=FALSE, ...)
+ initTheta=c("gmm", "modelTheta0"), theta0=NULL,
+ lambda0=NULL, vcov=FALSE, ...)
{
Call <- try(match.call(call=sys.call(sys.parent())), silent=TRUE)
if (class(Call)=="try-error")
@@ -103,6 +103,7 @@
res <- callNextMethod()
res at call <- Call
obj <- new("causalGelfit", res)
+ obj
})
## model.matrix and modelResponse
@@ -196,16 +197,32 @@
x
})
-setMethod("[", c("causalGel", "numeric", "numeric"),
+setMethod("[", c("causalGel", "numeric", "numericORlogical"),
function(x, i, j){
- x <- x[j]
- subset(x, i)
+ x <- x[i]
+ subset(x, j)
})
-setMethod("[", c("causalGel", "missing", "numeric"),
+setMethod("[", c("causalGel", "missing", "numericORlogical"),
function(x, i, j){
- x[j]
+ subset(x, j)
})
+setMethod("[", c("causalGelfit", "numeric", "missing"),
+ function(x, i, j){
+ mod <- x at model[i]
+ update(x, newModel=mod)
+ })
+setMethod("[", c("causalGelfit", "numeric", "numericORlogical"),
+ function(x, i, j){
+ mod <- x at model[i,j]
+ update(x, newModel=mod)
+ })
+setMethod("[", c("causalGelfit", "missing", "numericORlogical"),
+ function(x, i, j){
+ mod <- x at model[,j]
+ update(x, newModel=mod)
+ })
+
Modified: pkg/causalGel/R/causalfitMethods.R
===================================================================
--- pkg/causalGel/R/causalfitMethods.R 2019-11-22 22:54:38 UTC (rev 157)
+++ pkg/causalGel/R/causalfitMethods.R 2019-12-03 22:40:22 UTC (rev 158)
@@ -126,6 +126,58 @@
allV$vcov_Allpar <- V[1:(k+addPar), 1:(k+addPar)]
allV$vcov_Alllambda <- V[-(1:(k+addPar)), -(1:(k+addPar))]
}
- allV$gtR <- qr.R(res$qrGt)
allV
})
+
+
+## checkConv
+
+setGeneric("checkConv", function(object, ...) standardGeneric("checkConv"))
+
+setMethod("checkConv", "causalGelfit",
+ function(object, tolConv=1e-4, verbose=TRUE, ...)
+ {
+ spec <- modelDims(object at model)
+ momType <- spec$momType
+ m <- spec$balMom
+ ACTmom <- spec$ACTmom
+ conv <- c(Lambda=object at lconvergence==0, Coef= object at convergence == 0)
+ x <- model.matrix(object at model, "balancingCov")
+ z <- model.matrix(object at model)[,-1, drop=FALSE]
+ nZ <- ncol(z)
+ pt <- getImpProb(object, ...)$pt
+ pt1 <- lapply(1:nZ, function(i) pt[z[,i]==1]/sum(pt[z[,i]==1]))
+ pt0 <- pt[rowSums(z)==0]/sum(pt[rowSums(z)==0])
+ m0 <- colSums(x[rowSums(z)==0,,drop=FALSE]*pt0)
+ m1 <- sapply(1:nZ, function(i) colSums(x[z[,i]==1,,drop=FALSE]*pt1[[i]]))
+ mAll <- cbind(m0, m1)
+ n0 <- paste(paste(colnames(z),collapse="=", sep=""),"=0",sep="")
+ colnames(mAll) <- c(n0, paste(colnames(z),"=1",sep=""))
+ chk <- all(abs(mAll-m)<tolConv)
+ conv <- c(conv, Balance=all(chk))
+ momType <- switch(momType,
+ uncondBal = "Unconditional balancing",
+ ACT = "Causal effect on the treated",
+ ACE = "Average causal effect",
+ ACC = "Causal effect on the control",
+ fixedMom = "Balancing based on fixed Moments")
+ if (momType == "ACT" & ACTmom > 1)
+ momType <- paste(momType, "(treatment group ",
+ ACTmom, ")")
+
+ if (verbose)
+ {
+ cat("Convergence details of the Causal estimation\n")
+ cat("********************************************\n")
+ cat(momType,"\n\n")
+ cat("Convergence of the Lambdas: ", conv["Lambda"], "\n",sep="")
+ cat("Convergence of the Coefficients: ", conv["Coef"], "\n",sep="")
+ cat("Achieved moment balancing: ", conv["Balance"], "\n\n",sep="")
+ cat("Moments for each group:\n")
+ print.default(mAll, quote=FALSE, right=TRUE)
+ invisible()
+ } else {
+ return(list(conv=conv, moments=mAll))
+ }
+ })
+
Added: pkg/causalGel/man/causalGEL.Rd
===================================================================
--- pkg/causalGel/man/causalGEL.Rd (rev 0)
+++ pkg/causalGel/man/causalGEL.Rd 2019-12-03 22:40:22 UTC (rev 158)
@@ -0,0 +1,107 @@
+\name{causalGEL}
+
+\alias{causalGEL}
+
+\title{Causal inference using balancing methods}
+
+\description{
+It fit a causality model using balancing based on GEL methods. It
+creates an object of class \code{"causalGelfit"}.
+}
+\usage{
+
+causalGEL(g, balm, data, theta0=NULL,
+ momType=c("ACE","ACT","ACC", "uncondBal","fixedMom"),
+ popMom = NULL, rhoFct=NULL,ACTmom=1L,
+ gelType = c("EL", "ET", "EEL", "ETEL", "HD", "ETHD","REEL"),
+ initTheta = c("gmm","theta0"), getVcov=FALSE,
+ lambda0=NULL,
+ lamSlv=NULL, coefSlv= c("optim","nlminb","constrOptim"),
+ lControl=list(), tControl=list())
+}
+\arguments{
+
+ \item{g}{A formula that links the outcome variable to the treatment
+ indicators.}
+
+ \item{balm}{A formula or a matrix with balancing covariates}
+
+ \item{data}{A data.frame or a matrix with column names.}
+
+ \item{theta0}{A vector of starting values (optional). If not provided,
+ the least squares method is use to generate them}
+
+ \item{momType}{How the moments of the covariates should be
+ balanced. By default, it is balanced using the sample mean of the
+ covariates, which corresponds to the ACE. Alternatively, to the
+ sample moments of the treated group (ACT), the control group (ACC),
+ or to a known population mean. The option 'uncondBal' means that it
+ is unconditionally balanced.}
+
+ \item{popMom}{A vector of population moments to use for balancing. It
+ can be used if those moments are available from a census, for
+ example. When available, it greatly improves efficiency.}
+
+ \item{rhoFct}{An optional function that return \eqn{\rho(v)}. This is
+ for users who want a GEL model that is not built in the package. The
+ four arguments of the function must be \code{"gmat"}, the matrix of
+ moments, \code{"lambda"}, the vector of Lagrange multipliers,
+ \code{"derive"}, which specify the order of derivative to return, and
+ \code{k} a numeric scale factor required for time series and kernel
+ smoothed moments.}
+
+ \item{ACTmom}{When \code{momType} is set to 'ACT', that integer
+ indicates which treated group to use to balance the covariates.}
+
+ \item{gelType}{"EL" for empirical likelihood, "ET" for exponential tilting,
+ "EEL" for Euclidean empirical likelihood, "ETEL" for exponentially
+ tilted empirical likelihood of Schennach(2007), "HD" for Hellinger
+ Distance of Kitamura-Otsu-Evdokimov (2013), and "ETHD" for the
+ exponentially tilted Hellinger distance of Antoine-Dovonon
+ (2015). "REEL" is a restricted version of "EEL" in which the
+ probabilities are bounded below by zero. In that case, an analytical
+ Kuhn-Tucker method is used to find the solution.}
+
+ \item{initTheta}{Method to obtain the starting values for the
+ coefficient vector. By default the GMM estimate with identity matrix
+ is used. The second argument means that the theta0 of the
+ object, if any, should be used.}
+
+ \item{lambda0}{Manual starting values for the Lagrange
+ multiplier. By default, it is a vector of zeros.}
+
+ \item{getVcov}{Should the method computes the covariance matrices of the
+ coefficients and Lagrange multipliers.}
+
+ \item{lamSlv}{An alternative solver for the Lagrange multiplier. By
+ default, either \code{\link{Wu_lam}}, \code{\link{EEL_lam}},
+ \code{\link{REEL_lam}} or \code{\link{getLambda}} is
+ used.}
+
+ \item{coefSlv}{Minimization solver for the coefficient vector.}
+
+ \item{lControl}{A list of controls for the Lagrange multiplier
+ algorithm.}
+
+ \item{tControl}{A list of controls for the coefficient algorithm.}
+
+}
+
+\value{
+'causalGEL' returns an object of classesof \code{"causalGelfit"}.
+ }
+
+\examples{
+data(nsw)
+
+balm <- ~age+ed+black+hisp:married+nodeg+re75+I(re75^2)
+g <- re78~treat
+
+fit1 <- causalGEL(g, balm, nsw, gelType="ET")
+fit1
+
+fit2 <- causalGEL(g, balm, nsw, gelType="EL")
+fit2
+
+}
+
Added: pkg/causalGel/man/checkConv-methods.Rd
===================================================================
--- pkg/causalGel/man/checkConv-methods.Rd (rev 0)
+++ pkg/causalGel/man/checkConv-methods.Rd 2019-12-03 22:40:22 UTC (rev 158)
@@ -0,0 +1,39 @@
+\name{checkConv-methods}
+\docType{methods}
+
+\alias{checkConv}
+\alias{checkConv-methods}
+\alias{checkConv,causalGelfit-method}
+
+\title{Methods that returns information about convergence.}
+\description{
+Returns convergence codes and compares moments for the treated and the
+control groups.
+}
+\usage{
+
+\S4method{checkConv}{causalGelfit}(object, tolConv=1e-4, verbose=TRUE, \dots)
+
+}
+\arguments{
+ \item{object}{An object of class \code{"causalGelfit"}}
+ \item{tolConv}{Tolerance to decide if balancing is achieved. }
+ \item{verbose}{If FALSE nothinf is printed.}
+ \item{\dots}{Arguments to pass to other methods.}
+ }
+
+\examples{
+data(nsw)
+
+balm <- ~age+ed+black+hisp:married+nodeg+re75+I(re75^2)
+g <- re78~treat
+
+model <- causalModel(g, balm, nsw)
+fit <- modelFit(model)
+checkConv(fit)
+
+}
+
+
+\keyword{methods}
+\keyword{Balancing moments}
Modified: pkg/causalGel/man/subsetting.Rd
===================================================================
--- pkg/causalGel/man/subsetting.Rd 2019-11-22 22:54:38 UTC (rev 157)
+++ pkg/causalGel/man/subsetting.Rd 2019-12-03 22:40:22 UTC (rev 158)
@@ -1,9 +1,13 @@
\name{[-causalGel}
\docType{methods}
-\alias{[,causalGel,missing,numeric-method}
+\alias{[,causalGel,missing,numericORlogical-method}
\alias{[,causalGel,numeric,missing-method}
-\alias{[,causalGel,numeric,numeric-method}
+\alias{[,causalGel,numeric,numericORlogical-method}
+\alias{[,causalGelfit,missing,numericORlogical-method}
+\alias{[,causalGelfit,numeric,missing-method}
+\alias{[,causalGelfit,numeric,numericORlogical-method}
+
\title{Subsetting methods}
\description{
Different subsetting methods for S4 class objects of the package. The
@@ -13,19 +17,50 @@
\section{Methods}{
\describe{
+\item{\code{signature(x = "causalGel", i = "missing", j = "numericORlogical")}}{
+ Subsets observations.
+}
+
\item{\code{signature(x = "causalGel", i = "numeric", j = "missing")}}{
- Selects observations
+ Selects balancing covatriates.
}
-\item{\code{signature(x = "causalGel", i = "missing", j = "numeric")}}{
- Selects balancing moments
+\item{\code{signature(x = "causalGel", i = "numeric", j = "numericORlogical")}}{
+ Selects balancing covariates and observations.
}
-\item{\code{signature(x = "causalGel", i = "numeric", j = "numeric")}}{
- \code{i} selects the observations and "j" selects the balancing
- moments.
+\item{\code{signature(x = "causalGelfit", i = "missing", j = "numericORlogical")}}{
+ Subsets observations and refit the model.
}
+
+\item{\code{signature(x = "causalGelfit", i = "numeric", j = "missing")}}{
+ Selects balancing covatriates and refit the model.
+}
+
+\item{\code{signature(x = "causalGelfit", i = "numeric", j = "numericORlogical")}}{
+ Selects balancing covariates and observations and refit the model.
+}
+
+
+
}}
+\examples{
+data(nsw)
+
+balm <- ~age+ed+black+hisp:married+nodeg+re75+I(re75^2)
+g <- re78~treat
+
+model <- causalModel(g, balm, nsw)
+model[1:5, 1:500]
+
+fit <- modelFit(model)
+fit[1:5,1:500]
+
+}
+
+
+
\keyword{methods}
\keyword{subsetting}
+
Modified: pkg/causalGel/vignettes/causalGel.Rnw
===================================================================
--- pkg/causalGel/vignettes/causalGel.Rnw 2019-11-22 22:54:38 UTC (rev 157)
+++ pkg/causalGel/vignettes/causalGel.Rnw 2019-12-03 22:40:22 UTC (rev 158)
@@ -1,25 +1,24 @@
-\documentclass[11pt,letterpaper]{article}
+\documentclass[12pt]{article}
+\usepackage{graphicx}
\usepackage{amsthm}
-\usepackage[hmargin=2cm,vmargin=2.5cm]{geometry}
+\usepackage[hmargin=1in,vmargin=1in]{geometry}
\usepackage[utf8x]{inputenc}
+\usepackage[active]{srcltx}
\usepackage{amsmath}
+\usepackage{amssymb}
\usepackage{verbatim}
\usepackage[round]{natbib}
\usepackage{amsfonts}
\usepackage{amssymb}
\usepackage{graphicx}
+\usepackage{multirow}
+\usepackage{rotating}
+\usepackage{footmisc}
+\usepackage{xspace}
+\usepackage{soul}
\usepackage{mathtools}
-\usepackage{hyperref}
-\hypersetup{
- colorlinks,
- citecolor=black,
- filecolor=black,
- linkcolor=black,
- urlcolor=black
-}
-
\newtheorem{theorem}{Theorem}
\newtheorem{col}{Corollary}
\newtheorem{lem}{Lemma}
@@ -26,11 +25,15 @@
\newtheorem{ass}{Assumption}
\DeclareMathOperator{\Ex}{E}
+\DeclareMathOperator{\Diag}{Diag}
\newcommand{\ellhat}{\hat \ell}
+
\newcommand\independent{\protect\mathpalette{\protect\independenT}{\perp}}
\def\independenT#1#2{\mathrel{\rlap{$#1#2$}\mkern2mu{#1#2}}}
% additional commands
+\newcommand{\gtilde}{\tilde g}
+\newcommand{\ghat}{\hat g}
\newcommand{\GEL}{GEL\xspace}
\newcommand{\as}{\ \text{a.s.}\xspace}
\newcommand{\HD}{HD\xspace}
@@ -38,7 +41,7 @@
\newcommand{\GMM}{GMM\xspace}
\newcommand{\LRT}{LRT\xspace}
\newcommand{\EL}{EL\xspace}
-\newcommand{\EUL}{EUL\xspace}
+\newcommand{\EEL}{EEL\xspace}
\newcommand{\ELB}{ELB\xspace}
\newcommand{\MCAR}{MCAR\xspace}
\newcommand{\ET}{ET\xspace}
@@ -49,6 +52,8 @@
\newcommand{\ACT}{ACT\xspace}
\newcommand{\ACN}{ACN\xspace}
\newcommand{\PFC}{PFC\xspace}
+\newcommand{\CH}{CH\xspace}
+
\newcommand{\ate}{\tau_{\text{\ATE}}}
\newcommand{\atehat}{\hat\tau_{\text{\ATE}}}
\newcommand{\ace}{\tau_{\text{\ACE}}}
@@ -79,17 +84,26 @@
\newcommand{\Ztilde}{\tilde Z}
\newcommand{\Ytilde}{\tilde Y}
\newcommand{\epsilontilde}{\tilde\epsilon}
+\newcommand{\tautrue}{\tau^0}
+\newcommand{\pitrue}{\pi^0}
+\newcommand{\pithat}{\hat \pi}
+\newcommand{\uhat}{\hat u}
+
+
\DeclareMathOperator{\argmin}{argmin}
\DeclareMathOperator{\argmax}{argmax}
\DeclarePairedDelimiter{\abs}{\lvert}{\rvert}
\DeclarePairedDelimiter{\norm}{\lVert}{\rVert} % Norm
+
\newcommand{\thetahat}{\hat \theta}
+\newcommand{\etahat}{\hat \eta}
+\newcommand{\etatrue}{\eta^0}
\newcommand{\lambdahat}{\hat \lambda}
\newcommand{\betahat}{\hat \beta}
\newcommand{\vp}{p}
\newcommand{\spv}{\mathbb{P}}
\newcommand{\pdiv}{D}
-\newcommand{\vone}{e}
+\newcommand{\vone}{1_n}
\newcommand{\tp}[1]{#1^T}
\newcommand{\phat}{\hat p}
\newcommand{\cvgindist}{\xrightarrow{\text{d}}}
@@ -112,6 +126,60 @@
Methods with R}}
\date{\today}
+<<extract, message=FALSE, warning=FALSE, echo=FALSE>>=
+library(texreg)
+setMethod("extract", "causalGelfit",
+ function(model, includeSpecTest=FALSE,
+ specTest=c("LR","LM","J"), include.nobs=TRUE,
+ include.obj.fcn=TRUE, ...)
+ {
+ specTest <- match.arg(specTest)
+ s <- summary(model, ...)
+ wspecTest <- grep(specTest, rownames(s at specTest@test))
+ spec <- modelDims(model at model)
+ coefs <- s at coef
+ names <- rownames(coefs)
+ coef <- coefs[, 1]
+ se <- coefs[, 2]
+ pval <- coefs[, 4]
+ n <- model at model@n
+ gof <- numeric()
+ gof.names <- character()
+ gof.decimal <- logical()
+ if (includeSpecTest) {
+ if (spec$k == spec$q)
+ {
+ obj.fcn <- NA
+ obj.pv <- NA
+ } else {
+ obj.fcn <- s at specTest@test[wspecTest,1]
+ obj.pv <- s at specTest@test[wspecTest,3]
+ }
+ gof <- c(gof, obj.fcn, obj.pv)
+ gof.names <- c(gof.names,
+ paste(specTest,"-test Statistics", sep=""),
+ paste(specTest,"-test p-value", sep=""))
+ gof.decimal <- c(gof.decimal, TRUE, TRUE)
+ }
+ if (include.nobs == TRUE) {
+ gof <- c(gof, n)
+ gof.names <- c(gof.names, "Num.\\ obs.")
+ gof.decimal <- c(gof.decimal, FALSE)
+ }
+ nbal <- length(model at model@X at balCov)
+ gof.names <- c(gof.names, "Num. Bal. Cov.")
+ gof <- c(gof, nbal)
+ gof.decimal <- c(gof.decimal, FALSE)
+ tr <- createTexreg(coef.names = names, coef = coef,
+ se = se, pvalues = pval,
+ gof.names = gof.names, gof = gof,
+ gof.decimal = gof.decimal)
+ return(tr)
+ })
+@
+
+
+
\begin{document}
\maketitle
@@ -144,9 +212,10 @@
%\VignettePackage{causalGel}
%\VignetteEngine{knitr::knitr}
-<<echo=FALSE>>=
+<<echo=FALSE, message=FALSE, warning=FALSE>>=
library(knitr)
opts_chunk$set(size='footnotesize')
+library(causalGel)
@
\newpage
@@ -155,9 +224,467 @@
\section{Introduction}\label{sec:intro}
-We want to do causal inference using the GEL of \cite{newey-smith04}
+In the following, let $X \in \reals^q$ be a random $q$-vector of
+covariates, $Y(j)$ be the random (potential) outcome when the subject
+is exposed to the treatment $j$, where $0 \leq j \leq p$ and $j = 0$
+corresponds to the control treatment. We consider $Z_0$ as an
+indicator for the control treatment, and in the case when the
+observational study does not have a well defined control treatment,
+then we treat $Z_0$ as the baseline treatment, i.e., the treatment
+with which we are interested to compare all other treatments. Let
+further $Z = (Z_j) \in \reals^{p}$ be a random $p$-vector of treatment
+indicators (other than the control treatment), with $Z_j = 1$ and $Z_k
+= 0$ for all $k \neq j$ if the subject receives the treatment $j$,
+where $j = 1 , \ldots, p$. Since all individuals receive only one
+treatment, then $\sum_{j=0}^p Z_j = 1$, and we only observe $Y =
+\sum_{j = 0}^p Y(j) Z_j$. Note that we do not consider here the case
+of clinical trials with non-compliance where the subjects are assigned
+to treatments but they may receive other treatments, and thus, the
+available data consist of only the treatment received and the outcome
+under the treatment received in addition to the covariates for all
+subjects.
+\subsection{Randomized experiments}
+\label{sec:rand}
+
+Let $\theta_1 \in \reals$, $\theta_2 = (\theta_{2,1}, \ldots,
+\theta_{2,p}) \in \reals^{p}$, and $\theta_3 = (\theta_{3,1}, \ldots,
+\theta_{3,p}) \in \reals^{p}$ be defined as $\theta_1 = \Ex(Y(0))$,
+$\theta_{2,j} = [\Ex(Y(j))-\Ex(Y(0))]$, $\theta_{3,j} = \Ex(Z_j)$, $1 \leq j \leq
+p$, and let $\theta = (\theta_1, \theta_2, \theta_3) \in \reals^{2p +
+ 1}$. By the definition of conditional expectation,
+\begin{equation}
+ \label{eq:1}
+ \Ex(Y \given Z_j = 1) = \Ex(Y(j)) = \Ex(Y Z_j) / \Ex(Z_j) \,,
+ \quad 1 \leq j \leq p \,.
+ \end{equation}
+ Hence, $\Ex[(Y - \theta_1 - \tp{\theta_2} Z) Z_j] = 0$ for all $1 \leq j \leq
+ p$, and thus,
+ \begin{equation}
+ \label{eq:2}
+ \Ex\bigl((Y - \theta_1- \tp{\theta_2} Z) Z \bigr) = 0 \,.
+ \end{equation}
+ By the law of total expectation formula,
+ \begin{equation}
+ \label{eq:3}
+ \Ex(Y) = \sum_{j=0}^p \Ex(Y(j)) \Pr(Z_j = 1) \,.
+ \end{equation}
+ Hence,
+ \begin{equation}
+ \label{eq:4}
+ \Ex(Y - \theta_1 - \tp{\theta_2} Z) = 0 \,.
+ \end{equation}
+ By the definition of $\theta_3$, $\theta_{3,j} = \Ex(Z_j)$ for $1
+ \leq j \leq p$; hence
+ \begin{equation}
+ \label{eq:5}
+ \Ex(Z - \theta_{3}) = 0 \,.
+ \end{equation}
+ Note that $\Ex(Z_0) = 1 - \sum_{j=1}^p \theta_{3,j}$.
+
+ In randomized trials, $(Z_0,Z)\independent X$, which implies that
+ $\Ex[(Z_j-\theta_{3,j})u(X)]=0$ for $1\leq j\leq p$, where
+ $u(X)\in\reals^k$ is a $k$-vector of functions of $X$. Hence,
+ \begin{equation}
+ \label{eq:6}
+ \Ex[(Z - \theta_{3})\otimes u(X)] = 0 \,.
+ \end{equation}
+To illustrate what $u(x)$ can be, suppose $x = (x_1, x_2) \in
+\reals^2$, then we can define $u(x) = x \in \reals^2$, or $u(x) =
+(x_1, x_2, x_1 x_2) \in \reals^3$, or $u(x) = (x_1, x_2, x_1^2, x_2^2,
+x_1 x_2) \in \reals^5$.
+
+Let $T = (X, Z, Y) \in \reals^{p+q+1}$ denote a generic random
+variable distributed according to a distribution on
+$\reals^{p+q+1}$\footnote{Notice that we omit $Z_0$ from $T$ because
+ its value is implied by $Z$ through $Z_0=1-\sum_{j=1}^p
+ Z_j$.}. Therefore, Equations \eqref{eq:2} and \eqref{eq:4} to
+\eqref{eq:6} imply that the parameter of interest $\thetatrue$
+satisfies the following moment conditions:
+\begin{equation}
+ \label{eq:7}
+ \Ex(g(T, \thetatrue)) = 0 \,,
+\end{equation}
+where $\thetatrue = (\thetatrue_1, \thetatrue_2, \thetatrue_3) \in
+\reals^{2p + 1}$ and $g(t, \theta)$ is defined as
+\begin{equation}
+ \label{eq:8}
+ g(t; \theta) =
+ \begin{pmatrix}
+ y - \theta_1 - \tp{\theta_2} z \\
+ (y - \theta_1 - \tp{\theta_2} z) z \\
+ z - \theta_{3} \\
+ (z - \theta_3) \otimes u(x) \\
+ \end{pmatrix} \,, \quad
+ t = (x, z, y) \in \reals^{p + q + 1} \,.
+\end{equation}
+The \ACE of the treatment $j$ is given by $\tautrue_j
+= \thetatrue_{2, j}$, for $1 \leq j \leq p$.
+
+The package offers a way to estimate $\thetatrue$ using the
+generalized method of moments (\GEL). Using the primal form of \GEL,
+the estimator of $\thetatrue$ is defined as:
+\begin{equation}
+ \label{eq:9}
+ \thetahat = \argmin_{\theta \in \reals^{2p+1}} \min_{p \in \spv^n}
+ \Bigl\{ \pdiv_\gamma(\vp, n^{-1} \vone)
+ : \sum_{i=1}^n p_i g(T_i; \theta) = 0 \Bigr\} \,,
+\end{equation}
+where
+\begin{equation*}
+ \spv^n = \Bigl\{ \vp = (p_i) \in \reals^n :
+ \sum_{i=1}^{n} p_{i}=1 \,, p_{i} \geq 0 \Bigr\} \,,
+\end{equation*}
+ and $\pdiv_\gamma(\vp, n^{-1} \vone)$ is the power divergence discrepancy
+function \citep{newey-smith04}:
+\begin{equation*}
+ \pdiv_\gamma(\vp, n^{-1}\vone) = \sum_{i=1}^n
+ \frac{(np_i)^{\gamma + 1} - 1}{n \gamma(\gamma + 1)} \,.
+\end{equation*}
+In particular, $\gamma = -1$ corresponds to the empirical likelihood
+(\EL), $\gamma = 0$ corresponds to the exponential tilting (\ET),
+$\gamma = 1$ corresponds to the Euclidean empirical likelihood (\EEL)
+estimator also known as the continuously updated GMM estimator
+ (\CUE), and $\gamma = -1/2$ corresponds to the Hellinger distance
+(\HD) used by \citet{kitamura-otsu-evdokimov13}. \citet{newey-smith04}
+present the \GEL method in its dual form, which is the following saddle
+point problem:
+\begin{equation}
+ \label{eq:10}
+ \thetahat = \argmin_{\theta \in \reals^{2p+1}}
+ \max_{\lambda \in \reals^{1+p(2+k)}} \sum_{i=1}^n
+ \rho_{\gamma}\bigl(\tp{\lambda} g(T_i;\theta)\bigr) \,,
+\end{equation}
+where
+\begin{equation*}
+ \rho_\gamma(v) = - \frac{(1+\gamma v)^{(\gamma+1)/\gamma}}{\gamma + 1} \,.
+\end{equation*}
+In particular $\rho_{-1}(v) = \log(1-v)$ for \EL, $\rho_{0}(v) =
+-\exp(v)$ for \ET, $\rho_1(v) = -1/2 - v -v^2/2$ for \EEL, and
+$\rho_{-1/2}(v) = -2/(1-v/2)$ for \HD. Using the dual form, the
+estimated probability weights from the primal problem are defined as:
+\begin{equation}
+ \label{eq:11}
+ \phat_i(\theta,\lambda) = \frac{
+ \rho_{\gamma}'(\tp{\lambda} g(T_i; \theta))}
+ {\sum_{j=1}^n \rho_{\gamma}'(\tp{\lambda}g(T_i;\theta))} \,,
+\end{equation}
+where $\rho_{\gamma}'(v)$ is the first order derivative of
+$\rho_{\gamma}(v)$.
+
+\subsection{Observational studies}
+\label{sec:obs}
+When the treatment (group) assignment is not random, we can still use
+the \GEL as a weighting method. \GEL is used as a way to re-weight
+the probability of each observation so that our sample is as if it had
+been generated by a randomized experiment. The parameter of interest
+$\thetatrue$ satisfies the following moment conditions:
+\begin{equation}
+ \label{eq:12}
+ \Ex_0(g(T, \thetatrue)) = 0 \,,
+\end{equation}
+where $\thetatrue$ is as in Section \ref{sec:rand}, and $g(t,
+\theta)$ is defined as
+\begin{equation}
+ \label{eq:13}
+ g(t; \theta) =
+ \begin{pmatrix}
+ y - \theta_1 - \tp{\theta_2} z \\
+ (y - \theta_1 - \tp{\theta_2} z) z \\
+ z - \theta_{3} \\
+ (z - \theta_3) \otimes u(x) \\
+ u(x) - u_0
+ \end{pmatrix} \,, \quad
+ t = (x, z, y) \in \reals^{p + q + 1} \,,
+\end{equation}
+where $u_0$ is the expected value of $u(X)$ for a target
+population. Note that while the first three moment conditions (under
+$\Ex_0$) identify the parameters, the fourth moment condition makes
+$Z$ ``almost independent'' of $X$ as $k \to \infty$. The last
+condition is what differentiates randomized experiments from
+observational studies. It imposes moments of $X$ to match the ones
+from a given target population. The choice of $u_0$ is driven by the
+type of causal effect we are interested in. We will present the
+different options in the next section.
+
+\section{Estimating the causal effect}\label{sec:estim}
+
+The package is based on the ``gmm4'' package \citep{gmm4}. As for the
+methods included in the package, there are two ways to proceed. We can
+create a model object and use the \textit{modelFit} method to estimate
+it, or we can directly estimate a model using a general function. We
+first present the first approach because it helps better understand
+the structure of the package.
+
+\subsection{An S4 class object for causal inference}\label{sec:causalobj}
+
+To illustrate the methods, we consider the experiment analyzed first
+by \cite{lalonde86} and used later by \cite{dehejia-wahba99,
+ dehejia-wahba02}. The objective of the original paper was to measure
+the effect of a training program on the real income. The dependent
+variable is the real income in 1978 and the covariates used for
+matching the treated group to the control are age, education, 1975
+real income and dummy variables for race, marital status, and academic
+achievement.
+
+First, we load the package and the dataset:
+
+<<>>=
+library(causalGel)
+data(nsw)
+## We express income in thousands for better stability
+nsw$re78 <- nsw$re78/1000
+nsw$re75 <- nsw$re75/1000
+@
+
+The model class, is ``causalGel'' which inherits directly from
+``functionGel'' class. The constructor is the causalModel()
+function. The arguments are:
+
+\begin{itemize}
+\item \textit{g}: A formula that defines the regression of the outome
+ on the treatment indicator. For our dataset, the variable ``treat''
+ is the indicator, and ``re78'' is the outcome. The formula is therefore:
+
+<<>>=
+g <- re78~treat
+@
+
+\item \textit{balm}: A formula or a data.frame representing
+ $u(X)$. For example, if we want to balance 1975 income, age,
+ education and race, we would use the following:
+
+<<>>=
+balm <- ~age+ed+black+hisp+re75
+@
+
+\item \textit{theta0}: An optional starting value to be passed to the
+ numerical algorithm.
+
+\item \textit{momType}: This is the main argument to determine which
+ type of causal effect we want to estimate. The options are:
+ \begin{itemize}
+ \item ``ACE'': This one is for estimating the average causal
+ effect. The moment function $g(t;\theta)$ is defined by
+ Equation \eqref{eq:13}, and $\mu_0$ is defined as:
+ \[
+ \mu_0 = \frac{1}{n}\sum_{i=1}^n u(X_i)
+ \]
+ \item ``ACT'': This is for the causal effect on the treated. In
+ that case, the argument ``ACTmom'' determines which of the
+ treatment groups we are refering to. The moment function
+ $g(t;\theta)$ is defined by Equation \eqref{eq:13}, and $\mu_0$
+ is defined as:
+ \[
+ \mu_0 = \frac{1}{n_j}\sum_{i=1}^n Z_{ji}u(X_i)\,,
+ \]
+ where $n_j=\sum_{i=1}^n Z_{ji}$, and $j$ is the value of
+ ``ACTmom''.
+ \item ``ACC'': This is for the causal effect on the control. The moment function
+ $g(t;\theta)$ is defined by Equation \eqref{eq:13}, and $\mu_0$
+ is defined as:
+ \[
+ \mu_0 = \frac{1}{n_0}\sum_{i=1}^n Z_{0i}u(X_i)\,,
+ \]
+ where $n_0=\sum_{i=1}^n Z_{0i}$.
+ \item ``uncondBal'': This is used to estimate the average causal
+ effect in randomized trials. The moment function $g(t;\theta)$
+ is defined by Equation \eqref{eq:8}. In the case of
+ observational data, it is not recommended because the moments
+ are balanced, but represent estimates of the moments of an
+ undefined population.
+ \item ``fixedMom'': The causal effect of a target population for
+ which $\Ex(\mu(X)$ is known is estimated. The moment function
+ $g(t;\theta)$ is defined by Equation \eqref{eq:13}, and $\mu_0$
+ is set to ``popMom'', which is another argument of
+ causalModel() (see below).
+ \end{itemize}
[TRUNCATED]
To get the complete diff run:
svnlook diff /svnroot/gmm -r 158
More information about the Gmm-commits
mailing list