[Gmm-commits] r117 - in pkg/gmm: R man

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Wed Sep 27 20:51:43 CEST 2017


Author: chaussep
Date: 2017-09-27 20:51:43 +0200 (Wed, 27 Sep 2017)
New Revision: 117

Modified:
   pkg/gmm/R/ategel.R
   pkg/gmm/R/getModel.R
   pkg/gmm/man/ATEgel.Rd
Log:
Fixed a bug with ATEgel covariance matrix estimation and added the option w to ATEgel

Modified: pkg/gmm/R/ategel.R
===================================================================
--- pkg/gmm/R/ategel.R	2017-09-26 19:18:02 UTC (rev 116)
+++ pkg/gmm/R/ategel.R	2017-09-27 18:51:43 UTC (rev 117)
@@ -1,4 +1,5 @@
-ATEgel <- function(g, balm, y=NULL, treat=NULL, tet0=NULL,momType=c("bal","balSample","ATT"),
+ATEgel <- function(g, balm, w=NULL, y=NULL, treat=NULL, tet0=NULL,
+                   momType=c("bal","balSample","ATT"),
                    popMom = NULL, family=c("linear","logit", "probit"),
                    type = c("EL", "ET", "CUE", "ETEL", "HD", "ETHD"),
                    tol_lam = 1e-9, tol_obj = 1e-9, tol_mom = 1e-9, maxiterlam = 100,
@@ -14,7 +15,7 @@
         family <- match.arg(family)
         TypeGel <- "ateGel"
 
-	all_args <- list(g = g, x = balm, y=y, treat=treat, tet0 = tet0, type = type,
+	all_args <- list(g = g, x = balm, w=w, y=y, treat=treat, tet0 = tet0, type = type,
                          tol_lam = tol_lam, tol_obj = tol_obj, tol_mom = tol_mom,
                          maxiterlam = maxiterlam, optfct = optfct, 
                          optlam = optlam, model = model, X = X, Y = Y,
@@ -43,15 +44,22 @@
 .momentFctATE <- function(tet, dat)
     {
         x <- dat$x
-        #k <- attr(dat, "k")
         k <- dat$k
-        ZT <- c(x[,2:(k+1),drop=FALSE]%*%tet[1:k])
+        if (is.null(dat$w))
+            {
+                Z <- x[,2:(k+1),drop=FALSE]
+            } else {
+                Z <- cbind(x[,2:(k+1)], dat$w)
+            }
+        tetz <- tet[1:ncol(Z)]
+        tetb <- tail(tet, k-1)
+        ZT <- c(Z%*%tetz)
         if (is.null(attr(dat, "family")))
             e <- x[,1] - ZT
         else
             e <- x[,1] - attr(dat, "family")$linkinv(ZT)
-        gt1 <- e * x[,2:(k+1)]
-        gt2 <- sweep(x[,3:(k+1),drop=FALSE],2,tet[(k+1):(2*k-1)],"-")
+        gt1 <- e * Z
+        gt2 <- sweep(x[,3:(k+1),drop=FALSE],2,tetb,"-")
         gt3 <- lapply(1:(k-1), function(i) gt2[,i]*x[,-(1:(k+1))])
         gt3 <- do.call(cbind, gt3)
         gt <- cbind(gt1,gt2,gt3)
@@ -81,25 +89,32 @@
         if (is.null(pt))
             pt <- rep(1/nrow(dat$x), nrow(dat$x))
         x <- dat$x
-        #k <- attr(dat, "k")
         k <- dat$k
         q <- dat$nh*(k-1)+2*k-1
-        Z <- x[,2:(k+1), drop=FALSE]
-        ZT <- c(Z%*%tet[1:k])
-        G <- matrix(0, q, 2*k-1)
+        if (is.null(dat$w))
+            {
+                Z <- x[,2:(k+1),drop=FALSE]
+            } else {
+                Z <- cbind(x[,2:(k+1)], dat$w)
+                q <- q+ncol(dat$w)
+            }
+        l <- ncol(Z)
+        ntet <- length(tet)
+        ZT <- c(Z%*%tet[1:l])
+        G <- matrix(0, q, ntet)
         if (is.null(attr(dat, "family")))
             {
                 tau <- rep(1, nrow(x))
             } else {                
                 tau <- attr(dat, "family")$mu.eta(ZT)
             }
-        G11 <- lapply(1:k, function(i) -colSums(pt*Z[,i]*tau*Z))
-        G[1:k, 1:k] <- do.call(rbind, G11)
-        G[(k+1):(2*k-1), (k+1):(2*k-1)] <- -sum(pt)*diag(k-1)
+        G11 <- lapply(1:l, function(i) -colSums(pt*Z[,i]*tau*Z))
+        G[1:l, 1:l] <- do.call(rbind, G11)
+        G[(l+1):ntet, (l+1):ntet] <- -sum(pt)*diag(k-1)
         uK <- colSums(pt*x[,-(1:(k+1)),drop=FALSE])
-        G[(2*k):q, (k+1):(2*k-1)] <- -kronecker(diag(k-1), uK)
+        G[(l+k):q, (l+1):ntet] <- -kronecker(diag(k-1), uK)
         if (attr(dat, "momType") != "bal" | !is.null(attr(dat, "popMom")))
-            G <- rbind(G, matrix(0, dat$nh, 2*k-1))
+            G <- rbind(G, matrix(0, dat$nh, ntet))
         return(G)
     }
 
@@ -109,7 +124,7 @@
         G <- .DmomentFctATE(tet, dat, pt)
         #k <- attr(dat, "k")
         k <- dat$k
-        q <- dat$nh*(k-1)+2*k-1
+        q <- nrow(G)-dat$nh
         if (is.null(pt))
             pt <- rep(1/nrow(dat$x), nrow(dat$x))
         if (attr(dat, "momType") != "bal" & is.null(attr(dat, "popMom")))
@@ -127,9 +142,15 @@
         theta <- object$coefficients
         gt <- object$gt
         rho1 <- .rho(x=gt, lamb=lam, derive=1, type=object$type)
-        rho2 <- .rho(x=gt, lamb=lam, derive=1, type=object$type)
-        Z <- object$dat$x[,2:(k+1)]
-        ZT <- c(Z%*%theta[1:k])
+        rho2 <- .rho(x=gt, lamb=lam, derive=2, type=object$type)
+        if (is.null(object$dat$w))
+            {
+                Z <- object$dat$x[,2:(k+1)]
+            } else {
+                Z <- cbind(object$dat$x[,2:(k+1)], object$dat$w)
+            }
+        l <- ncol(Z)
+        ZT <- c(Z%*%theta[1:l])
         X <- object$dat$x[,-(1:(k+1)), drop=FALSE]
         family <- attr(object$dat, "family")
         momType <- attr(object$dat, "momType")
@@ -141,10 +162,10 @@
                 tau1 <- family$mu.eta(ZT)
                 tau2 <- family$mu.eta2(ZT, family)
             }
-        lG1 <- sapply(1:k, function(i) -(tau1*Z[,i]*Z)%*%lam[1:k])
-        q2 <- nh*(k-1)+2*k-1
-        lamM <- matrix(lam[(2*k):q2], ncol=(k-1))
-        lG2 <- sapply(1:(k-1), function(i) -lam[k+i]-X%*%lamM[,i])
+        lG1 <- sapply(1:l, function(i) -(tau1*Z[,i]*Z)%*%lam[1:l])
+        q2 <- nh*(k-1)+l+k-1
+        lamM <- matrix(lam[(l+k):q2], ncol=(k-1))
+        lG2 <- sapply(1:(k-1), function(i) -lam[l+i]-X%*%lamM[,i])
         lG <- cbind(lG1, lG2)
         G <- .DmomentFctATE2(theta, object$dat, rho1)
         G22 <- crossprod(rho2*gt, gt)/n
@@ -155,10 +176,10 @@
                 G12 <- t(G)/n + crossprod(rho2*lG, gt)/n
                 if (!is.null(family))
                     {
-                        G12tmp <- lapply(1:k, function(i)
-                            colSums(-rho1*tau2*Z[,i]*c(Z%*%lam[1:k])*Z))
+                        G12tmp <- lapply(1:l, function(i)
+                            colSums(-rho1*tau2*Z[,i]*c(Z%*%lam[1:l])*Z))
                         G12.2 <- matrix(0, nrow(G12), ncol(G12))
-                        G12.2[1:k,1:k] <- do.call(rbind, G12tmp)
+                        G12.2[1:l,1:l] <- do.call(rbind, G12tmp)
                         G12 <- G12 + G12.2/n
                     }
                 Gamma <- rbind(cbind(G11, G12),
@@ -170,25 +191,25 @@
                 G12 <- t(G)/n + crossprod(rho2*lG, gt)/n
                 if (!is.null(family))
                     {
-                        G12tmp <- lapply(1:k, function(i)
-                            colSums(-rho1*tau2*Z[,i]*c(Z%*%lam[1:k])*Z))
+                        G12tmp <- lapply(1:l, function(i)
+                            colSums(-rho1*tau2*Z[,i]*c(Z%*%lam[1:l])*Z))
                         G12.2 <- matrix(0, nrow(G12), ncol(G12))
-                        G12.2[1:k,1:k] <- do.call(rbind, G12tmp)
+                        G12.2[1:l,1:l] <- do.call(rbind, G12tmp)
                         G12 <- G12 + G12.2/n
                     }                
                 if (momType == "balSample")
                     Xi <- rep(1,n)
                 else
-                    Xi <- Z[,-1]
+                    Xi <- Z[,(2:k)]
                 nj <- sum(Xi)
                 lam2 <- -sum(rho1)*tail(lam,nh)/nj
                 theta4 <- colSums(Xi*X)/nj
-                G13 <- rbind(matrix(0, 2*k-1, nh), -nj/n*diag(nh))
+                G13 <- rbind(matrix(0, l+k-1, nh), -nj/n*diag(nh))
                 G23 <- matrix(0,q, nh)
                 G33 <- matrix(0, nh, nh)
                 Psi <- cbind(rho1*lG, rho1*gt,
                              Xi*sweep(X, 2, theta4, "-"))
-                Psi[,(2*k):(2*k+nh-1)] <- Psi[,(2*k):(2*k+nh-1)]-Xi%*%t(lam2)
+                Psi[,(l+k):(l+k+nh-1)] <- Psi[,(l+k):(l+k+nh-1)]-Xi%*%t(lam2)
                 Gamma <- rbind(cbind(G11, G12, G13),
                                cbind(t(G12), G22, G23),
                                cbind(t(G13), t(G23), G33))
@@ -267,6 +288,7 @@
             }
         object$allArg$g <- .momentFctATE
         object$allArg$y <- NULL
+        object$allArg$w <- NULL
         object$allArg$treat <- NULL
         object$allArg$popMom <- NULL
         object$allArg$momType <- NULL

Modified: pkg/gmm/R/getModel.R
===================================================================
--- pkg/gmm/R/getModel.R	2017-09-26 19:18:02 UTC (rev 116)
+++ pkg/gmm/R/getModel.R	2017-09-27 18:51:43 UTC (rev 117)
@@ -438,6 +438,13 @@
         if(is(object$g, "formula"))
             {
                 dat <- getDat(object$g, object$x, data = object$data)
+                if (!is.null(object$w))
+                    if (is(object$w, "formula"))
+                        {
+                            dat$w <- model.matrix(object$w, object$data)[,-1,drop=FALSE]
+                        } else {
+                            stop("w must be a formula")
+                        }                
                 if (dat$ny>1 | dat$ny==0)
                     stop("You need one and only one dependent variable")
                 k <- dat$k
@@ -459,13 +466,19 @@
                     }
                 if (is.null(object$tet0))
                     {
-                        tet0 <- lm(dat$x[,1]~dat$x[,3:(k+1)])$coef
+                        if (is.null(dat$w))
+                            {
+                                tet0 <- lm(dat$x[,1]~dat$x[,3:(k+1)])$coef
+                            } else {
+                                tet0 <- lm(dat$x[,1]~dat$x[,3:(k+1)]+dat$w)$coef
+                            }
                         tet0 <- c(tet0, colMeans(dat$x[,3:(k+1),drop=FALSE]))
                         names(tet0) <- NULL
                         object$tet0 <- tet0
                     } else {
-                        if (length(object$tet0) != (2*k-1))
-                            stop("tet0 should have a length equal to 2x(number of treatments)+1")
+                        ntet0 <- 2*k-1 + ifelse(is.null(dat$w), 0, ncol(dat$w))
+                        if (length(object$tet0) != ntet0)
+                            stop("tet0 should have a length equal to 2x(number of treatments)+1+number of w's if any")
                     }
                 if (object$family != "linear")
                     {
@@ -483,14 +496,22 @@
                 q <- dat$nh + 2*k+1
                 if (object$momType != "bal" | !is.null(object$popMom))
                     q  <- q+dat$nh
+                if (!is.null(dat$w))
+                    {
+                        q <- q+ncol(dat$w)
+                        namew <- colnames(dat$w)
+                    } else {
+                        namew <- NULL
+                    }
                 object$gradv <- .DmomentFctATE
                 object$x <- dat
                 object$gradvf <- FALSE
                 object$gform<-object$g                
                 namex <- colnames(dat$x[,2:(k+1)])
                 nameh <- colnames(dat$x[,(k+2):ncol(dat$x), drop=FALSE])
-                namesCoef <- c(namex, paste("TreatProb", 1:(k-1), sep=""))
-                namesgt <- paste(rep(paste("Treat", 1:(k-1), sep=""), rep(dat$nh, k-1)), "_", rep(nameh, k-1), sep="")
+                namesCoef <- c(namex, namew, paste("TreatProb", 1:(k-1), sep=""))
+                namesgt <- paste(rep(paste("Treat", 1:(k-1), sep=""),
+                                     rep(dat$nh, k-1)), "_", rep(nameh, k-1), sep="")
                 object$namesgt <- c(namesCoef,namesgt)
                 if (object$momType != "bal" | !is.null(object$popMom))
                     object$namesgt <- c(object$namesgt, paste("bal_", nameh, sep=""))

Modified: pkg/gmm/man/ATEgel.Rd
===================================================================
--- pkg/gmm/man/ATEgel.Rd	2017-09-26 19:18:02 UTC (rev 116)
+++ pkg/gmm/man/ATEgel.Rd	2017-09-27 18:51:43 UTC (rev 117)
@@ -10,7 +10,7 @@
 balanced by GEL.
 }
 \usage{
-ATEgel(g, balm, y=NULL, treat=NULL, tet0=NULL,momType=c("bal","balSample","ATT"),
+ATEgel(g, balm, w=NULL, y=NULL, treat=NULL, tet0=NULL,momType=c("bal","balSample","ATT"),
                    popMom = NULL, family=c("linear","logit", "probit"),
                    type = c("EL", "ET", "CUE", "ETEL", "HD", "ETHD"),
                    tol_lam = 1e-9, tol_obj = 1e-9, tol_mom = 1e-9, maxiterlam = 100,
@@ -39,6 +39,10 @@
 \item{treat}{The treatment indicator when \code{g} is a function. Not
   implemented yet}
 
+\item{w}{A formula to add covariates to the main regression. When
+  \code{NULL}, the default value, the main regression only include
+  treatment indicators.}
+
 \item{tet0}{A \eqn{3 \times 1} vector of starting values. If not
   provided, they are obtained using an OLS regression}
 



More information about the Gmm-commits mailing list